// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_DNN_CUDA4DNN_CSL_CUDNN_LRN_HPP
#define OPENCV_DNN_CUDA4DNN_CSL_CUDNN_LRN_HPP

#include "cudnn.hpp"

#include "../pointer.hpp"
#include "../workspace.hpp"

#include <opencv2/core.hpp>

#include <cudnn.h>

#include <cstddef>

namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn {
    class LRNDescriptor {
    public:
        enum class LRNType {
            ACROSS_CHANNELS,
            WITHIN_CHANNEL
        };

        LRNDescriptor() noexcept : descriptor{ nullptr } { }
        LRNDescriptor(const LRNDescriptor&) = delete;
        LRNDescriptor(LRNDescriptor&& other) noexcept
            : descriptor{ other.descriptor }, type{ other.type } {
            other.descriptor = nullptr;
        }
        /** sets up a LRN descriptor
         *
         * @param local_size    size of the normalization window
         * @param alpha         variance scaling parameter
         * @param beta          power parameter
         * @param k             bias parameter
         *
         * @note \p alpha is divided by the window width in across channels mode
         * @note \p alpha is divided by the (window width)^spatialDimensions in within channel mode
         *
         * @note \p alpha, \p beta and \p k are cast to the tensor data type during the operation
         *
         * Exception Guarantee: Basic
         */
        LRNDescriptor(std::size_t local_size, double alpha, double beta, double k, LRNType type_) {
            constructor(local_size, alpha, beta, k, type_);
        }
        ~LRNDescriptor() noexcept {
            if (descriptor != nullptr) {
                /* cudnnDestroyLRNDescriptor will not fail for a valid descriptor */
                CUDA4DNN_CHECK_CUDNN(cudnnDestroyLRNDescriptor(descriptor));
            }
        }
        LRNDescriptor& operator=(const LRNDescriptor&) = delete;
        LRNDescriptor& operator=(LRNDescriptor&& other) noexcept {
            descriptor = other.descriptor;
            type = other.type;
            other.descriptor = nullptr;
            return *this;
        }

        cudnnLRNDescriptor_t get() const noexcept { return descriptor; }
        LRNType getType() const noexcept { return type; }
    private:
        void constructor(std::size_t local_size, double alpha, double beta, double k, LRNType type_) {
            CV_Assert(CUDNN_LRN_MIN_N <= local_size && local_size <= CUDNN_LRN_MAX_N);

            type = type_;

            CUDA4DNN_CHECK_CUDNN(cudnnCreateLRNDescriptor(&descriptor));
            try {
                CUDA4DNN_CHECK_CUDNN(
                    cudnnSetLRNDescriptor(
                        descriptor,
                        local_size,
                        alpha,
                        beta,
                        k
                    )
                );
            } catch (...) {
                /* cudnnDestroyLRNDescriptor will not fail for a valid descriptor */
                CUDA4DNN_CHECK_CUDNN(cudnnDestroyLRNDescriptor(descriptor));
                throw;
            }
        }

        cudnnLRNDescriptor_t descriptor;
        LRNType type;
    };
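    /* Usage sketch (illustrative only, not part of the original header): constructing a
     * descriptor for a cross-channel LRN with a window of 5; the hyper-parameter values
     * shown here are assumptions picked for the example, not defaults of this class.
     *
     *     LRNDescriptor lrnDesc(
     *         5,                                        // local_size
     *         1e-4,                                     // alpha
     *         0.75,                                     // beta
     *         2.0,                                      // k
     *         LRNDescriptor::LRNType::ACROSS_CHANNELS   // type
     *     );
     *
     * The descriptor is movable but not copyable; the wrapped cudnnLRNDescriptor_t is
     * destroyed automatically by the destructor.
     */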
    /** @brief performs local response normalization
     *
     * dstValue = alpha * result + beta * priorDstValue
     *
     * @tparam          T           element type (must be `half` or `float`)
     *
     * @param           handle      valid cuDNN Handle
     * @param           lrnDesc     LRN description
     * @param           inputDesc   tensor descriptor describing the input
     * @param[in]       inputPtr    pointer to input tensor in device memory
     * @param           alpha       result scale factor
     * @param           beta        previous value scale factor
     * @param           outputDesc  tensor descriptor describing the output
     * @param[out]      outputPtr   pointer to output tensor in device memory
     * @param           workspace   workspace memory large enough to hold the intermediate buffers required in within-channel mode
     *
     * Exception Guarantee: Basic
     */
    template <class T>
    void LRN(
        const Handle& handle,
        const LRNDescriptor& lrnDesc,
        const TensorDescriptor<T>& inputDesc,
        DevicePtr<const T> inputPtr,
        T alpha, T beta,
        const TensorDescriptor<T>& outputDesc,
        DevicePtr<T> outputPtr,
        WorkspaceInstance workspace)
    {
        if (lrnDesc.getType() == LRNDescriptor::LRNType::ACROSS_CHANNELS) {
            CUDA4DNN_CHECK_CUDNN(
                cudnnLRNCrossChannelForward(
                    handle.get(),
                    lrnDesc.get(), CUDNN_LRN_CROSS_CHANNEL_DIM1,
                    &alpha, inputDesc.get(), inputPtr.get(),
                    &beta, outputDesc.get(), outputPtr.get()
                )
            );
        } else if (lrnDesc.getType() == LRNDescriptor::LRNType::WITHIN_CHANNEL) {
            std::size_t size;
            CUDA4DNN_CHECK_CUDNN(cudnnGetTensorSizeInBytes(inputDesc.get(), &size));

            /* temporary buffers required by cudnnDivisiveNormalizationForward */
            DevicePtr<void> temp1 = workspace.get_span<half>(size).data();
            DevicePtr<void> temp2 = workspace.get_span<half>(size).data();

            CUDA4DNN_CHECK_CUDNN(
                cudnnDivisiveNormalizationForward(
                    handle.get(),
                    lrnDesc.get(), CUDNN_DIVNORM_PRECOMPUTED_MEANS,
                    &alpha, inputDesc.get(), inputPtr.get(),
                    NULL, /* means: not precomputed */
                    static_cast<void*>(temp1), static_cast<void*>(temp2),
                    &beta, outputDesc.get(), outputPtr.get()
                )
            );
        }
    }
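    /* Usage sketch (illustrative only): invoking the generic overload, assuming `handle`,
     * `lrnDesc`, the tensor descriptors, device pointers and a sufficiently large
     * `workspace` have been set up elsewhere.
     *
     *     csl::cudnn::LRN<float>(
     *         handle, lrnDesc,
     *         inputDesc, inputPtr,
     *         1.0f, 0.0f,              // alpha = 1, beta = 0: overwrite the output
     *         outputDesc, outputPtr,
     *         workspace
     *     );
     *
     * With alpha = 1 and beta = 0, the output is simply the LRN result; a non-zero beta
     * blends the result with the previous contents of the output tensor as described above.
     */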
    template <> inline
    void LRN(
        const Handle& handle,
        const LRNDescriptor& lrnDesc,
        const TensorDescriptor<half>& inputDesc,
        DevicePtr<const half> inputPtr,
        half alpha, half beta,
        const TensorDescriptor<half>& outputDesc,
        DevicePtr<half> outputPtr,
        WorkspaceInstance workspace)
    {
        /* we specialize for fp16 as the scaling factors must be provided as `float` */
        float alpha_ = alpha, beta_ = beta;
        if (lrnDesc.getType() == LRNDescriptor::LRNType::ACROSS_CHANNELS) {
            CUDA4DNN_CHECK_CUDNN(
                cudnnLRNCrossChannelForward(
                    handle.get(),
                    lrnDesc.get(), CUDNN_LRN_CROSS_CHANNEL_DIM1,
                    &alpha_, inputDesc.get(), inputPtr.get(),
                    &beta_, outputDesc.get(), outputPtr.get()
                )
            );
        } else if (lrnDesc.getType() == LRNDescriptor::LRNType::WITHIN_CHANNEL) {
            std::size_t size;
            CUDA4DNN_CHECK_CUDNN(cudnnGetTensorSizeInBytes(inputDesc.get(), &size));

            /* temporary buffers required by cudnnDivisiveNormalizationForward */
            DevicePtr<void> temp1 = workspace.get_span<half>(size).data();
            DevicePtr<void> temp2 = workspace.get_span<half>(size).data();

            CUDA4DNN_CHECK_CUDNN(
                cudnnDivisiveNormalizationForward(
                    handle.get(),
                    lrnDesc.get(), CUDNN_DIVNORM_PRECOMPUTED_MEANS,
                    &alpha_, inputDesc.get(), inputPtr.get(),
                    NULL, /* means: not precomputed */
                    static_cast<void*>(temp1), static_cast<void*>(temp2),
                    &beta_, outputDesc.get(), outputPtr.get()
                )
            );
        }
    }
}}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */

#endif /* OPENCV_DNN_CUDA4DNN_CSL_CUDNN_LRN_HPP */