1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 #ifndef OPENCV_DNN_CUDA4DNN_CSL_CUDNN_TRANSFORM_HPP
6 #define OPENCV_DNN_CUDA4DNN_CSL_CUDNN_TRANSFORM_HPP
8 #include "../pointer.hpp"
14 #include <type_traits>
17 namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn {
19 /** describes a tensor transform operation
21 * Supported transformations:
22 * - add or remove asymmetric padding
24 class TensorTransformDescriptor {
26 TensorTransformDescriptor() noexcept : descriptor{ nullptr } { }
27 TensorTransformDescriptor(const TensorTransformDescriptor&) = delete;
28 TensorTransformDescriptor(TensorTransformDescriptor&& other) noexcept
29 : descriptor{ other.descriptor } {
30 other.descriptor = nullptr;
33 /** constructs a convolution descriptor
36 * - \p padding_left and \p padding_right must have the same size
38 * The length of the containers is interpreted as the rank of the tensors which will be given.
40 * @note \p padding_left and \p padding_right may have negative values to remove padding
42 * Exception Guarantee: Basic
44 template <class SequenceContainer, typename = decltype(std::begin(std::declval<SequenceContainer>()))>
45 TensorTransformDescriptor(
46 const SequenceContainer& padding_left,
47 const SequenceContainer& padding_right)
49 constructor(padding_left, padding_right);
52 ~TensorTransformDescriptor() noexcept {
53 if (descriptor != nullptr) {
54 /* cudnnDestroyTensorTransformDescriptor will not fail for a valid descriptor */
55 CUDA4DNN_CHECK_CUDNN(cudnnDestroyTensorTransformDescriptor(descriptor));
59 TensorTransformDescriptor& operator=(const TensorTransformDescriptor&) = delete;
60 TensorTransformDescriptor& operator=(TensorTransformDescriptor&& other) noexcept {
61 descriptor = other.descriptor;
62 other.descriptor = nullptr;
66 cudnnTensorTransformDescriptor_t get() const noexcept { return descriptor; }
69 template <class SequenceContainer>
71 const SequenceContainer& padding_left,
72 const SequenceContainer& padding_right
75 CV_Assert(padding_left.size() == padding_right.size());
77 auto ipadding_left = std::vector<int32_t>(std::begin(padding_left), std::end(padding_left));
78 auto ipadding_right = std::vector<int32_t>(std::begin(padding_right), std::end(padding_right));
79 CUDA4DNN_CHECK_CUDNN(cudnnCreateTensorTransformDescriptor(&descriptor));
82 cudnnSetTensorTransformDescriptor(
84 ipadding_left.size(), CUDNN_TENSOR_NCHW,
85 ipadding_left.data(), ipadding_right.data(),
86 NULL, CUDNN_TRANSFORM_FOLD
90 /* cudnnDestroyTensorTransformDescriptor will not fail for a valid descriptor */
91 CUDA4DNN_CHECK_CUDNN(cudnnDestroyTensorTransformDescriptor(descriptor));
96 cudnnTensorTransformDescriptor_t descriptor;
101 const Handle& handle,
102 const TensorTransformDescriptor& transDesc,
103 const TensorDescriptor<T>& inputDesc,
104 DevicePtr<const T> inputPtr,
105 const TensorDescriptor<T>& outputDesc,
106 DevicePtr<T> outputPtr)
108 T alpha = 1.0, beta = 0.0;
109 CUDA4DNN_CHECK_CUDNN(
110 cudnnTransformTensorEx(
113 &alpha, inputDesc.get(), inputPtr.get(),
114 &beta, outputDesc.get(), outputPtr.get()
121 const Handle& handle,
122 const TensorTransformDescriptor& transDesc,
123 const TensorDescriptor<half>& inputDesc,
124 DevicePtr<const half> inputPtr,
125 const TensorDescriptor<half>& outputDesc,
126 DevicePtr<half> outputPtr)
128 /* we specalize for fp16 as the scaling factors must be provided as `float` */
129 float alpha = 1.0, beta = 0.0;
130 CUDA4DNN_CHECK_CUDNN(
131 cudnnTransformTensorEx(
134 &alpha, inputDesc.get(), inputPtr.get(),
135 &beta, outputDesc.get(), outputPtr.get()
140 }}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */
142 #endif /* OPENCV_DNN_CUDA4DNN_CSL_CUDNN_TRANSFORM_HPP */