Merge pull request #14827 from YashasSamaga:cuda4dnn-csl-low
[platform/upstream/opencv.git] / modules / dnn / src / cuda4dnn / primitives / permute.hpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PERMUTE_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PERMUTE_HPP
7
8 #include "../../op_cuda.hpp"
9
10 #include "../csl/stream.hpp"
11 #include "../csl/tensor_ops.hpp"
12
13 #include "../kernels/permute.hpp"
14
15 #include <opencv2/core.hpp>
16
17 #include <cstddef>
18 #include <vector>
19 #include <utility>
20
21 namespace cv { namespace dnn { namespace cuda4dnn {
22
23     template <class T>
24     class PermuteOp final : public CUDABackendNode {
25     public:
26         using wrapper_type = GetCUDABackendWrapperType<T>;
27
28         PermuteOp(csl::Stream stream_, std::vector<std::size_t> order_)
29             : stream(std::move(stream_)), order(std::move(order_)) { }
30
31         void forward(
32             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
33             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
34             csl::Workspace& workspace) override
35         {
36             for (int i = 0; i < inputs.size(); i++)
37             {
38                 auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
39                 auto input = input_wrapper->getView();
40
41                 auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
42                 auto output = output_wrapper->getSpan();
43
44                 auto needsPermute = [&] {
45                     for (int i = 0; i < order.size(); i++)
46                         if (order[i] != i)
47                             return true;
48                     return false;
49                 }();
50
51                 if (needsPermute)
52                 {
53                     kernels::permute(stream, output, input, order);
54                 }
55                 else
56                 {
57                     if (input.get() != output.get())
58                         csl::tensor_ops::copy(stream, output, input);
59                 }
60             }
61         }
62
63     private:
64         csl::Stream stream;
65         std::vector<std::size_t> order;
66     };
67
68 }}} /* namespace cv::dnn::cuda4dnn */
69
70 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PERMUTE_HPP */