modules/dnn/src/cuda4dnn/primitives/permute.hpp

   1 // This file is part of OpenCV project.
   2 // It is subject to the license terms in the LICENSE file found in the top-level directory
   3 // of this distribution and at http://opencv.org/license.html.
   4
   5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PERMUTE_HPP
   6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PERMUTE_HPP
   7
   8 #include "../../op_cuda.hpp"
   9
  10 #include "../csl/stream.hpp"
  11 #include "../csl/tensor_ops.hpp"
  12
  13 #include "../kernels/permute.hpp"
  14
  15 #include <opencv2/core.hpp>
  16
  17 #include <cstddef>
  18 #include <vector>
  19 #include <utility>
  20
  21 namespace cv { namespace dnn { namespace cuda4dnn {
  22
  23     template <class T>
  24     class PermuteOp final : public CUDABackendNode {
  25     public:
  26         using wrapper_type = GetCUDABackendWrapperType<T>;
  27
  28         PermuteOp(csl::Stream stream_, std::vector<std::size_t> order_)
  29             : stream(std::move(stream_)), order(std::move(order_)) { }
  30
  31         void forward(
  32             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
  33             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
  34             csl::Workspace& workspace) override
  35         {
  36             for (int i = 0; i < inputs.size(); i++)
  37             {
  38                 auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
  39                 auto input = input_wrapper->getView();
  40
  41                 auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
  42                 auto output = output_wrapper->getSpan();
  43
  44                 auto needsPermute = [&] {
  45                     for (int i = 0; i < order.size(); i++)
  46                         if (order[i] != i)
  47                             return true;
  48                     return false;
  49                 }();
  50
  51                 if (needsPermute)
  52                 {
  53                     kernels::permute(stream, output, input, order);
  54                 }
  55                 else
  56                 {
  57                     if (input.get() != output.get())
  58                         csl::tensor_ops::copy(stream, output, input);
  59                 }
  60             }
  61         }
  62
  63     private:
  64         csl::Stream stream;
  65         std::vector<std::size_t> order;
  66     };
  67
  68 }}} /* namespace cv::dnn::cuda4dnn */
  69
  70 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PERMUTE_HPP */