modules/dnn/src/cuda4dnn/primitives/eltwise.hpp

   1 // This file is part of OpenCV project.
   2 // It is subject to the license terms in the LICENSE file found in the top-level directory
   3 // of this distribution and at http://opencv.org/license.html.
   4
   5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ELTWISE_HPP
   6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ELTWISE_HPP
   7
   8 #include "../../op_cuda.hpp"
   9
  10 #include "../csl/stream.hpp"
  11 #include "../csl/tensor.hpp"
  12 #include "../csl/tensor_ops.hpp"
  13
  14 #include "../kernels/eltwise_ops.hpp"
  15
  16 #include <opencv2/core.hpp>
  17
  18 #include <cstddef>
  19 #include <vector>
  20 #include <utility>
  21
  22 namespace cv { namespace dnn { namespace cuda4dnn {
  23
  24     enum class EltwiseOpType {
  25         MAX,
  26         SUM,
  27         PRODUCT
  28     };
  29
  30     template <class T>
  31     class EltwiseOp final : public CUDABackendNode {
  32     public:
  33         using wrapper_type = GetCUDABackendWrapperType<T>;
  34
  35         template <class V>
  36         EltwiseOp(csl::Stream stream_, EltwiseOpType op_, std::vector<V> coeffs_)
  37             : stream(std::move(stream_)), op{ op_ }, coeffs(std::begin(coeffs_), std::end(coeffs_))
  38         {
  39         }
  40
  41         void forward(
  42             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
  43             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
  44             csl::Workspace& workspace) override
  45         {
  46             CV_Assert(inputs.size() >= 2);
  47             CV_Assert(outputs.size() == 1);
  48
  49             CV_Assert(coeffs.size() == 0 || op == EltwiseOpType::SUM);
  50             CV_Assert(coeffs.size() == 0 || inputs.size() == coeffs.size());
  51
  52             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
  53             auto output = output_wrapper->getSpan();
  54
  55             if (inputs.size() == 2)
  56             {
  57                 auto input_wrapper_x = inputs[0].dynamicCast<wrapper_type>();
  58                 auto input_x = input_wrapper_x->getView();
  59
  60                 auto input_wrapper_y = inputs[1].dynamicCast<wrapper_type>();
  61                 auto input_y = input_wrapper_y->getView();
  62
  63                 switch (op)
  64                 {
  65                 case EltwiseOpType::MAX: kernels::eltwise_max_2<T>(stream, output, input_x, input_y); break;
  66                 case EltwiseOpType::PRODUCT: kernels::eltwise_prod_2<T>(stream, output, input_x, input_y); break;
  67                 case EltwiseOpType::SUM:
  68                     if (coeffs.empty() || (coeffs[0] == 1 && coeffs[1] == 1))
  69                         kernels::eltwise_sum_2<T>(stream, output, input_x, input_y);
  70                     else
  71                         kernels::eltwise_sum_coeff_2<T>(stream, output, coeffs[0], input_x, coeffs[1], input_y);
  72                     break;
  73                 }
  74             }
  75             else
  76             {
  77                 auto input_wrapper_0 = inputs[0].dynamicCast<wrapper_type>();
  78                 auto input_0 = input_wrapper_0->getView();
  79
  80                 /* we first make a copy and then apply EltwiseOp cumulatively */
  81                 csl::tensor_ops::copy(stream, output, input_0);
  82
  83                 for (int i = 1; i < inputs.size(); i++)
  84                 {
  85                     auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
  86                     auto input = input_wrapper->getView();
  87
  88                     switch (op)
  89                     {
  90                     case EltwiseOpType::MAX: kernels::eltwise_max_2<T>(stream, output, output, input); break;
  91                     case EltwiseOpType::PRODUCT: kernels::eltwise_prod_2<T>(stream, output, output, input); break;
  92                     case EltwiseOpType::SUM:
  93                         if (coeffs.empty() || coeffs[i] == 1)
  94                             kernels::eltwise_sum_2<T>(stream, output, output, input);
  95                         else
  96                         {
  97                             /* if this is the first op, we must scale output too */
  98                             auto coeff_x = (i == 1) ? coeffs[0] : static_cast<T>(1.0);
  99                             kernels::eltwise_sum_coeff_2<T>(stream, output, coeff_x, output, coeffs[i], input);
 100                         }
 101                         break;
 102                     }
 103                 }
 104             }
 105         }
 106
 107     private:
 108         csl::Stream stream;
 109         EltwiseOpType op;
 110         std::vector<T> coeffs;
 111     };
 112
 113 }}} /* namespace cv::dnn::cuda4dnn */
 114
 115 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ELTWISE_HPP */