Merge pull request #14827 from YashasSamaga:cuda4dnn-csl-low
[platform/upstream/opencv.git] / modules / dnn / src / cuda4dnn / primitives / eltwise.hpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ELTWISE_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ELTWISE_HPP
7
8 #include "../../op_cuda.hpp"
9
10 #include "../csl/stream.hpp"
11 #include "../csl/tensor.hpp"
12 #include "../csl/tensor_ops.hpp"
13
14 #include "../kernels/eltwise_ops.hpp"
15
16 #include <opencv2/core.hpp>
17
18 #include <cstddef>
19 #include <vector>
20 #include <utility>
21
22 namespace cv { namespace dnn { namespace cuda4dnn {
23
24     enum class EltwiseOpType {
25         MAX,
26         SUM,
27         PRODUCT
28     };
29
30     template <class T>
31     class EltwiseOp final : public CUDABackendNode {
32     public:
33         using wrapper_type = GetCUDABackendWrapperType<T>;
34
35         template <class V>
36         EltwiseOp(csl::Stream stream_, EltwiseOpType op_, std::vector<V> coeffs_)
37             : stream(std::move(stream_)), op{ op_ }, coeffs(std::begin(coeffs_), std::end(coeffs_))
38         {
39         }
40
41         void forward(
42             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
43             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
44             csl::Workspace& workspace) override
45         {
46             CV_Assert(inputs.size() >= 2);
47             CV_Assert(outputs.size() == 1);
48
49             CV_Assert(coeffs.size() == 0 || op == EltwiseOpType::SUM);
50             CV_Assert(coeffs.size() == 0 || inputs.size() == coeffs.size());
51
52             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
53             auto output = output_wrapper->getSpan();
54
55             if (inputs.size() == 2)
56             {
57                 auto input_wrapper_x = inputs[0].dynamicCast<wrapper_type>();
58                 auto input_x = input_wrapper_x->getView();
59
60                 auto input_wrapper_y = inputs[1].dynamicCast<wrapper_type>();
61                 auto input_y = input_wrapper_y->getView();
62
63                 switch (op)
64                 {
65                 case EltwiseOpType::MAX: kernels::eltwise_max_2<T>(stream, output, input_x, input_y); break;
66                 case EltwiseOpType::PRODUCT: kernels::eltwise_prod_2<T>(stream, output, input_x, input_y); break;
67                 case EltwiseOpType::SUM:
68                     if (coeffs.empty() || (coeffs[0] == 1 && coeffs[1] == 1))
69                         kernels::eltwise_sum_2<T>(stream, output, input_x, input_y);
70                     else
71                         kernels::eltwise_sum_coeff_2<T>(stream, output, coeffs[0], input_x, coeffs[1], input_y);
72                     break;
73                 }
74             }
75             else
76             {
77                 auto input_wrapper_0 = inputs[0].dynamicCast<wrapper_type>();
78                 auto input_0 = input_wrapper_0->getView();
79
80                 /* we first make a copy and then apply EltwiseOp cumulatively */
81                 csl::tensor_ops::copy(stream, output, input_0);
82
83                 for (int i = 1; i < inputs.size(); i++)
84                 {
85                     auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
86                     auto input = input_wrapper->getView();
87
88                     switch (op)
89                     {
90                     case EltwiseOpType::MAX: kernels::eltwise_max_2<T>(stream, output, output, input); break;
91                     case EltwiseOpType::PRODUCT: kernels::eltwise_prod_2<T>(stream, output, output, input); break;
92                     case EltwiseOpType::SUM:
93                         if (coeffs.empty() || coeffs[i] == 1)
94                             kernels::eltwise_sum_2<T>(stream, output, output, input);
95                         else
96                         {
97                             /* if this is the first op, we must scale output too */
98                             auto coeff_x = (i == 1) ? coeffs[0] : static_cast<T>(1.0);
99                             kernels::eltwise_sum_coeff_2<T>(stream, output, coeff_x, output, coeffs[i], input);
100                         }
101                         break;
102                     }
103                 }
104             }
105         }
106
107     private:
108         csl::Stream stream;
109         EltwiseOpType op;
110         std::vector<T> coeffs;
111     };
112
113 }}} /* namespace cv::dnn::cuda4dnn */
114
115 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ELTWISE_HPP */