1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ELTWISE_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ELTWISE_HPP
8 #include "../../op_cuda.hpp"
10 #include "../csl/stream.hpp"
11 #include "../csl/tensor.hpp"
12 #include "../csl/tensor_ops.hpp"
14 #include "../kernels/eltwise_ops.hpp"
16 #include <opencv2/core.hpp>
22 namespace cv { namespace dnn { namespace cuda4dnn {
24 enum class EltwiseOpType {
31 class EltwiseOp final : public CUDABackendNode {
33 using wrapper_type = GetCUDABackendWrapperType<T>;
36 EltwiseOp(csl::Stream stream_, EltwiseOpType op_, std::vector<V> coeffs_)
37 : stream(std::move(stream_)), op{ op_ }, coeffs(std::begin(coeffs_), std::end(coeffs_))
42 const std::vector<cv::Ptr<BackendWrapper>>& inputs,
43 const std::vector<cv::Ptr<BackendWrapper>>& outputs,
44 csl::Workspace& workspace) override
46 CV_Assert(inputs.size() >= 2);
47 CV_Assert(outputs.size() == 1);
49 CV_Assert(coeffs.size() == 0 || op == EltwiseOpType::SUM);
50 CV_Assert(coeffs.size() == 0 || inputs.size() == coeffs.size());
52 auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
53 auto output = output_wrapper->getSpan();
55 if (inputs.size() == 2)
57 auto input_wrapper_x = inputs[0].dynamicCast<wrapper_type>();
58 auto input_x = input_wrapper_x->getView();
60 auto input_wrapper_y = inputs[1].dynamicCast<wrapper_type>();
61 auto input_y = input_wrapper_y->getView();
65 case EltwiseOpType::MAX: kernels::eltwise_max_2<T>(stream, output, input_x, input_y); break;
66 case EltwiseOpType::PRODUCT: kernels::eltwise_prod_2<T>(stream, output, input_x, input_y); break;
67 case EltwiseOpType::SUM:
68 if (coeffs.empty() || (coeffs[0] == 1 && coeffs[1] == 1))
69 kernels::eltwise_sum_2<T>(stream, output, input_x, input_y);
71 kernels::eltwise_sum_coeff_2<T>(stream, output, coeffs[0], input_x, coeffs[1], input_y);
77 auto input_wrapper_0 = inputs[0].dynamicCast<wrapper_type>();
78 auto input_0 = input_wrapper_0->getView();
80 /* we first make a copy and then apply EltwiseOp cumulatively */
81 csl::tensor_ops::copy(stream, output, input_0);
83 for (int i = 1; i < inputs.size(); i++)
85 auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
86 auto input = input_wrapper->getView();
90 case EltwiseOpType::MAX: kernels::eltwise_max_2<T>(stream, output, output, input); break;
91 case EltwiseOpType::PRODUCT: kernels::eltwise_prod_2<T>(stream, output, output, input); break;
92 case EltwiseOpType::SUM:
93 if (coeffs.empty() || coeffs[i] == 1)
94 kernels::eltwise_sum_2<T>(stream, output, output, input);
97 /* if this is the first op, we must scale output too */
98 auto coeff_x = (i == 1) ? coeffs[0] : static_cast<T>(1.0);
99 kernels::eltwise_sum_coeff_2<T>(stream, output, coeff_x, output, coeffs[i], input);
110 std::vector<T> coeffs;
113 }}} /* namespace cv::dnn::cuda4dnn */
115 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ELTWISE_HPP */