Merge pull request #14827 from YashasSamaga:cuda4dnn-csl-low
[platform/upstream/opencv.git] / modules / dnn / src / cuda4dnn / primitives / inner_product.hpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INNER_PRODUCT_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INNER_PRODUCT_HPP
7
8 #include "../../op_cuda.hpp"
9
10 #include "../csl/stream.hpp"
11 #include "../csl/cublas.hpp"
12 #include "../csl/tensor.hpp"
13 #include "../csl/tensor_ops.hpp"
14
15 #include "../kernels/scale_shift.hpp"
16
17 #include <opencv2/core.hpp>
18
19 #include <cstddef>
20 #include <vector>
21 #include <utility>
22
23 namespace cv { namespace dnn { namespace cuda4dnn {
24
25     template <class T>
26     class InnerProductOp final : public CUDABackendNode {
27     public:
28         using wrapper_type = GetCUDABackendWrapperType<T>;
29
30         InnerProductOp(csl::Stream stream_, csl::cublas::Handle handle, std::size_t axis, const Mat& weights, const Mat& bias)
31             : stream(std::move(stream_)), cublasHandle(std::move(handle)), axis{ axis }
32         {
33             weightsTensor = csl::makeTensorHeader<T>(weights);
34             CV_Assert(get_effective_rank(weightsTensor) == 2);
35             csl::copyMatToTensor<T>(weights, weightsTensor, stream);
36
37             if (!bias.empty())
38             {
39                 biasTensor = csl::makeTensorHeader<T>(bias);
40                 csl::copyMatToTensor<T>(bias, biasTensor, stream);
41                 CV_Assert(weightsTensor.get_axis_size(-2) == biasTensor.size());
42             }
43         }
44
45         void forward(
46             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
47             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
48             csl::Workspace& workspace) override
49         {
50             for (int i = 0; i < inputs.size(); i++)
51             {
52                 auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
53                 auto input = input_wrapper->getView();
54
55                 auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
56                 auto output = output_wrapper->getSpan();
57
58                 std::size_t batch_size = input.size_range(0, axis);
59
60                 auto input_size = input.size() / batch_size;
61                 CV_Assert(input_size == weightsTensor.get_axis_size(-1));
62
63                 auto output_size = output.size() / batch_size;
64                 CV_Assert(output_size == weightsTensor.get_axis_size(-2));
65
66                 /* we treat the input and output as a matrix with dimensions (batch_size, input_size)
67                  * and (batch_size, output_size) respectively
68                  *
69                  * weight matrix dimensions: (output_size, input_size)
70                  *
71                  * I(W^T) = O
72                  * (batch_size, input_size) * (input_size, output_size) = (batch_size, output_size)
73                  */
74                 input.reshape(batch_size, input_size);
75                 output.reshape(batch_size, output_size);
76                 csl::tensor_ops::gemm<T>(cublasHandle, 0.0, output, 1.0, false, input, true, weightsTensor);
77
78                 if (!biasTensor.empty())
79                     kernels::biasN<T>(stream, output, output, 1, biasTensor);
80             }
81         }
82
83     private:
84         csl::Stream stream;
85         csl::cublas::Handle cublasHandle;
86         csl::Tensor<T> weightsTensor, biasTensor;
87         std::size_t axis;
88     };
89
90 }}} /* namespace cv::dnn::cuda4dnn */
91
92 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INNER_PRODUCT_HPP */