modules/dnn/src/cuda4dnn/primitives/inner_product.hpp

   1 // This file is part of OpenCV project.
   2 // It is subject to the license terms in the LICENSE file found in the top-level directory
   3 // of this distribution and at http://opencv.org/license.html.
   4
   5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INNER_PRODUCT_HPP
   6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INNER_PRODUCT_HPP
   7
   8 #include "../../op_cuda.hpp"
   9
  10 #include "../csl/stream.hpp"
  11 #include "../csl/cublas.hpp"
  12 #include "../csl/tensor.hpp"
  13 #include "../csl/tensor_ops.hpp"
  14
  15 #include "../kernels/scale_shift.hpp"
  16
  17 #include <opencv2/core.hpp>
  18
  19 #include <cstddef>
  20 #include <vector>
  21 #include <utility>
  22
  23 namespace cv { namespace dnn { namespace cuda4dnn {
  24
  25     template <class T>
  26     class InnerProductOp final : public CUDABackendNode {
  27     public:
  28         using wrapper_type = GetCUDABackendWrapperType<T>;
  29
  30         InnerProductOp(csl::Stream stream_, csl::cublas::Handle handle, std::size_t axis, const Mat& weights, const Mat& bias)
  31             : stream(std::move(stream_)), cublasHandle(std::move(handle)), axis{ axis }
  32         {
  33             weightsTensor = csl::makeTensorHeader<T>(weights);
  34             CV_Assert(get_effective_rank(weightsTensor) == 2);
  35             csl::copyMatToTensor<T>(weights, weightsTensor, stream);
  36
  37             if (!bias.empty())
  38             {
  39                 biasTensor = csl::makeTensorHeader<T>(bias);
  40                 csl::copyMatToTensor<T>(bias, biasTensor, stream);
  41                 CV_Assert(weightsTensor.get_axis_size(-2) == biasTensor.size());
  42             }
  43         }
  44
  45         void forward(
  46             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
  47             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
  48             csl::Workspace& workspace) override
  49         {
  50             for (int i = 0; i < inputs.size(); i++)
  51             {
  52                 auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
  53                 auto input = input_wrapper->getView();
  54
  55                 auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
  56                 auto output = output_wrapper->getSpan();
  57
  58                 std::size_t batch_size = input.size_range(0, axis);
  59
  60                 auto input_size = input.size() / batch_size;
  61                 CV_Assert(input_size == weightsTensor.get_axis_size(-1));
  62
  63                 auto output_size = output.size() / batch_size;
  64                 CV_Assert(output_size == weightsTensor.get_axis_size(-2));
  65
  66                 /* we treat the input and output as a matrix with dimensions (batch_size, input_size)
  67                  * and (batch_size, output_size) respectively
  68                  *
  69                  * weight matrix dimensions: (output_size, input_size)
  70                  *
  71                  * I(W^T) = O
  72                  * (batch_size, input_size) * (input_size, output_size) = (batch_size, output_size)
  73                  */
  74                 input.reshape(batch_size, input_size);
  75                 output.reshape(batch_size, output_size);
  76                 csl::tensor_ops::gemm<T>(cublasHandle, 0.0, output, 1.0, false, input, true, weightsTensor);
  77
  78                 if (!biasTensor.empty())
  79                     kernels::biasN<T>(stream, output, output, 1, biasTensor);
  80             }
  81         }
  82
  83     private:
  84         csl::Stream stream;
  85         csl::cublas::Handle cublasHandle;
  86         csl::Tensor<T> weightsTensor, biasTensor;
  87         std::size_t axis;
  88     };
  89
  90 }}} /* namespace cv::dnn::cuda4dnn */
  91
  92 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INNER_PRODUCT_HPP */