Merge pull request #14827 from YashasSamaga:cuda4dnn-csl-low
[platform/upstream/opencv.git] / modules / dnn / src / cuda4dnn / primitives / normalize_bbox.hpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_NORMALIZE_BBOX_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_NORMALIZE_BBOX_HPP
7
8 #include "../../op_cuda.hpp"
9
10 #include "../csl/stream.hpp"
11 #include "../csl/span.hpp"
12 #include "../csl/tensor.hpp"
13 #include "../csl/workspace.hpp"
14
15 #include "../kernels/scale_shift.hpp"
16 #include "../kernels/normalize.hpp"
17
18 #include <opencv2/core.hpp>
19
20 #include <cstddef>
21 #include <vector>
22 #include <utility>
23
24 namespace cv { namespace dnn { namespace cuda4dnn {
25
    template <class T>
    struct NormalizeConfiguration {
        /* full shape of the input tensor; used to size the reduction workspace */
        std::vector<std::size_t> input_shape;

        /* axis range across which values are normalized
         *
         * [0, axis_start) = outer range
         * [axis_start, axis_end) = mid range
         * [axis_end, ndims) = inner range
         *
         * for each location in the outer and inner range, all the values in the mid range are
         * normalized together
         */
        std::size_t axis_start, axis_end;

        /* 1 for L1 norm, 2 for L2 norm */
        std::size_t norm;

        /* epsilon to use to avoid division by zero */
        T eps;
    };
47
48     template <class T>
49     class NormalizeOp final : public CUDABackendNode {
50     public:
51         using wrapper_type = GetCUDABackendWrapperType<T>;
52
53         template <class V>
54         NormalizeOp(csl::Stream stream_, const Mat& weights, const NormalizeConfiguration<V>& config)
55             : stream(std::move(stream_)), weight{ 1.0 }
56         {
57             norm_order = config.norm;
58             epsilon = config.eps;
59             axis_start = config.axis_start;
60             axis_end = config.axis_end;
61
62             if (!weights.empty())
63             {
64                 if (weights.total() == 1)
65                 {
66                     CV_Assert(weights.type() == CV_32F);
67                     weight = weights.at<float>(0, 0);
68                 }
69                 else
70                 {
71                     weightsTensor = csl::makeTensorHeader<T>(weights);
72                     csl::copyMatToTensor<T>(weights, weightsTensor, stream);
73                 }
74             }
75
76             std::size_t outer_size = 1;
77             for (int i = 0; i < axis_start; i++)
78                 outer_size *= config.input_shape[i];
79
80             std::size_t inner_size = 1;
81             for (int i = axis_end; i < config.input_shape.size(); i++)
82                 inner_size *= config.input_shape[i];
83
84             csl::WorkspaceBuilder builder;
85             builder.require<T>(outer_size * inner_size);
86             scratch_mem_in_bytes = builder.required_workspace_size();
87         }
88
89         void forward(
90             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
91             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
92             csl::Workspace& workspace) override
93         {
94             CV_Assert(inputs.size() == 1 && outputs.size() == 1);
95
96             auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
97             auto input = input_wrapper->getView();
98
99             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
100             auto output = output_wrapper->getSpan();
101
102             std::size_t outer_size = input.size_range(0, axis_start);
103             std::size_t mid_size = input.size_range(axis_start, axis_end);
104             std::size_t inner_size = input.size_range(axis_end, input.rank());
105
106             auto ws_allocator = csl::WorkspaceAllocator(workspace);
107             auto scratch = ws_allocator.get_span<T>();
108             kernels::normalize<T>(stream, output, input, outer_size, mid_size, inner_size, norm_order, epsilon, scratch);
109
110             /* there might be a single weight in which case `weight` will be not equal to 1.0
111              * or there might be several weights
112              * or we don't have to scale
113              */
114             if (weight != 1.0)
115             {
116                 kernels::scale1<T>(stream, output, input, weight);
117             }
118             else if (!weightsTensor.empty())
119             {
120                 CV_Assert(weightsTensor.size() != 1); /* constructor should have set up to use `weight` */
121                 CV_Assert(weightsTensor.size() == mid_size);
122                 kernels::scaleN<T>(stream, output, input, inner_size, weightsTensor);
123             }
124         }
125
126         std::size_t get_workspace_memory_in_bytes() const noexcept override { return scratch_mem_in_bytes; }
127
128     private:
129         csl::Stream stream;
130         csl::Tensor<T> weightsTensor;
131         T weight; /* if there is only one weight, we use this */
132
133         T epsilon;
134         std::size_t norm_order;
135         std::size_t axis_start, axis_end;
136
137         std::size_t scratch_mem_in_bytes;
138     };
139
140 }}} /* namespace cv::dnn::cuda4dnn */
141
142 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_NORMALIZE_BBOX_HPP */