modules/dnn/src/cuda4dnn/primitives/region.hpp

   1 // This file is part of OpenCV project.
   2 // It is subject to the license terms in the LICENSE file found in the top-level directory
   3 // of this distribution and at http://opencv.org/license.html.
   4
   5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP
   6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP
   7
   8 #include "../../op_cuda.hpp"
   9
  10 #include "../csl/stream.hpp"
  11 #include "../csl/cudnn.hpp"
  12 #include "../csl/tensor_ops.hpp"
  13
  14 #include "../kernels/region.hpp"
  15
  16 #include "../../nms.inl.hpp"
  17
  18 #include <opencv2/core.hpp>
  19
  20 #include <cstddef>
  21 #include <utility>
  22 #include <vector>
  23
  24 namespace cv { namespace dnn { namespace cuda4dnn {
  25
  26     enum class SquashMethod {
  27         SOFTMAX,
  28         SIGMOID
  29     };
  30
  31     template <class T>
  32     struct RegionConfiguration {
  33         /* The image is divided into (H, W) cells.
  34          *
  35          * Each cell is interested in exactly one object and predicts `boxes_per_cell` bounding boxes
  36          * for that object.
  37          *
  38          * Each bounding box contains:
  39          * - 4 box coordinates
  40          * - objectness confidence score
  41          * - `classes` number of class scores
  42          *
  43          * The object score is reduced to a probability using sigmoid and the class scores are reduced to
  44          * probabilities by either applying sigmoid or softmax (which is a configuration option).
  45          *
  46          * object_prob = sigmoid(object_score)
  47          * conditional_class_prob = sigmoid, softmax across all classes
  48          *
  49          * actual class probability = conditional_class_prob * object_prob
  50          */
  51
  52         /* method for reducing class scores to probabilities */
  53         SquashMethod squash_method;
  54
  55         std::size_t classes, boxes_per_cell;
  56
  57         std::size_t width_norm, height_norm;
  58
  59         /* prob cutoffs below which the prediction is nulled */
  60         T object_prob_cutoff;
  61         T class_prob_cutoff;
  62
  63         T nms_iou_threshold;
  64     };
  65
  66     template <class T>
  67     class RegionOp final : public CUDABackendNode {
  68     public:
  69         using wrapper_type = GetCUDABackendWrapperType<T>;
  70
  71         template <class V>
  72         RegionOp(csl::Stream stream_, const cv::Mat& bias, const RegionConfiguration<V>& config)
  73             : stream(std::move(stream_))
  74         {
  75             biasTensor = csl::makeTensorHeader<T>(bias);
  76             csl::copyMatToTensor<T>(bias, biasTensor, stream);
  77
  78             classes = config.classes;
  79             boxes_per_cell = config.boxes_per_cell;
  80
  81             width_norm = config.width_norm;
  82             height_norm = config.height_norm;
  83
  84             squash_type = config.squash_method;
  85
  86             object_prob_cutoff = config.object_prob_cutoff;
  87             class_prob_cutoff = config.class_prob_cutoff;
  88
  89             nms_iou_threshold = config.nms_iou_threshold;
  90         }
  91
  92         void forward(
  93             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
  94             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
  95             csl::Workspace& workspace) override
  96         {
  97             CV_Assert(outputs.size() == 1);
  98
  99             auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
 100             auto input = input_wrapper->getView();
 101
 102             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
 103             auto output = output_wrapper->getSpan();
 104
 105             csl::memcpy<T>(output.get(), input.get(), output.size(), stream);
 106
 107             auto rows = input.get_axis_size(1);
 108             auto cols = input.get_axis_size(2);
 109
 110             auto cell_box_size = classes + 4 + 1;
 111
 112             /* we squash class scores into probabilities using softmax or sigmoid */
 113             if (squash_type == SquashMethod::SOFTMAX)
 114                 kernels::softmax_strided<T>(stream, output, input, classes, cell_box_size, 5);
 115             else if (squash_type == SquashMethod::SIGMOID)
 116                 kernels::sigmoid_strided<T>(stream, output, input, classes, cell_box_size, 5);
 117
 118             kernels::region_finalize<T>(stream, output, input, biasTensor, object_prob_cutoff, class_prob_cutoff,
 119                 height_norm, width_norm, rows, cols, boxes_per_cell, cell_box_size, classes);
 120
 121             if (nms_iou_threshold > 0) {
 122                 auto output_mat = output_wrapper->getMutableHostMat();
 123                 CV_Assert(output_mat.type() == CV_32F);
 124                 for (int i = 0; i < input.get_axis_size(0); i++) {
 125                     auto sample_size = rows * cols * boxes_per_cell * cell_box_size;
 126                     do_nms_sort(reinterpret_cast<float*>(output_mat.data) + i * sample_size, rows * cols * boxes_per_cell, class_prob_cutoff, nms_iou_threshold);
 127                 }
 128             }
 129         }
 130
 131     private:
 132         void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh)
 133         {
 134             std::vector<Rect2d> boxes(total);
 135             std::vector<float> scores(total);
 136
 137             for (int i = 0; i < total; ++i)
 138             {
 139                 Rect2d &b = boxes[i];
 140                 int box_index = i * (classes + 4 + 1);
 141                 b.width = detections[box_index + 2];
 142                 b.height = detections[box_index + 3];
 143                 b.x = detections[box_index + 0] - b.width / 2;
 144                 b.y = detections[box_index + 1] - b.height / 2;
 145             }
 146
 147             std::vector<int> indices;
 148             for (int k = 0; k < classes; ++k)
 149             {
 150                 for (int i = 0; i < total; ++i)
 151                 {
 152                     int box_index = i * (classes + 4 + 1);
 153                     int class_index = box_index + 5;
 154                     scores[i] = detections[class_index + k];
 155                     detections[class_index + k] = 0;
 156                 }
 157                 NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices);
 158                 for (int i = 0, n = indices.size(); i < n; ++i)
 159                 {
 160                     int box_index = indices[i] * (classes + 4 + 1);
 161                     int class_index = box_index + 5;
 162                     detections[class_index + k] = scores[indices[i]];
 163                 }
 164             }
 165         }
 166
 167     private:
 168         csl::Stream stream;
 169
 170         csl::Tensor<T> biasTensor;
 171         std::size_t classes, boxes_per_cell;
 172         std::size_t width_norm, height_norm;
 173         SquashMethod squash_type;
 174
 175         T object_prob_cutoff, class_prob_cutoff;
 176         T nms_iou_threshold;
 177     };
 178
 179 }}} /* namespace cv::dnn::cuda4dnn */
 180
 181 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP */