inference-engine/thirdparty/clDNN/api/detection_output.hpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18 #pragma once
#include <cstdint>
#include <limits>
#include <stdexcept>
#include "primitive.hpp"
  21
  22 namespace cldnn {
  23 /// @addtogroup cpp_api C++ API
  24 /// @{
  25 /// @addtogroup cpp_topology Network Topology
  26 /// @{
  27 /// @addtogroup cpp_primitives Primitives
  28 /// @{
  29
  30 /// @brief Select method for coding the prior-boxes in the @ref detection output layer.
  31 enum class prior_box_code_type : int32_t {
  32     corner,
  33     center_size,
  34     corner_size
  35 };
  36
  37 /// @brief Generates a list of detections based on location and confidence predictions by doing non maximum suppression.
  38 /// @details Each row is a 7 dimension vector, which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax].
  39 /// If number of detections per image is lower than keep_top_k, will write dummy results at the end with image_id=-1.
  40 struct detection_output : public primitive_base<detection_output> {
  41     CLDNN_DECLARE_PRIMITIVE(detection_output)
  42
  43     /// @brief Constructs detection output primitive.
  44     /// @param id This primitive id.
  45     /// @param input_location Input location primitive id.
  46     /// @param input_confidence Input confidence primitive id.
  47     /// @param input_prior_box Input prior-box primitive id.
  48     /// @param num_classes Number of classes to be predicted.
  49     /// @param keep_top_k Number of total bounding boxes to be kept per image after NMS step.
  50     /// @param share_location If true bounding box are shared among different classes.
  51     /// @param background_label_id Background label id (-1 if there is no background class).
  52     /// @param nms_threshold Threshold for NMS step.
  53     /// @param top_k Maximum number of results to be kept in NMS.
  54     /// @param eta Used for adaptive NMS.
  55     /// @param code_type Type of coding method for bounding box.
  56     /// @param variance_encoded_in_target If true, variance is encoded in target; otherwise we need to adjust the predicted offset accordingly.
  57     /// @param confidence_threshold Only keep detections with confidences larger than this threshold.
  58     detection_output(const primitive_id& id,
  59                      const primitive_id& input_location,
  60                      const primitive_id& input_confidence,
  61                      const primitive_id& input_prior_box,
  62                      const uint32_t num_classes,
  63                      const uint32_t keep_top_k,
  64                      const bool share_location = true,
  65                      const int background_label_id = 0,
  66                      const float nms_threshold = 0.3,
  67                      const int top_k = -1,
  68                      const float eta = 1.f,
  69                      const prior_box_code_type code_type = prior_box_code_type::corner,
  70                      const bool variance_encoded_in_target = false,
  71                      const float confidence_threshold = -std::numeric_limits<float>::max(),
  72                      const int32_t prior_info_size = 4,
  73                      const int32_t prior_coordinates_offset = 0,
  74                      const bool prior_is_normalized = true,
  75                      const int32_t input_width = -1,
  76                      const int32_t input_height = -1,
  77                      const bool decrease_label_id = false,
  78                      const bool clip_before_nms = false,
  79                      const bool clip_after_nms = false,
  80                      const padding& output_padding = padding())
  81         : primitive_base(id, {input_location, input_confidence, input_prior_box}, output_padding),
  82           num_classes(num_classes),
  83           keep_top_k(keep_top_k),
  84           share_location(share_location),
  85           background_label_id(background_label_id),
  86           nms_threshold(nms_threshold),
  87           top_k(top_k),
  88           eta(eta),
  89           code_type(code_type),
  90           variance_encoded_in_target(variance_encoded_in_target),
  91           confidence_threshold(confidence_threshold),
  92           prior_info_size(prior_info_size),
  93           prior_coordinates_offset(prior_coordinates_offset),
  94           prior_is_normalized(prior_is_normalized),
  95           input_width(input_width),
  96           input_height(input_height),
  97           decrease_label_id(decrease_label_id),
  98           clip_before_nms(clip_before_nms),
  99           clip_after_nms(clip_after_nms) {
 100         if (decrease_label_id && background_label_id != 0)
 101             throw std::invalid_argument(
 102                 "Cannot use decrease_label_id and background_label_id parameter simultaneously.");
 103     }
 104
 105     /// @brief Number of classes to be predicted.
 106     const uint32_t num_classes;
 107     /// @brief Number of total bounding boxes to be kept per image after NMS step.
 108     const int keep_top_k;
 109     /// @brief If true, bounding box are shared among different classes.
 110     const bool share_location;
 111     /// @brief Background label id (-1 if there is no background class).
 112     const int background_label_id;
 113     /// @brief Threshold for NMS step.
 114     const float nms_threshold;
 115     /// @brief Maximum number of results to be kept in NMS.
 116     const int top_k;
 117     /// @brief Used for adaptive NMS.
 118     const float eta;
 119     /// @brief Type of coding method for bounding box.
 120     const prior_box_code_type code_type;
 121     /// @brief If true, variance is encoded in target; otherwise we need to adjust the predicted offset accordingly.
 122     const bool variance_encoded_in_target;
 123     /// @brief Only keep detections with confidences larger than this threshold.
 124     const float confidence_threshold;
 125     /// @brief Number of elements in a single prior description (4 if priors calculated using PriorBox layer, 5 - if Proposal)
 126     const int32_t prior_info_size;
 127     /// @brief Offset of the box coordinates w.r.t. the beginning of a prior info record
 128     const int32_t prior_coordinates_offset;
 129     /// @brief If true, priors are normalized to [0; 1] range.
 130     const bool prior_is_normalized;
 131     /// @brief Width of input image.
 132     const int32_t input_width;
 133     /// @brief Height of input image.
 134     const int32_t input_height;
 135     /// @brief Decrease label id to skip background label equal to 0. Can't be used simultaneously with background_label_id.
 136     const bool decrease_label_id;
 137     /// @brief Clip decoded boxes right after decoding
 138     const bool clip_before_nms;
 139     /// @brief Clip decoded boxes after nms step
 140     const bool clip_after_nms;
 141
 142 protected:
 143 };
 144
 145 /// @brief Generates a list of detections based on location and confidence predictions by doing non maximum suppression.
 146 /// @details Each row is a 7 dimension vector, which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax].
 147 /// If number of detections per image is lower than keep_top_k, will write dummy results at the end with image_id=-1.
 148 struct detection_output_sort
 149     : public primitive_base<detection_output_sort> {
 150     CLDNN_DECLARE_PRIMITIVE(detection_output_sort)
 151
 152     /// @brief Constructs detection output primitive.
 153     /// @param id This primitive id.
 154     /// @param input_bboxes Input bounding boxes primitive id.
 155     /// @param num_images Number of images to be predicted.
 156     /// @param num_classes Number of classes to be predicted.
 157     /// @param keep_top_k Number of total bounding boxes to be kept per image after NMS step.
 158     /// @param share_location If true bounding box are shared among different classes.
 159     /// @param top_k Maximum number of results to be kept in NMS.
 160     /// @param output_padding Output padding.
 161     detection_output_sort(const primitive_id& id,
 162                           const primitive_id& input_bboxes,
 163                           const uint32_t num_images,
 164                           const uint32_t num_classes,
 165                           const uint32_t keep_top_k,
 166                           const bool share_location = true,
 167                           const int top_k = -1,
 168                           const int background_label_id = -1,
 169                           const padding& output_padding = padding())
 170         : primitive_base(id, {input_bboxes}, output_padding),
 171           num_images(num_images),
 172           num_classes(num_classes),
 173           keep_top_k(keep_top_k),
 174           share_location(share_location),
 175           top_k(top_k),
 176           background_label_id(background_label_id) {}
 177
 178     /// @brief Number of classes to be predicted.
 179     const uint32_t num_images;
 180     /// @brief Number of classes to be predicted.
 181     const uint32_t num_classes;
 182     /// @brief Number of total bounding boxes to be kept per image after NMS step.
 183     const int keep_top_k;
 184     /// @brief If true, bounding box are shared among different classes.
 185     const bool share_location;
 186     /// @brief Maximum number of results to be kept in NMS.
 187     const int top_k;
 188     /// @brief Background label id (-1 if there is no background class).
 189     const int background_label_id;
 190 };
 191 /// @}
 192 /// @}
 193 /// @}
 194 }  // namespace cldnn