2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
20 #include "primitive.hpp"
23 /// @addtogroup cpp_api C++ API
25 /// @addtogroup cpp_topology Network Topology
27 /// @addtogroup cpp_primitives Primitives
30 /// @brief Selects the method used for coding the prior boxes in the @ref detection_output layer.
31 enum class prior_box_code_type : int32_t {
37 /// @brief Generates a list of detections from location and confidence predictions by applying non-maximum suppression (NMS).
38 /// @details Each output row is a 7-element vector storing: [image_id, label, confidence, xmin, ymin, xmax, ymax].
39 /// If the number of detections for an image is lower than keep_top_k, dummy results with image_id=-1 are written at the end.
40 struct detection_output : public primitive_base<detection_output> {
41 CLDNN_DECLARE_PRIMITIVE(detection_output)
43 /// @brief Constructs detection output primitive.
44 /// @param id This primitive id.
45 /// @param input_location Input location primitive id.
46 /// @param input_confidence Input confidence primitive id.
47 /// @param input_prior_box Input prior-box primitive id.
48 /// @param num_classes Number of classes to be predicted.
49 /// @param keep_top_k Total number of bounding boxes to be kept per image after the NMS step.
50 /// @param share_location If true, bounding boxes are shared among different classes.
51 /// @param background_label_id Background label id (-1 if there is no background class).
52 /// @param nms_threshold Threshold for the NMS step.
53 /// @param top_k Maximum number of results to be kept in NMS.
54 /// @param eta Used for adaptive NMS.
55 /// @param code_type Type of coding method for bounding boxes.
56 /// @param variance_encoded_in_target If true, variance is encoded in target; otherwise the predicted offset needs to be adjusted accordingly.
57 /// @param confidence_threshold Only keep detections with confidences larger than this threshold.
/// @param prior_info_size Number of elements in a single prior description (4 if priors are calculated using the PriorBox layer, 5 if using Proposal).
/// @param prior_coordinates_offset Offset of the box coordinates w.r.t. the beginning of a prior info record.
/// @param prior_is_normalized If true, priors are normalized to [0; 1] range.
/// @param input_width Width of input image.
/// @param input_height Height of input image.
/// @param decrease_label_id Decrease label id to skip background label equal to 0. Can't be used simultaneously with background_label_id.
/// @param clip_before_nms Clip decoded boxes right after decoding.
/// @param clip_after_nms Clip decoded boxes after the NMS step.
/// @param output_padding Output padding.
/// @throws std::invalid_argument if decrease_label_id is true and background_label_id is not 0.
58 detection_output(const primitive_id& id,
59 const primitive_id& input_location,
60 const primitive_id& input_confidence,
61 const primitive_id& input_prior_box,
62 const uint32_t num_classes,
63 const uint32_t keep_top_k,
64 const bool share_location = true,
65 const int background_label_id = 0,
66 const float nms_threshold = 0.3,
// NOTE(review): @param top_k is documented above, but no top_k parameter is visible in this list - confirm against the full header.
68 const float eta = 1.f,
69 const prior_box_code_type code_type = prior_box_code_type::corner,
70 const bool variance_encoded_in_target = false,
// Default is the lowest finite float, so the "larger than this threshold" filter is effectively disabled unless overridden.
71 const float confidence_threshold = -std::numeric_limits<float>::max(),
72 const int32_t prior_info_size = 4,
73 const int32_t prior_coordinates_offset = 0,
74 const bool prior_is_normalized = true,
75 const int32_t input_width = -1,
76 const int32_t input_height = -1,
77 const bool decrease_label_id = false,
78 const bool clip_before_nms = false,
79 const bool clip_after_nms = false,
80 const padding& output_padding = padding())
// The three input primitive ids are forwarded to the base primitive as this primitive's inputs.
81 : primitive_base(id, {input_location, input_confidence, input_prior_box}, output_padding),
82 num_classes(num_classes),
83 keep_top_k(keep_top_k),
84 share_location(share_location),
85 background_label_id(background_label_id),
86 nms_threshold(nms_threshold),
// NOTE(review): no initializers for eta or code_type are visible here, although both are constructor
// parameters and code_type is a const member - confirm against the full header.
90 variance_encoded_in_target(variance_encoded_in_target),
91 confidence_threshold(confidence_threshold),
92 prior_info_size(prior_info_size),
93 prior_coordinates_offset(prior_coordinates_offset),
94 prior_is_normalized(prior_is_normalized),
95 input_width(input_width),
96 input_height(input_height),
97 decrease_label_id(decrease_label_id),
98 clip_before_nms(clip_before_nms),
99 clip_after_nms(clip_after_nms) {
// decrease_label_id is only accepted together with background_label_id == 0; any other combination is rejected.
100 if (decrease_label_id && background_label_id != 0)
101 throw std::invalid_argument(
102 "Cannot use decrease_label_id and background_label_id parameter simultaneously.");
105 /// @brief Number of classes to be predicted.
106 const uint32_t num_classes;
// NOTE(review): stored as int although the constructor parameter is uint32_t (implicit conversion).
107 /// @brief Total number of bounding boxes to be kept per image after the NMS step.
108 const int keep_top_k;
109 /// @brief If true, bounding boxes are shared among different classes.
110 const bool share_location;
111 /// @brief Background label id (-1 if there is no background class).
112 const int background_label_id;
113 /// @brief Threshold for the NMS step.
114 const float nms_threshold;
115 /// @brief Maximum number of results to be kept in NMS.
117 /// @brief Used for adaptive NMS.
// NOTE(review): the two briefs above (top_k and eta, per the constructor docs) have no member
// declarations visible after them - confirm against the full header.
119 /// @brief Type of coding method for bounding boxes.
120 const prior_box_code_type code_type;
121 /// @brief If true, variance is encoded in target; otherwise the predicted offset needs to be adjusted accordingly.
122 const bool variance_encoded_in_target;
123 /// @brief Only keep detections with confidences larger than this threshold.
124 const float confidence_threshold;
125 /// @brief Number of elements in a single prior description (4 if priors are calculated using the PriorBox layer, 5 if using Proposal).
126 const int32_t prior_info_size;
127 /// @brief Offset of the box coordinates w.r.t. the beginning of a prior info record.
128 const int32_t prior_coordinates_offset;
129 /// @brief If true, priors are normalized to [0; 1] range.
130 const bool prior_is_normalized;
131 /// @brief Width of input image.
132 const int32_t input_width;
133 /// @brief Height of input image.
134 const int32_t input_height;
135 /// @brief Decrease label id to skip background label equal to 0. Can't be used simultaneously with background_label_id.
136 const bool decrease_label_id;
137 /// @brief Clip decoded boxes right after decoding.
138 const bool clip_before_nms;
139 /// @brief Clip decoded boxes after the NMS step.
140 const bool clip_after_nms;
// NOTE(review): the description below is word-for-word the one used for detection_output; confirm that it
// describes what this sort primitive does.
145 /// @brief Generates a list of detections from location and confidence predictions by applying non-maximum suppression (NMS).
146 /// @details Each output row is a 7-element vector storing: [image_id, label, confidence, xmin, ymin, xmax, ymax].
147 /// If the number of detections for an image is lower than keep_top_k, dummy results with image_id=-1 are written at the end.
148 struct detection_output_sort
149 : public primitive_base<detection_output_sort> {
150 CLDNN_DECLARE_PRIMITIVE(detection_output_sort)
152 /// @brief Constructs detection output sort primitive.
153 /// @param id This primitive id.
154 /// @param input_bboxes Input bounding boxes primitive id.
155 /// @param num_images Number of images to be predicted.
156 /// @param num_classes Number of classes to be predicted.
157 /// @param keep_top_k Total number of bounding boxes to be kept per image after the NMS step.
158 /// @param share_location If true, bounding boxes are shared among different classes.
159 /// @param top_k Maximum number of results to be kept in NMS.
/// @param background_label_id Background label id (-1 if there is no background class).
160 /// @param output_padding Output padding.
161 detection_output_sort(const primitive_id& id,
162 const primitive_id& input_bboxes,
163 const uint32_t num_images,
164 const uint32_t num_classes,
165 const uint32_t keep_top_k,
166 const bool share_location = true,
167 const int top_k = -1,
168 const int background_label_id = -1,
169 const padding& output_padding = padding())
// The bounding-box primitive id is forwarded to the base primitive as this primitive's only input.
170 : primitive_base(id, {input_bboxes}, output_padding),
171 num_images(num_images),
172 num_classes(num_classes),
173 keep_top_k(keep_top_k),
174 share_location(share_location),
// NOTE(review): top_k is accepted by the constructor, but no top_k initializer is visible here - confirm against the full header.
176 background_label_id(background_label_id) {}
178 /// @brief Number of images to be predicted.
179 const uint32_t num_images;
180 /// @brief Number of classes to be predicted.
181 const uint32_t num_classes;
// NOTE(review): stored as int although the constructor parameter is uint32_t (implicit conversion).
182 /// @brief Total number of bounding boxes to be kept per image after the NMS step.
183 const int keep_top_k;
184 /// @brief If true, bounding boxes are shared among different classes.
185 const bool share_location;
186 /// @brief Maximum number of results to be kept in NMS.
// NOTE(review): the brief above (top_k, per the constructor docs) has no member declaration visible after it - confirm against the full header.
188 /// @brief Background label id (-1 if there is no background class).
189 const int background_label_id;