2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
20 #include "../C/detection_output.h"
21 #include "../C/detection_output_sort.h"
22 #include "primitive.hpp"
26 /// @addtogroup cpp_api C++ API
28 /// @addtogroup cpp_topology Network Topology
30 /// @addtogroup cpp_primitives Primitives
33 /// @brief Select method for coding the prior-boxes in the @ref detection output layer.
34 enum class prior_box_code_type : int32_t
36 corner = cldnn_code_type_corner,
37 center_size = cldnn_code_type_center_size,
38 corner_size = cldnn_code_type_corner_size
41 /// @brief Generates a list of detections based on location and confidence predictions by doing non maximum suppression.
42 /// @details Each row is a 7 dimension vector, which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax].
43 /// If number of detections per image is lower than keep_top_k, will write dummy results at the end with image_id=-1.
44 struct detection_output : public primitive_base<detection_output, CLDNN_PRIMITIVE_DESC(detection_output)>
46 CLDNN_DECLARE_PRIMITIVE(detection_output)
48 /// @brief Constructs detection output primitive.
49 /// @param id This primitive id.
50 /// @param input_location Input location primitive id.
51 /// @param input_confidence Input confidence primitive id.
52 /// @param input_prior_box Input prior-box primitive id.
53 /// @param num_classes Number of classes to be predicted.
54 /// @param keep_top_k Number of total bounding boxes to be kept per image after NMS step.
55 /// @param share_location If true bounding box are shared among different classes.
56 /// @param background_label_id Background label id (-1 if there is no background class).
57 /// @param nms_threshold Threshold for NMS step.
58 /// @param top_k Maximum number of results to be kept in NMS.
59 /// @param eta Used for adaptive NMS.
60 /// @param code_type Type of coding method for bounding box.
61 /// @param variance_encoded_in_target If true, variance is encoded in target; otherwise we need to adjust the predicted offset accordingly.
62 /// @param confidence_threshold Only keep detections with confidences larger than this threshold.
64 const primitive_id& id,
65 const primitive_id& input_location,
66 const primitive_id& input_confidence,
67 const primitive_id& input_prior_box,
68 const uint32_t num_classes,
69 const uint32_t keep_top_k,
70 const bool share_location = true,
71 const int background_label_id = 0,
72 const float nms_threshold = 0.3,
74 const float eta = 1.f,
75 const prior_box_code_type code_type = prior_box_code_type::corner,
76 const bool variance_encoded_in_target = false,
77 const float confidence_threshold = -std::numeric_limits<float>::max(),
78 const int32_t prior_info_size = 4,
79 const int32_t prior_coordinates_offset = 0,
80 const bool prior_is_normalized = true,
81 const int32_t input_width = -1,
82 const int32_t input_height = -1,
83 const bool decrease_label_id = false,
84 const bool clip_before_nms = false,
85 const bool clip_after_nms = false,
86 const padding& output_padding = padding()
88 : primitive_base(id, { input_location, input_confidence, input_prior_box }, output_padding)
89 , num_classes(num_classes)
90 , keep_top_k(keep_top_k)
91 , share_location(share_location)
92 , background_label_id(background_label_id)
93 , nms_threshold(nms_threshold)
96 , code_type(code_type)
97 , variance_encoded_in_target(variance_encoded_in_target)
98 , confidence_threshold(confidence_threshold)
99 , prior_info_size(prior_info_size)
100 , prior_coordinates_offset(prior_coordinates_offset)
101 , prior_is_normalized(prior_is_normalized)
102 , input_width(input_width)
103 , input_height(input_height)
104 , decrease_label_id(decrease_label_id)
105 , clip_before_nms(clip_before_nms)
106 , clip_after_nms(clip_after_nms)
108 if (decrease_label_id && background_label_id != 0)
109 throw std::invalid_argument("Cannot use decrease_label_id and background_label_id parameter simultaneously.");
112 /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{detection_output}
113 detection_output(const dto* dto)
114 : primitive_base(dto)
115 , num_classes(dto->num_classes)
116 , keep_top_k(dto->keep_top_k)
117 , share_location(dto->share_location != 0)
118 , background_label_id(dto->background_label_id)
119 , nms_threshold(dto->nms_threshold)
122 , code_type(static_cast<prior_box_code_type>(dto->code_type))
123 , variance_encoded_in_target(dto->variance_encoded_in_target != 0)
124 , confidence_threshold(dto->confidence_threshold)
125 , prior_info_size(dto->prior_info_size)
126 , prior_coordinates_offset(dto->prior_coordinates_offset)
127 , prior_is_normalized(dto->prior_is_normalized != 0)
128 , input_width(dto->input_width)
129 , input_height(dto->input_height)
130 , decrease_label_id(dto->decrease_label_id != 0)
131 , clip_before_nms(dto->clip_before_nms != 0)
132 , clip_after_nms(dto->clip_after_nms != 0)
134 if (decrease_label_id && background_label_id != 0)
135 throw std::invalid_argument("Cannot use decrease_label_id and background_label_id parameter simultaneously.");
138 /// @brief Number of classes to be predicted.
139 const uint32_t num_classes;
140 /// @brief Number of total bounding boxes to be kept per image after NMS step.
141 const int keep_top_k;
142 /// @brief If true, bounding box are shared among different classes.
143 const bool share_location;
144 /// @brief Background label id (-1 if there is no background class).
145 const int background_label_id;
146 /// @brief Threshold for NMS step.
147 const float nms_threshold;
148 /// @brief Maximum number of results to be kept in NMS.
150 /// @brief Used for adaptive NMS.
152 /// @brief Type of coding method for bounding box.
153 const prior_box_code_type code_type;
154 /// @brief If true, variance is encoded in target; otherwise we need to adjust the predicted offset accordingly.
155 const bool variance_encoded_in_target;
156 /// @brief Only keep detections with confidences larger than this threshold.
157 const float confidence_threshold;
158 /// @brief Number of elements in a single prior description (4 if priors calculated using PriorBox layer, 5 - if Proposal)
159 const int32_t prior_info_size;
160 /// @brief Offset of the box coordinates w.r.t. the beginning of a prior info record
161 const int32_t prior_coordinates_offset;
162 /// @brief If true, priors are normalized to [0; 1] range.
163 const bool prior_is_normalized;
164 /// @brief Width of input image.
165 const int32_t input_width;
166 /// @brief Height of input image.
167 const int32_t input_height;
168 /// @brief Decrease label id to skip background label equal to 0. Can't be used simultaneously with background_label_id.
169 const bool decrease_label_id;
170 /// @brief Clip decoded boxes right after decoding
171 const bool clip_before_nms;
172 /// @brief Clip decoded boxes after nms step
173 const bool clip_after_nms;
176 void update_dto(dto& dto) const override
178 dto.num_classes = num_classes;
179 dto.share_location = share_location;
180 dto.background_label_id = background_label_id;
181 dto.nms_threshold = nms_threshold;
184 dto.code_type = static_cast<int32_t>(code_type);
185 dto.variance_encoded_in_target = variance_encoded_in_target;
186 dto.keep_top_k = keep_top_k;
187 dto.confidence_threshold = confidence_threshold;
188 dto.prior_info_size = prior_info_size;
189 dto.prior_coordinates_offset = prior_coordinates_offset;
190 dto.prior_is_normalized = prior_is_normalized;
191 dto.input_width = input_width;
192 dto.input_height = input_height;
193 dto.decrease_label_id = decrease_label_id;
194 dto.clip_before_nms = clip_before_nms;
195 dto.clip_after_nms = clip_after_nms;
199 /// @brief Generates a list of detections based on location and confidence predictions by doing non maximum suppression.
200 /// @details Each row is a 7 dimension vector, which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax].
201 /// If number of detections per image is lower than keep_top_k, will write dummy results at the end with image_id=-1.
202 struct detection_output_sort : public primitive_base<detection_output_sort, CLDNN_PRIMITIVE_DESC(detection_output_sort)>
204 CLDNN_DECLARE_PRIMITIVE(detection_output_sort)
206 /// @brief Constructs detection output primitive.
207 /// @param id This primitive id.
208 /// @param input_bboxes Input bounding boxes primitive id.
209 /// @param num_images Number of images to be predicted.
210 /// @param num_classes Number of classes to be predicted.
211 /// @param keep_top_k Number of total bounding boxes to be kept per image after NMS step.
212 /// @param share_location If true bounding box are shared among different classes.
213 /// @param top_k Maximum number of results to be kept in NMS.
214 /// @param output_padding Output padding.
215 detection_output_sort(
216 const primitive_id& id,
217 const primitive_id& input_bboxes,
218 const uint32_t num_images,
219 const uint32_t num_classes,
220 const uint32_t keep_top_k,
221 const bool share_location = true,
222 const int top_k = -1,
223 const int background_label_id = -1,
224 const padding& output_padding = padding()
226 : primitive_base(id, { input_bboxes }, output_padding)
227 , num_images(num_images)
228 , num_classes(num_classes)
229 , keep_top_k(keep_top_k)
230 , share_location(share_location)
232 , background_label_id(background_label_id)
235 /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{detection_output}
236 detection_output_sort(const dto* dto)
237 : primitive_base(dto)
238 , num_images(dto->num_images)
239 , num_classes(dto->num_classes)
240 , keep_top_k(dto->keep_top_k)
241 , share_location(dto->share_location != 0)
243 , background_label_id(dto->background_label_id)
246 /// @brief Number of classes to be predicted.
247 const uint32_t num_images;
248 /// @brief Number of classes to be predicted.
249 const uint32_t num_classes;
250 /// @brief Number of total bounding boxes to be kept per image after NMS step.
251 const int keep_top_k;
252 /// @brief If true, bounding box are shared among different classes.
253 const bool share_location;
254 /// @brief Maximum number of results to be kept in NMS.
256 /// @brief Background label id (-1 if there is no background class).
257 const int background_label_id;
261 void update_dto(dto& dto) const override
263 dto.num_classes = num_classes;
264 dto.num_images = num_images;
265 dto.keep_top_k = keep_top_k;
266 dto.share_location = share_location;
268 dto.background_label_id = background_label_id;