inference-engine/src/extension/ext_detectionoutput.cpp

   1 // Copyright (C) 2018 Intel Corporation
   2 //
   3 // SPDX-License-Identifier: Apache-2.0
   4 //
   5
   6 #include "ext_list.hpp"
   7 #include "ext_base.hpp"
   8
   9 #include <cfloat>
  10 #include <vector>
  11 #include <cmath>
  12 #include <string>
  13 #include <utility>
  14 #include <algorithm>
  15
  16 namespace InferenceEngine {
  17 namespace Extensions {
  18 namespace Cpu {
  19
  20 template <typename T>
  21 static bool SortScorePairDescend(const std::pair<float, T>& pair1,
  22                                  const std::pair<float, T>& pair2) {
  23     return pair1.first > pair2.first;
  24 }
  25
  26 class DetectionOutputImpl: public ExtLayerBase {
  27 public:
  28     explicit DetectionOutputImpl(const CNNLayer* layer) {
  29         try {
  30             if (layer->insData.size() != 3)
  31                 THROW_IE_EXCEPTION << "Incorrect number of input edges.";
  32             if (layer->outData.empty())
  33                 THROW_IE_EXCEPTION << "Incorrect number of output edges.";
  34
  35             _num_classes = layer->GetParamAsInt("num_classes");
  36             _background_label_id = layer->GetParamAsInt("background_label_id", 0);
  37             _top_k = layer->GetParamAsInt("top_k", -1);
  38             _variance_encoded_in_target = layer->GetParamsAsBool("variance_encoded_in_target", false);
  39             _keep_top_k = layer->GetParamAsInt("keep_top_k", -1);
  40             _nms_threshold = layer->GetParamAsFloat("nms_threshold");
  41             _confidence_threshold = layer->GetParamAsFloat("confidence_threshold", -FLT_MAX);
  42             _share_location = layer->GetParamsAsBool("share_location", true);
  43             _clip = layer->GetParamsAsBool("clip", false);
  44             _decrease_label_id = layer->GetParamsAsBool("decrease_label_id", false);
  45             _normalized = layer->GetParamsAsBool("normalized", true);
  46             _image_height = layer->GetParamAsInt("input_height", 1);
  47             _image_width = layer->GetParamAsInt("input_width", 1);
  48             _prior_size = _normalized ? 4 : 5;
  49             _offset = _normalized ? 0 : 1;
  50             _num_loc_classes = _share_location ? 1 : _num_classes;
  51
  52             std::string code_type_str = layer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER");
  53             _code_type = (code_type_str == "caffe.PriorBoxParameter.CENTER_SIZE" ? CodeType::CENTER_SIZE
  54                                                                                  : CodeType::CORNER);
  55
  56             _num_priors = static_cast<int>(layer->insData[idx_priors].lock()->dims[0] / _prior_size);
  57
  58             if (_num_priors * _num_loc_classes * 4 != layer->insData[idx_location].lock()->dims[0])
  59                 THROW_IE_EXCEPTION << "Number of priors must match number of location predictions.";
  60
  61             if (_num_priors * _num_classes != layer->insData[idx_confidence].lock()->dims[0])
  62                 THROW_IE_EXCEPTION << "Number of priors must match number of confidence predictions.";
  63
  64             if (_decrease_label_id && _background_label_id != 0)
  65                 THROW_IE_EXCEPTION << "Cannot use decrease_label_id and background_label_id parameter simultaneously.";
  66
  67             _num = static_cast<int>(layer->insData[idx_confidence].lock()->getTensorDesc().getDims()[0]);
  68
  69             InferenceEngine::SizeVector bboxes_size{static_cast<size_t>(_num),
  70                                                     static_cast<size_t>(_num_classes),
  71                                                     static_cast<size_t>(_num_priors),
  72                                                     4};
  73             _decoded_bboxes = InferenceEngine::make_shared_blob<float>({Precision::UNSPECIFIED, bboxes_size, NCHW});
  74             _decoded_bboxes->allocate();
  75
  76             InferenceEngine::SizeVector buf_size{static_cast<size_t>(_num),
  77                                                  static_cast<size_t>(_num_classes),
  78                                                  static_cast<size_t>(_num_priors)};
  79             _buffer = InferenceEngine::make_shared_blob<int>({Precision::UNSPECIFIED, buf_size, {buf_size, {0, 1, 2}}});
  80             _buffer->allocate();
  81
  82             InferenceEngine::SizeVector indices_size{static_cast<size_t>(_num),
  83                                                      static_cast<size_t>(_num_classes),
  84                                                      static_cast<size_t>(_num_priors)};
  85             _indices = InferenceEngine::make_shared_blob<int>(
  86                     {Precision::UNSPECIFIED, indices_size, {indices_size, {0, 1, 2}}});
  87             _indices->allocate();
  88
  89             InferenceEngine::SizeVector detections_size{static_cast<size_t>(_num * _num_classes)};
  90             _detections_count = InferenceEngine::make_shared_blob<int>({Precision::UNSPECIFIED, detections_size, C});
  91             _detections_count->allocate();
  92
  93             InferenceEngine::SizeVector conf_size = layer->insData[idx_confidence].lock()->dims;
  94             _reordered_conf = InferenceEngine::make_shared_blob<float>({Precision::FP32, conf_size, ANY});
  95             _reordered_conf->allocate();
  96
  97             InferenceEngine::SizeVector decoded_bboxes_size{static_cast<size_t>(_num),
  98                                                             static_cast<size_t>(_num_priors),
  99                                                             static_cast<size_t>(_num_classes)};
 100             _bbox_sizes = InferenceEngine::make_shared_blob<float>(
 101                     {Precision::FP32, decoded_bboxes_size, {decoded_bboxes_size, {0, 1, 2}}});
 102             _bbox_sizes->allocate();
 103
 104             InferenceEngine::SizeVector num_priors_actual_size{static_cast<size_t>(_num)};
 105             _num_priors_actual = InferenceEngine::make_shared_blob<int>({Precision::UNSPECIFIED, num_priors_actual_size, C});
 106             _num_priors_actual->allocate();
 107
 108             addConfig(layer, {DataConfigurator(ConfLayout::PLN),
 109                        DataConfigurator(ConfLayout::PLN),
 110                        DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)});
 111         } catch (InferenceEngine::details::InferenceEngineException &ex) {
 112             errorMsg = ex.what();
 113         }
 114     }
 115
 116     StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
 117                        ResponseDesc *resp) noexcept override {
 118         float *dst_data = outputs[0]->buffer();
 119
 120         const float *loc_data    = inputs[idx_location]->buffer();
 121         const float *conf_data   = inputs[idx_confidence]->buffer();
 122         const float *prior_data  = inputs[idx_priors]->buffer();
 123
 124         const int N = inputs[idx_confidence]->getTensorDesc().getDims()[0];
 125
 126         float *decoded_bboxes_data = _decoded_bboxes->buffer();
 127         float *reordered_conf_data = _reordered_conf->buffer();
 128         float *bbox_sizes_data     = _bbox_sizes->buffer();
 129         int *detections_data       = _detections_count->buffer();
 130         int *buffer_data           = _buffer->buffer();
 131         int *indices_data          = _indices->buffer();
 132         int *num_priors_actual     = _num_priors_actual->buffer();
 133
 134         const float *prior_variances = prior_data + _num_priors*_prior_size;
 135         const float *ppriors = prior_data;
 136
 137         for (int n = 0; n < N; ++n) {
 138             if (_share_location) {
 139                 const float *ploc = loc_data + n*4*_num_priors;
 140                 float *pboxes = decoded_bboxes_data + n*4*_num_priors;
 141                 float *psizes = bbox_sizes_data + n*_num_priors;
 142                 decodeBBoxes(ppriors, ploc, prior_variances, pboxes, psizes, num_priors_actual, n);
 143             } else {
 144                 for (int c = 0; c < _num_loc_classes; ++c) {
 145                     if (c == _background_label_id) {
 146                         continue;
 147                     }
 148
 149                     const float *ploc = loc_data + n*4*_num_loc_classes*_num_priors + c*4;
 150                     float *pboxes = decoded_bboxes_data + n*4*_num_loc_classes*_num_priors + c*4*_num_priors;
 151                     float *psizes = bbox_sizes_data + n*_num_loc_classes*_num_priors + c*_num_priors;
 152                     decodeBBoxes(ppriors, ploc, prior_variances, pboxes, psizes, num_priors_actual, n);
 153                 }
 154             }
 155         }
 156
 157         for (int n = 0; n < N; ++n) {
 158             for (int c = 0; c < _num_classes; ++c) {
 159                 for (int p = 0; p < _num_priors; ++p) {
 160                     reordered_conf_data[n*_num_priors*_num_classes + c*_num_priors + p] = conf_data[n*_num_priors*_num_classes + p*_num_classes + c];
 161                 }
 162             }
 163         }
 164
 165         memset(detections_data, 0, N*_num_classes*sizeof(int));
 166
 167         for (int n = 0; n < N; ++n) {
 168             int detections_total = 0;
 169
 170 #pragma omp parallel for schedule(static)
 171             for (int c = 0; c < _num_classes; ++c) {
 172                 if (c == _background_label_id) {
 173                     // Ignore background class.
 174                     continue;
 175                 }
 176
 177                 int *pindices    = indices_data + n*_num_classes*_num_priors + c*_num_priors;
 178                 int *pbuffer     = buffer_data + c*_num_priors;
 179                 int *pdetections = detections_data + n*_num_classes + c;
 180
 181                 const float *pconf = reordered_conf_data + n*_num_classes*_num_priors + c*_num_priors;
 182                 const float *pboxes;
 183                 const float *psizes;
 184                 if (_share_location) {
 185                     pboxes = decoded_bboxes_data + n*4*_num_priors;
 186                     psizes = bbox_sizes_data + n*_num_priors;
 187                 } else {
 188                     pboxes = decoded_bboxes_data + n*4*_num_classes*_num_priors + c*4*_num_priors;
 189                     psizes = bbox_sizes_data + n*_num_classes*_num_priors + c*_num_priors;
 190                 }
 191
 192                 nms(pconf, pboxes, psizes, pbuffer, pindices, *pdetections, num_priors_actual[n]);
 193             }
 194
 195             for (int c = 0; c < _num_classes; ++c) {
 196                 detections_total += detections_data[n*_num_classes + c];
 197             }
 198
 199             if (_keep_top_k > -1 && detections_total > _keep_top_k) {
 200                 std::vector<std::pair<float, std::pair<int, int>>> conf_index_class_map;
 201
 202                 for (int c = 0; c < _num_classes; ++c) {
 203                     int detections = detections_data[n*_num_classes + c];
 204                     int *pindices = indices_data + n*_num_classes*_num_priors + c*_num_priors;
 205                     float *pconf  = reordered_conf_data + n*_num_classes*_num_priors + c*_num_priors;
 206
 207                     for (int i = 0; i < detections; ++i) {
 208                         int idx = pindices[i];
 209                         conf_index_class_map.push_back(std::make_pair(pconf[idx], std::make_pair(c, idx)));
 210                     }
 211                 }
 212
 213                 std::sort(conf_index_class_map.begin(), conf_index_class_map.end(),
 214                           SortScorePairDescend<std::pair<int, int>>);
 215                 conf_index_class_map.resize(_keep_top_k);
 216
 217                 // Store the new indices.
 218                 memset(detections_data + n*_num_classes, 0, _num_classes * sizeof(int));
 219
 220                 for (int j = 0; j < conf_index_class_map.size(); ++j) {
 221                     int label = conf_index_class_map[j].second.first;
 222                     int idx = conf_index_class_map[j].second.second;
 223                     int *pindices = indices_data + n * _num_classes * _num_priors + label * _num_priors;
 224                     pindices[detections_data[n*_num_classes + label]] = idx;
 225                     detections_data[n*_num_classes + label]++;
 226                 }
 227             }
 228         }
 229
 230         const int DETECTION_SIZE = outputs[0]->getTensorDesc().getDims()[3];
 231         if (DETECTION_SIZE != 7) {
 232             return NOT_IMPLEMENTED;
 233         }
 234
 235         auto dst_data_size = N * _keep_top_k * DETECTION_SIZE * sizeof(float);
 236
 237         if (dst_data_size > outputs[0]->byteSize()) {
 238             return OUT_OF_BOUNDS;
 239         }
 240
 241         memset(dst_data, 0, dst_data_size);
 242
 243         int count = 0;
 244         for (int n = 0; n < N; ++n) {
 245             const float *pconf   = reordered_conf_data + n * _num_priors * _num_classes;
 246             const float *pboxes  = decoded_bboxes_data + n*_num_priors*4*_num_loc_classes;
 247             const int *pindices  = indices_data + n*_num_classes*_num_priors;
 248
 249             for (int c = 0; c < _num_classes; ++c) {
 250                 for (int i = 0; i < detections_data[n*_num_classes + c]; ++i) {
 251                     int idx = pindices[c*_num_priors + i];
 252
 253                     dst_data[count * DETECTION_SIZE + 0] = n;
 254                     dst_data[count * DETECTION_SIZE + 1] = _decrease_label_id ? c-1 : c;
 255                     dst_data[count * DETECTION_SIZE + 2] = pconf[c*_num_priors + idx];
 256
 257                     float xmin = _share_location ? pboxes[idx*4 + 0] :
 258                                  pboxes[c*4*_num_priors + idx*4 + 0];
 259                     float ymin = _share_location ? pboxes[idx*4 + 1] :
 260                                  pboxes[c*4*_num_priors + idx*4 + 1];
 261                     float xmax = _share_location ? pboxes[idx*4 + 2] :
 262                                  pboxes[c*4*_num_priors + idx*4 + 2];
 263                     float ymax = _share_location ? pboxes[idx*4 + 3] :
 264                                  pboxes[c*4*_num_priors + idx*4 + 3];
 265
 266                     dst_data[count * DETECTION_SIZE + 3] = xmin;
 267                     dst_data[count * DETECTION_SIZE + 4] = ymin;
 268                     dst_data[count * DETECTION_SIZE + 5] = xmax;
 269                     dst_data[count * DETECTION_SIZE + 6] = ymax;
 270
 271                     ++count;
 272                 }
 273             }
 274         }
 275
 276         if (count < N*_keep_top_k) {
 277             // marker at end of boxes list
 278             dst_data[count * DETECTION_SIZE + 0] = -1;
 279         }
 280
 281         return OK;
 282     }
 283
 284 private:
 285     const int idx_location = 0;
 286     const int idx_confidence = 1;
 287     const int idx_priors = 2;
 288
 289
 290     int _num_classes = 0;
 291     int _background_label_id = 0;
 292     int _top_k = 0;
 293     int _variance_encoded_in_target = 0;
 294     int _keep_top_k = 0;
 295     int _code_type = 0;
 296
 297     bool _share_location = false;
 298     bool _clip = false;
 299     bool _decrease_label_id = false;
 300
 301     int _image_width = 0;
 302     int _image_height = 0;
 303     int _prior_size = 4;
 304     bool _normalized = true;
 305     int _offset = 0;
 306
 307     float _nms_threshold = 0.0f;
 308     float _confidence_threshold = 0.0f;
 309
 310     int _num = 0;
 311     int _num_loc_classes = 0;
 312     int _num_priors = 0;
 313
 314     enum CodeType {
 315         CORNER = 1,
 316         CENTER_SIZE = 2,
 317     };
 318
 319     void decodeBBoxes(const float *prior_data, const float *loc_data, const float *variance_data,
 320                       float *decoded_bboxes, float *decoded_bbox_sizes, int* num_priors_actual, int n);
 321
 322     void nms(const float *conf_data, const float *bboxes, const float *sizes,
 323              int *buffer, int *indices, int &detections, int num_priors_actual);
 324
 325     InferenceEngine::Blob::Ptr _decoded_bboxes;
 326     InferenceEngine::Blob::Ptr _buffer;
 327     InferenceEngine::Blob::Ptr _indices;
 328     InferenceEngine::Blob::Ptr _detections_count;
 329     InferenceEngine::Blob::Ptr _reordered_conf;
 330     InferenceEngine::Blob::Ptr _bbox_sizes;
 331     InferenceEngine::Blob::Ptr _num_priors_actual;
 332 };
 333
 334 struct ConfidenceComparator {
 335     explicit ConfidenceComparator(const float* conf_data) : _conf_data(conf_data) {}
 336
 337     bool operator()(int idx1, int idx2) {
 338         if (_conf_data[idx1] > _conf_data[idx2]) return true;
 339         if (_conf_data[idx1] < _conf_data[idx2]) return false;
 340         return idx1 < idx2;
 341     }
 342
 343     const float* _conf_data;
 344 };
 345
 346 static inline float JaccardOverlap(const float *decoded_bbox,
 347                                    const float *bbox_sizes,
 348                                    const int idx1,
 349                                    const int idx2) {
 350     float xmin1 = decoded_bbox[idx1*4 + 0];
 351     float ymin1 = decoded_bbox[idx1*4 + 1];
 352     float xmax1 = decoded_bbox[idx1*4 + 2];
 353     float ymax1 = decoded_bbox[idx1*4 + 3];
 354
 355     float xmin2 = decoded_bbox[idx2*4 + 0];
 356     float ymin2 = decoded_bbox[idx2*4 + 1];
 357     float ymax2 = decoded_bbox[idx2*4 + 3];
 358     float xmax2 = decoded_bbox[idx2*4 + 2];
 359
 360     if (xmin2 > xmax1 || xmax2 < xmin1 || ymin2 > ymax1 || ymax2 < ymin1) {
 361         return 0.0f;
 362     }
 363
 364     float intersect_xmin = std::max(xmin1, xmin2);
 365     float intersect_ymin = std::max(ymin1, ymin2);
 366     float intersect_xmax = std::min(xmax1, xmax2);
 367     float intersect_ymax = std::min(ymax1, ymax2);
 368
 369     float intersect_width  = intersect_xmax - intersect_xmin;
 370     float intersect_height = intersect_ymax - intersect_ymin;
 371
 372     if (intersect_width <= 0 || intersect_height <= 0) {
 373         return 0.0f;
 374     }
 375
 376     float intersect_size = intersect_width * intersect_height;
 377     float bbox1_size = bbox_sizes[idx1];
 378     float bbox2_size = bbox_sizes[idx2];
 379
 380     return intersect_size / (bbox1_size + bbox2_size - intersect_size);
 381 }
 382
 383 void DetectionOutputImpl::decodeBBoxes(const float *prior_data,
 384                                    const float *loc_data,
 385                                    const float *variance_data,
 386                                    float *decoded_bboxes,
 387                                    float *decoded_bbox_sizes,
 388                                    int* num_priors_actual,
 389                                    int n) {
 390     num_priors_actual[n] = _num_priors;
 391     if (!_normalized) {
 392         int num = 0;
 393         for (; num < _num_priors; ++num) {
 394             float batch_id = prior_data[num * _prior_size + 0];
 395             if (batch_id == -1.f) {
 396                 num_priors_actual[n] = num;
 397                 break;
 398             }
 399         }
 400     }
 401
 402     #pragma omp parallel for schedule(static)
 403     for (int p = 0; p < num_priors_actual[n]; ++p) {
 404         float new_xmin = 0.0f;
 405         float new_ymin = 0.0f;
 406         float new_xmax = 0.0f;
 407         float new_ymax = 0.0f;
 408
 409         float prior_xmin = prior_data[p*_prior_size + 0 + _offset];
 410         float prior_ymin = prior_data[p*_prior_size + 1 + _offset];
 411         float prior_xmax = prior_data[p*_prior_size + 2 + _offset];
 412         float prior_ymax = prior_data[p*_prior_size + 3 + _offset];
 413
 414         float loc_xmin = loc_data[4*p*_num_loc_classes + 0];
 415         float loc_ymin = loc_data[4*p*_num_loc_classes + 1];
 416         float loc_xmax = loc_data[4*p*_num_loc_classes + 2];
 417         float loc_ymax = loc_data[4*p*_num_loc_classes + 3];
 418
 419         if (!_normalized) {
 420             prior_xmin /= _image_width;
 421             prior_ymin /= _image_height;
 422             prior_xmax /= _image_width;
 423             prior_ymax /= _image_height;
 424         }
 425
 426         if (_code_type == CodeType::CORNER) {
 427             if (_variance_encoded_in_target) {
 428                 // variance is encoded in target, we simply need to add the offset predictions.
 429                 new_xmin = prior_xmin + loc_xmin;
 430                 new_ymin = prior_ymin + loc_ymin;
 431                 new_xmax = prior_xmax + loc_xmax;
 432                 new_ymax = prior_ymax + loc_ymax;
 433             } else {
 434                 new_xmin = prior_xmin + variance_data[p*4 + 0] * loc_xmin;
 435                 new_ymin = prior_ymin + variance_data[p*4 + 1] * loc_ymin;
 436                 new_xmax = prior_xmax + variance_data[p*4 + 2] * loc_xmax;
 437                 new_ymax = prior_ymax + variance_data[p*4 + 3] * loc_ymax;
 438             }
 439         } else if (_code_type == CodeType::CENTER_SIZE) {
 440             float prior_width    =  prior_xmax - prior_xmin;
 441             float prior_height   =  prior_ymax - prior_ymin;
 442             float prior_center_x = (prior_xmin + prior_xmax) / 2.0f;
 443             float prior_center_y = (prior_ymin + prior_ymax) / 2.0f;
 444
 445             float decode_bbox_center_x, decode_bbox_center_y;
 446             float decode_bbox_width, decode_bbox_height;
 447
 448             if (_variance_encoded_in_target) {
 449                 // variance is encoded in target, we simply need to restore the offset predictions.
 450                 decode_bbox_center_x = loc_xmin * prior_width  + prior_center_x;
 451                 decode_bbox_center_y = loc_ymin * prior_height + prior_center_y;
 452                 decode_bbox_width  = std::exp(loc_xmax) * prior_width;
 453                 decode_bbox_height = std::exp(loc_ymax) * prior_height;
 454             } else {
 455                 // variance is encoded in bbox, we need to scale the offset accordingly.
 456                 decode_bbox_center_x = variance_data[p*4 + 0] * loc_xmin * prior_width + prior_center_x;
 457                 decode_bbox_center_y = variance_data[p*4 + 1] * loc_ymin * prior_height + prior_center_y;
 458                 decode_bbox_width    = std::exp(variance_data[p*4 + 2] * loc_xmax) * prior_width;
 459                 decode_bbox_height   = std::exp(variance_data[p*4 + 3] * loc_ymax) * prior_height;
 460             }
 461
 462             new_xmin = decode_bbox_center_x - decode_bbox_width  / 2.0f;
 463             new_ymin = decode_bbox_center_y - decode_bbox_height / 2.0f;
 464             new_xmax = decode_bbox_center_x + decode_bbox_width  / 2.0f;
 465             new_ymax = decode_bbox_center_y + decode_bbox_height / 2.0f;
 466         }
 467
 468         if (_clip) {
 469             new_xmin = std::max(0.0f, std::min(1.0f, new_xmin));
 470             new_ymin = std::max(0.0f, std::min(1.0f, new_ymin));
 471             new_xmax = std::max(0.0f, std::min(1.0f, new_xmax));
 472             new_ymax = std::max(0.0f, std::min(1.0f, new_ymax));
 473         }
 474
 475         decoded_bboxes[p*4 + 0] = new_xmin;
 476         decoded_bboxes[p*4 + 1] = new_ymin;
 477         decoded_bboxes[p*4 + 2] = new_xmax;
 478         decoded_bboxes[p*4 + 3] = new_ymax;
 479
 480         decoded_bbox_sizes[p] = (new_xmax - new_xmin) * (new_ymax - new_ymin);
 481     }
 482 }
 483
 484 void DetectionOutputImpl::nms(const float* conf_data,
 485                           const float* bboxes,
 486                           const float* sizes,
 487                           int* buffer,
 488                           int* indices,
 489                           int& detections,
 490                           int num_priors_actual) {
 491     int count = 0;
 492     for (int i = 0; i < num_priors_actual; ++i) {
 493         if (conf_data[i] > _confidence_threshold) {
 494             indices[count] = i;
 495             count++;
 496         }
 497     }
 498
 499     int num_output_scores = (_top_k == -1 ? count : std::min<int>(_top_k, count));
 500
 501     std::partial_sort_copy(indices, indices + count,
 502                            buffer, buffer + num_output_scores,
 503                            ConfidenceComparator(conf_data));
 504
 505     for (int i = 0; i < num_output_scores; ++i) {
 506         const int idx = buffer[i];
 507
 508         bool keep = true;
 509         for (int k = 0; k < detections; ++k) {
 510             const int kept_idx = indices[k];
 511             float overlap = JaccardOverlap(bboxes, sizes, idx, kept_idx);
 512             if (overlap > _nms_threshold) {
 513                 keep = false;
 514                 break;
 515             }
 516         }
 517         if (keep) {
 518             indices[detections] = idx;
 519             detections++;
 520         }
 521     }
 522 }
 523
  // Associates ImplFactory<DetectionOutputImpl> with the name DetectionOutput.
  // NOTE(review): REG_FACTORY_FOR is defined outside this file (presumably in
  // ext_list.hpp) - confirm the exact registration semantics there.
  524 REG_FACTORY_FOR(ImplFactory<DetectionOutputImpl>, DetectionOutput);
 525
 526 }  // namespace Cpu
 527 }  // namespace Extensions
 528 }  // namespace InferenceEngine