1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include <gtest/gtest.h>
6 #include <gmock/gmock-spec-builders.h>
7 #include "mkldnn_plugin/mkldnn_graph.h"
9 #include "test_graph.hpp"
11 #include "single_layer_common.hpp"
12 #include <mkldnn_plugin/mkldnn_extension_utils.h>
13 #include <extension/ext_list.hpp>
14 #include "tests_common.hpp"
17 using namespace ::testing;
19 using namespace mkldnn;
21 struct nmsTF_test_params {
23 InferenceEngine::SizeVector scoresDim;
24 std::vector<float> boxes;
25 std::vector<float> scores;
26 std::vector<int> max_output_boxes_per_class;
27 std::vector<float> iou_threshold;
28 std::vector<float> score_threshold;
30 int num_selected_indices;
33 std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
// Computes the intersection-over-union of two axis-aligned boxes.
//
// boxesI, boxesJ    each points at four consecutive floats describing one box
// center_point_box  true  -> {x_center, y_center, width, height}
//                   false -> {y1, x1, y2, x2}; the two corners may be given
//                            in either order
//
// Returns a value in [0, 1]; 0 when either box has a non-positive area.
static float intersectionOverUnion(const float* boxesI, const float* boxesJ, bool center_point_box) {
    float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ;
    if (center_point_box) {
        //  box format: x_center, y_center, width, height
        yminI = boxesI[1] - boxesI[3] / 2.f;
        xminI = boxesI[0] - boxesI[2] / 2.f;
        ymaxI = boxesI[1] + boxesI[3] / 2.f;
        xmaxI = boxesI[0] + boxesI[2] / 2.f;
        yminJ = boxesJ[1] - boxesJ[3] / 2.f;
        xminJ = boxesJ[0] - boxesJ[2] / 2.f;
        ymaxJ = boxesJ[1] + boxesJ[3] / 2.f;
        xmaxJ = boxesJ[0] + boxesJ[2] / 2.f;
    } else {
        //  box format: y1, x1, y2, x2
        //  min/max normalizes boxes whose corners are given "flipped".
        yminI = (std::min)(boxesI[0], boxesI[2]);
        xminI = (std::min)(boxesI[1], boxesI[3]);
        ymaxI = (std::max)(boxesI[0], boxesI[2]);
        xmaxI = (std::max)(boxesI[1], boxesI[3]);
        yminJ = (std::min)(boxesJ[0], boxesJ[2]);
        xminJ = (std::min)(boxesJ[1], boxesJ[3]);
        ymaxJ = (std::max)(boxesJ[0], boxesJ[2]);
        xmaxJ = (std::max)(boxesJ[1], boxesJ[3]);
    }

    const float areaI = (ymaxI - yminI) * (xmaxI - xminI);
    const float areaJ = (ymaxJ - yminJ) * (xmaxJ - xminJ);
    // Degenerate boxes overlap nothing; this also keeps the divisor below
    // strictly positive.
    if (areaI <= 0.f || areaJ <= 0.f)
        return 0.f;

    // Each overlap extent is clamped at 0 so disjoint boxes yield area 0.
    const float intersection_area =
            (std::max)((std::min)(ymaxI, ymaxJ) - (std::max)(yminI, yminJ), 0.f) *
            (std::max)((std::min)(xmaxI, xmaxJ) - (std::max)(xminI, xminJ), 0.f);
    return intersection_area / (areaI + areaJ - intersection_area);
}
79 InferenceEngine::TBlob<float> &srcBoxes,
80 InferenceEngine::TBlob<float> &srcScores,
81 InferenceEngine::TBlob<int> &selected_idxs,
84 float *boxes = srcBoxes.data();
85 float *scores = srcScores.data();
87 InferenceEngine::SizeVector scores_dims = srcScores.getTensorDesc().getDims();
88 int num_boxes = static_cast<int>(scores_dims[2]);
89 int max_output_boxes_per_class = num_boxes;
90 if (p.max_output_boxes_per_class.size())
91 max_output_boxes_per_class = (std::min)(max_output_boxes_per_class, p.max_output_boxes_per_class[0]);
93 float iou_threshold = 1.f; // Value range [0, 1]
94 if (p.iou_threshold.size())
95 iou_threshold = (std::min)(iou_threshold, p.iou_threshold[0]);
97 float score_threshold = 0.f;
98 if (p.score_threshold.size())
99 score_threshold = p.score_threshold[0];
101 int* selected_indices = selected_idxs.data();
102 InferenceEngine::SizeVector selected_indices_dims = selected_idxs.getTensorDesc().getDims();
104 InferenceEngine::SizeVector boxesStrides = srcBoxes.getTensorDesc().getBlockingDesc().getStrides();
105 InferenceEngine::SizeVector scoresStrides = srcScores.getTensorDesc().getBlockingDesc().getStrides();
107 // boxes shape: {num_batches, num_boxes, 4}
108 // scores shape: {num_batches, num_classes, num_boxes}
109 int num_batches = static_cast<int>(scores_dims[0]);
110 int num_classes = static_cast<int>(scores_dims[1]);
111 std::vector<filteredBoxes> fb;
113 for (int batch = 0; batch < num_batches; batch++) {
114 float *boxesPtr = boxes + batch * boxesStrides[0];
115 for (int class_idx = 0; class_idx < num_classes; class_idx++) {
116 float *scoresPtr = scores + batch * scoresStrides[0] + class_idx * scoresStrides[1];
117 std::vector<std::pair<float, int> > scores_vector;
118 for (int box_idx = 0; box_idx < num_boxes; box_idx++) {
119 if (scoresPtr[box_idx] > score_threshold)
120 scores_vector.push_back(std::make_pair(scoresPtr[box_idx], box_idx));
123 if (scores_vector.size()) {
124 std::sort(scores_vector.begin(), scores_vector.end(),
125 [](const std::pair<float, int>& l, const std::pair<float, int>& r) { return l.first > r.first; });
127 int io_selection_size = 1;
128 fb.push_back({ scores_vector[0].first, batch, class_idx, scores_vector[0].second });
129 for (int box_idx = 1; (box_idx < static_cast<int>(scores_vector.size()) && io_selection_size < max_output_boxes_per_class); box_idx++) {
130 bool box_is_selected = true;
131 for (int idx = io_selection_size - 1; idx >= 0; idx--) {
132 float iou = intersectionOverUnion(&boxesPtr[scores_vector[box_idx].second * 4],
133 &boxesPtr[scores_vector[idx].second * 4], (p.center_point_box == 1));
134 if (iou > iou_threshold) {
135 box_is_selected = false;
140 if (box_is_selected) {
141 scores_vector[io_selection_size] = scores_vector[box_idx];
143 fb.push_back({ scores_vector[box_idx].first, batch, class_idx, scores_vector[box_idx].second });
150 std::sort(fb.begin(), fb.end(), [](const filteredBoxes& l, const filteredBoxes& r) { return l.score > r.score; });
151 int selected_indicesStride = selected_idxs.getTensorDesc().getBlockingDesc().getStrides()[0];
152 int* selected_indicesPtr = selected_indices;
154 for (idx = 0; idx < (std::min)(selected_indices_dims[0], fb.size()); idx++) {
155 selected_indicesPtr[0] = fb[idx].batch_index;
156 selected_indicesPtr[1] = fb[idx].class_index;
157 selected_indicesPtr[2] = fb[idx].box_index;
158 selected_indicesPtr += selected_indicesStride;
160 for (; idx < selected_indices_dims[0]; idx++) {
161 selected_indicesPtr[0] = -1;
162 selected_indicesPtr[1] = -1;
163 selected_indicesPtr[2] = -1;
164 selected_indicesPtr += selected_indicesStride;
// Value-parameterized test fixture for the "NonMaxSuppression" layer,
// parameterized over nmsTF_test_params.
//
// The fixture holds four network description templates. Each declares the
// same "non_max_suppression" layer (id 6) and differs only in how many Input
// layers are wired to it:
//   model_t2: InputBoxes (id 1), InputScores (id 2)
//   model_t3: model_t2 + InputBoxesPerClass (id 3, I32)
//   model_t4: model_t3 + InputIouThr (id 4)
//   model_t5: model_t4 + InputScoreThr (id 5)
// "_CPB_" in the layer's <data> element is a placeholder that getModel()
// replaces with the test's center_point_box value.
// NOTE(review): only part of each template is visible here; the _IBOXES_,
// _ISCORES_ and _IOUT_ placeholders that getModel() substitutes are presumably
// inside the port descriptions - confirm.
168 class MKLDNNCPUExtNonMaxSuppressionTFTests : public TestsCommon, public WithParamInterface<nmsTF_test_params> {
169 std::string model_t2 = R"V0G0N(
170 <net Name="NonMaxSuppression_net" version="2" precision="FP32" batch="1">
172 <layer name="InputBoxes" type="Input" precision="FP32" id="1">
179 <layer name="InputScores" type="Input" precision="FP32" id="2">
186 <layer name="non_max_suppression" type="NonMaxSuppression" precision="FP32" id="6">
187 <data center_point_box="_CPB_"/>
197 <port id="6" precision="I32">
204 <edge from-layer="1" from-port="1" to-layer="6" to-port="1"/>
205 <edge from-layer="2" from-port="2" to-layer="6" to-port="2"/>
210 std::string model_t3 = R"V0G0N(
211 <net Name="NonMaxSuppression_net" version="2" precision="FP32" batch="1">
213 <layer name="InputBoxes" type="Input" precision="FP32" id="1">
220 <layer name="InputScores" type="Input" precision="FP32" id="2">
227 <layer name="InputBoxesPerClass" type="Input" precision="I32" id="3">
234 <layer name="non_max_suppression" type="NonMaxSuppression" precision="FP32" id="6">
235 <data center_point_box="_CPB_"/>
243 <port id="3" precision="I32">
248 <port id="6" precision="I32">
255 <edge from-layer="1" from-port="1" to-layer="6" to-port="1"/>
256 <edge from-layer="2" from-port="2" to-layer="6" to-port="2"/>
257 <edge from-layer="3" from-port="3" to-layer="6" to-port="3"/>
261 std::string model_t4 = R"V0G0N(
262 <net Name="NonMaxSuppression_net" version="2" precision="FP32" batch="1">
264 <layer name="InputBoxes" type="Input" precision="FP32" id="1">
271 <layer name="InputScores" type="Input" precision="FP32" id="2">
278 <layer name="InputBoxesPerClass" type="Input" precision="I32" id="3">
285 <layer name="InputIouThr" type="Input" precision="FP32" id="4">
292 <layer name="non_max_suppression" type="NonMaxSuppression" precision="FP32" id="6">
293 <data center_point_box="_CPB_"/>
301 <port id="3" precision="I32">
309 <port id="6" precision="I32">
316 <edge from-layer="1" from-port="1" to-layer="6" to-port="1"/>
317 <edge from-layer="2" from-port="2" to-layer="6" to-port="2"/>
318 <edge from-layer="3" from-port="3" to-layer="6" to-port="3"/>
319 <edge from-layer="4" from-port="4" to-layer="6" to-port="4"/>
324 std::string model_t5 = R"V0G0N(
325 <net Name="NonMaxSuppression_net" version="2" precision="FP32" batch="1">
327 <layer name="InputBoxes" type="Input" precision="FP32" id="1">
334 <layer name="InputScores" type="Input" precision="FP32" id="2">
341 <layer name="InputBoxesPerClass" type="Input" precision="I32" id="3">
348 <layer name="InputIouThr" type="Input" precision="FP32" id="4">
355 <layer name="InputScoreThr" type="Input" precision="FP32" id="5">
362 <layer name="non_max_suppression" type="NonMaxSuppression" precision="FP32" id="6">
363 <data center_point_box="_CPB_"/>
371 <port id="3" precision="I32">
382 <port id="6" precision="I32">
389 <edge from-layer="1" from-port="1" to-layer="6" to-port="1"/>
390 <edge from-layer="2" from-port="2" to-layer="6" to-port="2"/>
391 <edge from-layer="3" from-port="3" to-layer="6" to-port="3"/>
392 <edge from-layer="4" from-port="4" to-layer="6" to-port="4"/>
393 <edge from-layer="5" from-port="5" to-layer="6" to-port="5"/>
// Builds the network description text for one parameter set.
//
// The conditions below test, in order, whether max_output_boxes_per_class,
// iou_threshold and score_threshold are empty.
// NOTE(review): the branch bodies are not visible here; presumably each one
// picks the template with the matching number of inputs (model_t2 ..
// model_t5) - confirm.
//
// Placeholders are then filled in:
//   _IBOXES_  <- <dim> list {scoresDim[0], scoresDim[2], 4}
//   _ISCORES_ <- one <dim> entry per element of scoresDim
//   _IOUT_    <- <dim> list {num_selected_indices, 3}
//   _CPB_     <- center_point_box
398 std::string getModel(nmsTF_test_params p) {
400 if (!p.max_output_boxes_per_class.size())
402 else if (!p.iou_threshold.size())
404 else if (!p.score_threshold.size())
410 std::string inScores;
// Boxes input shape: batch and box count are taken from the scores shape,
// the last dimension is fixed to 4 coordinates per box.
413 inBoxes += "<dim>" + std::to_string(p.scoresDim[0]) + "</dim>\n";
414 inBoxes += "<dim>" + std::to_string(p.scoresDim[2]) + "</dim>\n";
415 inBoxes += "<dim>4</dim>";
// Scores input shape: scoresDim as given.
418 for (auto& scr : p.scoresDim) {
420 inScores += std::to_string(scr) + "</dim>\n";
// Output shape: one row of 3 indices per selected box.
423 out += "<dim>" + std::to_string(p.num_selected_indices) + "</dim>\n";
424 out += "<dim>3</dim>";
426 REPLACE_WITH_STR(model, "_IBOXES_", inBoxes);
427 REPLACE_WITH_STR(model, "_ISCORES_", inScores);
428 REPLACE_WITH_STR(model, "_IOUT_", out);
429 REPLACE_WITH_NUM(model, "_CPB_", p.center_point_box);
// Per-test teardown hook of the fixture.
// NOTE(review): no statements of its body are visible in this excerpt.
435 virtual void TearDown() {
// Runs the complete test for the current parameter set: builds the network
// text, loads it into an MKLDNN graph with the cpu_extension library
// registered, fills the input blobs, runs inference and checks the selected
// indices. The TEST_P body itself is empty, so this is where the test happens.
438 virtual void SetUp() {
440 TestsCommon::SetUp();
441 nmsTF_test_params p = ::testing::WithParamInterface<nmsTF_test_params>::GetParam();
442 std::string model = getModel(p);
443 //std::cout << model << std::endl;
444 InferenceEngine::CNNNetReader net_reader;
445 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
// cpuExt is a local object, so it is handed to the extension manager through
// a pointer whose deleter does nothing.
447 InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
448 MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
449 extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*){}));
451 MKLDNNGraphTestClass graph;
452 graph.CreateGraph(net_reader.getNetwork(), extMgr);
// Input blobs, keyed by the Input layer names used in the network templates.
455 InferenceEngine::BlobMap srcs;
// Boxes input: shape {scoresDim[0], scoresDim[2], 4}.
458 InferenceEngine::SizeVector boxesDim = {p.scoresDim[0], p.scoresDim[2], 4};
459 InferenceEngine::Blob::Ptr srcBoxes = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, boxesDim, InferenceEngine::TensorDesc::getLayoutByDims(boxesDim) });
460 srcBoxes->allocate();
// NOTE(review): p.boxes.size() values are copied; no check that this matches
// the blob's element count is visible here - confirm the test data always fits.
461 for (size_t i = 0; i < p.boxes.size(); i++) {
462 static_cast<float*>(srcBoxes->buffer())[i] = static_cast<float>(p.boxes[i]);
464 //memcpy(srcBoxes->buffer(), &p.boxes[0], sizeof(float)*boxes.size());
// The typed view is what ref_nms() takes later on.
465 auto * srcBoxesPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(srcBoxes.get());
466 if (srcBoxesPtr == nullptr)
467 FAIL() << "Cannot cast blob to TBlob<float>.";
468 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputBoxes", srcBoxes));
// Scores input: shape scoresDim, filled the same way as the boxes.
471 InferenceEngine::Blob::Ptr srcScores = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.scoresDim, InferenceEngine::TensorDesc::getLayoutByDims(p.scoresDim) });
472 srcScores->allocate();
473 for (size_t i = 0; i < p.scores.size(); i++) {
474 static_cast<float*>(srcScores->buffer())[i] = static_cast<float>(p.scores[i]);
476 auto * srcScoresPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(srcScores.get());
477 if (srcScoresPtr == nullptr)
478 FAIL() << "Cannot cast blob to TBlob<float>.";
479 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputScores", srcScores));
// Optional scalar inputs: a blob is created and registered only when the
// corresponding parameter vector is non-empty, and only its first element
// (one int32_t / one float) is copied in.
481 // Input BoxesPerClass
482 InferenceEngine::Blob::Ptr srcBoxesPerClass;
483 InferenceEngine::Blob::Ptr srcIouThr;
484 InferenceEngine::Blob::Ptr srcScoreThr;
485 if (p.max_output_boxes_per_class.size()) {
486 srcBoxesPerClass = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, InferenceEngine::SizeVector(1,1), InferenceEngine::TensorDesc::getLayoutByDims(InferenceEngine::SizeVector(1,1)) });
487 srcBoxesPerClass->allocate();
488 memcpy(static_cast<int32_t*>(srcBoxesPerClass->buffer()), &p.max_output_boxes_per_class[0], sizeof(int32_t));
489 auto * srcBoxesPerClassPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(srcBoxesPerClass.get());
490 if (srcBoxesPerClassPtr == nullptr)
491 FAIL() << "Cannot cast blob to TBlob<int32_t>.";
492 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputBoxesPerClass", srcBoxesPerClass));
// IoU threshold input.
496 if (p.iou_threshold.size()) {
497 srcIouThr = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, InferenceEngine::SizeVector(1,1), InferenceEngine::TensorDesc::getLayoutByDims(InferenceEngine::SizeVector(1,1)) });
498 srcIouThr->allocate();
499 memcpy(static_cast<float*>(srcIouThr->buffer()), &p.iou_threshold[0], sizeof(float));
500 auto * srcIouThrPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(srcIouThr.get());
501 if (srcIouThrPtr == nullptr)
502 FAIL() << "Cannot cast blob to TBlob<float>.";
503 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputIouThr", srcIouThr));
// Score threshold input.
507 if (p.score_threshold.size()) {
508 srcScoreThr = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, InferenceEngine::SizeVector(1,1), InferenceEngine::TensorDesc::getLayoutByDims(InferenceEngine::SizeVector(1,1)) });
509 srcScoreThr->allocate();
510 memcpy(static_cast<float*>(srcScoreThr->buffer()), &p.score_threshold[0], sizeof(float));
511 auto * srcScoreThrPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(srcScoreThr.get());
512 if (srcScoreThrPtr == nullptr)
513 FAIL() << "Cannot cast blob to TBlob<float>.";
514 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputScoreThr", srcScoreThr));
// Output blob: only the first entry of the network's outputs map is used.
518 InferenceEngine::OutputsDataMap out;
519 out = net_reader.getNetwork().getOutputsInfo();
520 InferenceEngine::BlobMap outputBlobs;
521 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
522 InferenceEngine::TBlob<int32_t>::Ptr output;
// NOTE(review): no output->allocate() call is visible in this excerpt before
// the blob is passed to Infer() - confirm the blob is allocated.
523 output = InferenceEngine::make_shared_blob<int32_t>(item.second->getTensorDesc());
525 outputBlobs[item.first] = output;
528 graph.Infer(srcs, outputBlobs);
// Check 1: compare against the in-file reference implementation, computed
// into a blob with the same descriptor as the network output.
532 InferenceEngine::TBlob <int32_t> selected_indices_ref(item.second->getTensorDesc());
533 selected_indices_ref.allocate();
534 ref_nms(*srcBoxesPtr, *srcScoresPtr, selected_indices_ref, p);
535 compare(*output, selected_indices_ref);
// Check 2: compare against the hard-coded expected values in p.ref.
// NOTE(review): memcmp takes a byte count, but p.ref.size() is an element
// count; if ref holds ints, only the first size() bytes (roughly a quarter of
// the expected data) are compared. Also, &p.ref[0] is not valid for an empty
// ref (one test case passes {}) unless this statement is guarded by a
// condition not visible here. Confirm, and consider comparing
// p.ref.size() * sizeof(p.ref[0]) bytes once the references are re-verified.
538 if (memcmp((*output).data(), &p.ref[0], p.ref.size()) != 0)
539 FAIL() << "Wrong result with compare TF reference!";
// InferenceEngine exceptions raised above are caught here; the handler body
// is not visible in this excerpt.
541 } catch (const InferenceEngine::details::InferenceEngineException &e) {
// The test body is empty: network creation, inference and the result checks
// for each parameter set are performed in the fixture's SetUp().
547 TEST_P(MKLDNNCPUExtNonMaxSuppressionTFTests, TestsNonMaxSuppression) {}
// Data shared by several test cases below.
// boxes: 6 boxes, 4 coordinates each; the cases that use it pass
// center_point_box = 0, i.e. the {y1, x1, y2, x2} format.
549 static std::vector<float> boxes = { 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0 };
// scores: one score per box for a single batch and a single class.
550 static std::vector<float> scores = { 0.9f, 0.75f, 0.6f, 0.95f, 0.5f, 0.3f };
// reference: expected output as three {batch, class, box} triples:
// {0,0,3}, {0,0,0}, {0,0,5}.
551 static std::vector<int> reference = { 0,0,3,0,0,0,0,0,5 };
// Test matrix. Each entry aggregate-initializes one nmsTF_test_params in the
// member order given by the "Params:" comment; the trailing /*...*/ comment
// names the scenario. An empty {} for max_output_boxes_per_class,
// iou_threshold or score_threshold means that input is not fed to the
// network (SetUp() only creates a blob for a non-empty vector).
553 INSTANTIATE_TEST_CASE_P(
554 TestsNonMaxSuppression, MKLDNNCPUExtNonMaxSuppressionTFTests,
556 // Params: center_point_box, scoresDim, boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, num_selected_indices, ref
558 nmsTF_test_params{ 1, {1,1,6}, { 0.5f, 0.5f, 1.0f, 1.0f,0.5f, 0.6f, 1.0f, 1.0f,0.5f, 0.4f, 1.0f, 1.0f,0.5f, 10.5f, 1.0f, 1.0f, 0.5f, 10.6f, 1.0f, 1.0f, 0.5f, 100.5f, 1.0f, 1.0f },
559 scores,{ 3 },{ 0.5f },{ 0.f }, 3, reference }, /*nonmaxsuppression_center_point_box_format*/
561 nmsTF_test_params{ 0, {1,1,6}, { 1.0, 1.0, 0.0, 0.0, 0.0, 0.1, 1.0, 1.1, 0.0, 0.9, 1.0, -0.1, 0.0, 10.0, 1.0, 11.0, 1.0, 10.1, 0.0, 11.1, 1.0, 101.0, 0.0, 100.0 },
562 scores,{ 3 },{ 0.5 },{ 0.0 }, 3, reference }, /*nonmaxsuppression_flipped_coordinates*/
564 nmsTF_test_params{ 0, { 1,1,10 },{ 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0,
565 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0 },
566 { 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9 },{ 3 },{ 0.5 },{ 0.0 }, 1,{ 0,0,0 } }, /*nonmaxsuppression_identical_boxes*/
568 nmsTF_test_params{ 0, { 1,1,6 }, boxes, scores,{ 2 },{ 0.5 },{ 0.0 }, 2,{ 0,0,3,0,0,0 } }, /*nonmaxsuppression_limit_output_size*/
570 nmsTF_test_params{ 0,{ 1,1,1 },{ 0.0, 0.0, 1.0, 1.0 }, { 0.9 },{ 3 },{ 0.5 },{ 0.0 }, 1, { 0,0,0 } }, /*nonmaxsuppression_single_box*/
572 nmsTF_test_params{ 0, { 1,1,6 }, boxes, scores, { 3 }, { 0.5 }, { 0.0 }, 3, reference }, /*nonmaxsuppression_suppress_by_IOU*/
574 nmsTF_test_params{ 0, { 2,1,6 },{ 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
575 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0 },
576 { 0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.9, 0.75, 0.6, 0.95, 0.5, 0.3 },{ 2 },{ 0.5 },{ 0.0 }, 4,{ 0,0,3,0,0,0,1,0,3,1,0,0 } }, /*nonmaxsuppression_two_batches*/
578 nmsTF_test_params{ 0, { 1,2,6 }, boxes,
579 { 0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.9, 0.75, 0.6, 0.95, 0.5, 0.3 },{ 2 },{ 0.5 },{ 0.0 }, 4,{ 0,0,3,0,0,0,0,1,3,0,1,0 } }, /*nonmaxsuppression_two_classes*/
581 nmsTF_test_params{ 0, { 1,1,6 }, boxes, scores, { 3 }, { 0.5 }, {}, 3, reference }, /*nonmaxsuppression_no_score_threshold*/
583 nmsTF_test_params{ 0, { 1,1,6 }, boxes, scores, { 3 }, {}, {}, 3, reference }, /*nonmaxsuppression_no_iou_threshold_and_score_threshold*/
// This last case passes an empty ref, so it has no hard-coded expected values.
585 nmsTF_test_params{ 0, { 1,1,6 }, boxes, scores, {}, {}, {}, 3, {} } /*nonmaxsuppression_no_max_output_boxes_per_class_and_iou_threshold_and_score_threshold*/