1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include <gtest/gtest.h>
6 #include <gmock/gmock-spec-builders.h>
7 #include "mkldnn_plugin/mkldnn_graph.h"
9 #include "single_layer_common.hpp"
10 #include <mkldnn_plugin/mkldnn_extension_utils.h>
11 #include "tests_common.hpp"
12 #include "../test_graph.hpp"
13 #include <ext_list.hpp>
14 #include <ie_builders.hpp>
16 using namespace ::testing;
18 using namespace mkldnn;
// Fixture for MKLDNN graph-structure tests. Carries no per-test state of its
// own; it only inherits the common setup/teardown from TestsCommon.
20 class MKLDNNGraphStructureTests: public TestsCommon {};
// Checks that graph compilation inserts no redundant Reorder nodes for a
// PVANET-style subgraph (Conv -> Power(scale=-1) -> Concat -> ScaleShift ->
// ReLU -> MaxPool): exactly one Reorder may remain, and its consumer must be
// the graph Output node. Weights/biases are filled with generated dummy data
// (9728 bytes total, matching the offsets declared in the IR).
22 TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReorders) {
23 std::string model = R"V0G0N(
24 <net name="PVANET" version="2" batch="1">
26 <layer name="data" type="Input" precision="FP32" id="0">
36 <layer name="conv1_1_conv" type="Convolution" precision="FP32" id="2">
37 <convolution_data stride-x="2" stride-y="2" pad-x="3" pad-y="3" kernel-x="7" kernel-y="7" output="16" group="1"/>
54 <weights offset="0" size="9408"/>
55 <biases offset="9408" size="64"/>
57 <layer name="conv1_1_neg" type="Power" precision="FP32" id="3">
58 <power_data power="1" scale="-1" shift="0"/>
76 <layer name="conv1_1_concat" type="Concat" precision="FP32" id="4">
77 <concat_data axis="1"/>
101 <layer name="conv1_1_scale" type="ScaleShift" precision="FP32" id="5">
118 <weights offset="9472" size="128"/>
119 <biases offset="9600" size="128"/>
121 <layer name="conv1_1_relu" type="ReLU" precision="FP32" id="6">
122 <data negative_slope="0" engine="caffe.ReLUParameter.DEFAULT"/>
140 <layer name="pool1" type="Pooling" precision="FP32" id="7">
141 <pooling_data kernel-x="3" kernel-y="3" pad-x="0" pad-y="0" stride-x="2" stride-y="2" rounding-type="ceil" pool-method="max"/>
161 <edge from-layer="0" from-port="0" to-layer="2" to-port="2"/>
162 <edge from-layer="2" from-port="3" to-layer="3" to-port="4"/>
163 <edge from-layer="2" from-port="3" to-layer="4" to-port="6"/>
164 <edge from-layer="3" from-port="5" to-layer="4" to-port="7"/>
165 <edge from-layer="4" from-port="8" to-layer="5" to-port="9"/>
166 <edge from-layer="5" from-port="10" to-layer="6" to-port="11"/>
167 <edge from-layer="6" from-port="12" to-layer="7" to-port="13"/>
173 InferenceEngine::CNNNetReader net_reader;
174 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
176 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {9728});
178 fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
179 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
181 net_reader.SetWeights(weights_ptr);
184 MKLDNNGraphTestClass graph;
185 graph.CreateGraph(net_reader.getNetwork());
187 size_t reorders_num = 0;
188 auto& nodes = graph.getNodes();
189 for (auto &node : nodes) {
190 if (node->getType() == MKLDNNPlugin::Reorder) {
192 ASSERT_EQ(MKLDNNPlugin::Output, node->getChildEdgeAt(0)->getChild()->getType());
195 ASSERT_EQ(reorders_num, 1);
// Checks Reorder placement when a Convolution consumes a 3-channel input
// (Input -> BatchNormalization -> ScaleShift -> "init_conv" Convolution).
// Exactly 3 Reorder nodes are expected, and the Reorder that feeds
// "init_conv" must hand a plain-NCHW blob to a Convolution node — i.e. the
// C=3 input stays in planar layout rather than a blocked one.
198 TEST_F(MKLDNNGraphStructureTests, TestRedundantReorderBeforeConvWithC_3) {
199 std::string model = R"V0G0N(
200 <net name="net" version="2" batch="1">
202 <layer name="data" type="Input" precision="FP32" id="0">
212 <layer name="data_norm_bn" type="BatchNormalization" precision="FP32" id="1">
213 <batch_norm_data epsilon="9.9999997473787516e-06"/>
230 <biases offset="0" size="12"/>
231 <weights offset="12" size="12"/>
233 <layer name="data_norm_scale" type="ScaleShift" precision="FP32" id="2">
250 <weights offset="24" size="12"/>
251 <biases offset="36" size="12"/>
253 <layer name="init_conv" type="Convolution" precision="FP32" id="3">
254 <convolution_data stride-x="2" stride-y="2" pad-x="3" pad-y="3" kernel-x="7" kernel-y="7" output="64" group="1"/>
271 <weights offset="48" size="37632"/>
272 <biases offset="37680" size="256"/>
276 <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
277 <edge from-layer="1" from-port="2" to-layer="2" to-port="3"/>
278 <edge from-layer="2" from-port="4" to-layer="3" to-port="5"/>
283 InferenceEngine::CNNNetReader net_reader;
284 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
286 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {37936});
288 fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
289 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
291 net_reader.SetWeights(weights_ptr);
294 MKLDNNGraphTestClass graph;
295 graph.CreateGraph(net_reader.getNetwork());
297 size_t reorders_num = 0;
298 auto& nodes = graph.getNodes();
299 for (auto &node : nodes) {
300 if (node->getType() == MKLDNNPlugin::Reorder) {
302 if (node->getChildEdgeAt(0)->getChild()->getName() == "init_conv"){
303 ASSERT_EQ(MKLDNNPlugin::Convolution, node->getChildEdgeAt(0)->getChild()->getType());
304 ASSERT_EQ(InferenceEngine::Layout::NCHW,
305 node->getChildEdgeAt(0)->getBlob()->getTensorDesc().getLayout());
309 ASSERT_EQ(reorders_num, 3);
// Checks that no redundant Reorders are inserted in front of Concat for a
// Conv -> Power(scale=-1) -> Concat -> ScaleShift -> ReLU subgraph: at most 2
// Reorders whose consumer is NOT the Output node may remain. The weights blob
// is mostly generated data with a hand-patched ScaleShift region (idx 592,
// 8 scale + 8 shift floats). The inference result on a 1x3x7x7 +1/-1
// checkerboard input is compared against a hard-coded reference, then the
// whole check is repeated after setBatchSize(2) with the input and reference
// replicated across the batch dimension.
312 TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersBeforeConcat) {
313 std::string model = R"V0G0N(
314 <net name="net" version="2" batch="1">
316 <layer name="data" type="Input" precision="FP32" id="0">
326 <layer name="conv1_1_conv" type="Convolution" precision="FP32" id="2">
327 <convolution_data stride-x="2" stride-y="2" pad-x="3" pad-y="3" kernel-x="7" kernel-y="7" output="4" group="1"/>
344 <weights offset="0" size="2352"/>
345 <biases offset="2352" size="16"/>
347 <layer name="conv1_1_neg" type="Power" precision="FP32" id="3">
348 <power_data power="1" scale="-1" shift="0"/>
366 <layer name="conv1_1_concat" type="Concat" precision="FP32" id="4">
367 <concat_data axis="1"/>
391 <layer name="conv1_1_scale" type="ScaleShift" precision="FP32" id="5">
408 <weights offset="2368" size="32"/>
409 <biases offset="2400" size="32"/>
411 <layer name="conv1_1_relu" type="ReLU" precision="FP32" id="6">
431 <edge from-layer="0" from-port="0" to-layer="2" to-port="2"/>
432 <edge from-layer="2" from-port="3" to-layer="3" to-port="4"/>
433 <edge from-layer="2" from-port="3" to-layer="4" to-port="6"/>
434 <edge from-layer="3" from-port="5" to-layer="4" to-port="7"/>
435 <edge from-layer="4" from-port="8" to-layer="5" to-port="9"/>
436 <edge from-layer="5" from-port="10" to-layer="6" to-port="11"/>
441 InferenceEngine::CNNNetReader net_reader;
442 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
444 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {2432});
446 float * data = weights->buffer();
448 fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
449 size_t idx = 592; // Convolution weights
450 size_t size = 8; // Scale and shift sizes
451 for (size_t i = 0; i < size; i++, idx++) {
454 for (size_t i = 0; i < size; i++, idx++) {
458 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
460 net_reader.SetWeights(weights_ptr);
462 MKLDNNGraphTestClass graph;
463 graph.CreateGraph(net_reader.getNetwork());
465 size_t reorders_num = 0;
466 auto& nodes = graph.getNodes();
467 for (auto &node : nodes) {
468 if (node->getType() == MKLDNNPlugin::Reorder && node->getChildEdgeAt(0)->getChild()->getType() != MKLDNNPlugin::Output) {
472 ASSERT_EQ(reorders_num, 2);
473 InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 7, 7}, InferenceEngine::NCHW);
474 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(desc);
476 data = src->buffer().as<float *>();
477 for (size_t i = 0; i < src->size(); i++) {
478 data[i] = (i % 2) ? 1 : -1;
481 InferenceEngine::BlobMap srcs;
482 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data", src));
484 InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
486 InferenceEngine::BlobMap outputBlobs;
487 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
489 InferenceEngine::TBlob<float>::Ptr output;
490 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
492 outputBlobs[item.first] = output;
494 graph.Infer(srcs, outputBlobs);
496 std::vector<float> refDst = {0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.040f, 0.000f,
497 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
498 0.119f, 0.000f, 0.000f, 1.889f, 0.000f, 0.000f, 0.000f, 1.138f, 0.647f, 0.000f, 0.348f,
499 0.000f, 1.711f, 1.311f, 0.000f, 0.000f, 3.045f, 1.203f, 0.000f, 0.927f, 2.041f, 0.000f,
500 0.564f, 1.415f, 1.524f, 0.000f, 1.812f, 0.486f, 0.103f, 1.606f, 0.999f, 0.000f, 1.145f,
501 2.158f, 0.712f, 0.000f, 0.009f, 0.756f, 0.000f, 0.000f, 0.008f, 0.243f,
503 0.381f, 0.363f, 1.846f, 0.804f, 1.372f, 1.113f, 2.453f, 1.609f, 0.557f, 0.000f, 3.020f,
504 1.422f, 0.481f, 0.221f, 1.137f, 0.401f, 1.475f, 0.301f, 0.862f, 2.052f, 2.680f, 0.284f,
505 0.000f, 2.389f, 0.917f, 0.000f, 0.358f, 1.989f, 0.355f, 0.000f, 0.000f, 0.570f, 0.000f,
506 0.761f, 0.000f, 0.000f, 0.652f, 0.910f, 0.000f, 0.000f, 0.226f, 0.000f, 0.000f, 0.323f,
507 0.000f, 0.000f, 0.000f, 0.108f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.866f, 0.000f,
508 0.000f, 0.000f, 0.759f, 0.000f, 0.000f, 0.029f, 1.186f, 0.000f, 0.000f};
509 InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc(), refDst.data());
511 compare(*output, *dstOut);
513 // Compare for batch2
514 net_reader.getNetwork().setBatchSize(2);
515 graph.CreateGraph(net_reader.getNetwork());
516 desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {2, 3, 7, 7}, InferenceEngine::NCHW);
518 InferenceEngine::Blob::Ptr srcBatch = InferenceEngine::make_shared_blob<float>(desc);
519 srcBatch->allocate();
520 data = srcBatch->buffer().as<float *>();
521 float *originData = src->buffer().as<float *>();
522 for(size_t b = 0; b < 2; b++) {
523 for (size_t i = 0; i < src->size(); i++) {
524 data[srcBatch->getTensorDesc().offset(b*src->size() + i)] = originData[src->getTensorDesc().offset(i)];
529 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data", srcBatch));
530 out = net_reader.getNetwork().getOutputsInfo();
534 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
536 outputBlobs[item.first] = output;
538 graph.Infer(srcs, outputBlobs);
539 dstOut = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
541 data = dstOut->buffer().as<float *>();
542 for(size_t b = 0; b < 2; b++) {
543 for (size_t i = 0; i < refDst.size(); i++) {
544 data[dstOut->getTensorDesc().offset(b*refDst.size() + i)] = refDst[i];
548 compare(*output, *dstOut);
// Checks that no redundant Reorders appear before a depthwise Convolution
// (group="4" == output="4") in a Conv -> ReLU -> grouped Conv ->
// Power(scale=-1)/Concat -> ScaleShift -> ReLU subgraph: exactly 2 Reorders
// are allowed. Inference runs on a batch-2 {2,3,5,5} input whose second batch
// is a copy of the first; the result is compared against a hard-coded
// reference whose two halves are correspondingly identical.
551 TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersBeforeDWConvolution) {
552 std::string model = R"V0G0N(
553 <net name="net" version="2" batch="1">
555 <layer name="input" type="Input" precision="FP32" id="0">
565 <layer name="conv2_1_1" type="Convolution" precision="FP32" id="1">
566 <convolution_data stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="4" group="1"/>
583 <weights offset="0" size="48"/>
584 <biases offset="48" size="16"/>
586 <layer name="conv2_1_1_relu" type="ReLU" precision="FP32" id="2">
587 <data negative_slope="0" engine="caffe.ReLUParameter.DEFAULT"/>
605 <layer name="conv2_1_2_orig" type="Convolution" precision="FP32" id="3">
606 <convolution_data stride-x="1" stride-y="1" pad-x="1" pad-y="1" kernel-x="3" kernel-y="3" output="4" group="4"/>
623 <weights offset="64" size="144"/>
624 <biases offset="208" size="16"/>
626 <layer name="conv2_1_2_neg" type="Power" precision="FP32" id="4">
627 <power_data power="1" scale="-1" shift="0"/>
645 <layer name="conv2_1_2" type="Concat" precision="FP32" id="5">
646 <concat_data axis="1"/>
670 <layer name="conv2_1_2_scale" type="ScaleShift" precision="FP32" id="6">
687 <weights offset="224" size="32"/>
688 <biases offset="256" size="32"/>
690 <layer name="conv2_1_2_relu" type="ReLU" precision="FP32" id="7">
691 <data negative_slope="0" engine="caffe.ReLUParameter.DEFAULT"/>
711 <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
712 <edge from-layer="1" from-port="2" to-layer="2" to-port="3"/>
713 <edge from-layer="2" from-port="4" to-layer="3" to-port="5"/>
714 <edge from-layer="3" from-port="6" to-layer="4" to-port="7"/>
715 <edge from-layer="3" from-port="6" to-layer="5" to-port="9"/>
716 <edge from-layer="4" from-port="8" to-layer="5" to-port="10"/>
717 <edge from-layer="5" from-port="11" to-layer="6" to-port="12"/>
718 <edge from-layer="6" from-port="13" to-layer="7" to-port="14"/>
723 InferenceEngine::CNNNetReader net_reader;
724 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
726 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {288});
728 fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
730 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
732 net_reader.SetWeights(weights_ptr);
734 MKLDNNGraphTestClass graph;
735 graph.CreateGraph(net_reader.getNetwork());
737 size_t reorders_num = 0;
738 auto& nodes = graph.getNodes();
739 for (auto &node : nodes) {
740 if (node->getType() == MKLDNNPlugin::Reorder) {
744 ASSERT_EQ(reorders_num, 2);
745 InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {2, 3, 5, 5}, InferenceEngine::NCHW);
746 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(desc);
748 auto *data = src->buffer().as<float *>();
749 size_t sizeB1 = src->size() / 2;
750 fill_data(data, sizeB1);
751 for (size_t i = 0; i < sizeB1; i++) {
752 data[sizeB1 + i] = data[i];
755 InferenceEngine::BlobMap srcs;
756 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("input", src));
758 InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
760 InferenceEngine::BlobMap outputBlobs;
761 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
763 InferenceEngine::TBlob<float>::Ptr output;
764 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
766 outputBlobs[item.first] = output;
768 graph.Infer(srcs, outputBlobs);
770 std::vector<float> refDst = {0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f,
771 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f,
772 0.920f, 0.920f, 0.920f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f,
773 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f,
774 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
775 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
776 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
777 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
778 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
779 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
780 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
781 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
782 0.000f, 0.185f, 0.176f, 0.000f, 0.000f, 0.000f, 0.215f, 0.000f, 0.957f, 1.092f, 0.000f,
783 0.000f, 0.213f, 0.020f, 1.391f, 2.359f, 0.583f, 0.000f, 0.000f, 0.138f, 0.043f, 0.000f,
784 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.720f, 0.000f, 0.000f, 0.000f,
785 0.000f, 0.069f, 0.188f, 0.046f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.045f,
786 0.041f, 0.000f, 0.000f, 0.056f, 0.000f, 0.000f, 0.086f, 0.025f, 0.000f, 0.000f, 0.000f,
787 0.000f, 0.012f, 0.056f, 0.000f, 0.060f, 0.055f, 0.000f, 0.000f, 0.037f, 0.000f, 0.000f,
790 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f,
791 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f,
792 0.920f, 0.920f, 0.920f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f,
793 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f,
794 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
795 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
796 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
797 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
798 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
799 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
800 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
801 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
802 0.000f, 0.185f, 0.176f, 0.000f, 0.000f, 0.000f, 0.215f, 0.000f, 0.957f, 1.092f, 0.000f,
803 0.000f, 0.213f, 0.020f, 1.391f, 2.359f, 0.583f, 0.000f, 0.000f, 0.138f, 0.043f, 0.000f,
804 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.720f, 0.000f, 0.000f, 0.000f,
805 0.000f, 0.069f, 0.188f, 0.046f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.045f,
806 0.041f, 0.000f, 0.000f, 0.056f, 0.000f, 0.000f, 0.086f, 0.025f, 0.000f, 0.000f, 0.000f,
807 0.000f, 0.012f, 0.056f, 0.000f, 0.060f, 0.055f, 0.000f, 0.000f, 0.037f, 0.000f, 0.000f,
809 InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc(), refDst.data());
811 compare(*output, *dstOut);
814 // TODO change hardcoded reference to dynamically generated
// DISABLED (see TODO above): intended to verify Reorder placement around a
// depthwise Deconvolution. The network fans out from "conv1" into two
// Deconvolutions ("deconv1" is grouped/depthwise, "deconv2" is dense); both
// are network outputs. Exactly 2 Reorders are expected, each feeding an
// Output node, and both output blobs are compared against hard-coded
// reference vectors.
815 TEST_F(MKLDNNGraphStructureTests, DISABLED_TestNoRedundantReordersBeforeDWDeconvolution) {
816 std::string model = R"V0G0N(
817 <net name="deconv" version="2" batch="1">
819 <layer name="input" type="Input" precision="FP32" id="0">
829 <layer name="conv1" type="Convolution" precision="FP32" id="1">
830 <convolution_data stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="12" group="1"/>
847 <weights offset="0" size="144"/>
849 <layer name="deconv1" type="Deconvolution" precision="FP32" id="2">
850 <deconvolution_data stride-x="2" stride-y="2" pad-x="1" pad-y="1" kernel-x="4" kernel-y="4" output="12" group="12"/>
867 <weights offset="144" size="768"/>
868 <biases offset="912" size="48"/>
870 <layer name="deconv2" type="Deconvolution" precision="FP32" id="3">
871 <deconvolution_data stride-x="1" stride-y="1" pad-x="1" pad-y="1" kernel-x="2" kernel-y="2" output="24" group="1"/>
888 <weights offset="960" size="4608"/>
889 <biases offset="5568" size="96"/>
893 <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
894 <edge from-layer="1" from-port="2" to-layer="2" to-port="3"/>
895 <edge from-layer="1" from-port="2" to-layer="3" to-port="5"/>
900 InferenceEngine::CNNNetReader net_reader;
901 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
903 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {5664});
905 fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
907 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
909 net_reader.SetWeights(weights_ptr);
911 MKLDNNGraphTestClass graph;
912 graph.CreateGraph(net_reader.getNetwork());
914 size_t reorders_num = 0;
915 auto& nodes = graph.getNodes();
916 for (auto &node : nodes) {
917 if (node->getType() == MKLDNNPlugin::Reorder) {
919 ASSERT_EQ(MKLDNNPlugin::Output, node->getChildEdgeAt(0)->getChild()->getType());
922 ASSERT_EQ(reorders_num, 2);
923 InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 2, 2}, InferenceEngine::NCHW);
924 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(desc);
926 fill_data(src->buffer(), src->size());
928 InferenceEngine::BlobMap srcs;
929 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("input", src));
931 InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
933 InferenceEngine::BlobMap outputBlobs;
934 InferenceEngine::DataPtr item = out["deconv1"];
935 InferenceEngine::TBlob<float>::Ptr output1;
936 output1 = InferenceEngine::make_shared_blob<float>(item->getTensorDesc());
938 outputBlobs["deconv1"] = output1;
940 item = out["deconv2"];
941 InferenceEngine::TBlob<float>::Ptr output2;
942 output2 = InferenceEngine::make_shared_blob<float>(item->getTensorDesc());
944 outputBlobs["deconv2"] = output2;
946 graph.Infer(srcs, outputBlobs);
948 std::vector<float> refDst1 = {-0.042f, -0.563f, -0.150f, 0.396f, 0.224f, 0.229f, -0.335f, -0.390f, -0.213f, 0.959f, 0.520f, -0.507f,
949 -0.200f, -0.202f, 0.441f, 0.499f, 0.000f, 0.000f, 0.000f, 0.000f, 0.363f, 0.141f, -0.497f, -0.332f, -0.311f,
950 0.423f, 0.693f, -0.012f, -0.328f, -0.106f, 0.518f, 0.353f, 0.000f, 0.000f, 0.000f, 0.000f, 0.050f, -0.352f,
951 -0.045f, 0.000f, -0.303f, 0.605f, 0.754f, -0.143f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.012f, 0.298f, 0.000f,
952 -0.066f, -0.303f, -0.318f, -0.054f, 0.322f, 0.002f, 0.050f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
953 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
954 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
955 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.328f, -0.162f, -0.765f, -0.221f, 0.422f, 0.715f, 0.726f, 0.375f,
956 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, -0.744f, -0.038f, -0.109f, 0.000f, 0.583f, 0.892f,
957 0.039f, -0.356f, 0.000f, 0.000f, 0.000f, 0.000f, -0.514f, 0.320f, 0.193f, 0.000f, -0.785f, -0.508f, 0.160f, -0.104f,
958 0.473f, 0.214f, 0.129f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, -0.299f, 0.784f, 0.953f, -0.163f, -1.160f, -0.547f,
959 0.401f, -0.066f, 0.275f, -0.172f, -0.683f, -0.188f, 0.384f, -0.149f, 0.151f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
960 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f,
961 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f};
962 InferenceEngine::TBlob<float>::Ptr dstOut1 = InferenceEngine::make_shared_blob<float>(out["deconv1"]->getTensorDesc(), refDst1.data());
964 std::vector<float> refDst2 = {-0.814f, -0.337f, -1.081f, 1.139f, -0.197f, 1.547f, -0.778f, -2.467f, 1.409f, -1.472f, 2.827f, 0.663f,
965 -0.645f, 0.105f, -1.873f, -0.272f, 1.071f, 2.706f, -1.705f, 0.602f, -1.956f, 0.734f, 2.325f, -2.147f};
966 InferenceEngine::TBlob<float>::Ptr dstOut2 = InferenceEngine::make_shared_blob<float>(out["deconv2"]->getTensorDesc(), refDst2.data());
968 compare(*output1, *dstOut1);
969 compare(*output2, *dstOut2);
// Checks the case where several output ports of one layer (Slice1, ports
// 1..3) all feed the same next layer (Concat2): exactly 3 Reorders are
// expected, and since Slice followed by Concat over the same axis is an
// identity transformation, the network output must equal the input blob.
972 TEST_F(MKLDNNGraphStructureTests, TestSeveralOutputToNextLayer) {
973 std::string model = R"V0G0N(
974 <?xml version="1.0" ?>
975 <net batch="1" name="model" version="2">
977 <layer id="0" name="data" precision="FP32" type="Input">
987 <layer id="1" name="Slice1" precision="FP32" type="Slice">
1018 <layer id="5" name="Concat2" precision="FP32" type="Concat">
1051 <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
1052 <edge from-layer="1" from-port="1" to-layer="5" to-port="0"/>
1053 <edge from-layer="1" from-port="2" to-layer="5" to-port="1"/>
1054 <edge from-layer="1" from-port="3" to-layer="5" to-port="2"/>
1059 InferenceEngine::CNNNetReader net_reader;
1060 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
1062 MKLDNNGraphTestClass graph;
1063 graph.CreateGraph(net_reader.getNetwork());
1065 size_t reorders_num = 0;
1066 auto& nodes = graph.getNodes();
1067 for (auto &node : nodes) {
1068 if (node->getType() == MKLDNNPlugin::Reorder) {
1072 ASSERT_EQ(reorders_num, 3);
1073 InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 2, 2}, InferenceEngine::NCHW);
1074 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(desc);
1076 fill_data(src->buffer(), src->size());
1078 InferenceEngine::BlobMap srcs;
1079 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data", src));
1081 InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
1083 InferenceEngine::BlobMap outputBlobs;
1084 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
1086 InferenceEngine::TBlob<float>::Ptr output;
1087 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
1089 outputBlobs[item.first] = output;
1091 graph.Infer(srcs, outputBlobs);
1093 compare(*output, *src);
// Checks that a network output placed after an in-place chain plus Concat
// (Slice1 -> Concat2 -> Reshape3) is produced correctly when driven through
// the real execution path: MKLDNNExecNetwork + IInferRequest with SetBlob for
// both input and output, rather than MKLDNNGraphTestClass::Infer. The final
// output must equal the input blob (Slice+Concat+Reshape is an identity on
// the data).
1097 TEST_F(MKLDNNGraphStructureTests, TestOutputAfterInplacePlusConcat) {
1098 std::string model = R"V0G0N(
1099 <?xml version="1.0" ?>
1100 <net batch="1" name="model" version="2">
1102 <layer id="0" name="data" precision="FP32" type="Input">
1112 <layer id="1" name="Slice1" precision="FP32" type="Slice">
1143 <layer id="2" name="Concat2" precision="FP32" type="Concat">
1174 <layer id="3" name="Reshape3" precision="FP32" type="Reshape">
1175 <data axis="0" dim="1,12" num_axes="-1"/>
1194 <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
1195 <edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
1196 <edge from-layer="1" from-port="2" to-layer="2" to-port="1"/>
1197 <edge from-layer="1" from-port="3" to-layer="2" to-port="2"/>
1198 <edge from-layer="2" from-port="3" to-layer="3" to-port="0"/>
1203 InferenceEngine::CNNNetReader net_reader;
1204 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
1205 MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(net_reader.getNetwork(), {}, {}));
1206 InferenceEngine::InputsDataMap _networkInputs = net_reader.getNetwork().getInputsInfo();
1207 InferenceEngine::OutputsDataMap _networkOutputs = net_reader.getNetwork().getOutputsInfo();
1208 execNetwork->setNetworkInputs(_networkInputs);
1209 execNetwork->setNetworkOutputs(_networkOutputs);
1210 InferenceEngine::IInferRequest::Ptr inferRequest;
1211 execNetwork->CreateInferRequest(inferRequest);
1213 InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 2, 2}, InferenceEngine::NCHW);
1214 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(desc);
1216 fill_data(src->buffer(), src->size());
1218 InferenceEngine::ResponseDesc resp;
1220 InferenceEngine::StatusCode sts = inferRequest->SetBlob("data", src, &resp);
1221 ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
1223 InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
1225 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
1227 InferenceEngine::TBlob<float>::Ptr output;
1228 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
1231 sts = inferRequest->SetBlob(item.first.c_str(), output, &resp);
1232 ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
1234 sts = inferRequest->Infer(&resp);
1235 ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
1237 compare(*output, *src);
// Builds a ResNet-152 fragment (conv1/pool1, the res2a and res2b bottleneck
// blocks with their Eltwise-sum shortcuts, then pool5/fc1000/prob) from two
// IR string pieces (modelB + modelE), fills 1643424 bytes of dummy weights,
// and loads the network into an MKLDNNExecNetwork with the network inputs and
// outputs wired up. The IR also carries a per-channel mean <pre-process>
// section in FP16 precision for the "input" layer.
1240 TEST_F(MKLDNNGraphStructureTests, TestResnetPart) {
1241 std::string modelB = R"V0G0N(
1242 <net name="ResNet-152" version="2" batch="1">
1244 <layer name="input" type="Input" precision="FP32" id="0">
1254 <layer name="conv1" type="Convolution" precision="FP32" id="1">
1255 <convolution_data stride-x="2" stride-y="2" pad-x="3" pad-y="3" kernel-x="7" kernel-y="7" output="64" group="1"/>
1272 <weights offset="0" size="37632"/>
1273 <biases offset="37632" size="256"/>
1275 <layer name="conv1_relu" type="ReLU" precision="FP32" id="4">
1293 <layer name="pool1" type="Pooling" precision="FP32" id="5">
1294 <pooling_data kernel-x="3" kernel-y="3" pad-x="0" pad-y="0" stride-x="2" stride-y="2" rounding-type="ceil" pool-method="max"/>
1312 <layer name="res2a_branch2a" type="Convolution" precision="FP32" id="9">
1313 <convolution_data stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="64" group="1"/>
1330 <weights offset="37888" size="16384"/>
1331 <biases offset="54272" size="256"/>
1333 <layer name="res2a_branch2a_relu" type="ReLU" precision="FP32" id="12">
1351 <layer name="res2a_branch2b" type="Convolution" precision="FP32" id="13">
1352 <convolution_data stride-x="1" stride-y="1" pad-x="1" pad-y="1" kernel-x="3" kernel-y="3" output="64" group="1"/>
1369 <weights offset="54528" size="147456"/>
1370 <biases offset="201984" size="256"/>
1372 <layer name="res2a_branch2b_relu" type="ReLU" precision="FP32" id="16">
1390 <layer name="res2a_branch2c" type="Convolution" precision="FP32" id="17">
1391 <convolution_data stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="256" group="1"/>
1408 <weights offset="202240" size="65536"/>
1409 <biases offset="267776" size="1024"/>
1411 <layer name="res2a_branch1" type="Convolution" precision="FP32" id="6">
1412 <convolution_data stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="256" group="1"/>
1429 <weights offset="268800" size="65536"/>
1430 <biases offset="334336" size="1024"/>
1432 <layer name="res2a" type="Eltwise" precision="FP32" id="20">
1433 <elementwise_data operation="sum"/>
1457 <layer name="res2a_relu" type="ReLU" precision="FP32" id="21">
1475 <layer name="res2b_branch2a" type="Convolution" precision="FP32" id="22">
1476 <convolution_data stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="64" group="1"/>
1493 <weights offset="335360" size="65536"/>
1494 <biases offset="400896" size="256"/>
1496 <layer name="res2b_branch2a_relu" type="ReLU" precision="FP32" id="25">
1514 <layer name="res2b_branch2b" type="Convolution" precision="FP32" id="26">
1515 <convolution_data stride-x="1" stride-y="1" pad-x="1" pad-y="1" kernel-x="3" kernel-y="3" output="64" group="1"/>
1532 <weights offset="401152" size="147456"/>
1533 <biases offset="548608" size="256"/>
1535 std::string modelE =R"V0G0N(
1536 <layer name="res2b_branch2b_relu" type="ReLU" precision="FP32" id="29">
1554 <layer name="res2b_branch2c" type="Convolution" precision="FP32" id="30">
1555 <convolution_data stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="256" group="1"/>
1572 <weights offset="548864" size="65536"/>
1573 <biases offset="614400" size="1024"/>
1575 <layer name="res2b" type="Eltwise" precision="FP32" id="33">
1576 <elementwise_data operation="sum"/>
1600 <layer name="res2b_relu" type="ReLU" precision="FP32" id="34">
1618 <layer name="pool5" type="Pooling" precision="FP32" id="668">
1619 <pooling_data kernel-x="56" kernel-y="56" pad-x="0" pad-y="0" stride-x="1" stride-y="1" rounding-type="ceil" pool-method="avg"/>
1637 <layer name="fc1000" type="FullyConnected" precision="FP32" id="669">
1638 <fc_data out-size="1000"/>
1653 <weights offset="615424" size="1024000"/>
1654 <biases offset="1639424" size="4000"/>
1656 <layer name="prob" type="SoftMax" precision="FP32" id="670">
1672 <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
1673 <edge from-layer="1" from-port="2" to-layer="4" to-port="7"/>
1674 <edge from-layer="4" from-port="8" to-layer="5" to-port="9"/>
1675 <edge from-layer="5" from-port="10" to-layer="6" to-port="11"/>
1676 <edge from-layer="5" from-port="10" to-layer="9" to-port="17"/>
1677 <edge from-layer="12" from-port="24" to-layer="13" to-port="25"/>
1678 <edge from-layer="16" from-port="32" to-layer="17" to-port="33"/>
1679 <edge from-layer="6" from-port="12" to-layer="20" to-port="39"/>
1680 <edge from-layer="9" from-port="18" to-layer="12" to-port="23"/>
1681 <edge from-layer="13" from-port="26" to-layer="16" to-port="31"/>
1682 <edge from-layer="17" from-port="34" to-layer="20" to-port="40"/>
1683 <edge from-layer="20" from-port="41" to-layer="21" to-port="42"/>
1684 <edge from-layer="21" from-port="43" to-layer="22" to-port="44"/>
1685 <edge from-layer="25" from-port="51" to-layer="26" to-port="52"/>
1686 <edge from-layer="29" from-port="59" to-layer="30" to-port="60"/>
1687 <edge from-layer="21" from-port="43" to-layer="33" to-port="66"/>
1688 <edge from-layer="22" from-port="45" to-layer="25" to-port="50"/>
1689 <edge from-layer="26" from-port="53" to-layer="29" to-port="58"/>
1690 <edge from-layer="30" from-port="61" to-layer="33" to-port="67"/>
1691 <edge from-layer="33" from-port="68" to-layer="34" to-port="69"/>
1692 <edge from-layer="34" from-port="70" to-layer="668" to-port="1385"/>
1693 <edge from-layer="668" from-port="1386" to-layer="669" to-port="1387"/>
1694 <edge from-layer="669" from-port="1388" to-layer="670" to-port="1389"/>
1696 <pre-process reference-layer-name="input" mean-precision="FP16">
1698 <mean value="104.00698793"/>
1701 <mean value="116.66876762"/>
1704 <mean value="122.67891434"/>
1710 std::string model = modelB + modelE;
1711 InferenceEngine::CNNNetReader net_reader;
1712 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
1714 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {1643424});
1715 weights->allocate();
1716 fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
1717 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
1719 net_reader.SetWeights(weights_ptr);
1721 MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(net_reader.getNetwork(), {}, {}));
1722 InferenceEngine::InputsDataMap _networkInputs = net_reader.getNetwork().getInputsInfo();
1723 InferenceEngine::OutputsDataMap _networkOutputs = net_reader.getNetwork().getOutputsInfo();
1724 execNetwork->setNetworkInputs(_networkInputs);
1725 execNetwork->setNetworkOutputs(_networkOutputs);
1726 InferenceEngine::IInferRequest::Ptr inferRequest;
1727 execNetwork->CreateInferRequest(inferRequest);
1729 InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 224, 224}, InferenceEngine::NCHW);
1730 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(desc);
1732 fill_data(src->buffer(), src->size());
1734 InferenceEngine::ResponseDesc resp;
1736 InferenceEngine::StatusCode sts = inferRequest->SetBlob("input", src, &resp);
1737 ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
1739 InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
1741 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
1743 InferenceEngine::TBlob<float>::Ptr output;
1744 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
1747 sts = inferRequest->SetBlob(item.first.c_str(), output, &resp);
1748 ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
1750 sts = inferRequest->Infer(&resp);
1751 ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
// Test: a topology where Concat1 (fed by "data" and "data3") feeds Concat2
// (together with "data2"), followed by a 20x20 average pooling layer.
// Loads the IR into an MKLDNN exec network, binds three input blobs and one
// output blob, and only checks that every step reports InferenceEngine::OK.
1754 TEST_F(MKLDNNGraphStructureTests, TestConcatAfterConcat) {
1755     std::string model = R"V0G0N(
1756 <net batch="1" name="model" version="2">
1758         <layer id="0" name="data" precision="FP32" type="Input">
1768         <layer id="1" name="data2" precision="FP32" type="Input">
1778         <layer id="2" name="data3" precision="FP32" type="Input">
1788         <layer id="3" name="Concat1" precision="FP32" type="Concat">
1813         <layer id="4" name="Concat2" precision="FP32" type="Concat">
1838         <layer name="pool" type="Pooling" precision="FP32" id="5">
1839             <pooling_data kernel-x="20" kernel-y="20" pad-x="0" pad-y="0" stride-x="1" stride-y="1" rounding-type="ceil" pool-method="avg"/>
1859         <edge from-layer="0" from-port="0" to-layer="3" to-port="0"/>
1860         <edge from-layer="2" from-port="0" to-layer="3" to-port="1"/>
1861         <edge from-layer="1" from-port="0" to-layer="4" to-port="0"/>
1862         <edge from-layer="3" from-port="2" to-layer="4" to-port="1"/>
1863         <edge from-layer="4" from-port="2" to-layer="5" to-port="0"/>
// Parse the IR (this topology needs no weights blob).
1868     InferenceEngine::CNNNetReader net_reader;
1869     ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
// Build the executable network directly and wire up its I/O maps,
// then create an infer request through the plugin API.
1870     MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(net_reader.getNetwork(), {}, {}));
1871     InferenceEngine::InputsDataMap _networkInputs = net_reader.getNetwork().getInputsInfo();
1872     InferenceEngine::OutputsDataMap _networkOutputs = net_reader.getNetwork().getOutputsInfo();
1873     execNetwork->setNetworkInputs(_networkInputs);
1874     execNetwork->setNetworkOutputs(_networkOutputs);
1875     InferenceEngine::IInferRequest::Ptr inferRequest;
1876     execNetwork->CreateInferRequest(inferRequest);
// Prepare the three NCHW FP32 input blobs with random data
// (channel counts 3 / 4 / 2 to exercise the concat axis).
1878     InferenceEngine::TensorDesc desc1(InferenceEngine::Precision::FP32, {1, 3, 20, 20}, InferenceEngine::NCHW);
1879     InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>(desc1);
1881     fill_data(src1->buffer(), src1->size());
1883     InferenceEngine::TensorDesc desc2(InferenceEngine::Precision::FP32, {1, 4, 20, 20}, InferenceEngine::NCHW);
1884     InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float>(desc2);
1886     fill_data(src2->buffer(), src2->size());
1888     InferenceEngine::TensorDesc desc3(InferenceEngine::Precision::FP32, {1, 2, 20, 20}, InferenceEngine::NCHW);
1889     InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob<float>(desc3);
1891     fill_data(src3->buffer(), src3->size());
1893     InferenceEngine::ResponseDesc resp;
// NOTE(review): the first SetBlob uses the name "data1", but the input layer
// in the IR above is named "data" — confirm this is intentional. Also the
// statuses of the first two SetBlob calls are overwritten before any check;
// only the third call's status is asserted below.
1895     InferenceEngine::StatusCode sts = inferRequest->SetBlob("data1", src1, &resp);
1896     sts = inferRequest->SetBlob("data2", src2, &resp);
1897     sts = inferRequest->SetBlob("data3", src3, &resp);
1898     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
// Bind an output blob matching the first (only) network output.
1900     InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
1902     std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
1904     InferenceEngine::TBlob<float>::Ptr output;
1905     output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
1908     sts = inferRequest->SetBlob(item.first.c_str(), output, &resp);
1909     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
// Run inference; the test only verifies it completes with OK status.
1911     sts = inferRequest->Infer(&resp);
1912     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
1914     //  compare(*output, *src);
// Test: Concat0 merges data1+data3; its single output then feeds TWO sibling
// concats (Concat1 with data2, Concat2 with data4). Both Concat1 and Concat2
// are network outputs. Reference outputs are assembled on the host by copying
// the source blobs in concat order, and compared against the inferred results.
1917 TEST_F(MKLDNNGraphStructureTests, Test2ConcatFromConcat) {
1918     std::string model = R"V0G0N(
1919 <net batch="1" name="model" version="2">
1921         <layer id="0" name="data1" precision="FP32" type="Input">
1931         <layer id="1" name="data2" precision="FP32" type="Input">
1941         <layer id="2" name="data3" precision="FP32" type="Input">
1951         <layer id="3" name="data4" precision="FP32" type="Input">
1961         <layer id="4" name="Concat0" precision="FP32" type="Concat">
1986         <layer id="5" name="Concat1" precision="FP32" type="Concat">
2011         <layer id="6" name="Concat2" precision="FP32" type="Concat">
2038         <edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
2039         <edge from-layer="2" from-port="0" to-layer="4" to-port="1"/>
2040         <edge from-layer="1" from-port="0" to-layer="5" to-port="1"/>
2041         <edge from-layer="4" from-port="2" to-layer="5" to-port="0"/>
2042         <edge from-layer="3" from-port="0" to-layer="6" to-port="1"/>
2043         <edge from-layer="4" from-port="2" to-layer="6" to-port="0"/>
// Parse the IR and build the executable network + infer request.
2048     InferenceEngine::CNNNetReader net_reader;
2049     ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
2050     MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(net_reader.getNetwork(), {}, {}));
2051     InferenceEngine::InputsDataMap _networkInputs = net_reader.getNetwork().getInputsInfo();
2052     InferenceEngine::OutputsDataMap _networkOutputs = net_reader.getNetwork().getOutputsInfo();
2053     execNetwork->setNetworkInputs(_networkInputs);
2054     execNetwork->setNetworkOutputs(_networkOutputs);
2055     InferenceEngine::IInferRequest::Ptr inferRequest;
2056     execNetwork->CreateInferRequest(inferRequest);
// Four random FP32 NCHW inputs with channel counts 3 / 4 / 2 / 1.
2058     InferenceEngine::TensorDesc desc1(InferenceEngine::Precision::FP32, {1, 3, 2, 2}, InferenceEngine::NCHW);
2059     InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>(desc1);
2061     fill_data(src1->buffer(), src1->size());
2063     InferenceEngine::TensorDesc desc2(InferenceEngine::Precision::FP32, {1, 4, 2, 2}, InferenceEngine::NCHW);
2064     InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float>(desc2);
2066     fill_data(src2->buffer(), src2->size());
2068     InferenceEngine::TensorDesc desc3(InferenceEngine::Precision::FP32, {1, 2, 2, 2}, InferenceEngine::NCHW);
2069     InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob<float>(desc3);
2071     fill_data(src3->buffer(), src3->size());
2073     InferenceEngine::TensorDesc desc4(InferenceEngine::Precision::FP32, {1, 1, 2, 2}, InferenceEngine::NCHW);
2074     InferenceEngine::Blob::Ptr src4 = InferenceEngine::make_shared_blob<float>(desc4);
2076     fill_data(src4->buffer(), src4->size());
2078     InferenceEngine::ResponseDesc resp;
// Bind all four inputs; each SetBlob status is checked individually here.
2080     InferenceEngine::StatusCode sts = inferRequest->SetBlob("data1", src1, &resp);
2081     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
2082     sts = inferRequest->SetBlob("data2", src2, &resp);
2083     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
2084     sts = inferRequest->SetBlob("data3", src3, &resp);
2085     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
2086     sts = inferRequest->SetBlob("data4", src4, &resp);
2087     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
2089     InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
// For every network output, allocate an output blob and build a host-side
// reference blob by concatenating the relevant source blobs element-wise.
2091     std::vector<InferenceEngine::TBlob<float>::Ptr> outputs;
2092     std::vector<InferenceEngine::TBlob<float>::Ptr> refOutputs;
2093     for (const auto& it : out) {
2094         InferenceEngine::TBlob<float>::Ptr output;
2095         output = InferenceEngine::make_shared_blob<float>(it.second->getTensorDesc());
2097         outputs.push_back(output);
2099         InferenceEngine::TBlob<float>::Ptr refOutput;
2100         refOutput = InferenceEngine::make_shared_blob<float>(it.second->getTensorDesc());
2101         refOutput->allocate();
2103         float * refData = refOutput->buffer().as<float *>();
// NOTE(review): `ref_idx` is used below but its declaration is not visible in
// this excerpt — confirm it is (re)initialized to 0 for each output iteration.
// Concat1 reference = src1 ++ src3 (Concat0's order) ++ src2.
2105         if (it.first == "Concat1") {
2106             float *srcData = src1->buffer().as<float *>();
2107             for (size_t i = 0; i < src1->size(); i++, ref_idx++) {
2108                 refData[ref_idx] = srcData[i];
2110             srcData = src3->buffer().as<float *>();
2111             for (size_t i = 0; i < src3->size(); i++, ref_idx++) {
2112                 refData[ref_idx] = srcData[i];
2114             srcData = src2->buffer().as<float *>();
2115             for (size_t i = 0; i < src2->size(); i++, ref_idx++) {
2116                 refData[ref_idx] = srcData[i];
// Concat2 reference = src1 ++ src3 ++ src4.
2120         } else if (it.first == "Concat2") {
2121             float *srcData = src1->buffer().as<float *>();
2122             for (size_t i = 0; i < src1->size(); i++, ref_idx++) {
2123                 refData[ref_idx] = srcData[i];
2125             srcData = src3->buffer().as<float *>();
2126             for (size_t i = 0; i < src3->size(); i++, ref_idx++) {
2127                 refData[ref_idx] = srcData[i];
2129             srcData = src4->buffer().as<float *>();
2130             for (size_t i = 0; i < src4->size(); i++, ref_idx++) {
2131                 refData[ref_idx] = srcData[i];
2135         refOutputs.push_back(refOutput);
2137         sts = inferRequest->SetBlob(it.first.c_str(), output, &resp);
2138         ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
// Run once and compare every output blob with its host-built reference.
2141     sts = inferRequest->Infer(&resp);
2142     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
2144     for (size_t i = 0; i < outputs.size(); i++) {
2145         compare(*outputs[i], *refOutputs[i]);
// Test: grouped convolution (group=6, 3x3, output=24) followed by a Power
// layer (scale=-1, i.e. negation) and a channel-wise Concat of both branches.
// Infers at batch 1 to get a reference, then rebuilds the graph at batch 2
// with the same input replicated per batch and checks the results match.
2149 TEST_F(MKLDNNGraphStructureTests, TestResultsAfterGroupedConvWithStrides) {
2150     std::string model = R"V0G0N(
2151 <net name="net" version="2" batch="1">
2153         <layer name="data" type="Input" precision="FP32" id="0">
2163         <layer name="conv1_1_conv" type="Convolution" precision="FP32" id="2">
2164             <data dilation-x="1" dilation-y="1" group="6" kernel-x="3" kernel-y="3" output="24" pad-x="1" pad-y="1" stride="1,1,1,1" stride-x="1" stride-y="1"/>
2181             <weights offset="0" size="3456"/>
2182             <biases offset="3456" size="96"/>
2184         <layer name="conv1_1_neg" type="Power" precision="FP32" id="3">
2185             <power_data power="1" scale="-1" shift="0"/>
2203         <layer name="conv1_1_concat" type="Concat" precision="FP32" id="4">
2204             <concat_data axis="1"/>
2230         <edge from-layer="0" from-port="0" to-layer="2" to-port="2"/>
2231         <edge from-layer="2" from-port="3" to-layer="3" to-port="4"/>
2232         <edge from-layer="2" from-port="3" to-layer="4" to-port="6"/>
2233         <edge from-layer="3" from-port="5" to-layer="4" to-port="7"/>
2238     InferenceEngine::CNNNetReader net_reader;
2239     ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
// Random weights blob: 3552 bytes = 3456 (weights) + 96 (biases) per the IR.
// NOTE(review): raw `new` is wrapped in TBlob::Ptr below; the initial `data`
// assignment here appears unused until the batch-2 section reuses the name.
2241     InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {3552});
2242     weights->allocate();
2243     float * data = weights->buffer();
2244     fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
2245     InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
2247     net_reader.SetWeights(weights_ptr);
// Batch-1 pass: build the MKLDNN graph and record its output as reference.
2249     MKLDNNGraphTestClass graph;
2250     graph.CreateGraph(net_reader.getNetwork());
2252     InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 24, 80, 80}, InferenceEngine::NCHW);
2253     InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(desc);
2255     fill_data((float *) src->buffer(), src->size());
2257     InferenceEngine::BlobMap srcs;
2258     srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data", src));
2260     InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
2262     InferenceEngine::BlobMap outputBlobs;
2263     std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
2265     InferenceEngine::TBlob<float>::Ptr refOutput;
2266     refOutput = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
2267     refOutput->allocate();
2268     outputBlobs[item.first] = refOutput;
2270     graph.Infer(srcs, outputBlobs);
2272     // Compare for batch2
2273     net_reader.getNetwork().setBatchSize(2);
2274     graph.CreateGraph(net_reader.getNetwork());
2275     desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {2, 24, 80, 80}, InferenceEngine::NCHW);
// Replicate the batch-1 input into both batches of the batch-2 blob,
// honoring each blob's TensorDesc offset mapping.
2277     InferenceEngine::Blob::Ptr srcBatch = InferenceEngine::make_shared_blob<float>(desc);
2278     srcBatch->allocate();
2279     data = srcBatch->buffer().as<float *>();
2280     float *originData = src->buffer().as<float *>();
2281     for(size_t b = 0; b < 2; b++) {
2282         for (size_t i = 0; i < src->size(); i++) {
2283             data[srcBatch->getTensorDesc().offset(b*src->size() + i)] = originData[src->getTensorDesc().offset(i)];
2288     srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data", srcBatch));
2289     out = net_reader.getNetwork().getOutputsInfo();
2291     outputBlobs.clear();
2292     item = *out.begin();
2294     InferenceEngine::TBlob<float>::Ptr output;
2295     output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
2297     outputBlobs[item.first] = output;
2299     graph.Infer(srcs, outputBlobs);
// Expected batch-2 output = the batch-1 reference duplicated per batch.
2300     InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
2302     data = dstOut->buffer().as<float *>();
2303     originData = refOutput->buffer().as<float *>();
2304     for(size_t b = 0; b < 2; b++) {
2305         for (size_t i = 0; i < refOutput->size(); i++) {
2306             data[dstOut->getTensorDesc().offset(b*refOutput->size() + i)] = originData[refOutput->getTensorDesc().offset(i)];
2310     compare(*output, *dstOut);
// Test: a topology that concatenates a runtime Input with a Const layer whose
// data comes from the weights blob (custom offset=0, size=6400). Verifies the
// network loads into an exec network and a single inference returns OK.
2313 TEST_F(MKLDNNGraphStructureTests, TestLoadTopologyWithConstLayer) {
2314     std::string model = R"V0G0N(
2315 <net batch="1" name="model" version="2">
2317         <layer id="0" name="data" precision="FP32" type="Input">
2327         <layer id="1" name="data1" precision="FP32" type="Const">
2337                 <custom offset="0" size="6400"/>
2340         <layer id="3" name="Concat1" precision="FP32" type="Concat">
2367         <edge from-layer="0" from-port="0" to-layer="3" to-port="0"/>
2368         <edge from-layer="1" from-port="0" to-layer="3" to-port="1"/>
2373     InferenceEngine::CNNNetReader net_reader;
2374     ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
// Random 6400-byte weights blob backing the Const layer.
// NOTE(review): the `data` local below appears unused in this test body
// (fill_data is called on the buffer directly) — confirm it can be removed.
2376     InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {6400});
2377     weights->allocate();
2378     float * data = weights->buffer();
2379     fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
2380     InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
2382     net_reader.SetWeights(weights_ptr);
// Build the executable network, wire I/O, create an infer request.
2383     MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(net_reader.getNetwork(), {}, {}));
2384     InferenceEngine::InputsDataMap _networkInputs = net_reader.getNetwork().getInputsInfo();
2385     InferenceEngine::OutputsDataMap _networkOutputs = net_reader.getNetwork().getOutputsInfo();
2386     execNetwork->setNetworkInputs(_networkInputs);
2387     execNetwork->setNetworkOutputs(_networkOutputs);
2388     InferenceEngine::IInferRequest::Ptr inferRequest;
2389     execNetwork->CreateInferRequest(inferRequest);
// Single random input blob for the "data" layer.
2391     InferenceEngine::TensorDesc desc1(InferenceEngine::Precision::FP32, {1, 3, 20, 20}, InferenceEngine::NCHW);
2392     InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>(desc1);
2394     fill_data(src1->buffer(), src1->size());
2396     InferenceEngine::ResponseDesc resp;
2398     InferenceEngine::StatusCode sts = inferRequest->SetBlob("data", src1, &resp);
2399     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
// Bind the first network output and run; only OK statuses are asserted.
2401     InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
2403     std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
2405     InferenceEngine::TBlob<float>::Ptr output;
2406     output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
2409     sts = inferRequest->SetBlob(item.first.c_str(), output, &resp);
2410     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
2412     sts = inferRequest->Infer(&resp);
2413     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
// Test: Eltwise(sum) of a runtime Input and a Const layer, concatenated with a
// second Const layer. Checks (a) every output element is ~4 (inputs and
// constants are filled with fixed values so the sum/concat result is known),
// and (b) the resulting MKLDNN graph contains no Reorder nodes.
2416 TEST_F(MKLDNNGraphStructureTests, TestLoadTopologyWithEltwiseBeforeConcat) {
2417     std::string model = R"V0G0N(
2418 <net batch="1" name="model" version="2">
2420         <layer id="0" name="data" precision="FP32" type="Input">
2430         <layer id="1" name="data1" precision="FP32" type="Const">
2440                 <custom offset="0" size="4800"/>
2443         <layer id="2" name="data2" precision="FP32" type="Const">
2453                 <custom offset="4800" size="1600"/>
2456         <layer name="Eltwise1" type="Eltwise" id="3" precision="FP32">
2457             <data operation="sum" />
2481         <layer id="4" name="Concat1" precision="FP32" type="Concat">
2508         <edge from-layer="0" from-port="0" to-layer="3" to-port="0"/>
2509         <edge from-layer="1" from-port="0" to-layer="3" to-port="1"/>
2510         <edge from-layer="3" from-port="2" to-layer="4" to-port="1"/>
2511         <edge from-layer="2" from-port="0" to-layer="4" to-port="0"/>
2516     InferenceEngine::CNNNetReader net_reader;
2517     ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
// Weights blob holds both Const payloads: data1 at [0,4800), data2 at
// [4800,6400). The two loops below fill those float regions with fixed
// values; the loop bodies are not visible in this excerpt — presumably they
// write the constants that make the final output equal 4 (TODO confirm).
2519     InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {6400});
2520     weights->allocate();
2521     float * data = weights->buffer();
2522     for (size_t i = 0; i < 1200; i++) {
2525     for (size_t i = 1200; i < 1600; i++) {
2528     InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
2530     net_reader.SetWeights(weights_ptr);
// Build the executable network, wire I/O, create an infer request.
2531     MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(net_reader.getNetwork(), {}, {}));
2532     InferenceEngine::InputsDataMap _networkInputs = net_reader.getNetwork().getInputsInfo();
2533     InferenceEngine::OutputsDataMap _networkOutputs = net_reader.getNetwork().getOutputsInfo();
2534     execNetwork->setNetworkInputs(_networkInputs);
2535     execNetwork->setNetworkOutputs(_networkOutputs);
2536     InferenceEngine::IInferRequest::Ptr inferRequest;
2537     execNetwork->CreateInferRequest(inferRequest);
// Fill the runtime input with a fixed value as well (loop body elided here).
2539     InferenceEngine::TensorDesc desc1(InferenceEngine::Precision::FP32, {1, 3, 20, 20}, InferenceEngine::NCHW);
2540     InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>(desc1);
2542     data = src1->buffer();
2543     for (size_t i = 0; i < 1200; i++) {
2547     InferenceEngine::ResponseDesc resp;
2549     InferenceEngine::StatusCode sts = inferRequest->SetBlob("data", src1, &resp);
2550     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
2552     InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
2554     std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
2556     InferenceEngine::TBlob<float>::Ptr output;
2557     output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
2560     sts = inferRequest->SetBlob(item.first.c_str(), output, &resp);
2561     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
2563     sts = inferRequest->Infer(&resp);
2564     ASSERT_EQ(InferenceEngine::OK, sts) << resp.msg;
// Every element of the concatenated output must be 4 within 0.01.
2566     auto *res_ptr = output->buffer().as<float*>();
2567     size_t res_size = output->size();
2569     for (size_t i = 0; i < res_size; i++) {
2570         ASSERT_NEAR(res_ptr[i], 4, 0.01f);
// Structural check: rebuild via the test graph class and assert that no
// Reorder nodes were inserted (any Reorder found must sit between an Input
// parent and an Eltwise child, and the final count must be zero).
2573     MKLDNNGraphTestClass graph;
2574     graph.CreateGraph(net_reader.getNetwork());
2576     size_t reorders_num = 0;
2577     auto& nodes = graph.getNodes();
2578     for (auto &node : nodes) {
2579         if (node->getType() == MKLDNNPlugin::Reorder) {
2581             ASSERT_EQ(MKLDNNPlugin::Input, node->getParentEdgeAt(0)->getParent()->getType());
2582             ASSERT_EQ(MKLDNNPlugin::Eltwise, node->getChildEdgeAt(0)->getChild()->getType());
2585     ASSERT_EQ(reorders_num, 0);
// Test: an RMNet-SSSSD-style sub-graph (ScaleShift -> strided Convolution ->
// ReLU, then two residual bottlenecks built from 1x1 conv / ELU / depthwise
// 3x3 conv / 1x1 conv with Eltwise-sum skip connections). After building the
// MKLDNN graph, exactly one Reorder node is expected, and it must feed the
// Output node — i.e. no redundant reorders inside the body of the network.
2587 TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersRmnet_SSSSD) {
2588     std::string model = R"V0G0N(
2589 <?xml version="1.0" ?>
2590 <net batch="1" name="model" version="2">
2592         <layer id="26" name="data" precision="FP32" type="Input">
2602         <layer id="9" name="Mul_115/Fused_Mul_157/FusedScaleShift_204" precision="FP32" type="ScaleShift">
2620             <weights offset="7180" size="12"/>
2621             <biases offset="2528" size="12"/>
2624         <layer id="51" name="init_block1/dim_inc/conv" precision="FP32" type="Convolution">
2625             <data dilation-x="1" dilation-y="1" group="1" kernel-x="3" kernel-y="3" output="32" pad-x="1" pad-y="1" stride="1,1,2,2" stride-x="2" stride-y="2"/>
2643             <weights offset="3596" size="3456"/>
2644             <biases offset="8536" size="128"/>
2647         <layer id="43" name="init_block1/dim_inc/fn" precision="FP32" type="ReLU">
2648             <data engine="caffe.ReLUParameter.DEFAULT" negative_slope="0.0"/>
2666         <layer id="11" name="bottleneck1_1/dim_red/conv" precision="FP32" type="Convolution">
2667             <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="1" output="8" pad-x="0" pad-y="0" stride="1,1,1,1" stride-x="1" stride-y="1"/>
2685             <weights offset="32" size="1024"/>
2686             <biases offset="1472" size="32"/>
2689         <layer id="22" name="bottleneck1_1/dim_red/fn" precision="FP32" type="ELU">
2708         <layer id="34" name="bottleneck1_1/inner/dw1/conv" precision="FP32" type="Convolution">
2709             <data dilation-x="1" dilation-y="1" group="8" kernel-x="3" kernel-y="3" output="8" pad-x="1" pad-y="1" stride="1,1,1,1" stride-x="1" stride-y="1"/>
2727             <weights offset="8248" size="288"/>
2728             <biases offset="3564" size="32"/>
2731         <layer id="39" name="bottleneck1_1/inner/dw1/fn" precision="FP32" type="ELU">
2750         <layer id="18" name="bottleneck1_1/dim_inc/conv" precision="FP32" type="Convolution">
2751             <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="1" output="32" pad-x="0" pad-y="0" stride="1,1,1,1" stride-x="1" stride-y="1"/>
2769             <weights offset="2540" size="1024"/>
2770             <biases offset="7052" size="128"/>
2773         <layer id="32" name="bottleneck1_1/add" precision="FP32" type="Eltwise">
2774             <data coeff="" operation="sum"/>
2798         <layer id="7" name="bottleneck1_1/fn" precision="FP32" type="ELU">
2817         <layer id="29" name="bottleneck1_2/dim_red/conv" precision="FP32" type="Convolution">
2818             <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="1" output="8" pad-x="0" pad-y="0" stride="1,1,1,1" stride-x="1" stride-y="1"/>
2836             <weights offset="7192" size="1024"/>
2837             <biases offset="0" size="32"/>
2840         <layer id="45" name="bottleneck1_2/dim_red/fn" precision="FP32" type="ELU">
2859         <layer id="41" name="bottleneck1_2/inner/dw1/conv" precision="FP32" type="Convolution">
2860             <data dilation-x="1" dilation-y="1" group="8" kernel-x="3" kernel-y="3" output="8" pad-x="1" pad-y="1" stride="1,1,1,1" stride-x="1" stride-y="1"/>
2878             <weights offset="1184" size="288"/>
2879             <biases offset="8216" size="32"/>
2882         <layer id="25" name="bottleneck1_2/inner/dw1/fn" precision="FP32" type="ELU">
2901         <layer id="6" name="bottleneck1_2/dim_inc/conv" precision="FP32" type="Convolution">
2902             <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="1" output="32" pad-x="0" pad-y="0" stride="1,1,1,1" stride-x="1" stride-y="1"/>
2920             <weights offset="1504" size="1024"/>
2921             <biases offset="1056" size="128"/>
2924         <layer id="44" name="bottleneck1_2/add" precision="FP32" type="Eltwise">
2925             <data coeff="" operation="sum"/>
2949         <layer id="49" name="bottleneck1_2/fn" precision="FP32" type="ELU">
2970         <edge from-layer="26" from-port="0" to-layer="9" to-port="0"/>
2971         <edge from-layer="9" from-port="3" to-layer="51" to-port="0"/>
2972         <edge from-layer="51" from-port="3" to-layer="43" to-port="0"/>
2973         <edge from-layer="43" from-port="1" to-layer="11" to-port="0"/>
2974         <edge from-layer="11" from-port="3" to-layer="22" to-port="0"/>
2975         <edge from-layer="22" from-port="1" to-layer="34" to-port="0"/>
2976         <edge from-layer="34" from-port="3" to-layer="39" to-port="0"/>
2977         <edge from-layer="39" from-port="1" to-layer="18" to-port="0"/>
2978         <edge from-layer="43" from-port="1" to-layer="32" to-port="0"/>
2979         <edge from-layer="18" from-port="3" to-layer="32" to-port="1"/>
2980         <edge from-layer="32" from-port="2" to-layer="7" to-port="0"/>
2981         <edge from-layer="7" from-port="1" to-layer="29" to-port="0"/>
2982         <edge from-layer="29" from-port="3" to-layer="45" to-port="0"/>
2983         <edge from-layer="45" from-port="1" to-layer="41" to-port="0"/>
2984         <edge from-layer="41" from-port="3" to-layer="25" to-port="0"/>
2985         <edge from-layer="25" from-port="1" to-layer="6" to-port="0"/>
2986         <edge from-layer="7" from-port="1" to-layer="44" to-port="0"/>
2987         <edge from-layer="6" from-port="3" to-layer="44" to-port="1"/>
2988         <edge from-layer="44" from-port="2" to-layer="49" to-port="0"/>
2992     InferenceEngine::CNNNetReader net_reader;
2993     ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
// Random 8664-byte weights blob covering all weights/biases offsets in the IR.
2995     InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {8664});
2996     weights->allocate();
2997     fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
2998     InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
3000     net_reader.SetWeights(weights_ptr);
3002     MKLDNNGraphTestClass graph;
3003     graph.CreateGraph(net_reader.getNetwork());
// Count Reorder nodes; any Reorder must be the one feeding the Output node.
// NOTE(review): the counter increment is not visible in this excerpt —
// presumably it sits inside the if-branch; the final expected count is 1.
3005     size_t reorders_num = 0;
3006     auto& nodes = graph.getNodes();
3007     for (auto &node : nodes) {
3008         if (node->getType() == MKLDNNPlugin::Reorder) {
3010             ASSERT_EQ(MKLDNNPlugin::Output, node->getChildEdgeAt(0)->getChild()->getType());
3014     ASSERT_EQ(reorders_num, 1);
// Test: a previously-failing DPN-92 fragment — two convolution branches whose
// outputs are each Slice'd in two, the first slices summed by Eltwise and the
// second slices concatenated, with both results merged by a final Concat.
// Runs at batch 1 to capture a reference, then rebuilds at batch 2 with the
// inputs replicated per batch and checks the output matches the replicated
// batch-1 reference.
3017 TEST_F(MKLDNNGraphStructureTests, TestFailedPartDPN92) {
3018     std::string model = R"V0G0N(
3019 <net name="net" version="2" batch="1">
3021         <layer name="data" type="Input" precision="FP32" id="0">
3031         <layer name="data2" type="Input" precision="FP32" id="1">
3041         <layer id="132" name="dpn8_match_conv" precision="FP32" type="Convolution">
3042             <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="1" output="96" pad-x="0" pad-y="0" stride="1,1,2,2" stride-x="2" stride-y="2"/>
3060             <weights offset="0" size="24576"/>
3063         <layer id="133" name="dpn8_match_conv_Slice" precision="FP32" type="Slice">
3088         <layer id="145" name="dpn8_conv3" precision="FP32" type="Convolution">
3089             <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="1" output="72" pad-x="0" pad-y="0" stride="1,1,1,1" stride-x="1" stride-y="1"/>
3107             <weights offset="24576" size="9216"/>
3110         <layer id="146" name="dpn8_conv3_Slice" precision="FP32" type="Slice">
3135         <layer id="147" name="dpn8_elewise" precision="FP32" type="Eltwise">
3136             <data coeff="" operation="sum"/>
3160         <layer id="148" name="dpn8_concat" precision="FP32" type="Concat">
3185         <layer id="149" name="dpn9_concat_input" precision="FP32" type="Concat">
3212         <edge from-layer="0" from-port="0" to-layer="145" to-port="0"/>
3213         <edge from-layer="1" from-port="0" to-layer="132" to-port="0"/>
3214         <edge from-layer="145" from-port="2" to-layer="146" to-port="0"/>
3215         <edge from-layer="132" from-port="2" to-layer="133" to-port="0"/>
3216         <edge from-layer="133" from-port="1" to-layer="147" to-port="0"/>
3217         <edge from-layer="146" from-port="1" to-layer="147" to-port="1"/>
3218         <edge from-layer="133" from-port="2" to-layer="148" to-port="0"/>
3219         <edge from-layer="146" from-port="2" to-layer="148" to-port="1"/>
3220         <edge from-layer="148" from-port="2" to-layer="149" to-port="1"/>
3221         <edge from-layer="147" from-port="2" to-layer="149" to-port="0"/>
3225     InferenceEngine::CNNNetReader net_reader;
3226     ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
// Random 33792-byte weights blob = 24576 + 9216 per the two conv layers.
3228     InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {33792});
3229     weights->allocate();
3230     fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
3232     InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
3234     net_reader.SetWeights(weights_ptr);
// Batch-1 pass: build the graph, infer, and snapshot the output into refDst.
3236     MKLDNNGraphTestClass graph;
3237     graph.CreateGraph(net_reader.getNetwork());
3239     InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 32, 14, 14}, InferenceEngine::NCHW);
3240     InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>(desc);
3242     fill_data((float *) src1->buffer(), src1->size());
3245     desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {1, 64, 28, 28}, InferenceEngine::NCHW);
3246     InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float>(desc);
3248     fill_data((float *) src2->buffer(), src2->size());
3250     InferenceEngine::BlobMap srcs;
3251     srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data", src1));
3252     srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data2", src2));
3254     InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
3256     InferenceEngine::BlobMap outputBlobs;
3257     std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
3259     InferenceEngine::TBlob<float>::Ptr output;
3260     output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
3262     outputBlobs[item.first] = output;
3264     graph.Infer(srcs, outputBlobs);
// Copy the batch-1 result out (offset-mapped) before rebuilding the graph.
3266     std::vector<float> refDst(output->size());
3267     auto *data = output->buffer().as<float *>();
3268     for (size_t i = 0; i < output->size(); i++) {
3269         refDst[i] = data[output->getTensorDesc().offset(i)];
3272     // Compare for batch2
3273     net_reader.getNetwork().setBatchSize(2);
3274     graph.CreateGraph(net_reader.getNetwork());
3275     desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {2, 32, 14, 14}, InferenceEngine::NCHW);
// Replicate each batch-1 input into both batches of its batch-2 blob.
3277     InferenceEngine::Blob::Ptr src1Batch = InferenceEngine::make_shared_blob<float>(desc);
3278     src1Batch->allocate();
3279     data = src1Batch->buffer().as<float *>();
3280     auto *originData = src1->buffer().as<float *>();
3281     for(size_t b = 0; b < 2; b++) {
3282         for (size_t i = 0; i < src1->size(); i++) {
3283             data[src1Batch->getTensorDesc().offset(b*src1->size() + i)] = originData[src1->getTensorDesc().offset(i)];
3287     desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {2, 64, 28, 28}, InferenceEngine::NCHW);
3289     InferenceEngine::Blob::Ptr src2Batch = InferenceEngine::make_shared_blob<float>(desc);
3290     src2Batch->allocate();
3291     data = src2Batch->buffer().as<float *>();
3292     originData = src2->buffer().as<float *>();
3293     for(size_t b = 0; b < 2; b++) {
3294         for (size_t i = 0; i < src2->size(); i++) {
3295             data[src2Batch->getTensorDesc().offset(b*src2->size() + i)] = originData[src2->getTensorDesc().offset(i)];
3300     srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data", src1Batch));
3301     srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data2", src2Batch));
3302     out = net_reader.getNetwork().getOutputsInfo();
3304     outputBlobs.clear();
3305     item = *out.begin();
3306     output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
3308     outputBlobs[item.first] = output;
3310     graph.Infer(srcs, outputBlobs);
// Expected batch-2 output = refDst duplicated for each batch.
3311     InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
3313     data = dstOut->buffer().as<float *>();
3314     for(size_t b = 0; b < 2; b++) {
3315         for (size_t i = 0; i < refDst.size(); i++) {
3316             data[dstOut->getTensorDesc().offset(b*refDst.size() + i)] = refDst[i];
3320     compare(*output, *dstOut);
// Builds the MKLDNN graph for a trimmed Xception-style topology (separable
// depthwise+pointwise convolutions with residual Eltwise adds) and checks
// that graph optimization removes all redundant Reorder nodes: the only
// Reorder allowed to remain is the one feeding the Output node.
3323 TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersForXceptionTopology) {
3324 std::string model = R"V0G0N(
3325 <net batch="1" name="xception" version="2">
3327 <layer id="1" name="input_1" precision="FP32" type="Input">
3337 <layer id="2" name="block1_conv1" precision="FP32" type="Convolution">
3338 <data dilation-x="1" dilation-y="1" group="1" kernel-x="3" kernel-y="3" output="32" pad-x="0" pad-y="0" stride-x="2" stride-y="2"/>
3355 <weights offset="0" size="3456"/>
3356 <biases offset="3456" size="128"/>
3358 <layer id="4" name="block1_conv1_act" precision="FP32" type="ReLU">
3376 <layer id="5" name="block1_conv2" precision="FP32" type="Convolution">
3377 <data dilation-x="1" dilation-y="1" group="1" kernel-x="3" kernel-y="3" output="64" pad-x="0" pad-y="0" stride-x="1" stride-y="1"/>
3394 <weights offset="3584" size="73728"/>
3395 <biases offset="77312" size="256"/>
3397 <layer id="7" name="block1_conv2_act" precision="FP32" type="ReLU">
3415 <layer id="136" name="block2_sepconv1_depth" precision="FP32" type="Convolution">
3416 <data dilation-x="1" dilation-y="1" group="64" kernel-x="3" kernel-y="3" output="64" pad-x="1" pad-y="1" stride-x="1" stride-y="1"/>
3433 <weights offset="77568" size="2304"/>
3435 <layer id="137" name="block2_sepconv1_point" precision="FP32" type="Convolution">
3436 <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="1" output="128" pad-x="0" pad-y="0" stride-x="1" stride-y="1"/>
3453 <weights offset="79872" size="32768"/>
3454 <biases offset="112640" size="512"/>
3456 <layer id="10" name="block2_sepconv2_act" precision="FP32" type="ReLU">
3474 <layer id="138" name="block2_sepconv2_depth" precision="FP32" type="Convolution">
3475 <data dilation-x="1" dilation-y="1" group="128" kernel-x="3" kernel-y="3" output="128" pad-x="1" pad-y="1" stride-x="1" stride-y="1"/>
3492 <weights offset="113152" size="4608"/>
3494 <layer id="139" name="block2_sepconv2_point" precision="FP32" type="Convolution">
3495 <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="1" output="128" pad-x="0" pad-y="0" stride-x="1" stride-y="1"/>
3512 <weights offset="117760" size="65536"/>
3513 <biases offset="183296" size="512"/>
3515 <layer id="13" name="conv2d_1" precision="FP32" type="Convolution">
3516 <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="1" output="128" pad-x="0" pad-y="0" stride-x="2" stride-y="2"/>
3533 <weights offset="183808" size="32768"/>
3534 <biases offset="216576" size="512"/>
3536 <layer id="14" name="block2_pool" precision="FP32" type="Pooling">
3537 <data kernel-x="3" kernel-y="3" pad-x="1" pad-y="1" pool-method="max" stride-x="2" stride-y="2"/>
3555 <layer id="16" name="add_1" precision="FP32" type="Eltwise">
3579 <layer id="17" name="block3_sepconv1_act" precision="FP32" type="ReLU">
3597 <layer id="140" name="block3_sepconv1_depth" precision="FP32" type="Convolution">
3598 <data dilation-x="1" dilation-y="1" group="128" kernel-x="3" kernel-y="3" output="128" pad-x="1" pad-y="1" stride-x="1" stride-y="1"/>
3615 <weights offset="217088" size="4608"/>
3617 <layer id="141" name="block3_sepconv1_point" precision="FP32" type="Convolution">
3618 <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="1" output="256" pad-x="0" pad-y="0" stride-x="1" stride-y="1"/>
3635 <weights offset="221696" size="131072"/>
3636 <biases offset="352768" size="1024"/>
3638 <layer id="20" name="block3_sepconv2_act" precision="FP32" type="ReLU">
3656 <layer id="142" name="block3_sepconv2_depth" precision="FP32" type="Convolution">
3657 <data dilation-x="1" dilation-y="1" group="256" kernel-x="3" kernel-y="3" output="256" pad-x="1" pad-y="1" stride-x="1" stride-y="1"/>
3674 <weights offset="353792" size="9216"/>
3676 <layer id="143" name="block3_sepconv2_point" precision="FP32" type="Convolution">
3677 <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="1" output="256" pad-x="0" pad-y="0" stride-x="1" stride-y="1"/>
3694 <weights offset="363008" size="262144"/>
3695 <biases offset="625152" size="1024"/>
3697 <layer id="23" name="conv2d_2" precision="FP32" type="Convolution">
3698 <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="1" output="256" pad-x="0" pad-y="0" stride-x="2" stride-y="2"/>
3715 <weights offset="626176" size="131072"/>
3716 <biases offset="757248" size="1024"/>
3718 <layer id="24" name="block3_pool" precision="FP32" type="Pooling">
3719 <data kernel-x="3" kernel-y="3" pad-x="0" pad-y="0" pool-method="max" stride-x="2" stride-y="2"/>
3737 <layer id="26" name="add_2" precision="FP32" type="Eltwise">
3763 <edge from-layer="1" from-port="1" to-layer="2" to-port="2"/>
3764 <edge from-layer="2" from-port="3" to-layer="4" to-port="6"/>
3765 <edge from-layer="4" from-port="7" to-layer="5" to-port="8"/>
3766 <edge from-layer="5" from-port="9" to-layer="7" to-port="12"/>
3767 <edge from-layer="7" from-port="13" to-layer="13" to-port="15"/>
3768 <edge from-layer="137" from-port="285" to-layer="10" to-port="19"/>
3769 <edge from-layer="139" from-port="289" to-layer="14" to-port="25"/>
3770 <edge from-layer="14" from-port="28" to-layer="16" to-port="29"/>
3771 <edge from-layer="13" from-port="26" to-layer="16" to-port="31"/>
3772 <edge from-layer="16" from-port="32" to-layer="17" to-port="33"/>
3773 <edge from-layer="16" from-port="32" to-layer="23" to-port="34"/>
3774 <edge from-layer="141" from-port="293" to-layer="20" to-port="40"/>
3775 <edge from-layer="143" from-port="297" to-layer="24" to-port="46"/>
3776 <edge from-layer="24" from-port="49" to-layer="26" to-port="50"/>
3777 <edge from-layer="23" from-port="47" to-layer="26" to-port="52"/>
3778 <edge from-layer="7" from-port="13" to-layer="136" to-port="282"/>
3779 <edge from-layer="136" from-port="283" to-layer="137" to-port="284"/>
3780 <edge from-layer="10" from-port="20" to-layer="138" to-port="286"/>
3781 <edge from-layer="138" from-port="287" to-layer="139" to-port="288"/>
3782 <edge from-layer="17" from-port="35" to-layer="140" to-port="290"/>
3783 <edge from-layer="140" from-port="291" to-layer="141" to-port="292"/>
3784 <edge from-layer="20" from-port="41" to-layer="142" to-port="294"/>
3785 <edge from-layer="142" from-port="295" to-layer="143" to-port="296"/>
// Parse the IR model and attach a randomly filled weight blob (758272 bytes,
// sized to cover all <weights>/<biases> offsets declared in the model above).
3792 InferenceEngine::CNNNetReader net_reader;
3793 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
3795 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {758272});
3796 weights->allocate();
3797 fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
3798 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
3800 net_reader.SetWeights(weights_ptr);
// Build the optimized MKLDNN execution graph.
3802 MKLDNNGraphTestClass graph;
3803 graph.CreateGraph(net_reader.getNetwork());
// Walk every node: any surviving Reorder must feed an Output node directly.
3805 size_t reorders_num = 0;
3806 auto& nodes = graph.getNodes();
3807 for (auto &node : nodes) {
3808 if (node->getType() == MKLDNNPlugin::Reorder) {
3810 ASSERT_EQ(MKLDNNPlugin::Output, node->getChildEdgeAt(0)->getChild()->getType());
// Exactly one Reorder (the one in front of the output) should remain.
3813 ASSERT_EQ(reorders_num, 1);
// Single-channel (grayscale) input feeding one convolution: verifies that no
// extra Reorder is inserted for the 1-channel input layout, leaving only the
// single Reorder that feeds the Output node.
3816 TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersForGrayscaleInput) {
3817 std::string model = R"V0G0N(
3818 <net batch="1" name="xception" version="2">
3820 <layer id="1" name="data" precision="FP32" type="Input">
3830 <layer id="2" name="conv1" precision="FP32" type="Convolution">
3831 <data dilation-x="1" dilation-y="1" group="1" kernel-x="3" kernel-y="3" output="32" pad-x="0" pad-y="0" stride-x="1" stride-y="1"/>
3848 <weights offset="0" size="1152"/>
3849 <biases offset="1152" size="128"/>
3853 <edge from-layer="1" from-port="1" to-layer="2" to-port="2"/>
// Parse the IR and attach random weights (1152 + 128 = 1280 bytes).
3860 InferenceEngine::CNNNetReader net_reader;
3861 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
3863 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {1280});
3864 weights->allocate();
3865 fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
3866 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
3868 net_reader.SetWeights(weights_ptr);
// Build the optimized graph and count Reorder nodes.
3870 MKLDNNGraphTestClass graph;
3871 graph.CreateGraph(net_reader.getNetwork());
3873 size_t reorders_num = 0;
3874 auto& nodes = graph.getNodes();
3875 for (auto &node : nodes) {
3876 if (node->getType() == MKLDNNPlugin::Reorder) {
// Any Reorder found must be the one directly in front of the Output node.
3878 ASSERT_EQ(MKLDNNPlugin::Output, node->getChildEdgeAt(0)->getChild()->getType());
3881 ASSERT_EQ(reorders_num, 1);
// Regression test for a failing sub-graph of the plate-recognition-barrier
// model: Conv -> ReLU -> FC -> Reshape -> Tile, with the ReLU output also
// concatenated with the tiled branch. Runs inference at batch 1 to capture a
// reference output, then rebuilds the graph at batch 2 with replicated input
// and checks that each batch reproduces the batch-1 result.
3884 TEST_F(MKLDNNGraphStructureTests, TestFailedPartPlateRecognitionBarrier0001) {
3885 std::string model = R"V0G0N(
3886 <net name="net" version="2" batch="1">
3888 <layer name="data" type="Input" precision="FP32" id="0">
3898 <layer id="32" name="conv3_w" precision="FP32" type="Convolution">
3899 <data dilation-x="1" dilation-y="1" group="1" kernel-x="13" kernel-y="1" output="71" pad-x="6" pad-y="0" stride="1,1,1,1" stride-x="1" stride-y="1"/>
3917 <weights offset="0" size="472576"/>
3918 <biases offset="472576" size="284"/>
3921 <layer id="33" name="relu_conv3_w" precision="FP32" type="ReLU">
3922 <data negative_slope="0.0"/>
3940 <layer id="34" name="pattern" precision="FP32" type="FullyConnected">
3941 <data out-size="128"/>
3957 <weights offset="472860" size="3198976"/>
3958 <biases offset="3671836" size="512"/>
3961 <layer id="35" name="reshape" precision="FP32" type="Reshape">
3962 <data axis="0" dim="-1,128,1,1" num_axes="-1"/>
3978 <layer id="36" name="tile" precision="FP32" type="Tile">
3979 <data axis="3" tiles="88"/>
3997 <layer id="37" name="concat" precision="FP32" type="Concat">
4024 <edge from-layer="0" from-port="0" to-layer="32" to-port="0"/>
4025 <edge from-layer="32" from-port="3" to-layer="33" to-port="0"/>
4026 <edge from-layer="33" from-port="1" to-layer="34" to-port="0"/>
4027 <edge from-layer="34" from-port="3" to-layer="35" to-port="0"/>
4028 <edge from-layer="35" from-port="1" to-layer="36" to-port="0"/>
4029 <edge from-layer="33" from-port="1" to-layer="37" to-port="0"/>
4030 <edge from-layer="36" from-port="1" to-layer="37" to-port="1"/>
// Parse the IR and attach random weights covering all declared blobs.
4034 InferenceEngine::CNNNetReader net_reader;
4035 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
4037 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {3672348});
4038 weights->allocate();
4039 fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
4041 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
4043 net_reader.SetWeights(weights_ptr);
// Build the graph and run a batch-1 inference on random input.
4045 MKLDNNGraphTestClass graph;
4046 graph.CreateGraph(net_reader.getNetwork());
4048 InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 128, 1, 88}, InferenceEngine::NCHW);
4049 InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>(desc);
4051 fill_data((float *) src1->buffer(), src1->size());
4053 InferenceEngine::BlobMap srcs;
4054 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data", src1));
4056 InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
4058 InferenceEngine::BlobMap outputBlobs;
4059 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
4061 InferenceEngine::TBlob<float>::Ptr output;
4062 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
4064 outputBlobs[item.first] = output;
4066 graph.Infer(srcs, outputBlobs);
// Capture the batch-1 output as the reference for the batch-2 run.
4068 std::vector<float> refDst(output->size());
4069 auto *data = output->buffer().as<float *>();
4070 for (size_t i = 0; i < output->size(); i++) {
4071 refDst[i] = data[output->getTensorDesc().offset(i)];
4074 // Compare for batch2
4075 net_reader.getNetwork().setBatchSize(2);
4076 graph.CreateGraph(net_reader.getNetwork());
4077 desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {2, 128, 1, 88}, InferenceEngine::NCHW);
// Replicate the batch-1 input into both batches of the batch-2 blob.
4079 InferenceEngine::Blob::Ptr src1Batch = InferenceEngine::make_shared_blob<float>(desc);
4080 src1Batch->allocate();
4081 data = src1Batch->buffer().as<float *>();
4082 auto *originData = src1->buffer().as<float *>();
4083 for(size_t b = 0; b < 2; b++) {
4084 for (size_t i = 0; i < src1->size(); i++) {
4085 data[src1Batch->getTensorDesc().offset(b*src1->size() + i)] = originData[src1->getTensorDesc().offset(i)];
4090 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data", src1Batch));
4091 out = net_reader.getNetwork().getOutputsInfo();
4093 outputBlobs.clear();
4094 item = *out.begin();
4095 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
4097 outputBlobs[item.first] = output;
4099 graph.Infer(srcs, outputBlobs);
// Build the expected batch-2 output by duplicating the batch-1 reference.
4100 InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
4102 data = dstOut->buffer().as<float *>();
4103 for(size_t b = 0; b < 2; b++) {
4104 for (size_t i = 0; i < refDst.size(); i++) {
4105 data[dstOut->getTensorDesc().offset(b*refDst.size() + i)] = refDst[i];
4109 compare(*output, *dstOut);
// Regression test for a failing VNect sub-graph: a Deconvolution whose output
// fans out to an Eltwise "mul" (squaring, both inputs are the same tensor)
// and a Slice. Only graph construction is checked — it must not throw.
4112 TEST_F(MKLDNNGraphStructureTests, TestFailedVNect0001) {
4113 std::string model = R"V0G0N(
4114 <net name="net" version="2" batch="1">
4116 <layer name="data" type="Input" precision="FP32" id="0">
4126 <layer id="207" name="res5c_branch1a" precision="FP32" type="Deconvolution">
4127 <data dilation-x="1" dilation-y="1" group="1" kernel-x="4" kernel-y="4" output="63" pad-x="1" pad-y="1" stride="1,1,2,2" stride-x="2" stride-y="2"/>
4145 <weights offset="0" size="1032192"/>
4148 <layer id="347" name="res5c_branch1a_sqr" precision="FP32" type="Eltwise">
4149 <data operation="mul"/>
4173 <layer id="236" name="split_res5c_branch1a" precision="FP32" type="Slice">
4206 <edge from-layer="0" from-port="0" to-layer="207" to-port="0"/>
4207 <edge from-layer="207" from-port="2" to-layer="347" to-port="0"/>
4208 <edge from-layer="207" from-port="2" to-layer="347" to-port="1"/>
4209 <edge from-layer="207" from-port="2" to-layer="236" to-port="0"/>
4213 InferenceEngine::CNNNetReader net_reader;
4214 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
// NOTE(review): the uint8 weight blob is declared with Precision::FP32,
// unlike sibling tests that use Precision::U8 — confirm this is intended.
4216 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::FP32, InferenceEngine::C, { 1032192 });
4217 weights->allocate();
4218 fill_data((float *)weights->buffer(), weights->size() / sizeof(float));
4220 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
4222 net_reader.SetWeights(weights_ptr);
// The regression: graph construction itself must succeed without throwing.
4224 MKLDNNGraphTestClass graph;
4225 ASSERT_NO_THROW(graph.CreateGraph(net_reader.getNetwork()));
// Regression test for a VNect sub-graph where a Convolution feeds a Slice
// with multiple outputs: verifies the graph exposes all four expected Output
// nodes (out_slice_heatmaps.0 .. .3) after construction.
4228 TEST_F(MKLDNNGraphStructureTests, TestFailedVNect0002) {
4229 std::string model = R"V0G0N(
4230 <net batch="1" name="vnect" version="2">
4232 <layer id="1" name="data" precision="FP32" type="Input">
4242 <layer id="32" name="res5c_branch2c" precision="FP32" type="Convolution">
4243 <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="1" output="84" pad-x="0" pad-y="0" stride="1,1,1,1" stride-x="1" stride-y="1"/>
4261 <weights offset="0" size="43008"/>
4264 <layer id="160" name="slice_heatmaps" precision="FP32" type="Slice">
4303 <edge from-layer="1" from-port="1" to-layer="32" to-port="0"/>
4304 <edge from-layer="32" from-port="2" to-layer="160" to-port="0"/>
4309 InferenceEngine::CNNNetReader net_reader;
4310 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
// NOTE(review): uint8 weight blob declared with Precision::FP32 — sibling
// tests use Precision::U8; verify this is intentional.
4312 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::FP32, InferenceEngine::C, { 43008 });
4313 weights->allocate();
4314 fill_data((float *)weights->buffer(), weights->size() / sizeof(float));
4316 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
4318 net_reader.SetWeights(weights_ptr);
4320 MKLDNNGraphTestClass graph;
4321 graph.CreateGraph(net_reader.getNetwork());
// Count the Output nodes produced for the four Slice branches.
4323 size_t outputs_num = 0;
4324 auto& nodes = graph.getNodes();
4325 for (auto &node : nodes) {
4326 if ( node->getType() == MKLDNNPlugin::Output &&
4327 (node->getName() == "out_slice_heatmaps.0" ||
4328 node->getName() == "out_slice_heatmaps.1" ||
4329 node->getName() == "out_slice_heatmaps.2" ||
4330 node->getName() == "out_slice_heatmaps.3" ) ) {
4334 ASSERT_EQ(outputs_num, 4);
// Regression test for a VNect sub-graph with three independent inputs feeding
// a 5-way Concat (ReLU branch, 3-way Slice branch, and a Power branch).
// No weights are needed; only graph construction is checked for not throwing.
4338 TEST_F(MKLDNNGraphStructureTests, TestFailedVNect0003) {
4339 std::string model = R"V0G0N(
4340 <net name="net" version="2" batch="1">
4342 <layer name="data1" type="Input" precision="FP32" id="0">
4352 <layer name="data2" type="Input" precision="FP32" id="1">
4362 <layer name="data3" type="Input" precision="FP32" id="2">
4372 <layer id="86" name="res5c_branch2a_relu" precision="FP32" type="ReLU">
4373 <data engine="caffe.ReLUParameter.DEFAULT" negative_slope="0.0"/>
4391 <layer id="236" name="split_res5c_branch1a" precision="FP32" type="Slice">
4422 <layer id="67" name="res5c_bone_length" precision="FP32" type="Power">
4423 <data power="0.5" scale="1.0" shift="0.0"/>
4441 <layer id="24" name="res5c_branch2a_feat" precision="FP32" type="Concat">
4486 <edge from-layer="0" from-port="0" to-layer="86" to-port="0"/>
4487 <edge from-layer="1" from-port="0" to-layer="236" to-port="0"/>
4488 <edge from-layer="2" from-port="0" to-layer="67" to-port="0"/>
4489 <edge from-layer="86" from-port="1" to-layer="24" to-port="0"/>
4490 <edge from-layer="236" from-port="1" to-layer="24" to-port="1"/>
4491 <edge from-layer="236" from-port="2" to-layer="24" to-port="2"/>
4492 <edge from-layer="236" from-port="3" to-layer="24" to-port="3"/>
4493 <edge from-layer="67" from-port="1" to-layer="24" to-port="4"/>
// Parse the IR and verify that graph construction succeeds.
4497 InferenceEngine::CNNNetReader net_reader;
4498 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
4500 MKLDNNGraphTestClass graph;
4501 ASSERT_NO_THROW(graph.CreateGraph(net_reader.getNetwork()));
// Checks the Conv -> depthwise-Conv -> Eltwise(sum) -> ReLU -> Power(-1)
// fusing path. All weights/biases are zeroed, so the convolution branch
// contributes nothing to the sum; the expected output is derived from the
// second input blob alone (negated by the final Power layer).
4504 TEST_F(MKLDNNGraphStructureTests, TestConvolutionDWConvolutionSumFusing) {
4505 std::string model = R"V0G0N(
4506 <net name="net" version="2" batch="1">
4508 <layer name="data0" type="Input" precision="FP32" id="0">
4518 <layer name="data1" type="Input" precision="FP32" id="1">
4528 <layer name="conv0" type="Convolution" precision="FP32" id="2">
4529 <convolution_data stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="48" group="1"/>
4546 <weights offset="0" size="6144"/>
4547 <biases offset="6144" size="192"/>
4549 <layer name="conv1" type="Convolution" precision="FP32" id="3">
4550 <convolution_data stride-x="2" stride-y="2" pad-x="1" pad-y="1" kernel-x="3" kernel-y="3" output="48" group="48"/>
4567 <weights offset="6336" size="1728"/>
4568 <biases offset="7872" size="192"/>
4570 <layer name="eltwise" type="Eltwise" precision="FP32" id="4">
4571 <elementwise_data operation="sum"/>
4595 <layer name="relu" type="ReLU" precision="FP32" id="5">
4613 <layer name="power" type="Power" precision="FP32" id="6">
4614 <power_data power="1" scale="-1" shift="0"/>
4634 <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
4635 <edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>
4636 <edge from-layer="3" from-port="1" to-layer="4" to-port="0"/>
4637 <edge from-layer="1" from-port="0" to-layer="4" to-port="1"/>
4638 <edge from-layer="4" from-port="2" to-layer="5" to-port="0"/>
4639 <edge from-layer="5" from-port="1" to-layer="6" to-port="0"/>
// Parse the IR; weights are zero-filled so the conv branch outputs zeros.
4644 InferenceEngine::CNNNetReader net_reader;
4645 net_reader.ReadNetwork(model.data(), model.length());
4647 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {8064});
4648 weights->allocate();
4649 float * data = weights->buffer();
4650 memset((float *) weights->buffer(), 0, weights->size());
4652 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
4654 net_reader.SetWeights(weights_ptr);
4656 MKLDNNGraphTestClass graph;
4657 graph.CreateGraph(net_reader.getNetwork());
// Prepare the two network inputs.
4659 InferenceEngine::TensorDesc src0_desc(InferenceEngine::Precision::FP32, {1, 32, 300, 600}, InferenceEngine::NCHW);
4660 InferenceEngine::Blob::Ptr src0 = InferenceEngine::make_shared_blob<float>(src0_desc);
4662 data = src0->buffer().as<float *>();
4663 for (size_t i = 0; i < src0->size(); i++) {
4667 InferenceEngine::TensorDesc src1_desc(InferenceEngine::Precision::FP32, {1, 48, 150, 300}, InferenceEngine::NCHW);
4668 InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>(src1_desc);
4670 data = src1->buffer().as<float *>();
4671 for (size_t i = 0; i < src1->size(); i++) {
// Reference: final Power layer (scale=-1) negates the src1 values
// (data still points at src1's buffer here).
4675 std::vector<float> refDst(src1->size());
4676 for (size_t i = 0; i < refDst.size(); i++) {
4677 refDst[i] = -1 * data[i];
4680 InferenceEngine::BlobMap srcs;
4681 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data0", src0));
4682 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data1", src1));
4684 InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
4686 InferenceEngine::BlobMap outputBlobs;
4687 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
4689 InferenceEngine::TBlob<float>::Ptr output;
4690 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
4692 outputBlobs[item.first] = output;
// Run inference and compare against the negated-src1 reference.
4694 graph.Infer(srcs, outputBlobs);
4696 InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc(), refDst.data());
4698 compare(*output, *dstOut);
4701 TEST_F(MKLDNNGraphStructureTests, TestConstantLayerAsOutput) {
4702 std::string model = R"V0G0N(
4703 <net batch="1" name="ResNet10_SSD" version="2">
4705 <layer id="0" name="data" precision="FP32" type="Input">
4715 <layer id="1" name="Add_" precision="FP32" type="ScaleShift">
4733 <weights offset="0" size="12"/>
4734 <biases offset="12" size="12"/>
4737 <layer id="2" name="Convolution1" precision="FP32" type="Convolution">
4738 <data dilation-x="1" dilation-y="1" group="1" kernel-x="7" kernel-y="7" output="64" pad-x="3" pad-y="3" stride="1,1,2,2" stride-x="2" stride-y="2"/>
4756 <weights offset="24" size="37632"/>
4757 <biases offset="37656" size="256"/>
4760 <layer id="3" name="x32_priorbox" precision="FP32" type="PriorBoxClustered">
4761 <data clip="0" flip="0" height="118.25800323486328,105.21199798583984,141.15499877929688,128.63600158691406,174.2689971923828,176.98300170898438" offset="0.5" step="32.0" variance="0.10000000149011612,0.10000000149011612,0.20000000298023224,0.20000000298023224" width="104.06500244140625,130.3560028076172,136.86500549316406,179.89199829101562,181.1739959716797,248.28199768066406"/>
4786 <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
4787 <edge from-layer="1" from-port="3" to-layer="2" to-port="0"/>
4788 <edge from-layer="2" from-port="3" to-layer="3" to-port="0"/>
4789 <edge from-layer="1" from-port="3" to-layer="3" to-port="1"/>
4794 InferenceEngine::CNNNetReader net_reader;
4795 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
4797 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {37912});
4798 weights->allocate();
4799 fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
4801 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
4803 net_reader.SetWeights(weights_ptr);
4805 InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
4806 MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
4807 extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*){}));
4809 MKLDNNGraphTestClass graph;
4810 graph.CreateGraph(net_reader.getNetwork(), extMgr);
4812 InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 10, 10}, InferenceEngine::NCHW);
4813 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(desc);
4815 auto *data = src->buffer().as<float *>();
4816 size_t sizeB1 = src->size() / 2;
4817 fill_data(data, sizeB1);
4818 for (size_t i = 0; i < sizeB1; i++) {
4819 data[sizeB1 + i] = data[i];
4822 InferenceEngine::BlobMap srcs;
4823 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data", src));
4825 InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
4827 InferenceEngine::BlobMap outputBlobs;
4828 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
4830 InferenceEngine::TBlob<float>::Ptr output;
4831 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
4833 outputBlobs[item.first] = output;
4835 graph.Infer(srcs, outputBlobs);
4837 std::vector<float> refDst = {-3.603f,-4.313f,6.803f,7.513f,-4.918f,-3.661f,8.118f,6.861f,-5.243f,-5.458f,8.443f,8.658f,-7.395f,-4.832f,10.595f,8.032f,
4838 -7.459f,-7.113f,10.659f,10.313f,-10.814f,-7.249f,14.014f,10.449f,-0.403f,-4.313f,10.003f,7.513f,-1.718f,-3.661f,11.318f,6.861f,
4839 -2.043f,-5.458f,11.643f,8.658f,-4.195f,-4.832f,13.795f,8.032f,-4.259f,-7.113f,13.859f,10.313f,-7.614f,-7.249f,17.214f,10.449f,
4840 2.797f,-4.313f,13.203f,7.513f,1.482f,-3.661f,14.518f,6.861f,1.157f,-5.458f,14.843f,8.658f,-0.995f,-4.832f,16.995f,8.032f,
4841 -1.059f,-7.113f,17.059f,10.313f,-4.414f,-7.249f,20.414f,10.449f,5.997f,-4.313f,16.403f,7.513f,4.682f,-3.661f,17.718f,6.861f,
4842 4.357f,-5.458f,18.043f,8.658f,2.205f,-4.832f,20.195f,8.032f,2.141f,-7.113f,20.259f,10.313f,-1.214f,-7.249f,23.614f,10.449f,
4843 9.197f,-4.313f,19.603f,7.513f,7.882f,-3.661f,20.918f,6.861f,7.557f,-5.458f,21.243f,8.658f,5.405f,-4.832f,23.395f,8.032f,5.341f,
4844 -7.113f,23.459f,10.313f,1.986f,-7.249f,26.814f,10.449f,-3.603f,-1.113f,6.803f,10.713f,-4.918f,-0.461f,8.118f,10.061f,-5.243f,-2.258f,
4845 8.443f,11.858f,-7.395f,-1.632f,10.595f,11.232f,-7.459f,-3.913f,10.659f,13.513f,-10.814f,-4.049f,14.014f,13.649f,-0.403f,-1.113f,
4846 10.003f,10.713f,-1.718f,-0.461f,11.318f,10.061f,-2.043f,-2.258f,11.643f,11.858f,-4.195f,-1.632f,13.795f,11.232f,-4.259f,-3.913f,
4847 13.859f,13.513f,-7.614f,-4.049f,17.214f,13.649f,2.797f,-1.113f,13.203f,10.713f,1.482f,-0.461f,14.518f,10.061f,1.157f,-2.258f,14.843f,
4848 11.858f,-0.995f,-1.632f,16.995f,11.232f,-1.059f,-3.913f,17.059f,13.513f,-4.414f,-4.049f,20.414f,13.649f,5.997f,-1.113f,16.403f,10.713f,
4849 4.682f,-0.461f,17.718f,10.061f,4.357f,-2.258f,18.043f,11.858f,2.205f,-1.632f,20.195f,11.232f,2.141f,-3.913f,20.259f,13.513f,-1.214f,
4850 -4.049f,23.614f,13.649f,9.197f,-1.113f,19.603f,10.713f,7.882f,-0.461f,20.918f,10.061f,7.557f,-2.258f,21.243f,11.858f,5.405f,-1.632f,
4851 23.395f,11.232f,5.341f,-3.913f,23.459f,13.513f,1.986f,-4.049f,26.814f,13.649f,-3.603f,2.087f,6.803f,13.913f,-4.918f,2.739f,8.118f,
4852 13.261f,-5.243f,0.942f,8.443f,15.058f,-7.395f,1.568f,10.595f,14.432f,-7.459f,-0.713f,10.659f,16.713f,-10.814f,-0.849f,14.014f,16.849f,
4853 -0.403f,2.087f,10.003f,13.913f,-1.718f,2.739f,11.318f,13.261f,-2.043f,0.942f,11.643f,15.058f,-4.195f,1.568f,13.795f,14.432f,-4.259f,
4854 -0.713f,13.859f,16.713f,-7.614f,-0.849f,17.214f,16.849f,2.797f,2.087f,13.203f,13.913f,1.482f,2.739f,14.518f,13.261f,1.157f,0.942f,14.843f,
4855 15.058f,-0.995f,1.568f,16.995f,14.432f,-1.059f,-0.713f,17.059f,16.713f,-4.414f,-0.849f,20.414f,16.849f,5.997f,2.087f,16.403f,13.913f,
4856 4.682f,2.739f,17.718f,13.261f,4.357f,0.942f,18.043f,15.058f,2.205f,1.568f,20.195f,14.432f,2.141f,-0.713f,20.259f,16.713f,-1.214f,-0.849f,
4857 23.614f,16.849f,9.197f,2.087f,19.603f,13.913f,7.882f,2.739f,20.918f,13.261f,7.557f,0.942f,21.243f,15.058f,5.405f,1.568f,23.395f,14.432f,
4858 5.341f,-0.713f,23.459f,16.713f,1.986f,-0.849f,26.814f,16.849f,-3.603f,5.287f,6.803f,17.113f,-4.918f,5.939f,8.118f,16.461f,-5.243f,4.142f,
4859 8.443f,18.258f,-7.395f,4.768f,10.595f,17.632f,-7.459f,2.487f,10.659f,19.913f,-10.814f,2.351f,14.014f,20.049f,-0.403f,5.287f,10.003f,
4860 17.113f,-1.718f,5.939f,11.318f,16.461f,-2.043f,4.142f,11.643f,18.258f,-4.195f,4.768f,13.795f,17.632f,-4.259f,2.487f,13.859f,19.913f,
4861 -7.614f,2.351f,17.214f,20.049f,2.797f,5.287f,13.203f,17.113f,1.482f,5.939f,14.518f,16.461f,1.157f,4.142f,14.843f,18.258f,-0.995f,4.768f,
4862 16.995f,17.632f,-1.059f,2.487f,17.059f,19.913f,-4.414f,2.351f,20.414f,20.049f,5.997f,5.287f,16.403f,17.113f,4.682f,5.939f,17.718f,16.461f,
4863 4.357f,4.142f,18.043f,18.258f,2.205f,4.768f,20.195f,17.632f,2.141f,2.487f,20.259f,19.913f,-1.214f,2.351f,23.614f,20.049f,9.197f,5.287f,
4864 19.603f,17.113f,7.882f,5.939f,20.918f,16.461f,7.557f,4.142f,21.243f,18.258f,5.405f,4.768f,23.395f,17.632f,5.341f,2.487f,23.459f,19.913f,
4865 1.986f,2.351f,26.814f,20.049f,-3.603f,8.487f,6.803f,20.313f,-4.918f,9.139f,8.118f,19.661f,-5.243f,7.342f,8.443f,21.458f,-7.395f,7.968f,
4866 10.595f,20.832f,-7.459f,5.687f,10.659f,23.113f,-10.814f,5.551f,14.014f,23.249f,-0.403f,8.487f,10.003f,20.313f,-1.718f,9.139f,11.318f,
4867 19.661f,-2.043f,7.342f,11.643f,21.458f,-4.195f,7.968f,13.795f,20.832f,-4.259f,5.687f,13.859f,23.113f,-7.614f,5.551f,17.214f,23.249f,2.797f,
4868 8.487f,13.203f,20.313f,1.482f,9.139f,14.518f,19.661f,1.157f,7.342f,14.843f,21.458f,-0.995f,7.968f,16.995f,20.832f,-1.059f,5.687f,17.059f,
4869 23.113f,-4.414f,5.551f,20.414f,23.249f,5.997f,8.487f,16.403f,20.313f,4.682f,9.139f,17.718f,19.661f,4.357f,7.342f,18.043f,21.458f,2.205f,
4870 7.968f,20.195f,20.832f,2.141f,5.687f,20.259f,23.113f,-1.214f,5.551f,23.614f,23.249f,9.197f,8.487f,19.603f,20.313f,7.882f,9.139f,20.918f,
4871 19.661f,7.557f,7.342f,21.243f,21.458f,5.405f,7.968f,23.395f,20.832f,5.341f,5.687f,23.459f,23.113f,1.986f,5.551f,26.814f,23.249f,0.100f,
4872 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4873 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4874 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4875 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4876 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4877 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4878 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4879 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4880 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4881 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4882 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4883 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4884 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4885 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4886 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4887 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4888 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4889 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4890 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4891 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4892 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4893 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4894 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4895 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4896 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4897 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4898 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4899 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4900 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,
4901 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f};
4902 InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc(), refDst.data());
4904 compare(*output, *dstOut);
// Checks that forcing gemm-based convolution implementations (via the
// per-layer "PrimitivesPriority" hint) yields the same inference results as
// the default primitive selection on a squeezenet-style subgraph
// (conv -> relu -> pool -> fire2 squeeze/expand -> concat).
// NOTE(review): this listing is an abridged excerpt — raw-string terminators,
// closing braces and parts of the XML (port/dim sections) are elided.
4907 TEST_F(MKLDNNGraphStructureTests, TestGemmConvolutionWithConcat) {
4908 std::string model = R"V0G0N(
4909 <net batch="1" name="squeezenet1.1" version="3">
4911 <layer id="0" name="data" precision="FP32" type="Input">
4921 <layer id="1" name="conv1" precision="FP32" type="Convolution">
4922 <data dilations="1,1" group="1" kernel="3,3" output="64" pads_begin="0,0" pads_end="0,0" strides="2,2"/>
4940 <weights offset="0" size="6912"/>
4941 <biases offset="6912" size="256"/>
4944 <layer id="2" name="relu_conv1" precision="FP32" type="ReLU">
4945 <data negative_slope="0"/>
4963 <layer id="3" name="pool1" precision="FP32" type="Pooling">
4964 <data exclude-pad="false" kernel="3,3" pads_begin="0,0" pads_end="0,0" pool-method="max" rounding_type="ceil" strides="2,2"/>
4982 <layer id="4" name="fire2/squeeze1x1" precision="FP32" type="Convolution">
4983 <data dilation="1,1" group="1" kernel="1,1" output="16" pads_begin="0,0" pads_end="0,0" strides="1,1"/>
5001 <weights offset="7168" size="4096"/>
5002 <biases offset="11264" size="64"/>
5005 <layer id="5" name="fire2/relu_squeeze1x1" precision="FP32" type="ReLU">
5006 <data negative_slope="0"/>
5024 <layer id="6" name="fire2/expand1x1" precision="FP32" type="Convolution">
5025 <data dilation="1,1" group="1" kernel="1,1" output="64" pads_begin="0,0" pads_end="0,0" strides="1,1"/>
5043 <weights offset="11328" size="4096"/>
5044 <biases offset="15424" size="256"/>
5047 <layer id="7" name="fire2/relu_expand1x1" precision="FP32" type="ReLU">
5048 <data negative_slope="0"/>
5066 <layer id="8" name="fire2/expand3x3" precision="FP32" type="Convolution">
5067 <data dilation="1,1" group="1" kernel="3,3" output="64" pads_begin="1,1" pads_end="1,1" strides="1,1"/>
5085 <weights offset="15680" size="36864"/>
5086 <biases offset="52544" size="256"/>
5089 <layer id="9" name="fire2/relu_expand3x3" precision="FP32" type="ReLU">
5090 <data negative_slope="0"/>
5108 <layer id="10" name="fire2/concat" precision="FP32" type="Concat">
5135 <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
5136 <edge from-layer="1" from-port="3" to-layer="2" to-port="0"/>
5137 <edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>
5138 <edge from-layer="3" from-port="1" to-layer="4" to-port="0"/>
5139 <edge from-layer="4" from-port="3" to-layer="5" to-port="0"/>
5140 <edge from-layer="5" from-port="1" to-layer="6" to-port="0"/>
5141 <edge from-layer="6" from-port="3" to-layer="7" to-port="0"/>
5142 <edge from-layer="5" from-port="1" to-layer="8" to-port="0"/>
5143 <edge from-layer="8" from-port="3" to-layer="9" to-port="0"/>
5144 <edge from-layer="7" from-port="1" to-layer="10" to-port="0"/>
5145 <edge from-layer="9" from-port="1" to-layer="10" to-port="1"/>
5150 InferenceEngine::CNNNetReader net_reader;
5151 net_reader.ReadNetwork(model.data(), model.length());
// 52800 bytes of U8 storage backs every <weights>/<biases> offset declared
// in the XML above; filled with synthetic float data.
5153 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {52800});
5154 weights->allocate();
5155 fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
5156 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
5157 net_reader.SetWeights(weights_ptr);
// Helper: stamps the given "PrimitivesPriority" value onto every layer
// (skipped entirely when the string is empty), builds the MKLDNN graph,
// runs inference into outBlobs, and prints per-layer perf counters.
5159 auto graphInfer = [](InferenceEngine::CNNNetwork network, InferenceEngine::BlobMap& inBlobs,
5160 InferenceEngine::BlobMap& outBlobs, std::string primitivesPriority) {
5161 for (auto it = network.begin(); !primitivesPriority.empty() && it !=network.end(); it++) {
5162 (*it)->params["PrimitivesPriority"] = primitivesPriority;
5165 MKLDNNGraphTestClass graph;
5166 graph.CreateGraph(network);
5167 graph.Infer(inBlobs, outBlobs);
5170 std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfMap;
5171 graph.GetPerfData(perfMap);
5173 long long totalTime = 0;
5174 // Print performance counts
5176 std::cout << std::endl << "performance counts:" << std::endl << std::endl;
5177 for (const auto & it : perfMap) {
5178 std::string toPrint(it.first);
5179 const int maxLayerName = 30;
5181 if (it.first.length() >= maxLayerName) {
5182 toPrint = it.first.substr(0, maxLayerName - 4);
5187 std::cout << std::setw(maxLayerName) << std::left << toPrint;
5188 switch (it.second.status) {
5189 case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
5190 std::cout << std::setw(15) << std::left << "EXECUTED";
5192 case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
5193 std::cout << std::setw(15) << std::left << "NOT_RUN";
5195 case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
5196 std::cout << std::setw(15) << std::left << "OPTIMIZED_OUT";
5199 std::cout << std::setw(30) << std::left << "layerType: " + std::string(it.second.layer_type) + " ";
5200 std::cout << std::setw(20) << std::left << "realTime: " + std::to_string(it.second.realTime_uSec);
5201 std::cout << std::setw(20) << std::left << " cpu: " + std::to_string(it.second.cpu_uSec);
5202 std::cout << " execType: " << it.second.exec_type << std::endl;
5203 if (it.second.realTime_uSec > 0) {
5204 totalTime += it.second.realTime_uSec;
5207 std::cout << std::setw(20) << std::left << "Total time: " + std::to_string(totalTime) << " microseconds" << std::endl;
// Prepare one shared set of input blobs plus two output maps: one for the
// baseline run and one reused across all forced-gemm runs.
5211 InferenceEngine::InputsDataMap inputsMap = net_reader.getNetwork().getInputsInfo();
5212 InferenceEngine::BlobMap inputBlobs;
5214 for (const auto& input : inputsMap) {
5215 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(input.second->getTensorDesc());
5217 fill_data((float *) src->buffer(), src->size());
5218 inputBlobs[input.first] = src;
5221 InferenceEngine::OutputsDataMap outsMap = net_reader.getNetwork().getOutputsInfo();
5222 InferenceEngine::BlobMap outputBlobs1;
5223 InferenceEngine::BlobMap outputBlobs2;
5224 for (const auto& output : outsMap) {
5225 InferenceEngine::TBlob<float>::Ptr dst1, dst2;
5226 dst1 = InferenceEngine::make_shared_blob<float>(output.second->getTensorDesc());
5228 outputBlobs1[output.first] = dst1;
5229 dst2 = InferenceEngine::make_shared_blob<float>(output.second->getTensorDesc());
5231 outputBlobs2[output.first] = dst2;
// Baseline (default primitive selection) vs. every gemm flavor: each forced
// run must match the baseline bit-for-bit per compare().
5234 graphInfer(net_reader.getNetwork(), inputBlobs, outputBlobs1, "");
5235 graphInfer(net_reader.getNetwork(), inputBlobs, outputBlobs2, "cpu:gemm_blas");
5236 compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second);
5238 graphInfer(net_reader.getNetwork(), inputBlobs, outputBlobs2, "cpu:gemm_avx512");
5239 compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second);
5241 graphInfer(net_reader.getNetwork(), inputBlobs, outputBlobs2, "cpu:gemm_avx2");
5242 compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second);
5244 graphInfer(net_reader.getNetwork(), inputBlobs, outputBlobs2, "cpu:gemm_sse42");
5245 compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second);
5247 graphInfer(net_reader.getNetwork(), inputBlobs, outputBlobs2, "cpu:gemm_any");
5248 compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second);
// Checks that forcing reference ("ref_any") and gemm+reference primitive
// implementations produces the same results as the default selection on a
// subgraph where one Concat input comes from an average-pooling layer
// (layer id=8 is Pooling here, unlike the sibling gemm/concat test).
// NOTE(review): this listing is an abridged excerpt — raw-string terminators,
// closing braces and parts of the XML (port/dim sections) are elided.
5252 TEST_F(MKLDNNGraphStructureTests, TestRefPoolingWithConcat) {
5253 std::string model = R"V0G0N(
5254 <net batch="1" name="squeezenet1.1" version="3">
5256 <layer id="0" name="data" precision="FP32" type="Input">
5266 <layer id="1" name="conv1" precision="FP32" type="Convolution">
5267 <data dilation="1,1" group="1" kernel="3,3" output="64" pads_begin="0,0" pads_end="0,0" strides="2,2"/>
5285 <weights offset="0" size="6912"/>
5286 <biases offset="6912" size="256"/>
5289 <layer id="2" name="relu_conv1" precision="FP32" type="ReLU">
5290 <data negative_slope="0"/>
5308 <layer id="3" name="pool1" precision="FP32" type="Pooling">
5309 <data exclude-pad="false" kernel="3,3" pads_begin="0,0" pads_end="0,0" pool-method="max" rounding_type="ceil" strides="2,2"/>
5327 <layer id="4" name="fire2/squeeze1x1" precision="FP32" type="Convolution">
5328 <data dilation="1,1" group="1" kernel="1,1" output="16" pads_begin="0,0" pads_end="0,0" strides="1,1"/>
5346 <weights offset="7168" size="4096"/>
5347 <biases offset="11264" size="64"/>
5350 <layer id="5" name="fire2/relu_squeeze1x1" precision="FP32" type="ReLU">
5351 <data negative_slope="0"/>
5369 <layer id="6" name="fire2/expand1x1" precision="FP32" type="Convolution">
5370 <data dilation="1,1" group="1" kernel="1,1" output="64" pads_begin="0,0" pads_end="0,0" strides="1,1"/>
5388 <weights offset="11328" size="4096"/>
5389 <biases offset="15424" size="256"/>
5392 <layer id="7" name="fire2/relu_expand1x1" precision="FP32" type="ReLU">
5393 <data negative_slope="0"/>
5411 <layer id="8" name="fire2/expand3x3" precision="FP32" type="Pooling">
5412 <data exclude-pad="false" kernel="3,3" pads_begin="1,1" pool-method="avg" rounding_type="ceil" stride="1,1"/>
5430 <weights offset="15680" size="36864"/>
5431 <biases offset="52544" size="256"/>
5434 <layer id="10" name="fire2/concat" precision="FP32" type="Concat">
5461 <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
5462 <edge from-layer="1" from-port="3" to-layer="2" to-port="0"/>
5463 <edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>
5464 <edge from-layer="3" from-port="1" to-layer="4" to-port="0"/>
5465 <edge from-layer="4" from-port="3" to-layer="5" to-port="0"/>
5466 <edge from-layer="5" from-port="1" to-layer="6" to-port="0"/>
5467 <edge from-layer="6" from-port="3" to-layer="7" to-port="0"/>
5468 <edge from-layer="5" from-port="1" to-layer="8" to-port="0"/>
5469 <edge from-layer="7" from-port="1" to-layer="10" to-port="0"/>
5470 <edge from-layer="8" from-port="3" to-layer="10" to-port="1"/>
5475 InferenceEngine::CNNNetReader net_reader;
5476 net_reader.ReadNetwork(model.data(), model.length());
// 52800 bytes of U8 storage backs every <weights>/<biases> offset declared
// in the XML above; filled with synthetic float data.
5478 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {52800});
5479 weights->allocate();
5480 fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
5481 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
5482 net_reader.SetWeights(weights_ptr);
// Helper: stamps the given "PrimitivesPriority" value onto every layer
// (skipped entirely when the string is empty), builds the MKLDNN graph,
// runs inference into outBlobs, and prints per-layer perf counters.
5484 auto graphInfer = [](InferenceEngine::CNNNetwork network, InferenceEngine::BlobMap& inBlobs,
5485 InferenceEngine::BlobMap& outBlobs, std::string primitivesPriority) {
5486 for (auto it = network.begin(); !primitivesPriority.empty() && it !=network.end(); it++) {
5487 (*it)->params["PrimitivesPriority"] = primitivesPriority;
5490 MKLDNNGraphTestClass graph;
5491 graph.CreateGraph(network);
5492 graph.Infer(inBlobs, outBlobs);
5495 std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfMap;
5496 graph.GetPerfData(perfMap);
5498 long long totalTime = 0;
5499 // Print performance counts
5501 std::cout << std::endl << "performance counts:" << std::endl << std::endl;
5502 for (const auto & it : perfMap) {
5503 std::string toPrint(it.first);
5504 const int maxLayerName = 30;
5506 if (it.first.length() >= maxLayerName) {
5507 toPrint = it.first.substr(0, maxLayerName - 4);
5512 std::cout << std::setw(maxLayerName) << std::left << toPrint;
5513 switch (it.second.status) {
5514 case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
5515 std::cout << std::setw(15) << std::left << "EXECUTED";
5517 case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
5518 std::cout << std::setw(15) << std::left << "NOT_RUN";
5520 case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
5521 std::cout << std::setw(15) << std::left << "OPTIMIZED_OUT";
5524 std::cout << std::setw(30) << std::left << "layerType: " + std::string(it.second.layer_type) + " ";
5525 std::cout << std::setw(20) << std::left << "realTime: " + std::to_string(it.second.realTime_uSec);
5526 std::cout << std::setw(20) << std::left << " cpu: " + std::to_string(it.second.cpu_uSec);
5527 std::cout << " execType: " << it.second.exec_type << std::endl;
5528 if (it.second.realTime_uSec > 0) {
5529 totalTime += it.second.realTime_uSec;
5532 std::cout << std::setw(20) << std::left << "Total time: " + std::to_string(totalTime) << " microseconds" << std::endl;
// Prepare one shared set of input blobs plus two output maps: one for the
// baseline run and one reused across the forced-implementation runs.
5536 InferenceEngine::InputsDataMap inputsMap = net_reader.getNetwork().getInputsInfo();
5537 InferenceEngine::BlobMap inputBlobs;
5539 for (const auto& input : inputsMap) {
5540 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(input.second->getTensorDesc());
5542 fill_data((float *) src->buffer(), src->size());
5543 inputBlobs[input.first] = src;
5546 InferenceEngine::OutputsDataMap outsMap = net_reader.getNetwork().getOutputsInfo();
5547 InferenceEngine::BlobMap outputBlobs1;
5548 InferenceEngine::BlobMap outputBlobs2;
5549 for (const auto& output : outsMap) {
5550 InferenceEngine::TBlob<float>::Ptr dst1, dst2;
5551 dst1 = InferenceEngine::make_shared_blob<float>(output.second->getTensorDesc());
5553 outputBlobs1[output.first] = dst1;
5554 dst2 = InferenceEngine::make_shared_blob<float>(output.second->getTensorDesc());
5556 outputBlobs2[output.first] = dst2;
// Baseline (default primitive selection) vs. forced gemm+reference and
// pure reference implementations: both must match the baseline per compare().
5559 graphInfer(net_reader.getNetwork(), inputBlobs, outputBlobs1, "");
5560 graphInfer(net_reader.getNetwork(), inputBlobs, outputBlobs2, "cpu:gemm_blas,cpu:ref_any");
5561 compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second);
5563 graphInfer(net_reader.getNetwork(), inputBlobs, outputBlobs2, "cpu:ref_any");
5564 compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second);
// Checks that a Convolution followed by two depthwise-style ops
// (channel-shared PReLU, then ScaleShift) is fused into a single
// Convolution_Depthwise node, and that the fused graph still computes the
// expected values (refDst is filled with the hand-derived constant -61).
// NOTE(review): this listing is an abridged excerpt — raw-string terminators,
// closing braces and parts of the XML (port/dim sections) are elided.
5567 TEST_F(MKLDNNGraphStructureTests, TestConvolutionWith2DepthwiseOpFusing) {
5568 std::string model = R"V0G0N(
5569 <net name="net" version="2" batch="1">
5571 <layer name="data" type="Input" precision="FP32" id="0">
5581 <layer name="conv" type="Convolution" precision="FP32" id="1">
5582 <convolution_data stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="48" group="1"/>
5599 <weights offset="0" size="6144"/>
5600 <biases offset="6144" size="192"/>
5602 <layer name="depthwise0" type="PReLU" precision="FP32" id="2">
5603 <data channel_shared="1"/>
5620 <weights offset="6336" size="4"/>
5622 <layer name="depthwise1" type="ScaleShift" precision="FP32" id="3">
5639 <weights offset="6340" size="192"/>
5640 <biases offset="6532" size="192"/>
5644 <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
5645 <edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
5646 <edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>
5651 InferenceEngine::CNNNetReader net_reader;
5652 net_reader.ReadNetwork(model.data(), model.length());
// 6724 bytes of U8 storage covers all weight/bias offsets above; the float
// at index 1584 (byte offset 6336) is the shared PReLU slope, set to 2.
5654 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {6724});
5655 weights->allocate();
5656 float* wdata = weights->buffer();
5658 for (int i = 0; i < weights->size() / sizeof(float); i++)
5660 wdata[1584] = 2; // 2 for prelu weights
5662 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
5664 net_reader.SetWeights(weights_ptr);
5666 MKLDNNGraphTestClass graph;
5667 graph.CreateGraph(net_reader.getNetwork());
// The fused graph must contain exactly one Convolution_Depthwise node
// (both depthwise ops folded into the convolution) plus I/O and reorders.
5669 const auto& nodes = graph.getNodes();
5670 ASSERT_EQ(nodes.size(), 5);
5671 ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
5672 ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Reorder);
5673 ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Convolution_Depthwise);
5674 ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Reorder);
5675 ASSERT_EQ(nodes[4].get()->getType(), MKLDNNPlugin::Type::Output);
5677 InferenceEngine::TensorDesc src_desc(InferenceEngine::Precision::FP32, {1, 32, 300, 600}, InferenceEngine::NCHW);
5678 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(src_desc);
5680 float* sdata = src->buffer().as<float *>();
5681 for (size_t i = 0; i < src->size(); i++) {
// Expected output is constant across the 1x48x300x600 tensor; the formula
// in the trailing comment documents how -61 is derived.
5685 std::vector<float> refDst(1 * 48 * 300 * 600);
5686 for (size_t i = 0; i < refDst.size(); i++) {
5687 refDst[i] = -61; // (-32 + 1) * 2 * 1 + 1
5690 InferenceEngine::BlobMap srcs;
5691 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data", src));
5693 InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
5695 InferenceEngine::BlobMap outputBlobs;
5696 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
5698 InferenceEngine::TBlob<float>::Ptr output;
5699 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
5701 outputBlobs[item.first] = output;
5703 graph.Infer(srcs, outputBlobs);
5705 InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc(), refDst.data());
5707 compare(*output, *dstOut);
// Checks that a Convolution followed by two element-wise activations
// (Logistic, then Clamp to [0.3, 1]) is fused into a single
// Convolution_Activation node — no Reorder between conv and activations —
// and that the fused graph reproduces the hand-computed reference values.
// NOTE(review): this listing is an abridged excerpt — raw-string terminators,
// closing braces and parts of the XML (port/dim sections) are elided.
5710 TEST_F(MKLDNNGraphStructureTests, TestConvolutionWith2EltwiseOpFusing) {
5711 std::string model = R"V0G0N(
5712 <net name="net" version="2" batch="1">
5714 <layer name="data" type="Input" precision="FP32" id="0">
5724 <layer name="conv" type="Convolution" precision="FP32" id="1">
5725 <convolution_data stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="48" group="1"/>
5742 <weights offset="0" size="192"/>
5743 <biases offset="192" size="192"/>
5745 <layer name="eltwise0" type="Logistic" precision="FP32" id="2">
5763 <layer name="eltwise1" type="Clamp" precision="FP32" id="3">
5764 <data max="1" min="0.3"/>
5784 <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
5785 <edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
5786 <edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>
5791 InferenceEngine::CNNNetReader net_reader;
5792 net_reader.ReadNetwork(model.data(), model.length());
// 384 bytes of U8 storage covers the 192-byte weights + 192-byte biases.
5794 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {384});
5795 weights->allocate();
5796 float* wdata = weights->buffer();
5798 for (int i = 0; i < weights->size() / sizeof(float); i++)
5801 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
5803 net_reader.SetWeights(weights_ptr);
5805 MKLDNNGraphTestClass graph;
5806 graph.CreateGraph(net_reader.getNetwork());
// Both eltwise ops must be folded into one Convolution_Activation node.
5808 const auto& nodes = graph.getNodes();
5809 ASSERT_EQ(nodes.size(), 4);
5810 ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
5811 ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Convolution_Activation);
5812 ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Reorder);
5813 ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Output);
5815 InferenceEngine::TensorDesc src_desc(InferenceEngine::Precision::FP32, {1, 1, 300, 600}, InferenceEngine::NCHW);
5816 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(src_desc);
// Alternating +2/-2 input pixels.
5818 float* sdata = src->buffer().as<float *>();
5819 for (size_t i = 0; i < src->size(); i++) {
5820 sdata[i] = i % 2 == 0 ? 2 : -2;
// Expected: 0.952574127f (≈ logistic output, inside the clamp range) for
// even pixels, and the clamp floor 0.3f for odd pixels.
5823 std::vector<float> refDst(1 * 48 * 300 * 600);
5824 for (size_t i = 0; i < refDst.size(); i++) {
5825 refDst[i] = i % 2 == 0 ? 0.952574127f : 0.3f;
5828 InferenceEngine::BlobMap srcs;
5829 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data", src));
5831 InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
5833 InferenceEngine::BlobMap outputBlobs;
5834 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
5836 InferenceEngine::TBlob<float>::Ptr output;
5837 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
5839 outputBlobs[item.first] = output;
5841 graph.Infer(srcs, outputBlobs);
5843 InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc(), refDst.data());
5845 compare(*output, *dstOut);
// Same depthwise-fusing scenario as TestConvolutionWith2DepthwiseOpFusing,
// but with a grouped convolution (group="2", output="8") so the gemm-based
// convolution path is exercised; here the expected graph has no Reorder
// nodes at all (3 nodes: Input, fused conv, Output).
// NOTE(review): this listing is an abridged excerpt — raw-string terminators,
// closing braces and parts of the XML (port/dim sections) are elided.
5848 TEST_F(MKLDNNGraphStructureTests, TestGemmConvolutionWith2DepthwiseOpFusing) {
5849 std::string model = R"V0G0N(
5850 <net name="net" version="2" batch="1">
5852 <layer name="data" type="Input" precision="FP32" id="0">
5862 <layer name="conv" type="Convolution" precision="FP32" id="1">
5863 <convolution_data stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="8" group="2"/>
5880 <weights offset="0" size="128"/>
5881 <biases offset="128" size="32"/>
5883 <layer name="depthwise0" type="PReLU" precision="FP32" id="2">
5884 <data channel_shared="1"/>
5901 <weights offset="160" size="4"/>
5903 <layer name="depthwise1" type="ScaleShift" precision="FP32" id="3">
5920 <weights offset="164" size="32"/>
5921 <biases offset="196" size="32"/>
5925 <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
5926 <edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
5927 <edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>
5932 InferenceEngine::CNNNetReader net_reader;
5933 net_reader.ReadNetwork(model.data(), model.length());
// 228 bytes of U8 storage covers all weight/bias offsets above; the float
// at index 40 (byte offset 160) is the shared PReLU slope, set to 2.
5935 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {228});
5936 weights->allocate();
5937 float* wdata = weights->buffer();
5939 for (int i = 0; i < weights->size() / sizeof(float); i++)
5941 wdata[40] = 2; // 2 for prelu weights
5943 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
5945 net_reader.SetWeights(weights_ptr);
5947 MKLDNNGraphTestClass graph;
5948 graph.CreateGraph(net_reader.getNetwork());
// Fused graph: exactly Input -> Convolution_Depthwise -> Output, no reorders.
5950 const auto& nodes = graph.getNodes();
5951 ASSERT_EQ(nodes.size(), 3);
5952 ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
5953 ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Convolution_Depthwise);
5954 ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Output);
5956 InferenceEngine::TensorDesc src_desc(InferenceEngine::Precision::FP32, {1, 8, 300, 600}, InferenceEngine::NCHW);
5957 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(src_desc);
5959 float* sdata = src->buffer().as<float *>();
5960 for (size_t i = 0; i < src->size(); i++) {
// Expected output is constant across the 1x8x300x600 tensor; the trailing
// comment documents how -5 is derived.
5964 std::vector<float> refDst(1 * 8 * 300 * 600);
5965 for (size_t i = 0; i < refDst.size(); i++) {
5966 refDst[i] = -5; // (-4 + 1) * 2 * 1 + 1
5969 InferenceEngine::BlobMap srcs;
5970 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("data", src));
5972 InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
5974 InferenceEngine::BlobMap outputBlobs;
5975 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
5977 InferenceEngine::TBlob<float>::Ptr output;
5978 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
5980 outputBlobs[item.first] = output;
5982 graph.Infer(srcs, outputBlobs);
5984 InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc(), refDst.data());
5986 compare(*output, *dstOut);
// Builds a graph that is just Input -> Split (2 channels in, one output per
// channel), checks the resulting node sequence, and verifies each Split
// output carries exactly the per-channel constant it was fed (1.0 for
// channel 1, 2.0 for channel 2).
// NOTE(review): this listing is an abridged excerpt — raw-string terminators,
// closing braces and parts of the XML (port/dim sections) are elided.
5989 TEST_F(MKLDNNGraphStructureTests, TestCreateGraphWithSplit) {
5990 std::string model = R"V0G0N(
5991 <net name="net" version="2" batch="1">
5993 <layer name="data" type="Input" precision="FP32" id="0">
6003 <layer id="71" name="Split" precision="FP32" type="Split">
6030 <edge from-layer="0" from-port="0" to-layer="71" to-port="0"/>
// Input: 1x2x8x8 NCHW where the first channel (first batchSize floats in
// NCHW order) is all 1.0 and the second is all 2.0.
6035 const size_t batchHeight = 8;
6036 const size_t batchWidth = 8;
6037 const InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::FP32, { 1, 2, batchHeight, batchWidth }, InferenceEngine::NCHW);
6038 const size_t batchSize = batchHeight * batchWidth;
6039 const float channel1Value = 1.0;
6040 const float channel2Value = 2.0;
6042 InferenceEngine::Blob::Ptr inputBlob = InferenceEngine::make_shared_blob<float>(tensorDesc);
6043 inputBlob->allocate();
6044 float* inputData = inputBlob->buffer().as<float *>();
6045 for (size_t i = 0; i < inputBlob->size(); i++) {
6046 inputData[i] = (i < batchSize) ? channel1Value : channel2Value;
6049 InferenceEngine::CNNNetReader reader;
6050 reader.ReadNetwork(model.data(), model.size());
// The network has no real weights; a dummy 228-byte blob satisfies SetWeights.
6052 InferenceEngine::TBlob<uint8_t>* weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, { 228 });
6053 weights->allocate();
6054 float* weightsData = weights->buffer();
6055 for (size_t i = 0ULL; i < weights->size() / sizeof(float); i++) {
6056 weightsData[i] = 1.0;
6059 const InferenceEngine::TBlob<uint8_t>::Ptr weightsPtr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
6060 reader.SetWeights(weightsPtr);
6062 MKLDNNGraphTestClass graph;
6063 graph.CreateGraph(reader.getNetwork());
// Expected node sequence: Input, Split, one Reorder, and one Output per
// Split result.
6065 const auto& nodes = graph.getNodes();
6066 ASSERT_EQ(nodes.size(), 5);
6067 ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
6068 ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Split);
6069 ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Reorder);
6070 ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Output);
6071 ASSERT_EQ(nodes[4].get()->getType(), MKLDNNPlugin::Type::Output);
// Split outputs are auto-named "<layer>.<port>": Split.0 and Split.1.
6073 InferenceEngine::OutputsDataMap outputs = reader.getNetwork().getOutputsInfo();
6074 const std::pair<std::string, InferenceEngine::DataPtr> splitOutputItem1 {"Split.0", outputs["Split.0"]};
6075 const std::pair<std::string, InferenceEngine::DataPtr> splitOutputItem2 {"Split.1", outputs["Split.1"]};
6077 std::vector<float> splitExpectedOutputData1(batchSize);
6078 std::vector<float> splitExpectedOutputData2(batchSize);
6079 for (size_t i = 0; i < splitExpectedOutputData1.size(); i++) {
6080 splitExpectedOutputData1[i] = 1.0;
6081 splitExpectedOutputData2[i] = 2.0;
6083 const InferenceEngine::TBlob<float>::Ptr splitExpectedOutputBlob1 = InferenceEngine::make_shared_blob<float>(
6084 splitOutputItem1.second->getTensorDesc(),
6085 splitExpectedOutputData1.data());
6086 const InferenceEngine::TBlob<float>::Ptr splitExpectedOutputBlob2 = InferenceEngine::make_shared_blob<float>(
6087 splitOutputItem2.second->getTensorDesc(),
6088 splitExpectedOutputData2.data());
6090 InferenceEngine::BlobMap outputBlobs;
6093 InferenceEngine::TBlob<float>::Ptr splitOutputBlob1 = InferenceEngine::make_shared_blob<float>(splitOutputItem1.second->getTensorDesc());
6094 splitOutputBlob1->allocate();
6095 outputBlobs[splitOutputItem1.first] = splitOutputBlob1;
6098 InferenceEngine::TBlob<float>::Ptr splitOutputBlob2 = InferenceEngine::make_shared_blob<float>(splitOutputItem2.second->getTensorDesc());
6099 splitOutputBlob2->allocate();
6100 outputBlobs[splitOutputItem2.first] = splitOutputBlob2;
6102 const InferenceEngine::BlobMap inputsBlobMap = { std::pair<std::string, InferenceEngine::Blob::Ptr>("data", inputBlob) };
6103 graph.Infer(inputsBlobMap, outputBlobs);
6105 compare(*splitOutputBlob1, *splitExpectedOutputBlob1);
6106 compare(*splitOutputBlob2, *splitExpectedOutputBlob2);
// Exercises the "fake output" case: Input -> Split where only one Split
// branch feeds a Reshape, leaving the other branch as a dangling network
// output. The model is a printf-style template: %d selects which Split
// output port (1 or 2) is wired to the Reshape, and both variants are run.
// NOTE(review): this listing is an abridged excerpt — raw-string terminators,
// closing braces and parts of the XML (port/dim sections) are elided.
6109 TEST_F(MKLDNNGraphStructureTests, TestCreateGraphWithFakeOutput) {
6110 std::string modelTemplate = R"V0G0N(
6111 <net name="net" version="2" batch="1">
6113 <layer name="data" type="Input" precision="FP32" id="0">
6123 <layer id="71" name="Split" precision="FP32" type="Split">
6148 <layer id="72" name="Reshape" precision="FP32" type="Reshape">
6149 <data axis="0" dim="1,64,64" num_axes="-1"/>
6168 <edge from-layer="0" from-port="0" to-layer="71" to-port="0"/>
6169 <edge from-layer="71" from-port="%d" to-layer="72" to-port="0"/>
// Scratch buffer sized template + slack so sprintf can expand %d safely.
6174 const size_t bufferForValues = 1024;
6175 std::vector<char> model(modelTemplate.size() + bufferForValues);
// Input: 1x2x8x8 NCHW where the first channel is all 1.0 and the second
// is all 2.0 (shared across both template variants).
6177 const size_t batchHeight = 8;
6178 const size_t batchWidth = 8;
6179 const InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::FP32, { 1, 2, batchHeight, batchWidth }, InferenceEngine::NCHW);
6180 const size_t batchSize = batchHeight * batchWidth;
6181 const float channel1Value = 1.0;
6182 const float channel2Value = 2.0;
6184 InferenceEngine::Blob::Ptr inputBlob = InferenceEngine::make_shared_blob<float>(tensorDesc);
6185 inputBlob->allocate();
6186 float* inputData = inputBlob->buffer().as<float *>();
6187 for (size_t i = 0; i < inputBlob->size(); i++) {
6188 inputData[i] = (i < batchSize) ? channel1Value : channel2Value;
// Run once with the Reshape attached to Split port 1, once with port 2.
6191 for (int splitFromPortNumber = 1; splitFromPortNumber <= 2; ++splitFromPortNumber) {
6192 sprintf(model.data(), modelTemplate.c_str(), splitFromPortNumber);
6194 InferenceEngine::CNNNetReader reader;
6195 reader.ReadNetwork(model.data(), model.size());
// The network has no real weights; a dummy 228-byte blob satisfies SetWeights.
6197 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, { 228 });
6198 weights->allocate();
6199 float* weightsData = weights->buffer();
6200 for (size_t i = 0ULL; i < weights->size() / sizeof(float); i++) {
6201 weightsData[i] = 1.0;
6204 const InferenceEngine::TBlob<uint8_t>::Ptr weightsPtr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
6205 reader.SetWeights(weightsPtr);
6207 MKLDNNGraphTestClass graph;
6208 graph.CreateGraph(reader.getNetwork());
// The branch NOT consumed by Reshape surfaces as output "Split.0"/"Split.1";
// which one depends on the port under test.
6210 InferenceEngine::OutputsDataMap outputs = reader.getNetwork().getOutputsInfo();
6211 const std::pair<std::string, InferenceEngine::DataPtr> reshapeOutputItem = std::make_pair("Reshape", outputs["Reshape"]);
6212 const std::string splitOutputName = std::string("Split.") + (splitFromPortNumber == 1 ? "1" : "0");
6213 const std::pair<std::string, InferenceEngine::DataPtr> splitOutputItem = std::make_pair(splitOutputName, outputs[splitOutputName]);
// Port 1 routes channel-1 data (1.0) through Reshape, leaving channel-2
// (2.0) on the bare Split output; port 2 swaps the two.
6215 std::vector<float> reshapeExpectedOutputData(batchSize);
6216 std::vector<float> splitExpectedOutputData(batchSize);
6217 for (size_t i = 0; i < reshapeExpectedOutputData.size(); i++) {
6218 reshapeExpectedOutputData[i] = (splitFromPortNumber == 1) ? 1.0 : 2.0;
6219 splitExpectedOutputData[i] = (splitFromPortNumber == 1) ? 2.0 : 1.0;
6221 const InferenceEngine::TBlob<float>::Ptr reshapeExpectedOutputBlob = InferenceEngine::make_shared_blob<float>(
6222 reshapeOutputItem.second->getTensorDesc(),
6223 reshapeExpectedOutputData.data());
6224 const InferenceEngine::TBlob<float>::Ptr splitExpectedOutputBlob = InferenceEngine::make_shared_blob<float>(
6225 splitOutputItem.second->getTensorDesc(),
6226 splitExpectedOutputData.data());
6228 InferenceEngine::BlobMap outputBlobs;
6231 InferenceEngine::TBlob<float>::Ptr reshapeOutputBlob = InferenceEngine::make_shared_blob<float>(reshapeOutputItem.second->getTensorDesc());
6232 reshapeOutputBlob->allocate();
6233 outputBlobs[reshapeOutputItem.first] = reshapeOutputBlob;
6236 InferenceEngine::TBlob<float>::Ptr splitOutputBlob = InferenceEngine::make_shared_blob<float>(splitOutputItem.second->getTensorDesc());
6237 splitOutputBlob->allocate();
6238 outputBlobs[splitOutputItem.first] = splitOutputBlob;
6240 const InferenceEngine::BlobMap inputsBlobMap = { std::pair<std::string, InferenceEngine::Blob::Ptr>("data", inputBlob) };
6241 graph.Infer(inputsBlobMap, outputBlobs);
6243 compare(*reshapeOutputBlob, *reshapeExpectedOutputBlob);
6244 compare(*splitOutputBlob, *splitExpectedOutputBlob);
// Split (1x2x8x8 NCHW -> two 1x1x8x8 ports) feeding three Reshape layers, with the
// Split layer itself additionally registered as a network output ("split.0").
// Verifies the MKLDNN graph topology (a Reorder + Output pair is materialized per
// consumer) and that each output blob carries the expected per-channel data.
6248 TEST_F(MKLDNNGraphStructureTests, TestCreateGraphWithMultipleData) {
6249 std::string model = R"V0G0N(
6250 <net name="net" version="2" batch="1">
6252 <layer name="data" type="Input" precision="FP32" id="0">
6262 <layer id="1" name="split" precision="FP32" type="Split">
6287 <layer id="2" name="reshape1" precision="FP32" type="Reshape">
6288 <data axis="0" dim="1,64,64" num_axes="-1"/>
6305 <layer id="3" name="reshape2" precision="FP32" type="Reshape">
6306 <data axis="0" dim="1,64,64" num_axes="-1"/>
6323 <layer id="4" name="reshape3" precision="FP32" type="Reshape">
6324 <data axis="0" dim="1,64,64" num_axes="-1"/>
6343 <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
6344 <edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
6345 <edge from-layer="1" from-port="1" to-layer="3" to-port="0"/>
6346 <edge from-layer="1" from-port="2" to-layer="4" to-port="0"/>
6351 const size_t batchHeight = 8;
6352 const size_t batchWidth = 8;
6353 const InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::FP32, { 1, 2, batchHeight, batchWidth }, InferenceEngine::NCHW);
6354 const size_t batchSize = batchHeight * batchWidth;
6355 const float channel1Value = 1.0;
6356 const float channel2Value = 2.0;
// Input: channel 0 is filled with 1.0 and channel 1 with 2.0, so the data routed
// through each Split port is distinguishable in the outputs below.
6358 InferenceEngine::Blob::Ptr inputBlob = InferenceEngine::make_shared_blob<float>(tensorDesc);
6359 inputBlob->allocate();
6360 float* inputData = inputBlob->buffer().as<float *>();
6361 for (size_t i = 0; i < inputBlob->size(); i++) {
6362 inputData[i] = (i < batchSize) ? channel1Value : channel2Value;
6366 InferenceEngine::CNNNetReader reader;
6367 reader.ReadNetwork(model.data(), model.size());
// 228-byte weights blob filled with 1.0f floats; presumably unused by
// Split/Reshape and only required to satisfy SetWeights -- TODO confirm.
6369 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, { 228 });
6370 weights->allocate();
6371 float* weightsData = weights->buffer();
6372 for (size_t i = 0ULL; i < weights->size() / sizeof(float); i++) {
6373 weightsData[i] = 1.0;
6376 const InferenceEngine::TBlob<uint8_t>::Ptr weightsPtr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
6377 reader.SetWeights(weightsPtr);
// Expose the Split layer itself as an extra network output (queried as "split.0" below).
6379 reader.getNetwork().addOutput("split");
6381 MKLDNNGraphTestClass graph;
6382 graph.CreateGraph(reader.getNetwork());
// Expected node layout: Input, Split, then a Reorder -> Reshape -> Output chain for
// each of the three Reshape consumers, plus a final Output node for "split.0".
6384 const auto& nodes = graph.getNodes();
6385 ASSERT_EQ(nodes.size(), 12);
6386 ASSERT_EQ(nodes[0]->getType(), MKLDNNPlugin::Type::Input);
6387 ASSERT_EQ(nodes[1]->getType(), MKLDNNPlugin::Type::Split);
6388 ASSERT_EQ(nodes[2]->getType(), MKLDNNPlugin::Type::Reorder);
6389 ASSERT_EQ(nodes[3]->getType(), MKLDNNPlugin::Type::Reshape);
6390 ASSERT_EQ(nodes[4]->getType(), MKLDNNPlugin::Type::Output);
6391 ASSERT_EQ(nodes[5]->getType(), MKLDNNPlugin::Type::Reorder);
6392 ASSERT_EQ(nodes[6]->getType(), MKLDNNPlugin::Type::Reshape);
6393 ASSERT_EQ(nodes[7]->getType(), MKLDNNPlugin::Type::Output);
6394 ASSERT_EQ(nodes[8]->getType(), MKLDNNPlugin::Type::Reorder);
6395 ASSERT_EQ(nodes[9]->getType(), MKLDNNPlugin::Type::Reshape);
6396 ASSERT_EQ(nodes[10]->getType(), MKLDNNPlugin::Type::Output);
6397 ASSERT_EQ(nodes[11]->getType(), MKLDNNPlugin::Type::Output);
6399 InferenceEngine::OutputsDataMap outputs = reader.getNetwork().getOutputsInfo();
6400 std::vector<std::pair<std::string, InferenceEngine::DataPtr>> outputItems = {
6401 std::make_pair("reshape1", outputs.find("reshape1")->second),
6402 std::make_pair("reshape2", outputs.find("reshape2")->second),
6403 std::make_pair("reshape3", outputs.find("reshape3")->second),
6404 std::make_pair("split.0", outputs.find("split.0")->second)
6407 std::vector<std::vector<float>> expectedOutputData = {
6408 std::vector<float>(batchSize),
6409 std::vector<float>(batchSize),
6410 std::vector<float>(batchSize),
6411 std::vector<float>(batchSize)
// Per the <edge> list above: reshape1/reshape2 consume Split port 1 (channel-0
// data, 1.0), reshape3 consumes port 2 (channel-1 data, 2.0).
6413 for (size_t i = 0; i < batchSize; i++) {
6414 expectedOutputData[0][i] = channel1Value;
6415 expectedOutputData[1][i] = channel1Value;
6416 expectedOutputData[2][i] = channel2Value;
6418 expectedOutputData[3][i] = channel1Value;
6421 std::vector<InferenceEngine::TBlob<float>::Ptr> expectedOutputBlobs(outputs.size());
6422 for (size_t i = 0; i < outputs.size(); i++) {
6423 expectedOutputBlobs[i] = InferenceEngine::make_shared_blob<float>(
6424 outputItems[i].second->getTensorDesc(),
6425 expectedOutputData[i].data());
6428 std::vector<InferenceEngine::TBlob<float>::Ptr> outputBlobs;
6429 outputBlobs.reserve(outputItems.size());
6431 InferenceEngine::BlobMap outputBlobsMap;
6432 for(const std::pair<std::string, InferenceEngine::DataPtr>& item : outputItems) {
6433 InferenceEngine::TBlob<float>::Ptr blob = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
6434 outputBlobs.push_back(blob);
6436 outputBlobsMap[item.first] = blob;
6439 const InferenceEngine::BlobMap inputsBlobMap = { std::pair<std::string, InferenceEngine::Blob::Ptr>("data", inputBlob) };
6440 graph.Infer(inputsBlobMap, outputBlobsMap);
// NOTE(review): only the three Reshape outputs (indices 0-2) are compared; the
// "split.0" blob (index 3) is inferred and has expected data prepared at 6418,
// yet is never checked -- confirm this omission is intentional.
6442 for(size_t i = 0; i < 3; i++) {
6443 compare(*outputBlobs[i], *expectedOutputBlobs[i]);
// Split whose first port ("split.0") both feeds a Power layer (scale = -1) and is
// registered as a network output. Verifies all three fetched blobs: split.0 = 1.0,
// split.1 = 2.0, and power = -1.0 (negated split.0).
6447 TEST_F(MKLDNNGraphStructureTests, TestCreateGraphWithMultipleData_2) {
6448 std::string model = R"V0G0N(
6449 <net name="net" version="2" batch="1">
6451 <layer name="data" type="Input" precision="FP32" id="0">
6461 <layer id="1" name="split" precision="FP32" type="Split">
6486 <layer id="2" name="power" precision="FP32" type="Power">
6487 <data power="1" scale="-1.0" shift="0.0"/>
6507 <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
6508 <edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
6512 using namespace InferenceEngine;
// imgSz is one channel plane's element count (H and W are declared just above).
6516 const size_t imgSz = H * W;
6517 const float channel1Value = 1.0;
6518 const float channel2Value = 2.0;
// Empty weights blob: no layer in this model carries trainable data.
6520 const auto weights = std::make_shared<TBlob<uint8_t>>(Precision::U8, Layout::C, SizeVector{0});
6522 InferenceEngine::CNNNetReader reader;
6523 reader.ReadNetwork(model.data(), model.size());
6524 reader.SetWeights(weights);
6526 auto net = reader.getNetwork();
// Expose Split output port 0 ("split.0") in addition to its Power consumer.
6527 net.addOutput("split", 0);
6529 MKLDNNGraphTestClass graph;
6530 graph.CreateGraph(net);
6532 auto inBlob = make_shared_blob<float>(Precision::FP32, SizeVector{1, 2, H, W});
6533 auto outBlob1 = make_shared_blob<float>(Precision::FP32, SizeVector{1, 1, H, W});
6534 auto outBlob2 = make_shared_blob<float>(Precision::FP32, SizeVector{1, 1, H, W});
6535 auto outBlob3 = make_shared_blob<float>(Precision::FP32, SizeVector{1, 1, H, W});
6538 outBlob1->allocate();
6539 outBlob2->allocate();
6540 outBlob3->allocate();
// Fill channel 0 with 1.0 and channel 1 with 2.0 so the Split ports differ.
6542 auto in_ptr = inBlob->buffer().as<float*>();
6543 for (int i = 0; i < imgSz; i++) {
6544 in_ptr[i] = channel1Value;
6545 in_ptr[i + imgSz] = channel2Value;
6548 BlobMap inputBlobMap = { {"data" , inBlob } },
6549 outputBlobMap = { {"split.0", outBlob1},
6550 {"split.1", outBlob2},
6551 {"power" , outBlob3} };
6553 graph.Infer(inputBlobMap, outputBlobMap);
// True iff every element of the blob equals `val` within a 1e-5 tolerance.
6555 auto out_check = [] ( Blob::Ptr blob, float val) {
6556 auto size = blob->size();
6557 auto ptr = blob->buffer().as<float*>();
6559 for (int i = 0; i < size; i++)
6560 res &= ( std::abs( ptr[i] - val ) < 0.00001f );
// split.0 carries channel 0 (1.0), split.1 channel 1 (2.0); Power negates split.0.
6564 EXPECT_TRUE(out_check(outBlob1, 1));
6565 EXPECT_TRUE(out_check(outBlob2, 2));
6566 EXPECT_TRUE(out_check(outBlob3, -1));
// Builder-API network: one input feeds a Concat directly AND through two 1x1
// convolutions (single-element kernels 2.0 and 3.0). With an input ramp 0..19,
// the concatenated output must be [x, 2x, 3x] per channel (see refDst below).
6569 TEST_F(MKLDNNGraphStructureTests, TestCreateGraphAllDataToConcat) {
6570 using namespace InferenceEngine;
6571 // Build the network.
6572 Builder::Network netBuilder("");
6574 // First input layer
6575 idx_t inpId = netBuilder.addLayer(InferenceEngine::Builder::InputLayer("input").setPort(InferenceEngine::Port({1, 1, 4, 5})));
6577 std::vector<size_t> weightsSize = {1, 1, 1, 1}; // OIHW
6578 auto weights = make_shared_blob<float>(Precision::FP32, InferenceEngine::Layout::OIHW, weightsSize);
6579 weights->allocate();
// Single-element kernel for the first conv: scales the input by 2 (see refDst row 2).
6581 std::vector<float> twos(1, 2);
6583 idx_t weightsId = netBuilder.addLayer({}, Builder::ConstLayer("weights").setData(weights));
6585 // Convolution layer
6586 idx_t firstConvId = netBuilder.addLayer({{inpId}, {weightsId}}, Builder::ConvolutionLayer("conv").setKernel({1, 1})
6587 .setStrides({1, 1}).setDilation({1, 1}).setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0}).setGroup(1).setOutDepth(1));
// Second conv uses a fresh const blob with kernel value 3 (see refDst row 3).
6589 weights = make_shared_blob<float>(Precision::FP32, InferenceEngine::Layout::OIHW, weightsSize);
6590 weights->allocate();
6592 std::vector<float> threes(1, 3);
6593 weights->set(threes);
6595 weightsId = netBuilder.addLayer({}, Builder::ConstLayer("weights").setData(weights));
6596 // Convolution layer
6597 idx_t secondConvId = netBuilder.addLayer({{inpId}, {weightsId}}, Builder::ConvolutionLayer("conv").setKernel({1, 1})
6598 .setStrides({1, 1}).setDilation({1, 1}).setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0}).setGroup(1).setOutDepth(1));
// Concat over channels: raw input plus both convolution results (3 channels total).
6601 idx_t concatId = netBuilder.addLayer({{inpId}, {firstConvId}, {secondConvId}},
6602 InferenceEngine::Builder::ConcatLayer("concat").setAxis(1).setInputPorts(std::vector<InferenceEngine::Port>(3)));
6605 InferenceEngine::Builder::OutputLayer outLayer("output");
6606 netBuilder.addLayer({concatId}, outLayer);
6608 auto cnn = CNNNetwork(Builder::convertToICNNNetwork(netBuilder.build()));
// NOTE(review): dims are listed as {5, 4, 1, 1} / {5, 4, 3, 1} -- reversed relative
// to the NCHW port {1, 1, 4, 5}; this deprecated make_shared_blob overload appears
// to take dims in reverse order -- confirm against the legacy Blob API.
6611 std::vector<size_t> inpSize = {5, 4, 1, 1};
6612 std::vector<size_t> outSize = {5, 4, 3, 1};
6614 InferenceEngine::BlobMap inputBlobs;
6615 InferenceEngine::BlobMap outputBlobs;
6617 std::vector<float> inpData(4*5, 1);
6618 std::vector<float> outData(3*4*5, 1);
6619 for (int i = 0; i < 4*5; ++i)
6624 inputBlobs["input"] = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32, inpSize, &inpData[0]);
6625 outputBlobs["concat"] = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32, outSize, &outData[0]);
6628 MKLDNNGraphTestClass graph;
6629 graph.CreateGraph(cnn);
6630 graph.Infer(inputBlobs, outputBlobs);
// Reference: channel 0 = input ramp, channel 1 = 2 * input, channel 2 = 3 * input.
6632 std::vector<float> refDst = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
6633 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38,
6634 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57};
6636 InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(outputBlobs["concat"]->getTensorDesc(), refDst.data());
6638 compare(*outputBlobs["concat"], *dstOut);
// Builder-API network where every Concat input is the SAME input tensor (three
// edges from one port). Verifies the graph handles a multiply-consumed input and
// produces the input ramp replicated across all three concatenated channels.
6641 TEST_F(MKLDNNGraphStructureTests, TestCreateGraphAllDataFromInputToConcat) {
6642 using namespace InferenceEngine;
6643 // Build the network.
6644 Builder::Network netBuilder("");
6646 // First input layer
6647 idx_t inpId = netBuilder.addLayer(InferenceEngine::Builder::InputLayer("input").setPort(InferenceEngine::Port({1, 1, 4, 5})));
// Concat over channels with the input connected to all three ports.
6650 idx_t concatId = netBuilder.addLayer({{inpId}, {inpId}, {inpId}},
6651 InferenceEngine::Builder::ConcatLayer("concat").setAxis(1).setInputPorts(std::vector<InferenceEngine::Port>(3)));
6654 InferenceEngine::Builder::OutputLayer outLayer("output");
6655 netBuilder.addLayer({concatId}, outLayer);
6657 auto cnn = CNNNetwork(Builder::convertToICNNNetwork(netBuilder.build()));
// NOTE(review): dims {5, 4, 1, 1} / {5, 4, 3, 1} are reversed relative to the NCHW
// port {1, 1, 4, 5}; presumably this deprecated blob overload takes reversed dims
// -- confirm against the legacy Blob API.
6660 std::vector<size_t> inpSize = {5, 4, 1, 1};
6661 std::vector<size_t> outSize = {5, 4, 3, 1};
6663 InferenceEngine::BlobMap inputBlobs;
6664 InferenceEngine::BlobMap outputBlobs;
6666 std::vector<float> inpData(4*5, 1);
6667 std::vector<float> outData(3*4*5, 1);
6668 for (int i = 0; i < 4*5; ++i)
6673 inputBlobs["input"] = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32, inpSize, &inpData[0]);
6674 outputBlobs["concat"] = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32, outSize, &outData[0]);
6677 MKLDNNGraphTestClass graph;
6678 graph.CreateGraph(cnn);
6679 graph.Infer(inputBlobs, outputBlobs);
// Reference: the 0..19 input ramp repeated once per concatenated channel.
6681 std::vector<float> refDst = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
6682 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
6683 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,};
6685 InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(outputBlobs["concat"]->getTensorDesc(), refDst.data());
6687 compare(*outputBlobs["concat"], *dstOut);
// Negative test: the model's ScaleShift layer is malformed (note its weights and
// biases both reference offset 0 of the 64-byte blob; the exact inconsistency with
// the layer dims lies in the elided layer body -- confirm). Graph construction is
// expected to fail with an InferenceEngineException rather than crash later.
6691 TEST_F(MKLDNNGraphStructureTests, TestCheckIncorrectScaleShift) {
6692 std::string model = R"V0G0N(
6693 <net name="net" version="2" batch="1">
6695 <layer name="data" type="Input" precision="FP32" id="0">
6704 <layer id="1" name="test" precision="FP32" type="ScaleShift">
6720 <weights offset="0" size="64"/>
6721 <biases offset="0" size="64"/>
6726 <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
6730 using namespace InferenceEngine;
// 64-byte weights blob backing both the weights and biases references above.
6731 const auto weights = std::make_shared<TBlob<uint8_t>>(Precision::U8, Layout::C, SizeVector{64});
6733 InferenceEngine::CNNNetReader reader;
6734 reader.ReadNetwork(model.data(), model.size());
6735 reader.SetWeights(weights);
// CreateGraph must reject the inconsistent ScaleShift configuration.
6737 MKLDNNGraphTestClass graph;
6738 ASSERT_THROW(graph.CreateGraph(reader.getNetwork()), InferenceEngine::details::InferenceEngineException);