From 7972f47ed44e0b0391d99fd89064b950a06047e8 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Fri, 16 Mar 2018 19:27:04 +0300 Subject: [PATCH] Load networks from intermediate representation of Intel's Deep Learning Deployment Toolkit. --- modules/dnn/include/opencv2/dnn/dnn.hpp | 20 +++++ modules/dnn/perf/perf_net.cpp | 51 ++++-------- modules/dnn/src/dnn.cpp | 119 ++++++++++++++++++-------- modules/dnn/src/op_inf_engine.cpp | 143 ++++++++++++++++++++++---------- modules/dnn/src/op_inf_engine.hpp | 36 +++++++- modules/dnn/test/test_backends.cpp | 54 +++++------- modules/dnn/test/test_layers.cpp | 21 +++++ 7 files changed, 300 insertions(+), 144 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 4812bd0..f1e220c 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -341,6 +341,14 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN CV_WRAP Net(); //!< Default constructor. CV_WRAP ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore. + /** @brief Create a network from Intel's Model Optimizer intermediate representation. + * @param[in] xml XML configuration file with network's topology. + * @param[in] bin Binary file with trained weights. + * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine + * backend. + */ + CV_WRAP static Net readFromModelOptimizer(const String& xml, const String& bin); + /** Returns true if there are no layers in the network. */ CV_WRAP bool empty() const; @@ -691,11 +699,13 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN * * `*.pb` (TensorFlow, https://www.tensorflow.org/) * * `*.t7` | `*.net` (Torch, http://torch.ch/) * * `*.weights` (Darknet, https://pjreddie.com/darknet/) + * * `*.bin` (DLDT, https://software.seek.intel.com/deep-learning-deployment) * @param[in] config Text file contains network configuration. It could be a * file with the following extensions: * * `*.prototxt` (Caffe, http://caffe.berkeleyvision.org/) * * `*.pbtxt` (TensorFlow, https://www.tensorflow.org/) * * `*.cfg` (Darknet, https://pjreddie.com/darknet/) + * * `*.xml` (DLDT, https://software.seek.intel.com/deep-learning-deployment) * @param[in] framework Explicit framework name tag to determine a format. * @returns Net object. * @@ -710,6 +720,16 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN * @warning This function has the same limitations as readNetFromTorch(). */ CV_EXPORTS_W Mat readTorchBlob(const String &filename, bool isBinary = true); + + /** @brief Load a network from Intel's Model Optimizer intermediate representation. + * @param[in] xml XML configuration file with network's topology. + * @param[in] bin Binary file with trained weights. + * @returns Net object. + * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine + * backend. + */ + CV_EXPORTS_W Net readNetFromModelOptimizer(const String &xml, const String &bin); + /** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center, * subtract @p mean values, scales values by @p scalefactor, swap Blue and Red channels. * @param image input image (with 1-, 3- or 4-channels).
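For reference, a minimal usage sketch of the API added above. It is illustrative only: the file names "model.xml"/"model.bin" and the 224x224 input geometry are hypothetical placeholders, and the input size must match the one the IR was generated with.

    #include <opencv2/dnn.hpp>
    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        // Load an Inference Engine IR pair produced by the Model Optimizer.
        // The generic readNet() extended below dispatches by extension, so
        // readNet("model.bin", "model.xml") works too, in either order.
        Net net = readNetFromModelOptimizer("model.xml", "model.bin");

        // Feed a dummy frame; a real application would pass an image.
        Mat frame(Size(224, 224), CV_32FC3, Scalar::all(0.5));
        net.setInput(blobFromImage(frame));
        Mat out = net.forward();  // runs in the Inference Engine backend
        return 0;
    }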
diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index 808bb0d..92719a8 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -30,8 +30,7 @@ public: } void processNet(std::string weights, std::string proto, std::string halide_scheduler, - const Mat& input, const std::string& outputLayer, - const std::string& framework) + const Mat& input, const std::string& outputLayer = "") { if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL) { @@ -57,21 +56,7 @@ public: if (!halide_scheduler.empty()) halide_scheduler = findDataFile(std::string("dnn/halide_scheduler_") + (target == DNN_TARGET_OPENCL ? "opencl_" : "") + halide_scheduler, true); } - if (framework == "caffe") - { - net = cv::dnn::readNetFromCaffe(proto, weights); - } - else if (framework == "torch") - { - net = cv::dnn::readNetFromTorch(weights); - } - else if (framework == "tensorflow") - { - net = cv::dnn::readNetFromTensorflow(weights, proto); - } - else - CV_Error(Error::StsNotImplemented, "Unknown framework " + framework); - + net = readNet(proto, weights); net.setInput(blobFromImage(input, 1.0, Size(), Scalar(), false)); net.setPreferableBackend(backend); net.setPreferableTarget(target); @@ -105,25 +90,25 @@ public: PERF_TEST_P_(DNNTestNetwork, AlexNet) { processNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt", - "alexnet.yml", Mat(cv::Size(227, 227), CV_32FC3), "prob", "caffe"); + "alexnet.yml", Mat(cv::Size(227, 227), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, GoogLeNet) { processNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt", - "", Mat(cv::Size(224, 224), CV_32FC3), "prob", "caffe"); + "", Mat(cv::Size(224, 224), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, ResNet_50) { processNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt", - "resnet_50.yml", Mat(cv::Size(224, 224), CV_32FC3), "prob", "caffe"); + "resnet_50.yml", Mat(cv::Size(224, 224), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, SqueezeNet_v1_1) { processNet("dnn/squeezenet_v1.1.caffemodel", "dnn/squeezenet_v1.1.prototxt", - "squeezenet_v1_1.yml", Mat(cv::Size(227, 227), CV_32FC3), "prob", "caffe"); + "squeezenet_v1_1.yml", Mat(cv::Size(227, 227), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, Inception_5h) @@ -131,35 +116,35 @@ PERF_TEST_P_(DNNTestNetwork, Inception_5h) if (backend == DNN_BACKEND_INFERENCE_ENGINE) throw SkipTestException(""); processNet("dnn/tensorflow_inception_graph.pb", "", "inception_5h.yml", - Mat(cv::Size(224, 224), CV_32FC3), "softmax2", "tensorflow"); + Mat(cv::Size(224, 224), CV_32FC3), "softmax2"); } PERF_TEST_P_(DNNTestNetwork, ENet) { if (backend == DNN_BACKEND_INFERENCE_ENGINE) throw SkipTestException(""); processNet("dnn/Enet-model-best.net", "", "enet.yml", - Mat(cv::Size(512, 256), CV_32FC3), "l367_Deconvolution", "torch"); + Mat(cv::Size(512, 256), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, SSD) { if (backend == DNN_BACKEND_INFERENCE_ENGINE) throw SkipTestException(""); processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", "dnn/ssd_vgg16.prototxt", "disabled", - Mat(cv::Size(300, 300), CV_32FC3), "detection_out", "caffe"); + Mat(cv::Size(300, 300), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, OpenFace) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); processNet("dnn/openface_nn4.small2.v1.t7", "", "", - Mat(cv::Size(96, 96), CV_32FC3), "", "torch"); + Mat(cv::Size(96, 96), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_Caffe) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); 
processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", "", - Mat(cv::Size(300, 300), CV_32FC3), "detection_out", "caffe"); + Mat(cv::Size(300, 300), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_TensorFlow) @@ -168,28 +153,28 @@ PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_TensorFlow) backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); processNet("dnn/ssd_mobilenet_v1_coco.pb", "ssd_mobilenet_v1_coco.pbtxt", "", - Mat(cv::Size(300, 300), CV_32FC3), "", "tensorflow"); + Mat(cv::Size(300, 300), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, DenseNet_121) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", "", - Mat(cv::Size(224, 224), CV_32FC3), "", "caffe"); + Mat(cv::Size(224, 224), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, OpenPose_pose_coco) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); processNet("dnn/openpose_pose_coco.caffemodel", "dnn/openpose_pose_coco.prototxt", "", - Mat(cv::Size(368, 368), CV_32FC3), "", "caffe"); + Mat(cv::Size(368, 368), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, OpenPose_pose_mpi) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi.prototxt", "", - Mat(cv::Size(368, 368), CV_32FC3), "", "caffe"); + Mat(cv::Size(368, 368), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages) @@ -198,7 +183,7 @@ PERF_TEST_P_(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages) // The same .caffemodel but modified .prototxt // See https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/pose/poseParameters.cpp processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi_faster_4_stages.prototxt", "", - Mat(cv::Size(368, 368), CV_32FC3), "", "caffe"); + Mat(cv::Size(368, 368), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, opencv_face_detector) @@ -207,14 +192,14 @@ PERF_TEST_P_(DNNTestNetwork, opencv_face_detector) backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL) throw SkipTestException(""); processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt", "", - Mat(cv::Size(300, 300), CV_32FC3), "", "caffe"); + Mat(cv::Size(300, 300), CV_32FC3)); } PERF_TEST_P_(DNNTestNetwork, Inception_v2_SSD_TensorFlow) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "ssd_inception_v2_coco_2017_11_17.pbtxt", "", - Mat(cv::Size(300, 300), CV_32FC3), "", "tensorflow"); + Mat(cv::Size(300, 300), CV_32FC3)); } const tuple<DNNBackend, DNNTarget> testCases[] = { diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index f782d96..8055cea 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -420,7 +420,6 @@ struct DataLayer : public Layer return false; } -private: std::vector<String> outNames; }; @@ -700,10 +699,10 @@ struct Net::Impl fusion = true; preferableBackend = DNN_BACKEND_DEFAULT; preferableTarget = DNN_TARGET_CPU; + skipInfEngineInit = false; } Ptr<DataLayer> netInputLayer; - std::vector<int> netOutputs; std::vector<LayerPin> blobsToKeep; MapIdToLayerData layers; std::map<String, int> layerNameToId; @@ -711,6 +710,7 @@ struct Net::Impl int preferableBackend; int preferableTarget; String halideConfigFile; + bool skipInfEngineInit; // Map host data to backend specific wrapper.
std::map<void*, Ptr<BackendWrapper> > backendWrappers; @@ -857,7 +857,6 @@ struct Net::Impl clear(); allocateLayers(blobsToKeep_); - computeNetOutputLayers(); initBackend(); if (!netWasAllocated ) @@ -1019,29 +1018,6 @@ struct Net::Impl ldOut.consumers.push_back(LayerPin(inLayerId, outNum)); } - void computeNetOutputLayers() - { - CV_TRACE_FUNCTION(); - - netOutputs.clear(); - - MapIdToLayerData::iterator it; - for (it = layers.begin(); it != layers.end(); it++) - { - int lid = it->first; - LayerData &ld = it->second; - - if (ld.requiredOutputs.size() == 0) - netOutputs.push_back(lid); - } - - #ifndef NDEBUG - std::cout << "\nNet Outputs(" << netOutputs.size() << "):\n"; - for (size_t i = 0; i < netOutputs.size(); i++) - std::cout << layers[netOutputs[i]].name << "\n"; - #endif - } - void initBackend() { CV_TRACE_FUNCTION(); @@ -1150,14 +1126,42 @@ struct Net::Impl void initInfEngineBackend() { - // Build Inference Engine networks from sets of layers that support this - // backend. Split a whole model on several Inference Engine networks if - // some of layers is not implemented. CV_TRACE_FUNCTION(); CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE, haveInfEngine()); #ifdef HAVE_INF_ENGINE MapIdToLayerData::iterator it; Ptr<InfEngineBackendNet> net; + + if (skipInfEngineInit) + { + Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend]; + CV_Assert(!node.empty()); + + Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>(); + CV_Assert(!ieNode.empty()); + + for (it = layers.begin(); it != layers.end(); ++it) + { + LayerData &ld = it->second; + + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]); + dataPtr->name = ld.id == 0 ? netInputLayer->outNames[i] : ld.name; + } + ieNode->net->addBlobs(ld.inputBlobsWrappers); + ieNode->net->addBlobs(ld.outputBlobsWrappers); + ld.skip = true; + } + layers[lastLayerId].skip = false; + ieNode->net->init(); + return; + } + + // Build Inference Engine networks from sets of layers that support this + // backend. Split the whole model into several Inference Engine networks if + // some of the layers are not implemented. + // Set of all input and output blobs wrappers for current network. std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers; for (it = layers.begin(); it != layers.end(); ++it) @@ -1272,7 +1276,7 @@ struct Net::Impl if (!ieNode->net->isInitialized()) { - ieNode->net->initEngine(); + ieNode->net->init(); ld.skip = false; } } @@ -1383,7 +1387,6 @@ struct Net::Impl // scan through all the layers. If there is convolution layer followed by the activation layer, // we try to embed this activation into the convolution and disable separate execution of the activation - std::vector<String> outnames; std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(), blobsToKeep_.end()); MapIdToLayerData::iterator it; @@ -1397,8 +1400,6 @@ struct Net::Impl continue; } printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str())); - if( ld.consumers.size() == 0 ) - outnames.push_back(ld.layerInstance->name); // the optimization #1. try to fuse batch norm, scaling and/or activation layers // with the current layer if they follow it.
Normally, they are fused with the convolution layer, @@ -1912,6 +1913,46 @@ Net::Net() : impl(new Net::Impl) { } +Net Net::readFromModelOptimizer(const String& xml, const String& bin) +{ + Net cvNet; +#ifndef HAVE_INF_ENGINE + CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); +#else + InferenceEngine::CNNNetReader reader; + reader.ReadNetwork(xml); + reader.ReadWeights(bin); + + InferenceEngine::CNNNetwork ieNet = reader.getNetwork(); + + std::vector<String> inputsNames; + for (auto& it : ieNet.getInputsInfo()) + { + inputsNames.push_back(it.first); + } + + cvNet.setInputsNames(inputsNames); + + Ptr<InfEngineBackendNode> backendNode(new InfEngineBackendNode(0)); + backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet)); + for (auto& it : ieNet.getOutputsInfo()) + { + LayerParams lp; + int lid = cvNet.addLayer(it.first, "", lp); + + LayerData& ld = cvNet.impl->layers[lid]; + ld.layerInstance = Ptr<Layer>(new InfEngineBackendLayer(it.second)); + ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode; + + cvNet.connect(0, 0, lid, 0); + } + cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE); + + cvNet.impl->skipInfEngineInit = true; +#endif // HAVE_INF_ENGINE + return cvNet; +} + Net::~Net() { } @@ -2846,10 +2887,22 @@ Net readNet(const String& _model, const String& _config, const String& _framework std::swap(model, config); return readNetFromDarknet(config, model); } + if (framework == "dldt" || modelExt == "bin" || configExt == "bin" || + modelExt == "xml" || configExt == "xml") + { + if (modelExt == "xml" || configExt == "bin") + std::swap(model, config); + return readNetFromModelOptimizer(config, model); + } CV_Error(Error::StsError, "Cannot determine an origin framework of files: " + model + (config.empty() ? "" : ", " + config)); return Net(); } +Net readNetFromModelOptimizer(const String &xml, const String &bin) +{ + return Net::readFromModelOptimizer(xml, bin); +} + CV__DNN_EXPERIMENTAL_NS_END }} // namespace diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index 1b4d260..c43b2b3 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -102,6 +102,18 @@ void InfEngineBackendWrapper::setHostDirty() } +InfEngineBackendNet::InfEngineBackendNet() +{ +} + +InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net) +{ + inputs = net.getInputsInfo(); + outputs = net.getOutputsInfo(); + layers.resize(net.layerCount()); // A hack to execute InfEngineBackendNet::layerCount correctly. + initPlugin(net); +} + void InfEngineBackendNet::Release() noexcept { layers.clear(); @@ -213,57 +225,62 @@ size_t InfEngineBackendNet::getBatchSize() const noexcept return 0; } -void InfEngineBackendNet::initEngine() +void InfEngineBackendNet::init() { - CV_Assert(!isInitialized(), !layers.empty()); - - // Collect all external input blobs. - std::map<std::string, InferenceEngine::DataPtr> internalOutputs; - for (const auto& l : layers) + if (inputs.empty()) { - for (const InferenceEngine::DataWeakPtr& ptr : l->insData) + // Collect all external input blobs.
+ inputs.clear(); + std::map<std::string, InferenceEngine::DataPtr> internalOutputs; + for (const auto& l : layers) { - InferenceEngine::DataPtr inp(ptr); - if (internalOutputs.find(inp->name) == internalOutputs.end()) + for (const InferenceEngine::DataWeakPtr& ptr : l->insData) { - InferenceEngine::InputInfo::Ptr inpInfo(new InferenceEngine::InputInfo()); - inpInfo->setInputData(inp); - if (inputs.find(inp->name) == inputs.end()) - inputs[inp->name] = inpInfo; + InferenceEngine::DataPtr inp(ptr); + if (internalOutputs.find(inp->name) == internalOutputs.end()) + { + InferenceEngine::InputInfo::Ptr inpInfo(new InferenceEngine::InputInfo()); + inpInfo->setInputData(inp); + if (inputs.find(inp->name) == inputs.end()) + inputs[inp->name] = inpInfo; + } + } + for (const InferenceEngine::DataPtr& out : l->outData) + { + // TODO: Replace to uniqueness assertion. + if (internalOutputs.find(out->name) == internalOutputs.end()) + internalOutputs[out->name] = out; + } } - for (const InferenceEngine::DataPtr& out : l->outData) - { - // TODO: Replace to uniqueness assertion. - if (internalOutputs.find(out->name) == internalOutputs.end()) - internalOutputs[out->name] = out; - } + CV_Assert(!inputs.empty()); } - CV_Assert(!inputs.empty()); - // Add all unconnected blobs to output blobs. - InferenceEngine::OutputsDataMap unconnectedOuts; - for (const auto& l : layers) + if (outputs.empty()) { - // Add all outputs. - for (const InferenceEngine::DataPtr& out : l->outData) + // Add all unconnected blobs to output blobs. + InferenceEngine::OutputsDataMap unconnectedOuts; + for (const auto& l : layers) { - // TODO: Replace to uniqueness assertion. - if (unconnectedOuts.find(out->name) == unconnectedOuts.end()) - unconnectedOuts[out->name] = out; + // Add all outputs. + for (const InferenceEngine::DataPtr& out : l->outData) + { + // TODO: Replace to uniqueness assertion. + if (unconnectedOuts.find(out->name) == unconnectedOuts.end()) + unconnectedOuts[out->name] = out; + } + // Remove internally connected outputs. + for (const InferenceEngine::DataWeakPtr& inp : l->insData) + { + unconnectedOuts.erase(InferenceEngine::DataPtr(inp)->name); + } } - // Remove internally connected outputs. - for (const InferenceEngine::DataWeakPtr& inp : l->insData) + CV_Assert(!unconnectedOuts.empty()); + + for (auto it = unconnectedOuts.begin(); it != unconnectedOuts.end(); ++it) { - unconnectedOuts.erase(InferenceEngine::DataPtr(inp)->name); + outputs[it->first] = it->second; } } - CV_Assert(!unconnectedOuts.empty()); - - for (auto it = unconnectedOuts.begin(); it != unconnectedOuts.end(); ++it) - { - outputs[it->first] = it->second; - } // Set up input blobs.
inpBlobs.clear(); @@ -281,20 +298,27 @@ void InfEngineBackendNet::initEngine() outBlobs[it.first] = allBlobs[it.first]; } + if (!isInitialized()) + initPlugin(*this); +} + +void InfEngineBackendNet::initPlugin(InferenceEngine::ICNNNetwork& net) +{ + CV_Assert(!isInitialized()); #ifdef _WIN32 - engine = InferenceEngine::InferenceEnginePluginPtr("MKLDNNPlugin.dll"); + plugin = InferenceEngine::InferenceEnginePluginPtr("MKLDNNPlugin.dll"); #else - engine = InferenceEngine::InferenceEnginePluginPtr("libMKLDNNPlugin.so"); + plugin = InferenceEngine::InferenceEnginePluginPtr("libMKLDNNPlugin.so"); #endif // _WIN32 InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode status = engine->LoadNetwork(*this, &resp); + InferenceEngine::StatusCode status = plugin->LoadNetwork(net, &resp); if (status != InferenceEngine::StatusCode::OK) CV_Error(Error::StsAssert, resp.msg); } bool InfEngineBackendNet::isInitialized() { - return (bool)engine; + return (bool)plugin; } void InfEngineBackendNet::addBlobs(const std::vector<Ptr<BackendWrapper> >& ptrs) @@ -309,7 +333,7 @@ void InfEngineBackendNet::addBlobs(const std::vector<Ptr<BackendWrapper> >& ptrs void InfEngineBackendNet::forward() { InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode status = engine->Infer(inpBlobs, outBlobs, &resp); + InferenceEngine::StatusCode status = plugin->Infer(inpBlobs, outBlobs, &resp); if (status != InferenceEngine::StatusCode::OK) CV_Error(Error::StsAssert, resp.msg); } @@ -373,6 +397,41 @@ void fuseConvWeights(const std::shared_ptr<InferenceEngine::ConvolutionLayer>& c conv->_biases = wrapToInfEngineBlob(b); } +InfEngineBackendLayer::InfEngineBackendLayer(const InferenceEngine::DataPtr& output_) +{ + output = output_; +} + +bool InfEngineBackendLayer::getMemoryShapes(const std::vector<MatShape> &inputs, + const int requiredOutputs, + std::vector<MatShape> &outputs, + std::vector<MatShape> &internals) const +{ + std::vector<size_t> dims = output->dims; + std::vector<int> shape(dims.begin(), dims.end()); + std::reverse(shape.begin(), shape.end()); + outputs.assign(1, shape); + return false; +} + +bool InfEngineBackendLayer::supportBackend(int backendId) +{ + return backendId == DNN_BACKEND_DEFAULT || + backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); +} + +void InfEngineBackendLayer::forward(std::vector<Mat*> &input, std::vector<Mat> &output, + std::vector<Mat> &internals) +{ + CV_Error(Error::StsError, "Choose Inference Engine as a preferable backend."); +} + +void InfEngineBackendLayer::forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, + OutputArrayOfArrays internals) +{ + CV_Error(Error::StsInternal, "Choose Inference Engine as a preferable backend."); +} + #endif // HAVE_INF_ENGINE bool haveInfEngine() diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp index 72e4d0d..ead02c3 100644 --- a/modules/dnn/src/op_inf_engine.hpp +++ b/modules/dnn/src/op_inf_engine.hpp @@ -19,6 +19,10 @@ namespace cv { namespace dnn { class InfEngineBackendNet : public InferenceEngine::ICNNNetwork { public: + InfEngineBackendNet(); + + InfEngineBackendNet(InferenceEngine::CNNNetwork& net); + virtual void Release() noexcept; virtual InferenceEngine::Precision getPrecision() noexcept; @@ -55,7 +59,7 @@ public: virtual size_t getBatchSize() const noexcept; - void initEngine(); + void init(); void addBlobs(const std::vector<Ptr<BackendWrapper> >& wrappers); @@ -70,7 +74,9 @@ private: InferenceEngine::BlobMap inpBlobs; InferenceEngine::BlobMap outBlobs; InferenceEngine::BlobMap allBlobs; - InferenceEngine::InferenceEnginePluginPtr engine; + InferenceEngine::InferenceEnginePluginPtr plugin; + + void
initPlugin(InferenceEngine::ICNNNetwork& net); }; class InfEngineBackendNode : public BackendNode @@ -111,6 +117,32 @@ InferenceEngine::DataPtr infEngineDataNode(const Ptr<BackendWrapper>& ptr); void fuseConvWeights(const std::shared_ptr<InferenceEngine::ConvolutionLayer>& conv, const Mat& w, const Mat& b = Mat()); +// This is a fake class to run networks from Model Optimizer. Objects of this +// class simulate responses of layers that are imported by OpenCV and supported by +// Inference Engine. The main difference is that they do not perform a forward pass. +class InfEngineBackendLayer : public Layer +{ +public: + InfEngineBackendLayer(const InferenceEngine::DataPtr& output); + + virtual bool getMemoryShapes(const std::vector<MatShape> &inputs, + const int requiredOutputs, + std::vector<MatShape> &outputs, + std::vector<MatShape> &internals) const; + + virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, + std::vector<Mat> &internals); + + virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, + OutputArrayOfArrays internals); + + virtual bool supportBackend(int backendId); + +private: + InferenceEngine::DataPtr output; +}; + + #endif // HAVE_INF_ENGINE bool haveInfEngine(); diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index b6d5240..db657ee 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -10,19 +10,6 @@ namespace opencv_test { namespace { -static void loadNet(const std::string& weights, const std::string& proto, - const std::string& framework, Net* net) -{ - if (framework == "caffe") - *net = cv::dnn::readNetFromCaffe(proto, weights); - else if (framework == "torch") - *net = cv::dnn::readNetFromTorch(weights); - else if (framework == "tensorflow") - *net = cv::dnn::readNetFromTensorflow(weights, proto); - else - CV_Error(Error::StsNotImplemented, "Unknown framework " + framework); -} - class DNNTestNetwork : public TestWithParam<tuple<DNNBackend, DNNTarget> > { public: @@ -37,7 +24,7 @@ public: void processNet(const std::string& weights, const std::string& proto, Size inpSize, const std::string& outputLayer, - const std::string& framework, const std::string& halideScheduler = "", + const std::string& halideScheduler = "", double l1 = 1e-5, double lInf = 1e-4) { // Create a common input blob. @@ -45,12 +32,12 @@ public: Mat inp(4, blobSize, CV_32FC1); randu(inp, 0.0f, 1.0f); - processNet(weights, proto, inp, outputLayer, framework, halideScheduler, l1, lInf); + processNet(weights, proto, inp, outputLayer, halideScheduler, l1, lInf); } void processNet(std::string weights, std::string proto, Mat inp, const std::string& outputLayer, - const std::string& framework, std::string halideScheduler = "", + std::string halideScheduler = "", double l1 = 1e-5, double lInf = 1e-4) { if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL) @@ -67,9 +54,8 @@ public: proto = findDataFile(proto, false); // Create two networks - with default backend and target and a tested one. - Net netDefault, net; - loadNet(weights, proto, framework, &netDefault); - loadNet(weights, proto, framework, &net); + Net netDefault = readNet(weights, proto); + Net net = readNet(weights, proto); netDefault.setInput(inp); Mat outDefault = netDefault.forward(outputLayer).clone(); @@ -115,7 +101,7 @@ public: TEST_P(DNNTestNetwork, AlexNet) { processNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt", - Size(227, 227), "prob", "caffe", + Size(227, 227), "prob", target == DNN_TARGET_OPENCL ?
"dnn/halide_scheduler_opencl_alexnet.yml" : "dnn/halide_scheduler_alexnet.yml"); } @@ -123,7 +109,7 @@ TEST_P(DNNTestNetwork, AlexNet) TEST_P(DNNTestNetwork, ResNet_50) { processNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt", - Size(224, 224), "prob", "caffe", + Size(224, 224), "prob", target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_resnet_50.yml" : "dnn/halide_scheduler_resnet_50.yml"); } @@ -131,7 +117,7 @@ TEST_P(DNNTestNetwork, ResNet_50) TEST_P(DNNTestNetwork, SqueezeNet_v1_1) { processNet("dnn/squeezenet_v1.1.caffemodel", "dnn/squeezenet_v1.1.prototxt", - Size(227, 227), "prob", "caffe", + Size(227, 227), "prob", target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_squeezenet_v1_1.yml" : "dnn/halide_scheduler_squeezenet_v1_1.yml"); } @@ -139,13 +125,13 @@ TEST_P(DNNTestNetwork, SqueezeNet_v1_1) TEST_P(DNNTestNetwork, GoogLeNet) { processNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt", - Size(224, 224), "prob", "caffe"); + Size(224, 224), "prob"); } TEST_P(DNNTestNetwork, Inception_5h) { if (backend == DNN_BACKEND_INFERENCE_ENGINE) throw SkipTestException(""); - processNet("dnn/tensorflow_inception_graph.pb", "", Size(224, 224), "softmax2", "tensorflow", + processNet("dnn/tensorflow_inception_graph.pb", "", Size(224, 224), "softmax2", target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_inception_5h.yml" : "dnn/halide_scheduler_inception_5h.yml"); } @@ -153,7 +139,7 @@ TEST_P(DNNTestNetwork, Inception_5h) TEST_P(DNNTestNetwork, ENet) { if (backend == DNN_BACKEND_INFERENCE_ENGINE) throw SkipTestException(""); - processNet("dnn/Enet-model-best.net", "", Size(512, 512), "l367_Deconvolution", "torch", + processNet("dnn/Enet-model-best.net", "", Size(512, 512), "l367_Deconvolution", target == DNN_TARGET_OPENCL ? 
"dnn/halide_scheduler_opencl_enet.yml" : "dnn/halide_scheduler_enet.yml", 2e-5, 0.15); @@ -166,7 +152,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe) Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", - inp, "detection_out", "caffe"); + inp, "detection_out"); } TEST_P(DNNTestNetwork, MobileNet_SSD_TensorFlow) @@ -175,7 +161,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_TensorFlow) Mat sample = imread(findDataFile("dnn/street.png", false)); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); processNet("dnn/ssd_mobilenet_v1_coco.pb", "dnn/ssd_mobilenet_v1_coco.pbtxt", - inp, "detection_out", "tensorflow"); + inp, "detection_out"); } TEST_P(DNNTestNetwork, SSD_VGG16) @@ -185,21 +171,21 @@ TEST_P(DNNTestNetwork, SSD_VGG16) backend == DNN_BACKEND_INFERENCE_ENGINE) throw SkipTestException(""); processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", - "dnn/ssd_vgg16.prototxt", Size(300, 300), "detection_out", "caffe"); + "dnn/ssd_vgg16.prototxt", Size(300, 300), "detection_out"); } TEST_P(DNNTestNetwork, OpenPose_pose_coco) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); processNet("dnn/openpose_pose_coco.caffemodel", "dnn/openpose_pose_coco.prototxt", - Size(368, 368), "", "caffe"); + Size(368, 368), ""); } TEST_P(DNNTestNetwork, OpenPose_pose_mpi) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi.prototxt", - Size(368, 368), "", "caffe"); + Size(368, 368), ""); } TEST_P(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages) @@ -208,13 +194,13 @@ TEST_P(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages) // The same .caffemodel but modified .prototxt // See https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/pose/poseParameters.cpp processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi_faster_4_stages.prototxt", - Size(368, 368), "", "caffe"); + Size(368, 368), ""); } TEST_P(DNNTestNetwork, OpenFace) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); - processNet("dnn/openface_nn4.small2.v1.t7", "", Size(96, 96), "", "torch"); + processNet("dnn/openface_nn4.small2.v1.t7", "", Size(96, 96), ""); } TEST_P(DNNTestNetwork, opencv_face_detector) @@ -223,7 +209,7 @@ TEST_P(DNNTestNetwork, opencv_face_detector) Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false)); Mat inp = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false); processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt", - inp, "detection_out", "caffe"); + inp, "detection_out"); } TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow) @@ -232,7 +218,7 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow) Mat sample = imread(findDataFile("dnn/street.png", false)); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt", - inp, "detection_out", "tensorflow"); + inp, "detection_out"); } const tuple testCases[] = { diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 703f832..d6423b7 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -865,4 +865,25 @@ TEST(Layer_PriorBox, squares) normAssert(out.reshape(1, 4), target); } +#ifdef 
HAVE_INF_ENGINE +// Use Intel's Model Optimizer to generate .xml and .bin files: +// ./ModelOptimizer -w /path/to/caffemodel -d /path/to/prototxt \ +// -p FP32 -i -b ${batch_size} -o /path/to/output/folder +TEST(Layer_Test_Convolution_DLDT, Accuracy) +{ + Net netDefault = readNet(_tf("layer_convolution.caffemodel"), _tf("layer_convolution.prototxt")); + Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin")); + + Mat inp = blobFromNPY(_tf("blob.npy")); + + netDefault.setInput(inp); + Mat outDefault = netDefault.forward(); + + net.setInput(inp); + Mat out = net.forward(); + + normAssert(outDefault, out); +} +#endif // HAVE_INF_ENGINE + }} // namespace -- 2.7.4
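A note on InfEngineBackendLayer::getMemoryShapes in this patch: Inference Engine reports blob dimensions in the reverse of OpenCV's NCHW order, which is why the shape vector is reversed before being returned to OpenCV's shape inference. A standalone sketch of that conversion, with hypothetical dimension values:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main()
    {
        // Dims as an Inference Engine DataPtr would report them for a
        // hypothetical 1x3x224x224 NCHW blob, i.e. reversed: {224, 224, 3, 1}.
        std::vector<size_t> dims = {224, 224, 3, 1};

        // The same conversion InfEngineBackendLayer::getMemoryShapes performs.
        std::vector<int> shape(dims.begin(), dims.end());
        std::reverse(shape.begin(), shape.end());

        for (size_t i = 0; i < shape.size(); ++i)
            printf("%d ", shape[i]);  // prints: 1 3 224 224
        return 0;
    }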