From: Alexander Alekhin
Date: Mon, 7 Mar 2022 22:26:20 +0000 (+0000)
Subject: dnn: use inheritance for OpenVINO net impl
X-Git-Tag: accepted/tizen/unified/20230127.161057~1^2~131^2~1
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ca7f964104727c4832ae499b6e57639b696a643a;p=platform%2Fupstream%2Fopencv.git

dnn: use inheritance for OpenVINO net impl
---
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index 6f03a8c..3382b27 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -52,6 +52,11 @@ namespace cv { namespace dnn {
+
+namespace accessor {
+class DnnNetAccessor;  // forward declaration
+}
+
 CV__DNN_INLINE_NS_BEGIN
 //! @addtogroup dnn
 //! @{
@@ -840,8 +845,12 @@ CV__DNN_INLINE_NS_BEGIN
      */
     CV_WRAP int64 getPerfProfile(CV_OUT std::vector<double>& timings);
 
-private:
+    struct Impl;
+    inline Impl* getImpl() const { return impl.get(); }
+    inline Impl& getImplRef() const { CV_DbgAssert(impl); return *impl.get(); }
+    friend class accessor::DnnNetAccessor;
+protected:
     Ptr<Impl> impl;
 };
diff --git a/modules/dnn/src/dnn_common.hpp b/modules/dnn/src/dnn_common.hpp
index b580b9f..f5c3cce 100644
--- a/modules/dnn/src/dnn_common.hpp
+++ b/modules/dnn/src/dnn_common.hpp
@@ -156,6 +156,18 @@ static inline std::string toString(const Mat& blob, const std::string& name = std::string())
 
 CV__DNN_INLINE_NS_END
+
+namespace accessor {
+class DnnNetAccessor
+{
+public:
+    static inline Ptr<Net::Impl>& getImplPtrRef(Net& net)
+    {
+        return net.impl;
+    }
+};
+}
+
 }}  // namespace
 
 #endif  // __OPENCV_DNN_COMMON_HPP__
diff --git a/modules/dnn/src/dnn_params.cpp b/modules/dnn/src/dnn_params.cpp
index 48e89c6..86a43db 100644
--- a/modules/dnn/src/dnn_params.cpp
+++ b/modules/dnn/src/dnn_params.cpp
@@ -36,11 +36,7 @@ bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES()
 int getParam_DNN_BACKEND_DEFAULT()
 {
     static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
-#ifdef HAVE_INF_ENGINE
-            (size_t)DNN_BACKEND_INFERENCE_ENGINE
-#else
             (size_t)DNN_BACKEND_OPENCV
-#endif
     );
     return PARAM_DNN_BACKEND_DEFAULT;
 }
diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp
index d2bb2f1..49b0345 100644
--- a/modules/dnn/src/ie_ngraph.cpp
+++ b/modules/dnn/src/ie_ngraph.cpp
@@ -988,14 +988,6 @@ InferenceEngine::DataPtr ngraphDataOutputNode(
     return w.dataPtr;
 }
 
-void forwardNgraph(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
-                   Ptr<BackendNode>& node, bool isAsync)
-{
-    CV_Assert(!node.empty());
-    Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
-    CV_Assert(!ieNode.empty());
-    ieNode->net->forward(outBlobsWrappers, isAsync);
-}
 
 void InfEngineNgraphNet::reset()
 {
@@ -1192,12 +1184,6 @@ void InfEngineNgraphNet::forward(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
     }
 }
 
-#else
-void forwardNgraph(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
-                   Ptr<BackendNode>& node, bool isAsync)
-{
-    CV_Assert(false && "nGraph is not enabled in this OpenCV build");
-}
 #endif
 
 }}
diff --git a/modules/dnn/src/ie_ngraph.hpp b/modules/dnn/src/ie_ngraph.hpp
index 0d287a2..9ccc182 100644
--- a/modules/dnn/src/ie_ngraph.hpp
+++ b/modules/dnn/src/ie_ngraph.hpp
@@ -158,9 +158,6 @@ private:
 
 #endif  // HAVE_DNN_NGRAPH
 
-void forwardNgraph(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
-                   Ptr<BackendNode>& node, bool isAsync);
-
 }}  // namespace cv::dnn
diff --git a/modules/dnn/src/layer_internals.hpp b/modules/dnn/src/layer_internals.hpp
index 9ded354..e805ab5 100644
--- a/modules/dnn/src/layer_internals.hpp
+++ b/modules/dnn/src/layer_internals.hpp
@@ -112,6 +112,30 @@ struct LayerData
         return layerInstance;
     }
+
+    void resetAllocation()
+    {
+        if (id == 0)
+            return;  // skip "input" layer (assertion in Net::Impl::allocateLayers)
+
+        layerInstance.release();
+        outputBlobs.clear();
+        inputBlobs.clear();
+        internals.clear();
+
+        outputBlobsWrappers.clear();
+        inputBlobsWrappers.clear();
+        internalBlobsWrappers.clear();
+
+        backendNodes.clear();
+
+        skip = false;
+        flag = 0;
+
+#ifdef HAVE_CUDA
+        cudaD2HBackgroundTransfers.clear();
+#endif
+    }
 };
diff --git a/modules/dnn/src/legacy_backend.cpp b/modules/dnn/src/legacy_backend.cpp
index fa9407a..431c597 100644
--- a/modules/dnn/src/legacy_backend.cpp
+++ b/modules/dnn/src/legacy_backend.cpp
@@ -75,11 +75,7 @@ Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
     }
     else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
-#ifdef HAVE_DNN_NGRAPH
-        return Ptr<BackendWrapper>(new NgraphBackendWrapper(targetId, m));
-#else
-        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO / Inference Engine + nGraph");
-#endif
+        CV_Assert(0 && "Internal error: DNN_BACKEND_INFERENCE_ENGINE_NGRAPH must be implemented through inheritance");
     }
     else if (backendId == DNN_BACKEND_WEBNN)
     {
diff --git a/modules/dnn/src/net.cpp b/modules/dnn/src/net.cpp
index 33f2274..b3cf811 100644
--- a/modules/dnn/src/net.cpp
+++ b/modules/dnn/src/net.cpp
@@ -120,7 +120,7 @@ Net Net::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel)
     CV_TRACE_FUNCTION();
     CV_Assert(impl);
     CV_Assert(!empty());
-    return impl->quantize(calibData, inputsDtype, outputsDtype, perChannel);
+    return impl->quantize(*this, calibData, inputsDtype, outputsDtype, perChannel);
 }
 
 // FIXIT drop from inference API
@@ -146,7 +146,7 @@ void Net::setPreferableBackend(int backendId)
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG(backendId);
     CV_Assert(impl);
-    return impl->setPreferableBackend(backendId);
+    return impl->setPreferableBackend(*this, backendId);
 }
 
 void Net::setPreferableTarget(int targetId)
diff --git a/modules/dnn/src/net_impl.cpp b/modules/dnn/src/net_impl.cpp
index 24fb31f..dfab6de 100644
--- a/modules/dnn/src/net_impl.cpp
+++ b/modules/dnn/src/net_impl.cpp
@@ -30,6 +30,12 @@ std::string detail::NetImplBase::getDumpFileNameBase() const
 }
 
 
+Net::Impl::~Impl()
+{
+    // nothing
+}
+
+
 Net::Impl::Impl()
 {
     // allocate fake net input layer
@@ -46,9 +52,8 @@ Net::Impl::Impl()
     netWasQuantized = false;
     fusion = true;
     isAsync = false;
-    preferableBackend = DNN_BACKEND_DEFAULT;
+    preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT();
     preferableTarget = DNN_TARGET_CPU;
-    skipInfEngineInit = false;
     hasDynamicShapes = false;
 }
 
@@ -86,22 +91,10 @@ void Net::Impl::clear()
 }
 
 
-void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
+void Net::Impl::validateBackendAndTarget()
 {
     CV_TRACE_FUNCTION();
 
-    if (dumpLevel && networkDumpCounter == 0)
-    {
-        dumpNetworkToFile();
-    }
-
-    if (preferableBackend == DNN_BACKEND_DEFAULT)
-        preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT();
-#ifdef HAVE_INF_ENGINE
-    if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
-        preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;  // = getInferenceEngineBackendTypeParam();
-#endif
-
     CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
               preferableTarget == DNN_TARGET_CPU ||
               preferableTarget == DNN_TARGET_OPENCL ||
@@ -109,19 +102,6 @@ void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
     CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
               preferableTarget == DNN_TARGET_CPU ||
               preferableTarget == DNN_TARGET_OPENCL);
-#ifdef HAVE_INF_ENGINE
-    if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-    {
-        CV_Assert(
-              (preferableTarget == DNN_TARGET_CPU && (!isArmComputePlugin() || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) ||
-              preferableTarget == DNN_TARGET_OPENCL ||
-              preferableTarget == DNN_TARGET_OPENCL_FP16 ||
-              preferableTarget == DNN_TARGET_MYRIAD ||
-              preferableTarget == DNN_TARGET_HDDL ||
-              preferableTarget == DNN_TARGET_FPGA
-        );
-    }
-#endif
 #ifdef HAVE_WEBNN
     if (preferableBackend == DNN_BACKEND_WEBNN)
     {
@@ -136,6 +116,20 @@ void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
     CV_Assert(preferableBackend != DNN_BACKEND_TIMVX ||
               preferableTarget == DNN_TARGET_NPU);
 
+    CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && "Inheritance internal error");
+}
+
+void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
+{
+    CV_TRACE_FUNCTION();
+
+    if (dumpLevel && networkDumpCounter == 0)
+    {
+        dumpNetworkToFile();
+    }
+
+    validateBackendAndTarget();
+
     if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
     {
         if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
@@ -813,12 +807,10 @@ void Net::Impl::forwardLayer(LayerData& ld)
     {
         forwardHalide(ld.outputBlobsWrappers, node);
     }
-#ifdef HAVE_INF_ENGINE
     else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
-        forwardNgraph(ld.outputBlobsWrappers, node, isAsync);
+        CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && "Inheritance internal error");
     }
-#endif
     else if (preferableBackend == DNN_BACKEND_WEBNN)
     {
         forwardWebnn(ld.outputBlobsWrappers, node, isAsync);
@@ -844,7 +836,7 @@ void Net::Impl::forwardLayer(LayerData& ld)
 #endif
     else
     {
-        CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
+        CV_Error(Error::StsNotImplemented, cv::format("Unknown backend identifier: %d", preferableBackend));
     }
 }
 
@@ -1369,30 +1361,7 @@ Mat Net::Impl::getBlob(String outputName) const
 AsyncArray Net::Impl::getBlobAsync(const LayerPin& pin)
 {
     CV_TRACE_FUNCTION();
-#ifdef HAVE_INF_ENGINE
-    if (!pin.valid())
-        CV_Error(Error::StsObjectNotFound, "Requested blob not found");
-
-    LayerData& ld = layers[pin.lid];
-    if ((size_t)pin.oid >= ld.outputBlobs.size())
-    {
-        CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, "
-                                              "the #%d was requested",
-                                              ld.name.c_str(), (int)ld.outputBlobs.size(), (int)pin.oid));
-    }
-    if (preferableTarget != DNN_TARGET_CPU)
-    {
-        CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
-        // Transfer data to CPU if it's require.
-        ld.outputBlobsWrappers[pin.oid]->copyToHost();
-    }
-    CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-
-    Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
-    return std::move(wrapper->futureMat);
-#else
     CV_Error(Error::StsNotImplemented, "DNN: OpenVINO/nGraph backend is required");
-#endif  // HAVE_INF_ENGINE
 }
diff --git a/modules/dnn/src/net_impl.hpp b/modules/dnn/src/net_impl.hpp
index 5f0563d..269a46b 100644
--- a/modules/dnn/src/net_impl.hpp
+++ b/modules/dnn/src/net_impl.hpp
@@ -38,7 +38,12 @@ struct Net::Impl : public detail::NetImplBase
     typedef std::map<int, LayerShapes> LayersShapesMap;
     typedef std::map<int, LayerData> MapIdToLayerData;
 
+    virtual ~Impl();
     Impl();
+    Impl(const Impl&) = delete;
+
+    // Inheritance support
+    Ptr<Net::Impl> basePtr_;
 
     Ptr<DataLayer> netInputLayer;
     std::vector<LayerPin> blobsToKeep;
@@ -49,7 +54,7 @@ struct Net::Impl : public detail::NetImplBase
     int preferableBackend;
     int preferableTarget;
     String halideConfigFile;
-    bool skipInfEngineInit;
+//    bool skipInfEngineInit;
     bool hasDynamicShapes;
     // Map host data to backend specific wrapper.
     std::map<void*, Ptr<BackendWrapper> > backendWrappers;
 
@@ -59,19 +64,22 @@ struct Net::Impl : public detail::NetImplBase
     bool netWasAllocated;
     bool netWasQuantized;
     bool fusion;
-    bool isAsync;
+    bool isAsync;  // FIXIT: drop
     std::vector<int64> layersTimings;
 
-    bool empty() const;
-    void setPreferableBackend(int backendId);
-    void setPreferableTarget(int targetId);
+    virtual bool empty() const;
+    virtual void setPreferableBackend(Net& net, int backendId);
+    virtual void setPreferableTarget(int targetId);
 
     // FIXIT use inheritance
-    Ptr<BackendWrapper> wrap(Mat& host);
+    virtual Ptr<BackendWrapper> wrap(Mat& host);
+
+    virtual void clear();
 
-    void clear();
+
+    virtual void validateBackendAndTarget();
 
     void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>());
@@ -118,7 +126,7 @@ struct Net::Impl : public detail::NetImplBase
     void setInputsNames(const std::vector<String>& inputBlobNames);
     void setInputShape(const String& inputName, const MatShape& shape);
-    void setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean);
+    virtual void setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean);
     Mat getParam(int layer, int numParam) const;
     void setParam(int layer, int numParam, const Mat& blob);
     std::vector<Ptr<Layer> > getLayerInputs(int layerId) const;
@@ -130,8 +138,7 @@ struct Net::Impl : public detail::NetImplBase
     int getLayersCount(const String& layerType) const;
 
-    // FIXIT use inheritance
-    void initBackend(const std::vector<LayerPin>& blobsToKeep_);
+    virtual void initBackend(const std::vector<LayerPin>& blobsToKeep_);
 
     void setHalideScheduler(const String& scheduler);
 #ifdef HAVE_HALIDE
     void initHalideBackend();
 #endif
 
-#ifdef HAVE_DNN_NGRAPH
-    void addNgraphOutputs(LayerData& ld);
-    void initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_);
-#endif
-
 #ifdef HAVE_WEBNN
     void addWebnnOutputs(LayerData& ld);
     void initWebnnBackend(const std::vector<LayerPin>& blobsToKeep_);
 #endif
@@ -183,11 +185,11 @@ struct Net::Impl : public detail::NetImplBase
     // TODO add getter
     void enableFusion(bool fusion_);
 
-    void fuseLayers(const std::vector<LayerPin>& blobsToKeep_);
+    virtual void fuseLayers(const std::vector<LayerPin>& blobsToKeep_);
 
     void allocateLayers(const std::vector<LayerPin>& blobsToKeep_);
 
-    void forwardLayer(LayerData& ld);
+    virtual void forwardLayer(LayerData& ld);
 
     void forwardToLayer(LayerData& ld, bool clearFlags = true);
@@ -243,22 +245,17 @@ struct Net::Impl : public detail::NetImplBase
     Mat getBlob(String outputName) const;
 
 #ifdef CV_CXX11
-    AsyncArray getBlobAsync(const LayerPin& pin);
+    virtual AsyncArray getBlobAsync(const LayerPin& pin);
 
     AsyncArray getBlobAsync(String outputName);
 #endif  // CV_CXX11
 
-#ifdef HAVE_INF_ENGINE
-    static
-    Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
-#endif
-
     string dump(bool forceAllocation = false) const;
     void dumpNetworkToFile() const;
 
     // FIXIT drop from inference API
-    Net quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel) /*const*/;
+    Net quantize(Net& net, InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel) /*const*/;
     void getInputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/;
     void getOutputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/;
diff --git a/modules/dnn/src/net_impl_backend.cpp b/modules/dnn/src/net_impl_backend.cpp
index e26126d..c9c61eb 100644
--- a/modules/dnn/src/net_impl_backend.cpp
+++ b/modules/dnn/src/net_impl_backend.cpp
@@ -109,11 +109,7 @@ void Net::Impl::initBackend(const std::vector<LayerPin>& blobsToKeep_)
     }
     else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
-#ifdef HAVE_DNN_NGRAPH
-        initNgraphBackend(blobsToKeep_);
-#else
-        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO");
-#endif
+        CV_Assert(0 && "Inheritance must be used with OpenVINO backend");
     }
     else if (preferableBackend == DNN_BACKEND_WEBNN)
     {
@@ -154,26 +150,30 @@ void Net::Impl::initBackend(const std::vector<LayerPin>& blobsToKeep_)
 }
 
 
-void Net::Impl::setPreferableBackend(int backendId)
+void Net::Impl::setPreferableBackend(Net& net, int backendId)
 {
     if (backendId == DNN_BACKEND_DEFAULT)
         backendId = (Backend)getParam_DNN_BACKEND_DEFAULT();
 
+    if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
+        backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;  // = getInferenceEngineBackendTypeParam();
+
     if (netWasQuantized && backendId != DNN_BACKEND_OPENCV && backendId != DNN_BACKEND_TIMVX)
     {
         CV_LOG_WARNING(NULL, "DNN: Only default and TIMVX backends support quantized networks");
         backendId = DNN_BACKEND_OPENCV;
     }
 
-#ifdef HAVE_INF_ENGINE
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
-        backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
-#endif
-
     if (preferableBackend != backendId)
     {
         preferableBackend = backendId;
         clear();
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        {
+            switchToOpenVINOBackend(net);
+        }
+#endif
     }
 }
diff --git a/modules/dnn/src/net_openvino.cpp b/modules/dnn/src/net_openvino.cpp
index a546b02..77186f0 100644
--- a/modules/dnn/src/net_openvino.cpp
+++ b/modules/dnn/src/net_openvino.cpp
@@ -17,11 +17,205 @@ CV__DNN_INLINE_NS_BEGIN
 
 #ifdef HAVE_INF_ENGINE
 
+// TODO: use "string" target specifier
+class NetImplOpenVINO CV_FINAL : public Net::Impl
+{
+public:
+    typedef Net::Impl Base;
+
+    // this default constructor is used with OpenVINO native loader
+    // TODO: dedicated Impl?
+    NetImplOpenVINO()
+        : Net::Impl()
+    {
+        preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+    }
+
+    // constructor to derive execution implementation from the loaded network
+    explicit NetImplOpenVINO(const Ptr<Net::Impl>& basePtr)
+        : Net::Impl()
+    {
+        basePtr_ = basePtr;
+        init();
+    }
+
+    void init()
+    {
+        CV_TRACE_FUNCTION();
+        CV_Assert(basePtr_);
+        Net::Impl& base = *basePtr_;
+        CV_Assert(!base.netWasAllocated);
+        CV_Assert(!base.netWasQuantized);
+        netInputLayer = base.netInputLayer;
+        blobsToKeep = base.blobsToKeep;
+        layers = base.layers;
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
+        {
+            LayerData& ld = it->second;
+            ld.resetAllocation();
+        }
+        layerNameToId = base.layerNameToId;
+        outputNameToId = base.outputNameToId;
+        //blobManager = base.blobManager;
+        preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;  //base.preferableBackend;
+        preferableTarget = base.preferableTarget;
+        hasDynamicShapes = base.hasDynamicShapes;
+        CV_Assert(base.backendWrappers.empty());  //backendWrappers = base.backendWrappers;
+        lastLayerId = base.lastLayerId;
+        netWasAllocated = base.netWasAllocated;
+        netWasQuantized = base.netWasQuantized;
+        fusion = base.fusion;
+    }
+
+
+    //bool isAsync;  // FIXIT: drop
+
+
+    bool empty() const override
+    {
+        return Base::empty();
+    }
+    void setPreferableBackend(Net& net, int backendId) override
+    {
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return;  // no-op
+        if (!basePtr_)
+            CV_Error(Error::StsError, "DNN: Can't switch backend of network created by OpenVINO");
+        Ptr<Net::Impl>& impl_ptr_ref = accessor::DnnNetAccessor::getImplPtrRef(net);
+        impl_ptr_ref = basePtr_;
+        return basePtr_->setPreferableBackend(net, backendId);
+    }
+    void setPreferableTarget(int targetId) override
+    {
+        if (preferableTarget != targetId)
+        {
+            preferableTarget = targetId;
+            clear();
+        }
+    }
+
+    Ptr<BackendWrapper> wrap(Mat& host) override
+    {
+        return Ptr<BackendWrapper>(new NgraphBackendWrapper(preferableTarget, host));
+    }
+
+
+    void clear() override
+    {
+        Base::clear();
+    }
+
+    void validateBackendAndTarget() override
+    {
+        CV_TRACE_FUNCTION();
+
+        CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
+        CV_Check((int)preferableTarget,
+              preferableTarget == DNN_TARGET_CPU ||
+              preferableTarget == DNN_TARGET_OPENCL ||
+              preferableTarget == DNN_TARGET_OPENCL_FP16 ||
+              preferableTarget == DNN_TARGET_MYRIAD ||
+              preferableTarget == DNN_TARGET_HDDL ||
+              preferableTarget == DNN_TARGET_FPGA,
+              "Unknown OpenVINO target"
+        );
+    }
+
+    //void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>()) override;
+
+    //void setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean) override;
+
+    void addNgraphOutputs(LayerData& ld);
+
+    void initBackend(const std::vector<LayerPin>& blobsToKeep_) override;
+
+    void fuseLayers(const std::vector<LayerPin>& blobsToKeep_) override;
+
+    //void allocateLayers(const std::vector<LayerPin>& blobsToKeep_) override;
+
+    void forwardLayer(LayerData& ld) override;
+
+    AsyncArray getBlobAsync(const LayerPin& pin) override;
+
+    //string dump(bool forceAllocation = false) const override;
+
+    static
+    Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
+
+};  // NetImplOpenVINO
+
+
+void NetImplOpenVINO::forwardLayer(LayerData& ld)
+{
+    CV_TRACE_FUNCTION();
+
+    Ptr<Layer> layer = ld.layerInstance;
+
+    if (!ld.skip)
+    {
+        auto it = ld.backendNodes.find(preferableBackend);
+        if (ld.id == 0 ||  // input layer
+            it == ld.backendNodes.end()  // non-supported layer or its mode
+        )
+        {
+            return Base::forwardLayer(ld);
+        }
+
+        CV_Assert(it != ld.backendNodes.end());
+        const Ptr<BackendNode>& node = it->second;
+        CV_Assert(!node.empty());
+        Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
+        CV_Assert(!ieNode.empty());
+        CV_Assert(ieNode->net);
+
+        TickMeter tm;
+        tm.start();
+
+        ieNode->net->forward(ld.outputBlobsWrappers, isAsync);
+
+        tm.stop();
+        int64 t = tm.getTimeTicks();
+        layersTimings[ld.id] = (t > 0) ? t : t + 1;  // zero for skipped layers only
+    }
+    else
+    {
+        layersTimings[ld.id] = 0;
+    }
+
+    ld.flag = 1;
+}
+
+AsyncArray NetImplOpenVINO::getBlobAsync(const LayerPin& pin)
+{
+    CV_TRACE_FUNCTION();
+    if (!pin.valid())
+        CV_Error(Error::StsObjectNotFound, "Requested blob not found");
+
+    LayerData& ld = layers[pin.lid];
+    if ((size_t)pin.oid >= ld.outputBlobs.size())
+    {
+        CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produces only %d outputs, "
+                                              "the #%d was requested",
+                                              ld.name.c_str(), (int)ld.outputBlobs.size(), (int)pin.oid));
+    }
+    if (preferableTarget != DNN_TARGET_CPU)
+    {
+        CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
+        // Transfer data to CPU if it's required.
+        ld.outputBlobsWrappers[pin.oid]->copyToHost();
+    }
+    CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
+
+    Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
+    return std::move(wrapper->futureMat);
+}
+
 /** mark input pins as outputs from other subnetworks
  * FIXIT must be done by DNN engine not ngraph.
  */
-void Net::Impl::addNgraphOutputs(LayerData& ld)
+void NetImplOpenVINO::addNgraphOutputs(LayerData& ld)
 {
     CV_TRACE_FUNCTION();
 
@@ -59,7 +253,7 @@ void Net::Impl::addNgraphOutputs(LayerData& ld)
     }
 }
 
-void Net::Impl::initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
+void NetImplOpenVINO::initBackend(const std::vector<LayerPin>& blobsToKeep_)
 {
     CV_TRACE_FUNCTION();
     CV_CheckEQ(preferableBackend, DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, "");
@@ -92,7 +286,7 @@ void Net::Impl::initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
         }
     }
 
-    if (skipInfEngineInit)
+    if (!basePtr_)  // model is loaded by OpenVINO
     {
         Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
         CV_Assert(!node.empty());
@@ -399,10 +593,104 @@ void Net::Impl::initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
     }
 }
 
-//}  // Net::Impl
+
+#if 0
+#define printf_(args) printf args
+#else
+#define printf_(args)
+#endif
+
+void NetImplOpenVINO::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
+{
+    CV_TRACE_FUNCTION();
+
+    if(!fusion)
+        return;
+
+    CV_Check((int)preferableBackend, preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, "");
+
+#if 0  // FIXIT mode without fusion is broken due to unsupported layers and handling of "custom" nodes
+    return;
+#endif
+
+    // scan through all the layers. If there is convolution layer followed by the activation layer,
+    // we try to embed this activation into the convolution and disable separate execution of the activation
+
+    // FIXIT replace by layersToKeep to avoid hacks like "LayerPin(lid, 0)"
+    std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
+                                  blobsToKeep_.end());
+    for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
+    {
+        int lid = it->first;
+        LayerData& ld = layers[lid];
+        if (ld.skip)
+        {
+            printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
+            continue;
+        }
+        printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
+
+        // the optimization #1. try to fuse batch norm, scaling and/or activation layers
+        // with the current layer if they follow it. Normally, they are fused with the convolution layer,
+        // but some of them (like activation) may be fused with fully-connected, elemwise (+) and
+        // some other layers.
+        Ptr<Layer>& currLayer = ld.layerInstance;
+        if (ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0)
+        {
+            LayerData* nextData = &layers[ld.consumers[0].lid];
+            LayerPin lpNext(ld.consumers[0].lid, 0);
+            while (nextData)
+            {
+                if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && pinsToKeep.count(lpNext) != 0)
+                {
+                    CV_LOG_DEBUG(NULL, "DNN/IE: skip fusing with 'output' node: " << nextData->name << "@" << nextData->type);
+                    break;
+                }
+
+                /* we use `tryFuse` member of convolution layer to fuse eltwise later
+                 * it's not intended to be fused here; hence, we stop when we encounter eltwise
+                 */
+                Ptr<Layer> nextLayer = nextData->layerInstance;
+                if (currLayer->tryFuse(nextLayer))
+                {
+                    printf_(("\tfused with %s\n", nextLayer->name.c_str()));
+                    nextData->skip = true;
+                    ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                    ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
+                    if (nextData->consumers.size() == 1)
+                    {
+                        int nextLayerId = nextData->consumers[0].lid;
+                        nextData = &layers[nextLayerId];
+                        lpNext = LayerPin(nextLayerId, 0);
+                    }
+                    else
+                    {
+                        nextData = 0;
+                        break;
+                    }
+                }
+                else
+                    break;
+            }
+        }
+    }
+}
+
+
+void switchToOpenVINOBackend(Net& net)
+{
+    CV_TRACE_FUNCTION();
+    CV_LOG_INFO(NULL, "DNN: switching to OpenVINO backend...");
+    Ptr<Net::Impl>& impl_ptr_ref = accessor::DnnNetAccessor::getImplPtrRef(net);
+    Ptr<NetImplOpenVINO> openvino_impl_ptr = makePtr<NetImplOpenVINO>(impl_ptr_ref);
+    impl_ptr_ref = openvino_impl_ptr;
+}
+
 
 /*static*/
-Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
+Net NetImplOpenVINO::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
 {
     CV_TRACE_FUNCTION();
 
@@ -418,6 +706,10 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
     }
 
     Net cvNet;
+    Ptr<NetImplOpenVINO> openvino_impl_ptr = makePtr<NetImplOpenVINO>();
+    NetImplOpenVINO& openvino_impl = *openvino_impl_ptr;
+    accessor::DnnNetAccessor::getImplPtrRef(cvNet) = openvino_impl_ptr;
+
     cvNet.setInputsNames(inputsNames);
 
     // set empty input to determine input shapes
@@ -432,7 +724,7 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
     {
         auto fake_node = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape {});
         Ptr<InfEngineNgraphNode> backendNodeNGraph(new InfEngineNgraphNode(fake_node));
-        backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*(cvNet.impl), ieNet));
+        backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(openvino_impl, ieNet));
         backendNode = backendNodeNGraph;
     }
 
@@ -450,7 +742,7 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
 
         LayerParams lp;
         int lid = cvNet.addLayer(it.first, "", lp);
-        LayerData& ld = cvNet.impl->layers[lid];
+        LayerData& ld = openvino_impl.layers[lid];
 
         {
             Ptr<Layer> cvLayer(new NgraphBackendLayer(ieNet));
@@ -498,7 +790,6 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
 
     cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
 
-    cvNet.impl->skipInfEngineInit = true;
     return cvNet;
 }
 #endif  // HAVE_INF_ENGINE
@@ -516,7 +807,7 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
     InferenceEngine::Core& ie = getCore("");
     InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin);
 
-    return Impl::createNetworkFromModelOptimizer(ieNet);
+    return NetImplOpenVINO::createNetworkFromModelOptimizer(ieNet);
 #endif  // HAVE_INF_ENGINE
 }
 
@@ -560,7 +851,7 @@ Net Net::readFromModelOptimizer(
         CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
     }
 
-    return Impl::createNetworkFromModelOptimizer(ieNet);
+    return NetImplOpenVINO::createNetworkFromModelOptimizer(ieNet);
 #endif  // HAVE_INF_ENGINE
 }
diff --git a/modules/dnn/src/net_quantization.cpp b/modules/dnn/src/net_quantization.cpp
index 8316687..0add2d2 100644
--- a/modules/dnn/src/net_quantization.cpp
+++ b/modules/dnn/src/net_quantization.cpp
@@ -33,7 +33,7 @@ void getQuantizationParams(const Mat& src, std::vector<float>& scales, std::vector<int>& zeropoints)
 }
 
 // FIXIT drop from inference API
-Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel)
+Net Net::Impl::quantize(Net& net, InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel)
 {
     // Net can be quantized only once.
     if (netWasQuantized)
@@ -47,7 +47,8 @@ Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel)
     int prefTarget = preferableTarget;
 
     // Disable fusions and use CPU backend to quantize net
-    setPreferableBackend(DNN_BACKEND_OPENCV);
+    // FIXIT: we should not modify original network!
+    setPreferableBackend(net, DNN_BACKEND_OPENCV);
     setPreferableTarget(DNN_TARGET_CPU);
     enableFusion(false);
 
@@ -163,7 +164,7 @@ Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel)
     Net::Impl& dstNet = *(dstNet_.impl);
     dstNet.netWasQuantized = true;
     dstNet.setInputsNames(netInputLayer->outNames);
-    dstNet.setPreferableBackend(prefBackend);
+    dstNet.setPreferableBackend(dstNet_, prefBackend);
     dstNet.setPreferableTarget(prefTarget);
     dstNet.enableFusion(originalFusion);
 
@@ -253,7 +254,7 @@ Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel)
         }
     }
     // Restore FP32 Net's backend, target and fusion
-    setPreferableBackend(net, prefBackend);
+    setPreferableBackend(net, prefBackend);
     setPreferableTarget(prefTarget);
     enableFusion(originalFusion);
     return dstNet_;
diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp
index 856441e..7f9f4bf 100644
--- a/modules/dnn/src/op_inf_engine.hpp
+++ b/modules/dnn/src/op_inf_engine.hpp
@@ -73,6 +73,8 @@ void infEngineBlobsToMats(const std::vector<InferenceEngine::Blob::Ptr>& blobs,
 
 CV__DNN_INLINE_NS_BEGIN
 
+void switchToOpenVINOBackend(Net& net);
+
 namespace openvino
 {
 // TODO: use std::string as parameter
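
Note: the core pattern of this patch (replacing Net's private impl pointer through a friend
accessor, so that a backend can subclass Net::Impl and override its virtual hooks) can be
sketched in isolation. The sketch below is illustrative only, not OpenCV code: the names Net,
Impl, DnnNetAccessor and switchToOpenVINOBackend mirror the patch, everything else is invented;
std::shared_ptr stands in for cv::Ptr and a plain string stands in for the backend enum.

    #include <iostream>
    #include <memory>
    #include <string>

    class Net
    {
    public:
        struct Impl;                      // forward declaration, as in dnn.hpp
        Net();
        void setPreferableBackend(const std::string& backendId);
        void forward();
    private:
        friend class DnnNetAccessor;      // counterpart of accessor::DnnNetAccessor
        std::shared_ptr<Impl> impl;
    };

    // Base implementation with virtual hooks (counterpart of Net::Impl).
    struct Net::Impl
    {
        virtual ~Impl() {}
        virtual void forwardLayer() { std::cout << "generic backend\n"; }
        std::shared_ptr<Impl> basePtr_;   // keeps the original impl for switching back
    };

    // Friend accessor: lets backend code swap the pimpl without widening Net's public API.
    class DnnNetAccessor
    {
    public:
        static std::shared_ptr<Net::Impl>& getImplPtrRef(Net& net) { return net.impl; }
    };

    // Derived backend impl (counterpart of NetImplOpenVINO).
    struct OpenVINOImpl : Net::Impl
    {
        explicit OpenVINOImpl(const std::shared_ptr<Net::Impl>& base) { basePtr_ = base; }
        void forwardLayer() override { std::cout << "OpenVINO backend\n"; }
    };

    // Counterpart of switchToOpenVINOBackend(): replace the impl in place,
    // keeping the original alive via basePtr_.
    void switchToOpenVINOBackend(Net& net)
    {
        std::shared_ptr<Net::Impl>& ref = DnnNetAccessor::getImplPtrRef(net);
        ref = std::make_shared<OpenVINOImpl>(ref);
    }

    Net::Net() : impl(std::make_shared<Impl>()) {}
    void Net::forward() { impl->forwardLayer(); }

    void Net::setPreferableBackend(const std::string& backendId)
    {
        std::shared_ptr<Impl>& ref = DnnNetAccessor::getImplPtrRef(*this);
        if (backendId == "openvino" && !dynamic_cast<OpenVINOImpl*>(ref.get()))
            switchToOpenVINOBackend(*this);   // swap in the derived impl
        else if (backendId != "openvino" && ref->basePtr_)
            ref = ref->basePtr_;              // restore the original impl
    }

    int main()
    {
        Net net;
        net.forward();                        // generic backend
        net.setPreferableBackend("openvino");
        net.forward();                        // OpenVINO backend
        net.setPreferableBackend("opencv");
        net.forward();                        // generic backend again
    }

Against the real OpenCV API the switch is still triggered by the usual call, e.g.
net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE); the difference after this
patch is that the call replaces the whole Impl object (keeping the original in basePtr_)
instead of routing through the skipInfEngineInit flag and #ifdef branches.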