From: Anton Voronov <anton.voronov@intel.com>
Date: Tue, 17 Nov 2020 06:04:49 +0000 (+0300)
Subject: [CPU] added MergePermuteAndReorder optimization + added test (#2519)
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6467a9f5b8ef8fdd36a591f1843d25d765a32ec6;p=platform%2Fupstream%2Fdldt.git

[CPU] added MergePermuteAndReorder optimization + added test (#2519)
---

diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp
index 0ab3e0a..bcb2dee 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp
@@ -124,3 +124,16 @@ bool MKLDNNExtensionUtils::initTensorsAreEqual(const InferenceEngine::TensorDesc
     return !(in1Block.getOffsetPadding() != in2Block.getOffsetPadding() &&
         in1Block.getOffsetPadding() != uninitNum && in2Block.getOffsetPadding() != uninitNum);
 }
+
+std::string MKLDNNExtensionUtils::getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) {
+    const bool precisionDiffers = parentDesc.getPrecision() != childDesc.getPrecision();
+    std::string inArgs = precisionDiffers ? parentDesc.getPrecision().name() : "";
+    std::string outArgs = precisionDiffers ? childDesc.getPrecision().name() : "";
+    const auto inFormat = MKLDNNMemoryDesc(parentDesc).getFormat();  // convert each descriptor once, reuse below
+    const auto outFormat = MKLDNNMemoryDesc(childDesc).getFormat();
+    if (inFormat != outFormat) {
+        inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(inFormat);
+        outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(outFormat);
+    }
+    return inArgs + "_" + outArgs;  // "<in>_<out>": only the precision/format fields that differ are listed
+}
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h
index b5f6365..a73b16f 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h
@@ -22,6 +22,7 @@ public:
     static InferenceEngine::Precision DataTypeToIEPrecision(mkldnn::memory::data_type dataType);
     static InferenceEngine::TensorDesc getUninitTensorDesc(const InferenceEngine::TensorDesc& desc);
     static bool initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2);
+    static std::string getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc);
 };
 
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
index af03b60..32c9cff 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
@@ -463,18 +463,6 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() {
 void MKLDNNGraph::InitEdges() {
     OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges");
 
-    auto reorderArgs = [](const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) {
-        std::string inArgs, outArgs;
-        if (parentDesc.getPrecision() != childDesc.getPrecision()) {
-            inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name());
-            outArgs += (outArgs.empty() ? "" : "_") + std::string(childDesc.getPrecision().name());
-        }
-        if (MKLDNNMemoryDesc(parentDesc).getFormat() != MKLDNNMemoryDesc(childDesc).getFormat()) {
-            inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(parentDesc).getFormat());
-            outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(childDesc).getFormat());
-        }
-        return inArgs + "_" + outArgs;
-    };
     size_t numberOfEdges = graphEdges.size();
 
     std::unordered_set<std::string> uniqueLayerNames;
@@ -487,8 +475,8 @@ void MKLDNNGraph::InitEdges() {
 #if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
             auto &edge = graphEdges[i];
             std::string basicLayerName = edge->getParent()->getName() + "_" +
-                                         reorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
-                                         edge->getChild()->getName();
+                    MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
+                    edge->getChild()->getName();
             std::string layerName = basicLayerName;
             int idx = 0;
             while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) {
@@ -496,43 +484,7 @@ void MKLDNNGraph::InitEdges() {
                 layerName = basicLayerName + "_" + std::to_string(idx);
             }
             uniqueLayerNames.insert(layerName);
-            CNNLayerPtr layer(new CNNLayer({layerName,
-                                            "Reorder",
-                                            edge->getInputDesc().getPrecision()}));
-            MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, getEngine(), weightsCache));
-            auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
-            if (reorderPtr) {
-                reorderPtr->setDescs(edge->getInputDesc(), edge->getOutputDesc());
-            }
-
-            auto oIndex = edge->getOutputNum();
-            auto iIndex = edge->getInputNum();
-            if (iIndex < 0 || oIndex < 0)
-                THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
-                                   << edge->getParent()->getName() << " and "
-                                   << edge->getChild()->getName() << ".";
-
-            edge->drop();
-
-            MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
-            MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
-
-            // Add edge for beforeNode
-            beforeNode->getChild()->parentEdges.push_back(beforeNode);
-            edge->getParent()->childEdges.push_back(beforeNode);
-
-            // Add edge for afterNode
-            afterNode->getParent()->childEdges.push_back(afterNode);
-            edge->getChild()->parentEdges.push_back(afterNode);
-
-            newReorder->getSupportedDescriptors();
-            newReorder->initSupportedPrimitiveDescriptors();
-            newReorder->selectOptimalPrimitiveDescriptor();
-
-            graphEdges.push_back(beforeNode);
-            graphEdges.push_back(afterNode);
-
-            graphNodes.push_back(newReorder);
+            InsertReorder(edge, layerName, edge->getInputDesc(), edge->getOutputDesc());
             graphEdges.erase(graphEdges.begin() + i);
             i--;
             numberOfEdges--;
@@ -1131,6 +1083,57 @@ void MKLDNNGraph::RemoveDroppedEdges() {
     }
 }
 
+void MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const TensorDesc& inDesc, const TensorDesc& outDesc,
+                                bool isOptimized, InferenceEngine::Blob::Ptr scales) {
+    // Replaces `edge` with parent -> Reorder -> child; `edge` is dropped but is not erased from graphEdges here.
+    if (!edge)
+        THROW_IE_EXCEPTION << "Cannot create reorder " << layerName << ": the edge to split is null.";
+
+    auto oIndex = edge->getOutputNum();
+    auto iIndex = edge->getInputNum();
+    if (iIndex < 0 || oIndex < 0)
+        THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
+                           << edge->getParent()->getName() << " and "
+                           << edge->getChild()->getName() << ".";
+
+    CNNLayerPtr layer(new CNNLayer({layerName, "Reorder", inDesc.getPrecision()}));
+    // Hold the concrete type so the Reorder-specific setters need neither a dynamic_cast nor a null check.
+    std::shared_ptr<MKLDNNReorderNode> reorder(new MKLDNNReorderNode(layer, getEngine(), weightsCache));
+    reorder->setDescs(inDesc, outDesc);
+    reorder->_scales = scales;
+    reorder->setOptimized(isOptimized);
+    MKLDNNNodePtr newReorder = reorder;
+
+    edge->drop();
+
+    MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
+    MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
+
+    // Add edge for beforeNode
+    beforeNode->getChild()->parentEdges.push_back(beforeNode);
+    edge->getParent()->childEdges.push_back(beforeNode);
+
+    // Add edge for afterNode
+    afterNode->getParent()->childEdges.push_back(afterNode);
+    edge->getChild()->parentEdges.push_back(afterNode);
+
+    newReorder->getSupportedDescriptors();
+    newReorder->initSupportedPrimitiveDescriptors();
+    newReorder->selectOptimalPrimitiveDescriptor();
+
+    graphEdges.push_back(beforeNode);
+    graphEdges.push_back(afterNode);
+
+    // Using the method MKLDNNEdge::getDesc() we can check that input and output tensor descriptors are equal.
+    // The checks are skipped when isOptimized is set: MKLDNNGraphOptimizer::MergePermuteAndReorder() uses that flag and its specifics rule them out.
+    if (!isOptimized) {
+        beforeNode->getDesc();
+        afterNode->getDesc();
+    }
+
+    graphNodes.push_back(newReorder);
+}
+
 void MKLDNNGraph::dumpToDotFile(std::string file) const {
     std::ofstream dot;
     dot.open(file);
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h
index d4c8bff..b97cf9d 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h
@@ -92,6 +92,28 @@ public:
     void DropNode(const MKLDNNNodePtr& node);
     void DropDWConvNode(const MKLDNNNodePtr& node);
 
+    /**
+     * @brief Inserts a Reorder node in place of the given edge.
+     * A Reorder node is needed when there are in-place conflicts or when the input and output tensor descriptors do not match.
+     * The Reorder node rearranges the elements in memory according to inDesc and outDesc or, if isOptimized is true,
+     * only reinterprets the memory descriptor without rearranging any elements.
+     * @param edge
+     * pointer to the graph edge that the Reorder node will split; the edge is dropped
+     * @param layerName
+     * name of the new Reorder layer
+     * @param inDesc
+     * input tensor descriptor of the Reorder node
+     * @param outDesc
+     * output tensor descriptor of the Reorder node
+     * @param isOptimized
+     * optimization flag; if true, the Reorder node is configured in-place and its execute() returns immediately
+     * @param scales
+     * pointer to the blob containing scales (stored in the Reorder node; may be nullptr)
+     * @return none.
+     */
+    void InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const InferenceEngine::TensorDesc& inDesc, const InferenceEngine::TensorDesc& outDesc,
+                       bool isOptimized = false, InferenceEngine::Blob::Ptr scales = nullptr);
+
     InferenceEngine::CNNNetwork dump() const;
 
     template<typename NET>
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
index c705395..9ca7177 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
@@ -14,6 +14,7 @@
 #include "nodes/mkldnn_bin_conv_node.h"
 #include "nodes/mkldnn_quantize_node.h"
 #include "nodes/mkldnn_mvn_node.h"
+#include <nodes/mkldnn_permute_node.h>
 #include "nodes/mkldnn_resample_node.h"
 #include "nodes/mkldnn_interpolate_node.h"
 #include "nodes/mkldnn_input_node.h"
@@ -151,6 +152,9 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
     graph.RemoveDroppedNodes();
 #endif
 
+    MergePermuteAndReorder(graph);
+    graph.RemoveDroppedNodes();
+
     graph.RemoveDroppedEdges();
 }
 
@@ -1812,8 +1816,9 @@ void MKLDNNGraphOptimizer::RemoveIdentityOperator(MKLDNNGraph &graph) {
 #if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
 void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
     std::set<MKLDNNNodePtr> processed;
-    std::vector<MKLDNNNodePtr> newNodes;
-    for (MKLDNNNodePtr& node : graph.GetNodes()) {
+    int graphNodesSize = graph.GetNodes().size();
+    for (int i = 0; i < graphNodesSize; i++) {
+        MKLDNNNodePtr& node = graph.GetNodes()[i];
         if (processed.find(node) == processed.end() && node->getType() == Reorder
             && node->getChildEdges().size() == 1
             && node->getChildEdgeAt(0)->getChild()->getType() == Reorder ) {
@@ -1855,54 +1860,10 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
 
 
             std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName();
-            CNNLayerPtr layer(new CNNLayer({layerName,
-                                            "Reorder",
-                                            n->getInput().getPrecision()}));
-            MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, graph.getEngine(), graph.weightsCache));
-            auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
-            if (reorderPtr) {
-                reorderPtr->setDescs(n->getInput(), nn->getOutput());
-                reorderPtr->_scales = scales;
-            }
-
-            // new !!!
-            auto oIndex = edge->getOutputNum();
-            auto iIndex = edge->getInputNum();
-            if (iIndex < 0 || oIndex < 0)
-                THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
-                                   << edge->getParent()->getName() << " and "
-                                   << edge->getChild()->getName() << ".";
-            edge->drop();
-
-            MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
-            MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
-
-            // Add edge for beforeNode
-            beforeNode->getChild()->parentEdges.push_back(beforeNode);
-            edge->getParent()->childEdges.push_back(beforeNode);
-
-            // Add edge for afterNode
-            afterNode->getParent()->childEdges.push_back(afterNode);
-            edge->getChild()->parentEdges.push_back(afterNode);
-
-            newReorder->getSupportedDescriptors();
-            newReorder->initSupportedPrimitiveDescriptors();
-            newReorder->selectOptimalPrimitiveDescriptor();
-
-            graph.GetEdges().push_back(beforeNode);
-            graph.GetEdges().push_back(afterNode);
-
-            // Just to check accordance
-            afterNode->getDesc();
-            beforeNode->getDesc();
-
-            newNodes.push_back(newReorder);
+            graph.InsertReorder(edge, layerName, n->getInput(), nn->getOutput(), false, scales);
             graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end());
         }
     }
-    for (MKLDNNNodePtr& node : newNodes) {
-        graph.GetNodes().push_back(node);
-    }
 }
 
 void MKLDNNGraphOptimizer::DropConvertReorder(MKLDNNGraph& graph) {
@@ -2247,3 +2208,142 @@ void MKLDNNGraphOptimizer::FuseScaleShiftAndQuantize(MKLDNNGraph &graph) {
         }
     }
 }
+
+void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) {
+    auto& graphNodes = graph.GetNodes();
+
+    auto isSuitableParentNode = [](const MKLDNNNodePtr& node) {
+        return node->getType() == Permute && node->getChildEdges().size() == 1;
+    };
+
+    auto isSuitableChildNode = [](const MKLDNNNodePtr& node) {
+        return node->getType() == Reorder && node->getChildEdges().size() == 1;
+    };
+
+    // Method checkAscendingSummaryOrder() checks that after the sequential execution of Permute and Reorder nodes,
+    // the order of the elements in the memory will not change. In other words, that Permute+Reorder is the identity permutation.
+    auto checkAscendingSummaryOrder = [](std::shared_ptr<MKLDNNNode> &parentNode, std::shared_ptr<MKLDNNNode> &childNode) -> bool {
+        auto* permuteNode = dynamic_cast<MKLDNNPermuteNode*>(parentNode.get());
+        auto* reorderNode = dynamic_cast<MKLDNNReorderNode*>(childNode.get());
+        if (!permuteNode || !reorderNode) {
+            return false;
+        }
+
+        auto& permuteOrder = permuteNode->getOrder();
+        auto& layoutOrder = permuteNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder();
+        auto& inOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getBlockingDesc().getOrder();
+        auto& outOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder();
+
+        const size_t rank = permuteOrder.size();
+        if (layoutOrder.size() != rank || inOrder.size() != rank || outOrder.size() != rank) {
+            return false;
+        }
+
+        // revLayoutOrder - reverse permutation for layoutOrder
+        auto revLayoutOrder = SizeVector(rank);
+        for (size_t i = 0; i < rank; i++) {
+            revLayoutOrder[layoutOrder[i]] = i;
+        }
+
+        // newPermuteOrder - Permute layout-aware permutation
+        auto newPermuteOrder = SizeVector(rank);
+        for (size_t i = 0; i < rank; i++) {
+            newPermuteOrder[i] = layoutOrder[permuteOrder[revLayoutOrder[i]]];
+        }
+
+        // reorderOrder - Reorder layout-aware permutation: position in inOrder of every axis of outOrder
+        auto reorderOrder = SizeVector(rank);
+        for (size_t i = 0; i < rank; i++) {
+            const auto pos = std::find(inOrder.begin(), inOrder.end(), outOrder[i]);
+            if (pos == inOrder.end()) {
+                return false;  // the Reorder input and output layouts do not cover the same axes
+            }
+            reorderOrder[i] = static_cast<size_t>(pos - inOrder.begin());
+        }
+
+        // The resulting Permute+Reorder order is reorderOrder[newPermuteOrder[i]]; merging requires it to be the identity.
+        for (size_t i = 0; i < rank; i++) {
+            if (reorderOrder[newPermuteOrder[i]] != i) {
+                return false;
+            }
+        }
+
+        return true;
+    };
+
+    // Permute and Reorder do opposite permutation to each other.
+    // Example:
+    //      chain [physical layout: NCHW, logical layout: NCHW] -> Permute(order=0312) -> [physical layout: NWCH, logical layout: NCHW] ->
+    //      Reorder(nchw->nhwc) -> [physical layout: NCHW, logical layout: NHWC] can be replaced with Reorder(nchw->nhwc; isOptimized=true)
+    //      which will just reinterpret the layout without physical change of the memory.
+    // Two cases are possible:
+    //      1) inPrec = outPrec
+    //          In this case, we replace Permute+Reorder pattern with a new Reorder that does nothing.
+    //      2) inPrec != outPrec
+    //          As in the first case, we also replace Permute+Reorder pattern with a new Reorder.
+    //          Additionally, we insert another Reorder that converts the input precision (inPrec) to the output precision (outPrec).
+    auto mergePermuteAndReorder = [&](std::shared_ptr<MKLDNNNode>& parentNode, std::shared_ptr<MKLDNNNode>& childNode) {
+        auto parentParentNode = parentNode->getParentEdgeAt(0)->getParent();
+        auto childChildNode = childNode->getChildEdgeAt(0)->getChild();
+
+        graph.DropNode(parentNode);
+        graph.DropNode(childNode);
+
+        // NOTE(review): port 0 is assumed on both sides - confirm for a producer/consumer that has several ports.
+        auto inDesc = parentParentNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc;
+        auto outDesc = childChildNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc;
+
+        auto inPrec = inDesc.getPrecision();
+        auto outPrec = outDesc.getPrecision();
+
+        auto reorderInDesc = TensorDesc(inDesc);
+        auto reorderOutDesc = TensorDesc(outDesc);
+        reorderOutDesc.setPrecision(inPrec);
+
+        std::string reorderlayerName = parentParentNode->getName() + "_" +
+                MKLDNNExtensionUtils::getReorderArgs(reorderInDesc, reorderOutDesc) + "_" + "fake";
+
+        // Locate the edge that now joins the outer nodes directly (DropNode() is expected to have reconnected them).
+        MKLDNNEdgePtr edge;
+        for (auto &childEdge : parentParentNode->getChildEdges()) {
+            auto candidate = childEdge.lock();
+            if (candidate && candidate->getChild() == childChildNode) {
+                edge = candidate;
+                break;
+            }
+        }
+        if (!edge)
+            THROW_IE_EXCEPTION << "Cannot merge Permute and Reorder: no edge between " << parentParentNode->getName() << " and " << childChildNode->getName();
+
+        graph.InsertReorder(edge, reorderlayerName, reorderInDesc, reorderOutDesc, true);
+
+        // case 2
+        if (inPrec != outPrec) {
+            // InsertReorder() appends the node it creates to the node list; the producer's first child may be another consumer.
+            auto reorderNode = graphNodes.back();
+            auto reorderInDesc2 = TensorDesc(reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc);
+            auto reorderOutDesc2 = TensorDesc(childChildNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc);
+
+            std::string reorderLayerName2 = reorderNode->getName() + "_" +
+                                    MKLDNNExtensionUtils::getReorderArgs(reorderInDesc2, reorderOutDesc2) + "_" + childChildNode->getName();
+
+            graph.InsertReorder(reorderNode->getChildEdgeAt(0), reorderLayerName2, reorderInDesc2, reorderOutDesc2, false);
+        }
+    };
+
+    // graphNodes grows while InsertReorder() runs, so iterate by index and hold each node by value, not by reference.
+    for (size_t i = 0; i < graphNodes.size(); i++) {
+        auto parentNode = graphNodes[i];
+        if (!isSuitableParentNode(parentNode)) {
+            continue;
+        }
+        auto childNode = parentNode->getChildEdgeAt(0)->getChild();
+        if (!isSuitableChildNode(childNode)) {
+            continue;
+        }
+
+        if (checkAscendingSummaryOrder(parentNode, childNode)) {
+            mergePermuteAndReorder(parentNode, childNode);
+        }
+    }
+}
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
index 54bdda6..481ca61 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
@@ -52,6 +52,7 @@ private:
     void FuseEltwiseAndSimple(MKLDNNGraph &graph);
     void FuseScaleShiftAndQuantize(MKLDNNGraph &graph);
     void FuseClampAndQuantize(MKLDNNGraph &graph);
+    void MergePermuteAndReorder(MKLDNNGraph &graph);
 
     bool IsOneOf(Type type, std::vector<Type> types);
     bool IsOneOf(EltwiseOpType alg, std::vector<EltwiseOpType> algs);
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h
index ea8d5c6..e35b312 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h
@@ -55,6 +55,10 @@ public:
         return false;
     }
 
+    const InferenceEngine::SizeVector& getOrder() const {  // read-only access to the `order` member
+        return order;
+    }
+
 private:
     InferenceEngine::SizeVector order;
     InferenceEngine::Precision prec;
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp
index a71f983..ab04b72 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp
@@ -46,6 +46,10 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() {
     config.inConfs[0].constant = false;
     config.outConfs[0].inPlace = -1;
     config.outConfs[0].constant = false;
+    if (isOptimized) {
+        config.inConfs[0].inPlace = 0;
+        config.outConfs[0].inPlace = 0;
+    }
     if (input.getLayout() != InferenceEngine::Layout::ANY && output.getLayout() != InferenceEngine::Layout::ANY) {
         config.inConfs[0].desc = input;
         config.outConfs[0].desc = output;
@@ -71,6 +75,7 @@ void MKLDNNReorderNode::createPrimitive() {
     if (getSelectedPrimitiveDescriptor() == nullptr)
         THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
 
+    if (!isOptimized)
     createReorderPrimitive(srcMemPtr->GetDescriptor(), srcMemPtr->GetPrimitive().get_data_handle(),
             dstMemPtr->GetDescriptor(), dstMemPtr->GetPrimitive().get_data_handle());
 }
@@ -169,6 +174,9 @@ bool MKLDNNReorderNode::created() const {
 }
 
 void MKLDNNReorderNode::execute(mkldnn::stream strm) {
+    if (isOptimized)
+        return;
+
     src_blocked->GetPrimitivePtr()->set_data_handle(getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle());
     dst_blocked->GetPrimitivePtr()->set_data_handle(getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle());
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h
index 29ad087..0d468bc 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h
@@ -29,6 +29,10 @@ public:
         this->output = output;
     }
 
+    void setOptimized(bool isOptimized) {  // when true, createPrimitive() builds no reorder primitive and execute() returns at once
+        this->isOptimized = isOptimized;
+    }
+
     void setDynamicBatchLim(int lim) override;
 
     bool canBeInPlace() const override {
@@ -50,6 +54,8 @@ private:
     MKLDNNMemoryPtr dst_blocked;
     MKLDNNMemoryPtr src_blocked;
 
+    bool isOptimized = false;
+
     void createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr);
 };
 
diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/fuse_permute_reorder.hpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/fuse_permute_reorder.hpp
new file mode 100644
index 0000000..cfbd70c
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/fuse_permute_reorder.hpp
@@ -0,0 +1,35 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <tuple>
+#include <vector>
+#include <string>
+
+#include "test_utils/cpu_test_utils.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ngraph_functions/builders.hpp"
+
+using namespace CPUTestUtils;
+
+namespace LayerTestsDefinitions {
+
+using FusePermuteAndReorderParams = std::tuple<
+        InferenceEngine::SizeVector, // Input shape
+        InferenceEngine::Precision   // Input precision
+>;
+
+class FusePermuteAndReorderTest : public testing::WithParamInterface<FusePermuteAndReorderParams>, public CPUTestsBase,
+        virtual public LayerTestsUtils::LayerTestsCommon {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<FusePermuteAndReorderParams> obj);  // builds the per-parameter test name
+
+protected:
+    void SetUp() override;  // builds the tested function from the (input shape, input precision) parameters
+    std::string pluginTypeNode;  // NOTE(review): not referenced by the accompanying fuse_permute_reorder.cpp - confirm it is needed
+};
+
+} // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_permute_reorder.cpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_permute_reorder.cpp
new file mode 100644
index 0000000..6f1fb7d
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_permute_reorder.cpp
@@ -0,0 +1,82 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "subgraph_tests/include/fuse_permute_reorder.hpp"
+
+using namespace InferenceEngine;
+using namespace CPUTestUtils;
+
+namespace LayerTestsDefinitions {
+
+std::string FusePermuteAndReorderTest::getTestCaseName(testing::TestParamInfo<FusePermuteAndReorderParams> obj) {
+    // Test name layout: "IS=<input shape>_Precision=<precision name>".
+    const SizeVector &shape = std::get<0>(obj.param);
+    const Precision &precision = std::get<1>(obj.param);
+
+    std::ostringstream name;
+    name << "IS=" << CommonTestUtils::vec2str(shape) << "_"
+         << "Precision=" << precision.name();
+
+    return name.str();
+}
+
+void FusePermuteAndReorderTest::SetUp() {
+    // Builds Parameter -> Transpose -> Result and attaches the setCPUInfo() rt_info for the nhwc/ndhwc format to the Transpose.
+    targetDevice = CommonTestUtils::DEVICE_CPU;
+    SizeVector inputShape;
+    Precision inPrec;
+    std::tie(inputShape, inPrec) = this->GetParam();
+    const bool is5D = inputShape.size() == 5;
+
+    const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec);
+    auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
+    auto paramOuts = ngraph::helpers::convert2OutputVector(
+            ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+
+    // Transpose order that moves axis 1 to the last position.
+    const std::vector<int64_t> order = is5D ? std::vector<int64_t>{0, 2, 3, 4, 1} : std::vector<int64_t>{0, 2, 3, 1};
+    auto constOrder = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order);
+    auto permute = std::make_shared<ngraph::opset5::Transpose>(paramOuts[0], constOrder);
+
+    const auto memFmt = is5D ? ndhwc : nhwc;
+    permute->get_rt_info() = setCPUInfo({memFmt}, {memFmt}, {});
+
+    ngraph::ResultVector results{std::make_shared<ngraph::opset5::Result>(permute)};
+    function = std::make_shared<ngraph::Function>(results, params, "PermuteReorder");
+}
+
+TEST_P(FusePermuteAndReorderTest, CompareWithRefs) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    Run();
+
+    // Inspect the executable graph of the network that Run() has just executed.
+    InferenceEngine::CNNNetwork execGraphInfo = executableNetwork.GetExecGraphInfo();
+    auto execFunction = execGraphInfo.getFunction();  // named so that it does not shadow the fixture member `function`
+    ASSERT_NE(nullptr, execFunction);
+
+    // Reads a string attribute from a node's rt_info; IE_ASSERT fires if it is missing or is not a string variant.
+    auto getExecValue = [](const std::shared_ptr<ngraph::Node> &node, const std::string &paramName) -> std::string {
+        const auto &rtInfo = node->get_rt_info();
+        auto it = rtInfo.find(paramName);
+        IE_ASSERT(rtInfo.end() != it);
+        auto value = std::dynamic_pointer_cast<ngraph::VariantImpl<std::string>>(it->second);
+        IE_ASSERT(nullptr != value);
+        return value->get();
+    };
+
+    // No node of the executable graph may report the "Permute" layer type; the first offender fails the test.
+    for (const auto &node : execFunction->get_ops()) {
+        ASSERT_NE(std::string("Permute"), getExecValue(node, ExecGraphInfoSerialization::LAYER_TYPE));
+    }
+}
+
+const auto fusePermuteAndReorderParams = ::testing::Combine(
+        ::testing::Values(SizeVector{1, 2, 3, 4}, SizeVector{1, 2, 3, 4, 5}),  // input shapes: one 4D and one 5D
+        ::testing::Values(Precision::I8, Precision::U8)  // input precisions
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest, fusePermuteAndReorderParams, FusePermuteAndReorderTest::getTestCaseName);
+
+}  // namespace LayerTestsDefinitions