From: Anton Voronov Date: Tue, 17 Nov 2020 06:04:49 +0000 (+0300) Subject: [CPU] added MergePermuteAndReorder optimization + added test (#2519) X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6467a9f5b8ef8fdd36a591f1843d25d765a32ec6;p=platform%2Fupstream%2Fdldt.git [CPU] added MergePermuteAndReorder optimization + added test (#2519) --- diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp index 0ab3e0a..bcb2dee 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp @@ -124,3 +124,16 @@ bool MKLDNNExtensionUtils::initTensorsAreEqual(const InferenceEngine::TensorDesc return !(in1Block.getOffsetPadding() != in2Block.getOffsetPadding() && in1Block.getOffsetPadding() != uninitNum && in2Block.getOffsetPadding() != uninitNum); } + +std::string MKLDNNExtensionUtils::getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) { + std::string inArgs, outArgs; + if (parentDesc.getPrecision() != childDesc.getPrecision()) { + inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name()); + outArgs += (outArgs.empty() ? "" : "_") + std::string(childDesc.getPrecision().name()); + } + if (MKLDNNMemoryDesc(parentDesc).getFormat() != MKLDNNMemoryDesc(childDesc).getFormat()) { + inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(parentDesc).getFormat()); + outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(childDesc).getFormat()); + } + return inArgs + "_" + outArgs; +} diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h index b5f6365..a73b16f 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h @@ -22,6 +22,7 @@ public: static InferenceEngine::Precision DataTypeToIEPrecision(mkldnn::memory::data_type dataType); static InferenceEngine::TensorDesc getUninitTensorDesc(const InferenceEngine::TensorDesc& desc); static bool initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2); + static std::string getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index af03b60..32c9cff 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -463,18 +463,6 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() { void MKLDNNGraph::InitEdges() { OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges"); - auto reorderArgs = [](const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) { - std::string inArgs, outArgs; - if (parentDesc.getPrecision() != childDesc.getPrecision()) { - inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name()); - outArgs += (outArgs.empty() ? "" : "_") + std::string(childDesc.getPrecision().name()); - } - if (MKLDNNMemoryDesc(parentDesc).getFormat() != MKLDNNMemoryDesc(childDesc).getFormat()) { - inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(parentDesc).getFormat()); - outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(childDesc).getFormat()); - } - return inArgs + "_" + outArgs; - }; size_t numberOfEdges = graphEdges.size(); std::unordered_set uniqueLayerNames; @@ -487,8 +475,8 @@ void MKLDNNGraph::InitEdges() { #if defined (COMPILED_CPU_MKLDNN_REORDER_NODE) auto &edge = graphEdges[i]; std::string basicLayerName = edge->getParent()->getName() + "_" + - reorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" + - edge->getChild()->getName(); + MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" + + edge->getChild()->getName(); std::string layerName = basicLayerName; int idx = 0; while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) { @@ -496,43 +484,7 @@ void MKLDNNGraph::InitEdges() { layerName = basicLayerName + "_" + std::to_string(idx); } uniqueLayerNames.insert(layerName); - CNNLayerPtr layer(new CNNLayer({layerName, - "Reorder", - edge->getInputDesc().getPrecision()})); - MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, getEngine(), weightsCache)); - auto *reorderPtr = dynamic_cast(newReorder.get()); - if (reorderPtr) { - reorderPtr->setDescs(edge->getInputDesc(), edge->getOutputDesc()); - } - - auto oIndex = edge->getOutputNum(); - auto iIndex = edge->getInputNum(); - if (iIndex < 0 || oIndex < 0) - THROW_IE_EXCEPTION << "Cannot create reorder for nodes: " - << edge->getParent()->getName() << " and " - << edge->getChild()->getName() << "."; - - edge->drop(); - - MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0)); - MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex)); - - // Add edge for beforeNode - beforeNode->getChild()->parentEdges.push_back(beforeNode); - edge->getParent()->childEdges.push_back(beforeNode); - - // Add edge for afterNode - afterNode->getParent()->childEdges.push_back(afterNode); - edge->getChild()->parentEdges.push_back(afterNode); - - newReorder->getSupportedDescriptors(); - newReorder->initSupportedPrimitiveDescriptors(); - newReorder->selectOptimalPrimitiveDescriptor(); - - graphEdges.push_back(beforeNode); - graphEdges.push_back(afterNode); - - graphNodes.push_back(newReorder); + InsertReorder(edge, layerName, edge->getInputDesc(), edge->getOutputDesc()); graphEdges.erase(graphEdges.begin() + i); i--; numberOfEdges--; @@ -1131,6 +1083,57 @@ void MKLDNNGraph::RemoveDroppedEdges() { } } +void MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const TensorDesc& inDesc, const TensorDesc& outDesc, + bool isOptimized, InferenceEngine::Blob::Ptr scales) { + CNNLayerPtr layer(new CNNLayer({layerName, + "Reorder", + inDesc.getPrecision()})); + MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, getEngine(), weightsCache)); + auto *reorderPtr = dynamic_cast(newReorder.get()); + if (reorderPtr) { + reorderPtr->setDescs(inDesc, outDesc); + reorderPtr->_scales = scales; + } + + auto oIndex = edge->getOutputNum(); + auto iIndex = edge->getInputNum(); + if (iIndex < 0 || oIndex < 0) + THROW_IE_EXCEPTION << "Cannot create reorder for nodes: " + << edge->getParent()->getName() << " and " + << edge->getChild()->getName() << "."; + + edge->drop(); + + MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0)); + MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex)); + + // Add edge for beforeNode + beforeNode->getChild()->parentEdges.push_back(beforeNode); + edge->getParent()->childEdges.push_back(beforeNode); + + // Add edge for afterNode + afterNode->getParent()->childEdges.push_back(afterNode); + edge->getChild()->parentEdges.push_back(afterNode); + + reorderPtr->setOptimized(isOptimized); + + newReorder->getSupportedDescriptors(); + newReorder->initSupportedPrimitiveDescriptors(); + newReorder->selectOptimalPrimitiveDescriptor(); + + graphEdges.push_back(beforeNode); + graphEdges.push_back(afterNode); + + // Using the method MKLDNNEdge::getDesc() we can check that input and output tensor descriptors are equal. + // Due to the specificity of MKLDNNGraphOptimizer::MergePermuteAndReorder() that isOptimized flag uses, we shouldn't do these checks. + if (!isOptimized) { + beforeNode->getDesc(); + afterNode->getDesc(); + } + + graphNodes.push_back(newReorder); +} + void MKLDNNGraph::dumpToDotFile(std::string file) const { std::ofstream dot; dot.open(file); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index d4c8bff..b97cf9d 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -92,6 +92,28 @@ public: void DropNode(const MKLDNNNodePtr& node); void DropDWConvNode(const MKLDNNNodePtr& node); + /** + * @brief Insert Reorder node at the edge-specified location. + * The Reorder node must be inserted in case when there are inplace conflicts or the input and output tensor descriptors do not match. + * The Reorder node rearranges the elements in memory according to inDesc and outDesc, or reinterprets memory descriptor without + * rearrangement of elements if isOptimized is true. + * @param edge + * pointer to the edge in the graph where Reorder node will be inserted + * @param layerName + * Reorder layer name + * @param inDesc + * input tensor descriptor + * @param outDesc + * output tensor descriptor + * @param isOptimized + * optimization flag; if isOptimized is true then Reorder node does nothing + * @param scales + * pointer to the blob containing scales + * @return none. + */ + void InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const InferenceEngine::TensorDesc& inDesc, const InferenceEngine::TensorDesc& outDesc, + bool isOptimized = false, InferenceEngine::Blob::Ptr scales = nullptr); + InferenceEngine::CNNNetwork dump() const; template diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index c705395..9ca7177 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -14,6 +14,7 @@ #include "nodes/mkldnn_bin_conv_node.h" #include "nodes/mkldnn_quantize_node.h" #include "nodes/mkldnn_mvn_node.h" +#include #include "nodes/mkldnn_resample_node.h" #include "nodes/mkldnn_interpolate_node.h" #include "nodes/mkldnn_input_node.h" @@ -151,6 +152,9 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap graph.RemoveDroppedNodes(); #endif + MergePermuteAndReorder(graph); + graph.RemoveDroppedNodes(); + graph.RemoveDroppedEdges(); } @@ -1812,8 +1816,9 @@ void MKLDNNGraphOptimizer::RemoveIdentityOperator(MKLDNNGraph &graph) { #if defined (COMPILED_CPU_MKLDNN_REORDER_NODE) void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { std::set processed; - std::vector newNodes; - for (MKLDNNNodePtr& node : graph.GetNodes()) { + int graphNodesSize = graph.GetNodes().size(); + for (int i = 0; i < graphNodesSize; i++) { + MKLDNNNodePtr& node = graph.GetNodes()[i]; if (processed.find(node) == processed.end() && node->getType() == Reorder && node->getChildEdges().size() == 1 && node->getChildEdgeAt(0)->getChild()->getType() == Reorder ) { @@ -1855,54 +1860,10 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName(); - CNNLayerPtr layer(new CNNLayer({layerName, - "Reorder", - n->getInput().getPrecision()})); - MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, graph.getEngine(), graph.weightsCache)); - auto *reorderPtr = dynamic_cast(newReorder.get()); - if (reorderPtr) { - reorderPtr->setDescs(n->getInput(), nn->getOutput()); - reorderPtr->_scales = scales; - } - - // new !!! - auto oIndex = edge->getOutputNum(); - auto iIndex = edge->getInputNum(); - if (iIndex < 0 || oIndex < 0) - THROW_IE_EXCEPTION << "Cannot create reorder for nodes: " - << edge->getParent()->getName() << " and " - << edge->getChild()->getName() << "."; - edge->drop(); - - MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0)); - MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex)); - - // Add edge for beforeNode - beforeNode->getChild()->parentEdges.push_back(beforeNode); - edge->getParent()->childEdges.push_back(beforeNode); - - // Add edge for afterNode - afterNode->getParent()->childEdges.push_back(afterNode); - edge->getChild()->parentEdges.push_back(afterNode); - - newReorder->getSupportedDescriptors(); - newReorder->initSupportedPrimitiveDescriptors(); - newReorder->selectOptimalPrimitiveDescriptor(); - - graph.GetEdges().push_back(beforeNode); - graph.GetEdges().push_back(afterNode); - - // Just to check accordance - afterNode->getDesc(); - beforeNode->getDesc(); - - newNodes.push_back(newReorder); + graph.InsertReorder(edge, layerName, n->getInput(), nn->getOutput(), false, scales); graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end()); } } - for (MKLDNNNodePtr& node : newNodes) { - graph.GetNodes().push_back(node); - } } void MKLDNNGraphOptimizer::DropConvertReorder(MKLDNNGraph& graph) { @@ -2247,3 +2208,142 @@ void MKLDNNGraphOptimizer::FuseScaleShiftAndQuantize(MKLDNNGraph &graph) { } } } + +void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { + auto& graphNodes = graph.GetNodes(); + + auto isSutableParentNode = [](MKLDNNNodePtr node) { + return node->getType() == Permute && node->getChildEdges().size() == 1; + }; + + auto isSutableChildNode = [](MKLDNNNodePtr node) { + return node->getType() == Reorder && node->getChildEdges().size() == 1; + }; + + // Method checkAscendingSummaryOrder() checks that after the sequential execution of Permute and Reorder nodes, + // the order of the elements in the memory will not change. In other words, that Permute+Reorder is identical permutation. + auto checkAscendingSummaryOrder = [](std::shared_ptr &parentNode, std::shared_ptr &childNode) -> bool { + auto* permuteNode = dynamic_cast(parentNode.get()); + auto* reorderNode = dynamic_cast(childNode.get()); + if (!permuteNode || !reorderNode) { + return false; + } + + auto& permuteOrder = permuteNode->getOrder(); + auto& layoutOrder = permuteNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); + auto& inOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getBlockingDesc().getOrder(); + auto& outOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); + + if (permuteOrder.size() != layoutOrder.size() || layoutOrder.size() != inOrder.size() || inOrder.size() != outOrder.size()) { + return false; + } + + // revLayoutOrder - reverse permutation for layoutOrder + auto revLayoutOrder = SizeVector(layoutOrder.size()); + for (int i = 0; i < revLayoutOrder.size(); i++) { + revLayoutOrder[layoutOrder[i]] = i; + } + + // newPermuteOrder - Permute layout-aware permutation + auto newPermuteOrder = SizeVector(permuteOrder.size()); + for (int i = 0; i < newPermuteOrder.size(); i++) { + newPermuteOrder[i] = layoutOrder[permuteOrder[revLayoutOrder[i]]]; + } + + // reorderOrder - Reorder layout-aware permutation + auto reorderOrder = SizeVector(outOrder.size()); + for (int i = 0; i < reorderOrder.size(); i++) { + for (int j = 0; j < reorderOrder.size(); j++) { + if (outOrder[i] == inOrder[j]) { + reorderOrder[i] = j; + continue; + } + } + } + + // summaryOrder - resulting Permute+Reorder permutation + auto summaryOrder = SizeVector(permuteOrder.size()); + for (int i = 0; i < summaryOrder.size(); i++) { + summaryOrder[i] = reorderOrder[newPermuteOrder[i]]; + } + + // check that Permute+Reorder is the identical permutation + for (int i = 0; i < summaryOrder.size(); i++) { + if (summaryOrder[i] != i) { + return false; + } + } + + return true; + }; + + // Permute and Reorder do opposite permutation to each other. + // Example: + // chain [physical layout: NCHW, logical layout: NCHW] -> Permute(order=0312) -> [physical layout: NWCH, logical layout: NCHW] -> + // Reorder(nchw->nhwc) -> [physical layout: NCHW, logical layout: NHWC] can be replaced with Reorder(nchw->nhwc; isOptimized=true) + // which will just reinterprets layout without physical change of the memory. + // Two cases are possible: + // 1) inPrec = outPrec + // In this case, we replace Permute+Reorder pattern with a new Reorder that does nothing. + // 2) inPrec != outPrec + // As in the first case, we also replace Permute+Reorder pattern with a new Reorder. + // Additionally, we insert another Reorder that performs the conversion from the input precision (inPrec) + // to the output precision (outPrec) + auto mergePermuteAndReorder = [&](std::shared_ptr& parentNode, std::shared_ptr& childNode) { + auto parentParentNode = parentNode->getParentEdgeAt(0)->getParent(); + auto childChildNode = childNode->getChildEdgeAt(0)->getChild(); + + graph.DropNode(parentNode); + graph.DropNode(childNode); + + auto inDesc = parentParentNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc; + auto outDesc = childChildNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc; + + auto inPrec = inDesc.getPrecision(); + auto outPrec = outDesc.getPrecision(); + + auto reorderInDesc = TensorDesc(inDesc); + auto reorderOutDesc = TensorDesc(outDesc); + reorderOutDesc.setPrecision(inPrec); + + std::string reorderlayerName = parentParentNode->getName() + "_" + + MKLDNNExtensionUtils::getReorderArgs(reorderInDesc, reorderOutDesc) + "_" + "fake"; + + MKLDNNEdgePtr edge; + for (auto &childEdge : parentParentNode->getChildEdges()) { + if (childEdge.lock()->getChild() == childChildNode) { + edge = childEdge.lock(); + break; + } + } + + graph.InsertReorder(edge, reorderlayerName, reorderInDesc, reorderOutDesc, true); + + // case 2 + if (inPrec != outPrec) { + auto reorderNode = parentParentNode->getChildEdgeAt(0)->getChild(); + auto reorderInDesc2 = TensorDesc(reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc); + auto reorderOutDesc2 = TensorDesc(childChildNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc); + + std::string reorderLayerName2 = reorderNode->getName() + "_" + + MKLDNNExtensionUtils::getReorderArgs(reorderInDesc2, reorderOutDesc2) + "_" + childChildNode->getName(); + + graph.InsertReorder(reorderNode->getChildEdgeAt(0), reorderLayerName2, reorderInDesc2, reorderOutDesc2, false); + } + }; + + for (int i = 0; i < graphNodes.size(); i++) { + auto parentNode = graphNodes[i]; + if (!isSutableParentNode(parentNode)) { + continue; + } + auto childNode = parentNode->getChildEdgeAt(0)->getChild(); + if (!isSutableChildNode(childNode)) { + continue; + } + + if (checkAscendingSummaryOrder(parentNode, childNode)) { + mergePermuteAndReorder(parentNode, childNode); + } + } +} diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h index 54bdda6..481ca61 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h @@ -52,6 +52,7 @@ private: void FuseEltwiseAndSimple(MKLDNNGraph &graph); void FuseScaleShiftAndQuantize(MKLDNNGraph &graph); void FuseClampAndQuantize(MKLDNNGraph &graph); + void MergePermuteAndReorder(MKLDNNGraph &graph); bool IsOneOf(Type type, std::vector types); bool IsOneOf(EltwiseOpType alg, std::vector algs); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h index ea8d5c6..e35b312 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h @@ -55,6 +55,10 @@ public: return false; } + const InferenceEngine::SizeVector& getOrder() const { + return order; + } + private: InferenceEngine::SizeVector order; InferenceEngine::Precision prec; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp index a71f983..ab04b72 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp @@ -46,6 +46,10 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].constant = false; config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; + if (isOptimized) { + config.inConfs[0].inPlace = 0; + config.outConfs[0].inPlace = 0; + } if (input.getLayout() != InferenceEngine::Layout::ANY && output.getLayout() != InferenceEngine::Layout::ANY) { config.inConfs[0].desc = input; config.outConfs[0].desc = output; @@ -71,6 +75,7 @@ void MKLDNNReorderNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set."; + if (!isOptimized) createReorderPrimitive(srcMemPtr->GetDescriptor(), srcMemPtr->GetPrimitive().get_data_handle(), dstMemPtr->GetDescriptor(), dstMemPtr->GetPrimitive().get_data_handle()); } @@ -169,6 +174,9 @@ bool MKLDNNReorderNode::created() const { } void MKLDNNReorderNode::execute(mkldnn::stream strm) { + if (isOptimized) + return; + src_blocked->GetPrimitivePtr()->set_data_handle(getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle()); dst_blocked->GetPrimitivePtr()->set_data_handle(getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h index 29ad087..0d468bc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h @@ -29,6 +29,10 @@ public: this->output = output; } + void setOptimized(bool isOptimized) { + this->isOptimized = isOptimized; + } + void setDynamicBatchLim(int lim) override; bool canBeInPlace() const override { @@ -50,6 +54,8 @@ private: MKLDNNMemoryPtr dst_blocked; MKLDNNMemoryPtr src_blocked; + bool isOptimized = false; + void createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr); }; diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/fuse_permute_reorder.hpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/fuse_permute_reorder.hpp new file mode 100644 index 0000000..cfbd70c --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/fuse_permute_reorder.hpp @@ -0,0 +1,35 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "test_utils/cpu_test_utils.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + +using namespace CPUTestUtils; + +namespace LayerTestsDefinitions { + +using FusePermuteAndReorderParams = std::tuple< + InferenceEngine::SizeVector, // Input shape + InferenceEngine::Precision // Input precision +>; + +class FusePermuteAndReorderTest : public testing::WithParamInterface, public CPUTestsBase, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; + std::string pluginTypeNode; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_permute_reorder.cpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_permute_reorder.cpp new file mode 100644 index 0000000..6f1fb7d --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_permute_reorder.cpp @@ -0,0 +1,82 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "subgraph_tests/include/fuse_permute_reorder.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace LayerTestsDefinitions { + +std::string FusePermuteAndReorderTest::getTestCaseName(testing::TestParamInfo obj) { + std::ostringstream result; + SizeVector inputShape; + Precision inPrec; + std::tie(inputShape, inPrec) = obj.param; + + result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_"; + result << "Precision=" << inPrec.name(); + + return result.str(); +} + +void FusePermuteAndReorderTest::SetUp() { + targetDevice = CommonTestUtils::DEVICE_CPU; + SizeVector inputShape; + Precision inPrec; + + std::tie(inputShape, inPrec) = this->GetParam(); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec); + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + auto paramOuts = ngraph::helpers::convert2OutputVector( + ngraph::helpers::castOps2Nodes(params)); + + auto order = inputShape.size() == 5 ? std::vector{0, 2, 3, 4, 1} : std::vector{0, 2, 3, 1}; + auto memFmt = inputShape.size() == 5 ? ndhwc : nhwc; + + auto constOrder = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order); + + auto permute = std::make_shared(paramOuts[0], constOrder); + + permute->get_rt_info() = setCPUInfo({memFmt}, {memFmt}, {}); + + ngraph::ResultVector results{std::make_shared(permute)}; + function = std::make_shared(results, params, "PermuteReorder"); +} + +TEST_P(FusePermuteAndReorderTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + + InferenceEngine::CNNNetwork execGraphInfo = executableNetwork.GetExecGraphInfo(); + auto function = execGraphInfo.getFunction(); + ASSERT_NE(nullptr, function); + bool permuteFound = false; + for (const auto &node : function->get_ops()) { + const auto & rtInfo = node->get_rt_info(); + auto getExecValue = [&rtInfo](const std::string & paramName) -> std::string { + auto it = rtInfo.find(paramName); + IE_ASSERT(rtInfo.end() != it); + auto value = std::dynamic_pointer_cast>(it->second); + IE_ASSERT(nullptr != value); + return value->get(); + }; + if (getExecValue(ExecGraphInfoSerialization::LAYER_TYPE) == "Permute") { + permuteFound = true; + break; + } + } + ASSERT_TRUE(!permuteFound); +} + +const auto fusePermuteAndReorderParams = ::testing::Combine( + ::testing::Values(SizeVector{1, 2, 3, 4}, SizeVector{1, 2, 3, 4, 5}), + ::testing::Values(Precision::I8, Precision::U8) +); + +INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest, fusePermuteAndReorderParams, FusePermuteAndReorderTest::getTestCaseName); + +} // namespace LayerTestsDefinitions