// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <blob_factory.hpp>
#include "nodes/mkldnn_reshape_node.h"
#include "mkldnn_graph_optimizer.h"
#include <nodes/mkldnn_activation_node.h>
#include "nodes/mkldnn_pooling_node.h"
#include "nodes/mkldnn_eltwise_node.h"
#include "nodes/mkldnn_depthwise_node.h"
#include "nodes/mkldnn_concat_node.h"
#include "nodes/mkldnn_reorder_node.h"

#include <algorithm>
#include <list>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include <ie_layers_internal.hpp>
#include <nodes/mkldnn_bin_conv_node.h>
#include <nodes/mkldnn_quantize_node.h>
#include "cpu_isa_traits.hpp"

using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {}
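// Target-independent optimizations. Each fusing pass only marks nodes as
// dropped; the subsequent RemoveDroppedNodes() call compacts the node list
// before the next pass runs. Dangling edges are swept once at the end.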
void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
    MergeGroupConvolution(graph);
    graph.RemoveDroppedNodes();

    FuseConvolutionAndDepthwise(graph);
    graph.RemoveDroppedNodes();

    FuseConvolutionAndActivation(graph);
    graph.RemoveDroppedNodes();

    FuseConvolutionAndDepthwise(graph);
    graph.RemoveDroppedNodes();

    FuseConvolutionAndDWConvolution(graph);
    graph.RemoveDroppedNodes();

    FuseBinaryConvolutionAndQuantize(graph);
    graph.RemoveDroppedNodes();

    FuseBatchNormWithScale(graph);
    graph.RemoveDroppedNodes();

    FuseFullyConnectedAndActivation(graph);
    graph.RemoveDroppedNodes();

    RemoveIdentityOperator(graph);
    graph.RemoveDroppedNodes();

    FuseConvolutionSumAndConvolutionSumActivation(graph);
    graph.RemoveDroppedNodes();

    graph.RemoveDroppedEdges();
}
void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &graph) {
    RemoveIOScaleShifts(graph);
    graph.RemoveDroppedNodes();

    DropDoubleReorders(graph);
    graph.RemoveDroppedNodes();

    graph.RemoveDroppedEdges();
}
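// Collapses the pattern Split -> N identical Convolutions -> Concat into a
// single grouped Convolution. All branches must have the same node type and
// the same input/output dimensions; the channel counts of the branches are
// accumulated into the merged node's dims.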
void MKLDNNGraphOptimizer::MergeGroupConvolution(MKLDNNGraph &graph) {
    for (auto node : graph.GetNodes()) {
        // Split with at least 2 Convolutions
        if (!IsOneOf(node->getType(), {Split}) || node->getChildEdges().size() < 2 ||
                !IsOneOf(node->getChildEdgeAt(0)->getChild()->getType(), {Convolution, Convolution_Activation})) {
            continue;
        }

        auto split = node;
        bool canBeMerged = true;

        auto convInEdge = split->getChildEdgeAt(0);
        auto conv = convInEdge->getChild();
        auto convOutEdge = conv->getChildEdgeAt(0);

        auto convType = conv->getType();
        auto convInDims = convInEdge->getDims();
        auto convOutDims = convOutEdge->getDims();

        // Convolutions of the same type with Concat as a child
        for (size_t i = 1; i < split->getChildEdges().size(); i++) {
            auto childEdge = split->getChildEdgeAt(i);
            auto child = childEdge->getChild();
            Type type = child->getType();

            if (convType != type || child->getChildEdgeAt(0)->getChild()->getType() != Concatenation ||
                    convOutDims != child->getChildEdgeAt(0)->getDims() || child->getChildEdges().size() != 1 ||
                    convInDims != childEdge->getDims()) {
                canBeMerged = false;
                break;
            }
        }

        if (!canBeMerged) continue;

        // TODO: Rewrite the topology optimizer entirely; it should be clean and understandable
        auto concat = conv->getChildEdgeAt(0)->getChild();
        // Merge and remove the peer Convolutions
        for (size_t i = 1; i < split->getChildEdges().size(); i++) {
            auto peerInEdge = split->getChildEdgeAt(i);
            auto peer = peerInEdge->getChild();
            conv->mergeWith(peer);
            convInDims[1] += (peerInEdge->getDims())[1];
            convOutDims[1] += (peer->getChildEdgeAt(0)->getDims())[1];
            peer->remove();
        }
        conv->inDims[0] = convInDims;
        conv->outDims[0] = convOutDims;

        conv->fuseWith(split);
        conv->fuseWith(concat);

        graph.DropNode(split);
        graph.DropNode(concat);
    }
}
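// Folds a ScaleShift that directly follows a BatchNormalization into the
// BatchNormalization node, provided the latter is not a graph output and has
// exactly one consumer.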
void MKLDNNGraphOptimizer::FuseBatchNormWithScale(MKLDNNGraph &graph) {
    auto &graphNodes = graph.GetNodes();

    for (int i = 0; i < graphNodes.size(); i++) {
        const auto& bn = graphNodes[i];
        if (bn->getType() == BatchNormalization) {
            const auto& outputNodes = graph.GetOutputNodes();
            const std::string node_name = bn->getName();
            // Check that the node is not an output node
            if (std::find_if(outputNodes.begin(), outputNodes.end(),
                             [&node_name](const MKLDNNNodePtr& x) {
                                 return x->getName() == node_name; }) == outputNodes.end()) {
                if (bn->getChildEdges().size() == 1) {
                    auto child = bn->getChildEdgeAt(0)->getChild();
                    if (child->type == Depthwise && child->getCnnLayer()->type == "ScaleShift") {
                        bn->fuseWith(child);
                        graph.DropNode(child);
                    }
                }
            }
        }
    }
}
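// Fuses an activation into a preceding (Binary)Convolution: ReLU always;
// ELU, sigmoid, bounded ReLU, and clamp only for FP32 convolutions. Also
// handles a chain of two fusable activations and the
// Conv -> MaxPool -> Activation pattern (the pooling stays in place).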
void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) {
    auto isOneOf = [&](mkldnn::algorithm alg, std::vector<mkldnn::algorithm> algs) {
        for (auto a : algs) {
            if (a == alg)
                return true;
        }
        return false;
    };

    auto& graphNodes = graph.GetNodes();

    auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr activation) {
        if (!activation->getCnnLayer())
            return false;

        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(activation.get());

        return activationNode &&
            (activationNode->getAlgorithm() == eltwise_relu ||
            (conv->getCnnLayer()->precision == Precision::FP32 &&
             isOneOf(activationNode->getAlgorithm(), {eltwise_elu, eltwise_logistic, eltwise_bounded_relu, eltwise_clamp})));
    };

    for (int i = 0; i < graphNodes.size(); i++) {
        if (graphNodes[i]->getType() == Convolution || graphNodes[i]->getType() == BinaryConvolution) {
            auto conv = graphNodes[i];

            auto fuse = [&] (MKLDNNNodePtr relu) {
                if (graphNodes[i]->getType() != BinaryConvolution)
                    conv->setType(Convolution_Activation);
                conv->fuseWith(relu);
            };

            if (conv->getChildEdges().size() == 1) {
                auto ch1 = conv->getChildEdgeAt(0)->getChild();

                if (isFusingSupported(conv, ch1)) {
                    fuse(ch1);

                    if (ch1->getChildEdges().size() == 1) {
                        auto ch2 = ch1->getChildEdgeAt(0)->getChild();

                        if (isFusingSupported(conv, ch2)) {
                            fuse(ch2);
                            graph.DropNode(ch2);
                        }
                    }
                    graph.DropNode(ch1);
                } else if (ch1->type == Pooling) {
                    auto pool = ch1;
                    bool is_max_pool =
                            dynamic_cast<PoolingLayer *>(pool->getCnnLayer().get())->_type ==
                            PoolingLayer::PoolType::MAX;

                    if (is_max_pool && pool->getChildEdges().size() == 1) {
                        auto ch2 = pool->getChildEdgeAt(0)->getChild();
                        if (isFusingSupported(conv, ch2)) {
                            fuse(ch2);
                            graph.DropNode(ch2);
                        }
                    }
                }
            }
        }
    }
}
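// Fuses Depthwise post-ops (ScaleShift with biases, or PReLU) into a
// preceding FP32 Convolution or a BinaryConvolution; up to two consecutive
// Depthwise nodes can be absorbed.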
void MKLDNNGraphOptimizer::FuseConvolutionAndDepthwise(MKLDNNGraph &graph) {
    auto& graphNodes = graph.GetNodes();

    auto isSuitableParentNode = [](MKLDNNNodePtr node) {
        bool isSuitableConv = (node->getType() == Convolution || node->getType() == Convolution_Activation) &&
                              node->getCnnLayer()->precision == Precision::FP32;
        bool isSuitableBinConv = node->getType() == BinaryConvolution;
        return (isSuitableConv || isSuitableBinConv) && node->getChildEdges().size() == 1;
    };

    auto isSuitableChildNode = [](MKLDNNNodePtr node) {
        if (node->getType() != Depthwise)
            return false;

        if (!node->getCnnLayer())
            return false;

        auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
        return depthwiseNode &&
               ((depthwiseNode->getAlgorithm() == mkldnn::algorithm::depthwise_scale_shift && depthwiseNode->isWithBiases()) ||
                (depthwiseNode->getAlgorithm() == mkldnn::algorithm::depthwise_prelu));
    };

    for (int i = 0; i < graphNodes.size(); i++) {
        auto conv = graphNodes[i];
        if (!isSuitableParentNode(conv)) continue;

        auto depthwise0 = conv->getChildEdgeAt(0)->getChild();
        if (!isSuitableChildNode(depthwise0)) continue;

        conv->fuseWith(depthwise0);
        if (conv->type != BinaryConvolution)
            conv->setType(Convolution_Depthwise);

        if (depthwise0->getChildEdges().size() == 1) {
            auto depthwise1 = depthwise0->getChildEdgeAt(0)->getChild();

            if (isSuitableChildNode(depthwise1)) {
                conv->fuseWith(depthwise1);
                graph.DropNode(depthwise1);
            }
        }

        graph.DropNode(depthwise0);
    }
}
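// Fuses a 3x3 depthwise child Convolution (out_depth == group, pads of 1,
// dilation of 1, with biases) into its parent (Binary)Convolution as a
// DW-conv post-op. The fusion is applied only when it pays off: for int8
// whenever AVX512 is unavailable, and for FP32 when the depthwise input plus
// output would exceed half of the L3 cache (fusing then avoids a round trip
// through memory).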
void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
    auto& graphNodes = graph.GetNodes();

    auto isConvolutionNode = [](MKLDNNNodePtr node) {
        return node->getType() == Convolution || node->getType() == Convolution_Activation;
    };

    auto isBinaryConvolutionNode = [](MKLDNNNodePtr node) {
        return node->getType() == BinaryConvolution;
    };

    auto is1x1Convolution = [](ConvolutionLayer* layer) {
        return layer->_kernel[X_AXIS] == 1 && layer->_kernel[Y_AXIS] == 1;
    };

    auto isSuitableParentConvolution = [&](MKLDNNNodePtr node) {
        if (isBinaryConvolutionNode(node)) {
            auto *layer = dynamic_cast<BinaryConvolutionLayer *>(node->getCnnLayer().get());

            bool isSupportedParams = layer->_group == 1;
            if (!isSupportedParams) return false;
        } else {
            auto *layer = dynamic_cast<ConvolutionLayer *>(node->getCnnLayer().get());

            bool isSupportedParams = layer->_group == 1 &&
                    ((is1x1Convolution(layer) && layer->_stride[X_AXIS] == 1 &&
                      layer->_stride[Y_AXIS] == 1) || !is1x1Convolution(layer)) &&
                    (layer->precision == Precision::FP32 || layer->precision == Precision::I8);
            if (!isSupportedParams) return false;
        }

        return node->getChildEdges().size() == 1 && isConvolutionNode(node->getChildEdgeAt(0)->getChild());
    };

    auto isSuitableChildConvolution = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
        auto* childLayer = dynamic_cast<ConvolutionLayer*>(childNode->getCnnLayer().get());

        if (!isBinaryConvolutionNode(parentNode)) {
            auto* parentLayer = dynamic_cast<ConvolutionLayer*>(parentNode->getCnnLayer().get());
            if (parentLayer->precision != childLayer->precision)
                return false;
        }

        auto allPads = getPaddings(*childLayer);
        bool isSupportedParams = childLayer->_out_depth == childLayer->_group &&
                                 childLayer->_out_depth != 1 &&
                                 // Depthwise convolution output should be multiple of 8
                                 childLayer->_kernel[X_AXIS] == 3 && childLayer->_kernel[Y_AXIS] == 3 &&
                                 allPads.begin[X_AXIS] == 1 && allPads.begin[Y_AXIS] == 1 &&
                                 childLayer->_dilation[X_AXIS] == 1 && childLayer->_dilation[Y_AXIS] == 1 &&
                                 childLayer->_biases != nullptr && childLayer->_biases->size() != 0;

        return isSupportedParams;
    };

    auto isFusingWorthwhile = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
        if (isBinaryConvolutionNode(parentNode)) {
            return true;
        }

        auto* layer = dynamic_cast<ConvolutionLayer*>(childNode->getCnnLayer().get());

        auto inDims = childNode->inDims[0];
        auto outDims = childNode->outDims[0];
        int elemSize = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(layer->precision));

        int L3_cache_size = mkldnn_get_cache_size(3, false);
        int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * elemSize;
        int dw_conv_output_size = outDims[0] * outDims[1] * outDims[2] * outDims[3] * elemSize;

        bool isInt8 = layer->precision == Precision::I8 || layer->precision == Precision::U8;
        bool isAVX512NotSupported = !mkldnn::impl::cpu::mayiuse(impl::cpu::cpu_isa_t::avx512_common);

        return isInt8 ? isAVX512NotSupported : (dw_conv_input_size + dw_conv_output_size > L3_cache_size / 2);
    };

    for (int i = 0; i < graphNodes.size(); i++) {
        if (!isConvolutionNode(graphNodes[i]) && !isBinaryConvolutionNode(graphNodes[i])) continue;

        auto parentConvNode = graphNodes[i];
        if (!isSuitableParentConvolution(parentConvNode)) continue;

        auto childConvNode = parentConvNode->getChildEdgeAt(0)->getChild();
        if (!isSuitableChildConvolution(parentConvNode, childConvNode)) continue;

        if (!isFusingWorthwhile(parentConvNode, childConvNode)) continue;

        parentConvNode->fuseWith(childConvNode);
        graph.DropNode(childConvNode);
    }
}
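// Fuses a binarization Quantize (levels == 2) into a preceding
// BinaryConvolution: the per-channel input-low values become binarization
// thresholds on the convolution node, and the Quantize's threshold inputs
// (Input nodes) are detached from the graph.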
void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph) {
    auto removeEdge = [](MKLDNNGraph &graph, MKLDNNEdgePtr& edge) {
        auto& edges = graph.GetEdges();
        for (auto it = edges.begin(); it != edges.end(); it++) {
            if ((*it) == edge) {
                edges.erase(it);
                return;
            }
        }
    };

    auto& graphNodes = graph.GetNodes();

    auto isSuitableParentNode = [](MKLDNNNodePtr node) {
        bool isSuitableBinConv = node->getType() == BinaryConvolution;
        return isSuitableBinConv && node->getChildEdges().size() == 1;
    };

    auto isSuitableChildNode = [](MKLDNNNodePtr node) {
        if (!node->getCnnLayer())
            return false;

        auto* quantizeLayer = dynamic_cast<QuantizeLayer*>(node->getCnnLayer().get());
        bool isSuitableQuantize = node->getType() == Quantize && quantizeLayer != nullptr &&
                                  quantizeLayer->levels == 2;

        return isSuitableQuantize;
    };

    for (int i = 0; i < graphNodes.size(); i++) {
        auto parent = graphNodes[i];
        if (!isSuitableParentNode(parent)) continue;

        auto child = parent->getChildEdgeAt(0)->getChild();
        if (!isSuitableChildNode(child)) continue;

        parent->fuseWith(child);

        auto* binConvNode = dynamic_cast<MKLDNNBinaryConvolutionNode*>(parent.get());

        auto parents = child->parentEdges;
        for (size_t j = 0; j < parents.size(); j++) {
            auto p_edge = parents[j].lock();
            if (p_edge->getParent()->getType() == Input) {
                InferenceEngine::SizeVector dims;
                dims.push_back(binConvNode->getChildEdgeAt(0)->getDims()[1]);

                auto InputLowBlob = dynamic_cast<TBlob<float>*>(p_edge->getParent()->getCnnLayer()->blobs["custom"].get());

                auto inputLowData = InputLowBlob->buffer().as<float*>();
                int inputLowAxis = p_edge->getDims().ndims() == 1 ? 0 : 1;
                bool isInputLowBroadcasted = p_edge->getDims()[inputLowAxis] != dims[0];

                for (int c = 0; c < dims[0]; c++) {
                    binConvNode->pushBinarizationThreshold(inputLowData[isInputLowBroadcasted ? 0 : c]);
                }

                break;
            }
        }

        for (size_t j = 0; j < parents.size(); j++) {
            auto p_edge = parents[j].lock();
            if (p_edge->getParent()->getType() == BinaryConvolution)
                continue;

            removeEdge(graph, p_edge);
        }

        graph.DropNode(child);
    }
}
/**
 * Check whether there is a data dependency between parent and child:
 * BFS over child edges starting from parent, comparing each visited node
 * with child.
 *
 * @param parent head of the BFS
 * @param child  node we try to find
 * @return true if child consumes data produced by parent, directly or transitively
 */
static bool is_data_dependency(const std::shared_ptr<MKLDNNNode> &parent,
                               const std::shared_ptr<MKLDNNNode> &child) {
    std::set<MKLDNNNode*> visited;
    std::list<MKLDNNNode*> nextLayers {parent.get()};

    while (!nextLayers.empty()) {
        auto layer = *nextLayers.begin();
        if (layer == child.get()) return true;
        for (auto oe : layer->getChildEdges()) {
            auto nn = oe.lock()->getChild();
            if (visited.find(nn.get()) == visited.end()) {
                nextLayers.push_back(nn.get());
                visited.insert(nn.get());
            }
        }
        nextLayers.pop_front();
    }

    return false;
}
/**
 * Fuses the pattern
 *
 *     any ------\
 *                Eltwise(Sum) [--> Activation] --> ...
 *     conv 2 ---/
 *
 * into a single convolution with a Sum (and optionally an Activation)
 * post-op: the peer branch's output is reconnected as an extra input of the
 * merged convolution and the addition is performed in place in its output
 * buffer.
 */
void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNGraph &graph) {
    std::vector<MKLDNNNodePtr> &graphNodes = graph.GetNodes();

    auto isOneOf = [&](mkldnn::algorithm alg, std::vector<mkldnn::algorithm> algs) {
        for (auto a : algs) {
            if (a == alg)
                return true;
        }
        return false;
    };

    auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr activation) {
        if (!activation->getCnnLayer())
            return false;

        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(activation.get());

        return activationNode &&
            (activationNode->getAlgorithm() == eltwise_relu ||
            (conv->getCnnLayer()->precision == Precision::FP32 &&
             isOneOf(activationNode->getAlgorithm(), {eltwise_elu, eltwise_logistic, eltwise_bounded_relu, eltwise_clamp})));
    };

    for (auto &graphNode : graphNodes) {
        if (graphNode->getType() != Eltwise)
            continue;

        if (!std::dynamic_pointer_cast<MKLDNNEltwiseNode>(graphNode)->isSum()) continue;
        if (!std::dynamic_pointer_cast<MKLDNNEltwiseNode>(graphNode)->isUnitScales()) continue;

        auto parent1 = graphNode->getParentEdgeAt(0)->getParent();
        auto parent2 = graphNode->getParentEdgeAt(1)->getParent();
        // TODO: Enlarge to several inputs
        if (graphNode->getParentEdges().size() != 2 ||
            (parent1->getType() != Convolution && parent1->getType() != BinaryConvolution &&
             parent2->getType() != Convolution && parent2->getType() != BinaryConvolution))
            continue;

        auto mergedConv = (parent1->getType() == Convolution || parent1->getType() == BinaryConvolution) ? parent1 : parent2;
        auto peerNode = (parent1->getType() == Convolution || parent1->getType() == BinaryConvolution) ? parent2 : parent1;
        if ((peerNode->getType() == Convolution || peerNode->getType() == BinaryConvolution) &&
                mergedConv->getChildEdges().size() != 1) {
            mergedConv = parent2;
            peerNode = parent1;
        }

        auto sum = graphNode;
        auto lastNode = sum;

        bool fuse_allowed = mergedConv->getChildEdges().size() == 1;
        for (size_t j = 0; fuse_allowed && j < mergedConv->getParentEdges().size(); j++)
            if (mergedConv->getParentEdgeAt(j)->getParent() == peerNode)
                fuse_allowed = false;

        // The fused Conv+Sum primitive works in place, which means the input blob
        // will be overwritten. We must verify that all other consumers have already
        // read it before we may spoil the input data.
        // TODO: rewrite once we add an "Inplace" reporting mechanism
        for (auto & edge : peerNode->getChildEdges()) {
            if (!fuse_allowed)
                break;
            fuse_allowed &= is_data_dependency(edge.lock()->getChild(), sum);
        }
        if (!fuse_allowed) continue;

        if (graphNode->getChildEdges().size() == 1 &&
                isFusingSupported(graphNode, graphNode->getChildEdgeAt(0)->getChild())) {
            auto relu_shared = graphNode->getChildEdgeAt(0)->getChild();
            lastNode = relu_shared;
            if (mergedConv->getType() != BinaryConvolution)
                mergedConv->setType(Convolution_Sum_Activation);
            mergedConv->fuseWith(sum);
        } else {
            if (mergedConv->getType() != BinaryConvolution)
                mergedConv->setType(Convolution_Sum);
        }

        mergedConv->fuseWith(lastNode);

        if (mergedConv->fusedWith.size() > 0 &&
                (mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) {
            // Merged with a DW conv; the shape may change
            mergedConv->inDims.push_back(mergedConv->fusedWith[0]->outDims[0]);
        } else {
            mergedConv->inDims.push_back(mergedConv->outDims[0]);
        }

        size_t childIdx = 0;
        for (childIdx = 0; childIdx < peerNode->getChildEdges().size(); childIdx++) {
            if (peerNode->getChildEdgeAt(childIdx)->getChild() == sum) {
                break;
            }
        }

        int peer_port = peerNode->getChildEdgeAt(childIdx)->getInputNum();
        peerNode->getChildEdgeAt(childIdx)->drop();

        MKLDNNEdgePtr edgePtr(new MKLDNNEdge(peerNode, mergedConv, peer_port, 1));
        graph.GetEdges().push_back(edgePtr);

        mergedConv->addEdge(edgePtr);

        std::vector<MKLDNNEdgeWeakPtr> edges_to_reconnect = lastNode->getChildEdges();
        for (auto &edge_w : edges_to_reconnect) {
            auto edge = edge_w.lock();
            auto child = edge->getChild();
            int idxParent = edge->getInputNum();
            int idxChild = edge->getOutputNum();

            // reconnect after the activation/sum; the port index must be 0
            IE_ASSERT(idxParent == 0);

            edge->drop();

            MKLDNNEdgePtr newEdge(new MKLDNNEdge(mergedConv, child, idxParent, idxChild));
            graph.GetEdges().push_back(newEdge);
            child->addEdge(newEdge);
        }

        if (lastNode != sum) {
            lastNode->remove();
        }
        sum->remove();
    }
}
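// Fuses a ReLU into a preceding FullyConnected node. Restricted to non-FP32
// precisions for now (see the TODO inside: FP32 fusing is not optimized yet
// in mkl-dnn).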
void MKLDNNGraphOptimizer::FuseFullyConnectedAndActivation(MKLDNNGraph &graph) {
    auto& graphNodes = graph.GetNodes();

    auto isFusingSupported = [&](MKLDNNNodePtr fc, MKLDNNNodePtr activation) {
        if (!activation->getCnnLayer())
            return false;

        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(activation.get());

        // TODO: fusing on FP32 is not optimized yet in mkl-dnn
        return activationNode && fc->getCnnLayer()->precision != Precision::FP32 &&
               (activationNode->getAlgorithm() == eltwise_relu);
    };

    for (int i = 0; i < graphNodes.size(); i++) {
        if (graphNodes[i]->getType() == FullyConnected) {
            auto fc = graphNodes[i];

            auto fuse = [&] (MKLDNNNodePtr relu) {
                fc->setType(FullyConnected_Activation);
                fc->fuseWith(relu);
            };

            if (fc->getChildEdges().size() == 1) {
                auto ch1 = fc->getChildEdgeAt(0)->getChild();

                if (isFusingSupported(fc, ch1)) {
                    fuse(ch1);
                    graph.DropNode(ch1);
                }
            }
        }
    }
}
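// Drops nodes that are provable no-ops: Power with power == 1, scale == 1,
// offset == 0; ScaleShift without weights and biases; and Copy nodes.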
void MKLDNNGraphOptimizer::RemoveIdentityOperator(MKLDNNGraph &graph) {
    for (MKLDNNNodePtr& node : graph.GetNodes()) {
        bool toDrop = false;

        if (node->getType() == Power) {
            PowerLayer* l = dynamic_cast<PowerLayer*>(node->getCnnLayer().get());

            if (l->power == 1.0f && l->scale == 1.0f && l->offset == 0.0f) toDrop = true;
        }

        if (node->getType() == Depthwise && node->getCnnLayer()->type == "ScaleShift") {
            ScaleShiftLayer* l = dynamic_cast<ScaleShiftLayer*>(node->getCnnLayer().get());

            if (l->_weights == nullptr && l->_biases == nullptr) toDrop = true;
        }

        if (node->getType() == Copy) toDrop = true;

        if (toDrop) graph.DropNode(node);
    }
}
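// Replaces two back-to-back Reorder nodes with a single direct Reorder.
// At most one of the two may carry scales; the merged Reorder inherits them.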
void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
    std::set<MKLDNNNodePtr> processed;
    std::vector<MKLDNNNodePtr> newNodes;
    for (MKLDNNNodePtr& node : graph.GetNodes()) {
        if (processed.find(node) == processed.end() && node->getType() == Reorder
            && node->getChildEdges().size() == 1
            && node->getChildEdgeAt(0)->getChild()->getType() == Reorder) {
            auto nextNode = node->getChildEdgeAt(0)->getChild();
            MKLDNNReorderNode* n = dynamic_cast<MKLDNNReorderNode*>(node.get());
            MKLDNNReorderNode* nn = dynamic_cast<MKLDNNReorderNode*>(nextNode.get());

            auto scales = n->_scales;

            if (n->_scales != nullptr && nn->_scales != nullptr) {
                THROW_IE_EXCEPTION << "Merging scales of two subsequent reorders is unsupported yet";
            }
            if (scales == nullptr) {
                scales = nn->_scales;
            }

            MKLDNNNodePtr p = n->getParentEdgeAt(0)->getParent();
            MKLDNNNodePtr c = nn->getChildEdgeAt(0)->getChild();

            auto oldEdgeNum = n->getParentEdgeAt(0)->getInputNum();

            graph.DropNode(node);
            graph.DropNode(nextNode);

            processed.insert(node);
            processed.insert(nextNode);

            MKLDNNEdgePtr edge;
            for (auto cur : p->getChildEdgesAtPort(oldEdgeNum)) {
                if (cur->getChild() == c)
                    edge = cur;
            }
            if (!edge) THROW_IE_EXCEPTION << "Inappropriate graph processing";

            std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName();
            CNNLayerPtr layer(new CNNLayer({layerName,
                                            "Reorder",
                                            n->getInput().getPrecision()}));
            MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, graph.getEngine()));
            auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
            if (reorderPtr) {
                reorderPtr->setDescs(n->getInput(), nn->getOutput());
                reorderPtr->_scales = scales;
            }

            auto oIndex = edge->getOutputNum();
            auto iIndex = edge->getInputNum();
            if (iIndex < 0 || oIndex < 0)
                THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
                                   << edge->getParent()->getName() << " and "
                                   << edge->getChild()->getName() << ".";

            edge->drop();

            MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
            MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));

            // Add edge for beforeNode
            beforeNode->getChild()->parentEdges.push_back(beforeNode);
            edge->getParent()->childEdges.push_back(beforeNode);

            // Add edge for afterNode
            afterNode->getParent()->childEdges.push_back(afterNode);
            edge->getChild()->parentEdges.push_back(afterNode);

            newReorder->getSupportedDescriptors();
            newReorder->initSupportedPrimitiveDescriptors();
            newReorder->selectOptimalPrimitiveDescriptor();

            graph.GetEdges().push_back(beforeNode);
            graph.GetEdges().push_back(afterNode);

            // Just to check accordance
            afterNode->getDesc();
            beforeNode->getDesc();

            newNodes.push_back(newReorder);
            graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end());
        }
    }
    for (MKLDNNNodePtr& node : newNodes) {
        graph.GetNodes().push_back(node);
    }
}
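// Removes the auxiliary i/oScaleShift nodes inserted around network inputs
// and outputs where the precision changes: their weights are moved into the
// adjacent Reorder node as reorder scales.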
void MKLDNNGraphOptimizer::RemoveIOScaleShifts(MKLDNNGraph &graph) {
    for (MKLDNNNodePtr& node : graph.GetNodes()) {
        if (node->getType() == Depthwise && node->getCnnLayer()->type == "ScaleShift") {
            ScaleShiftLayer* l = dynamic_cast<ScaleShiftLayer*>(node->getCnnLayer().get());

            auto cur = l->insData[0].lock();
            if (cur == nullptr) {
                THROW_IE_EXCEPTION << "[MKLDNN] error - invalid input data";
            }
            if (cur->precision != l->outData[0]->precision) {
                if (node->name.find("_iScaleShift_") != std::string::npos) {
                    auto child = node->childEdges[0].lock()->getChild();

                    if (child->type == Reorder) {
                        MKLDNNReorderNode* rn = dynamic_cast<MKLDNNReorderNode*>(child.get());
                        if (rn != nullptr) {
                            rn->_scales = l->_weights;
                            graph.DropNode(node);
                        }
                    } else {
                        THROW_IE_EXCEPTION << "Strange case. No Reorder after iScaleShift";
                    }
                } else if (node->name.find("_oScaleShift_") != std::string::npos) {
                    auto parent = node->parentEdges[0].lock()->getParent();

                    if (parent->type == Reorder) {
                        MKLDNNReorderNode* rn = dynamic_cast<MKLDNNReorderNode*>(parent.get());
                        if (rn != nullptr) {
                            rn->_scales = l->_weights;
                            graph.DropNode(node);
                        }
                    } else {
                        THROW_IE_EXCEPTION << "Strange case. No Reorder before oScaleShift";
                    }
                }
            }
        }
    }
}
bool MKLDNNGraphOptimizer::IsOneOf(Type type, std::vector<Type> types) {
    for (auto tp : types) {
        if (type == tp) {
            return true;
        }
    }
    return false;
}