// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <blob_factory.hpp>
#include "nodes/mkldnn_reshape_node.h"
#include "mkldnn_graph_optimizer.h"
#include <nodes/mkldnn_activation_node.h>
#include "nodes/mkldnn_pooling_node.h"
#include "nodes/mkldnn_eltwise_node.h"
#include "nodes/mkldnn_depthwise_node.h"
#include "nodes/mkldnn_concat_node.h"
#include "nodes/mkldnn_reorder_node.h"

#include <string>
#include <list>
#include <memory>
#include <set>
#include <ie_layers_internal.hpp>
#include <nodes/mkldnn_bin_conv_node.h>
#include <nodes/mkldnn_quantize_node.h>
#include "cpu_isa_traits.hpp"

using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;

MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {}

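// Common (target-independent) optimizations: each pass fuses or merges adjacent
// layers into a single node and marks the absorbed nodes as dropped; the dropped
// nodes and edges are then purged from the graph.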
void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
    MergeGroupConvolution(graph);
    graph.RemoveDroppedNodes();

    FuseConvolutionAndDepthwise(graph);
    graph.RemoveDroppedNodes();

    FuseConvolutionAndActivation(graph);
    graph.RemoveDroppedNodes();

    FuseConvolutionAndDepthwise(graph);
    graph.RemoveDroppedNodes();

    FuseConvolutionAndDWConvolution(graph);
    graph.RemoveDroppedNodes();

    FuseBinaryConvolutionAndQuantize(graph);
    graph.RemoveDroppedNodes();

    FuseBatchNormWithScale(graph);
    graph.RemoveDroppedNodes();

    FuseFullyConnectedAndActivation(graph);
    graph.RemoveDroppedNodes();

    RemoveIdentityOperator(graph);
    graph.RemoveDroppedNodes();

    FuseConvolutionSumAndConvolutionSumActivation(graph);
    graph.RemoveDroppedNodes();

    graph.RemoveDroppedEdges();
}

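// Implementation-specific optimizations: clean-ups that depend on the primitives
// actually selected, such as folding I/O ScaleShift layers into Reorders and
// collapsing pairs of back-to-back Reorder nodes.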
void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &graph) {
    RemoveIOScaleShifts(graph);
    graph.RemoveDroppedNodes();

    DropDoubleReorders(graph);
    graph.RemoveDroppedNodes();

    graph.RemoveDroppedEdges();
}

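// Detects a Split whose branches are identical Convolutions feeding a common
// Concat and merges them back into a single grouped Convolution, dropping the
// surrounding Split and Concat nodes.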
void MKLDNNGraphOptimizer::MergeGroupConvolution(MKLDNNGraph &graph) {
    for (auto node : graph.GetNodes()) {
        // Split with at least 2 Convolutions
        if (!IsOneOf(node->getType(), {Split}) || node->getChildEdges().size() < 2 ||
                !IsOneOf(node->getChildEdgeAt(0)->getChild()->getType(), {Convolution, Convolution_Activation})) {
            continue;
        }
        bool canBeMerged = true;

        auto& split = node;

        auto convInEdge = split->getChildEdgeAt(0);
        auto conv = convInEdge->getChild();
        auto convOutEdge = conv->getChildEdgeAt(0);

        auto convType = conv->getType();
        auto convInDims = convInEdge->getDims();
        auto convOutDims = convOutEdge->getDims();

        // Convolutions of the same type with Concat as a child
        for (size_t i = 1; i < split->getChildEdges().size(); i++) {
            auto childEdge = split->getChildEdgeAt(i);
            auto child = childEdge->getChild();
            Type type = child->getType();

            if (convType != type || child->getChildEdgeAt(0)->getChild()->getType() != Concatenation ||
                    convOutDims != child->getChildEdgeAt(0)->getDims() || child->getChildEdges().size() != 1 ||
                    convInDims != childEdge->getDims()) {
                canBeMerged = false;
                break;
            }
        }

        if (!canBeMerged) continue;

        // TODO: Rewrite the topology optimizer completely; it should be clean and understandable
        auto concat = conv->getChildEdgeAt(0)->getChild();
        // Merge and remove Convolution
        for (size_t i = 1; i < split->getChildEdges().size(); i++) {
            auto peerInEdge = split->getChildEdgeAt(i);
            auto peer = peerInEdge->getChild();
            conv->mergeWith(peer);
            convInDims[1] += (peerInEdge->getDims())[1];
            convOutDims[1] += (peer->getChildEdgeAt(0)->getDims())[1];
            peer->remove();
        }
        conv->inDims[0] = convInDims;
        conv->outDims[0] = convOutDims;

        conv->fuseWith(split);
        conv->fuseWith(concat);

        graph.DropNode(split);
        graph.DropNode(concat);
    }
}

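// Fuses a BatchNormalization node with a directly following ScaleShift
// (Depthwise) node, provided the BatchNormalization is not a network output
// and has exactly one consumer.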
void MKLDNNGraphOptimizer::FuseBatchNormWithScale(MKLDNNGraph &graph) {
    auto &graphNodes = graph.GetNodes();

    for (int i = 0; i < graphNodes.size(); i++) {
        const auto& bn = graphNodes[i];
        if (bn->getType() == BatchNormalization) {
            const auto& outputNodes = graph.GetOutputNodes();
            const std::string node_name = bn->getName();
            // Check that the node is not an output node
            if (std::find_if(outputNodes.begin(), outputNodes.end(),
                            [&node_name](const MKLDNNNodePtr& x) {
                                return x->getName() == node_name;}) == outputNodes.end()) {
                if (bn->getChildEdges().size() == 1) {
                    auto child = bn->getChildEdgeAt(0)->getChild();
                    if (child->type == Depthwise && child->getCnnLayer()->type == "ScaleShift") {
                        bn->fuseWith(child);
                        graph.DropNode(child);
                    }
                }
            }
        }
    }
}

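// Fuses a (Binary)Convolution with a following activation: ReLU is always
// allowed; ELU, Logistic, Bounded ReLU and Clamp only for FP32 convolutions.
// Up to two consecutive activations are fused, and the Convolution -> MaxPool
// -> Activation pattern is handled as well.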
void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) {
    auto isOneOf = [&](mkldnn::algorithm alg, std::vector<mkldnn::algorithm> algs) {
        for (auto a : algs) {
            if (alg == a) {
                return true;
            }
        }
        return false;
    };

    auto& graphNodes = graph.GetNodes();

    auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr activation) {
        if (!activation->getCnnLayer())
            return false;

        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(activation.get());

        return activationNode &&
            (activationNode->getAlgorithm() == eltwise_relu ||
            (conv->getCnnLayer()->precision == Precision::FP32 &&
             isOneOf(activationNode->getAlgorithm(), {eltwise_elu, eltwise_logistic, eltwise_bounded_relu, eltwise_clamp})));
    };

    for (int i = 0; i < graphNodes.size(); i++) {
        if (graphNodes[i]->getType() == Convolution || graphNodes[i]->getType() == BinaryConvolution) {
            auto conv = graphNodes[i];

            auto fuse = [&] (MKLDNNNodePtr relu) {
                if (graphNodes[i]->getType() != BinaryConvolution)
                    conv->setType(Convolution_Activation);
                conv->fuseWith(relu);
            };

            if (conv->getChildEdges().size() == 1) {
                auto ch1 = conv->getChildEdgeAt(0)->getChild();

                if (isFusingSupported(conv, ch1)) {
                    fuse(ch1);

                    if (ch1->getChildEdges().size() == 1) {
                        auto ch2 = ch1->getChildEdgeAt(0)->getChild();

                        if (isFusingSupported(conv, ch2)) {
                            fuse(ch2);
                            graph.DropNode(ch2);
                        }
                    }
                    graph.DropNode(ch1);
                } else {
                    if (ch1->type == Pooling) {
                        auto pool = ch1;
                        bool is_max_pool =
                                dynamic_cast<PoolingLayer *>(pool->getCnnLayer().get())->_type ==
                                PoolingLayer::PoolType::MAX;

                        if (is_max_pool && pool->getChildEdges().size() == 1) {
                            auto ch2 = pool->getChildEdgeAt(0)->getChild();
                            if (isFusingSupported(conv, ch2)) {
                                fuse(ch2);
                                graph.DropNode(ch2);
                            }
                        }
                    }
                }
            }
        }
    }
}

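// Fuses a (Binary)Convolution with up to two following Depthwise nodes
// (ScaleShift with biases, or PReLU), turning them into post-operations.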
void MKLDNNGraphOptimizer::FuseConvolutionAndDepthwise(MKLDNNGraph &graph) {
    auto& graphNodes = graph.GetNodes();

    auto isSutableParentNode = [](MKLDNNNodePtr node) {
        bool isSutableConv = (node->getType() == Convolution || node->getType() == Convolution_Activation) &&
                             node->getCnnLayer()->precision == Precision::FP32;
        bool isSutableBinConv = node->getType() == BinaryConvolution;
        return (isSutableConv || isSutableBinConv) && node->getChildEdges().size() == 1;
    };

    auto isSutableChildNode = [](MKLDNNNodePtr node) {
        if (node->getType() != Depthwise)
            return false;

        if (!node->getCnnLayer())
            return false;

        auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
        return ((depthwiseNode->getAlgorithm() == mkldnn::algorithm::depthwise_scale_shift && depthwiseNode->isWithBiases()) ||
                (depthwiseNode->getAlgorithm() == mkldnn::algorithm::depthwise_prelu));
    };

    for (int i = 0; i < graphNodes.size(); i++) {
        auto conv = graphNodes[i];
        if (!isSutableParentNode(conv)) continue;

        auto depthwise0 = conv->getChildEdgeAt(0)->getChild();
        if (!isSutableChildNode(depthwise0)) continue;

        conv->fuseWith(depthwise0);
        if (conv->type != BinaryConvolution)
            conv->setType(Convolution_Depthwise);

        if (depthwise0->getChildEdges().size() == 1) {
            auto depthwise1 = depthwise0->getChildEdgeAt(0)->getChild();

            if (isSutableChildNode(depthwise1)) {
                conv->fuseWith(depthwise1);
                graph.DropNode(depthwise1);
            }
        }

        graph.DropNode(depthwise0);
    }
}

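// Fuses a non-grouped Convolution (or BinaryConvolution) with a following 3x3
// depthwise Convolution (out_depth == group, pads of 1, unit dilation, with
// biases). The fusion is applied only where it is expected to pay off: for int8
// parents only when AVX512 is unavailable, for FP32 when the depthwise input
// and output together exceed half of the L3 cache.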
void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
    auto& graphNodes = graph.GetNodes();

    auto isConvolutionNode = [](MKLDNNNodePtr node) {
        return node->getType() == Convolution || node->getType() == Convolution_Activation;
    };

    auto isBinaryConvolutionNode = [](MKLDNNNodePtr node) {
        return node->getType() == BinaryConvolution;
    };

    auto is1x1Convolution = [](ConvolutionLayer* layer) {
        return layer->_kernel[X_AXIS] == 1 && layer->_kernel[Y_AXIS] == 1;
    };

    auto isSutableParentConvolution = [&](MKLDNNNodePtr node) {
        if (isBinaryConvolutionNode(node)) {
            auto *layer = dynamic_cast<BinaryConvolutionLayer *>(node->getCnnLayer().get());

            bool isSupportedParams = layer->_group == 1;
            if (!isSupportedParams) return false;
        } else {
            auto *layer = dynamic_cast<ConvolutionLayer *>(node->getCnnLayer().get());

            bool isSupportedParams = layer->_group == 1 &&
                                     ((is1x1Convolution(layer) && layer->_stride[X_AXIS] == 1 &&
                                       layer->_stride[Y_AXIS] == 1) || !is1x1Convolution(layer)) &&
                                     (layer->precision == Precision::FP32 || layer->precision == Precision::I8);
            if (!isSupportedParams) return false;
        }

        return node->getChildEdges().size() == 1 && isConvolutionNode(node->getChildEdgeAt(0)->getChild());
    };

    auto isSutableChildConvolution = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
        auto* childLayer = dynamic_cast<ConvolutionLayer*>(childNode->getCnnLayer().get());

        if (!isBinaryConvolutionNode(parentNode)) {
            auto* parentLayer = dynamic_cast<ConvolutionLayer*>(parentNode->getCnnLayer().get());
            if (parentLayer->precision != childLayer->precision)
                return false;
        }

        auto allPads = getPaddings(*childLayer);
        bool isSupportedParams = childLayer->_out_depth == childLayer->_group &&
                                 childLayer->_out_depth != 1 &&
                                 // Depthwise convolution output should be a multiple of 8
                                 childLayer->_kernel[X_AXIS] == 3 && childLayer->_kernel[Y_AXIS] == 3 &&
                                 allPads.begin[X_AXIS] == 1 && allPads.begin[Y_AXIS] == 1 &&
                                 childLayer->_dilation[X_AXIS] == 1 && childLayer->_dilation[Y_AXIS] == 1 &&
                                 childLayer->_biases != nullptr && childLayer->_biases->size() != 0;

        return isSupportedParams;
    };

    auto isFusingWorthwhile = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
        if (isBinaryConvolutionNode(parentNode)) {
            return true;
        }

        auto* layer = dynamic_cast<ConvolutionLayer*>(childNode->getCnnLayer().get());

        auto inDims = childNode->inDims[0];
        auto outDims = childNode->outDims[0];
        int elemSize = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(layer->precision));

        int L3_cache_size = mkldnn_get_cache_size(3, false);
        int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * elemSize;
        int dw_conv_output_size = outDims[0] * outDims[1] * outDims[2] * outDims[3] * elemSize;

        bool isInt8 = layer->precision == Precision::I8 || layer->precision == Precision::U8;
        bool isAVX512NotSupported = !mkldnn::impl::cpu::mayiuse(impl::cpu::cpu_isa_t::avx512_common);

        return isInt8 ? isAVX512NotSupported : (dw_conv_input_size + dw_conv_output_size > L3_cache_size / 2);
    };

    for (int i = 0; i < graphNodes.size(); i++) {
        if (!isConvolutionNode(graphNodes[i]) && !isBinaryConvolutionNode(graphNodes[i])) continue;

        auto parentConvNode = graphNodes[i];
        if (!isSutableParentConvolution(parentConvNode)) continue;

        auto childConvNode = parentConvNode->getChildEdgeAt(0)->getChild();
        if (!isSutableChildConvolution(parentConvNode, childConvNode)) continue;

        if (!isFusingWorthwhile(parentConvNode, childConvNode)) continue;

        parentConvNode->fuseWith(childConvNode);
        graph.DropNode(childConvNode);
    }
}

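// Fuses a BinaryConvolution with a following two-level Quantize node and copies
// the "input low" values of the Quantize into the convolution node as
// per-channel binarization thresholds.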
void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph) {
    auto removeEdge = [](MKLDNNGraph &graph, MKLDNNEdgePtr& edge) {
        auto& edges = graph.GetEdges();
        for (auto it = edges.begin(); it != edges.end(); it++) {
            if ((*it) == edge) {
                edges.erase(it);
                return;
            }
        }
    };

    auto& graphNodes = graph.GetNodes();

    auto isSutableParentNode = [](MKLDNNNodePtr node) {
        bool isSutableBinConv = node->getType() == BinaryConvolution;
        return isSutableBinConv && node->getChildEdges().size() == 1;
    };

    auto isSutableChildNode = [](MKLDNNNodePtr node) {
        if (!node->getCnnLayer())
            return false;

        auto* quantizeLayer = dynamic_cast<QuantizeLayer*>(node->getCnnLayer().get());
        bool isSutableQuantize = node->getType() == Quantize && quantizeLayer->levels == 2;

        return isSutableQuantize;
    };

    for (int i = 0; i < graphNodes.size(); i++) {
        auto parent = graphNodes[i];
        if (!isSutableParentNode(parent)) continue;

        auto child = parent->getChildEdgeAt(0)->getChild();
        if (!isSutableChildNode(child)) continue;

        parent->fuseWith(child);

        auto* binConvNode = dynamic_cast<MKLDNNBinaryConvolutionNode*>(parent.get());

        auto parents = child->parentEdges;
        for (size_t i = 0; i < parents.size(); i++) {
            auto p_edge = parents[i].lock();
            if (p_edge->getParent()->getType() == Input) {
                InferenceEngine::SizeVector dims;
                dims.push_back(binConvNode->getChildEdgeAt(0)->getDims()[1]);

                auto InputLowBlob = dynamic_cast<TBlob<float>*>(p_edge->getParent()->getCnnLayer()->blobs["custom"].get());

                auto inputLowData = InputLowBlob->buffer().as<float*>();
                int inputLowAxis = p_edge->getDims().ndims() == 1 ? 0 : 1;
                bool isInputLowBroadcasted = p_edge->getDims()[inputLowAxis] != dims[0];

                for (int i = 0; i < dims[0]; i++) {
                    binConvNode->pushBinarizationThreshold(inputLowData[isInputLowBroadcasted ? 0 : i]);
                }

                break;
            }
        }

        for (size_t i = 0; i < parents.size(); i++) {
            auto p_edge = parents[i].lock();
            if (p_edge->getParent()->getType() == BinaryConvolution)
                continue;

            removeEdge(graph, p_edge);
        }

        graph.DropNode(child);
    }
}

/**
 *  Check if there is a data dependency between parent and child
 *  BFS starting from parent and comparing with child
 *
 * @param parent head of BFS
 * @param child node we try to find
 * @return true if child is reachable from parent, i.e. child (transitively) consumes data produced by parent
 */
static bool is_data_dependency(const std::shared_ptr<MKLDNNNode> &parent,
                               const std::shared_ptr<MKLDNNNode> &child) {
    std::set<MKLDNNNode*> visited;
    std::list<MKLDNNNode*> nextLayers {parent.get()};

    for (; !nextLayers.empty();) {
        auto layer = *nextLayers.begin();
        if (layer == child.get()) return true;
        for (auto oe : layer->getChildEdges()) {
            auto nn = oe.lock()->getChild();
            if (visited.find(nn.get()) == visited.end()) {
                nextLayers.push_back(nn.get());
                visited.insert(nn.get());
            }
        }
        nextLayers.pop_front();
    }
    return false;
}

/*
 *  Before:
 *
 *        ***             ***                   ***             ***
 *         |               |                     |               |
 *    +========+       +========+           +========+       +========+
 *    |  any   |       | conv 2 |           |  any   |       | conv 2 |
 *    +========+       +========+           +========+       +========+
 *         |               |                     |               |
 *      +=====================+               +=====================+
 *      |         Sum         |      or       |         Sum         |
 *      +=====================+               +=====================+
 *                 |                                     |
 *         +===============+                            ***
 *         |     Relu      |
 *         +===============+
 *                 |
 *                ***
 *
 *  After:
 *
 *        ***             ***
 *         |               |
 *    +========+       +========+
 *    |  any   |-------|        |
 *    +========+       | conv2  |
 *                     |   +    |
 *                     |  sum   |
 *                     |   +    |
 *                     | [relu] |
 *                     |        |
 *                     +========+
 *                         |
 *                 +-------+
 *                 |
 *                ***
 */

void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNGraph &graph) {
    std::vector<MKLDNNNodePtr> &graphNodes = graph.GetNodes();

    auto isOneOf = [&](mkldnn::algorithm alg, std::vector<mkldnn::algorithm> algs) {
        for (auto a : algs) {
            if (alg == a) {
                return true;
            }
        }
        return false;
    };

    auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr activation) {
        if (!activation->getCnnLayer())
            return false;

        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(activation.get());

        return activationNode &&
            (activationNode->getAlgorithm() == eltwise_relu ||
            (conv->getCnnLayer()->precision == Precision::FP32 &&
             isOneOf(activationNode->getAlgorithm(), {eltwise_elu, eltwise_logistic, eltwise_bounded_relu, eltwise_clamp})));
    };

    for (auto &graphNode : graphNodes) {
        if (graphNode->getType() != Eltwise)
            continue;

        if (!std::dynamic_pointer_cast<MKLDNNEltwiseNode>(graphNode)->isSum()) continue;
        if (!std::dynamic_pointer_cast<MKLDNNEltwiseNode>(graphNode)->isUnitScales()) continue;

        auto parent1 = graphNode->getParentEdgeAt(0)->getParent();
        auto parent2 = graphNode->getParentEdgeAt(1)->getParent();
        // TODO: Extend to several inputs
        if (graphNode->getParentEdges().size() != 2 ||
            (parent1->getType() != Convolution && parent1->getType() != BinaryConvolution &&
             parent2->getType() != Convolution && parent2->getType() != BinaryConvolution))
            continue;

        auto mergedConv = (parent1->getType() == Convolution || parent1->getType() == BinaryConvolution) ? parent1 : parent2;
        auto peerNode = (parent1->getType() == Convolution || parent1->getType() == BinaryConvolution) ? parent2 : parent1;
        if ((peerNode->getType() == Convolution || peerNode->getType() == BinaryConvolution) &&
            mergedConv->getChildEdges().size() != 1) {
            mergedConv = parent2;
            peerNode = parent1;
        }
        auto sum = graphNode;
        auto lastNode = sum;

        bool fuse_allowed = mergedConv->getChildEdges().size() == 1;
        for (size_t j = 0; fuse_allowed && j < mergedConv->getParentEdges().size(); j++)
            if (mergedConv->getParentEdgeAt(j)->getParent() == peerNode)
                fuse_allowed = false;

        // The fused Conv+Sum primitive works in place, which means its input blob will
        // be overwritten. Verify that all other consumers have already read that blob
        // before we are allowed to spoil the input data.
        // TODO: rewrite once we add an "Inplace" reporting mechanism
        for (auto & edge : peerNode->getChildEdges()) {
            if (!fuse_allowed)
                break;
            fuse_allowed &= is_data_dependency(edge.lock()->getChild(), sum);
        }
        if (!fuse_allowed) continue;

        if (graphNode->getChildEdges().size() == 1 &&
                isFusingSupported(graphNode, graphNode->getChildEdgeAt(0)->getChild())) {
            auto relu_shared = graphNode->getChildEdgeAt(0)->getChild();
            lastNode = relu_shared;
            if (mergedConv->getType() != BinaryConvolution)
                mergedConv->setType(Convolution_Sum_Activation);
            mergedConv->fuseWith(sum);
        } else {
            if (mergedConv->getType() != BinaryConvolution)
                mergedConv->setType(Convolution_Sum);
        }

        mergedConv->fuseWith(lastNode);

        if (mergedConv->fusedWith.size() > 0 &&
           (mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) {
            // Merged with DW_conv. Shape may change
            mergedConv->inDims.push_back(mergedConv->fusedWith[0]->outDims[0]);
        } else {
            mergedConv->inDims.push_back(mergedConv->outDims[0]);
        }

        size_t childIdx = 0;
        for (childIdx = 0; childIdx < peerNode->getChildEdges().size(); childIdx++) {
            if (peerNode->getChildEdgeAt(childIdx)->getChild() == sum) {
                break;
            }
        }

        int peer_port = peerNode->getChildEdgeAt(childIdx)->getInputNum();
        peerNode->getChildEdgeAt(childIdx)->drop();

        MKLDNNEdgePtr edgePtr(new MKLDNNEdge(peerNode, mergedConv, peer_port, 1));
        graph.GetEdges().push_back(edgePtr);

        mergedConv->addEdge(edgePtr);

        std::vector<MKLDNNEdgeWeakPtr> edges_to_reconnect = lastNode->getChildEdges();
        for (auto &edge_w : edges_to_reconnect) {
            auto edge = edge_w.lock();
            auto child = edge->getChild();
            int idxParent = edge->getInputNum();
            int idxChild = edge->getOutputNum();

            // Reconnect after the activation/sum; the port index must be 0
            IE_ASSERT(idxParent == 0);

            edge->drop();

            MKLDNNEdgePtr newEdge(new MKLDNNEdge(mergedConv, child, idxParent, idxChild));
            graph.GetEdges().push_back(newEdge);
            child->addEdge(newEdge);
        }

        if (lastNode != sum) {
            lastNode->remove();
        }
        sum->remove();
    }
}

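// Fuses a FullyConnected node with a following ReLU activation; applied only
// to non-FP32 precisions (see the TODO below).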
void MKLDNNGraphOptimizer::FuseFullyConnectedAndActivation(MKLDNNGraph &graph) {
    auto& graphNodes = graph.GetNodes();

    auto isFusingSupported = [&](MKLDNNNodePtr fc, MKLDNNNodePtr activation) {
        if (!activation->getCnnLayer())
            return false;

        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(activation.get());

        // TODO: fusing on FP32 is not yet optimized in mkl-dnn
        return activationNode && fc->getCnnLayer()->precision != Precision::FP32 &&
            (activationNode->getAlgorithm() == eltwise_relu);
    };

    for (int i = 0; i < graphNodes.size(); i++) {
        if (graphNodes[i]->getType() == FullyConnected) {
            auto fc = graphNodes[i];

            auto fuse = [&] (MKLDNNNodePtr relu) {
                fc->setType(FullyConnected_Activation);
                fc->fuseWith(relu);
            };

            if (fc->getChildEdges().size() == 1) {
                auto ch1 = fc->getChildEdgeAt(0)->getChild();

                if (isFusingSupported(fc, ch1)) {
                    fuse(ch1);
                    graph.DropNode(ch1);
                }
            }
        }
    }
}

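// Drops identity operations: Power with power == 1, scale == 1 and offset == 0,
// ScaleShift without weights and biases, and Copy nodes.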
void MKLDNNGraphOptimizer::RemoveIdentityOperator(MKLDNNGraph &graph) {
    for (MKLDNNNodePtr& node : graph.GetNodes()) {
        bool toDrop = false;

        if (node->getType() == Power) {
            PowerLayer* l = dynamic_cast<PowerLayer*>(node->getCnnLayer().get());

            if (l->power == 1.0f && l->scale == 1.0f && l->offset == 0.0f) toDrop = true;
        }

        if (node->getType() == Depthwise && node->getCnnLayer()->type == "ScaleShift") {
            ScaleShiftLayer* l = dynamic_cast<ScaleShiftLayer*>(node->getCnnLayer().get());

            if (l->_weights == nullptr && l->_biases == nullptr) toDrop = true;
        }

        if (node->getType() == Copy) toDrop = true;

        if (toDrop) graph.DropNode(node);
    }
}

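// Collapses two consecutive Reorder nodes into a single Reorder that converts
// directly from the first node's input format to the second node's output
// format, keeping the scales of whichever of the two reorders carried them.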
void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
    std::set<MKLDNNNodePtr> processed;
    std::vector<MKLDNNNodePtr> newNodes;
    for (MKLDNNNodePtr& node : graph.GetNodes()) {
        if (processed.find(node) == processed.end() && node->getType() == Reorder
            && node->getChildEdges().size() == 1
            && node->getChildEdgeAt(0)->getChild()->getType() == Reorder) {
            auto nextNode = node->getChildEdgeAt(0)->getChild();
            MKLDNNReorderNode* n = dynamic_cast<MKLDNNReorderNode*>(node.get());
            MKLDNNReorderNode* nn = dynamic_cast<MKLDNNReorderNode*>(nextNode.get());

            auto scales = n->_scales;

            if (n->_scales != nullptr && nn->_scales != nullptr) {
                THROW_IE_EXCEPTION << "Merging scales of two subsequent reorders is unsupported yet";
            } else {
                if (scales == nullptr) {
                    scales = nn->_scales;
                }
            }

            MKLDNNNodePtr p = n->getParentEdgeAt(0)->getParent();
            MKLDNNNodePtr c = nn->getChildEdgeAt(0)->getChild();

            auto oldEdgeNum = n->getParentEdgeAt(0)->getInputNum();

            graph.DropNode(node);
            graph.DropNode(nextNode);

            processed.insert(node);
            processed.insert(nextNode);

            MKLDNNEdgePtr edge;
            for (auto cur : p->getChildEdgesAtPort(oldEdgeNum)) {
                if (cur->getChild() == c)
                    edge = cur;
            }
            if (!edge) THROW_IE_EXCEPTION << "Inappropriate graph processing";

            std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName();
            CNNLayerPtr layer(new CNNLayer({layerName,
                                            "Reorder",
                                            n->getInput().getPrecision()}));
            MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, graph.getEngine()));
            auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
            if (reorderPtr) {
                reorderPtr->setDescs(n->getInput(), nn->getOutput());
                reorderPtr->_scales = scales;
            }

            // Rewire the graph: replace the dropped Reorder pair with the single new Reorder
            auto oIndex = edge->getOutputNum();
            auto iIndex = edge->getInputNum();
            if (iIndex < 0 || oIndex < 0)
                THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
                                   << edge->getParent()->getName() << " and "
                                   << edge->getChild()->getName() << ".";
            edge->drop();

            MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
            MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));

            // Add edge for beforeNode
            beforeNode->getChild()->parentEdges.push_back(beforeNode);
            edge->getParent()->childEdges.push_back(beforeNode);

            // Add edge for afterNode
            afterNode->getParent()->childEdges.push_back(afterNode);
            edge->getChild()->parentEdges.push_back(afterNode);

            newReorder->getSupportedDescriptors();
            newReorder->initSupportedPrimitiveDescriptors();
            newReorder->selectOptimalPrimitiveDescriptor();

            graph.GetEdges().push_back(beforeNode);
            graph.GetEdges().push_back(afterNode);

            // Just to check that the edge descriptors agree
            afterNode->getDesc();
            beforeNode->getDesc();

            newNodes.push_back(newReorder);
            graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end());
        }
    }
    for (MKLDNNNodePtr& node : newNodes) {
        graph.GetNodes().push_back(node);
    }
}

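// Folds ScaleShift nodes inserted around network inputs/outputs
// ("_iScaleShift_" / "_oScaleShift_") into the adjacent Reorder by moving their
// weights into the reorder scales, when the input and output precisions differ.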
void MKLDNNGraphOptimizer::RemoveIOScaleShifts(MKLDNNGraph &graph) {
    for (MKLDNNNodePtr& node : graph.GetNodes()) {
        if (node->getType() == Depthwise && node->getCnnLayer()->type == "ScaleShift") {
            ScaleShiftLayer* l = dynamic_cast<ScaleShiftLayer*>(node->getCnnLayer().get());

            auto cur = l->insData[0].lock();
            if (cur == nullptr) {
                THROW_IE_EXCEPTION << "[MKLDNN] error - invalid input data";
            }
            if (cur->precision != l->outData[0]->precision) {
                if (node->name.find("_iScaleShift_") != std::string::npos) {
                    auto child = node->childEdges[0].lock()->getChild();
                    if (child->type == Reorder) {
                        MKLDNNReorderNode* rn = dynamic_cast<MKLDNNReorderNode*>(child.get());
                        if (rn != nullptr) {
                            rn->_scales = l->_weights;
                            graph.DropNode(node);
                        }
                    } else {
                        THROW_IE_EXCEPTION << "Strange case. No Reorder after iScaleShift";
                    }
                } else if (node->name.find("_oScaleShift_") != std::string::npos) {
                    auto parent = node->parentEdges[0].lock()->getParent();

                    if (parent->type == Reorder) {
                        MKLDNNReorderNode* rn = dynamic_cast<MKLDNNReorderNode*>(parent.get());
                        if (rn != nullptr) {
                            rn->_scales = l->_weights;
                            graph.DropNode(node);
                        }
                    } else {
                        THROW_IE_EXCEPTION << "Strange case. No Reorder before oScaleShift";
                    }
                }
            }
        }
    }
}

bool MKLDNNGraphOptimizer::IsOneOf(Type type, std::vector<Type> types) {
    for (auto tp : types) {
        if (type == tp) {
            return true;
        }
    }
    return false;
}