Publishing 2019 R1 content
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
index 6c88ebd..4723403 100644
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -17,6 +17,9 @@
 #include <memory>
 #include <set>
 #include <ie_layers_internal.hpp>
+#include <nodes/mkldnn_bin_conv_node.h>
+#include <nodes/mkldnn_quantize_node.h>
+#include "cpu_isa_traits.hpp"
 
 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@@ -28,8 +31,8 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
     MergeGroupConvolution(graph);
     graph.RemoveDroppedNodes();
 
-//    SLTMTransform(graph);
-//    RemoveDropped(graph);
+    FuseConvolutionAndDepthwise(graph);
+    graph.RemoveDroppedNodes();
 
     FuseConvolutionAndActivation(graph);
     graph.RemoveDroppedNodes();
@@ -40,9 +43,15 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
     FuseConvolutionAndDWConvolution(graph);
     graph.RemoveDroppedNodes();
 
+    FuseBinaryConvolutionAndQuantize(graph);
+    graph.RemoveDroppedNodes();
+
     FuseBatchNormWithScale(graph);
     graph.RemoveDroppedNodes();
 
+    FuseFullyConnectedAndActivation(graph);
+    graph.RemoveDroppedNodes();
+
     RemoveIdentityOperator(graph);
     graph.RemoveDroppedNodes();
 
@@ -113,6 +122,9 @@ void MKLDNNGraphOptimizer::MergeGroupConvolution(MKLDNNGraph &graph) {
         conv->inDims[0] = convInDims;
         conv->outDims[0] = convOutDims;
 
+        conv->fuseWith(split);
+        conv->fuseWith(concat);
+
         graph.DropNode(split);
         graph.DropNode(concat);
     }
@@ -167,11 +179,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) {
     };
 
     for (int i = 0; i < graphNodes.size(); i++) {
-        if (graphNodes[i]->getType() == Convolution) {
+        if (graphNodes[i]->getType() == Convolution || graphNodes[i]->getType() == BinaryConvolution) {
             auto conv = graphNodes[i];
 
             auto fuse = [&] (MKLDNNNodePtr relu) {
-                conv->setType(Convolution_Activation);
+                if (graphNodes[i]->getType() != BinaryConvolution)
+                    conv->setType(Convolution_Activation);
                 conv->fuseWith(relu);
             };
 
@@ -215,9 +228,10 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDepthwise(MKLDNNGraph &graph) {
     auto& graphNodes = graph.GetNodes();
 
     auto isSutableParentNode = [](MKLDNNNodePtr node) {
-        return (node->getType() == Convolution || node->getType() == Convolution_Activation) &&
-                node->getCnnLayer()->precision == Precision::FP32 &&
-               (node->getChildEdges().size() == 1);
+        bool isSutableConv = (node->getType() == Convolution || node->getType() == Convolution_Activation) &&
+                             node->getCnnLayer()->precision == Precision::FP32;
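+        // Binary convolutions may be fused with a depthwise op regardless of layer precision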
+        bool isSutableBinConv = node->getType() == BinaryConvolution;
+        return (isSutableConv || isSutableBinConv) && node->getChildEdges().size() == 1;
     };
 
     auto isSutableChildNode = [](MKLDNNNodePtr node) {
@@ -240,7 +254,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDepthwise(MKLDNNGraph &graph) {
         if (!isSutableChildNode(depthwise0)) continue;
 
         conv->fuseWith(depthwise0);
-        conv->setType(Convolution_Depthwise);
+        if (conv->type != BinaryConvolution)
+            conv->setType(Convolution_Depthwise);
 
         if (depthwise0->getChildEdges().size() == 1) {
             auto depthwise1 = depthwise0->getChildEdgeAt(0)->getChild();
@@ -262,64 +277,163 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
         return node->getType() == Convolution || node->getType() == Convolution_Activation;
     };
 
+    auto isBinaryConvolutionNode = [](MKLDNNNodePtr node) {
+        return node->getType() == BinaryConvolution;
+    };
+
     auto is1x1Convolution = [](ConvolutionLayer* layer) {
         return layer->_kernel[X_AXIS] == 1 && layer->_kernel[Y_AXIS] == 1;
     };
 
     auto isSutableParentConvolution = [&](MKLDNNNodePtr node) {
-        auto* layer = dynamic_cast<ConvolutionLayer*>(node->getCnnLayer().get());
+        if (isBinaryConvolutionNode(node)) {
+            auto *layer = dynamic_cast<BinaryConvolutionLayer *>(node->getCnnLayer().get());
+
+            bool isSupportedParams = layer->_group == 1;
+            if (!isSupportedParams) return false;
+        } else {
+            auto *layer = dynamic_cast<ConvolutionLayer *>(node->getCnnLayer().get());
 
-        bool isSupportedParams = layer->_group == 1 &&
-                                 ((is1x1Convolution(layer) &&
-                                  layer->_stride[X_AXIS] == 1 && layer->_stride[Y_AXIS] == 1) || !is1x1Convolution(layer)) &&
-                                  layer->precision == Precision::FP32;;
-        if (!isSupportedParams) return false;
+            bool isSupportedParams = layer->_group == 1 &&
+                                     ((is1x1Convolution(layer) && layer->_stride[X_AXIS] == 1 &&
+                                       layer->_stride[Y_AXIS] == 1) || !is1x1Convolution(layer)) &&
+                                     (layer->precision == Precision::FP32 || layer->precision == Precision::I8);
+            if (!isSupportedParams) return false;
+        }
 
         return node->getChildEdges().size() == 1 && isConvolutionNode(node->getChildEdgeAt(0)->getChild());
     };
 
-    auto isSutableChildConvolution = [](MKLDNNNodePtr node) {
-        auto* layer = dynamic_cast<ConvolutionLayer*>(node->getCnnLayer().get());
-        auto allPads = getPaddings(*layer);
-        bool isSupportedParams = layer->_out_depth == layer->_group &&
+    auto isSutableChildConvolution = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
+        auto* childLayer = dynamic_cast<ConvolutionLayer*>(childNode->getCnnLayer().get());
 
-                                 layer->_out_depth != 1 &&
-                                 // Depthwise convolution output should be multiple of 8
+        if (!isBinaryConvolutionNode(parentNode)) {
+            auto* parentLayer = dynamic_cast<ConvolutionLayer*>(parentNode->getCnnLayer().get());
+            if (parentLayer->precision != childLayer->precision)
+                return false;
+        }
 
-                                 layer->_kernel[X_AXIS] == 3 && layer->_kernel[Y_AXIS] == 3 &&
+        auto allPads = getPaddings(*childLayer);
+        bool isSupportedParams = childLayer->_out_depth == childLayer->_group &&
+                                 childLayer->_out_depth != 1 &&
+                                 // Depthwise convolution output should be a multiple of 8
+                                 childLayer->_kernel[X_AXIS] == 3 && childLayer->_kernel[Y_AXIS] == 3 &&
                                  allPads.begin[X_AXIS] == 1 && allPads.begin[Y_AXIS] == 1 &&
-                                 layer->_dilation[X_AXIS] == 1 && layer->_dilation[Y_AXIS] == 1 &&
-                                 layer->_biases != nullptr && layer->_biases->size() != 0 &&
-                                 layer->precision == Precision::FP32;
+                                 childLayer->_dilation[X_AXIS] == 1 && childLayer->_dilation[Y_AXIS] == 1 &&
+                                 childLayer->_biases != nullptr && childLayer->_biases->size() != 0;
+
         return isSupportedParams;
     };
 
-    auto isFusingWorthwhile = [](MKLDNNNodePtr node) {
-        auto inDims = node->inDims[0];
-        auto outDims = node->outDims[0];
+    auto isFusingWorthwhile = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
+        if (isBinaryConvolutionNode(parentNode)) {
+            return true;
+        }
+
+        auto* layer = dynamic_cast<ConvolutionLayer*>(childNode->getCnnLayer().get());
+
+        auto inDims = childNode->inDims[0];
+        auto outDims = childNode->outDims[0];
+        int elemSize = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(layer->precision));
 
         int L3_cache_size = mkldnn_get_cache_size(3, false);
-        int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * sizeof(float);
-        int dw_conv_output_size = outDims[0] * outDims[1]* outDims[2] * outDims[3] * sizeof(float);
-        return (dw_conv_input_size + dw_conv_output_size > L3_cache_size / 2);
+        int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * elemSize;
+        int dw_conv_output_size = outDims[0] * outDims[1]* outDims[2] * outDims[3] * elemSize;
+
+        bool isInt8 = layer->precision == Precision::I8 || layer->precision == Precision::U8;
+        bool isAVX512NotSupported = !mkldnn::impl::cpu::mayiuse(impl::cpu::cpu_isa_t::avx512_common);
+
+        return isInt8 ? isAVX512NotSupported : (dw_conv_input_size + dw_conv_output_size > L3_cache_size / 2);
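+        // For int8 the fused dw-conv path pays off only when AVX-512 is unavailable; for fp32
+        // fuse only when the input and output tensors together exceed half of the L3 cache.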
     };
 
     for (int i = 0; i < graphNodes.size(); i++) {
-        if (!isConvolutionNode(graphNodes[i])) continue;
+        if (!isConvolutionNode(graphNodes[i]) && !isBinaryConvolutionNode(graphNodes[i])) continue;
 
         auto parentConvNode = graphNodes[i];
         if (!isSutableParentConvolution(parentConvNode)) continue;
 
         auto childConvNode = parentConvNode->getChildEdgeAt(0)->getChild();
-        if (!isSutableChildConvolution(childConvNode)) continue;
+        if (!isSutableChildConvolution(parentConvNode, childConvNode)) continue;
 
-        if (!isFusingWorthwhile(childConvNode)) continue;
+        if (!isFusingWorthwhile(parentConvNode, childConvNode)) continue;
 
         parentConvNode->fuseWith(childConvNode);
         graph.DropNode(childConvNode);
     }
 }
 
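+// Fuses a BinaryConvolution with a following 2-level Quantize (binarization): the quantize
+// input-low values become binarization thresholds of the convolution and the remaining
+// quantize inputs are detached from the graph.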
+void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph) {
+    auto removeEdge = [](MKLDNNGraph &graph, MKLDNNEdgePtr& edge) {
+        auto& edges = graph.GetEdges();
+        for (auto it = edges.begin(); it != edges.end(); it++) {
+            if ((*it) == edge) {
+                edges.erase(it);
+                return;
+            }
+        }
+    };
+
+    auto& graphNodes = graph.GetNodes();
+
+    auto isSutableParentNode = [](MKLDNNNodePtr node) {
+        bool isSutableBinConv = node->getType() == BinaryConvolution;
+        return isSutableBinConv && node->getChildEdges().size() == 1;
+    };
+
+    auto isSutableChildNode = [](MKLDNNNodePtr node) {
+        if (!node->getCnnLayer())
+            return false;
+
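+        // Only a 2-level Quantize (i.e. binarization) can be folded into a binary convolution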
+        auto* quantizeLayer = dynamic_cast<QuantizeLayer*>(node->getCnnLayer().get());
+        bool isSutableQuantize = node->getType() == Quantize && quantizeLayer != nullptr && quantizeLayer->levels == 2;
+
+        return isSutableQuantize;
+    };
+
+    for (int i = 0; i < graphNodes.size(); i++) {
+        auto parent = graphNodes[i];
+        if (!isSutableParentNode(parent)) continue;
+
+        auto child = parent->getChildEdgeAt(0)->getChild();
+        if (!isSutableChildNode(child)) continue;
+
+        parent->fuseWith(child);
+
+        auto* binConvNode = dynamic_cast<MKLDNNBinaryConvolutionNode*>(parent.get());
+
+        auto parents = child->parentEdges;
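+        // Copy the quantize input-low values into the convolution as per-output-channel
+        // binarization thresholds, broadcasting a single value when the blob does not cover all channels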
+        for (size_t i = 0; i < parents.size(); i++) {
+            auto p_edge = parents[i].lock();
+            if (p_edge->getParent()->getType() == Input) {
+                InferenceEngine::SizeVector dims;
+                dims.push_back(binConvNode->getChildEdgeAt(0)->getDims()[1]);
+
+                auto InputLowBlob = dynamic_cast<TBlob<float>*>(p_edge->getParent()->getCnnLayer()->blobs["custom"].get());
+
+                auto inputLowData = InputLowBlob->buffer().as<float*>();
+                int inputLowAxis = p_edge->getDims().ndims() == 1 ? 0 : 1;
+                bool isInputLowBroadcasted = p_edge->getDims()[inputLowAxis] != dims[0];
+
+                for (int i = 0; i < dims[0]; i++) {
+                    binConvNode->pushBinarizationThreshold(inputLowData[isInputLowBroadcasted ? 0 : i]);
+                }
+
+                break;
+            }
+        }
+
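+        // Detach the constant inputs of the Quantize node; only the data edge coming from the
+        // convolution itself is kept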
+        for (size_t i = 0; i < parents.size(); i++) {
+            auto p_edge = parents[i].lock();
+            if (p_edge->getParent()->getType() == BinaryConvolution)
+                continue;
+
+            removeEdge(graph, p_edge);
+        }
+
+        graph.DropNode(child);
+    }
+}
+
 /**
  *  Check if there is a data dependency between parent and child
  *  BFS starting from parent and comparing with child
@@ -417,18 +531,18 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
         if (!std::dynamic_pointer_cast<MKLDNNEltwiseNode>(graphNode)->isSum()) continue;
         if (!std::dynamic_pointer_cast<MKLDNNEltwiseNode>(graphNode)->isUnitScales()) continue;
 
+        auto parent1 = graphNode->getParentEdgeAt(0)->getParent();
+        auto parent2 = graphNode->getParentEdgeAt(1)->getParent();
         // TODO: Enlarge to several inputs
         if (graphNode->getParentEdges().size() != 2 ||
-            (graphNode->getParentEdgeAt(0)->getParent()->getType() != Convolution &&
-                    graphNode->getParentEdgeAt(1)->getParent()->getType() != Convolution))
+            (parent1->getType() != Convolution && parent1->getType() != BinaryConvolution &&
+             parent2->getType() != Convolution && parent2->getType() != BinaryConvolution))
             continue;
 
-        auto parent1 = graphNode->getParentEdgeAt(0)->getParent();
-        auto parent2 = graphNode->getParentEdgeAt(1)->getParent();
-
-        auto mergedConv = (parent1->getType() == Convolution) ? parent1 : parent2;
-        auto peerNode = (parent1->getType() == Convolution) ? parent2 : parent1;
-        if (peerNode->getType() == Convolution && mergedConv->getChildEdges().size() != 1) {
+        auto mergedConv = (parent1->getType() == Convolution || parent1->getType() == BinaryConvolution) ? parent1 : parent2;
+        auto peerNode = (parent1->getType() == Convolution || parent1->getType() == BinaryConvolution) ? parent2 : parent1;
+        if ((peerNode->getType() == Convolution || peerNode->getType() == BinaryConvolution) &&
+            mergedConv->getChildEdges().size() != 1) {
             mergedConv = parent2;
             peerNode = parent1;
         }
@@ -455,16 +569,23 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
                 isFusingSupported(graphNode, graphNode->getChildEdgeAt(0)->getChild())) {
             auto relu_shared = graphNode->getChildEdgeAt(0)->getChild();
             lastNode = relu_shared;
-            mergedConv->setType(Convolution_Sum_Activation);
+            if (mergedConv->getType() != BinaryConvolution)
+                mergedConv->setType(Convolution_Sum_Activation);
             mergedConv->fuseWith(sum);
         } else {
-            mergedConv->setType(Convolution_Sum);
+            if (mergedConv->getType() != BinaryConvolution)
+                mergedConv->setType(Convolution_Sum);
         }
 
         mergedConv->fuseWith(lastNode);
 
-        MKLDNNEdgePtr edgePtr(new MKLDNNEdge(peerNode, mergedConv));
-        graph.GetEdges().push_back(edgePtr);
+        if (mergedConv->fusedWith.size() > 0 &&
+           (mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) {
+            // Merged with DW_conv. Shape may change
+            mergedConv->inDims.push_back(mergedConv->fusedWith[0]->outDims[0]);
+        } else {
+            mergedConv->inDims.push_back(mergedConv->outDims[0]);
+        }
 
         size_t childIdx = 0;
         for (childIdx = 0; childIdx < peerNode->getChildEdges().size(); childIdx++) {
@@ -473,17 +594,29 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
             }
         }
 
-        mergedConv->addEdge(edgePtr, mergedConv->getParentEdges().size(), childIdx);
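+        // Reconnect the peer node's output to the merged convolution as its second input (port 1)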
+        int peer_port = peerNode->getChildEdgeAt(childIdx)->getInputNum();
+        peerNode->getChildEdgeAt(childIdx)->drop();
+
+        MKLDNNEdgePtr edgePtr(new MKLDNNEdge(peerNode, mergedConv, peer_port, 1));
+        graph.GetEdges().push_back(edgePtr);
+
+        mergedConv->addEdge(edgePtr);
 
-        for (size_t j = 0; j < lastNode->getChildEdges().size(); j++) {
-            auto child = lastNode->getChildEdgeAt(j)->getChild();
-            edgePtr = lastNode->getChildEdgeAt(j);
-            int idxParent = edgePtr->getOutputNum();
-            int idxChild = edgePtr->getInputNum();
+        std::vector<MKLDNNEdgeWeakPtr> edges_to_reconnect = lastNode->getChildEdges();
+        for (auto &edge_w : edges_to_reconnect) {
+            auto edge = edge_w.lock();
+            auto child = edge->getChild();
+            int idxParent = edge->getInputNum();
+            int idxChild = edge->getOutputNum();
 
-            MKLDNNEdgePtr newEdge(new MKLDNNEdge(mergedConv, child));
+            // Reconnect consumers after the activation/sum; the source port index must be 0
+            IE_ASSERT(idxParent == 0);
+
+            edge->drop();
+
+            MKLDNNEdgePtr newEdge(new MKLDNNEdge(mergedConv, child, idxParent, idxChild));
             graph.GetEdges().push_back(newEdge);
-            child->addEdge(newEdge, idxParent, idxChild);
+            child->addEdge(newEdge);
         }
 
         if (lastNode != sum) {
@@ -493,6 +626,40 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
     }
 }
 
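+// Fuses a FullyConnected node with a following ReLU activation (currently enabled only for
+// non-FP32 precisions, see the TODO below)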
+void MKLDNNGraphOptimizer::FuseFullyConnectedAndActivation(MKLDNNGraph &graph) {
+    auto& graphNodes = graph.GetNodes();
+
+    auto isFusingSupported = [&](MKLDNNNodePtr fc, MKLDNNNodePtr activation) {
+        if (!activation->getCnnLayer())
+            return false;
+
+        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(activation.get());
+
+        // TODO: fusing with activation on fp32 is not yet optimized in mkl-dnn
+        return activationNode && fc->getCnnLayer()->precision != Precision::FP32 &&
+            (activationNode->getAlgorithm() == eltwise_relu);
+    };
+
+    for (int i = 0; i < graphNodes.size(); i++) {
+        if (graphNodes[i]->getType() == FullyConnected) {
+            auto fc = graphNodes[i];
+
+            auto fuse = [&] (MKLDNNNodePtr relu) {
+                fc->setType(FullyConnected_Activation);
+                fc->fuseWith(relu);
+            };
+
+            if (fc->getChildEdges().size() == 1) {
+                auto ch1 = fc->getChildEdgeAt(0)->getChild();
+
+                if (isFusingSupported(fc, ch1)) {
+                    fuse(ch1);
+                    graph.DropNode(ch1);
+                }
+            }
+        }
+    }
+}
 
 void MKLDNNGraphOptimizer::RemoveIdentityOperator(MKLDNNGraph &graph) {
     for (MKLDNNNodePtr& node : graph.GetNodes()) {
@@ -538,6 +705,7 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
             }
 
             MKLDNNNodePtr p = n->getParentEdgeAt(0)->getParent();
+            MKLDNNNodePtr c = nn->getChildEdgeAt(0)->getChild();
 
             auto oldEdgeNum = n->getParentEdgeAt(0)->getInputNum();
 
@@ -547,7 +715,12 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
             processed.insert(node);
             processed.insert(nextNode);
 
-            auto edge = p->getChildEdgeAt(oldEdgeNum);
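+            // After dropping both reorders, locate the direct edge from the producer to the
+            // consumer so it can be replaced by a single reorder that also applies the scales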
+            MKLDNNEdgePtr edge;
+            for (auto cur : p->getChildEdgesAtPort(oldEdgeNum)) {
+                if (cur->getChild() == c)
+                    edge = cur;
+            }
+            if (!edge) THROW_IE_EXCEPTION << "Inappropriate graph processing";
 
 
             std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName();
@@ -560,37 +733,38 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
                 reorderPtr->setDescs(n->getInput(), nn->getOutput());
                 reorderPtr->_scales = scales;
             }
-            MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder));
-            beforeNode->setDims(edge->getDims());
-            MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild()));
-            afterNode->setDims(edge->getDims());
 
-            int oIndex = edge->getOutputNum();
-            int iIndex = edge->getInputNum();
+            // Recreate the connection through the new reorder, preserving the original port indices
+            auto oIndex = edge->getOutputNum();
+            auto iIndex = edge->getInputNum();
             if (iIndex < 0 || oIndex < 0)
                 THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
                                    << edge->getParent()->getName() << " and "
                                    << edge->getChild()->getName() << ".";
+            edge->drop();
+
+            MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
+            MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
 
             // Add edge for beforeNode
-            edge->getParent()->childEdges[iIndex].reset();
-            edge->getParent()->childEdges[iIndex] = beforeNode;
             beforeNode->getChild()->parentEdges.push_back(beforeNode);
+            edge->getParent()->childEdges.push_back(beforeNode);
 
             // Add edge for afterNode
             afterNode->getParent()->childEdges.push_back(afterNode);
-            edge->getChild()->parentEdges[oIndex].reset();
-            edge->getChild()->parentEdges[oIndex] = afterNode;
+            edge->getChild()->parentEdges.push_back(afterNode);
 
             newReorder->getSupportedDescriptors();
             newReorder->initSupportedPrimitiveDescriptors();
             newReorder->selectOptimalPrimitiveDescriptor();
 
-            beforeNode->getDesc();
             graph.GetEdges().push_back(beforeNode);
-            afterNode->getDesc();
             graph.GetEdges().push_back(afterNode);
 
+            // Resolve the descriptors only to verify that the new edges are consistent
+            afterNode->getDesc();
+            beforeNode->getDesc();
+
             newNodes.push_back(newReorder);
             graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end());
         }