-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <memory>
#include <set>
#include <ie_layers_internal.hpp>
+#include <nodes/mkldnn_bin_conv_node.h>
+#include <nodes/mkldnn_quantize_node.h>
+#include "cpu_isa_traits.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
MergeGroupConvolution(graph);
graph.RemoveDroppedNodes();
-// SLTMTransform(graph);
-// RemoveDropped(graph);
+ FuseConvolutionAndDepthwise(graph);
+ graph.RemoveDroppedNodes();
FuseConvolutionAndActivation(graph);
graph.RemoveDroppedNodes();
FuseConvolutionAndDWConvolution(graph);
graph.RemoveDroppedNodes();
+ FuseBinaryConvolutionAndQuantize(graph);
+ graph.RemoveDroppedNodes();
+
FuseBatchNormWithScale(graph);
graph.RemoveDroppedNodes();
+ FuseFullyConnectedAndActivation(graph);
+ graph.RemoveDroppedNodes();
+
RemoveIdentityOperator(graph);
graph.RemoveDroppedNodes();
conv->inDims[0] = convInDims;
conv->outDims[0] = convOutDims;
+ conv->fuseWith(split);
+ conv->fuseWith(concat);
+
graph.DropNode(split);
graph.DropNode(concat);
}
};
for (int i = 0; i < graphNodes.size(); i++) {
- if (graphNodes[i]->getType() == Convolution) {
+ if (graphNodes[i]->getType() == Convolution || graphNodes[i]->getType() == BinaryConvolution) {
auto conv = graphNodes[i];
auto fuse = [&] (MKLDNNNodePtr relu) {
- conv->setType(Convolution_Activation);
+ if (graphNodes[i]->getType() != BinaryConvolution)
+ conv->setType(Convolution_Activation);
conv->fuseWith(relu);
};
auto& graphNodes = graph.GetNodes();
auto isSutableParentNode = [](MKLDNNNodePtr node) {
- return (node->getType() == Convolution || node->getType() == Convolution_Activation) &&
- node->getCnnLayer()->precision == Precision::FP32 &&
- (node->getChildEdges().size() == 1);
+ bool isSutableConv = (node->getType() == Convolution || node->getType() == Convolution_Activation) &&
+ node->getCnnLayer()->precision == Precision::FP32;
+ bool isSutableBinConv = node->getType() == BinaryConvolution;
+ return (isSutableConv || isSutableBinConv) && node->getChildEdges().size() == 1;
};
auto isSutableChildNode = [](MKLDNNNodePtr node) {
if (!isSutableChildNode(depthwise0)) continue;
conv->fuseWith(depthwise0);
- conv->setType(Convolution_Depthwise);
+ if (conv->type != BinaryConvolution)
+ conv->setType(Convolution_Depthwise);
if (depthwise0->getChildEdges().size() == 1) {
auto depthwise1 = depthwise0->getChildEdgeAt(0)->getChild();
return node->getType() == Convolution || node->getType() == Convolution_Activation;
};
+ auto isBinaryConvolutionNode = [](MKLDNNNodePtr node) {
+ return node->getType() == BinaryConvolution;
+ };
+
auto is1x1Convolution = [](ConvolutionLayer* layer) {
return layer->_kernel[X_AXIS] == 1 && layer->_kernel[Y_AXIS] == 1;
};
auto isSutableParentConvolution = [&](MKLDNNNodePtr node) {
- auto* layer = dynamic_cast<ConvolutionLayer*>(node->getCnnLayer().get());
+ if (isBinaryConvolutionNode(node)) {
+ auto *layer = dynamic_cast<BinaryConvolutionLayer *>(node->getCnnLayer().get());
+
+ bool isSupportedParams = layer->_group == 1;
+ if (!isSupportedParams) return false;
+ } else {
+ auto *layer = dynamic_cast<ConvolutionLayer *>(node->getCnnLayer().get());
- bool isSupportedParams = layer->_group == 1 &&
- ((is1x1Convolution(layer) &&
- layer->_stride[X_AXIS] == 1 && layer->_stride[Y_AXIS] == 1) || !is1x1Convolution(layer)) &&
- layer->precision == Precision::FP32;;
- if (!isSupportedParams) return false;
+ bool isSupportedParams = layer->_group == 1 &&
+ ((is1x1Convolution(layer) && layer->_stride[X_AXIS] == 1 &&
+ layer->_stride[Y_AXIS] == 1) || !is1x1Convolution(layer)) &&
+ (layer->precision == Precision::FP32 || layer->precision == Precision::I8);
+ if (!isSupportedParams) return false;
+ }
return node->getChildEdges().size() == 1 && isConvolutionNode(node->getChildEdgeAt(0)->getChild());
};
- auto isSutableChildConvolution = [](MKLDNNNodePtr node) {
- auto* layer = dynamic_cast<ConvolutionLayer*>(node->getCnnLayer().get());
- auto allPads = getPaddings(*layer);
- bool isSupportedParams = layer->_out_depth == layer->_group &&
+ auto isSutableChildConvolution = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
+ auto* childLayer = dynamic_cast<ConvolutionLayer*>(childNode->getCnnLayer().get());
- layer->_out_depth != 1 &&
- // Depthwise convolution output should be multiple of 8
+ if (!isBinaryConvolutionNode(parentNode)) {
+ auto* parentLayer = dynamic_cast<ConvolutionLayer*>(parentNode->getCnnLayer().get());
+ if (parentLayer->precision != childLayer->precision)
+ return false;
+ }
- layer->_kernel[X_AXIS] == 3 && layer->_kernel[Y_AXIS] == 3 &&
+ auto allPads = getPaddings(*childLayer);
+ bool isSupportedParams = childLayer->_out_depth == childLayer->_group &&
+ childLayer->_out_depth != 1 &&
+ // Depthwise convolution output should be multiple of 8
+ childLayer->_kernel[X_AXIS] == 3 && childLayer->_kernel[Y_AXIS] == 3 &&
allPads.begin[X_AXIS] == 1 && allPads.begin[Y_AXIS] == 1 &&
- layer->_dilation[X_AXIS] == 1 && layer->_dilation[Y_AXIS] == 1 &&
- layer->_biases != nullptr && layer->_biases->size() != 0 &&
- layer->precision == Precision::FP32;
+ childLayer->_dilation[X_AXIS] == 1 && childLayer->_dilation[Y_AXIS] == 1 &&
+ childLayer->_biases != nullptr && childLayer->_biases->size() != 0;
+
return isSupportedParams;
};
- auto isFusingWorthwhile = [](MKLDNNNodePtr node) {
- auto inDims = node->inDims[0];
- auto outDims = node->outDims[0];
+ auto isFusingWorthwhile = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
+ if (isBinaryConvolutionNode(parentNode)) {
+ return true;
+ }
+
+ auto* layer = dynamic_cast<ConvolutionLayer*>(childNode->getCnnLayer().get());
+
+ auto inDims = childNode->inDims[0];
+ auto outDims = childNode->outDims[0];
+ int elemSize = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(layer->precision));
int L3_cache_size = mkldnn_get_cache_size(3, false);
- int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * sizeof(float);
- int dw_conv_output_size = outDims[0] * outDims[1]* outDims[2] * outDims[3] * sizeof(float);
- return (dw_conv_input_size + dw_conv_output_size > L3_cache_size / 2);
+ int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * elemSize;
+ int dw_conv_output_size = outDims[0] * outDims[1]* outDims[2] * outDims[3] * elemSize;
+
+ bool isInt8 = layer->precision == Precision::I8 || layer->precision == Precision::U8;
+ bool isAVX512NotSupported = !mkldnn::impl::cpu::mayiuse(impl::cpu::cpu_isa_t::avx512_common);
+
+ return isInt8 ? isAVX512NotSupported : (dw_conv_input_size + dw_conv_output_size > L3_cache_size / 2);
};
for (int i = 0; i < graphNodes.size(); i++) {
- if (!isConvolutionNode(graphNodes[i])) continue;
+ if (!isConvolutionNode(graphNodes[i]) && !isBinaryConvolutionNode(graphNodes[i])) continue;
auto parentConvNode = graphNodes[i];
if (!isSutableParentConvolution(parentConvNode)) continue;
auto childConvNode = parentConvNode->getChildEdgeAt(0)->getChild();
- if (!isSutableChildConvolution(childConvNode)) continue;
+ if (!isSutableChildConvolution(parentConvNode, childConvNode)) continue;
- if (!isFusingWorthwhile(childConvNode)) continue;
+ if (!isFusingWorthwhile(parentConvNode, childConvNode)) continue;
parentConvNode->fuseWith(childConvNode);
graph.DropNode(childConvNode);
}
}
+void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph) {
+    // Fuses a BinaryConvolution node with a directly following binary Quantize
+    // (levels == 2) node. The quantize "input low" blob is absorbed into the
+    // convolution as per-output-channel binarization thresholds; afterwards the
+    // Quantize node and its constant-input edges are removed from the graph.
+    auto removeEdge = [](MKLDNNGraph &graph, MKLDNNEdgePtr& edge) {
+        auto& edges = graph.GetEdges();
+        for (auto it = edges.begin(); it != edges.end(); it++) {
+            if ((*it) == edge) {
+                edges.erase(it);
+                return;
+            }
+        }
+    };
+
+    auto& graphNodes = graph.GetNodes();
+
+    auto isSutableParentNode = [](MKLDNNNodePtr node) {
+        bool isSutableBinConv = node->getType() == BinaryConvolution;
+        return isSutableBinConv && node->getChildEdges().size() == 1;
+    };
+
+    auto isSutableChildNode = [](MKLDNNNodePtr node) {
+        if (!node->getCnnLayer())
+            return false;
+
+        if (node->getType() != Quantize)
+            return false;
+
+        // The layer attached to a Quantize node is expected to be a QuantizeLayer,
+        // but guard the cast so a mismatched layer cannot cause a null dereference.
+        auto* quantizeLayer = dynamic_cast<QuantizeLayer*>(node->getCnnLayer().get());
+        return quantizeLayer != nullptr && quantizeLayer->levels == 2;
+    };
+
+    for (int i = 0; i < graphNodes.size(); i++) {
+        auto parent = graphNodes[i];
+        if (!isSutableParentNode(parent)) continue;
+
+        auto child = parent->getChildEdgeAt(0)->getChild();
+        if (!isSutableChildNode(child)) continue;
+
+        // Cast before mutating the graph so an unexpected node implementation
+        // leaves the graph untouched instead of being half-fused.
+        auto* binConvNode = dynamic_cast<MKLDNNBinaryConvolutionNode*>(parent.get());
+        if (binConvNode == nullptr) continue;
+
+        parent->fuseWith(child);
+
+        auto parents = child->parentEdges;
+        // Absorb the "input low" blob (fed through an Input node) as thresholds.
+        for (size_t j = 0; j < parents.size(); j++) {
+            auto p_edge = parents[j].lock();
+            if (p_edge->getParent()->getType() == Input) {
+                InferenceEngine::SizeVector dims;
+                dims.push_back(binConvNode->getChildEdgeAt(0)->getDims()[1]);
+
+                auto InputLowBlob = dynamic_cast<TBlob<float>*>(p_edge->getParent()->getCnnLayer()->blobs["custom"].get());
+                if (InputLowBlob == nullptr)
+                    continue;
+
+                auto inputLowData = InputLowBlob->buffer().as<float*>();
+                int inputLowAxis = p_edge->getDims().ndims() == 1 ? 0 : 1;
+                // A scalar threshold is broadcast to every output channel.
+                bool isInputLowBroadcasted = p_edge->getDims()[inputLowAxis] != dims[0];
+
+                for (int c = 0; c < dims[0]; c++) {
+                    binConvNode->pushBinarizationThreshold(inputLowData[isInputLowBroadcasted ? 0 : c]);
+                }
+
+                break;
+            }
+        }
+
+        // Detach every constant/input edge of the quantize node; the edge coming
+        // from the binary convolution itself must survive the fusion.
+        for (size_t j = 0; j < parents.size(); j++) {
+            auto p_edge = parents[j].lock();
+            if (p_edge->getParent()->getType() == BinaryConvolution)
+                continue;
+
+            removeEdge(graph, p_edge);
+        }
+
+        graph.DropNode(child);
+    }
+}
+
/**
* Check if there is a data dependency between parent and child
* BFS starting from parent and comparing with child
if (!std::dynamic_pointer_cast<MKLDNNEltwiseNode>(graphNode)->isSum()) continue;
if (!std::dynamic_pointer_cast<MKLDNNEltwiseNode>(graphNode)->isUnitScales()) continue;
+ auto parent1 = graphNode->getParentEdgeAt(0)->getParent();
+ auto parent2 = graphNode->getParentEdgeAt(1)->getParent();
// TODO: Enlarge to several inputs
if (graphNode->getParentEdges().size() != 2 ||
- (graphNode->getParentEdgeAt(0)->getParent()->getType() != Convolution &&
- graphNode->getParentEdgeAt(1)->getParent()->getType() != Convolution))
+ (parent1->getType() != Convolution && parent1->getType() != BinaryConvolution &&
+ parent2->getType() != Convolution && parent2->getType() != BinaryConvolution))
continue;
- auto parent1 = graphNode->getParentEdgeAt(0)->getParent();
- auto parent2 = graphNode->getParentEdgeAt(1)->getParent();
-
- auto mergedConv = (parent1->getType() == Convolution) ? parent1 : parent2;
- auto peerNode = (parent1->getType() == Convolution) ? parent2 : parent1;
- if (peerNode->getType() == Convolution && mergedConv->getChildEdges().size() != 1) {
+ auto mergedConv = (parent1->getType() == Convolution || parent1->getType() == BinaryConvolution) ? parent1 : parent2;
+ auto peerNode = (parent1->getType() == Convolution || parent1->getType() == BinaryConvolution) ? parent2 : parent1;
+ if ((peerNode->getType() == Convolution || peerNode->getType() == BinaryConvolution) &&
+ mergedConv->getChildEdges().size() != 1) {
mergedConv = parent2;
peerNode = parent1;
}
isFusingSupported(graphNode, graphNode->getChildEdgeAt(0)->getChild())) {
auto relu_shared = graphNode->getChildEdgeAt(0)->getChild();
lastNode = relu_shared;
- mergedConv->setType(Convolution_Sum_Activation);
+ if (mergedConv->getType() != BinaryConvolution)
+ mergedConv->setType(Convolution_Sum_Activation);
mergedConv->fuseWith(sum);
} else {
- mergedConv->setType(Convolution_Sum);
+ if (mergedConv->getType() != BinaryConvolution)
+ mergedConv->setType(Convolution_Sum);
}
mergedConv->fuseWith(lastNode);
- MKLDNNEdgePtr edgePtr(new MKLDNNEdge(peerNode, mergedConv));
- graph.GetEdges().push_back(edgePtr);
+ if (mergedConv->fusedWith.size() > 0 &&
+ (mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) {
+ // Merged with DW_conv. Shape may change
+ mergedConv->inDims.push_back(mergedConv->fusedWith[0]->outDims[0]);
+ } else {
+ mergedConv->inDims.push_back(mergedConv->outDims[0]);
+ }
size_t childIdx = 0;
for (childIdx = 0; childIdx < peerNode->getChildEdges().size(); childIdx++) {
}
}
- mergedConv->addEdge(edgePtr, mergedConv->getParentEdges().size(), childIdx);
+ int peer_port = peerNode->getChildEdgeAt(childIdx)->getInputNum();
+ peerNode->getChildEdgeAt(childIdx)->drop();
+
+ MKLDNNEdgePtr edgePtr(new MKLDNNEdge(peerNode, mergedConv, peer_port, 1));
+ graph.GetEdges().push_back(edgePtr);
+
+ mergedConv->addEdge(edgePtr);
- for (size_t j = 0; j < lastNode->getChildEdges().size(); j++) {
- auto child = lastNode->getChildEdgeAt(j)->getChild();
- edgePtr = lastNode->getChildEdgeAt(j);
- int idxParent = edgePtr->getOutputNum();
- int idxChild = edgePtr->getInputNum();
+ std::vector<MKLDNNEdgeWeakPtr> edges_to_reconnect = lastNode->getChildEdges();
+ for (auto &edge_w : edges_to_reconnect) {
+ auto edge = edge_w.lock();
+ auto child = edge->getChild();
+ int idxParent = edge->getInputNum();
+ int idxChild = edge->getOutputNum();
- MKLDNNEdgePtr newEdge(new MKLDNNEdge(mergedConv, child));
+ // reconnect after activation/sum. Port index must be 0
+ IE_ASSERT(idxParent == 0);
+
+ edge->drop();
+
+ MKLDNNEdgePtr newEdge(new MKLDNNEdge(mergedConv, child, idxParent, idxChild));
graph.GetEdges().push_back(newEdge);
- child->addEdge(newEdge, idxParent, idxChild);
+ child->addEdge(newEdge);
}
if (lastNode != sum) {
}
}
+void MKLDNNGraphOptimizer::FuseFullyConnectedAndActivation(MKLDNNGraph &graph) {
+    auto& graphNodes = graph.GetNodes();
+
+    // A ReLU child of a non-fp32 FullyConnected node can be folded into the
+    // FC primitive as a post-operation.
+    auto canBeFused = [&](MKLDNNNodePtr fc, MKLDNNNodePtr activation) {
+        if (!activation->getCnnLayer())
+            return false;
+
+        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(activation.get());
+
+        // TODO: fuse on fp32 not optimized yet in mkl-dnn
+        return activationNode && fc->getCnnLayer()->precision != Precision::FP32 &&
+               (activationNode->getAlgorithm() == eltwise_relu);
+    };
+
+    for (int idx = 0; idx < graphNodes.size(); idx++) {
+        auto fcNode = graphNodes[idx];
+        if (fcNode->getType() != FullyConnected)
+            continue;
+
+        if (fcNode->getChildEdges().size() != 1)
+            continue;
+
+        auto activationNode = fcNode->getChildEdgeAt(0)->getChild();
+        if (!canBeFused(fcNode, activationNode))
+            continue;
+
+        fcNode->setType(FullyConnected_Activation);
+        fcNode->fuseWith(activationNode);
+        graph.DropNode(activationNode);
+    }
+}
void MKLDNNGraphOptimizer::RemoveIdentityOperator(MKLDNNGraph &graph) {
for (MKLDNNNodePtr& node : graph.GetNodes()) {
}
MKLDNNNodePtr p = n->getParentEdgeAt(0)->getParent();
+ MKLDNNNodePtr c = nn->getChildEdgeAt(0)->getChild();
auto oldEdgeNum = n->getParentEdgeAt(0)->getInputNum();
processed.insert(node);
processed.insert(nextNode);
- auto edge = p->getChildEdgeAt(oldEdgeNum);
+ MKLDNNEdgePtr edge;
+ for (auto cur : p->getChildEdgesAtPort(oldEdgeNum)) {
+ if (cur->getChild() == c)
+ edge = cur;
+ }
+ if (!edge) THROW_IE_EXCEPTION << "Inappropriate graph processing";
std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName();
reorderPtr->setDescs(n->getInput(), nn->getOutput());
reorderPtr->_scales = scales;
}
- MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder));
- beforeNode->setDims(edge->getDims());
- MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild()));
- afterNode->setDims(edge->getDims());
- int oIndex = edge->getOutputNum();
- int iIndex = edge->getInputNum();
+    // Capture the original port indices before the edge is dropped and replaced
+ auto oIndex = edge->getOutputNum();
+ auto iIndex = edge->getInputNum();
if (iIndex < 0 || oIndex < 0)
THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
<< edge->getParent()->getName() << " and "
<< edge->getChild()->getName() << ".";
+ edge->drop();
+
+ MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
+ MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
// Add edge for beforeNode
- edge->getParent()->childEdges[iIndex].reset();
- edge->getParent()->childEdges[iIndex] = beforeNode;
beforeNode->getChild()->parentEdges.push_back(beforeNode);
+ edge->getParent()->childEdges.push_back(beforeNode);
// Add edge for afterNode
afterNode->getParent()->childEdges.push_back(afterNode);
- edge->getChild()->parentEdges[oIndex].reset();
- edge->getChild()->parentEdges[oIndex] = afterNode;
+ edge->getChild()->parentEdges.push_back(afterNode);
newReorder->getSupportedDescriptors();
newReorder->initSupportedPrimitiveDescriptors();
newReorder->selectOptimalPrimitiveDescriptor();
- beforeNode->getDesc();
graph.GetEdges().push_back(beforeNode);
- afterNode->getDesc();
graph.GetEdges().push_back(afterNode);
+ // Just to check accordance
+ afterNode->getDesc();
+ beforeNode->getDesc();
+
newNodes.push_back(newReorder);
graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end());
}