1 // Copyright (C) 2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "mkldnn_bin_conv_node.h"
6 #include "mkldnn_reorder_node.h"
7 #include "mkldnn_input_node.h"
8 #include "mkldnn_activation_node.h"
9 #include "desc_iterator.hpp"
10 #include "mkldnn_eltwise_node.h"
11 #include "mkldnn_depthwise_node.h"
12 #include "mkldnn_quantize_node.h"
13 #include "mkldnn_conv_node.h"
14 #include <ie_layers.h>
17 #include <mkldnn_types.h>
18 #include <mkldnn_extension_utils.h>
19 #include <ie_layers_internal.hpp>
21 using namespace mkldnn;
22 using namespace MKLDNNPlugin;
23 using namespace InferenceEngine;
25 MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng)
26 : MKLDNNNode(layer, eng) {
27 internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
28 return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(0).desc());
32 void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() {
// Validates the IR layer, derives weight/bias dims, strides/dilations/paddings,
// collects parameters of fused nodes (dw-conv, eltwise, quantize) and creates
// the initial bin-conv descriptor for nhwc src/dst.
// NOTE(review): the embedded listing numbers jump (e.g. 61 -> 69, 122 -> 126),
// so statements and closing braces are missing from this view -- recover the
// file from VCS before editing. Comments below cover only the visible code.
36 auto* binConvLayer = dynamic_cast<BinaryConvolutionLayer*>(getCnnLayer().get());
37 if (binConvLayer == nullptr)
38 THROW_IE_EXCEPTION << "Cannot convert convolution layer.";
40 if (getChildEdges().empty())
41 THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
// Only 4D and 5D input blobs are accepted.
43 if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) {
44 THROW_IE_EXCEPTION << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
47 isMerged = (!getMergeWith().empty()); // grouped convolution was constructed from split->concat subgraph
48 isGrouped = binConvLayer->_group != 1; // group info available from IR
// The two grouping mechanisms are mutually exclusive.
49 if (isMerged && isGrouped)
50 THROW_IE_EXCEPTION << "Convolution initialization. Group splitted mode are used together with direct group specification.";
52 // default values. Can be replaced in next steps
53 size_t groupNum = binConvLayer->_group;
54 pad_value = binConvLayer->_pad_value;
55 size_t groupIC = binConvLayer->_in_depth;
56 size_t groupOC = binConvLayer->_out_depth;
// Depthwise when the group count equals both input and output channel counts.
58 isDW = groupNum == groupOC && groupNum == groupIC;
// For merged convolutions each merge peer contributes one group.
61 groupNum = getMergeWith().size() + 1;
// Weight dims are [OC, IC, spatial...]; the loop appends the kernel axes in
// reversed order (IR presumably stores them W-first -- TODO confirm).
69 weightDims.push_back(groupOC);
70 weightDims.push_back(groupIC);
71 for (int i = 1; i <= binConvLayer->_kernel.size(); i++) {
72 weightDims.push_back(binConvLayer->_kernel[binConvLayer->_kernel.size() - i]);
74 biasesDims = { groupOC * groupNum };
// Grouped/merged weights get a leading groups dimension: [G, OC, IC, ...].
76 if (isGrouped || isMerged) weightDims.insert(weightDims.begin(), groupNum);
78 internalBlobs.push_back(createInternalBlob(weightDims, true));
80 Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;
// Strides/dilations are copied reversed; mkldnn's dilation convention is
// (dilation - 1), hence the subtraction.
82 invertVectorCopyUtoI(binConvLayer->_stride, stride);
83 for (int i = 1; i <= binConvLayer->_dilation.size(); i++) {
84 dilation.push_back(static_cast<int>(binConvLayer->_dilation[binConvLayer->_dilation.size() - i]) - 1);
87 auto allPads = getPaddings(*binConvLayer);
88 invertVectorCopyUtoI(allPads.begin, paddingL);
89 invertVectorCopyUtoI(allPads.end, paddingR);
91 MKLDNNDims weightsDims = MKLDNNDims(weightDims);
// Recompute right/bottom padding so the primitive's computed output size
// matches the dims recorded on the child edge.
93 for (int i = 0; i < paddingR.size(); i++) {
94 int with_group = (isGrouped || isMerged) ? 1 : 0;
95 int krn = weightsDims[with_group + 2 + i];
96 int src = getParentEdgeAt(0)->getDims()[2 + i];
97 int dst = getChildEdgeAt(0)->getDims()[2 + i];
// Effective (dilated) kernel extent.
99 krn = (krn - 1)*(dilation[i] + 1) + 1;
100 int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
101 paddingR[i] = (dst - calc_dst) * stride[i];
// Scan fused nodes: a fused MKLDNNConvolutionNode supplies dw-conv geometry,
// a fused MKLDNNQuantizeNode switches the output to packed binary.
105 withBinarization = false;
106 for (auto &node : fusedWith) {
107 auto* convolutionNode = dynamic_cast<MKLDNNConvolutionNode*>(node.get());
108 if (convolutionNode) {
109 auto *convLayer = reinterpret_cast<ConvolutionLayer*>(convolutionNode->getCnnLayer().get());
// dw-conv input spatial size = the last two dims of its recorded input shape.
110 dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2];
111 dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1];
112 dw_conv_oc = convLayer->_out_depth;
113 for (int i = 0; i < convLayer->_kernel.size(); i++) {
114 dw_conv_kernel.push_back(convLayer->_kernel[i]);
116 for (int i = 0; i < convLayer->_stride.size(); i++) {
117 dw_conv_strides.push_back(convLayer->_stride[i]);
// (review) the eltwise branch body is on dropped lines; presumably it sets
// withSum -- confirm against the original file.
121 auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode*>(node.get());
126 auto* quantizationNode = dynamic_cast<MKLDNNQuantizeNode*>(node.get());
127 if (quantizationNode) {
128 withBinarization = true;
// Exactly one input edge, or two when an eltwise-sum is fused (withSum).
132 if ((!withSum && getParentEdges().size() != 1) || (withSum && getParentEdges().size() != 2))
133 THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
// Src is always packed 1-bit (memory::bin); dst is bin only when a fused
// quantize re-binarizes the result, otherwise f32. Layout is nhwc.
135 auto inputDataType = memory::bin;
136 auto outputDataType = withBinarization ? memory::bin : memory::f32;
138 MKLDNNMemoryDesc in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nhwc);
139 MKLDNNMemoryDesc out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nhwc);
140 createDescriptor({in_candidate}, {out_candidate});
143 void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) {
// Translates the fused nodes into an mkldnn post-ops chain (sum, eltwise,
// depthwise scale/shift, binarization, fused dw-conv) and attaches it to `attr`.
// The default argument here is only well-formed if the in-class declaration
// omits it (a redeclaration may add a default argument) -- TODO confirm header.
// NOTE(review): embedded listing numbers jump (e.g. 154 -> 162, 214 -> 220), so
// several else-branches, brace closings and the `blob_idx` declaration are
// missing from this view; comments cover visible code only.
145 mkldnn::post_ops ops;
147 for (auto &node : fusedWith) {
148 auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
// Fused I8 eltwise-sum: append an accumulating sum post-op with the stored scale.
150 if (eltwiseNode->getCnnLayer()->precision == Precision::I8) {
151 auto it = eltwiseNode->getCnnLayer()->blobs.find("eltwise-sum-scale");
152 if (it != eltwiseNode->getCnnLayer()->blobs.end()) {
153 // currently there is the only one scale while we need scale by channel :(
154 ops.append_sum(it->second->buffer().as<float*>()[0]);
// Fused activation: maps directly to an mkldnn eltwise post-op.
162 auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
163 if (activationNode) {
164 ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(),
165 activationNode->getBeta());
// Fused depthwise (scale/shift etc.): copy weights (and biases for scale_shift)
// into f32 x-format side blobs, rounded up to a multiple of 16 channels.
169 auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
171 auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
174 MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});
176 PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
177 PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
179 PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
180 depthwiseLayer->_weights->buffer(),
181 depthwiseLayer->_weights->size() *
182 MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
// Broadcast a single scalar weight across every channel of the side blob.
184 if (depthwiseNode->isBroadcast()) {
185 float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
186 for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
187 static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
// scale_shift additionally carries per-channel biases in a second side blob.
191 if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
192 PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
193 PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
195 PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
196 depthwiseLayer->_biases->buffer(),
197 depthwiseLayer->_biases->size() *
198 MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
200 if (depthwiseNode->isBroadcast()) {
201 float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
202 for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
203 static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
207 ops.append_depthwise(depthwiseNode->getAlgorithm(),
208 (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
209 (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());
// (review) the calls below are alternate paths with fewer arguments visible;
// the surrounding conditions (presumably the initWeights == false re-init
// case) are on dropped lines.
213 ops.append_depthwise(depthwiseNode->getAlgorithm(),
214 (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
220 ops.append_depthwise(depthwiseNode->getAlgorithm(),
// Fused quantize: per-channel binarization thresholds (gathered earlier via
// pushBinarizationThreshold), padded to a multiple of 16, as an f32 side blob.
228 auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode *>(node.get());
231 MKLDNNDims binarizationDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});
233 PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
234 PostOpsIntBlobMemory[blob_idx]->Create(binarizationDims, memory::data_type::f32, memory::format::x);
236 PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
237 &binarizationThresholds[0],
238 binarizationThresholds.size() *
239 MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
241 ops.append_binarization(binarization_depthwise, (const float*)PostOpsIntBlobMemory[blob_idx]->GetData());
// Fallback: binarization post-op without prepared thresholds.
245 ops.append_binarization(binarization_depthwise, nullptr);
// Fused depthwise convolution: repack its weights to Goihw8g and biases to x
// format, then append a dw_conv post-op with the recorded geometry.
249 auto* convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(node.get());
250 if (convolutionNode) {
251 auto* convLayer = reinterpret_cast<ConvolutionLayer*>(convolutionNode->getCnnLayer().get());
254 PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
255 MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
256 PostOpsIntBlobMemory[blob_idx]->Create(dwWeightsDims, memory::data_type::f32,
257 memory::format::Goihw8g);
259 PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::goihw,
260 convLayer->_weights->buffer(),
261 dwWeightsDims.size() *
262 MKLDNNExtensionUtils::sizeOfDataType(
263 memory::data_type::f32));
265 PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
266 MKLDNNDims dwBiasesDims({dw_conv_oc});
267 PostOpsIntBlobMemory[blob_idx + 1]->Create(dwBiasesDims, memory::data_type::f32,
269 PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
270 convLayer->_biases->buffer(),
271 dwBiasesDims.size() *
272 MKLDNNExtensionUtils::sizeOfDataType(
273 memory::data_type::f32));
274 ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
275 dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
276 (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
277 (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());
// Re-init path: append the dw_conv with already-prepared blobs (the rest of
// this call is on dropped lines).
281 ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
282 dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
// Activations fused after the dw-conv are appended after its post-op.
286 for (auto &dwConvFusedNode : convolutionNode->getFusedWith()) {
287 auto* dwConvActivationNode = dynamic_cast<MKLDNNActivationNode *>(dwConvFusedNode.get());
288 if (dwConvActivationNode) {
289 ops.append_eltwise(1.0, dwConvActivationNode->getAlgorithm(), dwConvActivationNode->getAlpha(),
290 dwConvActivationNode->getBeta());
298 attr.set_post_ops(ops);
301 void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() {
// Enumerates every implementation mkldnn offers for the created descriptors and
// records one supported primitive descriptor (input/output configs + impl type)
// per implementation. Idempotent: bails out if already populated.
// NOTE(review): the try/do framing, attr setup (setPostOps?) and the condition
// guarding the extra fused-sum input config are on lines missing from this
// listing -- confirm against the original file.
302 if (!supportedPrimitiveDescriptors.empty())
305 mkldnn::primitive_attr attr;
308 for (auto& desc : descs) {
310 primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
312 InferenceEngine::LayerConfig config;
313 config.dynBatchSupport = true;
// Input configs: not in-place, not constant; the impl's layout is wrapped as
// an "uninit" tensor desc so it can still be refined later.
314 for (size_t i = 0; i < desc.inputNumbers(); i++) {
315 InferenceEngine::DataConfig dataConfig;
316 dataConfig.inPlace = -1;
317 dataConfig.constant = false;
318 dataConfig.desc = getSrcMemDesc(itpd, i);
320 dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
321 config.inConfs.push_back(dataConfig);
// Output configs: inPlace = 1 -- presumably shares memory with the second
// (fused-sum) input when present; the guarding condition is not visible here.
324 for (size_t i = 0; i < desc.outputNumbers(); i++) {
325 InferenceEngine::DataConfig dataConfig;
327 dataConfig.inPlace = 1;
330 dataConfig.constant = false;
331 dataConfig.desc = getDstMemDesc(itpd, i);
333 dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
334 config.outConfs.push_back(dataConfig);
// Extra input config (fused-sum second input) reuses the last dataConfig,
// but as a regular (non-inplace) input.
337 dataConfig.inPlace = -1;
338 config.inConfs.push_back(dataConfig);
341 impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
343 supportedPrimitiveDescriptors.emplace_back(config, impl_type);
344 } while (itpd.next());
345 } catch (std::exception& e) {
346 // it throw exception in case of no implementation found
353 void MKLDNNBinaryConvolutionNode::createPrimitive() {
// Builds the final mkldnn binary_convolution_forward primitive from the
// selected descriptor, wiring src edge 0, the internal weights blob and dst
// edge 0. Post-ops are materialized with initWeights = true.
// NOTE(review): the customary "already created" early-return guard appears to
// be on lines missing from this listing -- confirm before editing.
357 mkldnn::primitive_attr attr;
358 setPostOps(attr, true);
360 auto prim_desc = createPrimitiveDescriptor<binary_convolution_forward::primitive_desc,
361 binary_convolution_forward::desc>(attr);
363 prim.reset(new binary_convolution_forward(prim_desc,
364 getParentEdgeAt(0)->getMemory().GetPrimitive(),
365 internalBlobMemory[0]->GetPrimitive(),
366 getChildEdgeAt(0)->getMemory().GetPrimitive()));
369 bool MKLDNNBinaryConvolutionNode::created() const {
370 return getType() == BinaryConvolution;
373 void MKLDNNBinaryConvolutionNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
374 const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
375 TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0];
376 mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());
378 MKLDNNMemoryDesc in_candidate(inDesc);
379 MKLDNNMemoryDesc out_candidate(outDesc);
381 // grouping and autoblocking is not compatible
382 if (((isGrouped && !isDW) || isMerged) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
385 MKLDNNDims blocked_weightDims(weightDims);
386 MKLDNNDims blocked_biasesDims(biasesDims);
387 MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::any};
389 std::shared_ptr<mkldnn::binary_convolution_forward::desc> bin_conv_desc;
390 bin_conv_desc.reset(new binary_convolution_forward::desc(prop_kind::forward_scoring, algorithm::binary_convolution_direct,
391 in_candidate, wgh_candidate, out_candidate, stride, dilation,
392 paddingL, paddingR, pad_value));
394 descs.emplace_back(bin_conv_desc);
397 void MKLDNNBinaryConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& config) {
// Re-creates the descriptor for the exact layer config chosen by the graph,
// then walks the implementation list again to restore the configuration that
// matches the selected implementation type; accepts the caller's config as-is
// if the last descriptor's impl type matches.
// NOTE(review): null checks, brace closings, the selected_count increment and
// the catch body are on lines missing from this listing -- recover from VCS.
398 auto* selectedPD = getSelectedPrimitiveDescriptor();
403 createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc});
405 mkldnn::primitive_attr attr;
408 InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig();
409 size_t selected_count = 0;
410 for (size_t i = 0; i < descs.size(); i++) {
411 const auto& desc = descs[i];
413 primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
415 InferenceEngine::LayerConfig cfg;
416 cfg.dynBatchSupport = true;
417 for (size_t j = 0; j < desc.inputNumbers(); j++) {
418 InferenceEngine::DataConfig dataConfig;
419 dataConfig.inPlace = -1;
420 dataConfig.constant = false;
421 dataConfig.desc = getSrcMemDesc(itpd, j);
422 cfg.inConfs.push_back(dataConfig);
// The second (fused-sum) input config is pushed before the output config;
// the output itself is marked inPlace = 1 (shares memory with that input).
425 for (size_t j = 0; j < desc.outputNumbers(); j++) {
426 InferenceEngine::DataConfig dataConfig;
427 dataConfig.inPlace = -1;
429 cfg.inConfs.push_back(dataConfig);
430 dataConfig.inPlace = 1;
432 dataConfig.constant = false;
433 dataConfig.desc = getDstMemDesc(itpd, j);
435 cfg.outConfs.push_back(dataConfig);
437 impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
// The config whose ordinal equals the selected PD index must keep the same
// implementation type, otherwise the original configuration is unrecoverable.
439 if (selected_count == selectedPrimitiveDescriptorIndex) {
440 if (impl_type != selectedPD->getImplementationType()) {
441 THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
// Last descriptor: on an exact impl-type match, adopt the caller's config.
445 if (i == descs.size() - 1) {
446 if (impl_type == selectedPD->getImplementationType()) {
447 rightConfig = config;
451 } while (itpd.next());
452 } catch (std::exception& e) {
456 selectedPD->getConfig() = rightConfig;
459 void MKLDNNBinaryConvolutionNode::pushBinarizationThreshold(float value) {
460 binarizationThresholds.push_back(value);