// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "mkldnn_conv_node.h"
#include "mkldnn_reorder_node.h"
#include "mkldnn_input_node.h"
#include "mkldnn_activation_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_eltwise_node.h"
#include "mkldnn_depthwise_node.h"
#include <ie_layers.h>
#include <string>
#include <vector>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <ie_layers_internal.hpp>

using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
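
// internalBlobDesc maps each internal blob to the memory descriptor the selected primitive
// expects: index 0 is the weights blob, index 1 the (optional) biases blob. The callbacks
// registered in the constructor below are queried once a primitive descriptor has been chosen.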

MKLDNNConvolutionNode::MKLDNNConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng)
        : MKLDNNNode(layer, eng), withBiases(false), withSum(false), dw_conv_iw(0), dw_conv_ih(0),
          dw_conv_oc(0), isDW(false), isMerged(false), withActivation(false), convLayer(nullptr), isGrouped(false) {
    internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
        return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(0).desc());
    });
    internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
        if (!withBiases)
            return MKLDNNMemoryDesc();
        return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(1).desc());
    });

    auto ws = layer->blobs.find("w-scale");
    if (ws != layer->blobs.end()) {
        wScale = ws->second;
    }

    // Try to find the oi-scale
    if (getCnnLayer()->type == "Convolution" && getCnnLayer()->precision == Precision::I8) {
        auto ois = layer->blobs.find("oi-scale");
        if ((getCnnLayer()->outData[0]->getPrecision() == Precision::I8 || getCnnLayer()->outData[0]->getPrecision() == Precision::U8)
            && ois == layer->blobs.end()) {
            THROW_IE_EXCEPTION << "Internal error of graph quantization - mismatch of intermediate scales and next layer type for convolution "
                               << getCnnLayer()->name;
        }
        if (ois != layer->blobs.end()) {
            // If we can find an oi-scale, then the next layer has to be INT8.
            oScale = ois->second;
        }
    }
}
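
// Builds mkldnn convolution descriptors for every layout candidate (planar and channel-blocked)
// this node may run with; the actual layout is picked later, when the supported primitive
// descriptors are enumerated.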
void MKLDNNConvolutionNode::getSupportedDescriptors() {
    InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
    if (precision == InferenceEngine::Precision::U16) {
        precision = InferenceEngine::Precision::FP32;
    }
    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
    precision = getCnnLayer()->outData[0]->getPrecision();
    auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);

    auto * convLayer = dynamic_cast<ConvolutionLayer*>(getCnnLayer().get());
    if (convLayer == nullptr)
        THROW_IE_EXCEPTION << "Cannot convert convolution layer.";

    if (getParentEdges().size() != 1 &&
        ((getType() != Convolution_Sum && getType() != Convolution_Sum_Activation) || getParentEdges().size() != 2))
        THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
    if (getChildEdges().empty())
        THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();

    if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) {
        THROW_IE_EXCEPTION << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
    }

    isMerged = (!getMergeWith().empty());  // grouped convolution constructed from a Split->Concat subgraph
    isGrouped = convLayer->_group != 1;    // group info available from the IR
    if (isMerged && isGrouped)
        THROW_IE_EXCEPTION << "Convolution initialization. Split-based group mode is used together with direct group specification.";

    // Default values; may be overridden below.
    size_t groupNum = convLayer->_group;
    size_t IC = convLayer->input()->getDims()[1];
    size_t groupIC = IC;
    size_t groupOC = convLayer->_out_depth;

    isDW = groupNum == groupOC && groupNum == groupIC;

    if (isMerged) {
        groupNum = getMergeWith().size() + 1;
    }
    if (isGrouped) {
        groupIC /= groupNum;
        groupOC /= groupNum;
    }
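
    // Weights are laid out as (g)oihw. Example: group = 2, IC = 64, OC = 128 gives the per-group
    // channels groupIC = 32 and groupOC = 64, so the weight dims assembled below become
    // {2, 64, 32, kh, kw} once the group dimension is prepended.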
    weightDims.clear();
    weightDims.push_back(groupOC);
    weightDims.push_back(groupIC);
    for (size_t i = 1; i <= convLayer->_kernel.size(); i++) {
        weightDims.push_back(convLayer->_kernel[convLayer->_kernel.size() - i]);
    }
    biasesDims = { groupOC * groupNum };

    if (isGrouped || isMerged) weightDims.insert(weightDims.begin(), groupNum);

    withBiases = (convLayer->_biases != nullptr && convLayer->_biases->size() != 0);

    internalBlobs.push_back(createInternalBlob(weightDims, true));
    if (withBiases) {
        internalBlobs.push_back(createInternalBlob(biasesDims, false));
    }

    Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;
    if (weights->precision() == Precision::I8) {
        // The weights blob has incorrect dims, so we have to fix it
        TensorDesc wdesc = internalBlobs[0]->getTensorDesc();
        wdesc.setPrecision(Precision::I8);
        InferenceEngine::TBlob<int8_t>::Ptr reshapedInt8Weights =
                InferenceEngine::TBlob<int8_t>::Ptr(
                        new InferenceEngine::TBlob<int8_t>(wdesc, static_cast<int8_t*>(weights->buffer()), weights->byteSize()));

        internalBlobs[0] = reshapedInt8Weights;
        if (withBiases) {
            Blob::Ptr biases = this->getCnnLayer()->blobs.find("biases")->second;
            TensorDesc bdesc = internalBlobs[1]->getTensorDesc();
            bdesc.setPrecision(Precision::I32);
            InferenceEngine::TBlob<int32_t>::Ptr reshapedInt32Biases =
                    InferenceEngine::TBlob<int32_t>::Ptr(
                            new InferenceEngine::TBlob<int32_t>(bdesc, static_cast<int32_t*>(biases->buffer()), biases->byteSize()));
            internalBlobs[1] = reshapedInt32Biases;
        }
    }

    invertVectorCopyUtoI(convLayer->_stride, stride);
    // mkldnn stores dilations as (dilation - 1), hence the "- 1" below.
    for (size_t i = 1; i <= convLayer->_dilation.size(); i++) {
        dilation.push_back(static_cast<int>(convLayer->_dilation[convLayer->_dilation.size() - i]) - 1);
    }

    auto allPads = getPaddings(*convLayer);
    invertVectorCopyUtoI(allPads.begin, paddingL);
    invertVectorCopyUtoI(allPads.end, paddingR);

    MKLDNNDims weightsDims = MKLDNNDims(weightDims);
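
    // mkldnn needs paddingR to be consistent with the output size recorded in the IR:
    //   dst = (src - dilated_krn + paddingL + paddingR) / stride + 1, where
    //   dilated_krn = (krn - 1) * (dilation + 1) + 1.
    // Example: src = 224, krn = 3, dilation = 0, stride = 2, paddingL = 1 gives
    // calc_dst = (224 - 3 + 1) / 2 + 1 = 112, so dst = 112 yields paddingR = 0.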
    for (size_t i = 0; i < paddingR.size(); i++) {
        int with_group = (isGrouped || isMerged) ? 1 : 0;
        int krn = weightsDims[with_group + 2 + i];
        int src = getParentEdgeAt(0)->getDims()[2 + i];
        int dst = getChildEdgeAt(0)->getDims()[2 + i];

        krn = (krn - 1)*(dilation[i] + 1) + 1;
        int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
        paddingR[i] = (dst - calc_dst) * stride[i];
    }

    withSum = getType() == Convolution_Sum || getType() == Convolution_Sum_Activation;
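    // (The sum input is accumulated in place through the output: see the inPlace handling in
    // initSupportedPrimitiveDescriptors and the append_sum post-op in setPostOps.)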

    // Capture the geometry of a fused tail depthwise convolution (used later by setPostOps).
    for (auto &node : fusedWith) {
        auto *convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(node.get());
        if (convolutionNode) {
            auto *fusedConvLayer = reinterpret_cast<ConvolutionLayer *>(convolutionNode->getCnnLayer().get());
            dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2];
            dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1];
            dw_conv_oc = fusedConvLayer->_out_depth;
            for (size_t i = 0; i < fusedConvLayer->_kernel.size(); i++) {
                dw_conv_kernel.push_back(fusedConvLayer->_kernel[i]);
            }
            for (size_t i = 0; i < fusedConvLayer->_stride.size(); i++) {
                dw_conv_strides.push_back(fusedConvLayer->_stride[i]);
            }
        }
    }

    if (this->getCnnLayer()->precision == Precision::I8) {
        MKLDNNMemoryDesc in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nhwc);
        MKLDNNMemoryDesc out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nhwc);
        createDescriptor({in_candidate}, {out_candidate});
    } else {
        // If the weights aren't quantized, the only precision we support is FP32
        inputDataType = memory::f32;
        outputDataType = memory::f32;
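
        // Enumerate layout candidates: the plain layout from the IR plus the 8- and 16-channel
        // blocked variants. When IC is 1 or 3, the input stays planar and only the output is blocked.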
        Layout layout = convLayer->input()->getLayout();

        if (layout == NCHW || layout == NHWC) {
            MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType,
                                          layout == NCHW ? memory::nchw : memory::nhwc);
            MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType,
                                           layout == NCHW ? memory::nchw : memory::nhwc);
            createDescriptor({in_candidate}, {out_candidate});

            if (IC == 3 || IC == 1) {
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nChw16c);
                createDescriptor({in_candidate}, {out_candidate});
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nChw8c);
                createDescriptor({in_candidate}, {out_candidate});
            } else {
                in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nChw16c);
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nChw16c);
                createDescriptor({in_candidate}, {out_candidate});
                in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nChw8c);
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nChw8c);
                createDescriptor({in_candidate}, {out_candidate});
            }
        } else if (layout == NCDHW || layout == NDHWC) {
            MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType,
                                          layout == NCDHW ? memory::ncdhw : memory::ndhwc);
            MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType,
                                           layout == NCDHW ? memory::ncdhw : memory::ndhwc);
            createDescriptor({in_candidate}, {out_candidate});

            if (IC == 3 || IC == 1) {
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nCdhw16c);
                createDescriptor({in_candidate}, {out_candidate});
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nCdhw8c);
                createDescriptor({in_candidate}, {out_candidate});
            } else {
                in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nCdhw16c);
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nCdhw16c);
                createDescriptor({in_candidate}, {out_candidate});
                in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nCdhw8c);
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nCdhw8c);
                createDescriptor({in_candidate}, {out_candidate});
            }
        }
    }
}

void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) {
    int blob_idx = 0;
    mkldnn::post_ops ops;
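    // Translate every fused node into an mkldnn post-op, in fusion order: eltwise sum,
    // activation, depthwise scale/shift, or a tail depthwise convolution. blob_idx counts the
    // persistent post-op blobs consumed so far.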

    for (auto &node : fusedWith) {
        auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
        if (eltwiseNode) {
            if (eltwiseNode->getCnnLayer()->precision == Precision::I8) {
                auto it = eltwiseNode->getCnnLayer()->blobs.find("eltwise-sum-scale");
                if (it != eltwiseNode->getCnnLayer()->blobs.end()) {
                    // currently there is only a single scale, while we need a per-channel scale
                    ops.append_sum(it->second->buffer().as<float*>()[0]);
                }
            } else {
                ops.append_sum(1.0);
            }
            continue;
        }

        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
        if (activationNode) {
            ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(),
                               activationNode->getBeta());
            continue;
        }

        auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
        if (depthwiseNode) {
            auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());

            if (initWeights) {
                MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);

                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
                                                        depthwiseLayer->_weights->buffer(),
                                                        depthwiseLayer->_weights->size() *
                                                        MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                if (depthwiseNode->isBroadcast()) {
                    float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
                    for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                        static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
                    }
                }

                if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
                    PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                    PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
                                                               memory::format::x);
                    PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
                                                                depthwiseLayer->_biases->buffer(),
                                                                depthwiseLayer->_biases->size() *
                                                                MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                    if (depthwiseNode->isBroadcast()) {
                        float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
                        for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                            static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
                        }
                    }

                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                    blob_idx += 2;
                } else {
                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         nullptr);

                    blob_idx += 1;
                }
            } else {
                // Weights are not initialized yet; register the post-op shape only.
                ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                     nullptr,
                                     nullptr);
            }

            continue;
        }

        auto* convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(node.get());
        if (convolutionNode) {
            auto* fusedConvLayer = reinterpret_cast<ConvolutionLayer*>(convolutionNode->getCnnLayer().get());

            auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedConvLayer->precision);
            auto biasPrc = memory::data_type::s32;

            if (initWeights) {
                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
                PostOpsIntBlobMemory[blob_idx]->Create(dwWeightsDims, weightsPrc, memory::format::Goihw8g);

                Blob::Ptr weights = fusedConvLayer->blobs.find("weights")->second;
                Blob::Ptr biases = fusedConvLayer->blobs.find("biases")->second;

                PostOpsIntBlobMemory[blob_idx]->SetData(weightsPrc, memory::goihw, weights->buffer(),
                                                        dwWeightsDims.size() * MKLDNNExtensionUtils::sizeOfDataType(weightsPrc));

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                MKLDNNDims dwBiasesDims({dw_conv_oc});
                PostOpsIntBlobMemory[blob_idx + 1]->Create(dwBiasesDims, biasPrc, memory::format::x);
                PostOpsIntBlobMemory[blob_idx + 1]->SetData(biasPrc, memory::x, biases->buffer(),
                                                            dwBiasesDims.size() * MKLDNNExtensionUtils::sizeOfDataType(biasPrc));
                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
                                   (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                   (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                blob_idx += 2;
            } else {
                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
                                   nullptr,
                                   nullptr);
            }
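
            // Re-quantization for an int8 chain: fold the weight scale (w-scale) and, when the
            // output stays int8, the output scale (oi-scale) into a per-channel scale/shift
            // post-op (the shift is zero; see the vectors built below).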
            if (convolutionNode->wScale != nullptr) {
                float* wScaleData = static_cast<float*>(convolutionNode->wScale->buffer());

                std::vector<float> oScaleDataVector;
                std::vector<float> oShiftDataVector;
                if (convolutionNode->getCnnLayer()->precision == Precision::I8 &&
                    convolutionNode->getCnnLayer()->outData[0]->getPrecision() != Precision::FP32) {
                    float *oScaleData = static_cast<float *>(convolutionNode->oScale->buffer());

                    for (size_t c = 0; c < convolutionNode->wScale->size(); c++) {
                        oScaleDataVector.push_back(wScaleData[c] / oScaleData[c]);
                        oShiftDataVector.push_back(0.f);
                    }
                } else {
                    for (size_t c = 0; c < convolutionNode->wScale->size(); c++) {
                        oScaleDataVector.push_back(wScaleData[c]);
                        oShiftDataVector.push_back(0.f);
                    }
                }

                MKLDNNDims oScaleDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx]->Create(oScaleDims, memory::data_type::f32, memory::format::x);
                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x, &oScaleDataVector[0],
                                                        oScaleDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx + 1]->Create(oScaleDims, memory::data_type::f32, memory::format::x);
                PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x, &oShiftDataVector[0],
                                                            oShiftDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                ops.append_depthwise(depthwise_scale_shift,
                                     (const float *)PostOpsIntBlobMemory[blob_idx]->GetData(),
                                     (const float *)PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                blob_idx += 2;
            }

            // Activations fused after the tail dw-conv are appended after it as well.
            for (auto &dwConvFusedNode : convolutionNode->fusedWith) {
                auto* dwConvActivationNode = dynamic_cast<MKLDNNActivationNode *>(dwConvFusedNode.get());
                if (dwConvActivationNode) {
                    ops.append_eltwise(1.0, dwConvActivationNode->getAlgorithm(), dwConvActivationNode->getAlpha(),
                                       dwConvActivationNode->getBeta());
                }
            }

            continue;
        }
    }

    attr.set_post_ops(ops);
}

void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    mkldnn::primitive_attr attr;
    setPostOps(attr);
    addScaleToPrimitiveAttr(attr);

    for (auto& desc : descs) {
        try {
            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
            do {
                InferenceEngine::LayerConfig config;
                config.dynBatchSupport = true;
                for (size_t i = 0; i < desc.inputNumbers(); i++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = -1;
                    dataConfig.constant = false;
                    dataConfig.desc = getSrcMemDesc(itpd, i);
                    if (!isGrouped)
                        dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
                    config.inConfs.push_back(dataConfig);
                }

                for (size_t i = 0; i < desc.outputNumbers(); i++) {
                    InferenceEngine::DataConfig dataConfig;
                    if (withSum) {
                        // The sum input is consumed in place through the output.
                        dataConfig.inPlace = 1;
                    }

                    dataConfig.constant = false;
                    dataConfig.desc = getDstMemDesc(itpd, i);
                    if (!isGrouped)
                        dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
                    config.outConfs.push_back(dataConfig);

                    if (withSum) {
                        dataConfig.inPlace = -1;
                        config.inConfs.push_back(dataConfig);
                    }
                }
                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());

                supportedPrimitiveDescriptors.emplace_back(config, impl_type);
            } while (itpd.next());
        } catch (std::exception& e) {
            // An exception here means no implementation was found for this descriptor; skip it.
            continue;
        }
    }
}
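
// createPrimitive calls setPostOps with initWeights = true so the post-op blobs (depthwise
// weights/biases, fused dw-conv weights) are actually allocated and packed, rather than only
// declared as during initSupportedPrimitiveDescriptors.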
void MKLDNNConvolutionNode::createPrimitive() {
    if (prim)
        return;

    mkldnn::primitive_attr attr;
    setPostOps(attr, true);
    addScaleToPrimitiveAttr(attr);

    auto prim_desc = createPrimitiveDescriptor<convolution_forward::primitive_desc,
            convolution_forward::desc>(attr);

    if (internalBlobMemory.size() > 1) {
        // With biases: src, weights, bias, dst.
        prim.reset(new convolution_forward(prim_desc,
                                           getParentEdgeAt(0)->getMemory().GetPrimitive(),
                                           internalBlobMemory[0]->GetPrimitive(),
                                           internalBlobMemory[1]->GetPrimitive(),
                                           getChildEdgeAt(0)->getMemory().GetPrimitive()));
    } else {
        prim.reset(new convolution_forward(prim_desc,
                                           getParentEdgeAt(0)->getMemory().GetPrimitive(),
                                           internalBlobMemory[0]->GetPrimitive(),
                                           getChildEdgeAt(0)->getMemory().GetPrimitive()));
    }
}

bool MKLDNNConvolutionNode::created() const {
    return getType() == Convolution || getType() == Convolution_Sum_Activation ||
           getType() == Convolution_Activation || getType() == Convolution_Sum;
}
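
// Descriptors are created for both the Winograd and the direct algorithm; combinations an
// implementation does not support are skipped later, when the primitive descriptor iterator
// throws during enumeration.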
void MKLDNNConvolutionNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
                                             const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
    TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0];
    mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());
    mkldnn::memory::data_type bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());

    Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;

    if (weights->precision() == Precision::I8) {
        wdt = memory::s8;
        bdt = memory::s32;

        Precision outPrec;
        if (getCnnLayer()->outData[0]->getPrecision() == Precision::FP32) {
            outPrec = Precision::FP32;
        } else {
            // Define the precision according to the normalizer.
            // TODO(amalyshe): do we need a separate flow for the last node in an int8 chain or not?
            outPrec = outDesc.getPrecision();
        }

        inDesc = TensorDesc(inDesc.getPrecision(), inputDesc[0].getDims(), inputDesc[0].getBlockingDesc());
        outDesc = TensorDesc(outPrec, outputDesc[0].getDims(), outputDesc[0].getBlockingDesc());
    }

    MKLDNNMemoryDesc in_candidate(inDesc);
    MKLDNNMemoryDesc out_candidate(outDesc);

    auto in_fmt = in_candidate.getFormat();
    auto out_fmt = out_candidate.getFormat();

    int O_IND = (isGrouped || isMerged) ? 1 : 0;
    int I_IND = (isGrouped || isMerged) ? 2 : 1;

    // Grouping and auto-blocking are not compatible.
    if (((isGrouped && !isDW) || isMerged) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
        return;

    MKLDNNDims blocked_weightDims(weightDims);
    MKLDNNDims blocked_biasesDims(biasesDims);
    MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::any};

    for (auto alg : {algorithm::convolution_winograd, algorithm::convolution_direct}) {
        std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc;
        if (withBiases) {
            MKLDNNMemoryDesc bias_candidate{blocked_biasesDims, bdt, memory::any};

            conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg,
                                                          in_candidate, wgh_candidate, bias_candidate, out_candidate,
                                                          stride, dilation, paddingL, paddingR, padding_kind::zero));
        } else {
            conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg,
                                                          in_candidate, wgh_candidate, out_candidate,
                                                          stride, dilation, paddingL, paddingR, padding_kind::zero));
        }

        descs.emplace_back(conv_desc);
    }
}

void MKLDNNConvolutionNode::addScaleToPrimitiveAttr(mkldnn::primitive_attr attr) const {
    // Note: mkldnn::primitive_attr is a shared handle in the mkldnn C++ API, so modifying this
    // by-value copy still updates the attribute object passed by the caller.
    if (wScale != nullptr) {
        float* wScaleData = static_cast<float*>(wScale->buffer());

        std::vector<float> oScaleDataVector;
        if (getCnnLayer()->precision == Precision::I8 && getCnnLayer()->outData[0]->getPrecision() != Precision::FP32) {
            float *oScaleData = static_cast<float *>(oScale->buffer());

            for (size_t c = 0; c < wScale->size(); c++) {
                oScaleDataVector.push_back(wScaleData[c] / oScaleData[c]);
            }
        } else {
            for (size_t c = 0; c < wScale->size(); c++) {
                oScaleDataVector.push_back(wScaleData[c]);
            }
        }

        attr.set_int_output_round_mode(mkldnn::round_nearest);
        // Mask 1 << 1 selects per-channel scales along dim 1 (the C dimension).
        attr.set_output_scales(1 << 1 /*through C dim*/, oScaleDataVector);
    }
}
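
// initDescriptor re-enumerates the primitive descriptors with the final attributes and restores
// the layer configuration that matches the implementation type selected earlier; selected_count
// mirrors the enumeration order used in initSupportedPrimitiveDescriptors.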
void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& config) {
    auto* selectedPD = getSelectedPrimitiveDescriptor();
    if (!selectedPD) {
        return;
    }
    bool addedNewDesc = false;
    /*if (config.inConfs[0].desc.getPrecision() == InferenceEngine::Precision::FP32 &&
        config.outConfs[0].desc.getPrecision() == InferenceEngine::Precision::FP32) {*/
    addedNewDesc = true;
    createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc});
    // }

    mkldnn::primitive_attr attr;
    setPostOps(attr);
    addScaleToPrimitiveAttr(attr);

    InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig();
    size_t selected_count = 0;
    for (size_t i = 0; i < descs.size(); i++) {
        const auto& desc = descs[i];
        try {
            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
            do {
                InferenceEngine::LayerConfig cfg;
                cfg.dynBatchSupport = true;
                for (size_t j = 0; j < desc.inputNumbers(); j++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = -1;
                    dataConfig.constant = false;
                    dataConfig.desc = getSrcMemDesc(itpd, j);
                    cfg.inConfs.push_back(dataConfig);
                }

                for (size_t j = 0; j < desc.outputNumbers(); j++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = -1;
                    if (withSum) {
                        cfg.inConfs.push_back(dataConfig);
                        dataConfig.inPlace = 1;
                    }

                    dataConfig.constant = false;
                    dataConfig.desc = getDstMemDesc(itpd, j);

                    cfg.outConfs.push_back(dataConfig);
                }
                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());

                if (selected_count == selectedPrimitiveDescriptorIndex) {
                    if (impl_type != selectedPD->getImplementationType()) {
                        THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
                    }
                    rightConfig = cfg;
                }
                if (i == descs.size() - 1 && addedNewDesc) {
                    if (impl_type == selectedPD->getImplementationType()) {
                        rightConfig = config;
                    }
                }
                selected_count++;
            } while (itpd.next());
        } catch (std::exception& e) {
            continue;
        }
    }
    selectedPD->getConfig() = rightConfig;
}