// Publishing 2019 R1 content
// [platform/upstream/dldt.git] / inference-engine / src / mkldnn_plugin / nodes / mkldnn_conv_node.cpp
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #include "mkldnn_conv_node.h"
6 #include "mkldnn_reorder_node.h"
7 #include "mkldnn_input_node.h"
8 #include "mkldnn_activation_node.h"
9 #include "desc_iterator.hpp"
10 #include "mkldnn_eltwise_node.h"
11 #include "mkldnn_depthwise_node.h"
12 #include <ie_layers.h>
13 #include <string>
14 #include <vector>
15 #include <mkldnn_types.h>
16 #include <mkldnn_extension_utils.h>
17 #include <ie_layers_internal.hpp>
18
19 using namespace mkldnn;
20 using namespace MKLDNNPlugin;
21 using namespace InferenceEngine;
22
23 MKLDNNConvolutionNode::MKLDNNConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng)
24         : MKLDNNNode(layer, eng), withBiases(false), withSum(false),  dw_conv_iw(0), dw_conv_ih(0),
25         dw_conv_oc(0), isDW(false), isMerged(false), withActivation(false), convLayer(nullptr), isGrouped(false) {
26     internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
27         return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(0).desc());
28     });
29     internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
30         if (!withBiases)
31             return MKLDNNMemoryDesc();
32         return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(1).desc());
33     });
34
35     auto ws = layer->blobs.find("w-scale");
36     if (ws != layer->blobs.end()) {
37         wScale = ws->second;
38     }
39
40     // Trying to find oi-scale
41     if (getCnnLayer()->type == "Convolution" && getCnnLayer()->precision == Precision::I8) {
42         auto ois = layer->blobs.find("oi-scale");
43         if ((getCnnLayer()->outData[0]->getPrecision() == Precision::I8 || getCnnLayer()->outData[0]->getPrecision() == Precision::U8)
44             && ois == layer->blobs.end()) {
45             THROW_IE_EXCEPTION << "Internal error of graph quantization - mismatch of intermediate scales and next layer type for convolution "
46                 << getCnnLayer()->name;
47         }
48         if (ois != layer->blobs.end()) {
49             // If we can find an oi-scale, then the next layer has to be an INT8.
50             oScale = ois->second;
51         }
52     }
53 }
54
// Collects mkldnn convolution descriptors for every input/output layout this
// node may execute with. Reads geometry (kernel, stride, dilation, padding,
// groups) from the IR ConvolutionLayer, prepares weight/bias internal blobs
// (including the INT8 re-wrapping path) and calls createDescriptor() once per
// layout candidate. Results accumulate in 'descs'.
void MKLDNNConvolutionNode::getSupportedDescriptors() {
    if (!descs.empty())
        return;

    InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
    if (precision == InferenceEngine::Precision::U16) {
        // U16 has no mkldnn convolution support - fall back to FP32 compute.
        precision = InferenceEngine::Precision::FP32;
    }
    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
    precision = getCnnLayer()->outData[0]->getPrecision();
    auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);

    auto * convLayer = dynamic_cast<ConvolutionLayer*>(getCnnLayer().get());
    if (convLayer == nullptr)
        THROW_IE_EXCEPTION << "Cannot convert convolution layer.";

    // One data input normally; two when a Sum post-op is fused (the second
    // input is the tensor accumulated into the output).
    if (getParentEdges().size() != 1 &&
        ((getType() != Convolution_Sum && getType() != Convolution_Sum_Activation) || getParentEdges().size() != 2))
        THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
    if (getChildEdges().empty())
        THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();

    if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) {
        THROW_IE_EXCEPTION << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
    }

    isMerged = (!getMergeWith().empty());  // grouped convolution was constructed from split->concat subgraph
    isGrouped = convLayer->_group != 1;    // group info available from IR
    if (isMerged && isGrouped)
        THROW_IE_EXCEPTION << "Convolution initialization. Group splitted mode are used together with direct group specification.";

    // default values. Can be replaced in next steps
    size_t groupNum = convLayer->_group;
    size_t IC = convLayer->input()->getDims()[1];
    size_t groupIC = IC;
    size_t groupOC = convLayer->_out_depth;

    // Depthwise: exactly one input and one output channel per group.
    isDW = groupNum == groupOC && groupNum == groupIC;

    if (isMerged) {
        groupNum = getMergeWith().size() + 1;
    }
    if (isGrouped) {
        groupIC /= groupNum;
        groupOC /= groupNum;
    }

    // Weights are laid out (G)OIHW / (G)OIDHW; the IR stores kernel dims
    // innermost-first, so they are appended in reverse.
    weightDims.clear();
    weightDims.push_back(groupOC);
    weightDims.push_back(groupIC);
    for (int i = 1; i <= convLayer->_kernel.size(); i++) {
        weightDims.push_back(convLayer->_kernel[convLayer->_kernel.size() - i]);
    }
    biasesDims = { groupOC * groupNum };

    if (isGrouped || isMerged) weightDims.insert(weightDims.begin(), groupNum);

    withBiases = (convLayer->_biases != nullptr && convLayer->_biases->size() != 0);

    internalBlobs.push_back(createInternalBlob(weightDims, true));
    if (withBiases) {
        internalBlobs.push_back(createInternalBlob(biasesDims, false));
    }

    // NOTE(review): assumes a "weights" blob exists on the layer - find() is
    // dereferenced unchecked; confirm upstream graph guarantees this.
    Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;
    if (weights->precision() == Precision::I8) {
        // The weights blob has incorrect dims, so we have to fix it
        TensorDesc wdesc = internalBlobs[0]->getTensorDesc();
        wdesc.setPrecision(Precision::I8);
        // Re-wrap the raw INT8 data under the corrected descriptor (no copy).
        InferenceEngine::TBlob<int8_t>::Ptr reshapedInt8Weights =
                InferenceEngine::TBlob<int8_t>::Ptr(
                        new InferenceEngine::TBlob<int8_t>(wdesc, static_cast<int8_t*>(weights->buffer()), weights->byteSize()));

        internalBlobs[0] = reshapedInt8Weights;
        if (withBiases) {
            // Biases for INT8 convolutions are stored as I32.
            Blob::Ptr biases = this->getCnnLayer()->blobs.find("biases")->second;
            TensorDesc bdesc = internalBlobs[1]->getTensorDesc();
            bdesc.setPrecision(Precision::I32);
            InferenceEngine::TBlob<int32_t>::Ptr reshapedInt32Biases =
                    InferenceEngine::TBlob<int32_t>::Ptr(
                            new InferenceEngine::TBlob<int32_t>(bdesc, static_cast<int32_t*>(biases->buffer()), biases->byteSize()));
            internalBlobs[1] = reshapedInt32Biases;
        }
    }

    // Spatial vectors are copied inverted (mkldnn expects outermost-first).
    invertVectorCopyUtoI(convLayer->_stride, stride);
    for (int i = 1; i <= convLayer->_dilation.size(); i++) {
        // mkldnn dilation counts the extra holes, i.e. IR dilation minus one.
        dilation.push_back(static_cast<int>(convLayer->_dilation[convLayer->_dilation.size() - i]) - 1);
    }

    auto allPads = getPaddings(*convLayer);
    invertVectorCopyUtoI(allPads.begin, paddingL);
    invertVectorCopyUtoI(allPads.end, paddingR);

    MKLDNNDims weightsDims = MKLDNNDims(weightDims);

    // Recompute the right/bottom padding so the convolution output-size formula
    // reproduces exactly the output dims recorded in the IR.
    for (int i = 0; i < paddingR.size(); i++) {
        int with_group = (isGrouped || isMerged) ? 1 : 0;
        int krn = weightsDims[with_group + 2 + i];
        int src = getParentEdgeAt(0)->getDims()[2 + i];
        int dst = getChildEdgeAt(0)->getDims()[2 + i];

        krn = (krn - 1)*(dilation[i] + 1) + 1;  // effective (dilated) kernel extent
        int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
        paddingR[i] = (dst - calc_dst) * stride[i];
    }

    withSum = getType() == Convolution_Sum || getType() == Convolution_Sum_Activation;

    // Capture the geometry of a fused depthwise convolution, if any, for the
    // dw_conv post-op built in setPostOps().
    for (auto &node : fusedWith) {
        auto *convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(node.get());
        if (convolutionNode) {
            auto *convLayer = reinterpret_cast<ConvolutionLayer *>(convolutionNode->getCnnLayer().get());
            dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2];
            dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1];
            dw_conv_oc = convLayer->_out_depth;
            for (int i = 0; i < convLayer->_kernel.size(); i++) {
                dw_conv_kernel.push_back(convLayer->_kernel[i]);
            }
            for (int i = 0; i < convLayer->_stride.size(); i++) {
                dw_conv_strides.push_back(convLayer->_stride[i]);
            }
        }
    }

    if (this->getCnnLayer()->precision == Precision::I8) {
        // INT8 path: only the NHWC layout candidate is generated.
        MKLDNNMemoryDesc in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nhwc);
        MKLDNNMemoryDesc out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nhwc);
        createDescriptor({in_candidate}, {out_candidate});
    } else {
        // If the weights aren't quantized, the only precision we support is FP32
        inputDataType = memory::f32;
        outputDataType = memory::f32;

        Layout layout = convLayer->input()->getLayout();

        if (layout == NCHW || layout == NHWC) {
            // Plain layout as recorded in the IR.
            MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType,
                    layout == NCHW ? memory::nchw : memory::nhwc);
            MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType,
                    layout == NCHW ? memory::nchw : memory::nhwc);
            createDescriptor({in_candidate}, {out_candidate});

            if (IC == 3 || IC == 1) {
                // Few input channels: keep plain input, try blocked outputs only.
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nChw16c);
                createDescriptor({in_candidate}, {out_candidate});
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nChw8c);
                createDescriptor({in_candidate}, {out_candidate});
            } else {
                // Channel-blocked layouts on both sides (16c, then 8c).
                in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nChw16c);
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nChw16c);
                createDescriptor({in_candidate}, {out_candidate});
                in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nChw8c);
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nChw8c);
                createDescriptor({in_candidate}, {out_candidate});
            }
        } else if (layout == NCDHW || layout == NDHWC) {
            // Same candidate set for the 5D (volumetric) case.
            MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType,
                    layout == NCDHW ? memory::ncdhw : memory::ndhwc);
            MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType,
                    layout == NCDHW ? memory::ncdhw : memory::ndhwc);
            createDescriptor({in_candidate}, {out_candidate});

            if (IC == 3 || IC == 1) {
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nCdhw16c);
                createDescriptor({in_candidate}, {out_candidate});
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nCdhw8c);
                createDescriptor({in_candidate}, {out_candidate});
            } else {
                in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nCdhw16c);
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nCdhw16c);
                createDescriptor({in_candidate}, {out_candidate});
                in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nCdhw8c);
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nCdhw8c);
                createDescriptor({in_candidate}, {out_candidate});
            }
        }
    }
}
234
// Builds the mkldnn post-op chain (attr.post_ops) from the nodes fused into
// this convolution: eltwise sum, activation, depthwise scale/shift, and a
// fused depthwise convolution (optionally followed by its own scales and
// activation).
//
// @param initWeights  true at createPrimitive() time: per-channel blobs needed
//                     by depthwise / dw-conv post-ops are allocated in
//                     PostOpsIntBlobMemory and filled with real data; false
//                     during descriptor queries, when only the op-chain shape
//                     matters, so nullptr placeholders are appended instead.
// NOTE: the default argument sits on this out-of-class definition, so it is
// only usable by calls later in this translation unit.
void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) {
    int blob_idx = 0;  // next free slot in PostOpsIntBlobMemory
    mkldnn::post_ops ops;

    for (auto &node : fusedWith) {
        auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
        if (eltwiseNode) {
            // Fused element-wise sum: accumulate into the destination tensor.
            if (eltwiseNode->getCnnLayer()->precision == Precision::I8) {
                auto it = eltwiseNode->getCnnLayer()->blobs.find("eltwise-sum-scale");
                if (it != eltwiseNode->getCnnLayer()->blobs.end()) {
                    // currently there is the only one scale while we need scale by channel :(
                    ops.append_sum(it->second->buffer().as<float*>()[0]);
                }
            } else {
                ops.append_sum(1.0);
            }
            continue;
        }

        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
        if (activationNode) {
            // A fused activation maps directly onto an mkldnn eltwise post-op.
            ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(),
                               activationNode->getBeta());
            continue;
        }

        auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
        if (depthwiseNode) {
            auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());

            if (initWeights) {
                // Per-channel weights blob, channel count rounded up to 16.
                MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);

                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
                                                             depthwiseLayer->_weights->buffer(),
                                                             depthwiseLayer->_weights->size() *
                                                             MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                if (depthwiseNode->isBroadcast()) {
                    // A single scalar weight is replicated across all channels.
                    float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
                    for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                        static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
                    }
                }

                if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
                    // scale_shift also carries a biases blob (the shift part).
                    PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                    PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
                                                                memory::format::x);
                    PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
                                                                 depthwiseLayer->_biases->buffer(),
                                                                 depthwiseLayer->_biases->size() *
                                                                 MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                    if (depthwiseNode->isBroadcast()) {
                        float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
                        for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                            static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
                        }
                    }

                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                    blob_idx += 2;
                } else {
                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         nullptr);

                    blob_idx += 1;
                }
            } else {
                // Descriptor-query phase: register the op shape only, no data.
                ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                     nullptr,
                                     nullptr);
            }

            continue;
        }

        auto* convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(node.get());
        if (convolutionNode) {
            // Fused depthwise convolution (dw_conv post-op).
            auto* convLayer = reinterpret_cast<ConvolutionLayer*>(convolutionNode->getCnnLayer().get());

            auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(convLayer->precision);
            auto biasPrc = memory::data_type::s32;

            if (initWeights) {
                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
                PostOpsIntBlobMemory[blob_idx]->Create(dwWeightsDims, weightsPrc, memory::format::Goihw8g);

                Blob::Ptr weights = convLayer->blobs.find("weights")->second;
                Blob::Ptr biases = convLayer->blobs.find("biases")->second;

                // SetData reorders from plain goihw into the blocked Goihw8g layout.
                PostOpsIntBlobMemory[blob_idx]->SetData(weightsPrc, memory::goihw, weights->buffer(),
                                                        dwWeightsDims.size() * MKLDNNExtensionUtils::sizeOfDataType(weightsPrc));

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                MKLDNNDims dwBiasesDims({dw_conv_oc});
                PostOpsIntBlobMemory[blob_idx + 1]->Create(dwBiasesDims, biasPrc, memory::format::x);
                PostOpsIntBlobMemory[blob_idx + 1]->SetData(biasPrc, memory::x, biases->buffer(),
                                                            dwBiasesDims.size() * MKLDNNExtensionUtils::sizeOfDataType(biasPrc));
                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
                                   (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                   (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                blob_idx += 2;
            } else {
                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
                                   nullptr,
                                   nullptr);
            }

            if (convolutionNode->wScale != nullptr) {
                // Output scales of the fused convolution are applied as a
                // depthwise scale/shift post-op (shift part is all zeros).
                float* wScaleData = static_cast<float*>(convolutionNode->wScale->buffer());

                std::vector<float> oScaleDataVector;
                std::vector<float> oShiftDataVector;
                if (convolutionNode->getCnnLayer()->precision == Precision::I8 &&
                    convolutionNode->getCnnLayer()->outData[0]->getPrecision() != Precision::FP32) {
                    // Quantized output: fold the oi-scale into the weight scale.
                    float *oScaleData = static_cast<float *>(convolutionNode->oScale->buffer());

                    for (size_t c = 0; c < convolutionNode->wScale->size(); c++) {
                        oScaleDataVector.push_back(wScaleData[c] / oScaleData[c]);
                        oShiftDataVector.push_back(0.f);
                    }
                } else {
                    for (size_t c = 0; c < convolutionNode->wScale->size(); c++) {
                        oScaleDataVector.push_back(wScaleData[c]);
                        oShiftDataVector.push_back(0.f);
                    }
                }

                MKLDNNDims oScaleDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx]->Create(oScaleDims, memory::data_type::f32, memory::format::x);
                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x, &oScaleDataVector[0],
                                                        oScaleDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx + 1]->Create(oScaleDims, memory::data_type::f32, memory::format::x);
                PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x, &oShiftDataVector[0],
                                                            oShiftDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                ops.append_depthwise(depthwise_scale_shift,
                                     (const float *)PostOpsIntBlobMemory[blob_idx]->GetData(),
                                     (const float *)PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                blob_idx += 2;
            }

            // Replay activations fused into the dw-conv after its post-op.
            for (auto &dwConvFusedNode : convolutionNode->fusedWith) {
                auto* dwConvActivationNode = dynamic_cast<MKLDNNActivationNode *>(dwConvFusedNode.get());
                if (dwConvActivationNode) {
                    ops.append_eltwise(1.0, dwConvActivationNode->getAlgorithm(), dwConvActivationNode->getAlpha(),
                                       dwConvActivationNode->getBeta());
                }
            }

            continue;
        }
    }

    attr.set_post_ops(ops);
}
409
410 void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
411     if (!supportedPrimitiveDescriptors.empty())
412         return;
413
414     mkldnn::primitive_attr attr;
415     setPostOps(attr);
416
417     for (auto& desc : descs) {
418         try {
419             primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
420             do {
421                 InferenceEngine::LayerConfig config;
422                 config.dynBatchSupport = true;
423                 for (size_t i = 0; i < desc.inputNumbers(); i++) {
424                     InferenceEngine::DataConfig dataConfig;
425                     dataConfig.inPlace = -1;
426                     dataConfig.constant = false;
427                     dataConfig.desc = getSrcMemDesc(itpd, i);
428                     if (!isGrouped)
429                         dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
430                     config.inConfs.push_back(dataConfig);
431                 }
432
433                 for (size_t i = 0; i < desc.outputNumbers(); i++) {
434                     InferenceEngine::DataConfig dataConfig;
435                     if (withSum) {
436                         dataConfig.inPlace = 1;
437                     }
438
439                     dataConfig.constant = false;
440                     dataConfig.desc = getDstMemDesc(itpd, i);
441                     if (!isGrouped)
442                         dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
443                     config.outConfs.push_back(dataConfig);
444
445                     if (withSum) {
446                         dataConfig.inPlace = -1;
447                         config.inConfs.push_back(dataConfig);
448                     }
449                 }
450                 impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
451
452                 supportedPrimitiveDescriptors.emplace_back(config, impl_type);
453             } while (itpd.next());
454         } catch (std::exception& e) {
455             // it throw exception in case of no implementation found
456             continue;
457         }
458     }
459 }
460
461
462 void MKLDNNConvolutionNode::createPrimitive() {
463     if (prim)
464         return;
465
466     mkldnn::primitive_attr attr;
467     setPostOps(attr, true);
468     addScaleToPrimitiveAttr(attr);
469
470     auto prim_desc = createPrimitiveDescriptor<convolution_forward::primitive_desc,
471             convolution_forward::desc>(attr);
472
473     if (internalBlobMemory.size() > 1) {
474         prim.reset(new convolution_forward(prim_desc,
475                                            getParentEdgeAt(0)->getMemory().GetPrimitive(),
476                                            internalBlobMemory[0]->GetPrimitive(),
477                                            internalBlobMemory[1]->GetPrimitive(),
478                                            getChildEdgeAt(0)->getMemory().GetPrimitive()));
479     } else {
480         prim.reset(new convolution_forward(prim_desc,
481                                            getParentEdgeAt(0)->getMemory().GetPrimitive(),
482                                            internalBlobMemory[0]->GetPrimitive(),
483                                            getChildEdgeAt(0)->getMemory().GetPrimitive()));
484     }
485 }
486
487 bool MKLDNNConvolutionNode::created() const {
488     return getType() == Convolution || getType() == Convolution_Sum_Activation ||
489            getType() == Convolution_Activation || getType() == Convolution_Sum;
490 }
491
// Creates mkldnn convolution_forward descriptors (winograd and direct
// flavours) for one input/output layout pair and appends them to 'descs'.
// For INT8 weights the weight/bias data types become s8/s32 and the output
// precision is taken from the descriptor prepared by the quantization
// normalizer (unless the layer explicitly produces FP32).
void MKLDNNConvolutionNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
                                             const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
    TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0];
    // By default, weights and biases follow the input precision.
    mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());
    mkldnn::memory::data_type bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());

    Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;

    if (weights->precision() == Precision::I8) {
        wdt = memory::s8;
        bdt = memory::s32;

        Precision outPrec;
        if (getCnnLayer()->outData[0]->getPrecision() == Precision::FP32) {
            outPrec = Precision::FP32;
        } else {
            // define precision accordninly normalizer
            // TODO(amalyshe) do we need to have separate flow for last in int8 chain or not?
            outPrec = outDesc.getPrecision();
        }

        inDesc = TensorDesc(inDesc.getPrecision() , inputDesc[0].getDims(), inputDesc[0].getBlockingDesc());
        outDesc = TensorDesc(outPrec, outputDesc[0].getDims(), outputDesc[0].getBlockingDesc());
    }

    MKLDNNMemoryDesc in_candidate(inDesc);
    MKLDNNMemoryDesc out_candidate(outDesc);

    // NOTE(review): in_fmt/out_fmt and O_IND/I_IND below appear unused in this
    // function - possibly leftovers from an earlier revision.
    auto in_fmt = in_candidate.getFormat();
    auto out_fmt = out_candidate.getFormat();

    int O_IND = (isGrouped || isMerged) ? 1 : 0;
    int I_IND = (isGrouped || isMerged) ? 2 : 1;

    // grouping and autoblocking is not compatible
    if (((isGrouped && !isDW) || isMerged) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
        return;

    MKLDNNDims blocked_weightDims(weightDims);
    MKLDNNDims blocked_biasesDims(biasesDims);
    // memory::any lets mkldnn choose the optimal weights layout.
    MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::any};

    for (auto alg : {algorithm::convolution_winograd, algorithm::convolution_direct}) {
        std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc;
        if (withBiases) {
            MKLDNNMemoryDesc bias_candidate{blocked_biasesDims, bdt, memory::any};

            conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg,
                                                          in_candidate, wgh_candidate, bias_candidate, out_candidate,
                                                          stride, dilation, paddingL, paddingR, padding_kind::zero));
        } else {
            conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg,
                                                          in_candidate, wgh_candidate, out_candidate, stride, dilation,
                                                          paddingL, paddingR, padding_kind::zero));
        }

        descs.emplace_back(conv_desc);
    }
}
551
552 void MKLDNNConvolutionNode::addScaleToPrimitiveAttr(mkldnn::primitive_attr attr) const {
553     bool scaled = false;
554     if (wScale != nullptr) {
555        float* wScaleData = static_cast<float*>(wScale->buffer());
556
557        std::vector<float> oScaleDataVector;
558        if (getCnnLayer()->precision == Precision::I8 && getCnnLayer()->outData[0]->getPrecision() != Precision::FP32) {
559            float *oScaleData = static_cast<float *>(oScale->buffer());
560
561            for (size_t c = 0; c < wScale->size(); c++) {
562                oScaleDataVector.push_back(wScaleData[c] / oScaleData[c]);
563            }
564        } else {
565            for (size_t c = 0; c < wScale->size(); c++) {
566                oScaleDataVector.push_back(wScaleData[c]);
567            }
568        }
569
570        attr.set_int_output_round_mode(mkldnn::round_nearest);
571        attr.set_output_scales(1 << 1 /*through C dim*/, oScaleDataVector);
572     }
573 }
574
// Called after the graph has selected a primitive descriptor and fixed the
// layer configuration: appends a descriptor for the exact configured in/out
// descs, re-enumerates all implementations, and rewrites the selected
// descriptor's config so it matches both the chosen implementation and the
// supplied 'config'.
void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& config) {
    auto* selectedPD = getSelectedPrimitiveDescriptor();
    if (!selectedPD) {
        return;
    }
    bool addedNewDesc = false;
    // The precision guard below is intentionally disabled: a descriptor for the
    // requested config is always appended.
    /*if (config.inConfs[0].desc.getPrecision() == InferenceEngine::Precision::FP32 &&
            config.outConfs[0].desc.getPrecision() == InferenceEngine::Precision::FP32) {*/
        addedNewDesc = true;
        createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc});
    //}

    mkldnn::primitive_attr attr;
    setPostOps(attr);
    addScaleToPrimitiveAttr(attr);

    InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig();
    size_t selected_count = 0;  // running index across all (desc, impl) pairs
    for (size_t i = 0; i < descs.size(); i++) {
        const auto& desc = descs[i];
        try {
            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
            do {
                InferenceEngine::LayerConfig cfg;
                cfg.dynBatchSupport = true;
                for (size_t j = 0; j < desc.inputNumbers(); j++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = -1;
                    dataConfig.constant = false;
                    dataConfig.desc = getSrcMemDesc(itpd, j);
                    cfg.inConfs.push_back(dataConfig);
                }

                for (size_t j = 0; j < desc.outputNumbers(); j++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = -1;
                    if (withSum) {
                        // Fused sum: register an extra input config here (desc
                        // still default at this point) and mark the output as
                        // in-place with input #1.
                        cfg.inConfs.push_back(dataConfig);
                        dataConfig.inPlace = 1;
                    }
                    dataConfig.constant = false;
                    dataConfig.desc = getDstMemDesc(itpd, j);

                    cfg.outConfs.push_back(dataConfig);
                }
                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());

                if (selected_count == selectedPrimitiveDescriptorIndex) {
                    // The implementation at the selected index must not have
                    // changed since initSupportedPrimitiveDescriptors().
                    if (impl_type != selectedPD->getImplementationType()) {
                        THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
                    }
                    rightConfig = cfg;
                }
                if (i == descs.size() - 1 && addedNewDesc) {
                    // The last descriptor is the one appended above for the
                    // requested config; prefer the caller-provided config when
                    // its implementation matches the selected one.
                    if (impl_type == selectedPD->getImplementationType()) {
                        rightConfig = config;
                    }
                }
                selected_count++;
            } while (itpd.next());
        } catch (std::exception& e) {
            // No implementation for this descriptor/attr pair - skip it.
            continue;
        }
    }
    selectedPD->getConfig() = rightConfig;
}