// Publishing 2019 R1 content
// [platform/upstream/dldt.git] / inference-engine / src / mkldnn_plugin / nodes / mkldnn_conv_node.cpp
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #include "mkldnn_conv_node.h"
6 #include "mkldnn_reorder_node.h"
7 #include "mkldnn_input_node.h"
8 #include "mkldnn_activation_node.h"
9 #include "desc_iterator.hpp"
10 #include "mkldnn_eltwise_node.h"
11 #include "mkldnn_depthwise_node.h"
12 #include <ie_layers.h>
13 #include <string>
14 #include <vector>
15 #include <mkldnn_types.h>
16 #include <mkldnn_extension_utils.h>
17 #include <ie_layers_internal.hpp>
18
19 using namespace mkldnn;
20 using namespace MKLDNNPlugin;
21 using namespace InferenceEngine;
22
23 MKLDNNConvolutionNode::MKLDNNConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng)
24         : MKLDNNNode(layer, eng), withBiases(false), withSum(false),  dw_conv_iw(0), dw_conv_ih(0),
25         dw_conv_oc(0), isDW(false), isMerged(false), withActivation(false), convLayer(nullptr), isGrouped(false) {
26     internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
27         return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(0).desc());
28     });
29     internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
30         if (!withBiases)
31             return MKLDNNMemoryDesc();
32         return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(1).desc());
33     });
34
35     auto ws = layer->blobs.find("w-scale");
36     if (ws != layer->blobs.end()) {
37         wScale = ws->second;
38     }
39
40     // Trying to find oi-scale
41     if (getCnnLayer()->type == "Convolution" && getCnnLayer()->precision == Precision::I8) {
42         auto ois = layer->blobs.find("oi-scale");
43         if ((getCnnLayer()->outData[0]->getPrecision() == Precision::I8 || getCnnLayer()->outData[0]->getPrecision() == Precision::U8)
44             && ois == layer->blobs.end()) {
45             THROW_IE_EXCEPTION << "Internal error of graph quantization - mismatch of intermediate scales and next layer type for convolution "
46                 << getCnnLayer()->name;
47         }
48         if (ois != layer->blobs.end()) {
49             // If we can find an oi-scale, then the next layer has to be an INT8.
50             oScale = ois->second;
51         }
52     }
53 }
54
// Collects mkldnn convolution descriptors for every input/output layout this
// node may execute with. Reads geometry (kernel, stride, dilation, padding,
// groups) from the IR ConvolutionLayer, prepares weight/bias internal blobs
// (including the INT8 re-wrapping path) and calls createDescriptor() once per
// layout candidate. Results accumulate in 'descs'.
void MKLDNNConvolutionNode::getSupportedDescriptors() {
    if (!descs.empty())
        return;

    InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
    if (precision == InferenceEngine::Precision::U16) {
        // U16 has no mkldnn convolution support - fall back to FP32 compute.
        precision = InferenceEngine::Precision::FP32;
    }
    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
    precision = getCnnLayer()->outData[0]->getPrecision();
    auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);

    auto * convLayer = dynamic_cast<ConvolutionLayer*>(getCnnLayer().get());
    if (convLayer == nullptr)
        THROW_IE_EXCEPTION << "Cannot convert convolution layer.";

    // One data input normally; two when a Sum post-op is fused (the second
    // input is the tensor accumulated into the output).
    if (getParentEdges().size() != 1 &&
        ((getType() != Convolution_Sum && getType() != Convolution_Sum_Activation) || getParentEdges().size() != 2))
        THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
    if (getChildEdges().empty())
        THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();

    if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) {
        THROW_IE_EXCEPTION << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
    }

    isMerged = (!getMergeWith().empty());  // grouped convolution was constructed from split->concat subgraph
    isGrouped = convLayer->_group != 1;    // group info available from IR
    if (isMerged && isGrouped)
        THROW_IE_EXCEPTION << "Convolution initialization. Group splitted mode are used together with direct group specification.";

    // default values. Can be replaced in next steps
    size_t groupNum = convLayer->_group;
    size_t IC = convLayer->input()->getDims()[1];
    size_t groupIC = IC;
    size_t groupOC = convLayer->_out_depth;

    // Depthwise: exactly one input and one output channel per group.
    isDW = groupNum == groupOC && groupNum == groupIC;

    if (isMerged) {
        groupNum = getMergeWith().size() + 1;
    }
    if (isGrouped) {
        groupIC /= groupNum;
        groupOC /= groupNum;
    }

    // Weights are laid out (G)OIHW / (G)OIDHW; the IR stores kernel dims
    // innermost-first, so they are appended in reverse.
    weightDims.clear();
    weightDims.push_back(groupOC);
    weightDims.push_back(groupIC);
    for (int i = 1; i <= convLayer->_kernel.size(); i++) {
        weightDims.push_back(convLayer->_kernel[convLayer->_kernel.size() - i]);
    }
    biasesDims = { groupOC * groupNum };

    if (isGrouped || isMerged) weightDims.insert(weightDims.begin(), groupNum);

    withBiases = (convLayer->_biases != nullptr && convLayer->_biases->size() != 0);

    internalBlobs.push_back(createInternalBlob(weightDims, true));
    if (withBiases) {
        internalBlobs.push_back(createInternalBlob(biasesDims, false));
    }

    // NOTE(review): assumes a "weights" blob exists on the layer - find() is
    // dereferenced unchecked; confirm upstream graph guarantees this.
    Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;
    if (weights->precision() == Precision::I8) {
        // The weights blob has incorrect dims, so we have to fix it
        TensorDesc wdesc = internalBlobs[0]->getTensorDesc();
        wdesc.setPrecision(Precision::I8);
        // Re-wrap the raw INT8 data under the corrected descriptor (no copy).
        InferenceEngine::TBlob<int8_t>::Ptr reshapedInt8Weights =
                InferenceEngine::TBlob<int8_t>::Ptr(
                        new InferenceEngine::TBlob<int8_t>(wdesc, static_cast<int8_t*>(weights->buffer()), weights->byteSize()));

        internalBlobs[0] = reshapedInt8Weights;
        if (withBiases) {
            // Biases for INT8 convolutions are stored as I32.
            Blob::Ptr biases = this->getCnnLayer()->blobs.find("biases")->second;
            TensorDesc bdesc = internalBlobs[1]->getTensorDesc();
            bdesc.setPrecision(Precision::I32);
            InferenceEngine::TBlob<int32_t>::Ptr reshapedInt32Biases =
                    InferenceEngine::TBlob<int32_t>::Ptr(
                            new InferenceEngine::TBlob<int32_t>(bdesc, static_cast<int32_t*>(biases->buffer()), biases->byteSize()));
            internalBlobs[1] = reshapedInt32Biases;
        }
    }

    // Spatial vectors are copied inverted (mkldnn expects outermost-first).
    invertVectorCopyUtoI(convLayer->_stride, stride);
    for (int i = 1; i <= convLayer->_dilation.size(); i++) {
        // mkldnn dilation counts the extra holes, i.e. IR dilation minus one.
        dilation.push_back(static_cast<int>(convLayer->_dilation[convLayer->_dilation.size() - i]) - 1);
    }

    auto allPads = getPaddings(*convLayer);
    invertVectorCopyUtoI(allPads.begin, paddingL);
    invertVectorCopyUtoI(allPads.end, paddingR);

    MKLDNNDims weightsDims = MKLDNNDims(weightDims);

    // Recompute the right/bottom padding so the convolution output-size formula
    // reproduces exactly the output dims recorded in the IR.
    for (int i = 0; i < paddingR.size(); i++) {
        int with_group = (isGrouped || isMerged) ? 1 : 0;
        int krn = weightsDims[with_group + 2 + i];
        int src = getParentEdgeAt(0)->getDims()[2 + i];
        int dst = getChildEdgeAt(0)->getDims()[2 + i];

        krn = (krn - 1)*(dilation[i] + 1) + 1;  // effective (dilated) kernel extent
        int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
        paddingR[i] = (dst - calc_dst) * stride[i];
    }

    withSum = getType() == Convolution_Sum || getType() == Convolution_Sum_Activation;

    // Capture the geometry of a fused depthwise convolution, if any, for the
    // dw_conv post-op built in setPostOps().
    for (auto &node : fusedWith) {
        auto *convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(node.get());
        if (convolutionNode) {
            auto *convLayer = reinterpret_cast<ConvolutionLayer *>(convolutionNode->getCnnLayer().get());
            dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2];
            dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1];
            dw_conv_oc = convLayer->_out_depth;
            for (int i = 0; i < convLayer->_kernel.size(); i++) {
                dw_conv_kernel.push_back(convLayer->_kernel[i]);
            }
            for (int i = 0; i < convLayer->_stride.size(); i++) {
                dw_conv_strides.push_back(convLayer->_stride[i]);
            }
        }
    }

    if (this->getCnnLayer()->precision == Precision::I8) {
        // INT8 path: only the NHWC layout candidate is generated.
        MKLDNNMemoryDesc in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nhwc);
        MKLDNNMemoryDesc out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nhwc);
        createDescriptor({in_candidate}, {out_candidate});
    } else {
        // If the weights aren't quantized, the only precision we support is FP32
        inputDataType = memory::f32;
        outputDataType = memory::f32;

        Layout layout = convLayer->input()->getLayout();

        if (layout == NCHW || layout == NHWC) {
            // Plain layout as recorded in the IR.
            MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType,
                    layout == NCHW ? memory::nchw : memory::nhwc);
            MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType,
                    layout == NCHW ? memory::nchw : memory::nhwc);
            createDescriptor({in_candidate}, {out_candidate});

            if (IC == 3 || IC == 1) {
                // Few input channels: keep plain input, try blocked outputs only.
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nChw16c);
                createDescriptor({in_candidate}, {out_candidate});
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nChw8c);
                createDescriptor({in_candidate}, {out_candidate});
            } else {
                // Channel-blocked layouts on both sides (16c, then 8c).
                in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nChw16c);
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nChw16c);
                createDescriptor({in_candidate}, {out_candidate});
                in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nChw8c);
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nChw8c);
                createDescriptor({in_candidate}, {out_candidate});
            }
        } else if (layout == NCDHW || layout == NDHWC) {
            // Same candidate set for the 5D (volumetric) case.
            MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType,
                    layout == NCDHW ? memory::ncdhw : memory::ndhwc);
            MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType,
                    layout == NCDHW ? memory::ncdhw : memory::ndhwc);
            createDescriptor({in_candidate}, {out_candidate});

            if (IC == 3 || IC == 1) {
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nCdhw16c);
                createDescriptor({in_candidate}, {out_candidate});
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nCdhw8c);
                createDescriptor({in_candidate}, {out_candidate});
            } else {
                in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nCdhw16c);
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nCdhw16c);
                createDescriptor({in_candidate}, {out_candidate});
                in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nCdhw8c);
                out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nCdhw8c);
                createDescriptor({in_candidate}, {out_candidate});
            }
        }
    }
}
234
// Builds the mkldnn post-op chain (attr.post_ops) from the nodes fused into
// this convolution: eltwise sum, activation, depthwise scale/shift, and a
// fused depthwise convolution (optionally followed by its own scales and
// activation).
//
// @param initWeights  true at createPrimitive() time: per-channel blobs needed
//                     by depthwise / dw-conv post-ops are allocated in
//                     PostOpsIntBlobMemory and filled with real data; false
//                     during descriptor queries, when only the op-chain shape
//                     matters, so nullptr placeholders are appended instead.
// NOTE: the default argument sits on this out-of-class definition, so it is
// only usable by calls later in this translation unit.
void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) {
    int blob_idx = 0;  // next free slot in PostOpsIntBlobMemory
    mkldnn::post_ops ops;

    for (auto &node : fusedWith) {
        auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
        if (eltwiseNode) {
            // Fused element-wise sum: accumulate into the destination tensor.
            if (eltwiseNode->getCnnLayer()->precision == Precision::I8) {
                auto it = eltwiseNode->getCnnLayer()->blobs.find("eltwise-sum-scale");
                if (it != eltwiseNode->getCnnLayer()->blobs.end()) {
                    // currently there is the only one scale while we need scale by channel :(
                    ops.append_sum(it->second->buffer().as<float*>()[0]);
                }
            } else {
                ops.append_sum(1.0);
            }
            continue;
        }

        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
        if (activationNode) {
            // A fused activation maps directly onto an mkldnn eltwise post-op.
            ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(),
                               activationNode->getBeta());
            continue;
        }

        auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
        if (depthwiseNode) {
            auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());

            if (initWeights) {
                // Per-channel weights blob, channel count rounded up to 16.
                MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);

                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
                                                             depthwiseLayer->_weights->buffer(),
                                                             depthwiseLayer->_weights->size() *
                                                             MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                if (depthwiseNode->isBroadcast()) {
                    // A single scalar weight is replicated across all channels.
                    float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
                    for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                        static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
                    }
                }

                if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
                    // scale_shift also carries a biases blob (the shift part).
                    PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                    PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
                                                                memory::format::x);
                    PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
                                                                 depthwiseLayer->_biases->buffer(),
                                                                 depthwiseLayer->_biases->size() *
                                                                 MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                    if (depthwiseNode->isBroadcast()) {
                        float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
                        for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                            static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
                        }
                    }

                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                    blob_idx += 2;
                } else {
                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         nullptr);

                    blob_idx += 1;
                }
            } else {
                // Descriptor-query phase: register the op shape only, no data.
                ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                     nullptr,
                                     nullptr);
            }

            continue;
        }

        auto* convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(node.get());
        if (convolutionNode) {
            // Fused depthwise convolution (dw_conv post-op).
            auto* convLayer = reinterpret_cast<ConvolutionLayer*>(convolutionNode->getCnnLayer().get());

            auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(convLayer->precision);
            auto biasPrc = memory::data_type::s32;

            if (initWeights) {
                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
                PostOpsIntBlobMemory[blob_idx]->Create(dwWeightsDims, weightsPrc, memory::format::Goihw8g);

                Blob::Ptr weights = convLayer->blobs.find("weights")->second;
                Blob::Ptr biases = convLayer->blobs.find("biases")->second;

                // SetData reorders from plain goihw into the blocked Goihw8g layout.
                PostOpsIntBlobMemory[blob_idx]->SetData(weightsPrc, memory::goihw, weights->buffer(),
                                                        dwWeightsDims.size() * MKLDNNExtensionUtils::sizeOfDataType(weightsPrc));

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                MKLDNNDims dwBiasesDims({dw_conv_oc});
                PostOpsIntBlobMemory[blob_idx + 1]->Create(dwBiasesDims, biasPrc, memory::format::x);
                PostOpsIntBlobMemory[blob_idx + 1]->SetData(biasPrc, memory::x, biases->buffer(),
                                                            dwBiasesDims.size() * MKLDNNExtensionUtils::sizeOfDataType(biasPrc));
                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
                                   (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                   (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                blob_idx += 2;
            } else {
                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
                                   nullptr,
                                   nullptr);
            }

            if (convolutionNode->wScale != nullptr) {
                // Output scales of the fused convolution are applied as a
                // depthwise scale/shift post-op (shift part is all zeros).
                float* wScaleData = static_cast<float*>(convolutionNode->wScale->buffer());

                std::vector<float> oScaleDataVector;
                std::vector<float> oShiftDataVector;
                if (convolutionNode->getCnnLayer()->precision == Precision::I8 &&
                    convolutionNode->getCnnLayer()->outData[0]->getPrecision() != Precision::FP32) {
                    // Quantized output: fold the oi-scale into the weight scale.
                    float *oScaleData = static_cast<float *>(convolutionNode->oScale->buffer());

                    for (size_t c = 0; c < convolutionNode->wScale->size(); c++) {
                        oScaleDataVector.push_back(wScaleData[c] / oScaleData[c]);
                        oShiftDataVector.push_back(0.f);
                    }
                } else {
                    for (size_t c = 0; c < convolutionNode->wScale->size(); c++) {
                        oScaleDataVector.push_back(wScaleData[c]);
                        oShiftDataVector.push_back(0.f);
                    }
                }

                MKLDNNDims oScaleDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx]->Create(oScaleDims, memory::data_type::f32, memory::format::x);
                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x, &oScaleDataVector[0],
                                                        oScaleDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx + 1]->Create(oScaleDims, memory::data_type::f32, memory::format::x);
                PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x, &oShiftDataVector[0],
                                                            oShiftDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                ops.append_depthwise(depthwise_scale_shift,
                                     (const float *)PostOpsIntBlobMemory[blob_idx]->GetData(),
                                     (const float *)PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                blob_idx += 2;
            }

            // Replay activations fused into the dw-conv after its post-op.
            for (auto &dwConvFusedNode : convolutionNode->fusedWith) {
                auto* dwConvActivationNode = dynamic_cast<MKLDNNActivationNode *>(dwConvFusedNode.get());
                if (dwConvActivationNode) {
                    ops.append_eltwise(1.0, dwConvActivationNode->getAlgorithm(), dwConvActivationNode->getAlpha(),
                                       dwConvActivationNode->getBeta());
                }
            }

            continue;
        }
    }

    attr.set_post_ops(ops);
}
409
410 void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
411     if (!supportedPrimitiveDescriptors.empty())
412         return;
413
414     mkldnn::primitive_attr attr;
415     setPostOps(attr);
416
417     for (auto& desc : descs) {
418         try {
419             primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
420             do {
421                 InferenceEngine::LayerConfig config;
422                 config.dynBatchSupport = true;
423                 for (size_t i = 0; i < desc.inputNumbers(); i++) {
424                     InferenceEngine::DataConfig dataConfig;
425                     dataConfig.inPlace = -1;
426                     dataConfig.constant = false;
427                     dataConfig.desc = getSrcMemDesc(itpd, i);
428                     if (!isGrouped)
429                         dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
430                     config.inConfs.push_back(dataConfig);
431                 }
432
433                 for (size_t i = 0; i < desc.outputNumbers(); i++) {
434                     InferenceEngine::DataConfig dataConfig;
435                     if (withSum) {
436                         dataConfig.inPlace = 1;
437                     }
438
439                     dataConfig.constant = false;
440                     dataConfig.desc = getDstMemDesc(itpd, i);
441                     if (!isGrouped)
442                         dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
443                     config.outConfs.push_back(dataConfig);
444
445                     if (withSum) {
446                         dataConfig.inPlace = -1;
447                         config.inConfs.push_back(dataConfig);
448                     }
449                 }
450                 impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
451
452                 supportedPrimitiveDescriptors.emplace_back(config, impl_type);
453             } while (itpd.next());
454         } catch (std::exception& e) {
455             // it throw exception in case of no implementation found
456             continue;
457         }
458     }
459 }
460
461
462 void MKLDNNConvolutionNode::createPrimitive() {
463     if (prim)
464         return;
465
466     mkldnn::primitive_attr attr;
467     setPostOps(attr, true);
468     addScaleToPrimitiveAttr(attr);
469
470     auto prim_desc = createPrimitiveDescriptor<convolution_forward::primitive_desc,
471             convolution_forward::desc>(attr);
472
473     if (internalBlobMemory.size() > 1) {
474         prim.reset(new convolution_forward(prim_desc,
475                                            getParentEdgeAt(0)->getMemory().GetPrimitive(),
476                                            internalBlobMemory[0]->GetPrimitive(),
477                                            internalBlobMemory[1]->GetPrimitive(),
478                                            getChildEdgeAt(0)->getMemory().GetPrimitive()));
479     } else {
480         prim.reset(new convolution_forward(prim_desc,
481                                            getParentEdgeAt(0)->getMemory().GetPrimitive(),
482                                            internalBlobMemory[0]->GetPrimitive(),
483                                            getChildEdgeAt(0)->getMemory().GetPrimitive()));
484     }
485 }
486
487 bool MKLDNNConvolutionNode::created() const {
488     return getType() == Convolution || getType() == Convolution_Sum_Activation ||
489            getType() == Convolution_Activation || getType() == Convolution_Sum;
490 }
491
// Creates mkldnn convolution_forward descriptors (winograd and direct
// flavours) for one input/output layout pair and appends them to 'descs'.
// For INT8 weights the weight/bias data types become s8/s32 and the output
// precision is taken from the descriptor prepared by the quantization
// normalizer (unless the layer explicitly produces FP32).
void MKLDNNConvolutionNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
                                             const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
    TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0];
    // By default, weights and biases follow the input precision.
    mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());
    mkldnn::memory::data_type bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());

    Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;

    if (weights->precision() == Precision::I8) {
        wdt = memory::s8;
        bdt = memory::s32;

        Precision outPrec;
        if (getCnnLayer()->outData[0]->getPrecision() == Precision::FP32) {
            outPrec = Precision::FP32;
        } else {
            // define precision accordninly normalizer
            // TODO(amalyshe) do we need to have separate flow for last in int8 chain or not?
            outPrec = outDesc.getPrecision();
        }

        inDesc = TensorDesc(inDesc.getPrecision() , inputDesc[0].getDims(), inputDesc[0].getBlockingDesc());
        outDesc = TensorDesc(outPrec, outputDesc[0].getDims(), outputDesc[0].getBlockingDesc());
    }

    MKLDNNMemoryDesc in_candidate(inDesc);
    MKLDNNMemoryDesc out_candidate(outDesc);

    // NOTE(review): in_fmt/out_fmt and O_IND/I_IND below appear unused in this
    // function - possibly leftovers from an earlier revision.
    auto in_fmt = in_candidate.getFormat();
    auto out_fmt = out_candidate.getFormat();

    int O_IND = (isGrouped || isMerged) ? 1 : 0;
    int I_IND = (isGrouped || isMerged) ? 2 : 1;

    // grouping and autoblocking is not compatible
    if (((isGrouped && !isDW) || isMerged) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
        return;

    MKLDNNDims blocked_weightDims(weightDims);
    MKLDNNDims blocked_biasesDims(biasesDims);
    // memory::any lets mkldnn choose the optimal weights layout.
    MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::any};

    for (auto alg : {algorithm::convolution_winograd, algorithm::convolution_direct}) {
        std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc;
        if (withBiases) {
            MKLDNNMemoryDesc bias_candidate{blocked_biasesDims, bdt, memory::any};

            conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg,
                                                          in_candidate, wgh_candidate, bias_candidate, out_candidate,
                                                          stride, dilation, paddingL, paddingR, padding_kind::zero));
        } else {
            conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg,
                                                          in_candidate, wgh_candidate, out_candidate, stride, dilation,
                                                          paddingL, paddingR, padding_kind::zero));
        }

        descs.emplace_back(conv_desc);
    }
}
551
552 void MKLDNNConvolutionNode::addScaleToPrimitiveAttr(mkldnn::primitive_attr attr) const {
553     bool scaled = false;
554     if (wScale != nullptr) {
555        float* wScaleData = static_cast<float*>(wScale->buffer());
556
557        std::vector<float> oScaleDataVector;
558        if (getCnnLayer()->precision == Precision::I8 && getCnnLayer()->outData[0]->getPrecision() != Precision::FP32) {
559            float *oScaleData = static_cast<float *>(oScale->buffer());
560
561            for (size_t c = 0; c < wScale->size(); c++) {
562                oScaleDataVector.push_back(wScaleData[c] / oScaleData[c]);
563            }
564        } else {
565            for (size_t c = 0; c < wScale->size(); c++) {
566                oScaleDataVector.push_back(wScaleData[c]);
567            }
568        }
569
570        attr.set_int_output_round_mode(mkldnn::round_nearest);
571        attr.set_output_scales(1 << 1 /*through C dim*/, oScaleDataVector);
572     }
573 }
574
// Called after the graph has selected a primitive descriptor and fixed the
// layer configuration: appends a descriptor for the exact configured in/out
// descs, re-enumerates all implementations, and rewrites the selected
// descriptor's config so it matches both the chosen implementation and the
// supplied 'config'.
void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& config) {
    auto* selectedPD = getSelectedPrimitiveDescriptor();
    if (!selectedPD) {
        return;
    }
    bool addedNewDesc = false;
    // The precision guard below is intentionally disabled: a descriptor for the
    // requested config is always appended.
    /*if (config.inConfs[0].desc.getPrecision() == InferenceEngine::Precision::FP32 &&
            config.outConfs[0].desc.getPrecision() == InferenceEngine::Precision::FP32) {*/
        addedNewDesc = true;
        createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc});
    //}

    mkldnn::primitive_attr attr;
    setPostOps(attr);
    addScaleToPrimitiveAttr(attr);

    InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig();
    size_t selected_count = 0;  // running index across all (desc, impl) pairs
    for (size_t i = 0; i < descs.size(); i++) {
        const auto& desc = descs[i];
        try {
            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
            do {
                InferenceEngine::LayerConfig cfg;
                cfg.dynBatchSupport = true;
                for (size_t j = 0; j < desc.inputNumbers(); j++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = -1;
                    dataConfig.constant = false;
                    dataConfig.desc = getSrcMemDesc(itpd, j);
                    cfg.inConfs.push_back(dataConfig);
                }

                for (size_t j = 0; j < desc.outputNumbers(); j++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = -1;
                    if (withSum) {
                        // Fused sum: register an extra input config here (desc
                        // still default at this point) and mark the output as
                        // in-place with input #1.
                        cfg.inConfs.push_back(dataConfig);
                        dataConfig.inPlace = 1;
                    }
                    dataConfig.constant = false;
                    dataConfig.desc = getDstMemDesc(itpd, j);

                    cfg.outConfs.push_back(dataConfig);
                }
                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());

                if (selected_count == selectedPrimitiveDescriptorIndex) {
                    // The implementation at the selected index must not have
                    // changed since initSupportedPrimitiveDescriptors().
                    if (impl_type != selectedPD->getImplementationType()) {
                        THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
                    }
                    rightConfig = cfg;
                }
                if (i == descs.size() - 1 && addedNewDesc) {
                    // The last descriptor is the one appended above for the
                    // requested config; prefer the caller-provided config when
                    // its implementation matches the selected one.
                    if (impl_type == selectedPD->getImplementationType()) {
                        rightConfig = config;
                    }
                }
                selected_count++;
            } while (itpd.next());
        } catch (std::exception& e) {
            // No implementation for this descriptor/attr pair - skip it.
            continue;
        }
    }
    selectedPD->getConfig() = rightConfig;
}