// Publishing 2019 R1 content
// [platform/upstream/dldt.git] / inference-engine / src / mkldnn_plugin / nodes / mkldnn_bin_conv_node.cpp
1 // Copyright (C) 2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #include "mkldnn_bin_conv_node.h"
6 #include "mkldnn_reorder_node.h"
7 #include "mkldnn_input_node.h"
8 #include "mkldnn_activation_node.h"
9 #include "desc_iterator.hpp"
10 #include "mkldnn_eltwise_node.h"
11 #include "mkldnn_depthwise_node.h"
12 #include "mkldnn_quantize_node.h"
13 #include "mkldnn_conv_node.h"
14 #include <ie_layers.h>
15 #include <string>
16 #include <vector>
17 #include <mkldnn_types.h>
18 #include <mkldnn_extension_utils.h>
19 #include <ie_layers_internal.hpp>
20
21 using namespace mkldnn;
22 using namespace MKLDNNPlugin;
23 using namespace InferenceEngine;
24
25 MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng)
26         : MKLDNNNode(layer, eng) {
27     internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
28         return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(0).desc());
29     });
30 }
31
32 void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() {
33     if (!descs.empty())
34         return;
35
36     auto* binConvLayer = dynamic_cast<BinaryConvolutionLayer*>(getCnnLayer().get());
37     if (binConvLayer == nullptr)
38         THROW_IE_EXCEPTION << "Cannot convert convolution layer.";
39
40     if (getChildEdges().empty())
41         THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
42
43     if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) {
44         THROW_IE_EXCEPTION << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
45     }
46
47     isMerged = (!getMergeWith().empty());  // grouped convolution was constructed from split->concat subgraph
48     isGrouped = binConvLayer->_group != 1;  // group info available from IR
49     if (isMerged && isGrouped)
50         THROW_IE_EXCEPTION << "Convolution initialization. Group splitted mode are used together with direct group specification.";
51
52     // default values. Can be replaced in next steps
53     size_t groupNum = binConvLayer->_group;
54     pad_value = binConvLayer->_pad_value;
55     size_t groupIC = binConvLayer->_in_depth;
56     size_t groupOC = binConvLayer->_out_depth;
57
58     isDW = groupNum == groupOC && groupNum == groupIC;
59
60     if (isMerged) {
61         groupNum = getMergeWith().size() + 1;
62     }
63     if (isGrouped) {
64         groupIC /= groupNum;
65         groupOC /= groupNum;
66     }
67
68     weightDims.clear();
69     weightDims.push_back(groupOC);
70     weightDims.push_back(groupIC);
71     for (int i = 1; i <= binConvLayer->_kernel.size(); i++) {
72         weightDims.push_back(binConvLayer->_kernel[binConvLayer->_kernel.size() - i]);
73     }
74     biasesDims = { groupOC * groupNum };
75
76     if (isGrouped || isMerged) weightDims.insert(weightDims.begin(), groupNum);
77
78     internalBlobs.push_back(createInternalBlob(weightDims, true));
79
80     Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;
81
82     invertVectorCopyUtoI(binConvLayer->_stride, stride);
83     for (int i = 1; i <= binConvLayer->_dilation.size(); i++) {
84         dilation.push_back(static_cast<int>(binConvLayer->_dilation[binConvLayer->_dilation.size() - i]) - 1);
85     }
86
87     auto allPads = getPaddings(*binConvLayer);
88     invertVectorCopyUtoI(allPads.begin, paddingL);
89     invertVectorCopyUtoI(allPads.end, paddingR);
90
91     MKLDNNDims weightsDims = MKLDNNDims(weightDims);
92
93     for (int i = 0; i < paddingR.size(); i++) {
94         int with_group = (isGrouped || isMerged) ? 1 : 0;
95         int krn = weightsDims[with_group + 2 + i];
96         int src = getParentEdgeAt(0)->getDims()[2 + i];
97         int dst = getChildEdgeAt(0)->getDims()[2 + i];
98
99         krn = (krn - 1)*(dilation[i] + 1) + 1;
100         int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
101         paddingR[i] = (dst - calc_dst) * stride[i];
102     }
103
104     withSum = false;
105     withBinarization = false;
106     for (auto &node : fusedWith) {
107         auto* convolutionNode = dynamic_cast<MKLDNNConvolutionNode*>(node.get());
108         if (convolutionNode) {
109             auto *convLayer = reinterpret_cast<ConvolutionLayer*>(convolutionNode->getCnnLayer().get());
110             dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2];
111             dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1];
112             dw_conv_oc = convLayer->_out_depth;
113             for (int i = 0; i < convLayer->_kernel.size(); i++) {
114                 dw_conv_kernel.push_back(convLayer->_kernel[i]);
115             }
116             for (int i = 0; i < convLayer->_stride.size(); i++) {
117                 dw_conv_strides.push_back(convLayer->_stride[i]);
118             }
119         }
120
121         auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode*>(node.get());
122         if (eltwiseNode) {
123             withSum = true;
124         }
125
126         auto* quantizationNode = dynamic_cast<MKLDNNQuantizeNode*>(node.get());
127         if (quantizationNode) {
128             withBinarization = true;
129         }
130     }
131
132     if ((!withSum && getParentEdges().size() != 1) || (withSum && getParentEdges().size() != 2))
133         THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
134
135     auto inputDataType = memory::bin;
136     auto outputDataType = withBinarization ? memory::bin : memory::f32;
137
138     MKLDNNMemoryDesc in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nhwc);
139     MKLDNNMemoryDesc out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nhwc);
140     createDescriptor({in_candidate}, {out_candidate});
141 }
142
// Builds the mkldnn post-ops chain from the nodes fused into this convolution
// (sum, eltwise activation, depthwise scale/shift, binarization, fused depthwise
// convolution) and attaches it to 'attr'.
// When 'initWeights' is true, per-channel data for depthwise/binarization/dw-conv
// post-ops is uploaded into freshly allocated MKLDNNMemory blobs kept alive in
// PostOpsIntBlobMemory; otherwise post-ops are appended with null data pointers
// (shape-only pass used while probing supported descriptors).
// NOTE(review): the default argument lives on this definition; it applies only to
// calls in this translation unit — presumably the header declares no default.
void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) {
    // Index of the next free slot in PostOpsIntBlobMemory.
    int blob_idx = 0;
    mkldnn::post_ops ops;

    for (auto &node : fusedWith) {
        // Fused eltwise becomes a sum post-op (accumulate into dst).
        auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
        if (eltwiseNode) {
            if (eltwiseNode->getCnnLayer()->precision == Precision::I8) {
                auto it = eltwiseNode->getCnnLayer()->blobs.find("eltwise-sum-scale");
                if (it != eltwiseNode->getCnnLayer()->blobs.end()) {
                    // currently there is the only one scale while we need scale by channel :(
                    ops.append_sum(it->second->buffer().as<float*>()[0]);
                }
            } else {
                ops.append_sum(1.0);
            }
            continue;
        }

        // Fused activation becomes an eltwise post-op.
        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
        if (activationNode) {
            ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(),
                               activationNode->getBeta());
            continue;
        }

        // Fused depthwise (scale/shift or PReLU-like) post-op.
        auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
        if (depthwiseNode) {
            auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());

            if (initWeights) {
                // Channel count is rounded up to 16 to match vectorized kernels.
                MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);

                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
                                                             depthwiseLayer->_weights->buffer(),
                                                             depthwiseLayer->_weights->size() *
                                                             MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                // Broadcast a single scalar across all channels when required.
                if (depthwiseNode->isBroadcast()) {
                    float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
                    for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                        static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
                    }
                }

                if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
                    // scale_shift carries a second (bias) blob; upload it the same way.
                    PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                    PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
                                                                memory::format::x);
                    PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
                                                                 depthwiseLayer->_biases->buffer(),
                                                                 depthwiseLayer->_biases->size() *
                                                                 MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                    if (depthwiseNode->isBroadcast()) {
                        float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
                        for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                            static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
                        }
                    }

                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                    blob_idx += 2;
                } else {
                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         nullptr);

                    blob_idx += 1;
                }
            } else {
                // Shape-only pass: register the post-op without data.
                ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                     nullptr,
                                     nullptr);
            }

            continue;
        }

        // Fused quantize node becomes a binarization post-op driven by the
        // thresholds accumulated via pushBinarizationThreshold().
        auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode *>(node.get());
        if (quantizeNode) {
            if (initWeights) {
                MKLDNNDims binarizationDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx]->Create(binarizationDims, memory::data_type::f32, memory::format::x);

                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
                                                        &binarizationThresholds[0],
                                                        binarizationThresholds.size() *
                                                        MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                ops.append_binarization(binarization_depthwise, (const float*)PostOpsIntBlobMemory[blob_idx]->GetData());

                blob_idx += 1;
            } else {
                ops.append_binarization(binarization_depthwise, nullptr);
            }
        }
        // NOTE: no 'continue' here — a node is never both quantize and convolution,
        // so falling through to the next dynamic_cast is harmless.

        // Fused depthwise convolution becomes a dw_conv post-op using the geometry
        // captured in getSupportedDescriptors().
        auto* convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(node.get());
        if (convolutionNode) {
            auto* convLayer = reinterpret_cast<ConvolutionLayer*>(convolutionNode->getCnnLayer().get());

            if (initWeights) {
                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
                PostOpsIntBlobMemory[blob_idx]->Create(dwWeightsDims, memory::data_type::f32,
                                                            memory::format::Goihw8g);

                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::goihw,
                                                             convLayer->_weights->buffer(),
                                                             dwWeightsDims.size() *
                                                             MKLDNNExtensionUtils::sizeOfDataType(
                                                                     memory::data_type::f32));

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                MKLDNNDims dwBiasesDims({dw_conv_oc});
                PostOpsIntBlobMemory[blob_idx + 1]->Create(dwBiasesDims, memory::data_type::f32,
                                                                memory::format::x);
                PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
                                                                 convLayer->_biases->buffer(),
                                                                 dwBiasesDims.size() *
                                                                 MKLDNNExtensionUtils::sizeOfDataType(
                                                                         memory::data_type::f32));
                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
                                   (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                   (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                blob_idx += 2;
            } else {
                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
                                   nullptr,
                                   nullptr);
            }
            // Activations fused into the dw-conv itself are appended after it.
            for (auto &dwConvFusedNode : convolutionNode->getFusedWith()) {
                auto* dwConvActivationNode = dynamic_cast<MKLDNNActivationNode *>(dwConvFusedNode.get());
                if (dwConvActivationNode) {
                    ops.append_eltwise(1.0, dwConvActivationNode->getAlgorithm(), dwConvActivationNode->getAlpha(),
                                       dwConvActivationNode->getBeta());
                }
            }

            continue;
        }
    }

    attr.set_post_ops(ops);
}
300
301 void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() {
302     if (!supportedPrimitiveDescriptors.empty())
303         return;
304
305     mkldnn::primitive_attr attr;
306     setPostOps(attr);
307
308     for (auto& desc : descs) {
309         try {
310             primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
311             do {
312                 InferenceEngine::LayerConfig config;
313                 config.dynBatchSupport = true;
314                 for (size_t i = 0; i < desc.inputNumbers(); i++) {
315                     InferenceEngine::DataConfig dataConfig;
316                     dataConfig.inPlace = -1;
317                     dataConfig.constant = false;
318                     dataConfig.desc = getSrcMemDesc(itpd, i);
319                     if (!isGrouped)
320                         dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
321                     config.inConfs.push_back(dataConfig);
322                 }
323
324                 for (size_t i = 0; i < desc.outputNumbers(); i++) {
325                     InferenceEngine::DataConfig dataConfig;
326                     if (withSum) {
327                         dataConfig.inPlace = 1;
328                     }
329
330                     dataConfig.constant = false;
331                     dataConfig.desc = getDstMemDesc(itpd, i);
332                     if (!isGrouped)
333                         dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
334                     config.outConfs.push_back(dataConfig);
335
336                     if (withSum) {
337                         dataConfig.inPlace = -1;
338                         config.inConfs.push_back(dataConfig);
339                     }
340                 }
341                 impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
342
343                 supportedPrimitiveDescriptors.emplace_back(config, impl_type);
344             } while (itpd.next());
345         } catch (std::exception& e) {
346             // it throw exception in case of no implementation found
347             continue;
348         }
349     }
350 }
351
352
353 void MKLDNNBinaryConvolutionNode::createPrimitive() {
354     if (prim)
355         return;
356
357     mkldnn::primitive_attr attr;
358     setPostOps(attr, true);
359
360     auto prim_desc = createPrimitiveDescriptor<binary_convolution_forward::primitive_desc,
361             binary_convolution_forward::desc>(attr);
362
363     prim.reset(new binary_convolution_forward(prim_desc,
364                                        getParentEdgeAt(0)->getMemory().GetPrimitive(),
365                                        internalBlobMemory[0]->GetPrimitive(),
366                                        getChildEdgeAt(0)->getMemory().GetPrimitive()));
367 }
368
// Reports whether this node was successfully constructed as a BinaryConvolution node.
bool MKLDNNBinaryConvolutionNode::created() const {
    return getType() == BinaryConvolution;
}
372
373 void MKLDNNBinaryConvolutionNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
374                                                    const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
375     TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0];
376     mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());
377
378     MKLDNNMemoryDesc in_candidate(inDesc);
379     MKLDNNMemoryDesc out_candidate(outDesc);
380
381     // grouping and autoblocking is not compatible
382     if (((isGrouped && !isDW) || isMerged) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
383         return;
384
385     MKLDNNDims blocked_weightDims(weightDims);
386     MKLDNNDims blocked_biasesDims(biasesDims);
387     MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::any};
388
389     std::shared_ptr<mkldnn::binary_convolution_forward::desc> bin_conv_desc;
390     bin_conv_desc.reset(new binary_convolution_forward::desc(prop_kind::forward_scoring, algorithm::binary_convolution_direct,
391                                                              in_candidate, wgh_candidate, out_candidate, stride, dilation,
392                                                              paddingL, paddingR, pad_value));
393
394     descs.emplace_back(bin_conv_desc);
395 }
396
// Re-derives the final layer configuration after the primitive descriptor has
// been selected: re-enumerates all implementations, finds the one matching the
// selected index/implementation type and installs its config (or the caller's
// 'config' when the last descriptor matches the selected implementation type).
void MKLDNNBinaryConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& config) {
    auto* selectedPD = getSelectedPrimitiveDescriptor();
    if (!selectedPD) {
        return;
    }

    // Rebuild the descriptor list from the concrete configs chosen by the graph.
    createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc});

    mkldnn::primitive_attr attr;
    setPostOps(attr);

    InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig();
    // Flat index over all (descriptor, implementation) pairs, compared against
    // the selected primitive descriptor index.
    size_t selected_count = 0;
    for (size_t i = 0; i < descs.size(); i++) {
        const auto& desc = descs[i];
        try {
            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
            do {
                InferenceEngine::LayerConfig cfg;
                cfg.dynBatchSupport = true;
                for (size_t j = 0; j < desc.inputNumbers(); j++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = -1;
                    dataConfig.constant = false;
                    dataConfig.desc = getSrcMemDesc(itpd, j);
                    cfg.inConfs.push_back(dataConfig);
                }

                for (size_t j = 0; j < desc.outputNumbers(); j++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = -1;
                    if (withSum) {
                        // The sum operand is pushed as an extra input first (inPlace -1),
                        // then the output is marked in-place with input #1.
                        cfg.inConfs.push_back(dataConfig);
                        dataConfig.inPlace = 1;
                    }
                    dataConfig.constant = false;
                    dataConfig.desc = getDstMemDesc(itpd, j);

                    cfg.outConfs.push_back(dataConfig);
                }
                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());

                if (selected_count == selectedPrimitiveDescriptorIndex) {
                    // Sanity check: the enumeration must still yield the same
                    // implementation at the selected index.
                    if (impl_type != selectedPD->getImplementationType()) {
                        THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
                    }
                    rightConfig = cfg;
                }
                if (i == descs.size() - 1) {
                    // The last descriptor is the one rebuilt from 'config' above;
                    // prefer the caller-provided config when types match.
                    if (impl_type == selectedPD->getImplementationType()) {
                        rightConfig = config;
                    }
                }
                selected_count++;
            } while (itpd.next());
        } catch (std::exception& e) {
            // No implementation for this descriptor — skip it.
            continue;
        }
    }
    selectedPD->getConfig() = rightConfig;
}
458
459 void MKLDNNBinaryConvolutionNode::pushBinarizationThreshold(float value) {
460     binarizationThresholds.push_back(value);
461 }