// Publishing 2019 R1 content
// [platform/upstream/dldt.git] / inference-engine / src / mkldnn_plugin / nodes / mkldnn_bin_conv_node.cpp
1 // Copyright (C) 2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #include "mkldnn_bin_conv_node.h"
6 #include "mkldnn_reorder_node.h"
7 #include "mkldnn_input_node.h"
8 #include "mkldnn_activation_node.h"
9 #include "desc_iterator.hpp"
10 #include "mkldnn_eltwise_node.h"
11 #include "mkldnn_depthwise_node.h"
12 #include "mkldnn_quantize_node.h"
13 #include "mkldnn_conv_node.h"
14 #include <ie_layers.h>
15 #include <string>
16 #include <vector>
17 #include <mkldnn_types.h>
18 #include <mkldnn_extension_utils.h>
19 #include <ie_layers_internal.hpp>
20
21 using namespace mkldnn;
22 using namespace MKLDNNPlugin;
23 using namespace InferenceEngine;
24
25 MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng)
26         : MKLDNNNode(layer, eng) {
27     internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
28         return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(0).desc());
29     });
30 }
31
32 void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() {
33     if (!descs.empty())
34         return;
35
36     auto* binConvLayer = dynamic_cast<BinaryConvolutionLayer*>(getCnnLayer().get());
37     if (binConvLayer == nullptr)
38         THROW_IE_EXCEPTION << "Cannot convert convolution layer.";
39
40     if (getChildEdges().empty())
41         THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
42
43     if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) {
44         THROW_IE_EXCEPTION << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
45     }
46
47     isMerged = (!getMergeWith().empty());  // grouped convolution was constructed from split->concat subgraph
48     isGrouped = binConvLayer->_group != 1;  // group info available from IR
49     if (isMerged && isGrouped)
50         THROW_IE_EXCEPTION << "Convolution initialization. Group splitted mode are used together with direct group specification.";
51
52     // default values. Can be replaced in next steps
53     size_t groupNum = binConvLayer->_group;
54     pad_value = binConvLayer->_pad_value;
55     size_t groupIC = binConvLayer->_in_depth;
56     size_t groupOC = binConvLayer->_out_depth;
57
58     isDW = groupNum == groupOC && groupNum == groupIC;
59
60     if (isMerged) {
61         groupNum = getMergeWith().size() + 1;
62     }
63     if (isGrouped) {
64         groupIC /= groupNum;
65         groupOC /= groupNum;
66     }
67
68     weightDims.clear();
69     weightDims.push_back(groupOC);
70     weightDims.push_back(groupIC);
71     for (int i = 1; i <= binConvLayer->_kernel.size(); i++) {
72         weightDims.push_back(binConvLayer->_kernel[binConvLayer->_kernel.size() - i]);
73     }
74     biasesDims = { groupOC * groupNum };
75
76     if (isGrouped || isMerged) weightDims.insert(weightDims.begin(), groupNum);
77
78     internalBlobs.push_back(createInternalBlob(weightDims, true));
79
80     Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;
81
82     invertVectorCopyUtoI(binConvLayer->_stride, stride);
83     for (int i = 1; i <= binConvLayer->_dilation.size(); i++) {
84         dilation.push_back(static_cast<int>(binConvLayer->_dilation[binConvLayer->_dilation.size() - i]) - 1);
85     }
86
87     auto allPads = getPaddings(*binConvLayer);
88     invertVectorCopyUtoI(allPads.begin, paddingL);
89     invertVectorCopyUtoI(allPads.end, paddingR);
90
91     MKLDNNDims weightsDims = MKLDNNDims(weightDims);
92
93     for (int i = 0; i < paddingR.size(); i++) {
94         int with_group = (isGrouped || isMerged) ? 1 : 0;
95         int krn = weightsDims[with_group + 2 + i];
96         int src = getParentEdgeAt(0)->getDims()[2 + i];
97         int dst = getChildEdgeAt(0)->getDims()[2 + i];
98
99         krn = (krn - 1)*(dilation[i] + 1) + 1;
100         int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
101         paddingR[i] = (dst - calc_dst) * stride[i];
102     }
103
104     withSum = false;
105     withBinarization = false;
106     for (auto &node : fusedWith) {
107         auto* convolutionNode = dynamic_cast<MKLDNNConvolutionNode*>(node.get());
108         if (convolutionNode) {
109             auto *convLayer = reinterpret_cast<ConvolutionLayer*>(convolutionNode->getCnnLayer().get());
110             dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2];
111             dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1];
112             dw_conv_oc = convLayer->_out_depth;
113             for (int i = 0; i < convLayer->_kernel.size(); i++) {
114                 dw_conv_kernel.push_back(convLayer->_kernel[i]);
115             }
116             for (int i = 0; i < convLayer->_stride.size(); i++) {
117                 dw_conv_strides.push_back(convLayer->_stride[i]);
118             }
119         }
120
121         auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode*>(node.get());
122         if (eltwiseNode) {
123             withSum = true;
124         }
125
126         auto* quantizationNode = dynamic_cast<MKLDNNQuantizeNode*>(node.get());
127         if (quantizationNode) {
128             withBinarization = true;
129         }
130     }
131
132     if ((!withSum && getParentEdges().size() != 1) || (withSum && getParentEdges().size() != 2))
133         THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
134
135     auto inputDataType = memory::bin;
136     auto outputDataType = withBinarization ? memory::bin : memory::f32;
137
138     MKLDNNMemoryDesc in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nhwc);
139     MKLDNNMemoryDesc out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nhwc);
140     createDescriptor({in_candidate}, {out_candidate});
141 }
142
// Builds the mkldnn post-ops chain from the nodes fused into this convolution
// (sum, eltwise activation, depthwise scale/shift, binarization, fused depthwise
// convolution) and attaches it to 'attr'.
// When 'initWeights' is true, per-channel data for depthwise/binarization/dw-conv
// post-ops is uploaded into freshly allocated MKLDNNMemory blobs kept alive in
// PostOpsIntBlobMemory; otherwise post-ops are appended with null data pointers
// (shape-only pass used while probing supported descriptors).
// NOTE(review): the default argument lives on this definition; it applies only to
// calls in this translation unit — presumably the header declares no default.
void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) {
    // Index of the next free slot in PostOpsIntBlobMemory.
    int blob_idx = 0;
    mkldnn::post_ops ops;

    for (auto &node : fusedWith) {
        // Fused eltwise becomes a sum post-op (accumulate into dst).
        auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
        if (eltwiseNode) {
            if (eltwiseNode->getCnnLayer()->precision == Precision::I8) {
                auto it = eltwiseNode->getCnnLayer()->blobs.find("eltwise-sum-scale");
                if (it != eltwiseNode->getCnnLayer()->blobs.end()) {
                    // currently there is the only one scale while we need scale by channel :(
                    ops.append_sum(it->second->buffer().as<float*>()[0]);
                }
            } else {
                ops.append_sum(1.0);
            }
            continue;
        }

        // Fused activation becomes an eltwise post-op.
        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
        if (activationNode) {
            ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(),
                               activationNode->getBeta());
            continue;
        }

        // Fused depthwise (scale/shift or PReLU-like) post-op.
        auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
        if (depthwiseNode) {
            auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());

            if (initWeights) {
                // Channel count is rounded up to 16 to match vectorized kernels.
                MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);

                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
                                                             depthwiseLayer->_weights->buffer(),
                                                             depthwiseLayer->_weights->size() *
                                                             MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                // Broadcast a single scalar across all channels when required.
                if (depthwiseNode->isBroadcast()) {
                    float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
                    for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                        static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
                    }
                }

                if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
                    // scale_shift carries a second (bias) blob; upload it the same way.
                    PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                    PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
                                                                memory::format::x);
                    PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
                                                                 depthwiseLayer->_biases->buffer(),
                                                                 depthwiseLayer->_biases->size() *
                                                                 MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                    if (depthwiseNode->isBroadcast()) {
                        float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
                        for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                            static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
                        }
                    }

                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                    blob_idx += 2;
                } else {
                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         nullptr);

                    blob_idx += 1;
                }
            } else {
                // Shape-only pass: register the post-op without data.
                ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                     nullptr,
                                     nullptr);
            }

            continue;
        }

        // Fused quantize node becomes a binarization post-op driven by the
        // thresholds accumulated via pushBinarizationThreshold().
        auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode *>(node.get());
        if (quantizeNode) {
            if (initWeights) {
                MKLDNNDims binarizationDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx]->Create(binarizationDims, memory::data_type::f32, memory::format::x);

                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
                                                        &binarizationThresholds[0],
                                                        binarizationThresholds.size() *
                                                        MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                ops.append_binarization(binarization_depthwise, (const float*)PostOpsIntBlobMemory[blob_idx]->GetData());

                blob_idx += 1;
            } else {
                ops.append_binarization(binarization_depthwise, nullptr);
            }
        }
        // NOTE: no 'continue' here — a node is never both quantize and convolution,
        // so falling through to the next dynamic_cast is harmless.

        // Fused depthwise convolution becomes a dw_conv post-op using the geometry
        // captured in getSupportedDescriptors().
        auto* convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(node.get());
        if (convolutionNode) {
            auto* convLayer = reinterpret_cast<ConvolutionLayer*>(convolutionNode->getCnnLayer().get());

            if (initWeights) {
                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
                PostOpsIntBlobMemory[blob_idx]->Create(dwWeightsDims, memory::data_type::f32,
                                                            memory::format::Goihw8g);

                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::goihw,
                                                             convLayer->_weights->buffer(),
                                                             dwWeightsDims.size() *
                                                             MKLDNNExtensionUtils::sizeOfDataType(
                                                                     memory::data_type::f32));

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                MKLDNNDims dwBiasesDims({dw_conv_oc});
                PostOpsIntBlobMemory[blob_idx + 1]->Create(dwBiasesDims, memory::data_type::f32,
                                                                memory::format::x);
                PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
                                                                 convLayer->_biases->buffer(),
                                                                 dwBiasesDims.size() *
                                                                 MKLDNNExtensionUtils::sizeOfDataType(
                                                                         memory::data_type::f32));
                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
                                   (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                   (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                blob_idx += 2;
            } else {
                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
                                   nullptr,
                                   nullptr);
            }
            // Activations fused into the dw-conv itself are appended after it.
            for (auto &dwConvFusedNode : convolutionNode->getFusedWith()) {
                auto* dwConvActivationNode = dynamic_cast<MKLDNNActivationNode *>(dwConvFusedNode.get());
                if (dwConvActivationNode) {
                    ops.append_eltwise(1.0, dwConvActivationNode->getAlgorithm(), dwConvActivationNode->getAlpha(),
                                       dwConvActivationNode->getBeta());
                }
            }

            continue;
        }
    }

    attr.set_post_ops(ops);
}
300
301 void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() {
302     if (!supportedPrimitiveDescriptors.empty())
303         return;
304
305     mkldnn::primitive_attr attr;
306     setPostOps(attr);
307
308     for (auto& desc : descs) {
309         try {
310             primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
311             do {
312                 InferenceEngine::LayerConfig config;
313                 config.dynBatchSupport = true;
314                 for (size_t i = 0; i < desc.inputNumbers(); i++) {
315                     InferenceEngine::DataConfig dataConfig;
316                     dataConfig.inPlace = -1;
317                     dataConfig.constant = false;
318                     dataConfig.desc = getSrcMemDesc(itpd, i);
319                     if (!isGrouped)
320                         dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
321                     config.inConfs.push_back(dataConfig);
322                 }
323
324                 for (size_t i = 0; i < desc.outputNumbers(); i++) {
325                     InferenceEngine::DataConfig dataConfig;
326                     if (withSum) {
327                         dataConfig.inPlace = 1;
328                     }
329
330                     dataConfig.constant = false;
331                     dataConfig.desc = getDstMemDesc(itpd, i);
332                     if (!isGrouped)
333                         dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
334                     config.outConfs.push_back(dataConfig);
335
336                     if (withSum) {
337                         dataConfig.inPlace = -1;
338                         config.inConfs.push_back(dataConfig);
339                     }
340                 }
341                 impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
342
343                 supportedPrimitiveDescriptors.emplace_back(config, impl_type);
344             } while (itpd.next());
345         } catch (std::exception& e) {
346             // it throw exception in case of no implementation found
347             continue;
348         }
349     }
350 }
351
352
353 void MKLDNNBinaryConvolutionNode::createPrimitive() {
354     if (prim)
355         return;
356
357     mkldnn::primitive_attr attr;
358     setPostOps(attr, true);
359
360     auto prim_desc = createPrimitiveDescriptor<binary_convolution_forward::primitive_desc,
361             binary_convolution_forward::desc>(attr);
362
363     prim.reset(new binary_convolution_forward(prim_desc,
364                                        getParentEdgeAt(0)->getMemory().GetPrimitive(),
365                                        internalBlobMemory[0]->GetPrimitive(),
366                                        getChildEdgeAt(0)->getMemory().GetPrimitive()));
367 }
368
// Reports whether this node was successfully constructed as a BinaryConvolution node.
bool MKLDNNBinaryConvolutionNode::created() const {
    return getType() == BinaryConvolution;
}
372
373 void MKLDNNBinaryConvolutionNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
374                                                    const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
375     TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0];
376     mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());
377
378     MKLDNNMemoryDesc in_candidate(inDesc);
379     MKLDNNMemoryDesc out_candidate(outDesc);
380
381     // grouping and autoblocking is not compatible
382     if (((isGrouped && !isDW) || isMerged) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
383         return;
384
385     MKLDNNDims blocked_weightDims(weightDims);
386     MKLDNNDims blocked_biasesDims(biasesDims);
387     MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::any};
388
389     std::shared_ptr<mkldnn::binary_convolution_forward::desc> bin_conv_desc;
390     bin_conv_desc.reset(new binary_convolution_forward::desc(prop_kind::forward_scoring, algorithm::binary_convolution_direct,
391                                                              in_candidate, wgh_candidate, out_candidate, stride, dilation,
392                                                              paddingL, paddingR, pad_value));
393
394     descs.emplace_back(bin_conv_desc);
395 }
396
// Re-derives the final layer configuration after the primitive descriptor has
// been selected: re-enumerates all implementations, finds the one matching the
// selected index/implementation type and installs its config (or the caller's
// 'config' when the last descriptor matches the selected implementation type).
void MKLDNNBinaryConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& config) {
    auto* selectedPD = getSelectedPrimitiveDescriptor();
    if (!selectedPD) {
        return;
    }

    // Rebuild the descriptor list from the concrete configs chosen by the graph.
    createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc});

    mkldnn::primitive_attr attr;
    setPostOps(attr);

    InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig();
    // Flat index over all (descriptor, implementation) pairs, compared against
    // the selected primitive descriptor index.
    size_t selected_count = 0;
    for (size_t i = 0; i < descs.size(); i++) {
        const auto& desc = descs[i];
        try {
            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
            do {
                InferenceEngine::LayerConfig cfg;
                cfg.dynBatchSupport = true;
                for (size_t j = 0; j < desc.inputNumbers(); j++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = -1;
                    dataConfig.constant = false;
                    dataConfig.desc = getSrcMemDesc(itpd, j);
                    cfg.inConfs.push_back(dataConfig);
                }

                for (size_t j = 0; j < desc.outputNumbers(); j++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = -1;
                    if (withSum) {
                        // The sum operand is pushed as an extra input first (inPlace -1),
                        // then the output is marked in-place with input #1.
                        cfg.inConfs.push_back(dataConfig);
                        dataConfig.inPlace = 1;
                    }
                    dataConfig.constant = false;
                    dataConfig.desc = getDstMemDesc(itpd, j);

                    cfg.outConfs.push_back(dataConfig);
                }
                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());

                if (selected_count == selectedPrimitiveDescriptorIndex) {
                    // Sanity check: the enumeration must still yield the same
                    // implementation at the selected index.
                    if (impl_type != selectedPD->getImplementationType()) {
                        THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
                    }
                    rightConfig = cfg;
                }
                if (i == descs.size() - 1) {
                    // The last descriptor is the one rebuilt from 'config' above;
                    // prefer the caller-provided config when types match.
                    if (impl_type == selectedPD->getImplementationType()) {
                        rightConfig = config;
                    }
                }
                selected_count++;
            } while (itpd.next());
        } catch (std::exception& e) {
            // No implementation for this descriptor — skip it.
            continue;
        }
    }
    selectedPD->getConfig() = rightConfig;
}
458
459 void MKLDNNBinaryConvolutionNode::pushBinarizationThreshold(float value) {
460     binarizationThresholds.push_back(value);
461 }