// inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "mkldnn_node.h"
#include "mkldnn_extension_mngr.h"

#include "details/caseless.hpp"
#include <vector>
#include <string>
#include <sstream>
#include <algorithm>
#include <limits>
#include <cstdint>
#include <unordered_map>

#include <nodes/mkldnn_batchnorm_node.h>
#include <nodes/mkldnn_concat_node.h>
#include <nodes/mkldnn_conv_node.h>
#include <nodes/mkldnn_crop_node.h>
#include <nodes/mkldnn_deconv_node.h>
#include <nodes/mkldnn_eltwise_node.h>
#include <nodes/mkldnn_gemm_node.h>
#include <nodes/mkldnn_fullyconnected_node.h>
#include <nodes/mkldnn_generic_node.h>
#include <nodes/mkldnn_input_node.h>
#include <nodes/mkldnn_lrn_node.h>
#include <nodes/mkldnn_pooling_node.h>
#include <nodes/mkldnn_power_node.h>
#include <nodes/mkldnn_activation_node.h>
#include <nodes/mkldnn_reorder_node.h>
#include <nodes/mkldnn_reshape_node.h>
#include <nodes/mkldnn_roi_pooling_node.h>
#include <nodes/mkldnn_depthwise_node.h>
#include <nodes/mkldnn_softmax_node.h>
#include <nodes/mkldnn_tile_node.h>
#include <nodes/mkldnn_split_node.h>
#include <nodes/mkldnn_permute_node.h>
#include <nodes/mkldnn_memory_node.hpp>
#include <nodes/mkldnn_rnn.h>
#include <nodes/mkldnn_quantize_node.h>
#include <nodes/mkldnn_bin_conv_node.h>
#include <mkldnn_types.h>
#include "mkldnn_extension_utils.h"
#include "mkldnn_plugin.h"
#include "ie_memcpy.h"

using namespace mkldnn;
using namespace MKLDNNPlugin;

using namespace InferenceEngine::details;

std::vector<MKLDNNNode::Registry::CreatorByLayerFunction> MKLDNNNode::Registry::_dataByLayer;

MKLDNNNode::Register<MKLDNNGenericNode> MKLDNNGenericNode::reg;
MKLDNNNode::Register<MKLDNNBatchNormalizationNode> MKLDNNBatchNormalizationNode::reg;
MKLDNNNode::Register<MKLDNNConcatNode> MKLDNNConcatNode::reg;
MKLDNNNode::Register<MKLDNNConvolutionNode> MKLDNNConvolutionNode::reg;
MKLDNNNode::Register<MKLDNNCropNode> MKLDNNCropNode::reg;
MKLDNNNode::Register<MKLDNNDeconvolutionNode> MKLDNNDeconvolutionNode::reg;
MKLDNNNode::Register<MKLDNNEltwiseNode> MKLDNNEltwiseNode::reg;
MKLDNNNode::Register<MKLDNNGemmNode> MKLDNNGemmNode::reg;
MKLDNNNode::Register<MKLDNNFullyConnectedNode> MKLDNNFullyConnectedNode::reg;
MKLDNNNode::Register<MKLDNNInputNode> MKLDNNInputNode::reg;
MKLDNNNode::Register<MKLDNNLrnNode> MKLDNNLrnNode::reg;
MKLDNNNode::Register<MKLDNNPoolingNode> MKLDNNPoolingNode::reg;
MKLDNNNode::Register<MKLDNNPowerNode> MKLDNNPowerNode::reg;
MKLDNNNode::Register<MKLDNNActivationNode> MKLDNNActivationNode::reg;
MKLDNNNode::Register<MKLDNNDepthwiseNode> MKLDNNDepthwiseNode::reg;
MKLDNNNode::Register<MKLDNNReorderNode> MKLDNNReorderNode::reg;
MKLDNNNode::Register<MKLDNNReshapeNode> MKLDNNReshapeNode::reg;
MKLDNNNode::Register<MKLDNNROIPoolingNode> MKLDNNROIPoolingNode::reg;
MKLDNNNode::Register<MKLDNNSoftMaxNode> MKLDNNSoftMaxNode::reg;
MKLDNNNode::Register<MKLDNNSplitNode> MKLDNNSplitNode::reg;
MKLDNNNode::Register<MKLDNNTileNode> MKLDNNTileNode::reg;
MKLDNNNode::Register<MKLDNNPermuteNode> MKLDNNPermuteNode::reg;
MKLDNNNode::Register<MKLDNNQuantizeNode> MKLDNNQuantizeNode::reg;
MKLDNNNode::Register<MKLDNNBinaryConvolutionNode> MKLDNNBinaryConvolutionNode::reg;
MKLDNNNode::Register<MKLDNNMemoryInputNode> MKLDNNMemoryInputNode::reg;
MKLDNNNode::Register<MKLDNNMemoryOutputNode> MKLDNNMemoryOutputNode::reg;
MKLDNNNode::Register<MKLDNNRNN> MKLDNNRNN::reg;

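// Base constructor: caches the layer name and type string, resolves the node Type from the type
// string, collects input/output dimensions from the CNNLayer, and parses the optional
// "PrimitivesPriority" parameter into a list of preferred CPU implementation types.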
MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng)
        : cnnLayer(layer), name(layer->name), typeStr(layer->type), type(TypeFromName(layer->type)), engine(eng),
          selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
          profilingTask(name) {
    if (!layer->outData.empty()) {
        for (const auto& outData : layer->outData) {
            outDims.emplace_back(outData->getDims());
        }
    } else {
        if (!(CaselessEq<std::string>()(layer->type, "memory") ||
            CaselessEq<std::string>()(layer->type, "memoryinput") ||
            CaselessEq<std::string>()(layer->type, "output") ||
            CaselessEq<std::string>()(layer->type, "reorder"))) {
            THROW_IE_EXCEPTION << "Inappropriate layer type: " << layer->type << " name: " << layer->name;
        }
    }

    for (const auto& inData : layer->insData) {
        inDims.emplace_back(inData.lock()->getDims());
    }
    if (layer->params.find("PrimitivesPriority") != layer->params.end()) {
        std::istringstream stream(layer->params["PrimitivesPriority"]);
        std::string str;
        while (getline(stream, str, ',')) {
            if (str.substr(0, 4) != "cpu:")
                continue;
            implPriorities.push_back(parse_impl_name(str));
            if (implPriorities[implPriorities.size() - 1] == impl_desc_type::unknown &&
                    str != "cpu:unknown")
                THROW_IE_EXCEPTION << "Unsupported CPU implementation " << str << " for node " << getName();
        }
    }
}

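// Registers the edge with both of its endpoints: the parent node records it as a child edge and
// the child node records it as a parent edge.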
void MKLDNNNode::addEdge(const MKLDNNEdgeWeakPtr& edge) {
    auto edgePtr = edge.lock();
    if (!edgePtr)
        return;
    auto parentPtr = edgePtr->getParent();
    auto childPtr = edgePtr->getChild();
    if (!parentPtr || !childPtr)
        return;

    parentPtr->childEdges.push_back(edge);
    childPtr->parentEdges.push_back(edge);
}

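// Detaches the edge from both endpoints by erasing the matching entry from the child's
// parent-edge list and from the parent's child-edge list.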
void MKLDNNNode::removeEdge(const MKLDNNEdgeWeakPtr& edge) {
    auto edgePtr = edge.lock();
    if (!edgePtr)
        return;
    auto parentPtr = edgePtr->getParent();
    auto childPtr = edgePtr->getChild();
    if (!parentPtr || !childPtr)
        return;
    for (auto it = childPtr->parentEdges.begin(); it != childPtr->parentEdges.end(); it++) {
        auto parentEdge = (*it).lock();
        if (parentEdge && parentEdge->getChild() == childPtr && parentEdge->getParent() == parentPtr) {
            childPtr->parentEdges.erase(it);
            break;
        }
    }
    for (auto it = parentPtr->childEdges.begin(); it != parentPtr->childEdges.end(); it++) {
        auto childEdge = (*it).lock();
        if (childEdge && childEdge->getChild() == childPtr && childEdge->getParent() == parentPtr) {
            parentPtr->childEdges.erase(it);
            break;
        }
    }
}

void MKLDNNNode::remove() {
    auto parent_edges = parentEdges;
    for (const auto &parentEdge : parent_edges) {
        removeEdge(parentEdge);
    }
    auto child_edges = childEdges;
    for (const auto &childEdge : child_edges) {
        removeEdge(childEdge);
    }
}

MKLDNNNode* MKLDNNNode::CreateNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng,
                                   const MKLDNNExtensionManager::Ptr& extMgr) {
    MKLDNNNode* newNode = Registry::CreateNode(layer, eng, extMgr);
    if (!newNode)
        THROW_IE_EXCEPTION << "Unsupported primitive of type: " << layer->type << " name: " << layer->name;

    return newNode;
}

bool MKLDNNNode::isEdgesEmpty(const std::vector<MKLDNNEdgeWeakPtr>& edges) const {
    for (auto &edge : edges) {
        if (edge.lock())
            return false;
    }
    return true;
}

void MKLDNNNode::selectOptimalPrimitiveDescriptor() {
    selectPreferPrimitiveDescriptor(getPrimitivesPriority());
}

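// Walks the implementation priority list and, for each implementation type, picks the supported
// primitive descriptor whose input formats match the most parent outputs; falls back to the first
// supported descriptor if nothing from the priority list is available.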
void MKLDNNNode::selectPreferPrimitiveDescriptor(const std::vector<impl_desc_type>& priority) {
    for (auto& type : priority) {
        int selectedPrimitive = -1;
        int equalsFormatCount = -1;
        for (size_t i = 0; i < getSupportedPrimitiveDescriptors().size(); i++) {
            impl_desc_type supportedType = getSupportedPrimitiveDescriptors()[i].getImplementationType();
            if (type == supportedType) {
                int equalsLocalFormatCount = 0;
                if (getSupportedPrimitiveDescriptors()[i].getConfig().inConfs.size() > getParentEdges().size())
                    continue;
                for (size_t j = 0; j < getSupportedPrimitiveDescriptors()[i].getConfig().inConfs.size(); j++) {
                    auto parentEdge = getParentEdgeAt(j);
                    auto parentPtr = parentEdge->getParent();
                    auto parent_spd = parentPtr->getSelectedPrimitiveDescriptor();

                    if (parent_spd != nullptr && !parent_spd->getConfig().outConfs.empty()) {
                        int inNum = parentEdge->getInputNum();
                        if (inNum < 0 || inNum >= parent_spd->getConfig().outConfs.size()) {
                            inNum = 0;
                        }
                        if (MKLDNNExtensionUtils::initTensorsAreEqual(
                                getSupportedPrimitiveDescriptors()[i].getConfig().inConfs[j].desc,
                                parent_spd->getConfig().outConfs[inNum].desc)) {
                            equalsLocalFormatCount++;
                        }
                    }
                }
                if (equalsLocalFormatCount > equalsFormatCount) {
                    equalsFormatCount = equalsLocalFormatCount;
                    selectedPrimitive = static_cast<int>(i);
                }
            }
        }
        if (selectedPrimitive >= 0) {
            selectPrimitiveDescriptorByIndex(selectedPrimitive);
            return;
        }
    }

    if (getSupportedPrimitiveDescriptors().empty())
        THROW_IE_EXCEPTION << "Supported primitive descriptors list is empty for node: " << getName();
    // Fallback: if there are no primitives from the priority list, just select the first one.
    selectPrimitiveDescriptorByIndex(0);
}

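// A node may execute in-place only when it has a single parent edge, that parent has a single
// consumer, it is not the case that a constant parent feeds a non-constant node, and every child
// edge has the same dimensions as the input.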
bool MKLDNNNode::canBeInPlace() const {
    if (getParentEdges().size() != 1 || getParentEdgeAt(0)->getParent()->getChildEdges().size() != 1 ||
            (getParentEdgeAt(0)->getParent()->isConstant() && !getParentEdgeAt(0)->getChild()->isConstant()))
        return false;

    MKLDNNDims dims = getParentEdgeAt(0)->getDims();
    for (size_t cIdx = 0; cIdx < getChildEdges().size(); cIdx++) {
        if (getChildEdgeAt(cIdx)->getDims() != dims) {
            return false;
        }
    }
    return true;
}

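// For every in-place edge that is still in the NotAllocated state, wraps the already allocated
// buffer of the paired edge in a new MKLDNNMemory object that uses the selected descriptor and
// marks the edge Allocated.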
void MKLDNNNode::resolveNotAllocatedEdges() {
    const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor();
    if (!selected_pd)
        THROW_IE_EXCEPTION << "Cannot find selected primitive descriptor for node: " << getName();
    for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) {
        auto parentEdge = getParentEdgeAt(i);

        if (parentEdge->getStatus() != MKLDNNEdge::Status::NotAllocated || selected_pd->getConfig().inConfs[i].inPlace < 0)
            continue;

        auto * memPtr = reinterpret_cast<char*>(parentEdge->getMemory().GetData());
        parentEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine()));
        parentEdge->getMemoryPtr()->Create(MKLDNNMemoryDesc(selected_pd->getConfig().inConfs[i].desc), memPtr);

        parentEdge->changeStatus(MKLDNNEdge::Status::Allocated);
    }
    for (size_t i = 0; i < getChildEdges().size() && i < selected_pd->getConfig().outConfs.size(); i++) {
        auto childEdge = getChildEdgeAt(i);

        if (childEdge->getStatus() != MKLDNNEdge::Status::NotAllocated || selected_pd->getConfig().outConfs[i].inPlace < 0)
            continue;

        auto * memPtr = reinterpret_cast<char*>(childEdge->getMemory().GetData());
        childEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine()));
        childEdge->getMemoryPtr()->Create(MKLDNNMemoryDesc(selected_pd->getConfig().outConfs[i].desc), memPtr);

        childEdge->changeStatus(MKLDNNEdge::Status::Allocated);
    }
}

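// Builds a human-readable implementation type string for the performance counters by decomposing
// the selected impl_desc_type into its component flags and appending the input precision.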
std::string MKLDNNNode::getPrimitiveDescriptorType() {
    auto selectedPrimitiveDesc = getSelectedPrimitiveDescriptor();

    impl_desc_type type = impl_desc_type::undef;
    if (selectedPrimitiveDesc) {
        type = selectedPrimitiveDesc->getImplementationType();
    }

    std::string str_type;

    auto add_type = [&](std::string t) {
        if (!str_type.empty() && t.c_str()[0] != '_')
            str_type += "_";
        str_type += t;
    };

#define SEARCH_TYPE(_type)                                          \
    if ((type & impl_desc_type::_type) == impl_desc_type::_type)    \
        add_type(#_type)

    SEARCH_TYPE(undef);
    SEARCH_TYPE(reorder);
    SEARCH_TYPE(jit);
    SEARCH_TYPE(gemm);
    SEARCH_TYPE(ref);

    SEARCH_TYPE(avx512);
    SEARCH_TYPE(avx2);
    SEARCH_TYPE(avx);
    SEARCH_TYPE(sse42);
    SEARCH_TYPE(blas);
    SEARCH_TYPE(any);

    SEARCH_TYPE(winograd);
    SEARCH_TYPE(_dw);
    SEARCH_TYPE(_1x1);

    if (type == impl_desc_type::unknown)
        str_type = "unknown";
    else if (str_type.empty())
        str_type = "undef";

    // Add the layer precision to the performance counters as one of the tokens.
    // Currently we treat a layer as executing in int8 mode if its input is I8 or U8. If the input is U8,
    // we still add I8, since I8 is a special placeholder. The real calculation precision might be quite
    // complex and in most cases it is mixed precision.
    if (selectedPrimitiveDesc) {
        if (!selectedPrimitiveDesc->getConfig().inConfs.empty()) {
            if (selectedPrimitiveDesc->getConfig().inConfs[0].desc.getPrecision() != InferenceEngine::Precision::U8) {
                str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().inConfs[0].desc.getPrecision().name());
            } else {
                str_type += "_I8";
            }
        }
    }

    return str_type;
}

const MKLDNNEdgePtr MKLDNNNode::getParentEdgeAt(size_t idx) const {
    if (idx >= parentEdges.size())
        THROW_IE_EXCEPTION << "Node " << getName() << " contains fewer parent edges than " << idx;
    auto parentEdgePtr = parentEdges[idx].lock();
    if (!parentEdgePtr)
        THROW_IE_EXCEPTION << "Node " << getName() << " contains empty parent edge for index " << idx;
    return parentEdgePtr;
}

const MKLDNNEdgePtr MKLDNNNode::getChildEdgeAt(size_t idx) const {
    if (idx >= childEdges.size())
        THROW_IE_EXCEPTION << "Node " << getName() << " contains fewer child edges than " << idx;
    auto childEdgePtr = childEdges[idx].lock();
    if (!childEdgePtr)
        THROW_IE_EXCEPTION << "Node " << getName() << " contains empty child edge for index " << idx;
    return childEdgePtr;
}

const std::vector<MKLDNNEdgePtr> MKLDNNNode::getParentEdgesAtPort(size_t idx) const {
    if (idx >= inDims.size())
        THROW_IE_EXCEPTION << "Node " << getName() << " contains fewer input ports than " << idx;

    std::vector<MKLDNNEdgePtr> res;
    for (auto &edge_w : parentEdges) {
        auto edge = edge_w.lock();
        if (!edge)
            THROW_IE_EXCEPTION << "Node " << getName() << " contains dead weak ptr";
        if (edge->getOutputNum() == idx) res.push_back(edge);
    }
    return res;
}

const std::vector<MKLDNNEdgePtr> MKLDNNNode::getChildEdgesAtPort(size_t idx) const {
    if (idx >= outDims.size())
        THROW_IE_EXCEPTION << "Node " << getName() << " contains fewer output ports than " << idx;

    std::vector<MKLDNNEdgePtr> res;
    for (auto &edge_w : childEdges) {
        auto edge = edge_w.lock();
        if (!edge)
            THROW_IE_EXCEPTION << "Node " << getName() << " contains dead weak ptr";
        if (edge->getInputNum() == idx) res.push_back(edge);
    }
    return res;
}

std::vector<memory::format> MKLDNNNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const {
    if (dims.ndims() == 1)
        return {memory::format::x};
    else if (dims.ndims() == 2)
        return {memory::format::nc};
    else if (dims.ndims() == 3)
        return {memory::format::tnc, memory::format::ntc};
    else if (dims.ndims() == 4)
        return {memory::format::nchw, memory::format::nChw8c, memory::format::nChw16c};
    else if (dims.ndims() == 5)
        return {memory::format::ncdhw, memory::format::nCdhw8c, memory::format::nCdhw16c};
    return {memory::format::any};
}

void MKLDNNNode::execute(mkldnn::stream strm) {
    if (prim) {
        strm.submit({*prim});
    }
}

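// Default implementation: iterates over all created MKL-DNN descriptors, enumerates every
// available primitive implementation, and records its input/output configurations as supported
// primitive descriptors.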
void MKLDNNNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    for (auto& desc : descs) {
        try {
            std::shared_ptr<primitive_desc_iterator> itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine));
            do {
                InferenceEngine::LayerConfig config;
                config.dynBatchSupport = true;
                for (size_t i = 0; i < desc.inputNumbers(); i++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = -1;
                    dataConfig.constant = false;
                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getSrcMemDesc(*itpd, i));
                    config.inConfs.push_back(dataConfig);
                }

                for (size_t i = 0; i < desc.outputNumbers(); i++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = canBeInPlace() ? 0 : -1;
                    dataConfig.constant = false;
                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getDstMemDesc(*itpd, i));
                    config.outConfs.push_back(dataConfig);
                }
                impl_desc_type impl_type = parse_impl_name(itpd->get_impl_info_str());

                supportedPrimitiveDescriptors.emplace_back(config, impl_type);
            } while (itpd->next());
        } catch (std::exception& e) {
            // An exception is thrown when no implementation is found for the descriptor; skip it.
            continue;
        }
    }
}

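// Re-creates the MKL-DNN descriptors for the given configuration and updates the selected
// primitive descriptor so that its configuration matches the one actually chosen for the graph.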
void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) {
    auto* selectedPD = getSelectedPrimitiveDescriptor();
    if (!selectedPD) {
        return;
    }
    std::vector<InferenceEngine::TensorDesc> inDescs;
    for (const auto& inConf : config.inConfs)
        inDescs.push_back(inConf.desc);
    std::vector<InferenceEngine::TensorDesc> outDescs;
    for (const auto& outConf : config.outConfs)
        outDescs.push_back(outConf.desc);
    createDescriptor({inDescs}, {outDescs});

    std::shared_ptr<mkldnn::primitive_attr> attr = initPrimitiveAttr();

    InferenceEngine::LayerConfig rightConfig = getSelectedPrimitiveDescriptor()->getConfig();
    size_t selected_count = 0;
    for (size_t j = 0; j < descs.size(); j++) {
        try {
            const auto &desc = descs[j];
            std::shared_ptr<primitive_desc_iterator> itpd;
            if (attr == nullptr) {
                itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine));
            } else {
                itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine, *(attr.get())));
            }
            do {
                InferenceEngine::LayerConfig cfg;
                cfg.dynBatchSupport = true;
                for (size_t i = 0; i < desc.inputNumbers(); i++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = canBeInPlace() ? 0 : -1;
                    dataConfig.constant = false;
                    dataConfig.desc = getSrcMemDesc(*itpd, i);
                    cfg.inConfs.push_back(dataConfig);
                }

                for (size_t i = 0; i < desc.outputNumbers(); i++) {
                    InferenceEngine::DataConfig dataConfig;
                    dataConfig.inPlace = -1;
                    dataConfig.constant = false;
                    dataConfig.desc = getDstMemDesc(*itpd, i);
                    cfg.outConfs.push_back(dataConfig);
                }
                impl_desc_type impl_type = parse_impl_name(itpd->get_impl_info_str().c_str());
                if (selected_count == selectedPrimitiveDescriptorIndex) {
                    if (impl_type != selectedPD->getImplementationType()) {
                        THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
                    }
                    rightConfig = cfg;
                }
                if (j == descs.size() - 1) {
                    if (impl_type == selectedPD->getImplementationType()) {
                        rightConfig = config;
                    }
                }
                selected_count++;
            } while (itpd->next());
        } catch(...) {}
    }

    if (descs.empty()) {
        const auto& selectedConfig = selectedPD->getConfig();
        if (selectedConfig.inConfs.size() != config.inConfs.size() || selectedConfig.outConfs.size() != config.outConfs.size())
            return;

        for (size_t i = 0; i < selectedConfig.inConfs.size(); i++) {
            if (selectedConfig.inConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY &&
                !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.inConfs[i].desc, config.inConfs[i].desc))
                THROW_IE_EXCEPTION << "Incorrect descriptor for node: " << getName();
        }

        for (size_t i = 0; i < selectedConfig.outConfs.size(); i++) {
            if (selectedConfig.outConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY &&
                !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.outConfs[i].desc, config.outConfs[i].desc))
                THROW_IE_EXCEPTION << "Incorrect descriptor for node: " << getName();
        }
        rightConfig = config;
    }

    selectedPD->getConfig() = rightConfig;
}

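// Packs the weights (or biases) of this layer, together with those of any merged layers,
// into a single internal blob with a layout suitable for MKL-DNN.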
InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeVector dims, bool weights) {
    auto checkSize = [](size_t dst_size, size_t src_size) {
        if (dst_size < src_size) {
            THROW_IE_EXCEPTION << "Cannot create internal buffer. Buffer can be overrun.";
        }
    };
    auto * wLayer = dynamic_cast<InferenceEngine::WeightableLayer*>(getCnnLayer().get());
    if (wLayer == nullptr)
        THROW_IE_EXCEPTION << "Cannot get weightable layer for node " << getName() << ".";

    InferenceEngine::Blob::Ptr blb = weights ? wLayer->_weights : wLayer->_biases;

    if (blb == nullptr)
        THROW_IE_EXCEPTION << "Cannot get internal blob for node " << getName() << ".";

    auto intLayout = InferenceEngine::TensorDesc::getLayoutByDims(dims);
    if (intLayout == InferenceEngine::Layout::NCHW)
        intLayout = InferenceEngine::Layout::OIHW;

    InferenceEngine::TensorDesc desc(blb->precision(), dims, intLayout);

    auto fillInternalBlob = [&](char *data, size_t intBuffSize) {
        size_t offset = blb->byteSize();
        checkSize(intBuffSize, offset);
        ie_memcpy(data, intBuffSize, blb->buffer(), blb->byteSize());
        data += blb->byteSize();
        for (const auto &merged : getMergeWith()) {
            wLayer = dynamic_cast<InferenceEngine::WeightableLayer*>(merged->getCnnLayer().get());
            if (wLayer == nullptr)
                THROW_IE_EXCEPTION << "Cannot convert merged weightable layer for node "
                                   << getName() << ".";
            blb = weights ? wLayer->_weights : wLayer->_biases;

            if (blb == nullptr)
                THROW_IE_EXCEPTION << "Cannot get internal blob for node " << getName() << ".";
            offset += blb->byteSize();
            checkSize(intBuffSize, offset);
            ie_memcpy(data, intBuffSize, blb->buffer(), blb->byteSize());
            data += blb->byteSize();
        }
    };

    if (blb->precision() == Precision::BIN) {
        InferenceEngine::TBlob<int8_t>::Ptr internalBlob = InferenceEngine::make_shared_blob<int8_t>(desc);

        internalBlob->allocate();
        char *data = internalBlob->buffer();
        size_t intBuffSize = internalBlob->byteSize();

        fillInternalBlob(data, intBuffSize);

        return internalBlob;
    } else {
        InferenceEngine::TBlob<float>::Ptr internalBlob = InferenceEngine::make_shared_blob<float>(desc);

        internalBlob->allocate();
        char *data = internalBlob->buffer();
        size_t intBuffSize = internalBlob->byteSize();

        fillInternalBlob(data, intBuffSize);

        return internalBlob;
    }
}

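// Validates that all input/output edges have allocated memory and prepares the internal blob
// memory (weights/biases), reusing identical blobs through the plugin's weights-sharing cache.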
void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::primitive_desc_iterator& itpd) {
    for (size_t i = 0; i < getChildEdges().size(); i++) {
        auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
        if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
            THROW_IE_EXCEPTION << "Destination memory was not allocated for node " << getName()
                               << " to node " << getChildEdgeAt(i)->getChild()->getName() << ".";
    }
    for (size_t i = 0; i < getParentEdges().size(); i++) {
        auto &srcMemPtr = getParentEdgeAt(i)->getMemoryPtr();
        if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
            THROW_IE_EXCEPTION << "Input memory was not allocated for node " << getName()
                               << " from node " << getParentEdgeAt(i)->getParent()->getName() << ".";
    }
    std::vector<MKLDNNMemoryDesc> intDescs;
    for (auto &it : internalBlobDesc)
        intDescs.push_back(it(itpd, 0));

    internalBlobMemory.clear();
    for (size_t i = 0; i < internalBlobs.size(); i++) {
        const auto &internalBlob = internalBlobs[i];

        const uint64_t data_hash = Engine::GetWeightsSharing().GetHashFunc().hash(internalBlob->buffer(), internalBlob->byteSize());
        const std::string string_hash = name + "_" + std::to_string(i)
                                     + "_" + std::to_string(internalBlob->byteSize())
                                     + "_" + std::to_string(data_hash);
        MKLDNNMemoryPtr ptr =
                Engine::GetWeightsSharing().findOrCreate(string_hash, [&] () {
                    MKLDNNMemoryPtr _ptr = MKLDNNMemoryPtr(new MKLDNNMemory(engine));
                    _ptr->Create(intDescs[i]);
                    MKLDNNMemory memory(engine);

                    auto newDesc = MKLDNNMemoryDesc(internalBlob->getTensorDesc());
                    auto newFormat = newDesc.getFormat();
                    if (newFormat == mkldnn::memory::ncdhw) {
                        newFormat = mkldnn::memory::goihw;
                    }
                    if (newFormat == mkldnn::memory::nchw) {
                        newFormat = mkldnn::memory::oihw;
                    }
                    memory.Create(MKLDNNMemoryDesc(newDesc.getDims(), newDesc.getDataType(), newFormat), internalBlob->buffer());
                    auto aformat = memory.GetFormat();
                    _ptr->SetData(memory);
                    return _ptr;
                });
        internalBlobMemory.push_back(ptr);
    }
}

bool MKLDNNNode::isInplace() const {
    auto config = getSelectedPrimitiveDescriptor()->getConfig();

    for (auto &in : config.inConfs) if (in.inPlace >= 0) return true;
    for (auto &out : config.outConfs) if (out.inPlace >= 0) return true;
    return false;
}

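// Lazily evaluates whether the node is constant by propagating constness checks first through the
// child nodes (LOOK_DOWN) and then, if needed, through the parent nodes (LOOK_UP).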
bool MKLDNNNode::isConstant() {
    if (constant == ConstantType::Unknown) {
        std::vector<MKLDNNNodePtr> checkNodes;
        for (size_t i = 0; i < getChildEdges().size(); i++) {
            checkNodes.push_back(getChildEdgeAt(i)->getChild());
        }
        while (constant != ConstantType::NoConst && !checkNodes.empty()) {
            constant = checkNodes.front()->checkConstant(LOOK_DOWN, checkNodes);
            checkNodes.erase(checkNodes.begin());
        }
        if (constant != ConstantType::Const) {
            constant = ConstantType::Unknown;
            checkNodes.clear();
            for (size_t i = 0; i < getParentEdges().size(); i++) {
                checkNodes.push_back(getParentEdgeAt(i)->getParent());
            }
            while (constant != ConstantType::NoConst && !checkNodes.empty()) {
                constant = checkNodes.front()->checkConstant(LOOK_UP, checkNodes);
                checkNodes.erase(checkNodes.begin());
            }
        }
        if (constant == ConstantType::Unknown)
            constant = ConstantType::NoConst;
    }
    return constant == ConstantType::Const;
}

MKLDNNNode::ConstantType MKLDNNNode::checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes) {
    if (constant == ConstantType::Unknown) {
        if (look == LOOK_DOWN) {
            for (size_t i = 0; i < getChildEdges().size(); i++) {
                if (std::find(checkNodes.begin(), checkNodes.end(), getChildEdgeAt(i)->getChild()) == checkNodes.end())
                    checkNodes.push_back(getChildEdgeAt(i)->getChild());
            }
        } else {
            for (size_t i = 0; i < getParentEdges().size(); i++) {
                if (std::find(checkNodes.begin(), checkNodes.end(), getParentEdgeAt(i)->getParent()) == checkNodes.end())
                    checkNodes.push_back(getParentEdgeAt(i)->getParent());
            }
        }
    }
    return constant;
}

void MKLDNNNode::addOriginalLayer(const InferenceEngine::CNNLayerPtr &layer) {
    if (!layer) return;
    if (originalLayers.empty()) {
        originalLayers = layer->name;
    } else {
        originalLayers += "," + layer->name;
    }
}

void MKLDNNNode::cleanup() {
    internalBlobs.clear();
    cnnLayer.reset();

    for (auto it : fusedWith) {
        it->cleanup();
    }

    for (auto it : mergedWith) {
        it->cleanup();
    }
}

std::string MKLDNNNode::typeToStr(Type type) {
    switch (type) {
        case Generic:
            return "Generic";
        case Reorder:
            return "Reorder";
        case Input:
            return "Input";
        case Output:
            return "Output";
        case Convolution:
            return "Convolution";
        case Deconvolution:
            return "Deconvolution";
        case Convolution_Sum:
            return "Convolution_Sum";
        case Convolution_Activation:
            return "Convolution_Activation";
        case Convolution_Sum_Activation:
            return "Convolution_Sum_Activation";
        case Activation:
            return "Activation";
        case Lrn:
            return "Lrn";
        case Pooling:
            return "Pooling";
        case FullyConnected:
            return "FullyConnected";
        case FullyConnected_Activation:
            return "FullyConnected_Activation";
        case Gemm:
            return "Gemm";
        case SoftMax:
            return "SoftMax";
        case Split:
            return "Split";
        case Concatenation:
            return "Concatenation";
        case Power:
            return "Power";
        case Depthwise:
            return "Depthwise";
        case Crop:
            return "Crop";
        case Reshape:
            return "Reshape";
        case Tile:
            return "Tile";
        case SimplerNMS:
            return "SimplerNMS";
        case ROIPooling:
            return "ROIPooling";
        case BatchNormalization:
            return "BatchNormalization";
        case Flatten:
            return "Flatten";
        case Permute:
            return "Permute";
        case Copy:
            return "Copy";
        case MemoryOutput:
            return "MemoryOutput";
        case MemoryInput:
            return "MemoryInput";
        case RNNSeq:
            return "RNNSeq";
        case RNNCell:
            return "RNNCell";

        default:
            return "Unknown";
    }
}

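// Returns the implementation priority list: user-specified priorities (from "PrimitivesPriority")
// come first, followed by the default ordering from the most to the least optimized kernels.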
const std::vector<impl_desc_type>& MKLDNNNode::getPrimitivesPriority() {
    std::vector<impl_desc_type> priorities = {
            impl_desc_type::unknown,
            impl_desc_type::jit_uni_dw,
            impl_desc_type::jit_uni_1x1,
            impl_desc_type::jit_uni,
            impl_desc_type::jit_avx512_dw,
            impl_desc_type::jit_avx512_1x1,
            impl_desc_type::jit_avx512,
            impl_desc_type::jit_avx2_dw,
            impl_desc_type::jit_avx2_1x1,
            impl_desc_type::jit_avx2,
            impl_desc_type::jit_avx_dw,
            impl_desc_type::jit_avx_1x1,
            impl_desc_type::jit_avx,
            impl_desc_type::jit_sse42_dw,
            impl_desc_type::jit_sse42_1x1,
            impl_desc_type::jit_sse42,
            impl_desc_type::gemm_any,
            impl_desc_type::gemm_blas,
            impl_desc_type::gemm_avx512,
            impl_desc_type::gemm_avx2,
            impl_desc_type::gemm_avx,
            impl_desc_type::gemm_sse42,
            impl_desc_type::ref_any,
            impl_desc_type::ref,
    };
    for (const auto& impl : priorities) {
        if (std::find(implPriorities.begin(), implPriorities.end(), impl) == implPriorities.end())
            implPriorities.push_back(impl);
    }
    return implPriorities;
}

bool MKLDNNNode::isUninitTensorDesc(const InferenceEngine::TensorDesc& desc) const {
    if (desc.getLayout() == InferenceEngine::Layout::ANY)
        return true;

    if (desc.getBlockingDesc().getOffsetPadding() == std::numeric_limits<size_t>::max())
        return true;

    for (size_t i = 0; i < desc.getBlockingDesc().getOrder().size(); i++) {
        if (desc.getBlockingDesc().getOffsetPaddingToData()[i] == std::numeric_limits<size_t>::max() ||
                desc.getBlockingDesc().getStrides()[i] == std::numeric_limits<size_t>::max())
            return true;
    }

    return false;
}

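// Resolves an uninitialized input tensor descriptor by borrowing the layout of the parent's
// selected output configuration when possible, otherwise deriving a default layout from the dims.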
InferenceEngine::TensorDesc MKLDNNNode::getConfiguredInputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const {
    if (!isUninitTensorDesc(config.inConfs[idx].desc))
        return config.inConfs[idx].desc;

    int num = getParentEdgeAt(idx)->getInputNum();
    auto *selectedPD = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor();
    if (!selectedPD)
        THROW_IE_EXCEPTION << "Cannot get selected primitive descriptor for node: " << getParentEdgeAt(idx)->getParent()->getName();

    if (selectedPD->getConfig().outConfs.size() <= num)
        num = 0;

    if (config.inConfs[idx].inPlace >= 0) {
        return getConfiguredOutputDesc(config, static_cast<size_t>(config.inConfs[idx].inPlace));
    }

    if (num >= 0) {
        auto parentConf = selectedPD->getConfig().outConfs[num];
        parentConf.desc.setPrecision(config.inConfs[idx].desc.getPrecision());
        if (isUninitTensorDesc(parentConf.desc) && parentConf.inPlace >= 0)
            getParentEdgeAt(idx)->getParent()->initOptimalPrimitiveDescriptor();
        parentConf = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num];
        if (!isUninitTensorDesc(parentConf.desc) &&
            MKLDNNExtensionUtils::initTensorsAreEqual(parentConf.desc, config.inConfs[idx].desc)) {
            return parentConf.desc;
        }

        if (config.inConfs[idx].desc.getLayout() == InferenceEngine::Layout::ANY &&
            parentConf.desc.getLayout() != InferenceEngine::Layout::ANY) {
            return InferenceEngine::TensorDesc(parentConf.desc.getPrecision(),
                                               parentConf.desc.getDims(), {
                                                       parentConf.desc.getBlockingDesc().getBlockDims(),
                                                       parentConf.desc.getBlockingDesc().getOrder()
                                               });
        }
    }

    if (config.inConfs[idx].desc.getLayout() != InferenceEngine::Layout::ANY) {
        return InferenceEngine::TensorDesc(config.inConfs[idx].desc.getPrecision(),
                                           config.inConfs[idx].desc.getDims(), {
                                                   config.inConfs[idx].desc.getBlockingDesc().getBlockDims(),
                                                   config.inConfs[idx].desc.getBlockingDesc().getOrder()
                                           });
    }

    return InferenceEngine::TensorDesc(config.inConfs[idx].desc.getPrecision(),
                                       config.inConfs[idx].desc.getDims(),
                                       InferenceEngine::TensorDesc::getLayoutByDims(config.inConfs[idx].desc.getDims()));
}

InferenceEngine::TensorDesc MKLDNNNode::getConfiguredOutputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const {
    if (!isUninitTensorDesc(config.outConfs[idx].desc))
        return config.outConfs[idx].desc;

    int num = getChildEdgeAt(idx)->getOutputNum();
    auto *selectedPD = getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor();
    if (!selectedPD)
        THROW_IE_EXCEPTION << "Cannot get selected primitive descriptor for node: " << getChildEdgeAt(idx)->getChild()->getName();

    if (selectedPD->getConfig().inConfs.size() <= num)
        num = 0;

    if (config.outConfs[idx].inPlace >= 0) {
        return getConfiguredInputDesc(config, static_cast<size_t>(config.outConfs[idx].inPlace));
    }

    if (num >= 0) {
        auto childConf = selectedPD->getConfig().inConfs[num];
        childConf.desc.setPrecision(config.outConfs[idx].desc.getPrecision());
        if (isUninitTensorDesc(childConf.desc) && childConf.inPlace >= 0)
            getChildEdgeAt(idx)->getChild()->initOptimalPrimitiveDescriptor();
        childConf = getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num];
        if (!isUninitTensorDesc(childConf.desc) &&
            MKLDNNExtensionUtils::initTensorsAreEqual(childConf.desc, config.outConfs[idx].desc)) {
            return childConf.desc;
        }
        if (config.outConfs[idx].desc.getLayout() == InferenceEngine::Layout::ANY &&
            childConf.desc.getLayout() != InferenceEngine::Layout::ANY) {
            return InferenceEngine::TensorDesc(childConf.desc.getPrecision(),
                                               childConf.desc.getDims(), {
                                                       childConf.desc.getBlockingDesc().getBlockDims(),
                                                       childConf.desc.getBlockingDesc().getOrder()
                                               });
        }
    }

    if (config.outConfs[idx].desc.getLayout() != InferenceEngine::Layout::ANY) {
        return InferenceEngine::TensorDesc(config.outConfs[idx].desc.getPrecision(),
                                           config.outConfs[idx].desc.getDims(), {
                                                   config.outConfs[idx].desc.getBlockingDesc().getBlockDims(),
                                                   config.outConfs[idx].desc.getBlockingDesc().getOrder()
                                           });
    }

    return InferenceEngine::TensorDesc(config.outConfs[idx].desc.getPrecision(),
                                       config.outConfs[idx].desc.getDims(),
                                       InferenceEngine::TensorDesc::getLayoutByDims(config.outConfs[idx].desc.getDims()));
}

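// Finalizes the selected configuration: fills in any still-uninitialized input/output tensor
// descriptors and re-initializes the descriptor with the completed configuration.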
void MKLDNNNode::initOptimalPrimitiveDescriptor() {
    auto config = getSelectedPrimitiveDescriptor()->getConfig();
    if (!isInitConfig(config)) {
        for (size_t i = 0; i < config.inConfs.size(); i++) {
            config.inConfs[i].desc = getConfiguredInputDesc(config, i);
        }

        for (size_t i = 0; i < config.outConfs.size(); i++) {
            config.outConfs[i].desc = getConfiguredOutputDesc(config, i);
        }
        initDescriptor(config);
    } else if (getType() != RNNSeq && getType() != RNNCell) {
        initDescriptor(config);
    }
}

bool MKLDNNNode::isInitConfig(const InferenceEngine::LayerConfig& config) const {
    for (const auto& configs : {config.inConfs, config.outConfs}) {
        for (const auto &dc : configs) {
            if (isUninitTensorDesc(dc.desc))
                return false;
        }
    }
    return true;
}

MKLDNNMemoryDesc MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
    InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.src_primitive_desc(idx).desc());
    if (desc.getLayout() == InferenceEngine::Layout::ANY)
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            getParentEdgeAt(idx)->getDims().ToSizeVector(),
                                                            desc.getLayout()));
    else
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            getParentEdgeAt(idx)->getDims().ToSizeVector(),
                                                            desc.getBlockingDesc()));
}

MKLDNNMemoryDesc MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
    InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.dst_primitive_desc(idx).desc());
    if (desc.getLayout() == InferenceEngine::Layout::ANY)
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            getChildEdgeAt(idx)->getDims().ToSizeVector(),
                                                            desc.getLayout()));
    else
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            getChildEdgeAt(idx)->getDims().ToSizeVector(),
                                                            desc.getBlockingDesc()));
}

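// Returns the batch size to execute with: the full batch unless a dynamic batch limit is set.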
int MKLDNNNode::batchToProcess() {
    return dynBatchLim == 0 ? getMaxBatch() : std::min<int>(getMaxBatch(), dynBatchLim);
}

int MKLDNNNode::getMaxBatch() {
    // FIXME: batch != 0 dims number
    if (!inDims.empty())
        return inDims[0][0];
    if (!outDims.empty())
        return outDims[0][0];
    return 0;
}

void MKLDNNNode::setDynamicBatchLim(int lim) {
    dynBatchLim = lim;
    if (prim) {
        prim.setBatchLimit(batchToProcess(), getParentEdges().size(), getChildEdges().size());
    }
}

MKLDNNNode *MKLDNNNode::Registry::CreateNode(const InferenceEngine::CNNLayerPtr &layer, const mkldnn::engine& eng,
                                             const MKLDNNExtensionManager::Ptr& extMgr) {
    for (auto maker : _dataByLayer) {
        std::unique_ptr<MKLDNNNode> ol(maker(layer, eng));
        if (ol != nullptr && ol->created(extMgr))
            return ol.release();
    }
    return nullptr;
}

void MKLDNNNode::Registry::RegisterNode(MKLDNNNode::Registry::CreatorByLayerFunction f) {
    _dataByLayer.push_back(f);
}