1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "mkldnn_node.h"
6 #include "mkldnn_extension_mngr.h"
8 #include "details/caseless.hpp"
13 #include <unordered_map>
15 #include <nodes/mkldnn_batchnorm_node.h>
16 #include <nodes/mkldnn_concat_node.h>
17 #include <nodes/mkldnn_conv_node.h>
18 #include <nodes/mkldnn_crop_node.h>
19 #include <nodes/mkldnn_deconv_node.h>
20 #include <nodes/mkldnn_eltwise_node.h>
21 #include <nodes/mkldnn_gemm_node.h>
22 #include <nodes/mkldnn_fullyconnected_node.h>
23 #include <nodes/mkldnn_generic_node.h>
24 #include <nodes/mkldnn_input_node.h>
25 #include <nodes/mkldnn_lrn_node.h>
26 #include <nodes/mkldnn_pooling_node.h>
27 #include <nodes/mkldnn_power_node.h>
28 #include <nodes/mkldnn_activation_node.h>
29 #include <nodes/mkldnn_reorder_node.h>
30 #include <nodes/mkldnn_reshape_node.h>
31 #include <nodes/mkldnn_roi_pooling_node.h>
32 #include <nodes/mkldnn_depthwise_node.h>
33 #include <nodes/mkldnn_softmax_node.h>
34 #include <nodes/mkldnn_tile_node.h>
35 #include <nodes/mkldnn_split_node.h>
36 #include <nodes/mkldnn_permute_node.h>
37 #include <nodes/mkldnn_memory_node.hpp>
38 #include <nodes/mkldnn_rnn.h>
39 #include <nodes/mkldnn_quantize_node.h>
40 #include <nodes/mkldnn_bin_conv_node.h>
41 #include <mkldnn_types.h>
42 #include "mkldnn_extension_utils.h"
43 #include "mkldnn_plugin.h"
44 #include "ie_memcpy.h"
46 using namespace mkldnn;
47 using namespace MKLDNNPlugin;
49 using namespace InferenceEngine::details;
// Storage for node-creator callbacks.  Each Register<T> member below appends
// a creator function for its node class during static initialization, so the
// Registry can later instantiate the right node for a given CNN layer.
std::vector<MKLDNNNode::Registry::CreatorByLayerFunction> MKLDNNNode::Registry::_dataByLayer;

// Static registration of every built-in MKLDNN node implementation.
MKLDNNNode::Register<MKLDNNGenericNode> MKLDNNGenericNode::reg;
MKLDNNNode::Register<MKLDNNBatchNormalizationNode> MKLDNNBatchNormalizationNode::reg;
MKLDNNNode::Register<MKLDNNConcatNode> MKLDNNConcatNode::reg;
MKLDNNNode::Register<MKLDNNConvolutionNode> MKLDNNConvolutionNode::reg;
MKLDNNNode::Register<MKLDNNCropNode> MKLDNNCropNode::reg;
MKLDNNNode::Register<MKLDNNDeconvolutionNode> MKLDNNDeconvolutionNode::reg;
MKLDNNNode::Register<MKLDNNEltwiseNode> MKLDNNEltwiseNode::reg;
MKLDNNNode::Register<MKLDNNGemmNode> MKLDNNGemmNode::reg;
MKLDNNNode::Register<MKLDNNFullyConnectedNode> MKLDNNFullyConnectedNode::reg;
MKLDNNNode::Register<MKLDNNInputNode> MKLDNNInputNode::reg;
MKLDNNNode::Register<MKLDNNLrnNode> MKLDNNLrnNode::reg;
MKLDNNNode::Register<MKLDNNPoolingNode> MKLDNNPoolingNode::reg;
MKLDNNNode::Register<MKLDNNPowerNode> MKLDNNPowerNode::reg;
MKLDNNNode::Register<MKLDNNActivationNode> MKLDNNActivationNode::reg;
MKLDNNNode::Register<MKLDNNDepthwiseNode> MKLDNNDepthwiseNode::reg;
MKLDNNNode::Register<MKLDNNReorderNode> MKLDNNReorderNode::reg;
MKLDNNNode::Register<MKLDNNReshapeNode> MKLDNNReshapeNode::reg;
MKLDNNNode::Register<MKLDNNROIPoolingNode> MKLDNNROIPoolingNode::reg;
MKLDNNNode::Register<MKLDNNSoftMaxNode> MKLDNNSoftMaxNode::reg;
MKLDNNNode::Register<MKLDNNSplitNode> MKLDNNSplitNode::reg;
MKLDNNNode::Register<MKLDNNTileNode> MKLDNNTileNode::reg;
MKLDNNNode::Register<MKLDNNPermuteNode> MKLDNNPermuteNode::reg;
MKLDNNNode::Register<MKLDNNQuantizeNode> MKLDNNQuantizeNode::reg;
MKLDNNNode::Register<MKLDNNBinaryConvolutionNode> MKLDNNBinaryConvolutionNode::reg;
MKLDNNNode::Register<MKLDNNMemoryInputNode> MKLDNNMemoryInputNode::reg;
MKLDNNNode::Register<MKLDNNMemoryOutputNode> MKLDNNMemoryOutputNode::reg;
MKLDNNNode::Register<MKLDNNRNN> MKLDNNRNN::reg;
// Node constructor: caches the originating CNN layer, derives the node
// name/type from it, collects input/output dimensions, and parses the
// optional "PrimitivesPriority" layer parameter into implPriorities.
MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng)
        : cnnLayer(layer), name(layer->name), typeStr(layer->type), type(TypeFromName(layer->type)), engine(eng),
          selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
    if (!layer->outData.empty()) {
        // Remember the dims of every output blob of the layer.
        for (const auto& outData : layer->outData) {
            outDims.emplace_back(outData->getDims());
    // NOTE(review): presumably the no-output branch — only service layer
    // types (memory/memoryinput/output/reorder) may legally lack outputs;
    // confirm against the full source.
    if (!(CaselessEq<std::string>()(layer->type, "memory") ||
          CaselessEq<std::string>()(layer->type, "memoryinput") ||
          CaselessEq<std::string>()(layer->type, "output") ||
          CaselessEq<std::string>()(layer->type, "reorder"))) {
        THROW_IE_EXCEPTION << "Inappropriate layer type: " << layer->type << " name: " << layer->name;

    for (const auto& inData : layer->insData) {
        inDims.emplace_back(inData.lock()->getDims());

    // "PrimitivesPriority" is a comma-separated list of "cpu:<impl>" tokens
    // giving the preferred implementation order for this node.
    if (layer->params.find("PrimitivesPriority") != layer->params.end()) {
        std::istringstream stream(layer->params["PrimitivesPriority"]);
        while (getline(stream, str, ',')) {
            // Tokens without the "cpu:" prefix are not for this plugin.
            if (str.substr(0, 4) != "cpu:")
            implPriorities.push_back(parse_impl_name(str));
            // Reject tokens that failed to parse, unless explicitly "cpu:unknown".
            if (implPriorities[implPriorities.size() - 1] == impl_desc_type::unknown &&
                    str != "cpu:unknown")
                THROW_IE_EXCEPTION << "Unsupported CPU implementation " << str << " for node " << getName();
// Registers the given edge on both of its endpoints: it is appended to the
// parent's childEdges and to the child's parentEdges.  Bails out when the
// weak pointer is dead or either endpoint is missing.
void MKLDNNNode::addEdge(const MKLDNNEdgeWeakPtr& edge) {
    auto edgePtr = edge.lock();

    auto parentPtr = edgePtr->getParent();
    auto childPtr = edgePtr->getChild();
    if (!parentPtr || !childPtr)

    parentPtr->childEdges.push_back(edge);
    childPtr->parentEdges.push_back(edge);
// Detaches the given edge from both of its endpoints: erases the entry that
// connects exactly this parent/child pair from the child's parentEdges and
// from the parent's childEdges.
void MKLDNNNode::removeEdge(const MKLDNNEdgeWeakPtr& edge) {
    auto edgePtr = edge.lock();

    auto parentPtr = edgePtr->getParent();
    auto childPtr = edgePtr->getChild();
    if (!parentPtr || !childPtr)

    // Scan the child's incoming edges for the one bound to this pair.
    for (auto it = childPtr->parentEdges.begin(); it != childPtr->parentEdges.end(); it++) {
        auto parentEdge = (*it).lock();
        if (parentEdge && parentEdge->getChild() == childPtr && parentEdge->getParent() == parentPtr) {
            childPtr->parentEdges.erase(it);

    // Same scan on the parent's outgoing edges.
    for (auto it = parentPtr->childEdges.begin(); it != parentPtr->childEdges.end(); it++) {
        auto childEdge = (*it).lock();
        if (childEdge && childEdge->getChild() == childPtr && childEdge->getParent() == parentPtr) {
            parentPtr->childEdges.erase(it);
// Disconnects this node from the graph.  Iterates over *copies* of the edge
// lists because removeEdge() mutates parentEdges/childEdges while we iterate.
void MKLDNNNode::remove() {
    auto parent_edges = parentEdges;
    for (const auto &parentEdge : parent_edges) {
        removeEdge(parentEdge);
    auto child_edges = childEdges;
    for (const auto &childEdge : child_edges) {
        removeEdge(childEdge);
// Factory entry point: asks the Registry to instantiate a node implementation
// for the given layer; throws when no registered node type accepts it.
// The caller takes ownership of the returned raw pointer.
MKLDNNNode* MKLDNNNode::CreateNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng,
                                   const MKLDNNExtensionManager::Ptr& extMgr) {
    MKLDNNNode* newNode = Registry::CreateNode(layer, eng, extMgr);
        THROW_IE_EXCEPTION << "Unsupported primitive of type: " << layer->type << " name: " << layer->name;
// Reports whether the given edge list contains no usable (lockable) edges.
bool MKLDNNNode::isEdgesEmpty(const std::vector<MKLDNNEdgeWeakPtr>& edges) const {
    for (auto &edge : edges) {
// Default selection policy: pick among the supported primitive descriptors
// using the node's implementation-priority list.
void MKLDNNNode::selectOptimalPrimitiveDescriptor() {
    selectPreferPrimitiveDescriptor(getPrimitivesPriority());
// Walks the implementation types in priority order; among the supported
// descriptors of the current type, picks the one whose input descriptors
// match the most parent-output descriptors (fewer reorders needed).
// Falls back to descriptor 0 when nothing from the priority list matches.
void MKLDNNNode::selectPreferPrimitiveDescriptor(const std::vector<impl_desc_type>& priority) {
    for (auto& type : priority) {
        int selectedPrimitive = -1;
        int equalsFormatCount = -1;
        for (size_t i = 0; i < getSupportedPrimitiveDescriptors().size(); i++) {
            impl_desc_type supportedType = getSupportedPrimitiveDescriptors()[i].getImplementationType();
            if (type == supportedType) {
                int equalsLocalFormatCount = 0;
                // A config demanding more inputs than we have parent edges cannot be used.
                if (getSupportedPrimitiveDescriptors()[i].getConfig().inConfs.size() > getParentEdges().size())
                for (size_t j = 0; j < getSupportedPrimitiveDescriptors()[i].getConfig().inConfs.size(); j++) {
                    auto parentEdge = getParentEdgeAt(j);
                    auto parentPtr = parentEdge->getParent();
                    auto parent_spd = parentPtr->getSelectedPrimitiveDescriptor();

                    if (parent_spd != nullptr && !parent_spd->getConfig().outConfs.empty()) {
                        int inNum = parentEdge->getInputNum();
                        if (inNum < 0 || inNum >= parent_spd->getConfig().outConfs.size()) {
                        // Count inputs whose desc already equals the parent's output desc.
                        if (MKLDNNExtensionUtils::initTensorsAreEqual(
                                getSupportedPrimitiveDescriptors()[i].getConfig().inConfs[j].desc,
                                parent_spd->getConfig().outConfs[inNum].desc)) {
                            equalsLocalFormatCount++;
                // Keep the candidate with the best match count so far.
                if (equalsLocalFormatCount > equalsFormatCount) {
                    equalsFormatCount = equalsLocalFormatCount;
                    selectedPrimitive = static_cast<int>(i);
        if (selectedPrimitive >= 0) {
            selectPrimitiveDescriptorByIndex(selectedPrimitive);

    if (getSupportedPrimitiveDescriptors().empty())
        THROW_IE_EXCEPTION << "Supported primitive descriptors list is empty for node: " << getName();
    // fallback. If there are no primitives from priority list just select a first
    selectPrimitiveDescriptorByIndex(0);
// In-place execution is allowed only when the node has exactly one parent,
// that parent feeds no other consumer, constness is compatible, and every
// child edge carries the same dims as the input.
bool MKLDNNNode::canBeInPlace() const {
    if (getParentEdges().size() != 1 || getParentEdgeAt(0)->getParent()->getChildEdges().size() != 1 ||
            (getParentEdgeAt(0)->getParent()->isConstant() && !getParentEdgeAt(0)->getChild()->isConstant()))

    MKLDNNDims dims = getParentEdgeAt(0)->getDims();
    for (size_t cIdx = 0; cIdx < getChildEdges().size(); cIdx++) {
        if (getChildEdgeAt(cIdx)->getDims() != dims) {
// For every in-place edge still in the NotAllocated state, wraps the memory
// of the counterpart edge in a new MKLDNNMemory with this node's desired
// descriptor, then marks the edge Allocated (no data copy takes place).
void MKLDNNNode::resolveNotAllocatedEdges() {
    const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor();
        THROW_IE_EXCEPTION << "Cannot find selected primitive descriptor for node: " << getName();
    for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) {
        auto parentEdge = getParentEdgeAt(i);

        // Only NotAllocated edges that are marked in-place need resolving.
        if (parentEdge->getStatus() != MKLDNNEdge::Status::NotAllocated || selected_pd->getConfig().inConfs[i].inPlace < 0)

        // Re-wrap the existing data pointer with this node's input descriptor.
        auto * memPtr = reinterpret_cast<char*>(parentEdge->getMemory().GetData());
        parentEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine()));
        parentEdge->getMemoryPtr()->Create(MKLDNNMemoryDesc(selected_pd->getConfig().inConfs[i].desc), memPtr);

        parentEdge->changeStatus(MKLDNNEdge::Status::Allocated);
    for (size_t i = 0; i < getChildEdges().size() && i < selected_pd->getConfig().outConfs.size(); i++) {
        auto childEdge = getChildEdgeAt(i);

        if (childEdge->getStatus() != MKLDNNEdge::Status::NotAllocated || selected_pd->getConfig().outConfs[i].inPlace < 0)

        auto * memPtr = reinterpret_cast<char*>(childEdge->getMemory().GetData());
        childEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine()));
        childEdge->getMemoryPtr()->Create(MKLDNNMemoryDesc(selected_pd->getConfig().outConfs[i].desc), memPtr);

        childEdge->changeStatus(MKLDNNEdge::Status::Allocated);
// Builds a human-readable string describing the selected implementation
// (e.g. "jit_avx2_winograd") for performance counters.  Impl-type flag names
// are appended via the SEARCH_TYPE macro; the input precision is appended as
// a final token.
std::string MKLDNNNode::getPrimitiveDescriptorType() {
    auto selectedPrimitiveDesc = getSelectedPrimitiveDescriptor();

    impl_desc_type type = impl_desc_type::undef;
    if (selectedPrimitiveDesc) {
        type = selectedPrimitiveDesc->getImplementationType();

    std::string str_type;

    // Appends token t, separating with '_' unless t already starts with one.
    auto add_type = [&](std::string t) {
        if (!str_type.empty() && t.c_str()[0] != '_')

// Appends the flag's name when that impl_desc_type bit-pattern is fully set.
#define SEARCH_TYPE(_type)                                       \
    if ((type & impl_desc_type::_type) == impl_desc_type::_type) \

    SEARCH_TYPE(reorder);
    SEARCH_TYPE(winograd);

    if (type == impl_desc_type::unknown)
        str_type = "unknown";
    else if (str_type.empty())

    // adding layer precision to the performance counters as one of the token
    // currently we treat a layer executing in int8 mode if its input is I8 or U8. if input is U8, we still
    // add I8 since I8 is special placeholder. The real calc precision might be quite complex and in most cases
    // it is mixed precision.
    if (selectedPrimitiveDesc) {
        if (!selectedPrimitiveDesc->getConfig().inConfs.empty()) {
            if (selectedPrimitiveDesc->getConfig().inConfs[0].desc.getPrecision() != InferenceEngine::Precision::U8) {
                str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().inConfs[0].desc.getPrecision().name());
// Returns the locked parent edge at the given index; throws when the index
// is out of range or the weak pointer has expired.
const MKLDNNEdgePtr MKLDNNNode::getParentEdgeAt(size_t idx) const {
    if (idx >= parentEdges.size())
        THROW_IE_EXCEPTION << "Node " << getName() << " contains less parent edges than " << idx;
    auto parentEdgePtr = parentEdges[idx].lock();
        THROW_IE_EXCEPTION << "Node " << getName() << " contains empty parent edge for index " << idx;
    return parentEdgePtr;
// Returns the locked child edge at the given index; throws when the index
// is out of range or the weak pointer has expired.
const MKLDNNEdgePtr MKLDNNNode::getChildEdgeAt(size_t idx) const {
    if (idx >= childEdges.size())
        THROW_IE_EXCEPTION << "Node " << getName() << " contains less child edges than " << idx;
    auto childEdgePtr = childEdges[idx].lock();
        THROW_IE_EXCEPTION << "Node " << getName() << " contains empty child edge for index " << idx;
// Collects every parent edge that feeds the given input port (an output port
// of the producer may fan out to several edges).  Throws on a bad port index
// or a dead weak pointer.
const std::vector<MKLDNNEdgePtr> MKLDNNNode::getParentEdgesAtPort(size_t idx) const {
    if (idx >= inDims.size())
        THROW_IE_EXCEPTION << "Node " << getName() << " contains less input ports than " << idx;

    std::vector<MKLDNNEdgePtr> res;
    for (auto &edge_w : parentEdges) {
        auto edge = edge_w.lock();
            THROW_IE_EXCEPTION << "Node " << getName() << " contains dead weak ptr";
        if (edge->getOutputNum() == idx) res.push_back(edge);
// Collects every child edge consuming the given output port.  Throws on a
// bad port index or a dead weak pointer.
const std::vector<MKLDNNEdgePtr> MKLDNNNode::getChildEdgesAtPort(size_t idx) const {
    if (idx >= outDims.size())
        THROW_IE_EXCEPTION << "Node " << getName() << " contains less output ports than " << idx;

    std::vector<MKLDNNEdgePtr> res;
    for (auto &edge_w : childEdges) {
        auto edge = edge_w.lock();
            THROW_IE_EXCEPTION << "Node " << getName() << " contains dead weak ptr";
        if (edge->getInputNum() == idx) res.push_back(edge);
379 std::vector<memory::format> MKLDNNNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const {
380 if (dims.ndims() == 1)
381 return {memory::format::x};
382 else if (dims.ndims() == 2)
383 return {memory::format::nc};
384 else if (dims.ndims() == 3)
385 return {memory::format::tnc, memory::format::ntc};
386 else if (dims.ndims() == 4)
387 return {memory::format::nchw, memory::format::nChw8c, memory::format::nChw16c};
388 else if (dims.ndims() == 5)
389 return {memory::format::ncdhw, memory::format::nCdhw8c, memory::format::nCdhw16c};
390 return {memory::format::any};
// Submits this node's prepared mkldnn primitive into the given stream.
void MKLDNNNode::execute(mkldnn::stream strm) {
        strm.submit({*prim});
// Populates supportedPrimitiveDescriptors by iterating every mkldnn
// primitive descriptor each created 'desc' offers; tensor descriptors are
// left "uninitialized" (getUninitTensorDesc) so the layout can be fixed
// later.  No-op if the list was already filled.
void MKLDNNNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())

    for (auto& desc : descs) {
        std::shared_ptr<primitive_desc_iterator> itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine));

            InferenceEngine::LayerConfig config;
            config.dynBatchSupport = true;
            for (size_t i = 0; i < desc.inputNumbers(); i++) {
                InferenceEngine::DataConfig dataConfig;
                dataConfig.inPlace = -1;
                dataConfig.constant = false;
                dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getSrcMemDesc(*itpd, i));
                config.inConfs.push_back(dataConfig);

            for (size_t i = 0; i < desc.outputNumbers(); i++) {
                InferenceEngine::DataConfig dataConfig;
                // Outputs may alias input 0 when in-place execution is possible.
                dataConfig.inPlace = canBeInPlace() ? 0 : -1;
                dataConfig.constant = false;
                dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getDstMemDesc(*itpd, i));
                config.outConfs.push_back(dataConfig);
            impl_desc_type impl_type = parse_impl_name(itpd->get_impl_info_str());

            supportedPrimitiveDescriptors.emplace_back(config, impl_type);
        } while (itpd->next());
        } catch (std::exception& e) {
            // it throw exception in case of no implementation found
// Re-creates the mkldnn descriptors from the (now fully specified) config,
// re-iterates the implementations to find the one matching the previously
// selected impl type, validates the requested config against the selected
// descriptor, and finally stores the resolved config into the selected PD.
void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) {
    auto* selectedPD = getSelectedPrimitiveDescriptor();

    // Rebuild descs from the concrete tensor descriptors of the config.
    std::vector<InferenceEngine::TensorDesc> inDescs;
    for (const auto& inConf : config.inConfs)
        inDescs.push_back(inConf.desc);
    std::vector<InferenceEngine::TensorDesc> outDescs;
    for (const auto& outConf : config.outConfs)
        outDescs.push_back(outConf.desc);
    createDescriptor({inDescs}, {outDescs});

    std::shared_ptr<mkldnn::primitive_attr> attr = initPrimitiveAttr();

    InferenceEngine::LayerConfig rightConfig = getSelectedPrimitiveDescriptor()->getConfig();
    size_t selected_count = 0;
    for (size_t j = 0; j < descs.size(); j++) {
        const auto &desc = descs[j];
        std::shared_ptr<primitive_desc_iterator> itpd;
        if (attr == nullptr) {
            itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine));
            itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine, *(attr.get())));

            InferenceEngine::LayerConfig cfg;
            cfg.dynBatchSupport = true;
            for (size_t i = 0; i < desc.inputNumbers(); i++) {
                InferenceEngine::DataConfig dataConfig;
                dataConfig.inPlace = canBeInPlace() ? 0 : -1;
                dataConfig.constant = false;
                dataConfig.desc = getSrcMemDesc(*itpd, i);
                cfg.inConfs.push_back(dataConfig);

            for (size_t i = 0; i < desc.outputNumbers(); i++) {
                InferenceEngine::DataConfig dataConfig;
                dataConfig.inPlace = -1;
                dataConfig.constant = false;
                dataConfig.desc = getDstMemDesc(*itpd, i);
                cfg.outConfs.push_back(dataConfig);
            impl_desc_type impl_type = parse_impl_name(itpd->get_impl_info_str().c_str());
            // The implementation at the selected index must keep its impl type.
            if (selected_count == selectedPrimitiveDescriptorIndex) {
                if (impl_type != selectedPD->getImplementationType()) {
                    THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";

            // Last resort: on the final desc accept any config of matching type.
            if (j == descs.size() - 1) {
                if (impl_type == selectedPD->getImplementationType()) {
                    rightConfig = config;

        } while (itpd->next());

        // Validate the requested config against the currently selected one.
        const auto& selectedConfig = selectedPD->getConfig();
        if (selectedConfig.inConfs.size() != config.inConfs.size() || selectedConfig.outConfs.size() != config.outConfs.size())

        for (size_t i = 0; i < selectedConfig.inConfs.size(); i++) {
            if (selectedConfig.inConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY &&
                !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.inConfs[i].desc, config.inConfs[i].desc))
                THROW_IE_EXCEPTION << "Incorrect descriptor for node: " << getName();

        for (size_t i = 0; i < selectedConfig.outConfs.size(); i++) {
            if (selectedConfig.outConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY &&
                !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.outConfs[i].desc, config.outConfs[i].desc))
                THROW_IE_EXCEPTION << "Incorrect descriptor for node: " << getName();
        rightConfig = config;

    selectedPD->getConfig() = rightConfig;
// Builds one contiguous internal blob from this node's weights (or biases
// when weights == false), concatenating the corresponding blobs of all
// merged nodes after it.  NCHW layouts are reinterpreted as OIHW since the
// data is a weight tensor, not an activation.
InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeVector dims, bool weights) {
    // Guards against writing past the allocated internal buffer.
    auto checkSize = [](size_t dst_size, size_t src_size) {
        if (dst_size < src_size) {
            THROW_IE_EXCEPTION << "Cannot create internal buffer. Buffer can be overrun.";
    auto * wLayer = dynamic_cast<InferenceEngine::WeightableLayer*>(getCnnLayer().get());
    if (wLayer == nullptr)
        THROW_IE_EXCEPTION << "Cannot get weightable layer for node " << getName() << ".";

    InferenceEngine::Blob::Ptr blb = weights ? wLayer->_weights : wLayer->_biases;

        THROW_IE_EXCEPTION << "Cannot get internal blob layer for node " << getName() << ".";

    auto intLayout = InferenceEngine::TensorDesc::getLayoutByDims(dims);
    if (intLayout == InferenceEngine::Layout::NCHW)
        intLayout = InferenceEngine::Layout::OIHW;

    InferenceEngine::TensorDesc desc(blb->precision(), dims, intLayout);

    // Copies this layer's blob, then appends the blobs of each merged layer.
    auto fillInternalBlob = [&](char *data, size_t intBuffSize) {
        size_t offset = blb->byteSize();
        checkSize(intBuffSize, offset);
        ie_memcpy(data, intBuffSize, blb->buffer(), blb->byteSize());
        data += blb->byteSize();
        for (const auto &merged : getMergeWith()) {
            wLayer = dynamic_cast<InferenceEngine::WeightableLayer*>(merged->getCnnLayer().get());
            if (wLayer == nullptr)
                THROW_IE_EXCEPTION << "Cannot convert merged weightable layer for node "
            blb = weights ? wLayer->_weights : wLayer->_biases;

                THROW_IE_EXCEPTION << "Cannot get internal blob layer for node " << getName() << ".";

            offset += blb->byteSize();
            checkSize(intBuffSize, offset);
            ie_memcpy(data, intBuffSize, blb->buffer(), blb->byteSize());
            data += blb->byteSize();

    // Binary weights are stored as int8; everything else as float.
    if (blb->precision() == Precision::BIN) {
        InferenceEngine::TBlob<int8_t>::Ptr internalBlob = InferenceEngine::make_shared_blob<int8_t>(desc);

        internalBlob->allocate();
        char *data = internalBlob->buffer();
        size_t intBuffSize = internalBlob->byteSize();

        fillInternalBlob(data, intBuffSize);

    InferenceEngine::TBlob<float>::Ptr internalBlob = InferenceEngine::make_shared_blob<float>(desc);

    internalBlob->allocate();
    char *data = internalBlob->buffer();
    size_t intBuffSize = internalBlob->byteSize();

    fillInternalBlob(data, intBuffSize);
// Verifies that all edge memory has been allocated, then materializes each
// internal blob into device memory in the layout the chosen implementation
// expects.  Identical blobs are shared process-wide through the plugin's
// weights cache, keyed by name + size + content hash.
void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::primitive_desc_iterator& itpd) {
    for (size_t i = 0; i < getChildEdges().size(); i++) {
        auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
        if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
            THROW_IE_EXCEPTION << "Destination memory didn't allocate for node " << getName()
                               << " to node " << getChildEdgeAt(i)->getChild()->getName() << ".";
    for (size_t i = 0; i < getParentEdges().size(); i++) {
        auto &srcMemPtr = getParentEdgeAt(i)->getMemoryPtr();
        if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
            THROW_IE_EXCEPTION << "Destination memory didn't allocate for node " << getName()
                               << " from node " << getParentEdgeAt(i)->getParent()->getName() << ".";
    // Ask each registered desc callback for the layout the impl expects.
    std::vector<MKLDNNMemoryDesc> intDescs;
    for (auto &it : internalBlobDesc)
        intDescs.push_back(it(itpd, 0));

    internalBlobMemory.clear();
    for (size_t i = 0; i < internalBlobs.size(); i++) {
        const auto &internalBlob = internalBlobs[i];

        // Cache key: node name + blob index + byte size + content hash.
        const uint64_t data_hash = Engine::GetWeightsSharing().GetHashFunc().hash(internalBlob->buffer(), internalBlob->byteSize());
        const std::string string_hash = name + "_" + std::to_string(i)
                                        + "_" + std::to_string(internalBlob->byteSize())
                                        + "_" + std::to_string(data_hash);
        MKLDNNMemoryPtr ptr =
                Engine::GetWeightsSharing().findOrCreate(string_hash, [&] () {
                    MKLDNNMemoryPtr _ptr = MKLDNNMemoryPtr(new MKLDNNMemory(engine));
                    _ptr->Create(intDescs[i]);
                    MKLDNNMemory memory(engine);

                    // Reinterpret activation-style layouts as weight layouts
                    // before the SetData reorder (ncdhw->goihw, nchw->oihw).
                    auto newDesc = MKLDNNMemoryDesc(internalBlob->getTensorDesc());
                    auto newFormat = newDesc.getFormat();
                    if (newFormat == mkldnn::memory::ncdhw) {
                        newFormat = mkldnn::memory::goihw;
                    if (newFormat == mkldnn::memory::nchw) {
                        newFormat = mkldnn::memory::oihw;
                    memory.Create(MKLDNNMemoryDesc(newDesc.getDims(), newDesc.getDataType(), newFormat), internalBlob->buffer());
                    auto aformat = memory.GetFormat();
                    _ptr->SetData(memory);

        internalBlobMemory.push_back(ptr);
631 bool MKLDNNNode::isInplace() const {
632 auto config = getSelectedPrimitiveDescriptor()->getConfig();
634 for (auto &in : config.inConfs) if (in.inPlace >= 0) return true;
635 for (auto &out : config.outConfs) if (out.inPlace >= 0) return true;
// Lazily computes and caches whether this node is constant by walking the
// graph: first downward through the children, then (if still undecided)
// upward through the parents; anything still Unknown becomes NoConst.
bool MKLDNNNode::isConstant() {
    if (constant == ConstantType::Unknown) {
        std::vector<MKLDNNNodePtr> checkNodes;
        for (size_t i = 0; i < getChildEdges().size(); i++) {
            checkNodes.push_back(getChildEdgeAt(i)->getChild());
        while (constant != ConstantType::NoConst && !checkNodes.empty()) {
            constant = checkNodes.front()->checkConstant(LOOK_DOWN, checkNodes);
            checkNodes.erase(checkNodes.begin());
        // Downward pass inconclusive: reset and try the parents.
        if (constant != ConstantType::Const) {
            constant = ConstantType::Unknown;

            for (size_t i = 0; i < getParentEdges().size(); i++) {
                checkNodes.push_back(getParentEdgeAt(i)->getParent());
            while (constant != ConstantType::NoConst && !checkNodes.empty()) {
                constant = checkNodes.front()->checkConstant(LOOK_UP, checkNodes);
                checkNodes.erase(checkNodes.begin());
        if (constant == ConstantType::Unknown)
            constant = ConstantType::NoConst;

    return constant == ConstantType::Const;
// BFS helper for isConstant(): when this node is still Unknown, enqueues its
// not-yet-visited neighbors in the given direction (children for LOOK_DOWN,
// parents for LOOK_UP) onto checkNodes.
MKLDNNNode::ConstantType MKLDNNNode::checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes) {
    if (constant == ConstantType::Unknown) {
        if (look == LOOK_DOWN) {
            for (size_t i = 0; i < getChildEdges().size(); i++) {
                if (std::find(checkNodes.begin(), checkNodes.end(), getChildEdgeAt(i)->getChild()) == checkNodes.end())
                    checkNodes.push_back(getChildEdgeAt(i)->getChild());
            for (size_t i = 0; i < getParentEdges().size(); i++) {
                if (std::find(checkNodes.begin(), checkNodes.end(), getParentEdgeAt(i)->getParent()) == checkNodes.end())
                    checkNodes.push_back(getParentEdgeAt(i)->getParent());
683 void MKLDNNNode::addOriginalLayer(const InferenceEngine::CNNLayerPtr &layer) {
685 if (originalLayers.empty()) {
686 originalLayers = layer->name;
688 originalLayers += "," + layer->name;
// Frees per-node temporary data once it is no longer needed, and walks the
// fused and merged node lists to do the same for them.
void MKLDNNNode::cleanup() {
    internalBlobs.clear();

    for (auto it : fusedWith) {

    for (auto it : mergedWith) {
// Maps a node Type enum value to its display string (switch body partially
// shown here; each case simply returns the type's canonical name).
std::string MKLDNNNode::typeToStr(Type type) {
            return "Convolution";
            return "Deconvolution";
        case Convolution_Sum:
            return "Convolution_Sum";
        case Convolution_Activation:
            return "Convolution_Activation";
        case Convolution_Sum_Activation:
            return "Convolution_Sum_Activation";
            return "FullyConnected";
        case FullyConnected_Activation:
            return "FullyConnected_Activation";
            return "Concatenation";
        case BatchNormalization:
            return "BatchNormalization";
            return "MemoryOutput";
            return "MemoryInput";
// Returns the implementation priority list for this node: the default order
// below (fastest JIT kernels first, reference implementation last) is
// appended after any user-specified priorities already in implPriorities,
// skipping duplicates.
const std::vector<impl_desc_type>& MKLDNNNode::getPrimitivesPriority() {
    std::vector<impl_desc_type> priorities = {
            impl_desc_type::unknown,
            impl_desc_type::jit_uni_dw,
            impl_desc_type::jit_uni_1x1,
            impl_desc_type::jit_uni,
            impl_desc_type::jit_avx512_dw,
            impl_desc_type::jit_avx512_1x1,
            impl_desc_type::jit_avx512,
            impl_desc_type::jit_avx2_dw,
            impl_desc_type::jit_avx2_1x1,
            impl_desc_type::jit_avx2,
            impl_desc_type::jit_avx_dw,
            impl_desc_type::jit_avx_1x1,
            impl_desc_type::jit_avx,
            impl_desc_type::jit_sse42_dw,
            impl_desc_type::jit_sse42_1x1,
            impl_desc_type::jit_sse42,
            impl_desc_type::gemm_any,
            impl_desc_type::gemm_blas,
            impl_desc_type::gemm_avx512,
            impl_desc_type::gemm_avx2,
            impl_desc_type::gemm_avx,
            impl_desc_type::gemm_sse42,
            impl_desc_type::ref_any,
    // Append defaults that the user has not already requested.
    for (const auto& impl : priorities) {
        if (std::find(implPriorities.begin(), implPriorities.end(), impl) == implPriorities.end())
            implPriorities.push_back(impl);
    return implPriorities;
// A tensor descriptor counts as "uninitialized" when its layout is ANY or
// any of its blocking parameters (offset padding, per-dim data offsets,
// strides) still holds the SIZE_MAX sentinel.
bool MKLDNNNode::isUninitTensorDesc(const InferenceEngine::TensorDesc& desc) const {
    if (desc.getLayout() == InferenceEngine::Layout::ANY)

    if (desc.getBlockingDesc().getOffsetPadding() == std::numeric_limits<size_t>::max())

    for (size_t i = 0; i < desc.getBlockingDesc().getOrder().size(); i++) {
        if (desc.getBlockingDesc().getOffsetPaddingToData()[i] == std::numeric_limits<size_t>::max() ||
                desc.getBlockingDesc().getStrides()[i] == std::numeric_limits<size_t>::max())
// Resolves a concrete tensor descriptor for input port idx of the given
// config.  Preference order: the config's own desc if already initialized;
// the in-place counterpart output; the parent node's matching output desc;
// otherwise a descriptor synthesized from the config's dims and layout.
InferenceEngine::TensorDesc MKLDNNNode::getConfiguredInputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const {
    if (!isUninitTensorDesc(config.inConfs[idx].desc))
        return config.inConfs[idx].desc;

    int num = getParentEdgeAt(idx)->getInputNum();
    auto *selectedPD = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor();
        THROW_IE_EXCEPTION << "Cannot get selected primitive descriptor for node: " << getParentEdgeAt(idx)->getParent()->getName();

    if (selectedPD->getConfig().outConfs.size() <= num)

    // In-place input: resolve through the output it shares memory with.
    if (config.inConfs[idx].inPlace >= 0) {
        return getConfiguredOutputDesc(config, static_cast<size_t>(config.inConfs[idx].inPlace));

    auto parentConf = selectedPD->getConfig().outConfs[num];
    parentConf.desc.setPrecision(config.inConfs[idx].desc.getPrecision());
    // Force the parent to finalize its own descriptor first if needed.
    if (isUninitTensorDesc(parentConf.desc) && parentConf.inPlace >= 0)
        getParentEdgeAt(idx)->getParent()->initOptimalPrimitiveDescriptor();
    parentConf = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num];
    if (!isUninitTensorDesc(parentConf.desc) &&
            MKLDNNExtensionUtils::initTensorsAreEqual(parentConf.desc, config.inConfs[idx].desc)) {
        return parentConf.desc;

    // Our layout is unconstrained: adopt the parent's blocking layout.
    if (config.inConfs[idx].desc.getLayout() == InferenceEngine::Layout::ANY &&
            parentConf.desc.getLayout() != InferenceEngine::Layout::ANY) {
        return InferenceEngine::TensorDesc(parentConf.desc.getPrecision(),
                                           parentConf.desc.getDims(), {
                                                   parentConf.desc.getBlockingDesc().getBlockDims(),
                                                   parentConf.desc.getBlockingDesc().getOrder()

    // Keep our own layout but rebuild a clean blocking descriptor.
    if (config.inConfs[idx].desc.getLayout() != InferenceEngine::Layout::ANY) {
        return InferenceEngine::TensorDesc(config.inConfs[idx].desc.getPrecision(),
                                           config.inConfs[idx].desc.getDims(), {
                                                   config.inConfs[idx].desc.getBlockingDesc().getBlockDims(),
                                                   config.inConfs[idx].desc.getBlockingDesc().getOrder()

    return InferenceEngine::TensorDesc(config.inConfs[idx].desc.getPrecision(),
                                       config.inConfs[idx].desc.getDims(),
                                       InferenceEngine::TensorDesc::getLayoutByDims(config.inConfs[idx].desc.getDims()));
// Mirror of getConfiguredInputDesc for output port idx: resolves a concrete
// descriptor from the config itself, the in-place counterpart input, the
// child node's matching input desc, or a synthesized layout as fallback.
InferenceEngine::TensorDesc MKLDNNNode::getConfiguredOutputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const {
    if (!isUninitTensorDesc(config.outConfs[idx].desc))
        return config.outConfs[idx].desc;

    int num = getChildEdgeAt(idx)->getOutputNum();
    auto *selectedPD = getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor();
        THROW_IE_EXCEPTION << "Cannot get selected primitive descriptor for node: " << getChildEdgeAt(idx)->getChild()->getName();

    if (selectedPD->getConfig().inConfs.size() <= num)

    // In-place output: resolve through the input it shares memory with.
    if (config.outConfs[idx].inPlace >= 0) {
        return getConfiguredInputDesc(config, static_cast<size_t>(config.outConfs[idx].inPlace));

    auto childConf = selectedPD->getConfig().inConfs[num];
    childConf.desc.setPrecision(config.outConfs[idx].desc.getPrecision());
    // Force the child to finalize its own descriptor first if needed.
    if (isUninitTensorDesc(childConf.desc) && childConf.inPlace >= 0)
        getChildEdgeAt(idx)->getChild()->initOptimalPrimitiveDescriptor();
    childConf = getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num];
    if (!isUninitTensorDesc(childConf.desc) &&
            MKLDNNExtensionUtils::initTensorsAreEqual(childConf.desc, config.outConfs[idx].desc)) {
        return childConf.desc;
    // Our layout is unconstrained: adopt the child's blocking layout.
    if (config.outConfs[idx].desc.getLayout() == InferenceEngine::Layout::ANY &&
            childConf.desc.getLayout() != InferenceEngine::Layout::ANY) {
        return InferenceEngine::TensorDesc(childConf.desc.getPrecision(),
                                           childConf.desc.getDims(), {
                                                   childConf.desc.getBlockingDesc().getBlockDims(),
                                                   childConf.desc.getBlockingDesc().getOrder()

    // Keep our own layout but rebuild a clean blocking descriptor.
    if (config.outConfs[idx].desc.getLayout() != InferenceEngine::Layout::ANY) {
        return InferenceEngine::TensorDesc(config.outConfs[idx].desc.getPrecision(),
                                           config.outConfs[idx].desc.getDims(), {
                                                   config.outConfs[idx].desc.getBlockingDesc().getBlockDims(),
                                                   config.outConfs[idx].desc.getBlockingDesc().getOrder()

    return InferenceEngine::TensorDesc(config.outConfs[idx].desc.getPrecision(),
                                       config.outConfs[idx].desc.getDims(),
                                       InferenceEngine::TensorDesc::getLayoutByDims(config.outConfs[idx].desc.getDims()));
// Finalizes the selected primitive descriptor: fills in every remaining
// uninitialized input/output tensor descriptor, then pushes the completed
// config back through initDescriptor (RNN nodes skip the re-init).
void MKLDNNNode::initOptimalPrimitiveDescriptor() {
    auto config = getSelectedPrimitiveDescriptor()->getConfig();
    if (!isInitConfig(config)) {
        for (size_t i = 0; i < config.inConfs.size(); i++) {
            config.inConfs[i].desc = getConfiguredInputDesc(config, i);

        for (size_t i = 0; i < config.outConfs.size(); i++) {
            config.outConfs[i].desc = getConfiguredOutputDesc(config, i);
        initDescriptor(config);
    } else if (getType() != RNNSeq && getType() != RNNCell) {
        initDescriptor(config);
944 bool MKLDNNNode::isInitConfig(const InferenceEngine::LayerConfig& config) const {
945 for (const auto& configs : {config.inConfs, config.outConfs}) {
946 for (const auto &dc : configs) {
947 if (isUninitTensorDesc(dc.desc))
// Converts the mkldnn descriptor of input idx into an IE-style memory
// descriptor, substituting the edge's dims; an ANY layout keeps only
// precision and dims, otherwise the blocking layout is preserved.
MKLDNNMemoryDesc MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
    InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.src_primitive_desc(idx).desc());
    if (desc.getLayout() == InferenceEngine::Layout::ANY)
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            getParentEdgeAt(idx)->getDims().ToSizeVector(),
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            getParentEdgeAt(idx)->getDims().ToSizeVector(),
                                                            desc.getBlockingDesc()));
// Output-side counterpart of getSrcMemDesc: converts the mkldnn descriptor
// of output idx, substituting the child edge's dims.
MKLDNNMemoryDesc MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
    InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.dst_primitive_desc(idx).desc());
    if (desc.getLayout() == InferenceEngine::Layout::ANY)
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            getChildEdgeAt(idx)->getDims().ToSizeVector(),
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            getChildEdgeAt(idx)->getDims().ToSizeVector(),
                                                            desc.getBlockingDesc()));
978 int MKLDNNNode::batchToProcess() {
979 return dynBatchLim == 0 ? getMaxBatch() : std::min<int>(getMaxBatch(), dynBatchLim);
// Full batch size of this node, taken from the first dim of its first
// output.
int MKLDNNNode::getMaxBatch() {
    // FIXME: batch != 0 dims number
    if (!outDims.empty())
        return outDims[0][0];
// Applies a new dynamic-batch limit and propagates the effective batch to
// the already-created primitive.
void MKLDNNNode::setDynamicBatchLim(int lim) {
        prim.setBatchLimit(batchToProcess(), getParentEdges().size(), getChildEdges().size());
// Tries each registered creator in turn; returns (releasing ownership to the
// caller) the first node whose created() check accepts the layer.
MKLDNNNode *MKLDNNNode::Registry::CreateNode(const InferenceEngine::CNNLayerPtr &layer, const mkldnn::engine& eng,
                                             const MKLDNNExtensionManager::Ptr& extMgr) {
    for (auto maker : _dataByLayer) {
        std::unique_ptr<MKLDNNNode> ol(maker(layer, eng));
        if (ol != nullptr && ol->created(extMgr))
            return ol.release();
// Appends a node-creator callback; invoked by the static Register<T>
// members at program startup.
void MKLDNNNode::Registry::RegisterNode(MKLDNNNode::Registry::CreatorByLayerFunction f) {
    _dataByLayer.push_back(f);