// Source: platform/upstream/dldt.git (OpenVINO 2019 R1 content)
// Path:   inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #include "mkldnn_split_node.h"
6 #include <ie_layers.h>
7 #include <string>
8 #include <vector>
9 #include <map>
10 #include <mkldnn_types.h>
11 #include <mkldnn_extension_utils.h>
12 #include <limits>
13
14 using namespace mkldnn;
15 using namespace MKLDNNPlugin;
16 using namespace InferenceEngine;
17
18 MKLDNNSplitNode::MKLDNNSplitNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng) : MKLDNNNode(layer, eng) {}
19
20 void MKLDNNSplitNode::getSupportedDescriptors() {
21     auto * splitLayer = dynamic_cast<SplitLayer*>(getCnnLayer().get());
22
23     if (splitLayer == nullptr)
24         THROW_IE_EXCEPTION << "Cannot convert split layer.";
25
26     if (getParentEdges().size() != 1)
27         THROW_IE_EXCEPTION << "Incorrect number of input nodes.";
28     if (getChildEdges().empty())
29         THROW_IE_EXCEPTION << "Incorrect number of output nodes.";
30
31     axis = splitLayer->_axis;
32     if (axis >= getParentEdgeAt(0)->getDims().ndims())
33         THROW_IE_EXCEPTION << "Invalid value of axis parameter in split layer";
34 }
35
// Builds the list of supported primitive descriptors for Split:
//   1. a reference implementation with layout "any" (always supported);
//   2. a planar strided config whose outputs view the input buffer in-place;
//   3. blocked-channel configs (nChw8c / nChw16c) when axis == 1, rank is 4 or 5,
//      and every channel count is divisible by the block size.
// Precision is forced to FP32 for both input and output regardless of the model.
void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    // Clamp input precision to FP32 (only FP32 is supported by this node).
    InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
    if (precision != InferenceEngine::Precision::FP32)
        precision = InferenceEngine::Precision::FP32;
    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
    // Same clamp for the output precision.
    precision = getCnnLayer()->outData[0]->getPrecision();
    if (precision != InferenceEngine::Precision::FP32)
        precision = InferenceEngine::Precision::FP32;
    auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);

    auto srcDims = getParentEdgeAt(0)->getDims();

    // Config #1: single input, one output per split chunk, layout left to mkldnn.
    InferenceEngine::LayerConfig config;
    config.dynBatchSupport = true;
    config.inConfs.resize(1);
    config.inConfs[0].inPlace = -1;
    config.inConfs[0].constant = false;
    config.inConfs[0].desc = MKLDNNMemoryDesc(srcDims, inputDataType, memory::format::any);
    config.outConfs.resize(outDims.size());

    if (srcDims.ndims() < 2)
        THROW_IE_EXCEPTION << "Split " << getName() << " isn't supported 1d blobs";

    // Validate each output: same rank as the first output, and all dims equal to
    // the input except along the split axis; accumulate the axis sizes.
    auto axis_size = 0;
    auto dstFirstDims = getChildEdgeAt(0)->getDims();
    for (size_t i = 0; i < outDims.size(); i++) {
        auto o_Dims = outDims[i];
        if (dstFirstDims.ndims() != o_Dims.ndims()) {
            THROW_IE_EXCEPTION << "Split " << getName() << " supports only output blob with equal number of dimensions";
        }

        config.outConfs[i].inPlace = -1;
        config.outConfs[i].constant = false;
        config.outConfs[i].desc = MKLDNNMemoryDesc(o_Dims, outputDataType, memory::format::any);
        axis_size += o_Dims[axis];
        for (size_t j = 0; j < dstFirstDims.ndims(); j++) {
            if (j == axis)
                continue;
            if (o_Dims[j] != dstFirstDims[j])
                THROW_IE_EXCEPTION << "Split " << getName() << " has incorrect output dimensions";
        }
    }
    // Substitute the summed axis size and compare total element counts to make
    // sure the outputs exactly tile the input.
    dstFirstDims[axis] = axis_size;
    if (dstFirstDims.size() != srcDims.size())
        THROW_IE_EXCEPTION << "The sizes of input blob and sum of output blobs are not equal.";
    supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref);

    auto numOfDim = static_cast<size_t>(srcDims.ndims());

    // Config #2: planar layout with explicit strides so outputs can alias the
    // input buffer (inPlace = 0). The max() value is used as an "undefined"
    // sentinel for offset/strides — presumably matched by isUninitTensorDesc()
    // and filled in later from the actual neighbor layout; TODO confirm.
    SizeVector order;
    SizeVector offsets(numOfDim, 0lu);
    size_t offset = std::numeric_limits<size_t>::max();
    for (size_t i = 0; i < numOfDim; i++) {
        order.push_back(i);
    }

    // Strides for dims before the axis are left undefined (sentinel), strides
    // from the axis onward are the dense row-major strides of the input.
    SizeVector strides(numOfDim);
    strides[numOfDim - 1] = 1;
    for (size_t i = 2; i <= numOfDim; i++) {
        if (numOfDim - i < axis) {
            strides[numOfDim - i] = std::numeric_limits<size_t>::max();
        } else {
            strides[numOfDim - i] = strides[numOfDim - i + 1] * srcDims[numOfDim - i + 1];
        }
    }

    config.inConfs[0].desc = TensorDesc(Precision::FP32, srcDims.ToSizeVector(), {srcDims.ToSizeVector(), order, offset, offsets, strides});
    for (size_t i = 0; i < outDims.size(); i++) {
        auto dims = outDims[i].ToSizeVector();
        config.outConfs[i].inPlace = 0;  // output i views input buffer 0
        config.outConfs[i].desc = TensorDesc(Precision::FP32, dims,
                                            {dims, order, offset, offsets, strides});
    }
    supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);

    // Blocked layouts are only offered for 4D/5D tensors split along channels.
    if ((numOfDim != 4 && numOfDim != 5) || axis != 1)
        return;

    // Blocked order: outer dims in natural order plus an inner channel block.
    order.push_back(1);
    numOfDim = order.size();
    offsets = SizeVector(numOfDim, 0lu);

    // nChw8c and nChw16c
    for (size_t sizeS : {8lu, 16lu}) {
        SizeVector blkDims = srcDims.ToSizeVector();
        if (blkDims[1] % sizeS)
            continue;  // input channels not divisible by the block size
        // NOTE: the ternary is always 0 here because of the check above.
        blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu);
        blkDims.push_back(sizeS);

        // Same stride scheme as the planar config, over the blocked dims.
        strides.resize(numOfDim);
        strides[numOfDim - 1] = 1lu;
        for (size_t i = 2; i <= numOfDim; i++) {
            if (numOfDim - i < axis) {
                strides[numOfDim - i] = std::numeric_limits<size_t>::max();
            } else {
                strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1];
            }
        }
        config.inConfs[0].desc = TensorDesc(Precision::FP32, srcDims.ToSizeVector(), {blkDims, order, offset, offsets, strides});

        // In-place blocked split only works if every output's channel count is
        // also divisible by the block size.
        bool canInplace = true;
        for (size_t i = 0; i < outDims.size(); i++) {
            auto dims = outDims[i].ToSizeVector();
            blkDims = dims;

            if (blkDims[1] % sizeS) {
                canInplace = false;
                break;
            }
            blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu);
            blkDims.push_back(sizeS);
            config.outConfs[i].desc = TensorDesc(Precision::FP32, dims, {blkDims, order, offset, offsets, strides});
        }
        if (canInplace)
            supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
    }
}
157
158 void MKLDNNSplitNode::createPrimitive() {
159     auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
160     if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
161         THROW_IE_EXCEPTION << "Input memory didn't allocate.";
162     for (size_t i = 0; i < getChildEdges().size(); i++) {
163         if (!getChildEdgeAt(i)->getMemoryPtr() || !getChildEdgeAt(i)->getMemory().GetPrimitivePtr())
164             THROW_IE_EXCEPTION << "Destination memory didn't allocate.";
165     }
166     if (getSelectedPrimitiveDescriptor() == nullptr)
167         THROW_IE_EXCEPTION << "Preferable primitive descriptor does not set.";
168 }
169
// Reference (non-optimized) execution: copies the input blob into each output
// blob element by element, honoring the strides/offsets of both tensor
// descriptors. When an in-place descriptor was selected (isOptimized()), the
// outputs already alias the input buffer and no work is needed.
void MKLDNNSplitNode::execute(mkldnn::stream strm) {
    if (isOptimized())
        return;

    // FIXME: add more optimal implementation
    MKLDNNDims par_dims = getParentEdgeAt(0)->getDims();
    int MB = batchToProcess();  // dynamic-batch-aware batch size
    auto srcBlob = getParentEdgeAt(0)->getBlob();
    const auto *srcData = srcBlob->cbuffer().as<const float *>();

    // Product of all dimensions before the split axis ("outer" iterations),
    // with the real batch dimension replaced by the dynamic batch MB.
    // NOTE(review): dims() is indexed from the end here, which suggests it is
    // stored in reversed order — confirm against InferenceEngine::Blob.
    size_t outerSize = 1;
    for (int i = 0; i < axis; i++) {
        if (i == 0)
            outerSize *= MB;
        else
            outerSize *= srcBlob->dims()[srcBlob->dims().size() - i - 1];
    }

    size_t srcSize = getParentEdgeAt(0)->getMemory().GetSize();
    // Logical distance (in elements) between two consecutive outer iterations
    // in the source layout.
    size_t src_batch_off = srcBlob->getTensorDesc().offset(srcBlob->size() / outerSize)
            - srcBlob->getTensorDesc().offset(0);

    // sIdx runs over the source's inner slice and is shared across outputs, so
    // each output consumes the next contiguous chunk along the split axis.
    for (size_t i = 0, sIdx = 0; i < getChildEdges().size(); i++) {
        auto dstBlob = getChildEdgeAt(i)->getBlob();
        auto *dstData = dstBlob->buffer().as<float *>();

        // Product of all dimensions from the split axis to the end for this output.
        size_t innerSize = 1;
        for (size_t j = axis; j < dstBlob->dims().size(); j++) {
            innerSize *= dstBlob->dims()[dstBlob->dims().size() - j - 1];
        }

        // Distance between two consecutive outer iterations in this destination.
        size_t dst_batch_off = dstBlob->getTensorDesc().offset(innerSize) - dstBlob->getTensorDesc().offset(0);

        for (size_t dIdx = 0; dIdx < innerSize; dIdx++, sIdx++) {
            for (unsigned b = 0; b < outerSize; b++) {
                // Guard against reading past the allocated source memory.
                if (sIdx + b*src_batch_off >= srcSize)
                    THROW_IE_EXCEPTION << "Incorrect configuration of split layer " << getName() << "!";
                // TensorDesc::offset() maps a logical element index to the
                // physical offset of the (possibly strided/blocked) layout.
                dstData[b * dst_batch_off + dstBlob->getTensorDesc().offset(dIdx)] =
                        srcData[b * src_batch_off + srcBlob->getTensorDesc().offset(sIdx)];
            }
        }
    }
}
213
214 bool MKLDNNSplitNode::created() const {
215     return getType() == Split;
216 }
217
// Chooses a primitive descriptor for Split by voting on the memory formats of
// already-configured neighbors:
//   - if the caller forces the reference implementation, pick descriptor 0;
//   - collect the "unknown"-type in-place descriptors with concrete layouts;
//   - count the formats used by parent outputs and child inputs, pick the most
//     frequent format that does not require extended (padded) blocks;
//   - prefer an in-place descriptor matching that format, then a non-in-place
//     one, and finally fall back to descriptor 0.
void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
    // Explicit priority for the reference implementation wins immediately.
    if (implPriorities.size() > 0 && implPriorities[0] == impl_desc_type::ref) {
        selectPrimitiveDescriptorByIndex(0);
        return;
    }
    // Precisions are clamped to FP32, mirroring initSupportedPrimitiveDescriptors().
    InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
    if (precision != InferenceEngine::Precision::FP32)
        precision = InferenceEngine::Precision::FP32;
    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
    precision = getCnnLayer()->outData[0]->getPrecision();
    if (precision != InferenceEngine::Precision::FP32)
        precision = InferenceEngine::Precision::FP32;
    auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);

    // Gather candidate in-place descriptors: implementation type "unknown",
    // outConfs[0] in-place, and at least one concrete (non-ANY) layout.
    bool hasUnknown = false;
    std::vector<size_t> canSelectPrimitive;
    for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
        bool hasAny = true;
        auto &primDescInfo = supportedPrimitiveDescriptors[i];
        if (primDescInfo.getImplementationType() != impl_desc_type::unknown ||
            primDescInfo.getConfig().outConfs[0].inPlace < 0)
            continue;
        hasUnknown = true;
        // hasAny stays true only if every input AND output layout is ANY.
        for (auto iInfo : primDescInfo.getConfig().inConfs) {
            if (iInfo.desc.getLayout() != InferenceEngine::Layout::ANY) {
                hasAny = false;
                break;
            }
        }

        if (hasAny) {
            for (auto oInfo : primDescInfo.getConfig().outConfs) {
                if (oInfo.desc.getLayout() != InferenceEngine::Layout::ANY) {
                    hasAny = false;
                    break;
                }
            }
        }

        if (!hasAny) {
            canSelectPrimitive.push_back(i);
        }
    }

    // If exactly one concrete in-place candidate exists, take it outright.
    bool canOptimize = false;
    if (hasUnknown) {
        canOptimize = true;

        if (canSelectPrimitive.size() == 1) {
            selectPrimitiveDescriptorByIndex(static_cast<int>(canSelectPrimitive[0]));
            return;
        }
    }

    // Vote: tally the formats already selected by parents (their output descs)...
    std::map<mkldnn::memory::format, size_t> formatFrequency;
    for (size_t i = 0; i < getParentEdges().size(); i++) {
        auto parentEdge = getParentEdgeAt(i);
        auto parent = parentEdge->getParent();

        if (parent->getSelectedPrimitiveDescriptor() == nullptr)
            continue;

        int outputIndex = parentEdge->getOutputNum();
        if (outputIndex < 0)
            THROW_IE_EXCEPTION << "Cannot find index of output node";
        if (outputIndex >= parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size())
            outputIndex = 0;  // fall back to the first output config
        auto outDesc = MKLDNNMemoryDesc(parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIndex].desc);
        if (!outDesc)
            continue;
        if (formatFrequency.find(outDesc.getFormat()) != formatFrequency.end())
            formatFrequency[outDesc.getFormat()] += 1;
        else
            formatFrequency[outDesc.getFormat()] = 1;
    }
    // ...and by children (their input descs).
    for (size_t i = 0; i < getChildEdges().size(); i++) {
        auto childEdge = getChildEdgeAt(i);
        auto child = childEdge->getChild();
        if (child->getSelectedPrimitiveDescriptor() == nullptr)
            continue;
        // NOTE(review): uses getOutputNum() to index the child's inConfs —
        // verify this accessor returns the child-side port index as intended.
        int inputIndex = childEdge->getOutputNum();
        if (inputIndex < 0)
            THROW_IE_EXCEPTION << "Cannot find index of output node";
        if (inputIndex >= child->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size())
            inputIndex = 0;  // fall back to the first input config
        auto outDesc = MKLDNNMemoryDesc(child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIndex].desc);
        if (!outDesc)
            continue;
        if (formatFrequency.find(outDesc.getFormat()) != formatFrequency.end())
            formatFrequency[outDesc.getFormat()] += 1;
        else
            formatFrequency[outDesc.getFormat()] = 1;
    }

    // Pick the most frequent neighbor format; default to the plain format.
    // Formats that would extend (pad) blocks on the input are skipped.
    size_t maxCount = 0;
    mkldnn::memory::format convertTo = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims());
    for (auto &it : formatFrequency) {
        if (it.second > maxCount && !MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, it.first).blocksExtended()) {
            maxCount = it.second;
            convertTo = it.first;
        }
    }

    // In-place is impossible if the chosen format pads the input or any output.
    if (canOptimize && MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, convertTo).blocksExtended())
        canOptimize = false;
    for (size_t i = 0; canOptimize && i < getChildEdges().size(); i++) {
        if (MKLDNNMemoryDesc(getChildEdgeAt(i)->getDims(), outputDataType, convertTo).blocksExtended())
            canOptimize = false;
    }

    // Preference 1: an in-place candidate whose input format matches the vote.
    if (canOptimize) {
        for (auto supportedPdIndex : canSelectPrimitive) {
            if (MKLDNNMemoryDesc(supportedPrimitiveDescriptors[supportedPdIndex].getConfig().inConfs[0].desc).getFormat() == convertTo) {
                selectPrimitiveDescriptorByIndex(static_cast<int>(supportedPdIndex));
                return;
            }
        }
    }

    // Preference 2: a non-"unknown" descriptor whose output format matches and
    // whose inputs do not require extended blocks.
    for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
        auto &primDescInfo = supportedPrimitiveDescriptors[i];
        if (primDescInfo.getImplementationType() == impl_desc_type::unknown)
            continue;
        if (convertTo == MKLDNNMemoryDesc(supportedPrimitiveDescriptors[i].getConfig().outConfs[0].desc).getFormat()) {
            size_t num = 0;
            for (num = 0; num < getParentEdges().size(); num++) {
                if (MKLDNNMemoryDesc(getParentEdgeAt(num)->getDims(), inputDataType, convertTo).blocksExtended())
                    break;
            }
            if (num == getParentEdges().size()) {
                selectPrimitiveDescriptorByIndex(i);
                return;
            }
        }
    }

    // Last resort: the reference descriptor.
    selectPrimitiveDescriptorByIndex(0);
}
356
357 bool MKLDNNSplitNode::isOptimized() {
358     return getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].inPlace >= 0;
359 }
360
// Finalizes the selected config for the in-place (optimized) mode: resolves any
// still-uninitialized input descriptors (preferring the parent's already-chosen
// output layout), then rewrites each output descriptor as a view into the input
// buffer, advancing a running element offset along the split axis. Non-optimized
// mode is delegated to the base class.
void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() {
    if (!isOptimized()) {
        MKLDNNNode::initOptimalPrimitiveDescriptor();
        return;
    }

    auto config = getSelectedPrimitiveDescriptor()->getConfig();
    if (isInitConfig(config))
        return;  // nothing left uninitialized

    for (size_t i = 0; i < config.inConfs.size(); i++) {
        // Only concrete layouts with sentinel (uninitialized) offsets/strides
        // need resolving.
        if (config.inConfs[i].desc.getLayout() == InferenceEngine::Layout::ANY ||
            !isUninitTensorDesc(config.inConfs[i].desc))
            continue;

        int num = getParentEdgeAt(i)->getOutputNum();
        if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) {
            if (num >= 0) {
                // If the parent's matching output desc is itself uninitialized
                // but in-place, let the parent finalize first (recursive fix-up).
                if (isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) &&
                        getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].inPlace >= 0)
                    getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor();
                // Adopt the parent's output desc when it is compatible with ours.
                if (!isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) &&
                    MKLDNNExtensionUtils::initTensorsAreEqual(
                            getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc,
                            config.inConfs[i].desc)) {
                    config.inConfs[i].desc = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc;
                    continue;
                }
            }
        }
        // Fallback: rebuild the desc as a dense layout with default offsets and
        // strides derived from the block dims/order.
        config.inConfs[i].desc = InferenceEngine::TensorDesc(config.inConfs[i].desc.getPrecision(),
                                                              config.inConfs[i].desc.getDims(), {
                                                                      config.inConfs[i].desc.getBlockingDesc().getBlockDims(),
                                                                      config.inConfs[i].desc.getBlockingDesc().getOrder()
                                                              });
    }
    const auto& cnnLayer = getCnnLayer();
    if (!cnnLayer)
        THROW_IE_EXCEPTION << "Cannot create Split layer " << getName() << " without CNNLayer!";
    if (config.outConfs.size() != outDims.size())
        THROW_IE_EXCEPTION << "Invalid config for Split layer " << getName();
    // Each output becomes a window into the input buffer: same strides as the
    // input, shifted by the cumulative number of elements of preceding outputs.
    size_t offset = 0;
    for (size_t i = 0; i < cnnLayer->outData.size(); i++) {
        size_t confNum = i;
        config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(),
                                                              config.outConfs[i].desc.getDims(), {
                                                                      config.outConfs[i].desc.getBlockingDesc().getBlockDims(),
                                                                      config.outConfs[i].desc.getBlockingDesc().getOrder(),
                                                                      config.inConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset,
                                                                      config.inConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(),
                                                                      config.inConfs[0].desc.getBlockingDesc().getStrides()
                                                              });
        // Advance by this output's element count from the split axis onward
        // (product of the trailing block dims).
        size_t axisSize = 1;
        for (size_t j = axis; j < config.outConfs[confNum].desc.getBlockingDesc().getBlockDims().size(); j++) {
            axisSize *= config.outConfs[confNum].desc.getBlockingDesc().getBlockDims()[j];
        }
        offset += axisSize;
    }
    initDescriptor(config);
}
421
422 void MKLDNNSplitNode::setDynamicBatchLim(int lim) {
423     if (axis == 0)
424         THROW_IE_EXCEPTION << "Dynamic batch is not supported by split layer with axis == 0 parameter";
425
426     dynBatchLim = lim;
427     if (prim) {
428         prim.setBatchLimit(batchToProcess(), getParentEdges().size(), getChildEdges().size());
429     }
430 }