// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
5 #include "mkldnn_split_node.h"
10 #include <mkldnn_types.h>
11 #include <mkldnn_extension_utils.h>
14 using namespace mkldnn;
15 using namespace MKLDNNPlugin;
16 using namespace InferenceEngine;
18 MKLDNNSplitNode::MKLDNNSplitNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng) : MKLDNNNode(layer, eng) {}
// Validates the layer/graph topology for a Split node and caches the split
// axis. Split must have exactly one input edge and at least one output edge,
// and the axis must index an existing dimension of the input.
20 void MKLDNNSplitNode::getSupportedDescriptors() {
// The CNN layer must really be a SplitLayer; dynamic_cast yields nullptr otherwise.
21 auto * splitLayer = dynamic_cast<SplitLayer*>(getCnnLayer().get());
23 if (splitLayer == nullptr)
24 THROW_IE_EXCEPTION << "Cannot convert split layer.";
// Topology checks: exactly one producer, one or more consumers.
26 if (getParentEdges().size() != 1)
27 THROW_IE_EXCEPTION << "Incorrect number of input nodes.";
28 if (getChildEdges().empty())
29 THROW_IE_EXCEPTION << "Incorrect number of output nodes.";
// Cache the axis and make sure it is within the input tensor's rank.
31 axis = splitLayer->_axis;
32 if (axis >= getParentEdgeAt(0)->getDims().ndims())
33 THROW_IE_EXCEPTION << "Invalid value of axis parameter in split layer";
// Builds the list of supported primitive descriptors for Split:
//   1) a reference (impl_desc_type::ref) config with layout 'any';
//   2) an in-place config over the plain (strided) layout;
//   3) in-place configs for blocked channel layouts (nChw8c / nChw16c),
//      emitted only when the channel count is divisible by the block size.
// Precisions other than FP32 are coerced to FP32 for both input and output.
36 void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
// Idempotence guard: descriptors are initialized only once.
37 if (!supportedPrimitiveDescriptors.empty())
40 InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
41 if (precision != InferenceEngine::Precision::FP32)
42 precision = InferenceEngine::Precision::FP32;
43 auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
44 precision = getCnnLayer()->outData[0]->getPrecision();
45 if (precision != InferenceEngine::Precision::FP32)
46 precision = InferenceEngine::Precision::FP32;
47 auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
49 auto srcDims = getParentEdgeAt(0)->getDims();
// --- Config 1: reference implementation, layout left as 'any'. ---
51 InferenceEngine::LayerConfig config;
52 config.dynBatchSupport = true;
53 config.inConfs.resize(1);
54 config.inConfs[0].inPlace = -1;
55 config.inConfs[0].constant = false;
56 config.inConfs[0].desc = MKLDNNMemoryDesc(srcDims, inputDataType, memory::format::any);
57 config.outConfs.resize(outDims.size());
// 1D blobs are not supported by this node.
59 if (srcDims.ndims() < 2)
60 THROW_IE_EXCEPTION << "Split " << getName() << " isn't supported 1d blobs";
// All outputs must have the same rank as the first output, and match it in
// every dimension except 'axis'; axis_size accumulates the split extents.
63 auto dstFirstDims = getChildEdgeAt(0)->getDims();
64 for (size_t i = 0; i < outDims.size(); i++) {
65 auto o_Dims = outDims[i];
66 if (dstFirstDims.ndims() != o_Dims.ndims()) {
67 THROW_IE_EXCEPTION << "Split " << getName() << " supports only output blob with equal number of dimensions";
70 config.outConfs[i].inPlace = -1;
71 config.outConfs[i].constant = false;
72 config.outConfs[i].desc = MKLDNNMemoryDesc(o_Dims, outputDataType, memory::format::any);
73 axis_size += o_Dims[axis];
74 for (size_t j = 0; j < dstFirstDims.ndims(); j++) {
// NOTE(review): non-axis dimensions are compared here; the 'skip j == axis'
// step is not visible in this chunk — confirm against the full source.
77 if (o_Dims[j] != dstFirstDims[j])
78 THROW_IE_EXCEPTION << "Split " << getName() << " has incorrect output dimensions";
// Cross-check: summed output extents along 'axis' must reproduce the input shape.
81 dstFirstDims[axis] = axis_size;
82 if (dstFirstDims.size() != srcDims.size())
83 THROW_IE_EXCEPTION << "The sizes of input blob and sum of output blobs are not equal.";
84 supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref);
// --- Config 2: in-place split over the plain strided layout. ---
86 auto numOfDim = static_cast<size_t>(srcDims.ndims());
89 SizeVector offsets(numOfDim, 0lu);
// SIZE_MAX is used as the "undefined/deferred" marker for offset and for
// strides below the split axis; they are resolved when the graph is finalized.
90 size_t offset = std::numeric_limits<size_t>::max();
91 for (size_t i = 0; i < numOfDim; i++) {
// Dense row-major strides; dims below 'axis' get the SIZE_MAX marker.
95 SizeVector strides(numOfDim);
96 strides[numOfDim - 1] = 1;
97 for (size_t i = 2; i <= numOfDim; i++) {
98 if (numOfDim - i < axis) {
99 strides[numOfDim - i] = std::numeric_limits<size_t>::max();
101 strides[numOfDim - i] = strides[numOfDim - i + 1] * srcDims[numOfDim - i + 1];
// NOTE(review): 'order' is referenced below but its construction (identity
// permutation for the plain layout) is not visible in this chunk.
105 config.inConfs[0].desc = TensorDesc(Precision::FP32, srcDims.ToSizeVector(), {srcDims.ToSizeVector(), order, offset, offsets, strides});
// Outputs view into the input buffer: inPlace = 0 points at input port 0.
106 for (size_t i = 0; i < outDims.size(); i++) {
107 auto dims = outDims[i].ToSizeVector();
108 config.outConfs[i].inPlace = 0;
109 config.outConfs[i].desc = TensorDesc(Precision::FP32, dims,
110 {dims, order, offset, offsets, strides});
112 supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
// --- Config 3: blocked channel layouts, only for 4D/5D tensors split on axis 1. ---
114 if ((numOfDim != 4 && numOfDim != 5) || axis != 1)
// Rank grows by one for the channel-block dimension appended to 'order'.
118 numOfDim = order.size();
119 offsets = SizeVector(numOfDim, 0lu);
121 // nChw8c and nChw16c
122 for (size_t sizeS : {8lu, 16lu}) {
123 SizeVector blkDims = srcDims.ToSizeVector();
// Skip this block size if the channel count is not divisible by it.
124 if (blkDims[1] % sizeS)
126 blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu);
127 blkDims.push_back(sizeS);
// Same stride scheme as the plain layout, now over the blocked dims.
129 strides.resize(numOfDim);
130 strides[numOfDim - 1] = 1lu;
131 for (size_t i = 2; i <= numOfDim; i++) {
132 if (numOfDim - i < axis) {
133 strides[numOfDim - i] = std::numeric_limits<size_t>::max();
135 strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1];
138 config.inConfs[0].desc = TensorDesc(Precision::FP32, srcDims.ToSizeVector(), {blkDims, order, offset, offsets, strides});
// Every output's channel extent must also fit the block size, otherwise the
// blocked in-place variant is rejected for this sizeS.
140 bool canInplace = true;
141 for (size_t i = 0; i < outDims.size(); i++) {
142 auto dims = outDims[i].ToSizeVector();
145 if (blkDims[1] % sizeS) {
149 blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu);
150 blkDims.push_back(sizeS);
151 config.outConfs[i].desc = TensorDesc(Precision::FP32, dims, {blkDims, order, offset, offsets, strides});
154 supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
// Sanity checks before execution: the input memory, every output memory, and
// a selected primitive descriptor must all exist. Split itself creates no
// mkldnn primitive — execution is either in-place or done by execute().
158 void MKLDNNSplitNode::createPrimitive() {
159 auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
160 if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
161 THROW_IE_EXCEPTION << "Input memory didn't allocate.";
// Each child edge must have allocated destination memory.
162 for (size_t i = 0; i < getChildEdges().size(); i++) {
163 if (!getChildEdgeAt(i)->getMemoryPtr() || !getChildEdgeAt(i)->getMemory().GetPrimitivePtr())
164 THROW_IE_EXCEPTION << "Destination memory didn't allocate.";
166 if (getSelectedPrimitiveDescriptor() == nullptr)
167 THROW_IE_EXCEPTION << "Preferable primitive descriptor does not set.";
// Reference (non-in-place) execution path: copies the input blob into the
// output blobs element by element through TensorDesc::offset(), so it works
// for any supported layout. outerSize covers the dimensions before 'axis'
// (batched copies), innerSize the dimensions from 'axis' onward per output.
170 void MKLDNNSplitNode::execute(mkldnn::stream strm) {
174 // FIXME: add more optimal implementation
175 MKLDNNDims par_dims = getParentEdgeAt(0)->getDims();
176 int MB = batchToProcess();
177 auto srcBlob = getParentEdgeAt(0)->getBlob();
178 const auto *srcData = srcBlob->cbuffer().as<const float *>();
180 size_t outerSize = 1;
181 for (int i = 0; i < axis; i++) {
// NOTE(review): blob->dims() in this IE version stores dims in reverse
// order, hence the size() - i - 1 indexing — confirm against Blob docs.
185 outerSize *= srcBlob->dims()[srcBlob->dims().size() - i - 1];
188 size_t srcSize = getParentEdgeAt(0)->getMemory().GetSize();
// Stride (in elements) between consecutive outer slices of the source.
189 size_t src_batch_off = srcBlob->getTensorDesc().offset(srcBlob->size() / outerSize)
190 - srcBlob->getTensorDesc().offset(0);
// sIdx is a running logical index into the source, shared across outputs.
192 for (size_t i = 0, sIdx = 0; i < getChildEdges().size(); i++) {
193 auto dstBlob = getChildEdgeAt(i)->getBlob();
194 auto *dstData = dstBlob->buffer().as<float *>();
196 size_t innerSize = 1;
197 for (size_t j = axis; j < dstBlob->dims().size(); j++) {
198 innerSize *= dstBlob->dims()[dstBlob->dims().size() - j - 1];
// Stride (in elements) between consecutive outer slices of this destination.
201 size_t dst_batch_off = dstBlob->getTensorDesc().offset(innerSize) - dstBlob->getTensorDesc().offset(0);
203 for (size_t dIdx = 0; dIdx < innerSize; dIdx++, sIdx++) {
204 for (unsigned b = 0; b < outerSize; b++) {
// Bounds guard: a misconfigured split would otherwise read past the source.
205 if (sIdx + b*src_batch_off >= srcSize)
206 THROW_IE_EXCEPTION << "Incorrect configuration of split layer " << getName() << "!";
207 dstData[b * dst_batch_off + dstBlob->getTensorDesc().offset(dIdx)] =
208 srcData[b * src_batch_off + srcBlob->getTensorDesc().offset(sIdx)];
// Returns true when this node instance really represents a Split operation.
214 bool MKLDNNSplitNode::created() const {
215 return getType() == Split;
// Chooses the primitive descriptor for this node. Preference order:
//   - the reference descriptor (index 0) when the user forces impl ref;
//   - an in-place descriptor whose memory format is the one most frequently
//     used by already-selected neighbors (parents' outputs / children's inputs);
//   - otherwise the first supported descriptor as a fallback.
218 void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
// Explicit user priority for the reference implementation wins outright.
219 if (implPriorities.size() > 0 && implPriorities[0] == impl_desc_type::ref) {
220 selectPrimitiveDescriptorByIndex(0);
223 InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
224 if (precision != InferenceEngine::Precision::FP32)
225 precision = InferenceEngine::Precision::FP32;
226 auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
227 precision = getCnnLayer()->outData[0]->getPrecision();
228 if (precision != InferenceEngine::Precision::FP32)
229 precision = InferenceEngine::Precision::FP32;
230 auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
// Collect candidate descriptors: in-place ('unknown' impl type, inPlace >= 0)
// whose input and output layouts are all concrete (not ANY).
232 bool hasUnknown = false;
233 std::vector<size_t> canSelectPrimitive;
234 for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
236 auto &primDescInfo = supportedPrimitiveDescriptors[i];
237 if (primDescInfo.getImplementationType() != impl_desc_type::unknown ||
238 primDescInfo.getConfig().outConfs[0].inPlace < 0)
241 for (auto iInfo : primDescInfo.getConfig().inConfs) {
242 if (iInfo.desc.getLayout() != InferenceEngine::Layout::ANY) {
249 for (auto oInfo : primDescInfo.getConfig().outConfs) {
250 if (oInfo.desc.getLayout() != InferenceEngine::Layout::ANY) {
258 canSelectPrimitive.push_back(i);
262 bool canOptimize = false;
// Exactly one viable in-place candidate: select it directly.
266 if (canSelectPrimitive.size() == 1) {
267 selectPrimitiveDescriptorByIndex(static_cast<int>(canSelectPrimitive[0]));
// Vote for a target format: count the formats already chosen by parents
// (their output configs) and children (their input configs).
272 std::map<mkldnn::memory::format, size_t> formatFrequency;
273 for (size_t i = 0; i < getParentEdges().size(); i++) {
274 auto parentEdge = getParentEdgeAt(i);
275 auto parent = parentEdge->getParent();
// Neighbors without a selected descriptor yet cannot vote.
277 if (parent->getSelectedPrimitiveDescriptor() == nullptr)
280 int outputIndex = parentEdge->getOutputNum();
282 THROW_IE_EXCEPTION << "Cannot find index of output node";
283 if (outputIndex >= parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size())
285 auto outDesc = MKLDNNMemoryDesc(parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIndex].desc);
288 if (formatFrequency.find(outDesc.getFormat()) != formatFrequency.end())
289 formatFrequency[outDesc.getFormat()] += 1;
291 formatFrequency[outDesc.getFormat()] = 1;
293 for (size_t i = 0; i < getChildEdges().size(); i++) {
294 auto childEdge = getChildEdgeAt(i);
295 auto child = childEdge->getChild();
296 if (child->getSelectedPrimitiveDescriptor() == nullptr)
298 int inputIndex = childEdge->getOutputNum();
300 THROW_IE_EXCEPTION << "Cannot find index of output node";
301 if (inputIndex >= child->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size())
303 auto outDesc = MKLDNNMemoryDesc(child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIndex].desc);
306 if (formatFrequency.find(outDesc.getFormat()) != formatFrequency.end())
307 formatFrequency[outDesc.getFormat()] += 1;
309 formatFrequency[outDesc.getFormat()] = 1;
// Pick the most frequent format that does not extend blocks (no padding),
// defaulting to the plain format for the input rank.
313 mkldnn::memory::format convertTo = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims());
314 for (auto &it : formatFrequency) {
315 if (it.second > maxCount && !MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, it.first).blocksExtended()) {
316 maxCount = it.second;
317 convertTo = it.first;
// In-place optimization is abandoned if the chosen format would pad the
// input or any of the outputs.
321 if (canOptimize && MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, convertTo).blocksExtended())
323 for (size_t i = 0; canOptimize && i < getChildEdges().size(); i++) {
324 if (MKLDNNMemoryDesc(getChildEdgeAt(i)->getDims(), outputDataType, convertTo).blocksExtended())
// Prefer the candidate in-place descriptor matching the voted format.
329 for (auto supportedPdIndex : canSelectPrimitive) {
330 if (MKLDNNMemoryDesc(supportedPrimitiveDescriptors[supportedPdIndex].getConfig().inConfs[0].desc).getFormat() == convertTo) {
331 selectPrimitiveDescriptorByIndex(static_cast<int>(supportedPdIndex));
// Otherwise fall back to any non-'unknown' descriptor whose output format
// matches and whose inputs are not block-padded.
337 for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
338 auto &primDescInfo = supportedPrimitiveDescriptors[i];
339 if (primDescInfo.getImplementationType() == impl_desc_type::unknown)
341 if (convertTo == MKLDNNMemoryDesc(supportedPrimitiveDescriptors[i].getConfig().outConfs[0].desc).getFormat()) {
343 for (num = 0; num < getParentEdges().size(); num++) {
344 if (MKLDNNMemoryDesc(getParentEdgeAt(num)->getDims(), inputDataType, convertTo).blocksExtended())
347 if (num == getParentEdges().size()) {
348 selectPrimitiveDescriptorByIndex(i);
// Last resort: the first supported descriptor (the reference one).
354 selectPrimitiveDescriptorByIndex(0);
// True when the selected descriptor performs the split in place, i.e. the
// first output config points back into the input buffer (inPlace >= 0).
357 bool MKLDNNSplitNode::isOptimized() {
358 return getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].inPlace >= 0;
// Finalizes the selected descriptor's tensor descs for the in-place case:
// the input desc is inherited from the parent's output when compatible, and
// each output desc is rebuilt to view into the input buffer (shared strides,
// shared padding, cumulative offset along the split axis). Non-optimized
// nodes defer to the generic MKLDNNNode implementation.
361 void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() {
362 if (!isOptimized()) {
363 MKLDNNNode::initOptimalPrimitiveDescriptor();
367 auto config = getSelectedPrimitiveDescriptor()->getConfig();
// Already-initialized configs need no further work.
368 if (isInitConfig(config))
371 for (size_t i = 0; i < config.inConfs.size(); i++) {
// Skip inputs whose layout is undetermined or whose desc is already concrete.
372 if (config.inConfs[i].desc.getLayout() == InferenceEngine::Layout::ANY ||
373 !isUninitTensorDesc(config.inConfs[i].desc))
376 int num = getParentEdgeAt(i)->getOutputNum();
377 if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) {
// If the parent's matching output desc is itself uninitialized but in-place,
// let the parent finalize first, then inherit its desc when compatible.
379 if (isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) &&
380 getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].inPlace >= 0)
381 getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor();
382 if (!isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) &&
383 MKLDNNExtensionUtils::initTensorsAreEqual(
384 getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc,
385 config.inConfs[i].desc)) {
386 config.inConfs[i].desc = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc;
// Fallback: rebuild the input desc from its own block dims and order
// (dense default strides/offsets).
391 config.inConfs[i].desc = InferenceEngine::TensorDesc(config.inConfs[i].desc.getPrecision(),
392 config.inConfs[i].desc.getDims(), {
393 config.inConfs[i].desc.getBlockingDesc().getBlockDims(),
394 config.inConfs[i].desc.getBlockingDesc().getOrder()
397 const auto& cnnLayer = getCnnLayer();
399 THROW_IE_EXCEPTION << "Cannot create Split layer " << getName() << " without CNNLayer!";
400 if (config.outConfs.size() != outDims.size())
401 THROW_IE_EXCEPTION << "Invalid config for Split layer " << getName();
// Each output shares the input's strides and padding; 'offset' accumulates
// the per-output displacement along the split axis into the input buffer.
// NOTE(review): the declarations of 'offset', 'confNum' and 'axisSize' are
// not visible in this chunk — confirm their initialization in the full source.
403 for (size_t i = 0; i < cnnLayer->outData.size(); i++) {
405 config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(),
406 config.outConfs[i].desc.getDims(), {
407 config.outConfs[i].desc.getBlockingDesc().getBlockDims(),
408 config.outConfs[i].desc.getBlockingDesc().getOrder(),
409 config.inConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset,
410 config.inConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(),
411 config.inConfs[0].desc.getBlockingDesc().getStrides()
// axisSize = product of block dims from 'axis' onward = elements per outer
// slice contributed by this output.
414 for (size_t j = axis; j < config.outConfs[confNum].desc.getBlockingDesc().getBlockDims().size(); j++) {
415 axisSize *= config.outConfs[confNum].desc.getBlockingDesc().getBlockDims()[j];
419 initDescriptor(config);
// Applies a dynamic-batch limit to the node. Splitting along the batch axis
// is incompatible with dynamic batching, hence the exception.
// NOTE(review): the guarding 'if (axis == 0)' condition for the THROW below is
// not visible in this chunk — confirm against the full source.
422 void MKLDNNSplitNode::setDynamicBatchLim(int lim) {
424 THROW_IE_EXCEPTION << "Dynamic batch is not supported by split layer with axis == 0 parameter";
// Propagate the effective batch to the underlying primitive wrapper.
428 prim.setBatchLimit(batchToProcess(), getParentEdges().size(), getChildEdges().size());