// Source: platform/upstream/dldt.git (OpenVINO 2019 R1 content)
// Path:   inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #include "mkldnn_split_node.h"
6 #include <ie_layers.h>
7 #include <string>
8 #include <vector>
9 #include <map>
10 #include <mkldnn_types.h>
11 #include <mkldnn_extension_utils.h>
12 #include <limits>
13
14 using namespace mkldnn;
15 using namespace MKLDNNPlugin;
16 using namespace InferenceEngine;
17
18 MKLDNNSplitNode::MKLDNNSplitNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng) : MKLDNNNode(layer, eng) {}
19
20 void MKLDNNSplitNode::getSupportedDescriptors() {
21     auto * splitLayer = dynamic_cast<SplitLayer*>(getCnnLayer().get());
22
23     if (splitLayer == nullptr)
24         THROW_IE_EXCEPTION << "Cannot convert split layer.";
25
26     if (getParentEdges().size() != 1)
27         THROW_IE_EXCEPTION << "Incorrect number of input nodes.";
28     if (getChildEdges().empty())
29         THROW_IE_EXCEPTION << "Incorrect number of output nodes.";
30
31     axis = splitLayer->_axis;
32     if (axis >= getParentEdgeAt(0)->getDims().ndims())
33         THROW_IE_EXCEPTION << "Invalid value of axis parameter in split layer";
34 }
35
// Builds the list of supported primitive descriptors for Split:
//   1. a reference implementation with layout "any" (always supported);
//   2. a planar strided config whose outputs view the input buffer in-place;
//   3. blocked-channel configs (nChw8c / nChw16c) when axis == 1, rank is 4 or 5,
//      and every channel count is divisible by the block size.
// Precision is forced to FP32 for both input and output regardless of the model.
void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    // Clamp input precision to FP32 (only FP32 is supported by this node).
    InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
    if (precision != InferenceEngine::Precision::FP32)
        precision = InferenceEngine::Precision::FP32;
    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
    // Same clamp for the output precision.
    precision = getCnnLayer()->outData[0]->getPrecision();
    if (precision != InferenceEngine::Precision::FP32)
        precision = InferenceEngine::Precision::FP32;
    auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);

    auto srcDims = getParentEdgeAt(0)->getDims();

    // Config #1: single input, one output per split chunk, layout left to mkldnn.
    InferenceEngine::LayerConfig config;
    config.dynBatchSupport = true;
    config.inConfs.resize(1);
    config.inConfs[0].inPlace = -1;
    config.inConfs[0].constant = false;
    config.inConfs[0].desc = MKLDNNMemoryDesc(srcDims, inputDataType, memory::format::any);
    config.outConfs.resize(outDims.size());

    if (srcDims.ndims() < 2)
        THROW_IE_EXCEPTION << "Split " << getName() << " isn't supported 1d blobs";

    // Validate each output: same rank as the first output, and all dims equal to
    // the input except along the split axis; accumulate the axis sizes.
    auto axis_size = 0;
    auto dstFirstDims = getChildEdgeAt(0)->getDims();
    for (size_t i = 0; i < outDims.size(); i++) {
        auto o_Dims = outDims[i];
        if (dstFirstDims.ndims() != o_Dims.ndims()) {
            THROW_IE_EXCEPTION << "Split " << getName() << " supports only output blob with equal number of dimensions";
        }

        config.outConfs[i].inPlace = -1;
        config.outConfs[i].constant = false;
        config.outConfs[i].desc = MKLDNNMemoryDesc(o_Dims, outputDataType, memory::format::any);
        axis_size += o_Dims[axis];
        for (size_t j = 0; j < dstFirstDims.ndims(); j++) {
            if (j == axis)
                continue;
            if (o_Dims[j] != dstFirstDims[j])
                THROW_IE_EXCEPTION << "Split " << getName() << " has incorrect output dimensions";
        }
    }
    // Substitute the summed axis size and compare total element counts to make
    // sure the outputs exactly tile the input.
    dstFirstDims[axis] = axis_size;
    if (dstFirstDims.size() != srcDims.size())
        THROW_IE_EXCEPTION << "The sizes of input blob and sum of output blobs are not equal.";
    supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref);

    auto numOfDim = static_cast<size_t>(srcDims.ndims());

    // Config #2: planar layout with explicit strides so outputs can alias the
    // input buffer (inPlace = 0). The max() value is used as an "undefined"
    // sentinel for offset/strides — presumably matched by isUninitTensorDesc()
    // and filled in later from the actual neighbor layout; TODO confirm.
    SizeVector order;
    SizeVector offsets(numOfDim, 0lu);
    size_t offset = std::numeric_limits<size_t>::max();
    for (size_t i = 0; i < numOfDim; i++) {
        order.push_back(i);
    }

    // Strides for dims before the axis are left undefined (sentinel), strides
    // from the axis onward are the dense row-major strides of the input.
    SizeVector strides(numOfDim);
    strides[numOfDim - 1] = 1;
    for (size_t i = 2; i <= numOfDim; i++) {
        if (numOfDim - i < axis) {
            strides[numOfDim - i] = std::numeric_limits<size_t>::max();
        } else {
            strides[numOfDim - i] = strides[numOfDim - i + 1] * srcDims[numOfDim - i + 1];
        }
    }

    config.inConfs[0].desc = TensorDesc(Precision::FP32, srcDims.ToSizeVector(), {srcDims.ToSizeVector(), order, offset, offsets, strides});
    for (size_t i = 0; i < outDims.size(); i++) {
        auto dims = outDims[i].ToSizeVector();
        config.outConfs[i].inPlace = 0;  // output i views input buffer 0
        config.outConfs[i].desc = TensorDesc(Precision::FP32, dims,
                                            {dims, order, offset, offsets, strides});
    }
    supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);

    // Blocked layouts are only offered for 4D/5D tensors split along channels.
    if ((numOfDim != 4 && numOfDim != 5) || axis != 1)
        return;

    // Blocked order: outer dims in natural order plus an inner channel block.
    order.push_back(1);
    numOfDim = order.size();
    offsets = SizeVector(numOfDim, 0lu);

    // nChw8c and nChw16c
    for (size_t sizeS : {8lu, 16lu}) {
        SizeVector blkDims = srcDims.ToSizeVector();
        if (blkDims[1] % sizeS)
            continue;  // input channels not divisible by the block size
        // NOTE: the ternary is always 0 here because of the check above.
        blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu);
        blkDims.push_back(sizeS);

        // Same stride scheme as the planar config, over the blocked dims.
        strides.resize(numOfDim);
        strides[numOfDim - 1] = 1lu;
        for (size_t i = 2; i <= numOfDim; i++) {
            if (numOfDim - i < axis) {
                strides[numOfDim - i] = std::numeric_limits<size_t>::max();
            } else {
                strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1];
            }
        }
        config.inConfs[0].desc = TensorDesc(Precision::FP32, srcDims.ToSizeVector(), {blkDims, order, offset, offsets, strides});

        // In-place blocked split only works if every output's channel count is
        // also divisible by the block size.
        bool canInplace = true;
        for (size_t i = 0; i < outDims.size(); i++) {
            auto dims = outDims[i].ToSizeVector();
            blkDims = dims;

            if (blkDims[1] % sizeS) {
                canInplace = false;
                break;
            }
            blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu);
            blkDims.push_back(sizeS);
            config.outConfs[i].desc = TensorDesc(Precision::FP32, dims, {blkDims, order, offset, offsets, strides});
        }
        if (canInplace)
            supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
    }
}
157
158 void MKLDNNSplitNode::createPrimitive() {
159     auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
160     if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
161         THROW_IE_EXCEPTION << "Input memory didn't allocate.";
162     for (size_t i = 0; i < getChildEdges().size(); i++) {
163         if (!getChildEdgeAt(i)->getMemoryPtr() || !getChildEdgeAt(i)->getMemory().GetPrimitivePtr())
164             THROW_IE_EXCEPTION << "Destination memory didn't allocate.";
165     }
166     if (getSelectedPrimitiveDescriptor() == nullptr)
167         THROW_IE_EXCEPTION << "Preferable primitive descriptor does not set.";
168 }
169
// Reference (non-optimized) execution: copies the input blob into each output
// blob element by element, honoring the strides/offsets of both tensor
// descriptors. When an in-place descriptor was selected (isOptimized()), the
// outputs already alias the input buffer and no work is needed.
void MKLDNNSplitNode::execute(mkldnn::stream strm) {
    if (isOptimized())
        return;

    // FIXME: add more optimal implementation
    MKLDNNDims par_dims = getParentEdgeAt(0)->getDims();
    int MB = batchToProcess();  // dynamic-batch-aware batch size
    auto srcBlob = getParentEdgeAt(0)->getBlob();
    const auto *srcData = srcBlob->cbuffer().as<const float *>();

    // Product of all dimensions before the split axis ("outer" iterations),
    // with the real batch dimension replaced by the dynamic batch MB.
    // NOTE(review): dims() is indexed from the end here, which suggests it is
    // stored in reversed order — confirm against InferenceEngine::Blob.
    size_t outerSize = 1;
    for (int i = 0; i < axis; i++) {
        if (i == 0)
            outerSize *= MB;
        else
            outerSize *= srcBlob->dims()[srcBlob->dims().size() - i - 1];
    }

    size_t srcSize = getParentEdgeAt(0)->getMemory().GetSize();
    // Logical distance (in elements) between two consecutive outer iterations
    // in the source layout.
    size_t src_batch_off = srcBlob->getTensorDesc().offset(srcBlob->size() / outerSize)
            - srcBlob->getTensorDesc().offset(0);

    // sIdx runs over the source's inner slice and is shared across outputs, so
    // each output consumes the next contiguous chunk along the split axis.
    for (size_t i = 0, sIdx = 0; i < getChildEdges().size(); i++) {
        auto dstBlob = getChildEdgeAt(i)->getBlob();
        auto *dstData = dstBlob->buffer().as<float *>();

        // Product of all dimensions from the split axis to the end for this output.
        size_t innerSize = 1;
        for (size_t j = axis; j < dstBlob->dims().size(); j++) {
            innerSize *= dstBlob->dims()[dstBlob->dims().size() - j - 1];
        }

        // Distance between two consecutive outer iterations in this destination.
        size_t dst_batch_off = dstBlob->getTensorDesc().offset(innerSize) - dstBlob->getTensorDesc().offset(0);

        for (size_t dIdx = 0; dIdx < innerSize; dIdx++, sIdx++) {
            for (unsigned b = 0; b < outerSize; b++) {
                // Guard against reading past the allocated source memory.
                if (sIdx + b*src_batch_off >= srcSize)
                    THROW_IE_EXCEPTION << "Incorrect configuration of split layer " << getName() << "!";
                // TensorDesc::offset() maps a logical element index to the
                // physical offset of the (possibly strided/blocked) layout.
                dstData[b * dst_batch_off + dstBlob->getTensorDesc().offset(dIdx)] =
                        srcData[b * src_batch_off + srcBlob->getTensorDesc().offset(sIdx)];
            }
        }
    }
}
213
214 bool MKLDNNSplitNode::created() const {
215     return getType() == Split;
216 }
217
// Chooses a primitive descriptor for Split by voting on the memory formats of
// already-configured neighbors:
//   - if the caller forces the reference implementation, pick descriptor 0;
//   - collect the "unknown"-type in-place descriptors with concrete layouts;
//   - count the formats used by parent outputs and child inputs, pick the most
//     frequent format that does not require extended (padded) blocks;
//   - prefer an in-place descriptor matching that format, then a non-in-place
//     one, and finally fall back to descriptor 0.
void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
    // Explicit priority for the reference implementation wins immediately.
    if (implPriorities.size() > 0 && implPriorities[0] == impl_desc_type::ref) {
        selectPrimitiveDescriptorByIndex(0);
        return;
    }
    // Precisions are clamped to FP32, mirroring initSupportedPrimitiveDescriptors().
    InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
    if (precision != InferenceEngine::Precision::FP32)
        precision = InferenceEngine::Precision::FP32;
    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
    precision = getCnnLayer()->outData[0]->getPrecision();
    if (precision != InferenceEngine::Precision::FP32)
        precision = InferenceEngine::Precision::FP32;
    auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);

    // Gather candidate in-place descriptors: implementation type "unknown",
    // outConfs[0] in-place, and at least one concrete (non-ANY) layout.
    bool hasUnknown = false;
    std::vector<size_t> canSelectPrimitive;
    for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
        bool hasAny = true;
        auto &primDescInfo = supportedPrimitiveDescriptors[i];
        if (primDescInfo.getImplementationType() != impl_desc_type::unknown ||
            primDescInfo.getConfig().outConfs[0].inPlace < 0)
            continue;
        hasUnknown = true;
        // hasAny stays true only if every input AND output layout is ANY.
        for (auto iInfo : primDescInfo.getConfig().inConfs) {
            if (iInfo.desc.getLayout() != InferenceEngine::Layout::ANY) {
                hasAny = false;
                break;
            }
        }

        if (hasAny) {
            for (auto oInfo : primDescInfo.getConfig().outConfs) {
                if (oInfo.desc.getLayout() != InferenceEngine::Layout::ANY) {
                    hasAny = false;
                    break;
                }
            }
        }

        if (!hasAny) {
            canSelectPrimitive.push_back(i);
        }
    }

    // If exactly one concrete in-place candidate exists, take it outright.
    bool canOptimize = false;
    if (hasUnknown) {
        canOptimize = true;

        if (canSelectPrimitive.size() == 1) {
            selectPrimitiveDescriptorByIndex(static_cast<int>(canSelectPrimitive[0]));
            return;
        }
    }

    // Vote: tally the formats already selected by parents (their output descs)...
    std::map<mkldnn::memory::format, size_t> formatFrequency;
    for (size_t i = 0; i < getParentEdges().size(); i++) {
        auto parentEdge = getParentEdgeAt(i);
        auto parent = parentEdge->getParent();

        if (parent->getSelectedPrimitiveDescriptor() == nullptr)
            continue;

        int outputIndex = parentEdge->getOutputNum();
        if (outputIndex < 0)
            THROW_IE_EXCEPTION << "Cannot find index of output node";
        if (outputIndex >= parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size())
            outputIndex = 0;  // fall back to the first output config
        auto outDesc = MKLDNNMemoryDesc(parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIndex].desc);
        if (!outDesc)
            continue;
        if (formatFrequency.find(outDesc.getFormat()) != formatFrequency.end())
            formatFrequency[outDesc.getFormat()] += 1;
        else
            formatFrequency[outDesc.getFormat()] = 1;
    }
    // ...and by children (their input descs).
    for (size_t i = 0; i < getChildEdges().size(); i++) {
        auto childEdge = getChildEdgeAt(i);
        auto child = childEdge->getChild();
        if (child->getSelectedPrimitiveDescriptor() == nullptr)
            continue;
        // NOTE(review): uses getOutputNum() to index the child's inConfs —
        // verify this accessor returns the child-side port index as intended.
        int inputIndex = childEdge->getOutputNum();
        if (inputIndex < 0)
            THROW_IE_EXCEPTION << "Cannot find index of output node";
        if (inputIndex >= child->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size())
            inputIndex = 0;  // fall back to the first input config
        auto outDesc = MKLDNNMemoryDesc(child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIndex].desc);
        if (!outDesc)
            continue;
        if (formatFrequency.find(outDesc.getFormat()) != formatFrequency.end())
            formatFrequency[outDesc.getFormat()] += 1;
        else
            formatFrequency[outDesc.getFormat()] = 1;
    }

    // Pick the most frequent neighbor format; default to the plain format.
    // Formats that would extend (pad) blocks on the input are skipped.
    size_t maxCount = 0;
    mkldnn::memory::format convertTo = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims());
    for (auto &it : formatFrequency) {
        if (it.second > maxCount && !MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, it.first).blocksExtended()) {
            maxCount = it.second;
            convertTo = it.first;
        }
    }

    // In-place is impossible if the chosen format pads the input or any output.
    if (canOptimize && MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, convertTo).blocksExtended())
        canOptimize = false;
    for (size_t i = 0; canOptimize && i < getChildEdges().size(); i++) {
        if (MKLDNNMemoryDesc(getChildEdgeAt(i)->getDims(), outputDataType, convertTo).blocksExtended())
            canOptimize = false;
    }

    // Preference 1: an in-place candidate whose input format matches the vote.
    if (canOptimize) {
        for (auto supportedPdIndex : canSelectPrimitive) {
            if (MKLDNNMemoryDesc(supportedPrimitiveDescriptors[supportedPdIndex].getConfig().inConfs[0].desc).getFormat() == convertTo) {
                selectPrimitiveDescriptorByIndex(static_cast<int>(supportedPdIndex));
                return;
            }
        }
    }

    // Preference 2: a non-"unknown" descriptor whose output format matches and
    // whose inputs do not require extended blocks.
    for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
        auto &primDescInfo = supportedPrimitiveDescriptors[i];
        if (primDescInfo.getImplementationType() == impl_desc_type::unknown)
            continue;
        if (convertTo == MKLDNNMemoryDesc(supportedPrimitiveDescriptors[i].getConfig().outConfs[0].desc).getFormat()) {
            size_t num = 0;
            for (num = 0; num < getParentEdges().size(); num++) {
                if (MKLDNNMemoryDesc(getParentEdgeAt(num)->getDims(), inputDataType, convertTo).blocksExtended())
                    break;
            }
            if (num == getParentEdges().size()) {
                selectPrimitiveDescriptorByIndex(i);
                return;
            }
        }
    }

    // Last resort: the reference descriptor.
    selectPrimitiveDescriptorByIndex(0);
}
356
357 bool MKLDNNSplitNode::isOptimized() {
358     return getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].inPlace >= 0;
359 }
360
// Finalizes the selected config for the in-place (optimized) mode: resolves any
// still-uninitialized input descriptors (preferring the parent's already-chosen
// output layout), then rewrites each output descriptor as a view into the input
// buffer, advancing a running element offset along the split axis. Non-optimized
// mode is delegated to the base class.
void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() {
    if (!isOptimized()) {
        MKLDNNNode::initOptimalPrimitiveDescriptor();
        return;
    }

    auto config = getSelectedPrimitiveDescriptor()->getConfig();
    if (isInitConfig(config))
        return;  // nothing left uninitialized

    for (size_t i = 0; i < config.inConfs.size(); i++) {
        // Only concrete layouts with sentinel (uninitialized) offsets/strides
        // need resolving.
        if (config.inConfs[i].desc.getLayout() == InferenceEngine::Layout::ANY ||
            !isUninitTensorDesc(config.inConfs[i].desc))
            continue;

        int num = getParentEdgeAt(i)->getOutputNum();
        if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) {
            if (num >= 0) {
                // If the parent's matching output desc is itself uninitialized
                // but in-place, let the parent finalize first (recursive fix-up).
                if (isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) &&
                        getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].inPlace >= 0)
                    getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor();
                // Adopt the parent's output desc when it is compatible with ours.
                if (!isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) &&
                    MKLDNNExtensionUtils::initTensorsAreEqual(
                            getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc,
                            config.inConfs[i].desc)) {
                    config.inConfs[i].desc = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc;
                    continue;
                }
            }
        }
        // Fallback: rebuild the desc as a dense layout with default offsets and
        // strides derived from the block dims/order.
        config.inConfs[i].desc = InferenceEngine::TensorDesc(config.inConfs[i].desc.getPrecision(),
                                                              config.inConfs[i].desc.getDims(), {
                                                                      config.inConfs[i].desc.getBlockingDesc().getBlockDims(),
                                                                      config.inConfs[i].desc.getBlockingDesc().getOrder()
                                                              });
    }
    const auto& cnnLayer = getCnnLayer();
    if (!cnnLayer)
        THROW_IE_EXCEPTION << "Cannot create Split layer " << getName() << " without CNNLayer!";
    if (config.outConfs.size() != outDims.size())
        THROW_IE_EXCEPTION << "Invalid config for Split layer " << getName();
    // Each output becomes a window into the input buffer: same strides as the
    // input, shifted by the cumulative number of elements of preceding outputs.
    size_t offset = 0;
    for (size_t i = 0; i < cnnLayer->outData.size(); i++) {
        size_t confNum = i;
        config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(),
                                                              config.outConfs[i].desc.getDims(), {
                                                                      config.outConfs[i].desc.getBlockingDesc().getBlockDims(),
                                                                      config.outConfs[i].desc.getBlockingDesc().getOrder(),
                                                                      config.inConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset,
                                                                      config.inConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(),
                                                                      config.inConfs[0].desc.getBlockingDesc().getStrides()
                                                              });
        // Advance by this output's element count from the split axis onward
        // (product of the trailing block dims).
        size_t axisSize = 1;
        for (size_t j = axis; j < config.outConfs[confNum].desc.getBlockingDesc().getBlockDims().size(); j++) {
            axisSize *= config.outConfs[confNum].desc.getBlockingDesc().getBlockDims()[j];
        }
        offset += axisSize;
    }
    initDescriptor(config);
}
421
422 void MKLDNNSplitNode::setDynamicBatchLim(int lim) {
423     if (axis == 0)
424         THROW_IE_EXCEPTION << "Dynamic batch is not supported by split layer with axis == 0 parameter";
425
426     dynBatchLim = lim;
427     if (prim) {
428         prim.setBatchLimit(batchToProcess(), getParentEdges().size(), getChildEdges().size());
429     }
430 }