inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "mkldnn_infer_request.h"
#include "mkldnn_extension_utils.h"
#include "mkldnn_streams.h"
#include <vector>
#include <string>
#include <map>
#include <blob_factory.hpp>
#include <nodes/mkldnn_concat_node.h>
#include <nodes/mkldnn_split_node.h>

MKLDNNPlugin::MKLDNNInferRequest::MKLDNNInferRequest(InferenceEngine::InputsDataMap networkInputs,
                                                     InferenceEngine::OutputsDataMap networkOutputs)
        : InferRequestInternal(networkInputs, networkOutputs) {}

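// Pushes an input blob into the graph after checking that the blob actually
// holds elements of type T and that its memory has been allocated.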
template <typename T> void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob) {
    InferenceEngine::TBlob<T> *in_f = dynamic_cast<InferenceEngine::TBlob<T> *>(inputBlob.get());

    if (in_f == nullptr) {
        THROW_IE_EXCEPTION << "Input data precision not supported. Blob type does not match the expected input precision.";
    }

    if (in_f->readOnly() == nullptr) {
        THROW_IE_EXCEPTION << "Input data was not allocated.";
    }

    graph->PushInputData(inputName, inputBlob);
}

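// Runs one inference: pre-processes the inputs, converts blobs whose precision
// mkldnn cannot consume directly (U16 always; I16 and U8 when a mean image has
// to be applied) into FP32 copies, executes the graph, and pulls the outputs.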
void MKLDNNPlugin::MKLDNNInferRequest::InferImpl() {
    IE_PROFILING_AUTO_SCOPE(MKLDNN_INFER)
    if (!graph || !graph->IsReady()) {
        THROW_IE_EXCEPTION << "Network not loaded.";
    }
    auto infer = [this] {
        // Execute input pre-processing.
        execDataPreprocessing(_inputs);

        changeDefaultPtr();
        // Converted blobs must be retained until the inference finishes.
        std::vector<InferenceEngine::Blob::Ptr> convertedInputs;
        for (auto input : _inputs) {
            if (!_networkInputs[input.first]) {
                THROW_IE_EXCEPTION << "Input blob map contains blob '" << input.first
                                   << "' that was not registered during IInferencePlugin::LoadNetwork";
            }
            /*if (_networkInputs[input.first]->getInputPrecision() != input.second->precision()) {
                THROW_IE_EXCEPTION << "Different input precision for input " << input.first
                                   << " registered in IInferencePlugin::LoadNetwork network and IInferencePlugin::Infer. "
                                   << _networkInputs[input.first]->getInputPrecision() << " vs "
                                   << input.second->precision();
            }*/

            InferenceEngine::Blob::Ptr iconv;
            InferenceEngine::TBlob<float> *in_f = nullptr;
            switch (input.second->precision()) {
                case InferenceEngine::Precision::FP32:
                    pushInput<float>(input.first, input.second);
                    break;
                case InferenceEngine::Precision::I32:
                    pushInput<int32_t>(input.first, input.second);
                    break;
                case InferenceEngine::Precision::I8:
                    pushInput<int8_t>(input.first, input.second);
                    break;
                case InferenceEngine::Precision::U16:
                    // U16 is unsupported by mkldnn, so here we convert the blob and send FP32
                    iconv = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(
                            InferenceEngine::Precision::FP32,
                            input.second->getTensorDesc().getLayout(), input.second->dims());
                    convertedInputs.push_back(iconv);
                    iconv->allocate();
                    in_f = dynamic_cast<InferenceEngine::TBlob<float> *>(iconv.get());
                    InferenceEngine::copyToFloat<uint16_t>(in_f->data(), input.second.get());
                    pushInput<float>(input.first, iconv);
                    break;
                case InferenceEngine::Precision::I16:
                    if (graph->hasMeanImageFor(input.first)) {
                        // If a mean image exists, we convert the blob and send FP32
                        iconv = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(
                                InferenceEngine::Precision::FP32,
                                input.second->getTensorDesc().getLayout(), input.second->dims());
                        convertedInputs.push_back(iconv);
                        iconv->allocate();
                        in_f = dynamic_cast<InferenceEngine::TBlob<float> *>(iconv.get());
                        InferenceEngine::copyToFloat<int16_t>(in_f->data(), input.second.get());
                        pushInput<float>(input.first, iconv);
                    } else {
                        // Otherwise we can send I16 directly
                        pushInput<int16_t>(input.first, input.second);
                    }
                    break;
                case InferenceEngine::Precision::U8:
                    if (graph->hasMeanImageFor(input.first)) {
                        // If a mean image exists, we convert the blob and send FP32
                        iconv = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(
                                InferenceEngine::Precision::FP32,
                                input.second->getTensorDesc().getLayout(), input.second->dims());
                        convertedInputs.push_back(iconv);
                        iconv->allocate();
                        in_f = dynamic_cast<InferenceEngine::TBlob<float> *>(iconv.get());
                        InferenceEngine::copyToFloat<uint8_t>(in_f->data(), input.second.get());
                        pushInput<float>(input.first, iconv);
                    } else {
                        // Otherwise we can send U8 directly
                        pushInput<uint8_t>(input.first, input.second);
                    }
                    break;
                default:
                    THROW_IE_EXCEPTION << "Unsupported input precision " << input.second->precision();
            }
        }
        graph->Infer(m_curBatch);
        graph->PullOutputData(_outputs);
    };
#if IE_THREAD == IE_THREAD_TBB
    auto_scope_observing observer(graph->ptrObserver);
    // Execute the lambda in the graph's TBB arena so the Infer call runs with that arena as the current one.
    graph->ptrArena->execute([&] { infer(); });
#else
    infer();
#endif
}

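// Copies the per-layer profiling data collected by the graph into perfMap.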
void MKLDNNPlugin::MKLDNNInferRequest::GetPerformanceCounts(
        std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const {
    if (!graph || !graph->IsReady())
        THROW_IE_EXCEPTION << "Graph is not ready!";
    graph->GetPerfData(perfMap);
}

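// Returns the blob registered under the given input or output name, allocating
// it lazily on first access. For inputs with pre-processing, a previously set
// ROI blob takes priority. Blobs whose descriptor matches the graph's native
// one are also recorded in externalPtr so the graph can use them in place.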
void MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const char *name, InferenceEngine::Blob::Ptr &data) {
    if (!graph || !graph->IsReady())
        THROW_IE_EXCEPTION << "Graph is not ready!";

    InferenceEngine::BlobMap blobs;
    graph->getInputBlobs(blobs);

    if (blobs.find(name) != blobs.end()) {
        // ROI blob is returned only if it was set previously.
        auto it = _preProcData.find(name);
        if (it != _preProcData.end()) {
            data = it->second.getRoiBlob();
            return;
        }

        if (_inputs.find(name) != _inputs.end()) {
            data = _inputs[name];
            checkBlob(data, name, true);
            return;
        }

        InferenceEngine::TensorDesc desc = blobs[name]->getTensorDesc();
        InferenceEngine::Precision originPrecision = blobs[name]->getTensorDesc().getPrecision();
        if (_networkInputs.find(name) != _networkInputs.end()) {
            InferenceEngine::Layout l = _networkInputs[name]->getLayout();
            InferenceEngine::Precision p = _networkInputs[name]->getPrecision();
            InferenceEngine::SizeVector dims = _networkInputs[name]->getTensorDesc().getDims();

            desc = InferenceEngine::TensorDesc(p, dims, l);
        }

        _inputs[name] = make_blob_with_precision(desc);
        _inputs[name]->allocate();
        if (desc.getPrecision() == originPrecision &&
                graph->_meanImages.find(name) == graph->_meanImages.end() && !graph->getProperty().batchLimit) {
            externalPtr[name] = _inputs[name]->buffer();
        }
        data = _inputs[name];
        checkBlob(data, name, true);
        return;
    }
    blobs.clear();
    graph->getOutputBlobs(blobs);

    if (blobs.find(name) != blobs.end()) {
        if (_outputs.find(name) != _outputs.end()) {
            data = _outputs[name];
            checkBlob(data, name, false);
            return;
        }

        _outputs[name] = make_blob_with_precision(blobs[name]->getTensorDesc());
        _outputs[name]->allocate();
        if (blobs[name]->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32 &&
                !graph->getProperty().batchLimit) {
            externalPtr[name] = _outputs[name]->buffer();
        }
        data = _outputs[name];
        checkBlob(data, name, false);
        return;
    }
    THROW_IE_EXCEPTION << "Cannot find blob with name: " << name;
}

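// Replaces the blob under the given input or output name with a user-supplied
// one, validating its precision and size. An FP32 blob that needs no mean
// image and no dynamic-batch handling is recorded in externalPtr so the graph
// can read or write it directly, avoiding a copy.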
void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const char *name, const InferenceEngine::Blob::Ptr &data) {
    // Check the name first: the error messages below stream it, so it must not be null.
    if (name == nullptr) {
        THROW_IE_EXCEPTION << NOT_FOUND_str + "Failed to set blob with empty name";
    }
    if (!data)
        THROW_IE_EXCEPTION << NOT_ALLOCATED_str << "Failed to set empty blob with name: \'" << name << "\'";
    if (data->buffer() == nullptr)
        THROW_IE_EXCEPTION << "Input data was not allocated. Input name: \'" << name << "\'";
    InferenceEngine::InputInfo::Ptr foundInput;
    InferenceEngine::DataPtr foundOutput;
    size_t dataSize = data->size();
    if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
        if (foundInput->getInputPrecision() != data->precision()) {
            THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Failed to set Blob with precision "
                               << data->precision();
        }

        if (foundInput->getPreProcess().getResizeAlgorithm() != InferenceEngine::ResizeAlgorithm::NO_RESIZE) {
            PreProcessData::isApplicable(data, _inputs[name]);
            // Store the given blob as the ROI blob; it will be used to fill the network input during pre-processing.
            _preProcData[name].setRoiBlob(data);
        } else {
            size_t inputSize = InferenceEngine::details::product(foundInput->getDims());
            if (dataSize != inputSize) {
                THROW_IE_EXCEPTION << "Input blob size does not match the network input size ("
                                   << dataSize << "!=" << inputSize << ").";
            }

            if (data->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32 &&
                graph->_meanImages.find(name) == graph->_meanImages.end() && !graph->getProperty().batchLimit) {
                externalPtr[name] = data->buffer();
            } else if (externalPtr.find(name) != externalPtr.end()) {
                externalPtr.erase(name);
            }
            _inputs[name] = data;
        }
    } else {
        size_t outputSize = InferenceEngine::details::product(foundOutput->getDims());
        if (dataSize != outputSize) {
            THROW_IE_EXCEPTION << "Output blob size does not match the network output size ("
                               << dataSize << "!=" << outputSize << ").";
        }
        if (foundOutput->getPrecision() != data->precision()) {
            THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str
                               << "Failed to set Blob with precision not corresponding to user output precision";
        }
        if (data->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32 &&
                !graph->getProperty().batchLimit) {
            externalPtr[name] = data->buffer();
        } else if (externalPtr.find(name) != externalPtr.end()) {
            externalPtr.erase(name);
        }
        _outputs[name] = data;
    }
}

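// Points the mkldnn memory primitive behind the given edge at a new buffer.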
static inline void changeEdgePtr(MKLDNNPlugin::MKLDNNEdgePtr edge, void *newPtr) {
    edge->getMemory().GetPrimitivePtr()->set_data_handle(newPtr);
}

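// For each externally supplied buffer, tries to rewire the corresponding graph
// edges so the network reads its input from (or writes its output to) that
// buffer directly. In-place use is skipped when it would be unsafe, e.g. for
// constant children, optimized Concat, Split, or nodes that already run in place.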
void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() {
    for (auto& it : externalPtr) {
        auto input = graph->inputNodes.find(it.first);
        if (input != graph->inputNodes.end()) {
            if (input->second->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
                continue;
            // Input cannot be in-place with other primitives
            bool canBeInPlace = true;
            for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
                auto& child = input->second->getChildEdgeAt(i)->getChild();
                if (child->isConstant())
                    canBeInPlace = false;
                auto* concat = dynamic_cast<MKLDNNConcatNode *>(child.get());
                if (canBeInPlace && concat && concat->isOptimized())
                    canBeInPlace = false;
                // Cannot be in-place before a split because the split uses different ptrs without offsets
                auto* split = dynamic_cast<MKLDNNSplitNode *>(child.get());
                if (canBeInPlace && split)
                    canBeInPlace = false;

                if (child->isInplace())
                    canBeInPlace = false;
                for (size_t j = 0; canBeInPlace && j < child->getChildEdges().size(); j++) {
                    if (child->getChildEdgeAt(j)->getMemory().GetPrimitive().get_data_handle() ==
                            input->second->getChildEdgeAt(i)->getMemory().GetPrimitive().get_data_handle())
                        canBeInPlace = false;
                }
            }
            for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
                changeEdgePtr(input->second->getChildEdgeAt(i), it.second);
            }
            continue;
        }

        MKLDNNNodePtr output;
        for (auto& out : graph->outputNodes) {
            if (out->getName() == "out_" + it.first) {
                output = out;
                break;
            }
        }
        if (output) {
            if (output->getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
                continue;
            bool canBeInPlace = true;
            void* defaultPtr = output->getParentEdgeAt(0)->getMemory().GetPrimitivePtr()->get_data_handle();
            // Cannot be in-place after a concat because the concat uses different ptrs without offsets
            auto parent = output->getParentEdgeAt(0)->getParent();
            MKLDNNNodePtr previousParent;
            do {
                previousParent = parent;
                if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInplace()) {
                    canBeInPlace = false;
                    break;
                }

                for (size_t i = 0; i < parent->getParentEdges().size(); i++) {
                    if (parent->getParentEdgeAt(i)->getMemory().GetPrimitivePtr()->get_data_handle() == defaultPtr) {
                        parent = parent->getParentEdgeAt(i)->getParent();
                        break;
                    }
                }
            } while (previousParent != parent);
            if (canBeInPlace)
                changeEdgePtr(output->getParentEdgeAt(0), it.second);
            continue;
        }
        THROW_IE_EXCEPTION << "Cannot find input/output blob: " << it.first;
    }
}

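// Attaches a compiled graph to this request and pre-creates all input and
// output blobs by requesting each of them once through GetBlob.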
void MKLDNNPlugin::MKLDNNInferRequest::SetGraph(const MKLDNNPlugin::MKLDNNGraph::Ptr &graph) {
    this->graph = graph;

    InferenceEngine::BlobMap blobs;
    this->graph->getInputBlobs(blobs);
    for (const auto& it : blobs) {
        InferenceEngine::Blob::Ptr blob;
        GetBlob(it.first.c_str(), blob);
    }
    blobs.clear();
    this->graph->getOutputBlobs(blobs);
    for (const auto& it : blobs) {
        InferenceEngine::Blob::Ptr blob;
        GetBlob(it.first.c_str(), blob);
    }
}

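// Sets the batch size used by subsequent Infer calls. Valid only when dynamic
// batching is enabled and 1 <= new_batch <= the configured batch limit.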
void MKLDNNPlugin::MKLDNNInferRequest::SetBatch(int new_batch) {
    // Guard against a missing graph before dereferencing it, as the other methods do.
    if (!graph || !graph->IsReady())
        THROW_IE_EXCEPTION << "Graph is not ready!";

    if (!graph->getProperty().enableDynamicBatch)
        THROW_IE_EXCEPTION << "Dynamic batch is not enabled.";

    if (new_batch < 1 || new_batch > graph->getProperty().batchLimit) {
        THROW_IE_EXCEPTION << "Invalid dynamic batch size " << new_batch <<
            " for this request.";
    }

    m_curBatch = new_batch;
}