inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "mkldnn_infer_request.h"
#include "mkldnn_extension_utils.h"
#include "mkldnn_streams.h"
#include <vector>
#include <string>
#include <map>
#include <blob_factory.hpp>
#include <nodes/mkldnn_concat_node.h>
#include <nodes/mkldnn_split_node.h>

MKLDNNPlugin::MKLDNNInferRequest::MKLDNNInferRequest(InferenceEngine::InputsDataMap networkInputs,
                                                     InferenceEngine::OutputsDataMap networkOutputs)
        : InferRequestInternal(networkInputs, networkOutputs) {}

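// Pushes an input blob into the graph after checking that the blob actually
// holds elements of type T and that its memory has been allocated.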
template <typename T> void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob) {
    InferenceEngine::TBlob<T> *in_f = dynamic_cast<InferenceEngine::TBlob<T> *>(inputBlob.get());

    if (in_f == nullptr) {
        THROW_IE_EXCEPTION << "Input data precision not supported. Blob type does not match the expected input precision.";
    }

    if (in_f->readOnly() == nullptr) {
        THROW_IE_EXCEPTION << "Input data was not allocated.";
    }

    graph->PushInputData(inputName, inputBlob);
}

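// Runs one inference: pre-processes the inputs, converts blobs whose precision
// mkldnn cannot consume directly (U16 always; I16 and U8 when a mean image has
// to be applied) into FP32 copies, executes the graph, and pulls the outputs.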
void MKLDNNPlugin::MKLDNNInferRequest::InferImpl() {
    IE_PROFILING_AUTO_SCOPE(MKLDNN_INFER)
    if (!graph || !graph->IsReady()) {
        THROW_IE_EXCEPTION << "Network not loaded.";
    }
    auto infer = [this] {
        // Execute input pre-processing.
        execDataPreprocessing(_inputs);

        changeDefaultPtr();
        // Converted blobs must be retained until the inference finishes.
        std::vector<InferenceEngine::Blob::Ptr> convertedInputs;
        for (auto input : _inputs) {
            if (!_networkInputs[input.first]) {
                THROW_IE_EXCEPTION << "Input blob map contains blob '" << input.first
                                   << "' that was not registered during IInferencePlugin::LoadNetwork";
            }
            /*if (_networkInputs[input.first]->getInputPrecision() != input.second->precision()) {
                THROW_IE_EXCEPTION << "Different input precision for input " << input.first
                                   << " registered in IInferencePlugin::LoadNetwork network and IInferencePlugin::Infer. "
                                   << _networkInputs[input.first]->getInputPrecision() << " vs "
                                   << input.second->precision();
            }*/

            InferenceEngine::Blob::Ptr iconv;
            InferenceEngine::TBlob<float> *in_f = nullptr;
            switch (input.second->precision()) {
                case InferenceEngine::Precision::FP32:
                    pushInput<float>(input.first, input.second);
                    break;
                case InferenceEngine::Precision::I32:
                    pushInput<int32_t>(input.first, input.second);
                    break;
                case InferenceEngine::Precision::I8:
                    pushInput<int8_t>(input.first, input.second);
                    break;
                case InferenceEngine::Precision::U16:
                    // U16 is unsupported by mkldnn, so here we convert the blob and send FP32
                    iconv = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(
                            InferenceEngine::Precision::FP32,
                            input.second->getTensorDesc().getLayout(), input.second->dims());
                    convertedInputs.push_back(iconv);
                    iconv->allocate();
                    in_f = dynamic_cast<InferenceEngine::TBlob<float> *>(iconv.get());
                    InferenceEngine::copyToFloat<uint16_t>(in_f->data(), input.second.get());
                    pushInput<float>(input.first, iconv);
                    break;
                case InferenceEngine::Precision::I16:
                    if (graph->hasMeanImageFor(input.first)) {
                        // If a mean image exists, we convert the blob and send FP32
                        iconv = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(
                                InferenceEngine::Precision::FP32,
                                input.second->getTensorDesc().getLayout(), input.second->dims());
                        convertedInputs.push_back(iconv);
                        iconv->allocate();
                        in_f = dynamic_cast<InferenceEngine::TBlob<float> *>(iconv.get());
                        InferenceEngine::copyToFloat<int16_t>(in_f->data(), input.second.get());
                        pushInput<float>(input.first, iconv);
                    } else {
                        // Otherwise we can send I16 directly
                        pushInput<int16_t>(input.first, input.second);
                    }
                    break;
                case InferenceEngine::Precision::U8:
                    if (graph->hasMeanImageFor(input.first)) {
                        // If a mean image exists, we convert the blob and send FP32
                        iconv = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(
                                InferenceEngine::Precision::FP32,
                                input.second->getTensorDesc().getLayout(), input.second->dims());
                        convertedInputs.push_back(iconv);
                        iconv->allocate();
                        in_f = dynamic_cast<InferenceEngine::TBlob<float> *>(iconv.get());
                        InferenceEngine::copyToFloat<uint8_t>(in_f->data(), input.second.get());
                        pushInput<float>(input.first, iconv);
                    } else {
                        // Otherwise we can send U8 directly
                        pushInput<uint8_t>(input.first, input.second);
                    }
                    break;
                default:
                    THROW_IE_EXCEPTION << "Unsupported input precision " << input.second->precision();
            }
        }
        graph->Infer(m_curBatch);
        graph->PullOutputData(_outputs);
    };
#if IE_THREAD == IE_THREAD_TBB
    auto_scope_observing observer(graph->ptrObserver);
    // Execute the lambda in the graph's TBB arena so the Infer call runs with that arena as the current one.
    graph->ptrArena->execute([&] { infer(); });
#else
    infer();
#endif
}

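// Copies the per-layer profiling data collected by the graph into perfMap.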
void MKLDNNPlugin::MKLDNNInferRequest::GetPerformanceCounts(
        std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const {
    if (!graph || !graph->IsReady())
        THROW_IE_EXCEPTION << "Graph is not ready!";
    graph->GetPerfData(perfMap);
}

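// Returns the blob registered under the given input or output name, allocating
// it lazily on first access. For inputs with pre-processing, a previously set
// ROI blob takes priority. Blobs whose descriptor matches the graph's native
// one are also recorded in externalPtr so the graph can use them in place.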
void MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const char *name, InferenceEngine::Blob::Ptr &data) {
    if (!graph || !graph->IsReady())
        THROW_IE_EXCEPTION << "Graph is not ready!";

    InferenceEngine::BlobMap blobs;
    graph->getInputBlobs(blobs);

    if (blobs.find(name) != blobs.end()) {
        // ROI blob is returned only if it was set previously.
        auto it = _preProcData.find(name);
        if (it != _preProcData.end()) {
            data = it->second.getRoiBlob();
            return;
        }

        if (_inputs.find(name) != _inputs.end()) {
            data = _inputs[name];
            checkBlob(data, name, true);
            return;
        }

        InferenceEngine::TensorDesc desc = blobs[name]->getTensorDesc();
        InferenceEngine::Precision originPrecision = blobs[name]->getTensorDesc().getPrecision();
        if (_networkInputs.find(name) != _networkInputs.end()) {
            InferenceEngine::Layout l = _networkInputs[name]->getLayout();
            InferenceEngine::Precision p = _networkInputs[name]->getPrecision();
            InferenceEngine::SizeVector dims = _networkInputs[name]->getTensorDesc().getDims();

            desc = InferenceEngine::TensorDesc(p, dims, l);
        }

        _inputs[name] = make_blob_with_precision(desc);
        _inputs[name]->allocate();
        if (desc.getPrecision() == originPrecision &&
                graph->_meanImages.find(name) == graph->_meanImages.end() && !graph->getProperty().batchLimit) {
            externalPtr[name] = _inputs[name]->buffer();
        }
        data = _inputs[name];
        checkBlob(data, name, true);
        return;
    }
    blobs.clear();
    graph->getOutputBlobs(blobs);

    if (blobs.find(name) != blobs.end()) {
        if (_outputs.find(name) != _outputs.end()) {
            data = _outputs[name];
            checkBlob(data, name, false);
            return;
        }

        _outputs[name] = make_blob_with_precision(blobs[name]->getTensorDesc());
        _outputs[name]->allocate();
        if (blobs[name]->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32 &&
                !graph->getProperty().batchLimit) {
            externalPtr[name] = _outputs[name]->buffer();
        }
        data = _outputs[name];
        checkBlob(data, name, false);
        return;
    }
    THROW_IE_EXCEPTION << "Cannot find blob with name: " << name;
}

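// Replaces the blob under the given input or output name with a user-supplied
// one, validating its precision and size. An FP32 blob that needs no mean
// image and no dynamic-batch handling is recorded in externalPtr so the graph
// can read or write it directly, avoiding a copy.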
void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const char *name, const InferenceEngine::Blob::Ptr &data) {
    // Check the name first: the error messages below stream it, so it must not be null.
    if (name == nullptr) {
        THROW_IE_EXCEPTION << NOT_FOUND_str + "Failed to set blob with empty name";
    }
    if (!data)
        THROW_IE_EXCEPTION << NOT_ALLOCATED_str << "Failed to set empty blob with name: \'" << name << "\'";
    if (data->buffer() == nullptr)
        THROW_IE_EXCEPTION << "Input data was not allocated. Input name: \'" << name << "\'";
    InferenceEngine::InputInfo::Ptr foundInput;
    InferenceEngine::DataPtr foundOutput;
    size_t dataSize = data->size();
    if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
        if (foundInput->getInputPrecision() != data->precision()) {
            THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Failed to set Blob with precision "
                               << data->precision();
        }

        if (foundInput->getPreProcess().getResizeAlgorithm() != InferenceEngine::ResizeAlgorithm::NO_RESIZE) {
            PreProcessData::isApplicable(data, _inputs[name]);
            // Store the given blob as the ROI blob; it will be used to fill the network input during pre-processing.
            _preProcData[name].setRoiBlob(data);
        } else {
            size_t inputSize = InferenceEngine::details::product(foundInput->getDims());
            if (dataSize != inputSize) {
                THROW_IE_EXCEPTION << "Input blob size does not match the network input size ("
                                   << dataSize << "!=" << inputSize << ").";
            }

            if (data->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32 &&
                graph->_meanImages.find(name) == graph->_meanImages.end() && !graph->getProperty().batchLimit) {
                externalPtr[name] = data->buffer();
            } else if (externalPtr.find(name) != externalPtr.end()) {
                externalPtr.erase(name);
            }
            _inputs[name] = data;
        }
    } else {
        size_t outputSize = InferenceEngine::details::product(foundOutput->getDims());
        if (dataSize != outputSize) {
            THROW_IE_EXCEPTION << "Output blob size does not match the network output size ("
                               << dataSize << "!=" << outputSize << ").";
        }
        if (foundOutput->getPrecision() != data->precision()) {
            THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str
                               << "Failed to set Blob with precision not corresponding to user output precision";
        }
        if (data->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32 &&
                !graph->getProperty().batchLimit) {
            externalPtr[name] = data->buffer();
        } else if (externalPtr.find(name) != externalPtr.end()) {
            externalPtr.erase(name);
        }
        _outputs[name] = data;
    }
}

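// Points the mkldnn memory primitive behind the given edge at a new buffer.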
static inline void changeEdgePtr(MKLDNNPlugin::MKLDNNEdgePtr edge, void *newPtr) {
    edge->getMemory().GetPrimitivePtr()->set_data_handle(newPtr);
}

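// For each externally supplied buffer, tries to rewire the corresponding graph
// edges so the network reads its input from (or writes its output to) that
// buffer directly. In-place use is skipped when it would be unsafe, e.g. for
// constant children, optimized Concat, Split, or nodes that already run in place.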
void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() {
    for (auto& it : externalPtr) {
        auto input = graph->inputNodes.find(it.first);
        if (input != graph->inputNodes.end()) {
            if (input->second->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
                continue;
            // Input cannot be in-place with other primitives
            bool canBeInPlace = true;
            for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
                auto& child = input->second->getChildEdgeAt(i)->getChild();
                if (child->isConstant())
                    canBeInPlace = false;
                auto* concat = dynamic_cast<MKLDNNConcatNode *>(child.get());
                if (canBeInPlace && concat && concat->isOptimized())
                    canBeInPlace = false;
                // Cannot be in-place before a split because the split uses different ptrs without offsets
                auto* split = dynamic_cast<MKLDNNSplitNode *>(child.get());
                if (canBeInPlace && split)
                    canBeInPlace = false;

                if (child->isInplace())
                    canBeInPlace = false;
                for (size_t j = 0; canBeInPlace && j < child->getChildEdges().size(); j++) {
                    if (child->getChildEdgeAt(j)->getMemory().GetPrimitive().get_data_handle() ==
                            input->second->getChildEdgeAt(i)->getMemory().GetPrimitive().get_data_handle())
                        canBeInPlace = false;
                }
            }
            for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
                changeEdgePtr(input->second->getChildEdgeAt(i), it.second);
            }
            continue;
        }

        MKLDNNNodePtr output;
        for (auto& out : graph->outputNodes) {
            if (out->getName() == "out_" + it.first) {
                output = out;
                break;
            }
        }
        if (output) {
            if (output->getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
                continue;
            bool canBeInPlace = true;
            void* defaultPtr = output->getParentEdgeAt(0)->getMemory().GetPrimitivePtr()->get_data_handle();
            // Cannot be in-place after a concat because the concat uses different ptrs without offsets
            auto parent = output->getParentEdgeAt(0)->getParent();
            MKLDNNNodePtr previousParent;
            do {
                previousParent = parent;
                if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInplace()) {
                    canBeInPlace = false;
                    break;
                }

                for (size_t i = 0; i < parent->getParentEdges().size(); i++) {
                    if (parent->getParentEdgeAt(i)->getMemory().GetPrimitivePtr()->get_data_handle() == defaultPtr) {
                        parent = parent->getParentEdgeAt(i)->getParent();
                        break;
                    }
                }
            } while (previousParent != parent);
            if (canBeInPlace)
                changeEdgePtr(output->getParentEdgeAt(0), it.second);
            continue;
        }
        THROW_IE_EXCEPTION << "Cannot find input/output blob: " << it.first;
    }
}

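// Attaches a compiled graph to this request and pre-creates all input and
// output blobs by requesting each of them once through GetBlob.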
void MKLDNNPlugin::MKLDNNInferRequest::SetGraph(const MKLDNNPlugin::MKLDNNGraph::Ptr &graph) {
    this->graph = graph;

    InferenceEngine::BlobMap blobs;
    this->graph->getInputBlobs(blobs);
    for (const auto& it : blobs) {
        InferenceEngine::Blob::Ptr blob;
        GetBlob(it.first.c_str(), blob);
    }
    blobs.clear();
    this->graph->getOutputBlobs(blobs);
    for (const auto& it : blobs) {
        InferenceEngine::Blob::Ptr blob;
        GetBlob(it.first.c_str(), blob);
    }
}

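// Sets the batch size used by subsequent Infer calls. Valid only when dynamic
// batching is enabled and 1 <= new_batch <= the configured batch limit.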
void MKLDNNPlugin::MKLDNNInferRequest::SetBatch(int new_batch) {
    // Guard against a missing graph before dereferencing it, as the other methods do.
    if (!graph || !graph->IsReady())
        THROW_IE_EXCEPTION << "Graph is not ready!";

    if (!graph->getProperty().enableDynamicBatch)
        THROW_IE_EXCEPTION << "Dynamic batch is not enabled.";

    if (new_batch < 1 || new_batch > graph->getProperty().batchLimit) {
        THROW_IE_EXCEPTION << "Invalid dynamic batch size " << new_batch <<
            " for this request.";
    }

    m_curBatch = new_batch;
}