1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "mkldnn_infer_request.h"
6 #include "mkldnn_extension_utils.h"
7 #include "mkldnn_streams.h"
11 #include <blob_factory.hpp>
12 #include <nodes/mkldnn_concat_node.h>
13 #include <nodes/mkldnn_split_node.h>
// Constructs an inference request over the given network input/output maps.
// All state handling is delegated to the InferRequestInternal base; the
// executable graph itself is attached later via SetGraph().
15 MKLDNNPlugin::MKLDNNInferRequest::MKLDNNInferRequest(InferenceEngine::InputsDataMap networkInputs,
16 InferenceEngine::OutputsDataMap networkOutputs)
17 : InferRequestInternal(networkInputs, networkOutputs) {}
20 template <typename T> void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob) {
21 InferenceEngine::TBlob<T> *in_f = dynamic_cast<InferenceEngine::TBlob<T> *>(inputBlob.get());
23 if (in_f == nullptr) {
24 THROW_IE_EXCEPTION << "Input data precision not supported. Expected float.";
27 if (in_f->readOnly() == nullptr) {
28 THROW_IE_EXCEPTION << "Input data was not allocated.";
31 graph->PushInputData(inputName, inputBlob);
// Runs one synchronous inference: pre-process user inputs, push every input
// blob into the graph (converting precisions mkldnn cannot consume to FP32),
// execute the graph with the current dynamic batch, then pull outputs.
// NOTE(review): this chunk is an elided extraction — case `break;`s, closing
// braces and some `else` branches are missing between the numbered lines.
34 void MKLDNNPlugin::MKLDNNInferRequest::InferImpl() {
35 IE_PROFILING_AUTO_SCOPE(MKLDNN_INFER)
// Nothing to execute until LoadNetwork has produced a ready graph.
36 if (!graph || !graph->IsReady()) {
37 THROW_IE_EXCEPTION << "Network not loaded.";
40 // execute input pre-processing.
41 execDataPreprocessing(_inputs);
// FP32 copies made for U16/I16/U8 inputs must stay alive until the graph has
// finished consuming them below.
44 // need to retain converted blobs until infer finish
45 std::vector<InferenceEngine::Blob::Ptr> convertedInputs;
// NOTE(review): `auto input` copies each map pair per iteration; `const auto&`
// would avoid the copy — confirm no mutation relies on the copy.
46 for (auto input : _inputs) {
// Reject blobs whose name was never registered at LoadNetwork time.
// NOTE(review): operator[] on _networkInputs default-inserts a null entry for
// unknown names as a side effect of this check.
47 if (!_networkInputs[input.first]) {
49 "input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name "
// Historical precision cross-check, deliberately kept commented out.
52 /*if (_networkInputs[input.first]->getInputPrecision() != input.second->precision()) {
53 THROW_IE_EXCEPTION << "Different input precision for input " << input.first
54 << " registered in IInferencePlugin::LoadNetwork network and IInferencePlugin::Infer. "
55 << _networkInputs[input.first]->getInputPrecision() << " vs "
56 << input.second->precision();
// Dispatch on the user blob precision. Natively supported precisions are
// pushed as-is; the rest are converted to FP32 first.
61 InferenceEngine::Blob::Ptr iconv;
62 InferenceEngine::TBlob<float> *in_f = nullptr;
63 switch (input.second->precision()) {
64 case InferenceEngine::Precision::FP32:
65 pushInput<float>(input.first, input.second);
67 case InferenceEngine::Precision::I32:
68 pushInput<int32_t>(input.first, input.second);
70 case InferenceEngine::Precision::I8:
71 pushInput<int8_t>(input.first, input.second);
73 case InferenceEngine::Precision::U16:
74 // U16 is unsupported by mkldnn, so here we convert the blob and send FP32
75 iconv = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(
76 InferenceEngine::Precision::FP32,
77 input.second->getTensorDesc().getLayout(), input.second->dims());
78 convertedInputs.push_back(iconv);
// NOTE(review): in_f is not null-checked after this dynamic_cast before use.
80 in_f = dynamic_cast<InferenceEngine::TBlob<float> *>(iconv.get());
81 InferenceEngine::copyToFloat<uint16_t>(in_f->data(), input.second.get());
82 pushInput<float>(input.first, iconv);
// I16 only needs FP32 conversion when a mean image has to be subtracted.
84 case InferenceEngine::Precision::I16:
85 if (graph->hasMeanImageFor(input.first)) {
86 // If a mean image exists, we convert the blob and send FP32
87 iconv = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(
88 InferenceEngine::Precision::FP32,
89 input.second->getTensorDesc().getLayout(), input.second->dims());
90 convertedInputs.push_back(iconv);
92 in_f = dynamic_cast<InferenceEngine::TBlob<float> *>(iconv.get());
93 InferenceEngine::copyToFloat<int16_t>(in_f->data(), input.second.get());
94 pushInput<float>(input.first, iconv);
96 // Instead we can send I16 directly
97 pushInput<int16_t>(input.first, input.second);
// Same pattern for U8: convert only when mean-image subtraction applies.
100 case InferenceEngine::Precision::U8:
101 if (graph->hasMeanImageFor(input.first)) {
102 // If a mean image exists, we convert the blob and send FP32
103 iconv = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(
104 InferenceEngine::Precision::FP32,
105 input.second->getTensorDesc().getLayout(), input.second->dims());
106 convertedInputs.push_back(iconv);
108 in_f = dynamic_cast<InferenceEngine::TBlob<float> *>(iconv.get());
109 InferenceEngine::copyToFloat<uint8_t>(in_f->data(), input.second.get());
110 pushInput<float>(input.first, iconv);
112 // Instead we can send I8 directly
113 pushInput<uint8_t>(input.first, input.second);
// Any precision not handled above is a hard error.
117 THROW_IE_EXCEPTION << "Unsupported input precision " << input.second->precision();
// Execute with the batch requested via SetBatch(), then copy results out.
120 graph->Infer(m_curBatch);
121 graph->PullOutputData(_outputs);
// NOTE(review): the lines below (orig. 123-126) belong to a separate wrapper
// method whose signature line was elided from this extraction; under TBB it
// runs the inference lambda inside the graph's own arena so TBB treats that
// arena as the current one.
123 #if IE_THREAD == IE_THREAD_TBB
124 auto_scope_observing observer(graph->ptrObserver);
125 // a TBB arena is made "this" for Infer call via executing lambda for the arena
126 graph->ptrArena->execute([&] { infer(); });
132 void MKLDNNPlugin::MKLDNNInferRequest::GetPerformanceCounts(
133 std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const {
134 if (!graph || !graph->IsReady())
135 THROW_IE_EXCEPTION << "Graph is not ready!";
136 graph->GetPerfData(perfMap);
// Returns (lazily creating) the blob bound to the given input or output name.
// Cached blobs are reused; when a blob is freshly allocated and needs no
// conversion, its buffer is recorded in externalPtr so the graph can use it
// directly (see changeDefaultPtr()).
// NOTE(review): elided extraction — `return`s, `else` branches and closing
// braces between the numbered lines are missing.
139 void MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const char *name, InferenceEngine::Blob::Ptr &data) {
140 if (!graph || !graph->IsReady())
141 THROW_IE_EXCEPTION << "Graph is not ready!";
143 InferenceEngine::BlobMap blobs;
144 graph->getInputBlobs(blobs);
// ---- input path ----
146 if (blobs.find(name) != blobs.end()) {
147 // ROI blob is returned only if it was set previously.
148 auto it = _preProcData.find(name);
149 if (it != _preProcData.end()) {
150 data = it->second.getRoiBlob();
// Already created for this request: hand back the cached blob.
154 if (_inputs.find(name) != _inputs.end()) {
155 data = _inputs[name];
156 checkBlob(data, name, true);
// Otherwise build a descriptor, preferring the precision/layout/dims the user
// registered at LoadNetwork time over the graph's internal descriptor.
160 InferenceEngine::TensorDesc desc = blobs[name]->getTensorDesc();
161 InferenceEngine::Precision originPrecision = blobs[name]->getTensorDesc().getPrecision();
162 if (_networkInputs.find(name) != _networkInputs.end()) {
163 InferenceEngine::Layout l = _networkInputs[name]->getLayout();
164 InferenceEngine::Precision p = _networkInputs[name]->getPrecision();
165 InferenceEngine::SizeVector dims = _networkInputs[name]->getTensorDesc().getDims();
167 desc = InferenceEngine::TensorDesc(p, dims, l);
170 _inputs[name] = make_blob_with_precision(desc);
171 _inputs[name]->allocate();
// Direct use of the user buffer is only safe when no precision conversion,
// no mean-image subtraction and no dynamic-batch limit apply to this input.
172 if (desc.getPrecision() == originPrecision &&
173 graph->_meanImages.find(name) == graph->_meanImages.end() && !graph->getProperty().batchLimit) {
174 externalPtr[name] = _inputs[name]->buffer();
176 data = _inputs[name];
177 checkBlob(data, name, true);
// ---- output path ----
181 graph->getOutputBlobs(blobs);
183 if (blobs.find(name) != blobs.end()) {
184 if (_outputs.find(name) != _outputs.end()) {
185 data = _outputs[name];
186 checkBlob(data, name, false);
// Lazily create the output blob with the graph's own descriptor.
190 _outputs[name] = make_blob_with_precision(blobs[name]->getTensorDesc());
191 _outputs[name]->allocate();
// Outputs are shared with the graph only in the FP32 / no-dynamic-batch case.
192 if (blobs[name]->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32 &&
193 !graph->getProperty().batchLimit) {
194 externalPtr[name] = _outputs[name]->buffer();
196 data = _outputs[name];
197 checkBlob(data, name, false);
// Name matched neither an input nor an output of the graph.
200 THROW_IE_EXCEPTION << "Cannot find blob with name: " << name;
// Binds a user-provided blob to a network input (possibly as an ROI blob when
// resize pre-processing is configured) or to a network output, validating
// precision and element count, and maintaining the externalPtr zero-copy map.
// NOTE(review): elided extraction — some guards and braces between the
// numbered lines are missing (e.g. the null-`data` check implied by orig. 205
// and the `} else {` separating the input and output branches).
203 void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const char *name, const InferenceEngine::Blob::Ptr &data) {
205 THROW_IE_EXCEPTION << NOT_ALLOCATED_str << "Failed to set empty blob with name: \'" << name << "\'";
206 if (data->buffer() == nullptr)
207 THROW_IE_EXCEPTION << "Input data was not allocated. Input name: \'" << name << "\'";
// NOTE(review): `name` is dereferenced by the stream insertions above before
// this null check runs — confirm ordering against the un-elided original.
208 if (name == nullptr) {
209 THROW_IE_EXCEPTION << NOT_FOUND_str + "Failed to set blob with empty name";
// Resolve the name against registered inputs/outputs; presumably the helper
// returns true for inputs and false for outputs (matches branch usage below).
211 InferenceEngine::InputInfo::Ptr foundInput;
212 InferenceEngine::DataPtr foundOutput;
213 size_t dataSize = data->size();
214 if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
// ---- input branch ----
215 if (foundInput->getInputPrecision() != data->precision()) {
216 THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Failed to set Blob with precision "
217 << data->precision();
// With resize pre-processing enabled the blob is kept aside as an ROI blob
// and only copied into the real network input during pre-processing.
220 if (foundInput->getPreProcess().getResizeAlgorithm() != InferenceEngine::ResizeAlgorithm::NO_RESIZE) {
221 PreProcessData::isApplicable(data, _inputs[name]);
222 // Stores the given blob as ROI blob. It will be used to fill in network input during pre-processing.
223 _preProcData[name].setRoiBlob(data);
// Without resize: element count must match the registered input exactly.
225 size_t inputSize = InferenceEngine::details::product(foundInput->getDims());
226 if (dataSize != inputSize) {
227 THROW_IE_EXCEPTION << "Input blob size is not equal network input size ("
228 << dataSize << "!=" << inputSize << ").";
// Register the user buffer for zero-copy only when no conversion, no mean
// image and no dynamic batch applies; otherwise drop any stale registration.
231 if (data->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32 &&
232 graph->_meanImages.find(name) == graph->_meanImages.end() && !graph->getProperty().batchLimit) {
233 externalPtr[name] = data->buffer();
234 } else if (externalPtr.find(name) != externalPtr.end()) {
235 externalPtr.erase(name);
237 _inputs[name] = data;
// ---- output branch ----
240 size_t outputSize = InferenceEngine::details::product(foundOutput->getDims());
241 if (dataSize != outputSize) {
242 THROW_IE_EXCEPTION << "Output blob size is not equal network output size ("
243 << dataSize << "!=" << outputSize << ").";
245 if (foundOutput->getPrecision() != data->precision()) {
246 THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str
247 << "Failed to set Blob with precision not corresponding to user output precision";
// Same zero-copy bookkeeping for outputs (mean images do not apply here).
249 if (data->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32 &&
250 !graph->getProperty().batchLimit) {
251 externalPtr[name] = data->buffer();
252 } else if (externalPtr.find(name) != externalPtr.end()) {
253 externalPtr.erase(name);
255 _outputs[name] = data;
259 static inline void changeEdgePtr(MKLDNNPlugin::MKLDNNEdgePtr edge, void *newPtr) {
260 edge->getMemory().GetPrimitivePtr()->set_data_handle(newPtr);
// Re-points graph edge memory at the user-supplied buffers recorded in
// externalPtr, so inference works on user blobs without an extra copy.
// An edge is only redirected when the in-place analysis below proves that
// aliasing the user buffer cannot corrupt any consumer.
// NOTE(review): elided extraction — `continue`s, the `do {` opener of the
// do/while walk, `break`s and closing braces between the numbered lines are
// missing.
263 void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() {
264 for (auto& it : externalPtr) {
// ---- input side: it.first names an input node ----
265 auto input = graph->inputNodes.find(it.first);
266 if (input != graph->inputNodes.end()) {
// Already pointing at the user buffer: nothing to do for this entry.
267 if (input->second->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
269 // Input cannot be in-place with other primitives
270 bool canBeInPlace = true;
271 for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
272 auto& child = input->second->getChildEdgeAt(i)->getChild();
// Constant consumers must keep their own (baked) memory.
273 if (child->isConstant())
274 canBeInPlace = false;
// An optimized concat shares its output buffer with its inputs.
275 auto* concat = dynamic_cast<MKLDNNConcatNode *>(child.get());
276 if (canBeInPlace && concat && concat->isOptimized())
277 canBeInPlace = false;
278 // Cannot be in-place before split because split is using different ptrs without offsets
279 auto* split = dynamic_cast<MKLDNNSplitNode *>(child.get());
280 if (canBeInPlace && split)
281 canBeInPlace = false;
283 if (child->isInplace())
284 canBeInPlace = false;
// A grandchild edge sharing this edge's data handle means the buffer is
// already aliased downstream.
285 for (size_t j = 0; canBeInPlace && j < child->getChildEdges().size(); j++) {
286 if (child->getChildEdgeAt(j)->getMemory().GetPrimitive().get_data_handle() ==
287 input->second->getChildEdgeAt(i)->getMemory().GetPrimitive().get_data_handle())
288 canBeInPlace = false;
// All checks passed for every child edge: redirect them to the user buffer.
291 for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
292 changeEdgePtr(input->second->getChildEdgeAt(i), it.second);
// ---- output side: look for the node named "out_<blob name>" ----
297 MKLDNNNodePtr output;
298 for (auto& out : graph->outputNodes) {
299 if (out->getName() == "out_" + it.first) {
// Already pointing at the user buffer: nothing to do.
305 if (output->getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
307 bool canBeInPlace = true;
308 void * defaultPtr = output->getParentEdgeAt(0)->getMemory().GetPrimitivePtr()->get_data_handle();
309 // Cannot be in-place after concat because concat is using different ptrs without offsets
// Walk the producer chain upward while each producer has a single consumer,
// is not constant and is not itself in-place; any violation forbids aliasing.
310 auto parent = output->getParentEdgeAt(0)->getParent();
311 MKLDNNNodePtr previousParent;
313 previousParent = parent;
314 if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInplace()) {
315 canBeInPlace = false;
// Follow whichever parent edge shares the default data handle.
319 for (size_t i = 0; i < parent->getParentEdges().size(); i++) {
320 if (parent->getParentEdgeAt(i)->getMemory().GetPrimitivePtr()->get_data_handle() == defaultPtr) {
321 parent = parent->getParentEdgeAt(i)->getParent();
325 } while (previousParent != parent);
// Safe: redirect the output edge to the user buffer.
327 changeEdgePtr(output->getParentEdgeAt(0), it.second);
// externalPtr entry matched neither an input nor an output node.
330 THROW_IE_EXCEPTION << "Cannot find input/output blob: " << it.first;
// Attaches a compiled graph to this request and eagerly creates all input and
// output blobs via GetBlob(), which also initializes the externalPtr
// zero-copy bookkeeping.
// NOTE(review): the elided lines (orig. 335-336) presumably store `graph`
// into `this->graph` before it is dereferenced below — verify against the
// un-elided original.
334 void MKLDNNPlugin::MKLDNNInferRequest::SetGraph(const MKLDNNPlugin::MKLDNNGraph::Ptr &graph) {
337 InferenceEngine::BlobMap blobs;
338 this->graph->getInputBlobs(blobs);
// Pre-create every input blob.
339 for (const auto& it : blobs) {
340 InferenceEngine::Blob::Ptr blob;
341 GetBlob(it.first.c_str(), blob);
// Pre-create every output blob (blobs map is refilled by the call below).
344 this->graph->getOutputBlobs(blobs);
345 for (const auto& it : blobs) {
346 InferenceEngine::Blob::Ptr blob;
347 GetBlob(it.first.c_str(), blob);
351 void MKLDNNPlugin::MKLDNNInferRequest::SetBatch(int new_batch) {
352 if (!graph->getProperty().enableDynamicBatch)
353 THROW_IE_EXCEPTION << "Dynamic batch is not enabled.";
355 if (new_batch < 1 || new_batch > graph->getProperty().batchLimit) {
356 THROW_IE_EXCEPTION << "Invalid dynamic batch size " << new_batch <<
357 " for this request.";
360 m_curBatch = new_batch;