// Copyright (C) 2018-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ie_api_impl.hpp"
#include "hetero/hetero_plugin_config.hpp"
#include "ie_iinfer_request.hpp"
#include "details/ie_cnn_network_tools.h"
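// Lookup tables translating the precision and layout names exposed to the
// Python API into the corresponding Inference Engine enum values.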
std::map<std::string, InferenceEngine::Precision> precision_map = {{"FP32", InferenceEngine::Precision::FP32},
                                                                   {"FP16", InferenceEngine::Precision::FP16},
                                                                   {"Q78",  InferenceEngine::Precision::Q78},
                                                                   {"I32",  InferenceEngine::Precision::I32},
                                                                   {"I16",  InferenceEngine::Precision::I16},
                                                                   {"I8",   InferenceEngine::Precision::I8},
                                                                   {"U16",  InferenceEngine::Precision::U16},
                                                                   {"U8",   InferenceEngine::Precision::U8}};
std::map<std::string, InferenceEngine::Layout> layout_map = {{"ANY",     InferenceEngine::Layout::ANY},
                                                             {"NCHW",    InferenceEngine::Layout::NCHW},
                                                             {"NHWC",    InferenceEngine::Layout::NHWC},
                                                             {"OIHW",    InferenceEngine::Layout::OIHW},
                                                             {"C",       InferenceEngine::Layout::C},
                                                             {"CHW",     InferenceEngine::Layout::CHW},
                                                             {"HW",      InferenceEngine::Layout::HW},
                                                             {"NC",      InferenceEngine::Layout::NC},
                                                             {"CN",      InferenceEngine::Layout::CN},
                                                             {"NCDHW",   InferenceEngine::Layout::NCDHW},
                                                             {"BLOCKED", InferenceEngine::Layout::BLOCKED}};
#define stringify(name) # name
#define IE_CHECK_CALL(expr) {                       \
    auto ret = (expr);                              \
    if (ret != InferenceEngine::StatusCode::OK) {   \
        THROW_IE_EXCEPTION << response.msg;         \
    }                                               \
}
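// Note: IE_CHECK_CALL expects an InferenceEngine::ResponseDesc named `response`
// to be declared in the calling scope; on a non-OK status it throws the message
// the failed call wrote into it, e.g.:
//     InferenceEngine::ResponseDesc response;
//     IE_CHECK_CALL(request_ptr->Infer(&response));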
InferenceEnginePython::IENetwork::IENetwork(const std::string &model, const std::string &weights) {
    InferenceEngine::CNNNetReader net_reader;
    net_reader.ReadNetwork(model);
    net_reader.ReadWeights(weights);
    name = net_reader.getName();
    actual = net_reader.getNetwork();
    batch_size = actual.getBatchSize();
}
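// Illustrative construction from a Model Optimizer IR pair (file names are placeholders):
//     InferenceEnginePython::IENetwork net("model.xml", "model.bin");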
void InferenceEnginePython::IENetwork::serialize(const std::string &path_to_xml, const std::string &path_to_bin) {
    actual.serialize(path_to_xml, path_to_bin);
}
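// Walks the network in topological order and returns, for every layer, its
// name, type, precision, parameters, affinity, parent/child layer names, and
// the layout/shape of its first output blob.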
const std::vector<std::pair<std::string, InferenceEnginePython::IENetLayer>>
InferenceEnginePython::IENetwork::getLayers() {
    std::vector<std::pair<std::string, InferenceEnginePython::IENetLayer>> result;
    std::vector<InferenceEngine::CNNLayerPtr> sorted_layers = InferenceEngine::details::CNNNetSortTopologically(actual);
    for (const auto &layer : sorted_layers) {
        InferenceEnginePython::IENetLayer layer_info;

        layer_info.layer_ptr = layer;
        layer_info.network_ptr = actual;
        layer_info.name = layer->name;
        layer_info.type = layer->type;
        layer_info.precision = layer->precision.name();
        layer_info.params = layer->params;
        layer_info.affinity = layer->affinity;
        std::vector<std::string> parents;
        for (const auto &i : layer->insData) {
            auto data = i.lock();
            if (data) {
                parents.emplace_back(data->getName());
            }
        }
        layer_info.parents = parents;
        std::vector<std::string> children;
        for (const auto &data : layer->outData) {
            auto inputTo = data->getInputTo();
            for (auto layer_iter : inputTo) {
                InferenceEngine::CNNLayerPtr layer_in_data = layer_iter.second;
                if (!layer_in_data) {
                    THROW_IE_EXCEPTION << "Layer which takes data " << data->name << " is nullptr";
                }
                children.emplace_back(layer_in_data->name);
            }
        }
        layer_info.children = children;
        const InferenceEngine::TensorDesc &outputTensorDesc = layer->outData[0]->getTensorDesc();
        for (const auto &it : layout_map) {
            if (it.second == outputTensorDesc.getLayout()) {
                layer_info.layout = it.first;
            }
        }
        auto dims = outputTensorDesc.getDims();
        std::string string_dims = "";
        for (const auto &it : dims) {
            string_dims += std::to_string(it) + " ";
        }
        // Trim the trailing space left by the loop above.
        string_dims = string_dims.substr(0, string_dims.size() - 1);
        layer_info.shape = string_dims;
        result.emplace_back(std::make_pair(layer->name, layer_info));
    }
    return result;
}
const std::map<std::string, InferenceEnginePython::InputInfo> InferenceEnginePython::IENetwork::getInputs() {
    std::map<std::string, InferenceEnginePython::InputInfo> inputs;
    const InferenceEngine::InputsDataMap &inputsInfo = actual.getInputsInfo();
    for (auto &in : inputsInfo) {
        InferenceEnginePython::InputInfo info;
        info.actual = *in.second;
        const InferenceEngine::TensorDesc &inputTensorDesc = in.second->getTensorDesc();
        info.dims = inputTensorDesc.getDims();
        for (auto it : precision_map)
            if (it.second == in.second->getPrecision())
                info.precision = it.first;
        for (auto it : layout_map)
            if (it.second == in.second->getLayout())
                info.layout = it.first;
        inputs[in.first] = info;
    }
    return inputs;
}
const std::map<std::string, InferenceEnginePython::OutputInfo> InferenceEnginePython::IENetwork::getOutputs() {
    std::map<std::string, InferenceEnginePython::OutputInfo> outputs;
    const InferenceEngine::OutputsDataMap &outputsInfo = actual.getOutputsInfo();
    for (auto &out : outputsInfo) {
        InferenceEnginePython::OutputInfo info;
        info.actual = out.second;
        const InferenceEngine::TensorDesc &outputTensorDesc = out.second->getTensorDesc();
        info.dims = outputTensorDesc.getDims();
        for (auto it : precision_map)
            if (it.second == out.second->getPrecision())
                info.precision = it.first;
        for (auto it : layout_map)
            if (it.second == out.second->getLayout())
                info.layout = it.first;
        outputs[out.first] = info;
    }
    return outputs;
}
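// Marks each named layer as an additional network output. Layers that are
// already outputs are skipped, and layers with more than one output blob are
// reported and skipped, since a single output blob cannot be selected for them.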
void InferenceEnginePython::IENetwork::addOutputs(const std::vector<std::string> &out_layers,
                                                  const std::string &precision) {
    for (auto &&l : out_layers) {
        InferenceEngine::OutputsDataMap outputsDataMap = actual.getOutputsInfo();
        if (outputsDataMap.find(l) != outputsDataMap.end()) {
            continue;
        }
        InferenceEngine::CNNLayerPtr cnnLayer = actual.getLayerByName(l.c_str());
        std::vector<InferenceEngine::DataPtr> outData = cnnLayer->outData;
        if (outData.size() != 1) {
            std::cout << "Layer " << l << " has " << outData.size() << " output blobs and cannot be set as output."
                      << std::endl;
            continue;
        }
        actual.addOutput(l);
        InferenceEngine::OutputsDataMap outputsDataMapUpd = actual.getOutputsInfo();
        outputsDataMapUpd[l]->setPrecision(precision_map[precision]);
    }
}
void InferenceEnginePython::IENetwork::setBatch(const size_t size) {
    actual.setBatchSize(size);
}
void InferenceEnginePython::IENetwork::reshape(const std::map<std::string, std::vector<size_t>> &input_shapes) {
    actual.reshape(input_shapes);
}
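// The two methods below expose the per-layer min/max output statistics stored
// in ICNNNetworkStats; these are the values a calibration flow records so that
// plugins can normalize activations (e.g., for INT8 inference).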
const std::map<std::string, std::map<std::string, std::vector<float>>> InferenceEnginePython::IENetwork::getStats() {
    InferenceEngine::ICNNNetworkStats *pstats = nullptr;
    InferenceEngine::ResponseDesc response;
    IE_CHECK_CALL(((InferenceEngine::ICNNNetwork &) actual).getStats(&pstats, &response));
    auto statsMap = pstats->getNodesStats();
    std::map<std::string, std::map<std::string, std::vector<float>>> map;
    for (const auto &it : statsMap) {
        std::map<std::string, std::vector<float>> stats;
        stats.emplace("min", it.second->_minOutputs);
        stats.emplace("max", it.second->_maxOutputs);
        map.emplace(it.first, stats);
    }
    return map;
}
void InferenceEnginePython::IENetwork::setStats(
        const std::map<std::string, std::map<std::string, std::vector<float>>> &stats) {
    InferenceEngine::ICNNNetworkStats *pstats = nullptr;
    InferenceEngine::ResponseDesc response;
    IE_CHECK_CALL(((InferenceEngine::ICNNNetwork &) actual).getStats(&pstats, &response));
    std::map<std::string, InferenceEngine::NetworkNodeStatsPtr> newNetNodesStats;
    for (const auto &it : stats) {
        InferenceEngine::NetworkNodeStatsPtr nodeStats = InferenceEngine::NetworkNodeStatsPtr(
                new InferenceEngine::NetworkNodeStats());
        newNetNodesStats.emplace(it.first, nodeStats);
        nodeStats->_minOutputs = it.second.at("min");
        nodeStats->_maxOutputs = it.second.at("max");
    }
    pstats->setNodesStats(newNetNodesStats);
}
void InferenceEnginePython::InputInfo::setPrecision(std::string precision) {
    actual.setPrecision(precision_map[precision]);
}
void InferenceEnginePython::InputInfo::setLayout(std::string layout) {
    actual.setLayout(layout_map[layout]);
}
void InferenceEnginePython::OutputInfo::setPrecision(std::string precision) {
    actual->setPrecision(precision_map[precision]);
}
InferenceEnginePython::IEPlugin::IEPlugin(const std::string &device, const std::vector<std::string> &plugin_dirs) {
    InferenceEngine::PluginDispatcher dispatcher{plugin_dirs};
    actual = dispatcher.getPluginByDevice(device);
    const InferenceEngine::Version *pluginVersion;
    actual->GetVersion(pluginVersion);
    version = std::to_string(pluginVersion->apiVersion.major) + ".";
    version += std::to_string(pluginVersion->apiVersion.minor) + ".";
    version += pluginVersion->buildNumber;
    device_name = device;
}
void InferenceEnginePython::IEPlugin::setInitialAffinity(const InferenceEnginePython::IENetwork &net) {
    InferenceEngine::HeteroPluginPtr hetero_plugin(actual);
    InferenceEngine::ResponseDesc response;
    auto &network = net.actual;
    IE_CHECK_CALL(hetero_plugin->SetAffinity(network, {}, &response));
}
std::set<std::string> InferenceEnginePython::IEPlugin::queryNetwork(const InferenceEnginePython::IENetwork &net) {
    const InferenceEngine::CNNNetwork &network = net.actual;
    InferenceEngine::QueryNetworkResult queryRes;
    actual->QueryNetwork(network, queryRes);
    return queryRes.supportedLayers;
}
void InferenceEnginePython::IENetLayer::setAffinity(const std::string &target_affinity) {
    layer_ptr->affinity = target_affinity;
}
void InferenceEnginePython::IENetLayer::setParams(const std::map<std::string, std::string> &params_map) {
    layer_ptr->params = params_map;
}
std::map<std::string, InferenceEngine::Blob::Ptr> InferenceEnginePython::IENetLayer::getWeights() {
    auto w_layer = std::dynamic_pointer_cast<InferenceEngine::WeightableLayer>(layer_ptr);
    // If the current layer is weightable, gather weights and biases from the casted WeightableLayer;
    // all other blobs are considered custom and gathered from the blobs field of CNNLayer.
    std::map<std::string, InferenceEngine::Blob::Ptr> weights;
    if (w_layer != nullptr) {
        if (w_layer->_weights != nullptr) {
            weights["weights"] = w_layer->_weights;
        }
        if (w_layer->_biases != nullptr) {
            weights["biases"] = w_layer->_biases;
        }
        for (auto it : w_layer->blobs) {
            if (it.first == "weights" || it.first == "biases") {
                continue;
            }
            weights[it.first] = it.second;
        }
    } else {
        // Otherwise all of the layer's blobs are considered custom and gathered from CNNLayer.
        std::map<std::string, InferenceEngine::Blob::Ptr> map_placeholder;
        weights = map_placeholder;  // A layer with no blobs should still appear in the weights map.
        for (auto it : layer_ptr->blobs) {
            weights[it.first] = it.second;
        }
    }
    return weights;
}
void InferenceEnginePython::IENetLayer::setPrecision(std::string precision) {
    layer_ptr->precision = precision_map[precision];
}
void InferenceEnginePython::IEPlugin::addCpuExtension(const std::string &extension_path) {
    InferenceEngine::ResponseDesc response;
    auto extension_ptr = InferenceEngine::make_so_pointer<InferenceEngine::IExtension>(extension_path);
    auto extension = std::dynamic_pointer_cast<InferenceEngine::IExtension>(extension_ptr);
    IE_CHECK_CALL(actual->AddExtension(extension, &response))
}
std::unique_ptr<InferenceEnginePython::IEExecNetwork>
InferenceEnginePython::IEPlugin::load(const InferenceEnginePython::IENetwork &net,
                                      int num_requests,
                                      const std::map<std::string, std::string> &config) {
    InferenceEngine::ResponseDesc response;
    auto exec_network = InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(net.name,
                                                                                                 num_requests);
    IE_CHECK_CALL(actual->LoadNetwork(exec_network->actual, net.actual, config, &response))

    for (size_t i = 0; i < num_requests; ++i) {
        InferRequestWrap &infer_request = exec_network->infer_requests[i];
        IE_CHECK_CALL(exec_network->actual->CreateInferRequest(infer_request.request_ptr, &response))
    }
    return exec_network;
}
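// Illustrative load-and-infer flow (device and file names are placeholders):
//     InferenceEnginePython::IEPlugin plugin("CPU", {});
//     InferenceEnginePython::IENetwork net("model.xml", "model.bin");
//     auto exec_net = plugin.load(net, 1, {});
//     exec_net->infer();  // runs the first (and only) infer request synchronously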
void InferenceEnginePython::IEPlugin::setConfig(const std::map<std::string, std::string> &config) {
    InferenceEngine::ResponseDesc response;
    IE_CHECK_CALL(actual->SetConfig(config, &response))
}
InferenceEnginePython::IEExecNetwork::IEExecNetwork(const std::string &name, size_t num_requests) :
        infer_requests(num_requests), name(name) {
}
void InferenceEnginePython::IEExecNetwork::infer() {
    InferRequestWrap &request = infer_requests[0];
    request.infer();
}
void InferenceEnginePython::InferRequestWrap::getBlobPtr(const std::string &blob_name,
                                                         InferenceEngine::Blob::Ptr &blob_ptr) {
    InferenceEngine::ResponseDesc response;
    IE_CHECK_CALL(request_ptr->GetBlob(blob_name.c_str(), blob_ptr, &response));
}
void InferenceEnginePython::InferRequestWrap::setBatch(int size) {
    InferenceEngine::ResponseDesc response;
    IE_CHECK_CALL(request_ptr->SetBatch(size, &response));
}
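// Completion callback attached to asynchronous infer requests: it recovers the
// owning InferRequestWrap through the request's user-data pointer and records
// the elapsed wall-clock time since infer_async() started the request.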
void latency_callback(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) {
    if (code != InferenceEngine::StatusCode::OK) {
        THROW_IE_EXCEPTION << "Async Infer Request failed with status code " << code;
    }
    InferenceEnginePython::InferRequestWrap *requestWrap;
    InferenceEngine::ResponseDesc dsc;
    request->GetUserData(reinterpret_cast<void **>(&requestWrap), &dsc);
    auto end_time = Time::now();
    auto execTime = std::chrono::duration_cast<ns>(end_time - requestWrap->start_time);
    requestWrap->exec_time = static_cast<double>(execTime.count()) * 0.000001;  // nanoseconds -> milliseconds
}
void InferenceEnginePython::InferRequestWrap::infer() {
    InferenceEngine::ResponseDesc response;
    start_time = Time::now();
    IE_CHECK_CALL(request_ptr->Infer(&response));
    auto end_time = Time::now();
    auto execTime = std::chrono::duration_cast<ns>(end_time - start_time);
    exec_time = static_cast<double>(execTime.count()) * 0.000001;  // nanoseconds -> milliseconds
}
void InferenceEnginePython::InferRequestWrap::infer_async() {
    InferenceEngine::ResponseDesc response;
    start_time = Time::now();
    IE_CHECK_CALL(request_ptr->SetUserData(this, &response));
    request_ptr->SetCompletionCallback(latency_callback);
    IE_CHECK_CALL(request_ptr->StartAsync(&response));
}
int InferenceEnginePython::InferRequestWrap::wait(int64_t timeout) {
    InferenceEngine::ResponseDesc responseDesc;
    InferenceEngine::StatusCode code = request_ptr->Wait(timeout, &responseDesc);
    return static_cast<int>(code);
}
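// Illustrative async flow: start the request, then block until completion.
// A timeout of -1 corresponds to IInferRequest::WaitMode::RESULT_READY (wait indefinitely):
//     request.infer_async();
//     request.wait(-1);
//     double latency_ms = request.exec_time;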
std::map<std::string, InferenceEnginePython::ProfileInfo>
InferenceEnginePython::InferRequestWrap::getPerformanceCounts() {
    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perf_counts;
    InferenceEngine::ResponseDesc response;
    request_ptr->GetPerformanceCounts(perf_counts, &response);
    std::map<std::string, InferenceEnginePython::ProfileInfo> perf_map;

    for (auto it : perf_counts) {
        InferenceEnginePython::ProfileInfo profile_info;
        switch (it.second.status) {
            case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
                profile_info.status = "EXECUTED";
                break;
            case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
                profile_info.status = "NOT_RUN";
                break;
            case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
                profile_info.status = "OPTIMIZED_OUT";
                break;
            default:
                profile_info.status = "UNKNOWN";
        }
        profile_info.exec_type = it.second.exec_type;
        profile_info.layer_type = it.second.layer_type;
        profile_info.cpu_time = it.second.cpu_uSec;        // microseconds
        profile_info.real_time = it.second.realTime_uSec;  // microseconds
        perf_map[it.first] = profile_info;
    }
    return perf_map;
}
std::string InferenceEnginePython::get_version() {
    auto version = InferenceEngine::GetInferenceEngineVersion();
    std::string version_str = std::to_string(version->apiVersion.major) + ".";
    version_str += std::to_string(version->apiVersion.minor) + ".";
    version_str += version->buildNumber;
    return version_str;
}