1 // Copyright (C) 2018-2020 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
7 #include <unordered_set>
9 #include <api/cldnn.hpp>
10 #include <api/network.hpp>
11 #include <api/profiling.hpp>
12 #include <api/custom_gpu_primitive.hpp>
16 #include "cldnn_graph.h"
17 #include "simple_math.h"
18 #include <description_buffer.hpp>
19 #include <cldnn/cldnn_config.hpp>
20 #include <graph_tools.hpp>
21 #include <ie_layers_internal.hpp>
23 #include "cldnn_infer_request.h"
24 #include <threading/ie_executor_manager.hpp>
25 #include "details/caseless.hpp"
28 #include <sys/types.h>
30 #include <exec_graph_info.hpp>
32 using namespace InferenceEngine;
33 using namespace InferenceEngine::details;
35 namespace CLDNNPlugin {
// Constructs a graph for `network` that is bound to stream `stream_id`.
// The visible initializers store the network name and the stream id; the body then creates
// the Program from the network, the engine returned by GetEngine() and m_config.
// NOTE(review): the initializers for the other members (e.g. from `context` / `config`) and the
// end of the body are not visible in this excerpt — confirm m_config is set before it is read below.
37 CLDNNGraph::CLDNNGraph(InferenceEngine::ICNNNetwork& network, gpu::ClContext::Ptr context, Config config, uint16_t stream_id)
39 , m_networkName(network.getName())
41 , m_stream_id(stream_id) {
// m_program is built from this graph's own m_config, not directly from the `config` argument.
42 m_program = std::make_shared<Program>(network, GetEngine(), m_config);
// Constructs a graph from an existing one for a different stream: the context, program,
// network name and config are copied from `graph` (the program pointer is shared, not
// rebuilt), while the stream id is taken from `stream_id`.
// NOTE(review): `graph` is dereferenced without a null check; the constructor body is not
// visible in this excerpt.
46 CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
47 : m_context(graph->m_context)
48 , m_program(graph->m_program)
49 , m_networkName(graph->m_networkName)
50 , m_config(graph->m_config)
51 , m_stream_id(stream_id) {
// Refreshes this graph's lookup tables by copying them from m_program:
// primitive ids, primitive -> IR layer names, IR -> ngraph layer names, previous-primitive
// ids, profiling ids, the performance map and the output dimensions.
55 void CLDNNGraph::UpdateLayersMaps() {
56 primitiveIDs = m_program->primitiveIDs;
57 primitivesToIRLayersMap = m_program->primitivesToIRLayersMap;
58 IRToNgraphLayersMap = m_program->IRToNgraphLayersMap;
59 prevPrimitiveIDs = m_program->prevPrimitiveIDs;
60 profilingIDs = m_program->profilingIDs;
61 perfMap = m_program->perfMap;
62 outputDims = m_program->outputDims;
// Creates the cldnn network(s) for this graph and stores them in m_networks, then
// refreshes the implementations map.
// NOTE(review): the statements before the batch check and the branch separators are not
// visible in this excerpt.
65 void CLDNNGraph::Build() {
// Dynamic batch: one network is built per compiled program index.
68 if (GetMaxDynamicBatchSize() > 1) {
69 int m_bv_sz = m_program->GetMaxBatchSizeForSingleProgram();
// Indices are walked from high to low and each network is inserted at the front,
// so m_networks ends up ordered by ascending index b.
70 for (int b = m_bv_sz - 1; b >= 0; b--) {
71 auto network = BuildNetwork(m_program->getCompiledProgram(b));
72 m_networks.insert(m_networks.begin(), network);
73 GetEngine()->release_pending_memory(network->get_id());
// Single network built from the default compiled program (presumably the non-dynamic-batch
// branch; the `else` line is not visible here).
76 auto network = BuildNetwork(m_program->getCompiledProgram());
77 m_networks.emplace_back(network);
78 GetEngine()->release_pending_memory(network->get_id());
81 UpdateImplementationsMap();
// Instantiates a cldnn::network from `program` on this graph's stream (m_stream_id).
// When a graph dump directory is configured and this is stream 0, the execution graph of
// every optimization step is serialized to
//   <graph_dumps_dir><net_id>_<step_idx>_<step name>_graph.xml
// NOTE(review): the declaration/updates of `step_idx`, the increment of `net_id` and the
// return statement are not visible in this excerpt.
84 std::shared_ptr<cldnn::network> CLDNNGraph::BuildNetwork(std::shared_ptr<cldnn::program> program) {
85 auto network = std::make_shared<cldnn::network>(*program, m_stream_id);
87 if (!m_config.graph_dumps_dir.empty() && m_stream_id == 0) {
// Function-local static: the counter is shared by every CLDNNGraph instance in the process.
88 static int net_id = 0;
89 auto steps_info = network->get_optimization_steps_info();
// step.first is used as the step name in the file name, step.second as the primitives info.
91 for (auto& step : steps_info) {
92 CNNNetwork net(GetExecGraphInfoByPrimitivesInfo(step.second, true));
93 net.serialize(m_config.graph_dumps_dir + std::to_string(net_id) + "_" +
94 std::to_string(step_idx) + "_" + step.first + "_graph.xml");
// Builds an ICNNNetwork named "runtime_gpu_graph" that describes the executed GPU graph,
// creating one CNNLayer per cldnn primitive and wiring the layers according to the
// primitives' users/dependencies lists.
//
// primitives_info          primitive descriptors to convert. When filter_const_primitives is
//                          true the users list of some entries is edited in place (see the
//                          mutable_data pass below).
// filter_const_primitives  enables the special handling of "data" and "mutable_data"
//                          primitives.
//
// NOTE(review): many interior lines of this function (switch/try/else/return statements and
// closing braces) are not visible in this excerpt; the comments below describe only the
// visible statements.
103 InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& primitives_info,
104 bool filter_const_primitives) {
105 auto net = std::make_shared<details::CNNNetworkImpl>();
106 net->setPrecision(Precision::FP32);
107 net->setName("runtime_gpu_graph");
// With profiling enabled the perf counters are refreshed first so that execution times can
// be attached to the layers.
108 if (m_config.useProfiling) {
110 // Update may throw an exception for step-by-step runtime graph dump,
111 // since network->get_executed_primitives() method can't be called before network execution
112 UpdatePerfStatistics();
113 } catch (std::exception&) {
// Every created layer is remembered together with the primitive info it was built from.
117 std::vector<std::pair<cldnn::primitive_info, CNNLayerPtr>> node2layer;
// Maps a cldnn data type to the Inference Engine Precision; unlisted types map to UNSPECIFIED.
119 auto data_type_to_precision = [](cldnn::data_types dt) {
121 case cldnn::data_types::bin: return Precision::BIN;
122 case cldnn::data_types::f32: return Precision::FP32;
123 case cldnn::data_types::f16: return Precision::FP16;
124 case cldnn::data_types::i32: return Precision::I32;
125 case cldnn::data_types::i64: return Precision::I64;
126 case cldnn::data_types::u8: return Precision::U8;
127 case cldnn::data_types::i8: return Precision::I8;
128 default: return Precision::UNSPECIFIED;
// Translates a cldnn primitive type name into an Inference Engine layer type name through a
// static lookup table. The result for names missing from the table is on a line not visible here.
132 auto to_IE_type_name = [](const std::string& cldnn_name) -> std::string{
133 static std::map<std::string, std::string> type_n2l {
134 { "activation", "Activation" },
135 { "arg_max_min", "ArgMax" },
136 { "average_unpooling", "AverageUnpooling" },
137 { "batch_norm", "BatchNormalization" },
138 { "binary_convolution", "BinaryConvolution" },
140 { "concatenation", "Concat" },
141 { "convolution", "Convolution" },
142 { "deformable_convolution", "DeformableConvolution" },
144 { "custom_gpu_primitive", "CustomGPUPrimitive" },
146 { "deconvolution", "Deconvolution" },
147 { "depth_to_space", "DepthToSpace" },
148 { "detection_output", "DetectionOutput" },
149 { "eltwise", "Eltwise" },
150 { "fully_connected", "FullyConnected" },
151 { "gather", "Gather" },
153 { "input_layout", "Input" },
156 { "lstm_elt", "LSTM_Eltwise" },
157 { "lstm_gemm", "LSTM_Gemm" },
159 { "normalize", "Normalize" },
160 { "permute", "Permute" },
161 { "pooling", "Pooling" },
162 { "prior_box", "PriorBox" },
163 { "proposal", "Proposal" },
164 { "quantize", "Quantize" },
165 { "region_yolo", "RegionYolo" },
166 { "reorder", "Reorder" },
167 { "reorg_yolo", "ReorgYolo" },
168 { "reshape", "Reshape" },
169 { "reverse_sequence", "ReverseSequence" },
170 { "roi_pooling", "ROIPooling" },
171 { "scale", "ScaleShift" },
172 { "shuffle_channels", "ShuffleChannels" },
173 { "softmax", "SoftMax" },
174 { "split", "Split" },
175 { "strided_slice", "StridedSlice" },
177 { "resample", "Resample" },
178 { "interp", "Interp" },
179 { "reduce_max", "ReduceMax" },
180 { "reduce_min", "ReduceMin" },
181 { "reduce_mean", "ReduceMean" },
182 { "reduce_prod", "ReduceProd" },
183 { "reduce_sum", "ReduceSum" },
184 { "reduce_and", "ReduceAnd" },
185 { "reduce_or", "ReduceOr" },
186 { "reduce_sum_square", "ReduceSumSquare" },
187 { "reduce_l1", "ReduceL1" },
188 { "reduce_l2", "ReduceL2" },
189 { "reduce_log_sum", "ReduceLogSum" },
190 { "reduce_log_sum_exp", "ReduceLogSumExp" }
193 if (type_n2l.find(cldnn_name) != type_n2l.end())
194 return type_n2l.at(cldnn_name);
// Joins the strings with `sep` between consecutive elements.
// NOTE(review): strs[0] is read below; an empty-vector guard is presumably on the lines not
// visible here — confirm.
199 auto concat_strings = [](std::vector<std::string> strs, char sep) -> std::string {
203 std::string res = strs[0];
204 for (size_t i = 1; i < strs.size(); i++) {
205 res += sep + strs[i];
// Splits `src` at every occurrence of `delimiter` (default ","); empty tokens are dropped.
211 auto split_string = [](std::string src, std::string delimiter = ",") -> std::vector<std::string> {
212 std::vector<std::string> tokens;
213 std::string tokenBuf;
214 size_t prev = 0, pos = 0, srcLength = src.length(), delimLength = delimiter.length();
216 pos = src.find(delimiter, prev);
217 if (pos == std::string::npos) {
220 tokenBuf = src.substr(prev, pos - prev);
221 if (!tokenBuf.empty()) {
222 tokens.push_back(tokenBuf);
224 prev = pos + delimLength;
225 } while (pos < srcLength && prev < srcLength);
// Returns the part of `name` that follows the first ':'. The value returned when there is no
// ':' or when ':' is the last character is on a line not visible here.
230 auto remove_type_from_name = [](const std::string& name) -> std::string {
231 auto it = std::find(name.begin(), name.end(), ':');
232 if (it == name.end() || (it + 1) == name.end())
235 return std::string((it+1), name.end());
// Collects the original layer names behind a primitive id: the IR layer names registered for
// it in primitivesToIRLayersMap, each replaced by its comma-separated ngraph names when
// IRToNgraphLayersMap has an entry for that IR name.
238 auto find_origin_layers = [&](const std::string& name) -> std::vector<std::string> {
239 if (primitivesToIRLayersMap.find(name) == primitivesToIRLayersMap.end())
242 auto cnn_names = primitivesToIRLayersMap.at(name);
243 std::vector<std::string> res;
245 for (auto& cnn_name : cnn_names) {
246 if (IRToNgraphLayersMap.find(cnn_name) != IRToNgraphLayersMap.end()) {
247 auto ngraph_names = split_string(IRToNgraphLayersMap.at(cnn_name));
248 res.insert(res.end(), ngraph_names.begin(), ngraph_names.end());
250 res.push_back(cnn_name);
// Creates the CNNLayer for one primitive, fills its exec-graph parameters, records the
// (primitive, layer) pair in node2layer and sizes the layer's input/output data slots.
256 auto create_layer = [&](const cldnn::primitive_info& prim_info) -> CNNLayer::Ptr {
257 CNNLayer::Ptr layer(new CNNLayer({"name", "type", Precision::UNSPECIFIED}));
259 layer->name = remove_type_from_name(prim_info.original_id);
260 layer->type = to_IE_type_name(prim_info.type_id);
261 layer->precision = data_type_to_precision(prim_info.output_layout.data_type);
// Original names = origin layers of the primitive itself plus those of every fused
// primitive, without duplicates and in first-seen order.
262 std::vector<std::string> originalNames{find_origin_layers(prim_info.original_id)};
263 for (auto& fused_id : prim_info.c_fused_ids) {
264 for (auto& origin_id : find_origin_layers(fused_id)) {
265 if (std::find(originalNames.begin(), originalNames.end(), origin_id) == originalNames.end())
266 originalNames.push_back(origin_id);
270 layer->params[ExecGraphInfoSerialization::ORIGINAL_NAMES] = concat_strings(originalNames, ',');
271 layer->params[ExecGraphInfoSerialization::IMPL_TYPE] = prim_info.kernel_id;
272 layer->params[ExecGraphInfoSerialization::OUTPUT_PRECISIONS] = layer->precision.name();
// The perf counter stays "not_executed" unless perfMap holds an entry for this primitive
// with at least one recorded run; then the average real time is reported.
273 std::string exec_time = "not_executed";
274 if (perfMap.find(prim_info.original_id) != perfMap.end()) {
275 auto perfCounter = perfMap.at(prim_info.original_id).second;
276 if (perfCounter.num > 0) {
277 exec_time = std::to_string(perfCounter.realTime_avg());
281 layer->params[ExecGraphInfoSerialization::PERF_COUNTER] = exec_time;
282 layer->params[ExecGraphInfoSerialization::OUTPUT_LAYOUTS] = prim_info.layout_str;
283 layer->params[ExecGraphInfoSerialization::EXECUTION_ORDER] = std::to_string(prim_info.exec_id);
285 node2layer.emplace_back(prim_info, layer);
// Number of input slots starts as the dependency count (adjustments, if any, happen on the
// lines not visible below).
287 size_t in_size = prim_info.c_dependencies.size();
289 if (filter_const_primitives) {
290 // Decrease expected dependencies count if there is a const input without original id in the IR
291 for (auto& dep : prim_info.c_dependencies) {
292 auto it = std::find_if(primitives_info.begin(), primitives_info.end(), [&](cldnn::primitive_info& entry) {
293 return entry.original_id == dep;
296 if (it == primitives_info.end())
299 if (it->type_id == "data") {
300 std::vector<std::string> childOriginalNames{find_origin_layers(prim_info.original_id)};
305 layer->insData.resize(in_size);
306 layer->outData.resize(prim_info.c_users.size());
// Pass over mutable_data primitives: edits primitives_info in place.
311 if (filter_const_primitives) {
312 for (auto& pi : primitives_info) {
313 // extract mutable_data primitives and connect their dependencies and users directly
314 if (pi.type_id == "mutable_data") {
315 if (pi.c_dependencies.size() == 1 && !pi.c_users.empty()) {
316 auto dep = pi.c_dependencies[0];
317 auto users = pi.c_users;
318 auto it = std::find_if(primitives_info.begin(), primitives_info.end(), [&](cldnn::primitive_info& entry) {
319 return entry.original_id == dep;
321 if (it == primitives_info.end())
324 auto& dep_users = it->c_users;
325 // Remove mutable data from users list
// NOTE(review): the find_if result is passed straight to erase(); if the id is absent this
// erases end(), which is undefined behaviour — confirm the id is always present.
326 dep_users.erase(std::find_if(dep_users.begin(), dep_users.end(), [&](std::string user_id) {
327 return user_id == pi.original_id;
330 // Add mutable data users to its dependency's users
331 dep_users.insert(dep_users.end(), users.begin(), users.end());
// For every user of the mutable_data primitive, visit the dependencies that name the
// mutable_data primitive (the statement executed on a match is not visible here).
333 for (auto& user : users) {
334 it = std::find_if(primitives_info.begin(), primitives_info.end(), [&](cldnn::primitive_info& entry) {
335 return entry.original_id == user;
337 if (it == primitives_info.end())
340 for (auto& d : it->c_dependencies) {
341 if (d == pi.original_id)
// Layer creation: one layer per primitive is added to the network. With filtering enabled,
// "data" primitives and the single-dependency mutable_data primitives with users are
// special-cased (presumably skipped; the statements are not visible here).
350 for (auto& pi : primitives_info) {
351 if (filter_const_primitives) {
353 if (pi.type_id == "data") {
358 if (pi.type_id == "mutable_data" &&
359 pi.c_dependencies.size() == 1 &&
360 !pi.c_users.empty()) {
364 auto layer = create_layer(pi);
365 net->addLayer(layer);
// Converts a cldnn layout into a TensorDesc. Dimensions are listed as batch, feature, then
// the spatial sizes from the highest visible index down to spatial[0]; 4-, 5- and
// 6-dimensional formats are handled on the visible lines.
// NOTE(review): the declaration of `dims`, any reassignment of `l` and the handling of other
// ranks are on lines not visible here.
368 auto desc_from_layout = [&](cldnn::layout layout) -> TensorDesc {
369 Precision precision = data_type_to_precision(layout.data_type);
371 Layout l = Layout::NCHW;
372 auto size = layout.size;
373 if (layout.format.dimension() == 4) {
374 dims = {static_cast<size_t>(size.batch[0]),
375 static_cast<size_t>(size.feature[0]),
376 static_cast<size_t>(size.spatial[1]),
377 static_cast<size_t>(size.spatial[0])};
378 } else if (layout.format.dimension() == 5) {
379 dims = {static_cast<size_t>(size.batch[0]),
380 static_cast<size_t>(size.feature[0]),
381 static_cast<size_t>(size.spatial[2]),
382 static_cast<size_t>(size.spatial[1]),
383 static_cast<size_t>(size.spatial[0])};
385 } else if (layout.format.dimension() == 6) {
386 dims = {static_cast<size_t>(size.batch[0]),
387 static_cast<size_t>(size.feature[0]),
388 static_cast<size_t>(size.spatial[3]),
389 static_cast<size_t>(size.spatial[2]),
390 static_cast<size_t>(size.spatial[1]),
391 static_cast<size_t>(size.spatial[0])};
392 // Should be NC?DHW but there is no such layout yet
395 TensorDesc dst{precision, dims, l};
// Wiring: for every layer, create its output Data objects and attach them to the matching
// input port of each user layer.
399 for (auto& pair : node2layer) {
400 auto pi = pair.first;
401 auto layer = pair.second;
402 auto user_ids = pi.c_users;
// NOTE(review): `int i` is compared against the unsigned user_ids.size() and
// outData.size() below (signed/unsigned comparison).
403 for (int i = 0; i < user_ids.size(); i++) {
404 auto it = std::find_if(node2layer.begin(), node2layer.end(), [&](std::pair<cldnn::primitive_info, CNNLayerPtr>& entry) {
405 return entry.first.original_id == user_ids[i];
408 if (it == node2layer.end())
411 auto& child_layer = it->second;
// Output i gets its own Data named "<original_id>_out<i>" while i is within outData;
// otherwise the first output is reused.
414 if (i < layer->outData.size()) {
415 std::string data_name = pi.original_id + "_out" + std::to_string(i);
416 layer->outData[i] = std::make_shared<Data>(data_name, desc_from_layout(pi.output_layout));
417 data = layer->outData[i];
418 data->getCreatorLayer() = layer;
420 data = layer->outData[0];
// Walk the user's dependencies to find the port that refers back to this primitive.
424 for (auto& dep : it->first.c_dependencies) {
425 if (filter_const_primitives) {
// NOTE(review): this inner `it` shadows the `it` that points at the user layer above.
426 auto it = std::find_if(node2layer.begin(), node2layer.end(), [&](std::pair<cldnn::primitive_info, CNNLayerPtr>& entry) {
427 return entry.first.original_id == dep;
430 if (it == node2layer.end())
// Connect only when the dependency is this primitive and the port is still unconnected
// (the declaration and update of in_port_id are not visible here).
434 if (dep == pi.original_id && child_layer->insData[in_port_id].lock() == nullptr) {
435 data->getInputTo()[child_layer->name] = child_layer;
436 child_layer->insData[in_port_id] = data;
443 // Specify inputs data
// The dependency count and an empty outData are both checked first (the statements taken
// on those checks are not visible here); the first output of the layer is then given the
// dims/precision/layout of the primitive's output layout and registered as a network input.
444 for (auto& pair : node2layer) {
445 auto pi = pair.first;
446 auto layer = pair.second;
447 if (pi.c_dependencies.size() != 0)
450 auto in_info = std::make_shared<InputInfo>();
451 if (layer->outData.empty())
454 auto dt = layer->outData[0];
455 auto tensor_desc = desc_from_layout(pi.output_layout);
457 dt->setDims(tensor_desc.getDims());
458 dt->setPrecision(tensor_desc.getPrecision());
459 dt->setLayout(tensor_desc.getLayout());
461 in_info->setInputData(dt);
462 net->setInputInfo(in_info);
// Stores in `graphPtr` the execution graph built from the primitives info of the network
// returned by GetNetwork(), with constant-primitive filtering enabled.
468 void CLDNNGraph::GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) {
469 auto primitives_info = GetNetwork()->get_primitives_info();
470 graphPtr = GetExecGraphInfoByPrimitivesInfo(primitives_info, true);
// Accumulates the profiling intervals of the primitives executed by the network returned
// by GetNetwork() into perfMap.
// NOTE(review): several interior lines (the body of the empty-network check, counter
// increments, `continue`/`else` statements) are not visible in this excerpt.
474 void CLDNNGraph::UpdatePerfStatistics() {
475 if (GetNetworksCount() == 0) {
// Adds the intervals of one primitive to a perf counter, in microseconds:
// "submission" and "duration" go to cpu_uSec, "executing" goes to realTime_uSec.
480 auto collectTimings = [](cldnn::instrumentation::profiling_info& cldnnInfo, PerfCounter& pc) {
481 for (auto &interval : cldnnInfo.intervals) {
482 using duration_t = std::chrono::duration<long long, std::chrono::microseconds::period>;
483 auto count = std::chrono::duration_cast<duration_t>(interval.value->value()).count();
485 if (interval.name == "submission") {
486 pc.cpu_uSec += count;
487 } else if (interval.name == "executing") {
488 pc.realTime_uSec += count;
489 } else if (interval.name == "duration") { // "duration" is used for CPU layers
490 pc.cpu_uSec += count;
// Local copy of the executed-primitive events; entries handled in the first loop are
// erased from it so the second loop only sees the rest.
498 std::map<cldnn::primitive_id, cldnn::event> executedPrimitives = GetNetwork()->get_executed_primitives();
// NOTE(review): allPrimitives is not used on any line visible in this excerpt.
499 auto allPrimitives = GetNetwork()->get_all_primitives();
501 // Get profiling info for all layers
502 for (auto &profiledID : profilingIDs) {
503 auto pcIter = perfMap.find(profiledID);
505 if (pcIter == perfMap.end()) continue;
507 auto execIter = executedPrimitives.find(profiledID);
508 auto& perfCount = pcIter->second.second;
509 // Change status if layer wasn't executed by cldnn engine
510 if (execIter == executedPrimitives.end()) {
// Only a counter that has never recorded a run is marked as optimized out.
511 if (perfCount.num == 0) {
512 perfCount.status = InferenceEngineProfileInfo::OPTIMIZED_OUT;
517 auto event = execIter->second;
518 executedPrimitives.erase(execIter);
520 cldnn::instrumentation::profiling_info cldnnInfo{profiledID, event.get_profiling_info()};
522 collectTimings(cldnnInfo, perfCount);
// Remaining executed primitives: those without a perfMap entry get a new entry whose layer
// name is the primitive id itself, and their timings are collected into it.
526 for (auto &executedID : executedPrimitives) {
527 auto pcIter = perfMap.find(executedID.first);
528 if (pcIter == perfMap.end()) {
529 perfMap[executedID.first].first = executedID.first;
530 pcIter = perfMap.find(executedID.first);
531 auto& perfCount = pcIter->second.second;
533 cldnn::instrumentation::profiling_info cldnnInfo{executedID.first, executedID.second.get_profiling_info()};
535 collectTimings(cldnnInfo, perfCount);
// Returns true when GetNetwork() yields a non-null network.
// Note that GetNetwork() throws when no network is stored at the requested index.
541 bool CLDNNGraph::IsLoaded() const {
542 return GetNetwork() != nullptr;
// When profiling is enabled, fills implementationsMap with the implementation name of every
// profiled primitive, parsed from the primitive's textual info.
545 void CLDNNGraph::UpdateImplementationsMap() {
546 if (m_config.useProfiling) {
// Extracts the text between "implementation :" and the next ',' with surrounding spaces
// removed. Returns "undef" when the section or the comma is missing, or when the extracted
// text is at most one character long.
547 auto extractImplementationFromInfo = [](const std::string& info) -> std::string {
548 std::string def_implementation = "undef";
549 std::string impl_section = "implementation :";
550 std::string::size_type pos = info.find(impl_section);
551 if (pos == std::string::npos) {
552 return def_implementation;
555 std::string::size_type end_pos = info.find(',', pos);
556 if (end_pos == std::string::npos) {
557 return def_implementation;
// Length of the text between the end of the section header and the comma.
560 std::string::size_type length = end_pos - pos - impl_section.size();
// Strips leading and trailing spaces (the result for an all-space string is on a line not
// visible here).
562 auto trim = [](const std::string& str) {
563 size_t first = str.find_first_not_of(' ');
564 if (std::string::npos == first) {
567 size_t last = str.find_last_not_of(' ');
568 return str.substr(first, (last - first + 1));
570 std::string tmp = trim(info.substr(pos + impl_section.size(), length));
572 return tmp.length() > 1 ? tmp : def_implementation;
575 // Parse primitive info and extract implementation name.
576 for (auto& id : profilingIDs) {
577 std::string prim_info = "";
// A failure to fetch the info is deliberately ignored; the empty string then yields "undef".
579 prim_info = GetNetwork()->get_primitive_info(id);
580 } catch (std::exception& /*e*/) { }
// insert() keeps an already existing entry for `id` unchanged.
582 implementationsMap.insert({id, extractImplementationFromInfo(prim_info)});
// Fills `result` with one InferenceEngineProfileInfo per layer name, combining the plugin's
// perf counters (perfMap) with the profiling events of the executed primitives.
// NOTE(review): several interior lines (the declaration of the execution index `i`,
// return/else statements, closing braces) are not visible in this excerpt.
587 void CLDNNGraph::GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &result) const {
// Set to false here and not reassigned on the visible lines, so the branches that test it
// are inactive as far as this excerpt shows.
588 bool combinePrimByIRLayers = false;
590 auto allIds = GetNetwork()->get_all_primitive_org_ids();
591 auto executedPrimitives = GetNetwork()->get_executed_primitives();
592 auto primitivesInfo = GetNetwork()->get_primitives_info();
// Upper-cases the first character of a copy of `name`.
// NOTE(review): toupper() is given a plain char; a negative value is undefined behaviour —
// consider casting to unsigned char.
594 auto getUpperCaseName = [&](std::string name) {
595 if (name.length() > 0)
596 name[0] = toupper(name[0]);
// Writes the entry for one primitive from its perfMap counter; returns false when the
// primitive has no perfMap entry.
600 auto getFromProfiling = [&](std::string primId) -> bool {
601 auto perfIter = perfMap.find(primId);
603 if (perfIter == perfMap.end()) return false;
605 const auto& layerName = perfIter->second.first;
606 if (layerName.length() == 0) // no layer directly associated
609 const auto& perfCounter = perfIter->second.second;
611 if (!perfCounter.parentPrimitive.empty() && combinePrimByIRLayers)
614 auto& extPerfEntry = result[layerName];
// exec_type is zeroed first, then filled with "CPU" or with the implementation name.
// NOTE(review): string::copy is called with the full string length and no cap against
// sizeof(exec_type); implementationsMap.at() throws if primId has no entry — confirm both.
616 memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type));
617 if (perfCounter.isCPU) {
618 static const std::string cpuExecType("CPU");
619 cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length()); // Override execType as CPU
621 std::string impl = implementationsMap.at(primId);
622 impl.copy(extPerfEntry.exec_type, impl.length());
625 extPerfEntry.execution_index = i++;
626 extPerfEntry.status = perfCounter.status;
627 extPerfEntry.cpu_uSec = perfCounter.cpu_avg();
628 extPerfEntry.realTime_uSec = perfCounter.realTime_avg();
// Optional aggregation: adds the times of every profiled primitive whose parentPrimitive is
// primId to this entry.
630 if (combinePrimByIRLayers) {
631 std::string kernelId = "";
632 long long kernelTime = 0; // used for finding the most complex computation kernel in sub_graph for perf stat
633 for (auto &id : profilingIDs) {
634 auto iter = perfMap.find(id);
635 if (iter == perfMap.end()) continue;
637 const auto &pc = iter->second.second;
638 if (id != primId && pc.parentPrimitive == primId) {
639 extPerfEntry.cpu_uSec += pc.cpu_avg();
640 extPerfEntry.realTime_uSec += pc.realTime_avg();
641 if (pc.realTime_avg() > kernelTime) {
642 kernelTime = pc.realTime_avg();
// NOTE(review): the find() result is erased without an end() check; erasing end() is
// undefined behaviour if `id` is not in allIds — confirm.
645 allIds.erase(std::find(allIds.begin(), allIds.end(), id));
648 if (!kernelId.empty())
649 implementationsMap.at(kernelId).copy(extPerfEntry.exec_type, implementationsMap.at(kernelId).length());
// NOTE(review): the number of copied characters is not capped by the size of layer_type.
652 getUpperCaseName(perfCounter.layerType).copy(extPerfEntry.layer_type, perfCounter.layerType.length());
656 // Step 1. Get all primitives in execution order that were added by clDNNPlugin
657 for (auto& primId : profilingIDs) {
658 getFromProfiling(primId);
661 // Step 2. Find all other primitives that were added during the optimization process and executed afterwards
662 for (auto& primId : allIds) {
663 auto perfIter = perfMap.find(primId);
664 if (perfIter == perfMap.end()) continue;
// Handled here: executed primitives that are either absent from profilingIDs or present
// there with an empty layer name.
666 bool existInProfiling = std::find(profilingIDs.begin(), profilingIDs.end(), primId) != profilingIDs.end();
667 if ((!existInProfiling || (existInProfiling && perfIter->second.first.length() == 0)) &&
668 executedPrimitives.find(primId) != executedPrimitives.end()) {
669 auto event = executedPrimitives.at(primId);
671 cldnn::instrumentation::profiling_info cldnnInfo{primId, event.get_profiling_info()};
// Times are taken directly from the event's intervals (the accumulation statements inside
// the branches below are not visible here).
674 long long cpuTime = 0;
675 long long deviceTime = 0;
677 for (auto &interval : cldnnInfo.intervals) {
678 using duration_t = std::chrono::duration<long long, std::chrono::microseconds::period>;
679 auto count = std::chrono::duration_cast<duration_t>(interval.value->value()).count();
681 if (interval.name == "submission") {
683 } else if (interval.name == "executing") {
685 } else if (interval.name == "duration") { // "duration" is used for CPU layers
// The layer name is the primitive id with everything up to and including the first ':' removed.
690 std::string layerName = primId;
691 if (primId.find(":") != std::string::npos) {
692 layerName = primId.substr(primId.find(":") + 1, primId.length());
695 for (auto& pi : primitivesInfo) {
696 if (pi.original_id == primId) {
697 if (pi.type_id == "mutable_data")
700 auto& extPerfEntry = result[layerName];
703 static const std::string cpuExecType("CPU");
704 memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type));
705 cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length()); // Override execType as CPU
707 std::string impl = pi.kernel_id;
708 impl.copy(extPerfEntry.exec_type, impl.length());
711 getUpperCaseName(pi.type_id).copy(extPerfEntry.layer_type, pi.type_id.length());
712 extPerfEntry.execution_index = i++;
713 extPerfEntry.status = InferenceEngineProfileInfo::LayerStatus::EXECUTED;
714 extPerfEntry.cpu_uSec = cpuTime;
715 extPerfEntry.realTime_uSec = deviceTime;
// NOTE(review): string::copy writes at most the string's own length and no terminator, so
// characters already written to layer_type / exec_type above may remain after "Input" /
// "undef" — confirm whether the buffers should be cleared first.
717 if (pi.type_id == "input_layout") {
718 const std::string input_string = "Input";
719 const std::string undef_string = "undef";
720 input_string.copy(extPerfEntry.layer_type, 256);
721 undef_string.copy(extPerfEntry.exec_type, 256);
728 // Step 3. Check primitives that have been deleted from the execution order but were added by clDNNPlugin
729 for (auto& primId : profilingIDs)
730 if (std::find(allIds.begin(), allIds.end(), primId) == allIds.end()) {
731 getFromProfiling(primId);
// Returns the stored network at position `idx` of m_networks.
// Throws an Inference Engine exception (THROW_IE_EXCEPTION) when idx is not below
// GetNetworksCount().
735 std::shared_ptr<cldnn::network> CLDNNGraph::GetNetwork(size_t idx) const {
736 if (idx >= GetNetworksCount())
737 THROW_IE_EXCEPTION << "Unable to find network with id=" << idx << ". Stored networks count: " << GetNetworksCount();
739 return m_networks[idx];
// Resolves an output name to the id of a primitive that is an actual output of the cldnn
// network: starts from primitiveIDs[outName] and, while that id is not among the network
// outputs, steps back to its single previous primitive.
// Throws an Inference Engine exception when an id is unknown to the network, or when the
// current primitive is not marked "_optimized_" or does not have exactly one previous
// primitive.
// NOTE(review): primitiveIDs.at() / prevPrimitiveIDs.at() throw on a missing key (presumably
// std::out_of_range — the container types are not visible here); the final return statement
// is not visible in this excerpt either.
743 std::string CLDNNGraph::MapOutputName(std::string outName) const {
744 auto networkOutputsIDs = GetNetwork()->get_output_ids();
745 auto allPrimitiveIds = GetNetwork()->get_all_primitives();
747 // Find correct output ID. Start with name stored in IR.
748 std::string outputID = primitiveIDs.at(outName);
749 while (std::find(networkOutputsIDs.begin(), networkOutputsIDs.end(), outputID) == networkOutputsIDs.end()) {
750 // If current ID isn't found in cldnn network outputs, get previous primitive id and try again.
751 auto prim = allPrimitiveIds.find(outputID);
752 if (prim == allPrimitiveIds.end()) {
753 THROW_IE_EXCEPTION << "Unknown primitive id " << outputID;
// Stepping back is only allowed through a primitive marked "_optimized_" that has exactly
// one predecessor.
756 if (prevPrimitiveIDs.at(outputID).size() != 1 || prim->second != "_optimized_") {
757 THROW_IE_EXCEPTION << "Unable to find parent for output primitive " << outputID;
759 outputID = prevPrimitiveIDs.at(outputID)[0];
// Looks up the dimensions recorded for an output: first directly by `outName` in outputDims,
// otherwise (presumably the `else` branch, whose keyword line is not visible here) by the
// primitive id that primitiveIDs maps `outName` to.
// NOTE(review): the fallback uses .at() on both maps, which throws on a missing key; the
// return statement is not visible in this excerpt.
765 InferenceEngine::SizeVector CLDNNGraph::GetOutputSize(std::string outName) const {
766 auto res_output = outputDims.find(outName);
768 InferenceEngine::SizeVector sz;
769 if (res_output != outputDims.end())
770 sz = res_output->second;
772 sz = outputDims.at(primitiveIDs.at(outName));
777 }; // namespace CLDNNPlugin