1 // Copyright (C) 2018 Intel Corporation
3 // SPDX-License-Identifier: Apache-2.0
8 #include <unordered_set>
10 #include <CPP/cldnn_defs.h>
11 #include <CPP/data.hpp>
12 #include <CPP/input_layout.hpp>
13 #include <CPP/reorder.hpp>
14 #include <CPP/convolution.hpp>
15 #include <CPP/pooling.hpp>
16 #include <CPP/lrn.hpp>
17 #include <CPP/fully_connected.hpp>
18 #include <CPP/softmax.hpp>
19 #include <CPP/activation.hpp>
20 #include <CPP/concatenation.hpp>
21 #include <CPP/proposal.hpp>
22 #include <CPP/roi_pooling.hpp>
23 #include <CPP/scale.hpp>
24 #include <CPP/crop.hpp>
25 #include <CPP/deconvolution.hpp>
26 #include <CPP/prior_box.hpp>
27 #include <CPP/detection_output.hpp>
28 #include <CPP/normalize.hpp>
29 #include <CPP/reshape.hpp>
30 #include <CPP/batch_norm.hpp>
31 #include <CPP/permute.hpp>
32 #include <CPP/split.hpp>
33 #include <CPP/upsampling.hpp>
34 #include <CPP/network.hpp>
35 #include <CPP/profiling.hpp>
36 #include <CPP/custom_gpu_primitive.hpp>
37 #include <CPP/reorg_yolo.hpp>
38 #include <CPP/region_yolo.hpp>
39 #include <CPP/mutable_data.hpp>
40 #include <CPP/max_unpooling.hpp>
41 #include <CPP/arg_max_min.hpp>
42 #include <CPP/mvn.hpp>
46 #include "cldnn_graph.h"
47 #include "simple_math.h"
48 #include <description_buffer.hpp>
49 #include <cldnn/cldnn_config.hpp>
50 #include <graph_tools.hpp>
51 #include "cldnn_infer_request.h"
52 #include <cpp_interfaces/ie_executor_manager.hpp>
53 #include <caseless.hpp>
56 #include <sys/types.h>
59 using namespace InferenceEngine;
60 using namespace InferenceEngine::details;
65 #define THROW_CLDNN_EXCEPTION(desc)\
67 InferenceEngineException ex(__FILE__, __LINE__);\
68 std::cout << desc << "\n---\nException detected at " << __FILE__ << ":" << \
69 __LINE__ << " (" << __FUNCTION__ << ")\n---\n" << std::endl; THROW_IE_EXCEPTION << desc; } while (0);
71 #define THROW_CLDNN_EXCEPTION(desc) THROW_IE_EXCEPTION << desc;
73 #define TensorValue(val) static_cast<cldnn::tensor::value_type>(val)
75 namespace CLDNNPlugin {
// Suffixes appended to IE layer names to form unique cldnn primitive IDs for
// the auxiliary primitives (reorders, weights, biases, etc.) that the graph
// builder inserts around user-visible layers.
const cldnn::primitive_id CLDNNGraph::m_preProcessTag("_cldnn_input_preprocess");
const cldnn::primitive_id CLDNNGraph::m_weightsTag("_cldnn_weights");
const cldnn::primitive_id CLDNNGraph::m_biasesTag("_cldnn_biases");
const cldnn::primitive_id CLDNNGraph::m_meanValuesTag("_cldnn_mean_values");
const cldnn::primitive_id CLDNNGraph::m_postProcessTag("_cldnn_output_postprocess");
const cldnn::primitive_id CLDNNGraph::m_scalesTag("_cldnn_scales");
const cldnn::primitive_id CLDNNGraph::m_workaroundTag("_cldnn_workaround");
const cldnn::primitive_id CLDNNGraph::m_preCustomLayerTag("_cldnn_custom_preprocess");
const cldnn::primitive_id CLDNNGraph::m_postCustomLayerTag("_cldnn_custom_postprocess");
// Sanity-checks a layer before conversion to cldnn primitives:
//  - verifies the expected input count (inputs == 0 means "don't check"),
//  - rejects layers that IE pre-fused, since this plugin handles fusing itself.
// Throws (THROW_CLDNN_EXCEPTION) on any violation.
static void ValidateLayer(const InferenceEngine::CNNLayerPtr& layer, unsigned inputs) { // todo: add more checks
    if (inputs && layer->insData.size() != inputs) {
        THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name);
    if (layer->_fusedWith) {
        THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
// Eltwise-specific validation: unlike ValidateLayer this accepts any input
// count of at least 2 (eltwise is n-ary), and likewise rejects pre-fused layers.
static void ValidateEltwiseLayer(const InferenceEngine::CNNLayerPtr& layer) {
    if (layer->insData.size() < 2) {
        THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name << ". Eltwise layer should take at least 2 inputs");
    if (layer->_fusedWith) {
        THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
106 #define mkdir(dir, mode) _mkdir(dir)
// Parses the plugin configuration key/value map into this Config object.
// Each recognized key validates its value and updates the corresponding
// member; unknown keys and unsupported values are reported via
// THROW_IE_EXCEPTION (NOT_FOUND / PARAMETER_MISMATCH).
void CLDNNGraph::Config::LoadFromMap(const std::map<std::string, std::string>& configMap) {
    for (auto& kvp : configMap) {
        std::string key = kvp.first;
        std::string val = kvp.second;
        // TODO: refactor if-else to map?
        // KEY_PERF_COUNT: YES/NO toggle for per-primitive profiling.
        if (key.compare(PluginConfigParams::KEY_PERF_COUNT) == 0) {
            if (val.compare(PluginConfigParams::YES) == 0) {
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                useProfiling = false;
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
        // KEY_DYN_BATCH_ENABLED: YES/NO toggle for dynamic-batch compilation.
        } else if (key.compare(PluginConfigParams::KEY_DYN_BATCH_ENABLED) == 0) {
            if (val.compare(PluginConfigParams::YES) == 0) {
                enableDynamicBatch = true;
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                enableDynamicBatch = false;
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
        // KEY_DUMP_KERNELS: YES/NO toggle for dumping custom kernel sources.
        } else if (key.compare(PluginConfigParams::KEY_DUMP_KERNELS) == 0) {
            if (val.compare(PluginConfigParams::YES) == 0) {
                dumpCustomKernels = true;
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                dumpCustomKernels = false;
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
        // KEY_CLDNN_PLUGIN_PRIORITY: numeric value parsed from 'val' and mapped
        // onto cldnn OpenCL queue priority (disabled/low/med/high).
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY) == 0) {
            std::stringstream ss(val);
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
                    queuePriority = cldnn::priority_mode_types::disabled;
                    queuePriority = cldnn::priority_mode_types::low;
                    queuePriority = cldnn::priority_mode_types::med;
                    queuePriority = cldnn::priority_mode_types::high;
                    THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported queue priority value: " << uVal;
        // KEY_CLDNN_PLUGIN_THROTTLE: same scheme as priority, but for the
        // queue throttle hint.
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) == 0) {
            std::stringstream ss(val);
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
                    queueThrottle = cldnn::throttle_mode_types::disabled;
                    queueThrottle = cldnn::throttle_mode_types::low;
                    queueThrottle = cldnn::throttle_mode_types::med;
                    queueThrottle = cldnn::throttle_mode_types::high;
                    THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported queue throttle value: " << uVal;
        // KEY_CONFIG_FILE: whitespace-separated list of custom-layer XML files,
        // each loaded into 'customLayers'.
        } else if (key.compare(PluginConfigParams::KEY_CONFIG_FILE) == 0) {
            std::stringstream ss(val);
            std::istream_iterator<std::string> begin(ss);
            std::istream_iterator<std::string> end;
            std::vector<std::string> configFiles(begin, end);
            for (auto& file : configFiles) {
                CLDNNCustomLayer::LoadFromFile(file, customLayers);
        // KEY_TUNING_MODE: maps IE tuning-mode strings onto cldnn tuning modes.
        } else if (key.compare(PluginConfigParams::KEY_TUNING_MODE) == 0) {
            if (val.compare(PluginConfigParams::TUNING_DISABLED) == 0) {
                tuningConfig.mode = cldnn::tuning_mode::tuning_disabled;
            } else if (val.compare(PluginConfigParams::TUNING_CREATE) == 0) {
                tuningConfig.mode = cldnn::tuning_mode::tuning_tune_and_cache;
            } else if (val.compare(PluginConfigParams::TUNING_USE_EXISTING) == 0) {
                tuningConfig.mode = cldnn::tuning_mode::tuning_use_cache;
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported tuning mode value by plugin: " << val;
        } else if (key.compare(PluginConfigParams::KEY_TUNING_FILE) == 0) {
            tuningConfig.cache_file_path = val;
        // KEY_CLDNN_MEM_POOL: YES/NO toggle for the cldnn memory pool.
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_MEM_POOL) == 0) {
            if (val.compare(PluginConfigParams::YES) == 0) {
                memory_pool_on = true;
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                memory_pool_on = false;
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported memory pool flag value: " << val;
        // Dump directories: created eagerly (mode 0755; mkdir is a _mkdir shim
        // on Windows) so later dump writes don't fail on a missing path.
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_GRAPH_DUMPS_DIR) == 0) {
            graph_dumps_dir = val;
            mkdir(graph_dumps_dir.c_str(), 0755);
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_SOURCES_DUMPS_DIR) == 0) {
            sources_dumps_dir = val;
            mkdir(sources_dumps_dir.c_str(), 0755);
        // KEY_EXCLUSIVE_ASYNC_REQUESTS: YES/NO toggle for sharing one executor
        // across all GPU infer requests.
        } else if (key.compare(PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS) == 0) {
            if (val.compare(PluginConfigParams::YES) == 0) {
                exclusiveAsyncRequests = true;
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                exclusiveAsyncRequests = false;
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
            THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property key by plugin: " << key;
240 void CLDNNGraph::changeInputBatch(size_t batch) {
// Walks the whole topology (unordered DFS starting from the first input's
// consumers) and decides whether it can be compiled for dynamic batch.
// Layers with batch-dependent semantics (detection/proposal/ROI families)
// and any user custom layer disqualify the network.
bool CLDNNGraph::CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const {
    InputsDataMap inputs;
    network.getInputsInfo(inputs);
    CNNLayerSet inputLayers;
    std::unordered_set<CNNLayer *> allLayers;
    auto & secondLayers = inputs.begin()->second->getInputData()->getInputTo();
    if (secondLayers.empty())
    bool check_result = true;
    details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
        auto type = LayerTypeFromStr(layer->type);
        // These layer types produce/consume per-image region data and cannot
        // be trivially re-batched.
        if (SimplerNMS == type ||
            ROIPooling == type ||
            DetectionOutput == type ||
            PSROIPooling == type ) {
            check_result = false;
        // check for custom layer
        auto customLayer = m_config.customLayers.find(layer->type);
        if (customLayer != m_config.customLayers.end()) {
            check_result = false;
// Builds the clDNN representation of an IE network.
//  - Creates the cldnn engine from the parsed Config (profiling is forced on
//    when tuning is enabled, since tuning needs timings).
//  - Optionally routes async requests through a shared GPU executor.
//  - For dynamic batch (max_batch > 1): validates the topology, then builds
//    one network per power-of-two batch size (m_bv_sz = position of the top
//    set bit of max_batch, computed by the mask scan below), largest first.
CLDNNGraph::CLDNNGraph(InferenceEngine::ICNNNetwork& network, const Config& config, int max_batch) : m_config(config),
    m_defaultFormat(cldnn::format::bfyx),
    m_networkPrecision(cldnn::data_types::f32),
    m_env.engine = std::make_shared<cldnn::engine>(cldnn::engine_configuration(
        (config.useProfiling || (config.tuningConfig.mode != cldnn::tuning_mode::tuning_disabled)),
        config.dumpCustomKernels,
        config.sources_dumps_dir,
        config.queuePriority,
        config.queueThrottle,
        config.memory_pool_on));
    m_env.debugOptions.PrintOptions();
    if (config.exclusiveAsyncRequests) {
        // All GPU networks share a single task executor in exclusive mode.
        ExecutorManager *executorManager = ExecutorManager::getInstance();
        _taskExecutor = executorManager->getExecutor(TargetDeviceInfo::name(TargetDevice::eGPU));
    // check topology for applicability
    if (!CanProcessDynBatch(network)) {
        THROW_CLDNN_EXCEPTION("Such topology cannot be compiled for dynamic batch!");
    // calculate number of networks necessary based on binary log
    unsigned int tmp = max_batch;
    unsigned int mask = 1 << 31;
    unsigned int ldigit = 31;
    while (!(tmp & mask)) {
    m_env.m_bv_sz = ldigit + 1;
    m_env.m_max_batch = max_batch;
    // Handle workarounds
    char networkName[128] = { 0 };
    network.getName(networkName, 127);
    m_env.debugOptions.EnableWA(networkName);
    m_env.debugOptions.AddTimedEvent("Loading Begin");
    // Build one network per batch size 2^b, resetting all per-build state
    // each iteration; networks are inserted at the front so the final vector
    // is ordered smallest batch first.
    for (int b = m_env.m_bv_sz - 1; b >= 0; b--) {
        m_topology = std::make_shared<cldnn::topology>(cldnn::topology());
        m_env.network.reset();
        m_env.constBlobs.clear();
        m_env.inputLayouts.clear();
        m_env.outputDims.clear();
        m_env.primitiveIDs.clear();
        changeInputBatch(1 << b);
        m_env.batchNetworks.insert(m_env.batchNetworks.begin(), m_env.network);
        m_env.engine->release_pending_memory();
    m_topology = std::make_shared<cldnn::topology>(cldnn::topology());
    m_env.engine->release_pending_memory();
    m_env.debugOptions.AddTimedEvent("Loading", "Loading Begin");
    m_env.debugOptions.PrintTimedEvents();
    m_env.debugOptions.ClearTimedEvents();
// Returns every layer that consumes the given data blob (its "input-to" set).
// A null data pointer yields an empty result.
std::vector<InferenceEngine::CNNLayerPtr> CLDNNGraph::GetNextLayers(const InferenceEngine::DataPtr data) {
    std::vector<InferenceEngine::CNNLayerPtr> nextLayers;
    if (data == nullptr) {
    for (auto nl : data->getInputTo()) {
        nextLayers.push_back(nl.second);
// Returns every consumer of every output blob of the given layer, i.e. all
// of its direct successors in the graph (duplicates possible if a successor
// reads several outputs). A null layer yields an empty result.
std::vector<InferenceEngine::CNNLayerPtr> CLDNNGraph::GetNextLayers(const InferenceEngine::CNNLayerPtr layer) {
    std::vector<InferenceEngine::CNNLayerPtr> nextLayers;
    if (layer == nullptr) {
    for (auto od : layer->outData) {
        auto nextLayersVec = GetNextLayers(od);
        for (auto nl : nextLayersVec) {
            nextLayers.push_back(nl);
// Convenience wrapper: returns the single consumer of a data blob.
// Asserts (IE_ASSERT) that exactly one consumer exists.
InferenceEngine::CNNLayerPtr CLDNNGraph::GetNextSingleLayer(const InferenceEngine::DataPtr data) {
    if (data == nullptr) {
    auto nextLayers = GetNextLayers(data);
    IE_ASSERT(nextLayers.size() == 1);
    return nextLayers[0];
// Convenience wrapper: returns the single successor of a layer.
// Asserts (IE_ASSERT) that exactly one successor exists.
InferenceEngine::CNNLayerPtr CLDNNGraph::GetNextSingleLayer(const InferenceEngine::CNNLayerPtr layer) {
    if (layer == nullptr) {
    auto nextLayers = GetNextLayers(layer);
    IE_ASSERT(nextLayers.size() == 1);
    return nextLayers[0];
// Initializes the default tensor format (NCHW -> bfyx) and the cldnn data
// type corresponding to the network's precision, used by all later builders.
void CLDNNGraph::InitFormat(InferenceEngine::ICNNNetwork &network) {
    m_defaultFormat = FormatFromLayout(InferenceEngine::Layout::NCHW);
    m_networkPrecision = DataTypeFromPrecision(network.getPrecision());
// Compiles the accumulated topology into an executable cldnn::network:
// enables data-flow optimization and tuning per config, optionally dumps the
// optimized graph, then binds every constant blob as network input data.
// Build time is bracketed by "Network Build" debug timing events.
void CLDNNGraph::CompileNetwork() {
    m_env.debugOptions.AddTimedEvent("Network Build Begin");
    cldnn::build_options options;
    if (!m_config.graph_dumps_dir.empty()) {
        options.set_option(cldnn::build_option::graph_dumps_dir(m_config.graph_dumps_dir));
    options.set_option(cldnn::build_option::optimize_data(true));
    options.set_option(cldnn::build_option::tuning_config(m_config.tuningConfig));
    m_env.network.reset();
    m_env.network = std::make_shared<cldnn::network>(cldnn::network(*(m_env.engine), *m_topology, options));
    m_env.debugOptions.AddTimedEvent("Network Build", "Network Build Begin");
    // add input data from all constant blobs
    for (auto& cblob : m_env.constBlobs) {
        m_env.network->set_input_data(cblob.first, cblob.second);
// Translates the IE network into cldnn primitives:
//  1. Creates input primitives (regular inputs and constant blobs) and seeds
//     the work list with their consumers.
//  2. Processes the work list; a layer whose inputs aren't built yet is pushed
//     back to the end. infLoopProtection counts consecutive deferrals — if it
//     reaches the list size, no layer made progress and we bail out.
//  3. Appends an output reorder per network output.
// p_currentInputs/p_currentOutputs point at the local maps only for the
// duration of this call and are cleared before returning.
void CLDNNGraph::Load(InferenceEngine::ICNNNetwork &network) {
    auto _networkPrecision = network.getPrecision();
    InferenceEngine::InputsDataMap networkInputs;
    network.getInputsInfo(networkInputs);
    p_currentInputs = &networkInputs;
    InferenceEngine::OutputsDataMap networkOutputs;
    network.getOutputsInfo(networkOutputs);
    p_currentOutputs = &networkOutputs;
    if (networkInputs.size() == 0) {
        THROW_CLDNN_EXCEPTION("No inputs detected.");
    std::list<InferenceEngine::CNNLayerPtr> layersToHandle;
    for (auto input : networkInputs) {
        IE_ASSERT(input.first.compare(input.second->name()) == 0);
        AddInputPrimitive(input.second);
        // collect next layers to process
        for (auto l : input.second->getInputData()->getInputTo()) {
            layersToHandle.push_back(l.second);
    auto allInputs = CNNNetGetAllInputLayers(network);
    for (auto input : allInputs) {
        if (LayerTypeFromStr(input->type) == ConstantBlob) {
            AddConstantBlobInput(input);
            // collect next layers to process
            for (auto nl : GetNextLayers(input)) {
                layersToHandle.push_back(nl);
    // 2. traverse layers
    unsigned infLoopProtection = 0;
    while (!layersToHandle.empty()) {
        if (infLoopProtection++ >= layersToHandle.size()) {
            THROW_CLDNN_EXCEPTION("Infinite loop during network creation");
        InferenceEngine::CNNLayerPtr currLayer = layersToHandle.front();
        layersToHandle.pop_front();
        auto layerName = currLayer->name;
        if (m_env.primitiveIDs.find(layerName) != m_env.primitiveIDs.end()) {
            infLoopProtection = 0;
            continue; // this layer was already added (had multiple inputs)
        // Probe for not-yet-built inputs: GetPrevLayersPrimitives throws when
        // a predecessor is missing, which is caught and treated as "defer".
        bool missingInput = false;
            GetPrevLayersPrimitives(currLayer);
        } catch (std::exception) {
        if (missingInput) { // some inputs aren't created yet
            layersToHandle.push_back(currLayer); // push the current layer to the end of the line
            continue; // move on to the next layer
        infLoopProtection = 0; // found a layer with all inputs already existing
        IE_ASSERT(_networkPrecision == currLayer->precision);
        CreateSingleLayerPrimitive(currLayer); // currLayer will be advanced if layer was skipped or merged
        m_env.prevPrimitiveIDs[currLayer->name] = GetPrevLayersPrimitives(currLayer);
        for (auto nl : GetNextLayers(currLayer)) {
            layersToHandle.push_back(nl);
    // 3. Handle output reordering
    for (auto output : networkOutputs) {
        // always reorder and let clDNN remove unneeded reorders
        AddOutputPrimitive(output.first, output.second);
    p_currentInputs = nullptr;
    p_currentOutputs = nullptr;
// Maps an IE layer type string to the internal LayerType enum.
// The lookup is case-insensitive (caseless_map); note "Logistic" is an alias
// for Sigmoid. Unknown strings fall through past the find() below.
CLDNNGraph::LayerType CLDNNGraph::LayerTypeFromStr(const std::string &str) {
    static const caseless_map<std::string, CLDNNGraph::LayerType> LayerNameToType = {
        { "Convolution" , Convolution },
        { "Sigmoid" , Sigmoid },
        { "Logistic" , Sigmoid },
        { "Activation" , Activation },
        { "Pooling" , Pooling },
        { "FullyConnected" , FullyConnected },
        { "SoftMax" , SoftMax },
        { "Concat" , Concatenate },
        { "Eltwise" , Eltwise },
        { "SimplerNMS" , SimplerNMS },
        { "ROIPooling" , ROIPooling },
        { "Deconvolution" , Deconvolution },
        { "PriorBox" , PriorBox },
        { "DetectionOutput" , DetectionOutput },
        { "Normalize" , Normalize },
        { "Reshape" , Reshape },
        { "Permute" , Permute },
        { "Flatten" , Flatten },
        { "BatchNormalization" , BatchNormalization },
        { "ScaleShift" , ScaleShift },
        { "Proposal" , Proposal },
        { "PSROIPooling" , PSROIPooling },
        { "Upsampling" , Upsampling },
        { "Resample" , Resample },
        { "RegionYolo" , RegionYolo },
        { "ReorgYolo" , ReorgYolo },
        { "Const" , ConstantBlob },
        { "ArgMax" , ArgMax },
        { "Unpooling" , Unpooling },
    auto it = LayerNameToType.find(str);
    if (it != LayerNameToType.end())
// Converts an IE pooling type to the cldnn pooling mode. For AVG pooling,
// excludePadding selects average_no_padding (padding excluded from the
// divisor). Unhandled types assert and then throw.
cldnn::pooling_mode CLDNNGraph::PoolingModeFromIEPooling(InferenceEngine::PoolingLayer::PoolType pt, bool excludePadding) {
    case InferenceEngine::PoolingLayer::PoolType::MAX:
        return cldnn::pooling_mode::max;
    case InferenceEngine::PoolingLayer::PoolType::AVG:
        return excludePadding ? cldnn::pooling_mode::average_no_padding : cldnn::pooling_mode::average;
    default: IE_ASSERT(0);  // unhandled pool mode
        THROW_CLDNN_EXCEPTION("Unsupported pooling type: " << pt);
    return cldnn::pooling_mode::max; // shouldn't get here
// Converts an IE eltwise operation (Sum/Prod/Max) to the cldnn eltwise mode;
// throws for anything else.
cldnn::eltwise_mode CLDNNGraph::EltwiseModeFromIEEltwise(InferenceEngine::EltwiseLayer::eOperation op) {
    case InferenceEngine::EltwiseLayer::Sum:
        return cldnn::eltwise_mode::sum;
    case InferenceEngine::EltwiseLayer::Prod:
        return cldnn::eltwise_mode::prod;
    case InferenceEngine::EltwiseLayer::Max:
        return cldnn::eltwise_mode::max;
    default: THROW_CLDNN_EXCEPTION("Unsupported eltwise operation: " << op);
    return cldnn::eltwise_mode::max; // shouldn't get here
// Converts an IE concatenation axis index to the cldnn concatenation axis.
// Batch-axis concatenation throws as unsupported (although the along_b value
// exists in the API); other axes map to f/y/x respectively.
cldnn::concatenation::concatenation_axis CLDNNGraph::ConcatAxisFromIEAxis(unsigned axis) {
    THROW_CLDNN_EXCEPTION("Unsupported concatenation axis: " << axis); // Currently unsupported (although existing in the API)
    return cldnn::concatenation::concatenation_axis::along_b;
    return cldnn::concatenation::concatenation_axis::along_f;
    return cldnn::concatenation::concatenation_axis::along_y;
    return cldnn::concatenation::concatenation_axis::along_x;
    default: THROW_CLDNN_EXCEPTION("Unsupported concatenation axis: " << axis);
    return cldnn::concatenation::concatenation_axis::along_f; // shouldn't get here
// Copies an IE weight/bias blob into a freshly allocated cldnn memory and
// registers it in the topology as a data primitive named 'primID'.
// 'rearrange' controls layout fix-ups during the copy:
//  - BroadcastFeatures: replicate a per-feature vector across all spatial
//    elements of a bfyx buffer (blob must have exactly 'features' elements).
//  - FlipDeconvDims: swap the input/output feature axes (IE deconv weights
//    are i-o ordered; cldnn expects o-i).
//  - otherwise: flat byte copy starting at 'blobByteOffset'.
void CLDNNGraph::CreatePrimitiveFromBlob(cldnn::primitive_id primID,
                                         const InferenceEngine::Blob::Ptr pBlob,
                                         cldnn::layout blobLayout,
                                         size_t blobByteOffset,
                                         WeightRearrangeType rearrange) {
    auto mem = cldnn::memory::allocate(*(m_env.engine), blobLayout);
    auto tmpPointer = mem.pointer<char>();  // implicitly maps buffer - unmap in destructor
    auto buf = tmpPointer.data();
    auto bufSize = blobLayout.bytes_count();
// The condition below is not valid once we use groups - todo: think of some other size check here
//     if ((pBlob != nullptr) &&
//         (pBlob->size() * (broadcastFeatures ? blobLayout.size.feature[0] : 1)) != blobLayout.count()) {
//         THROW_CLDNN_EXCEPTION("Unexpected blob size");
    if (pBlob == nullptr) {
        THROW_CLDNN_EXCEPTION("Missing blob data: " << primID);
    } else if ((pBlob->layout() != InferenceEngine::OIHW) &&
               (pBlob->layout() != InferenceEngine::NCHW) &&
               (pBlob->layout() != InferenceEngine::CHW) &&
               (pBlob->layout() != InferenceEngine::C)) {
        // TODO: support more layouts
        THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(pBlob->layout()) << ") in blob: " << primID);
    } else if (rearrange == BroadcastFeatures) {
        size_t features = static_cast<size_t>(blobLayout.size.feature[0]);
        if (pBlob->size() != features) {
            THROW_CLDNN_EXCEPTION("Invalid blob dimensions to broadcast: " << primID);
        auto data = static_cast<const char *>(pBlob->buffer());
        auto elementSize = cldnn::data_type_traits::size_of(blobLayout.data_type);
        size_t featureElements = blobLayout.count() / static_cast<size_t>(blobLayout.size.feature[0]);
        IE_ASSERT(blobLayout.format == cldnn::format::bfyx);
        // Byte-wise replication: every spatial element of feature f receives
        // the f-th source value.
        for (size_t f = 0; f < features; f++) {
            for (size_t e = 0; e < featureElements; e++) {
                for (size_t b = 0; b < elementSize; b++) {
                    buf[(f*featureElements + e)*elementSize + b] = data[f*elementSize + b];
    } else if (rearrange == FlipDeconvDims) {
        auto data = static_cast<const char *>(pBlob->buffer());
        auto elementSize = cldnn::data_type_traits::size_of(blobLayout.data_type);
        size_t inputFeatureElements = static_cast<size_t>(blobLayout.size.feature[0]);
        size_t outputFeatureElements = static_cast<size_t>(blobLayout.size.batch[0]);
        size_t featureSize = elementSize * static_cast<size_t>(blobLayout.size.spatial[0] * blobLayout.size.spatial[1]);
        // Transpose the two leading (feature) axes one whole kernel at a time.
        for (size_t i = 0; i < inputFeatureElements; i++) {
            for (size_t o = 0; o < outputFeatureElements; o++) {
                size_t outputShift = (o*inputFeatureElements + i)*featureSize;
                size_t inputShift = (i*outputFeatureElements + o)*featureSize;
                for (size_t b = 0; b < featureSize; b++) {
                    buf[outputShift + b] = data[inputShift + b];
        // Plain copy path (NO_REARRANGE): honor the byte offset into the blob.
        auto data = static_cast<const char *>(pBlob->buffer());
        for (size_t i = 0; i < bufSize; i++) {
            buf[i] = data[i + blobByteOffset];
    m_topology->add(cldnn::data(primID, mem));
// Builds per-group weight (and optional bias) data primitives for a
// Convolution or Deconvolution layer and appends their IDs to the output
// vectors. Grouped layers get one primitive per group, each reading its slice
// of the IE blob via a byte offset (bytesPerGroup). For deconvolution with
// non-depthwise groups the weight feature axes are flipped (FlipDeconvDims)
// to match cldnn's o-i ordering.
void CLDNNGraph::CreateWeightAndBiasPrimitives(const InferenceEngine::CNNLayerPtr& layer,
                                               std::vector<cldnn::primitive_id>& weightsPrimID,
                                               std::vector<cldnn::primitive_id>& biasesPrimID) {
    cldnn::tensor::value_type inFeatures = 1;  // todo: workaround for xyf input, handle general case (xf, xyzf etc...)
    std::shared_ptr<Data> insData0 = layer->insData[0].lock();
    IE_ASSERT(insData0 != nullptr);
    if (insData0->dims.size() > 2) {
        inFeatures = TensorValue(insData0->dims[2]);
    cldnn::tensor::value_type outFeatures(0);
    std::vector<cldnn::tensor::value_type> weightDimsVec;
    InferenceEngine::Blob::Ptr pWeightsBlob, pBiasBlob;
    unsigned groupSize = 1;
    WeightRearrangeType rearrange = NO_REARRANGE;
    switch (LayerTypeFromStr(layer->type)) {
        // Convolution case: weights are (out/groups, in/groups, kx, ky).
        auto convLayer = dynamic_cast<InferenceEngine::ConvolutionLayer *> (layer.get());
        groupSize = convLayer->_group;
        if ((inFeatures % groupSize) || (convLayer->_out_depth % groupSize)) {
            THROW_CLDNN_EXCEPTION("Invalid group size in layer " << convLayer->name);
            TensorValue(convLayer->_out_depth / groupSize),
            TensorValue(inFeatures / groupSize),
            TensorValue(convLayer->_kernel_x),
            TensorValue(convLayer->_kernel_y)
        outFeatures = convLayer->_out_depth;
        pWeightsBlob = convLayer->_weights;
        pBiasBlob = convLayer->_biases;
    case Deconvolution: {
        auto deconvLayer = dynamic_cast<InferenceEngine::DeconvolutionLayer *> (layer.get());
        groupSize = deconvLayer->_group;
        if ((inFeatures % groupSize) || (deconvLayer->_out_depth % groupSize)) {
            THROW_CLDNN_EXCEPTION("Invalid group size in layer " << deconvLayer->name);
            TensorValue(deconvLayer->_out_depth / groupSize),
            TensorValue(inFeatures / groupSize),
            TensorValue(deconvLayer->_kernel_x),
            TensorValue(deconvLayer->_kernel_y)
        outFeatures = deconvLayer->_out_depth;
        pWeightsBlob = deconvLayer->_weights;
        pBiasBlob = deconvLayer->_biases;
        // Depthwise (group == features) weights need no flipping; anything
        // else must be transposed for cldnn.
        if ((groupSize < outFeatures) || (groupSize < inFeatures))
            rearrange = FlipDeconvDims;
        IE_ASSERT("Wrong weightable layer type");  // shouldn't get here
    // create weights primitive
    cldnn::layout weightsLayout = cldnn::layout(
        cldnn::tensor(weightDimsVec));
    size_t bytesPerGroup = weightsLayout.bytes_count();
    for (unsigned g = 0; g < groupSize; g++) {
        cldnn::primitive_id weightID = layer->name + m_weightsTag + std::to_string(g);
        CreatePrimitiveFromBlob(
        weightsPrimID.push_back(weightID);
    // create bias primitive
    if (pBiasBlob != nullptr) {
        cldnn::layout biasesLayout = cldnn::layout(
            cldnn::spatial(TensorValue(outFeatures / groupSize)));
        size_t bytesPerGroup = biasesLayout.bytes_count();
        for (unsigned g = 0; g < groupSize; g++) {
            cldnn::primitive_id biasID = layer->name + m_biasesTag + std::to_string(g);
            CreatePrimitiveFromBlob(
            biasesPrimID.push_back(biasID);
// Folds a BatchNormalization layer into scale/shift form:
//   scale = 1 / sqrt(variance + eps),  bias = -mean * scale
// (IE stores variance in _weights and mean in _biases), and emits the two
// resulting per-feature blobs as cldnn data primitives under the given IDs.
// FP16 inputs are converted value-by-value through cldnn's half<->float
// helpers; FP32 is computed directly.
void CLDNNGraph::CreateScaleWeightsAndBiasesFromBN(
    const InferenceEngine::BatchNormalizationLayer* bnLayer,
    cldnn::primitive_id weightsPrimID,
    cldnn::primitive_id biasesPrimID) {
    if (bnLayer->_weights->dims() != bnLayer->_biases->dims()) {
        THROW_CLDNN_EXCEPTION("mean/variance dimensions mismatch in " << bnLayer->name);
    if (bnLayer->_weights->precision() != bnLayer->_biases->precision()) {
        THROW_CLDNN_EXCEPTION("mean/variance precision mismatch in " << bnLayer->name);
    // Per-feature blob: feature count comes from dim 0 (2D output) or dim 2
    // (4D output, NCHW channel axis).
    cldnn::tensor blobTensor(0);
    switch (bnLayer->outData[0]->dims.size()) {
        blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[0]));
        blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[2]));
        THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name);
    cldnn::layout blobLayout(
    switch (bnLayer->_weights->precision()) {
    case Precision::FP16: {
        InferenceEngine::TBlob<uint16_t> weightsBlob(bnLayer->_weights->precision(), bnLayer->_weights->layout(), bnLayer->_weights->dims());
        weightsBlob.allocate();
        // NOTE(review): biasesBlob is constructed with _weights->layout() —
        // likely a copy-paste of the line above; _biases->layout() was
        // probably intended (dims are checked equal, so usually benign).
        InferenceEngine::TBlob<uint16_t> biasesBlob(bnLayer->_biases->precision(), bnLayer->_weights->layout(), bnLayer->_biases->dims());
        biasesBlob.allocate();
        auto weightsData = weightsBlob.data();
        auto biasesData = biasesBlob.data();
        auto varianceData = static_cast<const uint16_t *>(bnLayer->_weights->buffer());
        auto meanData = static_cast<const uint16_t *>(bnLayer->_biases->buffer());
        cldnn_status status = CLDNN_SUCCESS;
        for (size_t i = 0; i < weightsBlob.size(); i++) {
            auto variance = cldnn_half_to_float(varianceData[i], &status);
            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
            auto mean = cldnn_half_to_float(meanData[i], &status);
            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
            float scale = 1.0f / sqrt(variance + bnLayer->epsilon);
            weightsData[i] = cldnn_float_to_half(scale, &status);
            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
            biasesData[i] = cldnn_float_to_half((-mean) * scale, &status);
            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
        CreatePrimitiveFromBlob(weightsPrimID, std::make_shared<InferenceEngine::TBlob<uint16_t>>(weightsBlob), blobLayout);
        CreatePrimitiveFromBlob(biasesPrimID, std::make_shared<InferenceEngine::TBlob<uint16_t>>(biasesBlob), blobLayout);
    case Precision::FP32: {
        InferenceEngine::TBlob<float> weightsBlob(bnLayer->_weights->precision(), bnLayer->_weights->layout(), bnLayer->_weights->dims());
        weightsBlob.allocate();
        InferenceEngine::TBlob<float> biasesBlob(bnLayer->_biases->precision(), bnLayer->_weights->layout(), bnLayer->_biases->dims());
        biasesBlob.allocate();
        auto weightsData = weightsBlob.data();
        auto biasesData = biasesBlob.data();
        auto varianceData = static_cast<const float *>(bnLayer->_weights->buffer());
        auto meanData = static_cast<const float *>(bnLayer->_biases->buffer());
        for (size_t i = 0; i < weightsBlob.size(); i++) {
            auto variance = varianceData[i];
            auto mean = meanData[i];
            weightsData[i] = 1.0f / sqrt(variance + bnLayer->epsilon);
            biasesData[i] = (-mean) * weightsData[i];
        CreatePrimitiveFromBlob(weightsPrimID, std::make_shared<InferenceEngine::TBlob<float>>(weightsBlob), blobLayout);
        CreatePrimitiveFromBlob(biasesPrimID, std::make_shared<InferenceEngine::TBlob<float>>(biasesBlob), blobLayout);
        THROW_CLDNN_EXCEPTION("Unhandled mean/variance precision in " << bnLayer->name);
// Dispatches one IE layer to the matching Create*Primitive builder.
// A user-registered custom layer (from the config file) takes precedence over
// any built-in handler of the same type name. Also registers the layer in the
// profiling table before building. Unknown built-in types throw.
void CLDNNGraph::CreateSingleLayerPrimitive(InferenceEngine::CNNLayerPtr &layer) {
    // Initialize a profiling entry
    InitProfileInfo(layer->name, layer->type, "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
    // First check for custom layer
    auto customLayer = m_config.customLayers.find(layer->type);
    if (customLayer != m_config.customLayers.end()) {
        CreateCustomLayerPrimitive(layer, customLayer->second);
    // Otherwise move on to built-in layer types
    switch (LayerTypeFromStr(layer->type)) {
    case Convolution: CreateConvolutionPrimitive(layer);
        CreateActivationPrimitive(layer, LayerTypeFromStr(layer->type));
    case LRN: CreateLRNPrimitive(layer);
    case Pooling: CreatePoolingPrimitive(layer);
    case Unpooling: CreateMaxUnpoolingPrimitive(layer);
    case FullyConnected: CreateFullyConnectedPrimitive(layer);
    case SoftMax: CreateSoftMaxPrimitive(layer);
    case Power: CreatePowerPrimitive(layer);
    case Split: CreateSplitPrimitive(layer);
    case Concatenate: CreateConcatenatePrimitive(layer);
    case Eltwise: CreateEltwisePrimitive(layer);
    case SimplerNMS: CreateSimplerNMSPrimitive(layer);
    case ROIPooling: CreateROIPoolingPrimitive(layer);
    case Crop: CreateCropPrimitive(layer);
    case Deconvolution: CreateDeconvolutionPrimitive(layer);
    case PriorBox: CreatePriorBoxPrimitive(layer);
    case DetectionOutput: CreateDetectionOutputPrimitive(layer);
    case Normalize: CreateNormalizePrimitive(layer);
    case Reshape: CreateReshapePrimitive(layer);
    case Permute: CreatePermutePrimitive(layer);
    case Flatten: CreateFlattenPrimitive(layer);
    case BatchNormalization: CreateBatchNormalizationPrimitive(layer);
    case PReLU: CreatePReLUPrimitive(layer);
    case ScaleShift: CreateScaleShiftPrimitive(layer);
    case Proposal: CreateProposalPrimitive(layer);
    case PSROIPooling: CreatePSROIPoolingPrimitive(layer);
    case Copy: CreateCopyPrimitive(layer);
    case Upsampling: CreateUpsamplingPrimitive(layer);
    case Resample: CreateResamplePrimitive(layer);
    case ArgMax: CreateArgMaxPrimitive(layer);
    case MVN: CreateMVNPrimitive(layer);
    case RegionYolo: CreateYOLO2RegionPrimitive(layer);
    case ReorgYolo: CreateYOLO2ReorgPrimitive(layer);
    default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type);
// Builds a cldnn::scale primitive from an IE ScaleShift layer.
// Scale weights become a data primitive; biases are optional (empty primitive
// ID means "no bias") and must match the weight dimensions when present.
void CLDNNGraph::CreateScaleShiftPrimitive(InferenceEngine::CNNLayerPtr &layer) {
    ValidateLayer(layer, 1);
    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto scaleShiftLayer = dynamic_cast<InferenceEngine::ScaleShiftLayer*> (layer.get());
    // create scales and biases
    cldnn::primitive_id scalePrimID = scaleShiftLayer->name + m_scalesTag;
    cldnn::primitive_id biasPrimID = scaleShiftLayer->name + m_biasesTag;
    const auto& dims = scaleShiftLayer->_weights->dims();
    cldnn::tensor weightTensor(1);
    switch (dims.size()) {
    // Note the 4D case swaps dims[2]/dims[3]: IE is NCHW, cldnn tensor here
    // is b,f,x,y.
    case 1: weightTensor = cldnn::feature(TensorValue(dims[0]));  // value per feature (or 1 global value)
    case 4: weightTensor = cldnn::tensor(TensorValue(dims[0]), TensorValue(dims[1]), TensorValue(dims[3]), TensorValue(dims[2]));  // value per pixel
    default: THROW_CLDNN_EXCEPTION("Invalid weights dimensions in layer " << layer->name);
    cldnn::layout blobLayout(m_networkPrecision, m_defaultFormat, weightTensor);
    CreatePrimitiveFromBlob(scalePrimID, scaleShiftLayer->_weights, blobLayout);
    if (scaleShiftLayer->_biases != nullptr) {
        if (scaleShiftLayer->_biases->dims() != dims) {
            THROW_CLDNN_EXCEPTION("Invalid bias blob dimensions in layer " << layer->name);
        CreatePrimitiveFromBlob(biasPrimID, scaleShiftLayer->_biases, blobLayout);
        biasPrimID = "";  // 0-bias
    auto scaleShiftPrim = cldnn::scale(
        scaleShiftLayer->name,
    m_env.primitiveIDs[scaleShiftLayer->name] = scaleShiftLayer->name;
    m_topology->add(scaleShiftPrim);
    m_env.profilingIDs.insert(scaleShiftLayer->name);
// Converts an IE (Faster R-CNN style) Proposal layer into a cldnn::proposal
// primitive. Expects exactly 3 inputs: cls_score, bbox_pred and im_info.
999 void CLDNNGraph::CreateProposalPrimitive(InferenceEngine::CNNLayerPtr & layer) {
1000 ValidateLayer(layer, 3);
1001 IE_ASSERT(layer->insData[0].lock()->dims[3] == 1); // only handling input batch size 1
1002 IE_ASSERT(layer->insData[1].lock()->dims[3] == 1); // only handling input batch size 1
1003 auto proposalLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
// Read proposal hyper-parameters from the IR, with Caffe-compatible defaults.
1005 float nms_thresh = proposalLayer->GetParamAsFloat("nms_thresh", 0.7f);
1006 int min_size = proposalLayer->GetParamAsInt("min_size", 16);
1007 int feature_stride = proposalLayer->GetParamAsInt("feat_stride", 16);
1008 int pre_nms_topn = proposalLayer->GetParamAsInt("pre_nms_topn", 6000);
1009 int post_nms_topn = proposalLayer->GetParamAsInt("post_nms_topn", 300);
// Anchor generation parameters (no defaults — must be present in the IR).
1010 std::vector<float> ratio = proposalLayer->GetParamAsFloats("ratio");
1011 std::vector<float> scale = proposalLayer->GetParamAsFloats("scale");
1013 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1015 auto proposalPrim = cldnn::proposal(
1016 proposalLayer->name,
1017 inputPrimitives[0], // cls_score
1018 inputPrimitives[1], // bbox_pred
1019 inputPrimitives[2], // im_info
1020 0, // max_num_proposals is unused
1029 m_env.primitiveIDs[proposalLayer->name] = proposalLayer->name;
1030 m_topology->add(proposalPrim);
1031 m_env.profilingIDs.insert(proposalLayer->name);
// Converts an IE PReLU layer into a cldnn::activation primitive.
// channel_shared=true uses a single scalar slope read out of the blob;
// otherwise the per-channel slope blob is passed as an extra primitive input.
1034 void CLDNNGraph::CreatePReLUPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1035 ValidateLayer(layer, 1);
1036 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1037 auto preluLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1039 auto inDataPtr = preluLayer->insData[0].lock();
1041 THROW_CLDNN_EXCEPTION("Data inserted into PreLu " << preluLayer->name << " is nullptr");
1043 auto inputDims = inDataPtr->dims;
// 2D input means the producer was a fully-connected layer; insert a reshape
// so the feature axis lines up with the slope data (see WA note below).
1044 if (inputDims.size() == 2) {
1045 // WA for FC output as BF instead of BX
1046 // todo: remove this once FC output is changed in clDNN
1047 cldnn::primitive_id reshapeID = preluLayer->name + m_workaroundTag;
1048 m_topology->add(cldnn::reshape(
1051 cldnn::tensor(TensorValue(inputDims[1]), TensorValue(inputDims[0]), 1, 1)));
// Redirect this layer's input to the reshape so the activation reads from it.
1052 m_env.primitiveIDs[inputPrimitives[0]] = reshapeID;
1053 inputPrimitives[0] = reshapeID;
1054 m_env.primitiveIDs[reshapeID] = reshapeID;
1055 m_env.profilingIDs.insert(reshapeID);
1058 static const std::string blobName("weights");
1059 ValidateGenericLayerBlobs(preluLayer, { blobName });
1061 bool channel_shared = preluLayer->GetParamsAsBool("channel_shared", false);
1063 auto slopeBlob = preluLayer->blobs.at(blobName);
1064 if (channel_shared) {
// Shared slope: the blob must hold exactly one value.
1065 if (slopeBlob->dims()[0] != 1) {
1066 THROW_CLDNN_EXCEPTION("PReLU slope blob with wrong dimensions in " << preluLayer->name);
// Extract the scalar slope from the blob, converting FP16 on the host.
1069 switch (slopeBlob->precision()) {
1070 case InferenceEngine::Precision::FP32:
1071 slope = *static_cast<const float *>(slopeBlob->buffer());
1073 case InferenceEngine::Precision::FP16:
1075 cldnn_status status = CLDNN_SUCCESS;
1076 slope = cldnn_half_to_float(*static_cast<const uint16_t *>(slopeBlob->buffer()), &status);
1077 if (status != CLDNN_SUCCESS) {
1078 THROW_CLDNN_EXCEPTION("Error converting fp16 value in " << preluLayer->name);
1082 default: THROW_CLDNN_EXCEPTION("Invalid PReLU slope blob precision in " << preluLayer->name);
1084 m_topology->add(cldnn::activation(preluLayer->name, inputPrimitives[0], activation_relu_negative_slope, { slope, 0.f }));
// Per-channel slope: upload the blob and reference it by primitive id.
1086 CreateGenericLayerBlobPrimitives(preluLayer);
1087 cldnn::primitive_id slopePrimID(preluLayer->name + "_" + blobName + m_weightsTag);
1088 m_topology->add(cldnn::activation(preluLayer->name, inputPrimitives[0], slopePrimID, activation_relu_negative_slope));
1091 m_env.primitiveIDs[preluLayer->name] = preluLayer->name;
1092 m_env.profilingIDs.insert(preluLayer->name);
// Converts an IE BatchNormalization layer into a cldnn primitive.
// With _SCALE_BN_OPT defined (the default here), BN is folded into a single
// cldnn::scale (1 mad per element); the cldnn::batch_norm path below is kept
// for the non-optimized build.
1095 void CLDNNGraph::CreateBatchNormalizationPrimitive(InferenceEngine::CNNLayerPtr & layer) {
1096 ValidateLayer(layer, 1);
1097 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1099 auto bnLayer = dynamic_cast<InferenceEngine::BatchNormalizationLayer *> (layer.get());
1100 cldnn::primitive_id weightID = bnLayer->name + "_" + m_scalesTag;
1101 cldnn::primitive_id biasID = bnLayer->name + "_" + m_biasesTag;
1103 #define _SCALE_BN_OPT
1104 #ifdef _SCALE_BN_OPT
1105 // Using scale as an optimization (1 mad instead of mad+rsq)
1106 // create new blobs for scale shift
// Derives scale/shift blobs from BN mean/variance on the host, then emits a
// plain scale primitive instead of batch_norm.
1107 CreateScaleWeightsAndBiasesFromBN(bnLayer, weightID, biasID);
1108 auto scalePrim = cldnn::scale(bnLayer->name, inputPrimitives[0], weightID, biasID);
1110 m_env.primitiveIDs[bnLayer->name] = bnLayer->name;
1111 m_topology->add(scalePrim);
1112 m_env.profilingIDs.insert(bnLayer->name);
1114 #endif // _SCALE_BN_OPT
// --- Non-optimized path: genuine cldnn::batch_norm with mean/variance ---
1116 cldnn::tensor blobTensor(0);
// Feature count location in dims depends on output rank (2D vs 4D).
1117 switch (bnLayer->outData[0]->dims.size()) {
1119 blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[0]));
1122 blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[2]));
1125 THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name);
1127 cldnn::layout blobLayout(
1132 // Create variance primitive
// NOTE(review): IE stores BN variance in _weights and mean in _biases —
// the tag names (m_weightsTag/m_biasesTag) are reused accordingly.
1133 cldnn::primitive_id varianceID = bnLayer->name + "_" + m_weightsTag;
1134 CreatePrimitiveFromBlob(varianceID, bnLayer->_weights, blobLayout);
1136 // Create mean primitive
1137 cldnn::primitive_id meanID = bnLayer->name + "_" + m_biasesTag;
1138 CreatePrimitiveFromBlob(meanID, bnLayer->_biases, blobLayout);
1140 auto bnPrim = cldnn::batch_norm(
1147 m_env.primitiveIDs[bnLayer->name] = bnLayer->name;
1148 m_topology->add(bnPrim);
1149 m_env.profilingIDs.insert(bnLayer->name);
// Converts an IE Flatten layer into a cldnn::reshape primitive whose target
// shape is taken directly from the layer's declared output dimensions.
1152 void CLDNNGraph::CreateFlattenPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1153 ValidateLayer(layer, 1);
1154 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1155 auto flattenLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1157 auto flattenPrim = cldnn::reshape(
1160 CldnnTensorFromIEDims(flattenLayer->outData[0]->dims));
1162 m_env.primitiveIDs[flattenLayer->name] = flattenLayer->name;
1163 m_topology->add(flattenPrim);
1164 m_env.profilingIDs.insert(flattenLayer->name);
// Converts an IE Permute layer into a cldnn::permute primitive.
// The axis order comes from the IR "order" parameter (ints -> uint16_t).
1167 void CLDNNGraph::CreatePermutePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1168 ValidateLayer(layer, 1);
1169 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1170 auto permuteLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1171 std::vector<uint16_t> order;
1172 for (auto& a : permuteLayer->GetParamAsInts("order"))
1173 order.push_back(static_cast<uint16_t>(a));
1174 auto outputDims = permuteLayer->outData[0]->dims;
1176 auto permutePrim = cldnn::permute(
1181 m_env.primitiveIDs[permuteLayer->name] = permuteLayer->name;
1182 m_topology->add(permutePrim);
1183 m_env.profilingIDs.insert(permuteLayer->name);
// Converts an IE Reshape layer into a cldnn::reshape primitive targeting the
// layer's first declared output shape.
1186 void CLDNNGraph::CreateReshapePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1187 ValidateLayer(layer, 1);
1188 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1189 auto reshapeLayer = dynamic_cast<InferenceEngine::ReshapeLayer*> (layer.get());
// Must have at least one output; additional outputs (if any) are ignored here.
1190 IE_ASSERT(reshapeLayer->outData.size());
1192 auto reshapePrim = cldnn::reshape(
1195 CldnnTensorFromIEDims(reshapeLayer->outData[0]->dims));
1197 m_env.primitiveIDs[reshapeLayer->name] = reshapeLayer->name;
1198 m_topology->add(reshapePrim);
1199 m_env.profilingIDs.insert(reshapeLayer->name);
// Converts an IE Normalize (L2 normalization with learned scales) layer into
// a cldnn::normalize primitive; the "weights" blob is uploaded first.
1202 void CLDNNGraph::CreateNormalizePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1203 ValidateLayer(layer, 1);
1204 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1205 auto normLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1206 ValidateGenericLayerBlobs(normLayer, { "weights" });
1207 CreateGenericLayerBlobPrimitives(normLayer);
// across_spatial: normalize over the whole CHW volume vs per-pixel channels.
1210 bool across_spatial = normLayer->GetParamsAsBool("across_spatial", true);
1211 float eps = normLayer->GetParamAsFloat("eps", 0.0f);
1213 // WA for MO outputting %.6f
1218 auto normPrim = cldnn::normalize(
// Scale-blob primitive id must match the one created by
// CreateGenericLayerBlobPrimitives above ("weights" + weights tag).
1221 normLayer->name + "_weights" + m_weightsTag,
1225 m_env.primitiveIDs[normLayer->name] = normLayer->name;
1226 m_topology->add(normPrim);
1227 m_env.profilingIDs.insert(normLayer->name);
// Converts an IE (SSD-style) DetectionOutput layer into a
// cldnn::detection_output primitive. Expects 3 inputs: location predictions,
// confidence predictions and prior boxes.
1230 void CLDNNGraph::CreateDetectionOutputPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1231 ValidateLayer(layer, 3);
1232 auto detectionLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
// Read all detection parameters from the IR with Caffe-compatible defaults.
1234 uint32_t num_classes = detectionLayer->GetParamAsUInt("num_classes", 1);
1235 bool share_location = detectionLayer->GetParamsAsBool("share_location", true);
1236 int background_label_id = detectionLayer->GetParamAsInt("background_label_id", 0);
1237 float nms_threshold = detectionLayer->GetParamAsFloat("nms_threshold", 0.3f);
1238 int top_k = detectionLayer->GetParamAsInt("top_k", -1);
1239 float confidence_threshold = detectionLayer->GetParamAsFloat("confidence_threshold", -FLT_MAX);
1240 float eta = detectionLayer->GetParamAsFloat("eta", 1.0f);
1241 int keep_top_k = detectionLayer->GetParamAsInt("keep_top_k", -1);
1242 bool variance_encoded_in_target = detectionLayer->GetParamsAsBool("variance_encoded_in_target", false);
1243 int input_width = detectionLayer->GetParamAsInt("input_width", -1);
1244 int input_height = detectionLayer->GetParamAsInt("input_height", -1);
1245 bool normalized = detectionLayer->GetParamsAsBool("normalized", true);
1246 std::string code_type = detectionLayer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER");
1247 bool clip = detectionLayer->GetParamsAsBool("clip", false);
1248 bool decrease_label_id = detectionLayer->GetParamsAsBool("decrease_label_id", false);
1249 cldnn::prior_box_code_type cldnnCodeType = PriorBoxCodeFromString(code_type);
// Non-normalized priors carry an extra leading field (image size), hence the
// 5-element records and the coordinate offset of 1.
1251 int32_t prior_info_size = normalized != 0 ? 4 : 5;
1252 int32_t prior_coordinates_offset = normalized != 0 ? 0 : 1;
1254 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1255 auto detectionPrim = cldnn::detection_output(
1256 detectionLayer->name,
1263 background_label_id,
1268 variance_encoded_in_target,
1269 confidence_threshold,
1271 prior_coordinates_offset,
1278 m_env.primitiveIDs[detectionLayer->name] = detectionLayer->name;
1279 m_topology->add(detectionPrim);
1280 m_env.profilingIDs.insert(detectionLayer->name);
// Converts an IE (SSD-style) PriorBox layer into a cldnn::prior_box
// primitive. The second input is used only for its dimensions (the image
// size the priors are generated for), never for its values.
1283 void CLDNNGraph::CreatePriorBoxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1284 ValidateLayer(layer, 2);
1285 auto priorBoxLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
// Box-generation parameters from the IR.
1288 std::vector<float> min_size = priorBoxLayer->GetParamAsFloats("min_size");
1289 std::vector<float> max_size = priorBoxLayer->GetParamAsFloats("max_size", {});
1290 std::vector<float> aspect_ratio = priorBoxLayer->GetParamAsFloats("aspect_ratio", {});
1291 std::vector<float> variance = priorBoxLayer->GetParamAsFloats("variance");
1292 bool flip = priorBoxLayer->GetParamsAsBool("flip", true);
1293 bool clip = priorBoxLayer->GetParamsAsBool("clip", false);
1294 bool scale_all_sizes = priorBoxLayer->GetParamsAsBool("scale_all_sizes", true);
1295 float offset = priorBoxLayer->GetParamAsFloat("offset", 0.5f);
// Step may be given per-axis (step_w/step_h) or as a single "step" value.
1297 auto step_w = priorBoxLayer->GetParamAsFloat("step_w", 0.0f);
1298 auto step_h = priorBoxLayer->GetParamAsFloat("step_h", 0.0f);
1299 auto step = priorBoxLayer->GetParamAsFloat("step", 0.0f);
1301 float _step_w = 0.0f;
1302 float _step_h = 0.0f;
1303 if (HasParam(priorBoxLayer->params, "step_w") && step_w != 0.0f &&
1304 HasParam(priorBoxLayer->params, "step_h") && step_h != 0.0f) {
1307 } else if (HasParam(priorBoxLayer->params, "step") && step != 0.0f) {
// Explicit image sizes in the IR are rejected — dimensions must come from
// the second input blob instead.
1312 int img = priorBoxLayer->GetParamAsInt("img_size", 0);
1313 int img_w = priorBoxLayer->GetParamAsInt("img_w", 0);
1314 int img_h = priorBoxLayer->GetParamAsInt("img_h", 0);
1315 if ((img != 0) || (img_w != 0) || (img_h != 0)) {
1317 THROW_CLDNN_EXCEPTION("Unsupported image sizes in prior box " + layer->name + " (use an image blob instead of dimensions)");
1320 IE_ASSERT(layer->insData[1].lock());
1321 auto img_dims = layer->insData[1].lock()->dims;
1322 cldnn::tensor img_size = cldnn::spatial(TensorValue(img_dims[0]), TensorValue(img_dims[1]));
1323 std::vector<cldnn::primitive_id> inputPrimitives = GetPrevLayersPrimitives(layer);
1324 // second input isn't used by value - only dimensions taken from the layer input
// NOTE(review): the guard above throws when img_w/img_h are non-zero, so on
// this fallback path img_w and img_h are always 0 and the computed steps are
// 0 as well — presumably the prior_box primitive derives the step itself in
// that case; confirm against clDNN prior_box semantics.
1326 if (_step_w == 0.0f || _step_h == 0.0f) {
1327 _step_w = static_cast<float>(img_w) / static_cast<float>(img_dims[0]);
1328 _step_h = static_cast<float>(img_h) / static_cast<float>(img_dims[1]);
1331 auto priorBoxPrim = cldnn::prior_box(
1332 priorBoxLayer->name,
1346 m_env.primitiveIDs[priorBoxLayer->name] = priorBoxLayer->name;
1347 m_topology->add(priorBoxPrim);
1348 m_env.profilingIDs.insert(priorBoxLayer->name);
// Converts an IE Deconvolution layer into a cldnn::deconvolution primitive.
// Dilation is not supported; weights and biases are uploaded via the shared
// CreateWeightAndBiasPrimitives helper.
1351 void CLDNNGraph::CreateDeconvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1352 ValidateLayer(layer, 1);
1353 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1354 auto deconvLayer = dynamic_cast<InferenceEngine::DeconvolutionLayer *> (layer.get());
1356 if (deconvLayer->_dilation_x != 1 || deconvLayer->_dilation_y != 1) {
1357 THROW_CLDNN_EXCEPTION("Unsupported dilation in deconvolution " << layer->name);
1360 std::vector<cldnn::primitive_id> weightPrimID;
1361 std::vector<cldnn::primitive_id> biasPrimID;
1362 CreateWeightAndBiasPrimitives(layer, weightPrimID, biasPrimID);
1363 cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
1364 cldnn::spatial(deconvLayer->_stride_x, deconvLayer->_stride_y));
// cldnn expects padding as a negative input offset, hence the sign flip.
1365 cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
1366 cldnn::spatial(-deconvLayer->_padding_x, -deconvLayer->_padding_y));
1368 auto deconvPrim = cldnn::deconvolution(deconvLayer->name,
1376 CldnnTensorFromIEDims(deconvLayer->outData[0]->dims));
1377 m_env.primitiveIDs[deconvLayer->name] = deconvLayer->name;
1378 m_topology->add(deconvPrim);
1379 m_env.profilingIDs.insert(deconvLayer->name);
// Converts an IE Crop layer into a cldnn::crop primitive. Per-axis offsets
// from the IR are scattered into a 4-element offset vector; the reference
// (output) size comes from the layer's declared output dims.
1382 void CLDNNGraph::CreateCropPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1383 if (layer->insData.size() != 1 && layer->insData.size() != 2) {
1384 THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name);
// Fused layers are not supported on the crop path.
1386 if (layer->_fusedWith) {
1387 THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
1389 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1390 auto cropLayer = dynamic_cast<InferenceEngine::CropLayer*> (layer.get());
1391 IE_ASSERT(cropLayer->axis.size() == cropLayer->offset.size());
1392 IE_ASSERT(cropLayer->outData[0] && cropLayer->outData[0]->dims.size() == 4);
// offset is indexed by IE axis number (0..3); unlisted axes keep offset 0.
1394 std::vector<cldnn::tensor::value_type> offset{ 0, 0, 0, 0 };
1395 for (size_t i = 0; i < cropLayer->axis.size(); i++) {
1396 if (cropLayer->axis[i] < 0 || cropLayer->axis[i] > 3) {
1397 THROW_CLDNN_EXCEPTION("Invalid crop axis: " + std::to_string(cropLayer->axis[i]) + " in layer " + cropLayer->name);
1399 offset[cropLayer->axis[i]] = cropLayer->offset[i];
1401 auto outputDims = cropLayer->outData[0]->dims;
// Reorder IE dims (reversed NCHW) into the cldnn tensor argument order.
1402 cldnn::tensor refSize(
1403 TensorValue(outputDims[3]),
1404 TensorValue(outputDims[2]),
1405 TensorValue(outputDims[0]),
1406 TensorValue(outputDims[1]));
1408 auto cropPrim = cldnn::crop(
1412 cldnn::tensor(offset));
1413 m_env.primitiveIDs[cropLayer->name] = cropLayer->name;
1414 m_topology->add(cropPrim);
1415 m_env.profilingIDs.insert(cropLayer->name);
// Converts an IE ROIPooling layer into a cldnn::roi_pooling primitive using
// max pooling. Inputs: feature map data and region proposals (ROIs).
1418 void CLDNNGraph::CreateROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1419 ValidateLayer(layer, 2);
1420 auto roiPoolingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
// Output spatial size and ROI coordinate scale from the IR.
1423 int pooled_width = roiPoolingLayer->GetParamAsInt("pooled_w", 0);
1424 int pooled_height = roiPoolingLayer->GetParamAsInt("pooled_h", 0);
1425 float spatial_scale = roiPoolingLayer->GetParamAsFloat("spatial_scale", 1.0f);
1427 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1429 auto roiPoolingPrim = cldnn::roi_pooling(
1430 roiPoolingLayer->name,
1431 inputPrimitives[0], // input data
1432 inputPrimitives[1], // input rois
1433 cldnn::pooling_mode::max,
1437 m_env.primitiveIDs[roiPoolingLayer->name] = roiPoolingLayer->name;
1438 m_topology->add(roiPoolingPrim);
1439 m_env.profilingIDs.insert(roiPoolingLayer->name);
// Converts an IE PSROIPooling (position-sensitive ROI pooling, R-FCN style)
// layer into a cldnn::roi_pooling primitive in average mode.
1442 void CLDNNGraph::CreatePSROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1443 ValidateLayer(layer, 2);
1444 auto psROIPoolingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
// No defaults here: group_size and spatial_scale must be present in the IR.
1447 int group_size = psROIPoolingLayer->GetParamAsInt("group_size");
1448 // todo: assert outputdim*group_size*group_size == input features
1449 float spatial_scale = psROIPoolingLayer->GetParamAsFloat("spatial_scale");
1450 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1452 auto psROIPoolingPrim = cldnn::roi_pooling(
1453 psROIPoolingLayer->name,
1454 inputPrimitives[0], // input data
1455 inputPrimitives[1], // input rois
1456 cldnn::pooling_mode::average,
1461 m_env.primitiveIDs[psROIPoolingLayer->name] = psROIPoolingLayer->name;
1462 m_topology->add(psROIPoolingPrim);
1463 m_env.profilingIDs.insert(psROIPoolingLayer->name);
// Builds a cldnn::custom_gpu_primitive from a user-supplied OpenCL kernel
// description (CLDNNCustomLayerPtr): generates #defines from layer params,
// uploads blobs, wires kernel arguments (inputs/outputs/data), evaluates
// work-size expressions, and inserts input/output reorders where the kernel
// requests a specific format.
1466 void CLDNNGraph::CreateCustomLayerPrimitive(InferenceEngine::CNNLayerPtr & layer, CLDNNCustomLayerPtr customLayer) {
1467 ValidateLayer(layer, 0);
1468 // todo: handling fusing
1469 auto genericLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1470 auto inputPrimitives = GetPrevLayersPrimitives(layer);
// --- 1. Turn configured defines into "#define NAME value" lines, taking the
// value from the layer's params when present, else the configured default.
1473 std::string layerDefines;
1474 for (const auto& def : customLayer->Defines()) {
1475 std::string singleDefine("#define " + def.name + " " + def.prefix);
1476 if (genericLayer->params.find(def.param) != genericLayer->params.end()) {
1477 singleDefine += genericLayer->params.at(def.param);
1479 singleDefine += def.default_value;
1481 singleDefine += def.postfix + "\n";
1482 layerDefines.append(singleDefine);
// --- 2. Upload layer blobs as 1D data primitives; they are appended after
// the (possibly reordered) regular inputs, and blobIndex remembers where.
1486 std::vector<cldnn::primitive_id> reorderedInputs;
1487 reorderedInputs.resize(inputPrimitives.size());
1490 std::map<std::string, size_t> blobIndex;
1491 for (auto& blob : genericLayer->blobs) {
1492 // create primitive from blob (always 1d)
1493 cldnn::primitive_id blobId = genericLayer->name + "_" + blob.first;
1494 if (blob.second->dims().size() != 1) {
1495 THROW_CLDNN_EXCEPTION("Invalid dimensions for blob " << blob.first << " in layer " << genericLayer->name);
1497 CreatePrimitiveFromBlob(blobId, blob.second, cldnn::layout(
1500 cldnn::tensor(1, 1, TensorValue(blob.second->dims()[0]), 1)));
1501 // save index in blobIndex
1502 blobIndex[blob.first] = reorderedInputs.size();
1503 // add to reorderedInputs
1504 reorderedInputs.push_back(blobId);
1507 // Handle kernel parameters
// --- 3. Map configured kernel parameters to cldnn_arg slots. paramIndex is
// the kernel argument position; index -1 marks an unresolved reference.
1508 std::vector<cldnn_arg> kernelParameters;
1509 cldnn::format outputFormat(cldnn::format::any);
1510 for (const auto& param : customLayer->KernelParams()) {
1511 switch (param.type) {
1512 case CLDNNCustomLayer::ParamType::Input: {
1513 kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
1514 kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input;
1515 kernelParameters[param.paramIndex].index = static_cast<cldnn_arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
1517 // Handle input reorder
1518 if (param.portIndex < inputPrimitives.size() && reorderedInputs[param.portIndex].empty()) {
1519 // todo: add support for multiple reorders of the same input? (read as bfyx for one arg and yxfb for another)
1520 if (param.format != cldnn::format::any) {
// Kernel wants a specific layout: insert a reorder in front of this input.
1521 auto reorderPrimName = inputPrimitives[param.portIndex] + "_" + layer->name + m_preCustomLayerTag;
1522 auto preprocessPrim = cldnn::reorder(
1524 inputPrimitives[param.portIndex],
1526 DataTypeFromPrecision(layer->precision));
1527 m_topology->add(preprocessPrim);
1528 m_env.profilingIDs.insert(reorderPrimName);
1529 InitProfileInfo(reorderPrimName, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
1530 reorderedInputs[param.portIndex] = (reorderPrimName);
1532 reorderedInputs[param.portIndex] = inputPrimitives[param.portIndex];
1537 case CLDNNCustomLayer::ParamType::Output: {
1538 kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
1539 kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_output;
1540 kernelParameters[param.paramIndex].index =
1541 static_cast<cldnn_arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
1542 outputFormat = param.format;
// Data parameters reference uploaded blobs by name via blobIndex.
1545 case CLDNNCustomLayer::ParamType::Data: {
1546 kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
1547 kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input;
1548 kernelParameters[param.paramIndex].index =
1549 static_cast<cldnn_arg_index>((blobIndex.find(param.blobName) == blobIndex.end()) ? -1 : blobIndex.at(param.blobName));
1553 THROW_CLDNN_EXCEPTION("Invalid custom layer param type: " << param.type << " in layer: " << genericLayer->name);
1556 const std::string layerTitle("\n// Layer " + layer->name + " using Custom Layer " + customLayer->Name() + "\n");
1557 const std::string defineTitle("// Custom Layer User Defines\n");
// --- 4. Output tensor: IE dims are reversed, then padded to NCHW with 1s.
1559 auto dims = genericLayer->outData[0]->dims;
1560 std::reverse(dims.begin(), dims.end());
1562 size_t N = (dims.size() > 0) ? dims[0] : 1;
1563 size_t C = (dims.size() > 1) ? dims[1] : 1;
1564 size_t H = (dims.size() > 2) ? dims[2] : 1;
1565 size_t W = (dims.size() > 3) ? dims[3] : 1;
1566 cldnn::tensor outputTensor = cldnn::tensor(cldnn::batch(N), cldnn::feature(C), cldnn::spatial(W, H));
1568 cldnn::layout outputLayout = cldnn::layout(DataTypeFromPrecision(genericLayer->precision), outputFormat, outputTensor);
1570 // evaluate work sizes rules
1571 std::vector<size_t> gws, lws;
1573 // assume output tensor is dimension source by default
1574 int batchDim = outputTensor.batch[0];
1575 int featureDim = outputTensor.feature[0];
1576 int yDim = outputTensor.spatial[1];
1577 int xDim = outputTensor.spatial[0];
1578 int iidx = customLayer->InputDimSourceIndex();
1580 // if input index is greater than -1, take dimension from input
1582 if (iidx >= genericLayer->insData.size())
1583 THROW_CLDNN_EXCEPTION("Invalid input tensor for index: " << iidx);
1584 // get dimensions from one of the input tensors
1585 auto inDataPtr = genericLayer->insData[iidx].lock();
1587 THROW_CLDNN_EXCEPTION("Data inserted into generic layer " << genericLayer->name << " is nullptr");
1589 auto inputDims = inDataPtr->dims;
// NOTE(review): the guards below test dims.size() (the output dims) while
// reading from inputDims — presumably intentional only when input and output
// ranks match; verify.
1591 batchDim = featureDim = yDim = 0;
1592 xDim = inputDims[0];
1594 if (dims.size() > 1)
1595 yDim = inputDims[1];
1596 if (dims.size() > 2)
1597 featureDim = inputDims[2];
1598 if (dims.size() > 3)
1599 batchDim = inputDims[3];
// --- 5. Evaluate global/local work-size expressions over b/f/y/x variables.
1601 const std::map<char, int> vars = {
1602 { 'b', batchDim } , { 'B', batchDim },
1603 { 'f', featureDim }, { 'F', featureDim },
1604 { 'y', yDim }, { 'Y', yDim },
1605 { 'x', xDim }, { 'X', xDim },
1607 for (auto rule : customLayer->GlobalSizeRules()) {
1608 SimpleMathExpression expr;
1609 expr.SetVariables(vars);
1610 expr.SetExpression(rule);
1611 gws.push_back(expr.Evaluate());
1613 for (auto rule : customLayer->LocalSizeRules()) {
1614 SimpleMathExpression expr;
1615 expr.SetVariables(vars);
1616 expr.SetExpression(rule);
1617 lws.push_back(expr.Evaluate());
1620 auto customPrim = cldnn::custom_gpu_primitive(
1623 { layerTitle, defineTitle, layerDefines, customLayer->KernelSource() },
1624 customLayer->KernelEntry(),
1626 customLayer->CompilerOptions(),
// --- 6. If the kernel produced a non-default format and this layer is not a
// network output, reorder back and publish the reorder as the layer's id.
1631 if (outputLayout.format != cldnn::format::any &&
1632 p_currentOutputs->find(genericLayer->name) == p_currentOutputs->end()) {
1633 // Handle output reorder
1634 auto reorderPrimName = genericLayer->name + m_postCustomLayerTag;
1640 m_networkPrecision));
1641 m_env.primitiveIDs[genericLayer->name] = reorderPrimName;
1642 m_env.profilingIDs.insert(reorderPrimName);
1643 InitProfileInfo(reorderPrimName, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
1645 m_env.primitiveIDs[genericLayer->name] = genericLayer->name;
1647 m_topology->add(customPrim);
1648 m_env.profilingIDs.insert(genericLayer->name);
// Converts an IE SimplerNMS layer into a cldnn::proposal primitive with the
// fixed SimplerNMS anchor ratios. Same 3-input contract as Proposal:
// cls_score, bbox_pred, im_info.
1651 void CLDNNGraph::CreateSimplerNMSPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1652 ValidateLayer(layer, 3);
1653 IE_ASSERT(layer->insData[0].lock()->dims[3] == 1); // only handling input batch size 1
1654 IE_ASSERT(layer->insData[1].lock()->dims[3] == 1); // only handling input batch size 1
1655 auto simpleNMSLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
// Parameters without a default (max_num_proposals, pre/post_nms_topn, scale)
// must be present in the IR.
1657 int max_num_proposals = simpleNMSLayer->GetParamAsInt("max_num_proposals");
1658 float iou_threshold = simpleNMSLayer->GetParamAsFloat("iou_threshold", 0.7f);
1659 int min_bbox_size = simpleNMSLayer->GetParamAsInt("min_bbox_size", 16);
1660 int feature_stride = simpleNMSLayer->GetParamAsInt("feat_stride", 16);
1661 int pre_nms_topn = simpleNMSLayer->GetParamAsInt("pre_nms_topn");
1662 int post_nms_topn = simpleNMSLayer->GetParamAsInt("post_nms_topn");
1663 std::vector<float> scale = simpleNMSLayer->GetParamAsFloats("scale");
1664 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1666 auto simpleNMSPrim = cldnn::proposal(
1667 simpleNMSLayer->name,
1668 inputPrimitives[0], // cls_score
1669 inputPrimitives[1], // bbox_pred
1670 inputPrimitives[2], // im_info
1677 { 0.5f, 1.0f, 2.0f }, // ratios for the SimplerNMS variant
1680 m_env.primitiveIDs[simpleNMSLayer->name] = simpleNMSLayer->name;
1681 m_topology->add(simpleNMSPrim);
1682 m_env.profilingIDs.insert(simpleNMSLayer->name);
// Converts an IE Eltwise layer into a cldnn::eltwise primitive.
// Operand coefficients are only meaningful for Sum, and when given must
// match the operand count one-to-one.
1685 void CLDNNGraph::CreateEltwisePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1686 ValidateEltwiseLayer(layer);
1688 auto eltwiseLayer = dynamic_cast<InferenceEngine::EltwiseLayer *> (layer.get());
1689 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1691 std::vector<float> coefficients = eltwiseLayer->coeff;
1692 if (eltwiseLayer->_operation != InferenceEngine::EltwiseLayer::Sum && !coefficients.empty()) {
1693 THROW_IE_EXCEPTION << "Only sum operation supports operands coefficients";
1696 if (!coefficients.empty() && coefficients.size() != inputPrimitives.size()) {
1697 THROW_IE_EXCEPTION << "Number of provided coefficients is not equal to number of operands";
1700 auto eltwisePrim = cldnn::eltwise(
1703 EltwiseModeFromIEEltwise(eltwiseLayer->_operation),
1705 m_env.primitiveIDs[eltwiseLayer->name] = eltwiseLayer->name;
1706 m_topology->add(eltwisePrim);
1707 m_env.profilingIDs.insert(eltwiseLayer->name);
// Converts an IE Concat layer into a cldnn::concatenation primitive over all
// of its inputs, translating the IE axis to the cldnn concatenation axis.
1710 void CLDNNGraph::CreateConcatenatePrimitive(InferenceEngine::CNNLayerPtr &layer) {
// 0 = accept any number of inputs.
1711 ValidateLayer(layer, 0);
1712 auto concatLayer = dynamic_cast<InferenceEngine::ConcatLayer *> (layer.get());
1713 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1714 auto concatPrim = cldnn::concatenation(
1717 ConcatAxisFromIEAxis(concatLayer->_axis));
1718 m_env.primitiveIDs[concatLayer->name] = concatLayer->name;
1719 m_topology->add(concatPrim);
1720 m_env.profilingIDs.insert(concatLayer->name);
// Converts an IE Split layer. The AlexNet-style split->conv*2->concat pattern
// is detected and fused into a single grouped convolution. Otherwise each
// output becomes a cldnn::crop primitive at an accumulated offset (the real
// cldnn::split path exists only behind _USE_SPLIT_PRIMITIVE); the Split layer
// itself is marked optimized-out since no kernel runs for it.
1723 void CLDNNGraph::CreateSplitPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1724 ValidateLayer(layer, 1);
1725 auto splitLayer = dynamic_cast<InferenceEngine::SplitLayer *> (layer.get());
1726 if (IsValidSplitConvMerge(splitLayer)) {
1727 // AlextNet style split->conv*2->merge
1728 CreateFusedSplitConvMergePrimitive(layer);
1730 #ifdef _USE_SPLIT_PRIMITIVE
1731 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1732 auto inputDims = splitLayer->insData[0].lock()->dims;
1733 InferenceEngine::SizeVector startOffset(inputDims.size());
1734 std::vector<std::pair<cldnn::primitive_id, cldnn::tensor>> outputOffsets;
// NOTE(review): debug print left in the (disabled) split-primitive path.
1735 std::cout << "Splitting layer: " << layer->name << "\n\tSize:" << CldnnTensorFromIEDims(inputDims) << std::endl;
1736 for (auto& outLayer : splitLayer->outData) {
1737 if (outLayer->dims.size() != startOffset.size()) {
1738 THROW_CLDNN_EXCEPTION("Invalid dimesions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1740 for (size_t i = 0; i < inputDims.size(); i++) {
1741 if ((outLayer->dims[i] + startOffset[i]) > inputDims[i]) {
1742 THROW_CLDNN_EXCEPTION("Invalid dimesions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1745 auto outTensor = CldnnTensorFromIEDims(outLayer->dims);
1746 auto cropPrim = cldnn::crop(outLayer->name, inputPrimitives[0], outTensor, CldnnTensorFromIEDims(startOffset));
1747 m_topology->add(cropPrim);
1748 m_env.primitiveIDs[outLayer->name] = outLayer->name;
1749 m_env.profilingIDs.insert(outLayer->name);
1750 outputOffsets.push_back({ outLayer->name, CldnnTensorFromIEDims(startOffset) });
// Advance the running offset along the axes actually being split.
1751 for (size_t i = 0; i < inputDims.size(); i++) {
1752 if (outLayer->dims[i] != inputDims[i]) {
1753 startOffset[i] += outLayer->dims[i];
1758 auto splitPrim = cldnn::split(
1762 m_topology->add(splitPrim);
1765 // set split as not_run
1766 InitProfileInfo(layer->name, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT); // Mark this layer as optimized out
1768 #else // _USE_SPLIT_PRIMITIVE
1769 // TODO: replace with clDNN split when it's implemented
1770 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1771 auto inDataPtr = splitLayer->insData[0].lock();
1773 THROW_CLDNN_EXCEPTION("Data inserts into split layer " << splitLayer->name << " is nullptr");
1775 auto inputDims = inDataPtr->dims;
1776 InferenceEngine::SizeVector startOffset(inputDims.size());
// Builds a cldnn tensor from (reversed) IE dims, padding missing axes with
// `def` (1 for sizes, 0 for offsets).
1778 auto TensorFromIEDims = [](const InferenceEngine::SizeVector& dims, int def) {
1779 switch (dims.size()) {
1780 case 1: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(def, def));
1781 case 2: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(dims[1], def));
1782 case 3: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[2], def));
1783 case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
1784 default: THROW_CLDNN_EXCEPTION("Invalid dimensions size(" << dims.size() << ") in split layer");
// Emit one crop primitive per split output, validating that each output
// fits inside the input starting from the accumulated offset.
1788 for (auto& outLayer : splitLayer->outData) {
1789 if (outLayer->dims.size() != startOffset.size()) {
1790 THROW_CLDNN_EXCEPTION("Invalid dimesions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1792 for (size_t i = 0; i < inputDims.size(); i++) {
1793 if ((outLayer->dims[i] + startOffset[i]) > inputDims[i]) {
1794 THROW_CLDNN_EXCEPTION("Invalid dimesions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1797 SizeVector reverseDims = outLayer->dims;
1798 std::reverse(reverseDims.begin(), reverseDims.end());
1799 auto outTensor = TensorFromIEDims(reverseDims, 1);
1801 SizeVector reverseOffset = startOffset;
1802 std::reverse(reverseOffset.begin(), reverseOffset.end());
1803 auto offsetTensor = TensorFromIEDims(reverseOffset, 0);
1805 auto cropPrim = cldnn::crop(outLayer->name, inputPrimitives[0], outTensor, offsetTensor);
1806 m_env.primitiveIDs[outLayer->name] = outLayer->name;
1807 m_topology->add(cropPrim);
1808 m_env.profilingIDs.insert(outLayer->name);
1809 InitProfileInfo(outLayer->name, "Crop", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
// Advance the running offset along the axes actually being split.
1811 for (size_t i = 0; i < inputDims.size(); i++) {
1812 if (outLayer->dims[i] != inputDims[i]) {
1813 startOffset[i] += outLayer->dims[i];
1818 // set split as not_run
1819 InitProfileInfo(layer->name, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT); // Mark this layer as optimized out
1820 #endif // _USE_SPLIT_PRIMITIVE
// Fuses a Split -> (Conv, Conv) -> Concat subgraph into a single clDNN
// convolution primitive with two weight/bias sets (grouped-convolution style).
// Precondition: IsValidSplitConvMerge(splitLayer) must already hold (asserted).
// Side effects: rewrites `layer` to point at the concat layer so graph walking
// continues after the fused region, and maps all fused layer names to the
// split primitive's id.
1824 void CLDNNGraph::CreateFusedSplitConvMergePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1825 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1826 // only handle the split->conv->merge topology for now
1827 auto splitLayer = dynamic_cast<InferenceEngine::SplitLayer *> (layer.get());
1828 IE_ASSERT(IsValidSplitConvMerge(splitLayer));
// convLayer1/convLayer2/concatLayer declarations are on elided lines; the
// dynamic_casts below resolve the two convolutions and the merge layer.
1831 dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[0]).get());
1833 dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[1]).get());
1835 dynamic_cast<InferenceEngine::ConcatLayer *> (GetNextSingleLayer(
1836 GetNextSingleLayer(splitLayer->outData[0])).get());
1838 if (convLayer1 == nullptr ||
1839 convLayer2 == nullptr ||
1840 concatLayer == nullptr) {
1841 THROW_CLDNN_EXCEPTION("Expected single layer does not exist");
1843 // Mark these layers as optimized out
1844 InitProfileInfo(convLayer1->name, convLayer1->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
1845 InitProfileInfo(convLayer2->name, convLayer2->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
1846 InitProfileInfo(concatLayer->name, concatLayer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
1848 // build the split conv primitive
1849 std::vector<cldnn::primitive_id> weightPrimID;
1850 std::vector<cldnn::primitive_id> biasPrimID;
// Both convolutions contribute their weights/biases to the same vectors,
// producing the multi-set weight input of the fused convolution.
1851 CreateWeightAndBiasPrimitives(GetNextSingleLayer(splitLayer->outData[0]), weightPrimID, biasPrimID);
1852 CreateWeightAndBiasPrimitives(GetNextSingleLayer(splitLayer->outData[1]), weightPrimID, biasPrimID);
1854 auto concatLayerPtr = std::make_shared<InferenceEngine::CNNLayer>(*concatLayer);
// Stride/padding/dilation are taken from convLayer1 only — valid because
// IsValidSplitConvMerge() verified both convolutions have identical params.
1856 cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
1857 cldnn::spatial(convLayer1->_stride_x, convLayer1->_stride_y));
1858 cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
1859 cldnn::spatial(-convLayer1->_padding_x, -convLayer1->_padding_y));
1860 cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
1861 cldnn::spatial(convLayer1->_dilation_x, convLayer1->_dilation_y));
// Remaining constructor arguments (inputs, weights, biases, stride, etc.)
// are on elided lines; output tensor comes from the concat's output dims.
1863 auto splitPrim = cldnn::convolution(splitLayer->name,
1872 CldnnTensorFromIEDims(concatLayer->outData[0]->dims));
// Continue graph traversal from the concat layer (end of the fused region).
1874 layer = concatLayerPtr;
1876 m_env.primitiveIDs[splitLayer->name] = splitLayer->name;
1877 m_env.primitiveIDs[convLayer1->name] = splitLayer->name;
1878 m_env.primitiveIDs[convLayer2->name] = splitLayer->name;
1879 m_env.primitiveIDs[concatLayer->name] = splitLayer->name; // pair the last merged layer (concat or relu) with
1880 // this primitive name to be used as
1881 // input prim for subsequent layers
1882 m_topology->add(splitPrim);
1883 m_env.profilingIDs.insert(splitLayer->name);
// Maps an IE Power layer (y = (scale*x + offset)^power) to clDNN.
// Only power values 1.0 and 0.5 are supported:
//  - scale==1 && offset==0 && power==0.5  -> single sqrt activation
//  - scale==1 && offset==0 && power==1.0  -> no-op, reuse input primitive
//  - otherwise -> scale primitive (with optional bias), then sqrt if power==0.5.
1886 void CLDNNGraph::CreatePowerPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1887 ValidateLayer(layer, 1);
1888 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1889 auto powerLayer = dynamic_cast<InferenceEngine::PowerLayer *> (layer.get());
1890 if (powerLayer->power != 1.0f && powerLayer->power != 0.5f) {
// NOTE(review): message is missing a space between the layer name and "uses".
1891 THROW_CLDNN_EXCEPTION("Power Layer " << layer->name << "uses unsupported power value");
1894 if ((powerLayer->scale == 1.0f) && (powerLayer->offset == 0.0f)) {
1895 if (powerLayer->power == 0.5f) {
1896 auto activationPrim = cldnn::activation(powerLayer->name, inputPrimitives[0], activation_sqrt);
1897 m_topology->add(activationPrim);
1898 m_env.profilingIDs.insert(powerLayer->name);
1899 m_env.primitiveIDs[powerLayer->name] = powerLayer->name;
// power == 1.0: identity — just alias this layer's name to its input.
1902 m_env.primitiveIDs[powerLayer->name] = inputPrimitives[0]; // register the previous primID for this layer too
1903 InitProfileInfo(layer->name, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::NOT_RUN); // Mark this layer as not run
1906 // create scale primitive
1907 auto scaleValuePrimName = powerLayer->name + m_scalesTag;
1908 AddSingleValuePrimitive(scaleValuePrimName,
1909 DataTypeFromPrecision(powerLayer->precision),
// Bias primitive is only created when there is a non-zero offset; an empty
// id tells cldnn::scale to skip the bias input.
1912 cldnn::primitive_id biasValuePrimName = "";
1913 if (powerLayer->offset != 0.0f) {
1914 biasValuePrimName = powerLayer->name + m_biasesTag;
1915 AddSingleValuePrimitive(biasValuePrimName,
1916 DataTypeFromPrecision(powerLayer->precision),
1917 powerLayer->offset);
1919 auto scalePrim = cldnn::scale(
1925 m_env.primitiveIDs[powerLayer->name] = powerLayer->name;
1926 m_topology->add(scalePrim);
1927 m_env.profilingIDs.insert(powerLayer->name);
// Apply sqrt after the scale stage when power == 0.5.
1929 if (powerLayer->power == 0.5f) {
1930 auto activationPrim = cldnn::activation(powerLayer->name+"_sqrt", powerLayer->name, activation_sqrt);
1931 m_topology->add(activationPrim);
1932 m_env.profilingIDs.insert(powerLayer->name+"_sqrt");
// Maps an IE SoftMax layer to a clDNN softmax primitive.
// The producing layer is inspected because clDNN FullyConnected outputs use a
// BX layout instead of BF, which changes how the softmax axis is interpreted
// (handled inside SoftmaxDimensionFromIEAxis via the isPrevFC flag).
1937 void CLDNNGraph::CreateSoftMaxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1938 ValidateLayer(layer, 1);
1939 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1940 auto softmaxLayer = dynamic_cast<InferenceEngine::SoftMaxLayer *> (layer.get());
1942 // additional WA for clDNN FullyConnected output in BX instead of BF
1944 auto prevData = layer->insData[0].lock();
1946 if (prevData == nullptr) {
1947 THROW_CLDNN_EXCEPTION("SoftMax: nonexistent input for layer: " << layer->name);
1950 auto prevCreator = prevData->creatorLayer.lock();
1951 bool isPrevFC = false;
1953 if (prevCreator && (LayerTypeFromStr(prevCreator->type) == FullyConnected))
1957 auto softmaxPrim = cldnn::softmax(softmaxLayer->name, inputPrimitives[0], SoftmaxDimensionFromIEAxis(softmaxLayer, isPrevFC));
1958 m_env.primitiveIDs[softmaxLayer->name] = softmaxLayer->name;
1959 m_topology->add(softmaxPrim);
1960 m_env.profilingIDs.insert(softmaxLayer->name);
// Maps an IE FullyConnected layer to a clDNN fully_connected primitive,
// creating the weight and (optional) bias data primitives from the layer blobs.
1963 void CLDNNGraph::CreateFullyConnectedPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1964 ValidateLayer(layer, 1);
1965 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1966 auto fcLayer = dynamic_cast<InferenceEngine::FullyConnectedLayer *> (layer.get());
1968 // create bias primitive
1969 cldnn::primitive_id biasesPrimID = "";
1970 if (fcLayer->_biases != nullptr) {
1971 biasesPrimID = fcLayer->name + m_biasesTag;
// Bias is a 1-D blob of _out_num elements laid out along the spatial dim.
1972 CreatePrimitiveFromBlob(biasesPrimID,
1974 cldnn::layout(m_networkPrecision, m_defaultFormat,
1975 cldnn::spatial(TensorValue(fcLayer->_out_num))));
1978 // create weights primitive
1979 // gcc bug to resolve auto, at least for 5.4 version
1980 std::shared_ptr<Data> insData0 = fcLayer->insData[0].lock();
1981 IE_ASSERT(insData0 != nullptr);
1982 cldnn::primitive_id weightsPrimID = fcLayer->name + m_weightsTag;
1983 cldnn::tensor weightsDims;
// Weight tensor shape depends on input rank (case labels are on elided
// lines — presumably 4-D and 2-D inputs; anything else throws).
1984 switch (insData0->dims.size()) {
1986 weightsDims = { TensorValue(fcLayer->outData[0]->dims[0]),
1987 TensorValue(insData0->dims[2]),
1988 TensorValue(insData0->dims[0]),
1989 TensorValue(insData0->dims[1]) };
1992 weightsDims = { TensorValue(fcLayer->outData[0]->dims[0]), 1, TensorValue(insData0->dims[0]), 1 };
1994 default: THROW_CLDNN_EXCEPTION("Invalid data dimensions");
1996 CreatePrimitiveFromBlob(weightsPrimID,
1998 cldnn::layout(m_networkPrecision, m_defaultFormat, weightsDims));
2000 auto fcPrim = cldnn::fully_connected(fcLayer->name,
2007 m_env.primitiveIDs[fcLayer->name] = fcLayer->name;
2008 m_topology->add(fcPrim);
2009 m_env.profilingIDs.insert(fcLayer->name);
// Maps an IE Pooling layer to a clDNN pooling primitive.
// Two paths:
//  - >1 outputs: max pooling with argmax — the argmax output (consumed by a
//    later Unpooling layer) is backed by a mutable_data buffer so it can be
//    read back after execution.
//  - single output: plain pooling with mode derived from the IE pooling type.
2012 void CLDNNGraph::CreatePoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2013 ValidateLayer(layer, 1);
2014 auto inputPrimitives = GetPrevLayersPrimitives(layer);
2015 auto poolLayer = dynamic_cast<InferenceEngine::PoolingLayer *> (layer.get());
2017 if (poolLayer->outData.size() > 1) {
2018 // max pooling with argmax
2019 SizeVector argmaxDims;
2021 std::string realOutputID, argmaxOutputID;
2022 int outputOrder = 0;
// Identify which output feeds an Unpooling layer (that one is the argmax
// output); the other is the real pooling result.
// NOTE(review): outputOrder is only checked here — its increment is on an
// elided line; verify against the full source.
2024 for (auto out : poolLayer->outData) {
2025 auto layersMap = out->getInputTo();
2027 for (auto item : layersMap) {
2028 bool isUpooling = (LayerTypeFromStr(item.second->type) == Unpooling);
2029 if (outputOrder == 1 && isUpooling) {
2030 argmaxDims = out->dims;
2031 argmaxOutputID = out->name;
2033 realOutputID = out->name;
2039 // create mutable_data primitive for storing argmax data
2040 cldnn::tensor mutableTensor;
// IE dims are stored innermost-first, hence the reversed indexing below.
2041 switch (argmaxDims.size()) {
2042 case 4: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[3]), TensorValue(argmaxDims[2]),
2043 TensorValue(argmaxDims[0]), TensorValue(argmaxDims[1]));
2045 case 3: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[2]), TensorValue(argmaxDims[1]),
2046 1, TensorValue(argmaxDims[0]));
2048 case 2: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[1]), 1, TensorValue(argmaxDims[0]), 1);
2050 case 1: // not implemented yet.
2051 default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions");
// Argmax indices are stored as f32 in the mutable buffer.
2054 cldnn::layout mutableLayout = cldnn::layout(
2055 cldnn::data_types::f32,
2059 cldnn::primitive_id argmaxPrimID = layer->name + "_argmax_mutable";
2061 auto mem = cldnn::memory::allocate(*(m_env.engine), mutableLayout);
2062 auto argmax_mutable_prim = cldnn::mutable_data(argmaxPrimID, mem);
2063 m_topology->add(argmax_mutable_prim);
2064 m_env.primitiveIDs[argmaxPrimID] = argmaxPrimID;
2065 m_env.primitiveIDs[argmaxOutputID] = argmaxPrimID;
2067 // create pooling primitive itself
2068 auto poolPrim = cldnn::pooling(poolLayer->name,
2071 cldnn::pooling_mode::max_with_argmax,
2072 cldnn::spatial(TensorValue(poolLayer->_kernel_x), TensorValue(poolLayer->_kernel_y)), // size
2073 cldnn::spatial(TensorValue(poolLayer->_stride_x), TensorValue(poolLayer->_stride_y)), // stride
2074 // input offset (padding) - explicit tensor for 0 bf
2075 { 0, 0, -TensorValue(poolLayer->_padding_x), -TensorValue(poolLayer->_padding_y) },
2076 CldnnTensorFromIEDims(poolLayer->outData[0]->dims));
2077 m_topology->add(poolPrim);
2078 m_env.primitiveIDs[realOutputID] = poolLayer->name;
// Single-output path: regular pooling.
2081 auto poolPrim = cldnn::pooling(poolLayer->name,
2083 PoolingModeFromIEPooling(poolLayer->_type, poolLayer->_exclude_pad),
2084 cldnn::spatial(TensorValue(poolLayer->_kernel_x), TensorValue(poolLayer->_kernel_y)), // size
2085 cldnn::spatial(TensorValue(poolLayer->_stride_x), TensorValue(poolLayer->_stride_y)), // stride
2086 // input offset (padding) - explicit tensor for 0 bf
2087 { 0, 0, -TensorValue(poolLayer->_padding_x), -TensorValue(poolLayer->_padding_y) },
2088 CldnnTensorFromIEDims(poolLayer->outData[0]->dims));
2089 m_topology->add(poolPrim);
2090 m_env.primitiveIDs[poolLayer->name] = poolLayer->name;
2093 m_env.profilingIDs.insert(poolLayer->name);
// Maps an IE Norm (LRN) layer to a clDNN lrn primitive. The norm region is
// selected from the layer's _isAcrossMaps flag (across-channel vs within-channel).
2096 void CLDNNGraph::CreateLRNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2097 ValidateLayer(layer, 1);
2098 auto inputPrimitives = GetPrevLayersPrimitives(layer);
2099 auto lrnLayer = dynamic_cast<InferenceEngine::NormLayer *> (layer.get());
2100 auto lrnPrim = cldnn::lrn(
2104 static_cast<float>(lrnLayer->_k),
2107 lrnLayer->_isAcrossMaps ? cldnn_lrn_norm_region_across_channel : cldnn_lrn_norm_region_within_channel);
2109 m_env.primitiveIDs[lrnLayer->name] = lrnLayer->name;
2110 m_topology->add(lrnPrim);
2111 m_env.profilingIDs.insert(lrnLayer->name);
// Maps an IE activation layer to a clDNN activation primitive.
// `type` is either the concrete activation LayerType or the generic
// `Activation`, in which case the real type is read from the layer's
// "type" string parameter. Per-activation extra parameters (alpha,
// negative_slope, clamp min/max) are filled into `params`.
2114 void CLDNNGraph::CreateActivationPrimitive(InferenceEngine::CNNLayerPtr &layer, const LayerType type) {
2115 ValidateLayer(layer, 1);
2116 auto inputPrimitives = GetPrevLayersPrimitives(layer);
2117 cldnn_activation_additional_params params{ 0.0f, 0.0f };
2118 cldnn_activation_func func = cldnn_activation_func_t::activation_none;
2120 LayerType activationType;
2121 if (type == Activation) {
// Generic "Activation" layer: resolve the concrete kind from its params.
2122 std::string activation_type = layer->GetParamAsString("type");
2123 if (activation_type == "tanh") {
2124 activationType = TanH;
2125 } else if (activation_type == "sigmoid" || activation_type == "logistic") {
2126 activationType = Sigmoid;
2127 } else if (activation_type == "elu") {
2128 activationType = ELU;
2129 } else if (activation_type == "relu") {
2130 activationType = ReLU;
2131 } else if (activation_type == "relu6") {
2132 activationType = ReLU6;
2133 } else if (activation_type == "clamp") {
2134 activationType = Clamp;
2136 THROW_CLDNN_EXCEPTION("Unsupported activation type (" + activation_type +
2137 ") in layer " + layer->name);
2140 activationType = type;
// Translate to the clDNN activation function enum; the case labels are on
// elided lines (ordering here: TanH, ELU, Sigmoid, ReLU, ReLU6, Clamp).
2143 switch (activationType) {
2146 func = cldnn_activation_func_t::activation_hyperbolic_tan;
2151 func = cldnn_activation_func_t::activation_elu;
2152 params.a = layer->GetParamAsFloat("alpha", 1.0f);
2157 func = cldnn_activation_func_t::activation_logistic;
2162 func = cldnn_activation_func_t::activation_relu_negative_slope;
2163 params.a = layer->GetParamAsFloat("negative_slope", 0.0f);
// ReLU6 is expressed as a clamp with upper bound "n" (default 6.0).
2168 func = cldnn_activation_func_t::activation_clamp;
2169 params.b = layer->GetParamAsFloat("n", 6.0f);
2174 func = cldnn_activation_func_t::activation_clamp;
2175 params.a = layer->GetParamAsFloat("min");
2176 params.b = layer->GetParamAsFloat("max");
2180 THROW_CLDNN_EXCEPTION("Unsupported activation type (" + layer->type +
2181 ") in layer " + layer->name);
2184 auto activationPrimitive = cldnn::activation(layer->name, inputPrimitives[0], func, params);
2185 m_env.primitiveIDs[layer->name] = layer->name;
2186 m_topology->add(activationPrimitive);
2187 m_env.profilingIDs.insert(layer->name);
// Handles a Copy layer by eliding it entirely: no clDNN primitive is created;
// the layer's name is simply aliased to its input primitive and the layer is
// reported as optimized out in the profiling info.
2190 void CLDNNGraph::CreateCopyPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2191 ValidateLayer(layer, 1);
2192 auto inputPrimitives = GetPrevLayersPrimitives(layer);
2193 auto copyLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2195 // Optimize out and just update references
2196 m_env.primitiveIDs[copyLayer->name] = inputPrimitives[0];
2197 InitProfileInfo(layer->name, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT); // Mark this layer as optimized out
// Maps a generic Upsampling layer to a clDNN upsampling primitive, reading
// "scale", "num_filter" and "sample_type" from the layer parameters.
2200 void CLDNNGraph::CreateUpsamplingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2201 // Assuming multi-input will be handled by prev concat/eltwise layers
2202 ValidateLayer(layer, 1);
2203 auto inputPrimitives = GetPrevLayersPrimitives(layer);
2204 auto upsamplingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2205 uint32_t scale = upsamplingLayer->GetParamAsUInt("scale");
2206 uint32_t numFilter = upsamplingLayer->GetParamAsUInt("num_filter");
2207 std::string sampleType = upsamplingLayer->GetParamAsString("sample_type");
2209 auto upsamplingPrim = cldnn::upsampling(
2210 upsamplingLayer->name,
// Remaining arguments (input, scale, num_filter) are on elided lines.
2214 UpsamplingTypeFromString(sampleType));
2216 m_env.primitiveIDs[upsamplingLayer->name] = upsamplingLayer->name;
2217 m_topology->add(upsamplingPrim);
2218 m_env.profilingIDs.insert(upsamplingLayer->name);
// Maps a caffe-style Resample layer onto a clDNN upsampling primitive.
// Only nearest-neighbor resampling is supported; the integer scale factor is
// derived from the ratio of output to input innermost spatial dims.
2221 void CLDNNGraph::CreateResamplePrimitive(InferenceEngine::CNNLayerPtr &layer) {
2222 ValidateLayer(layer, 1);
2223 auto inputPrimitives = GetPrevLayersPrimitives(layer);
2224 auto resampleLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2226 auto outDims = layer->outData[0]->dims;
2227 size_t inFeatures = 1;
2228 unsigned int scale = 1;
2229 std::shared_ptr<Data> insData0 = layer->insData[0].lock();
2230 IE_ASSERT(insData0 != nullptr);
2231 if (insData0->dims.size() > 2) {
// IE dims are innermost-first: dims[2] is the feature count, dims[0] the
// innermost spatial dim used to compute the (integer) upscale factor.
2232 inFeatures = insData0->dims[2];
2233 scale = outDims[0]/insData0->dims[0];
2235 THROW_CLDNN_EXCEPTION("Unsupported scale in layer " + layer->name);
2238 std::string sampleType = resampleLayer->GetParamAsString("type");
2240 if (sampleType != "caffe.ResampleParameter.NEAREST") {
2241 THROW_CLDNN_EXCEPTION("Unsupported resampling type (" + sampleType + ") in layer " + layer->name);
2244 auto upsamplingPrim = cldnn::upsampling(
2245 resampleLayer->name,
// Remaining arguments (input, scale, inFeatures) are on elided lines.
2249 cldnn::upsampling_sample_type::nearest);
2251 m_env.primitiveIDs[resampleLayer->name] = resampleLayer->name;
2252 m_topology->add(upsamplingPrim);
2253 m_env.profilingIDs.insert(resampleLayer->name);
// Maps a YOLOv2 RegionYolo layer to a clDNN region_yolo primitive.
// Reads coords/classes/num/do_softmax parameters (with YOLOv2 defaults) and
// the optional anchor "mask" whose length is forwarded as mask_size.
2256 void CLDNNGraph::CreateYOLO2RegionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2257 ValidateLayer(layer, 1);
2258 auto inputPrimitives = GetPrevLayersPrimitives(layer);
2259 auto YOLOregionLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2261 uint32_t coords = YOLOregionLayer->GetParamAsUInt("coords", 4);
2262 uint32_t classes = YOLOregionLayer->GetParamAsUInt("classes", 20);
2263 uint32_t num = YOLOregionLayer->GetParamAsUInt("num", 1);
2264 bool do_softmax = YOLOregionLayer->GetParamsAsBool("do_softmax", true);
2266 uint32_t mask_size = 0;
2267 if (HasParam(YOLOregionLayer->params, "mask")) {
2268 const auto mask = YOLOregionLayer->GetParamAsInts("mask");
2269 mask_size = static_cast<uint32_t>(mask.size());
2272 auto regionPrim = cldnn::region_yolo(
2273 YOLOregionLayer->name,
// Remaining arguments (input, coords, classes, num, mask_size, do_softmax)
// are on elided lines.
2281 m_env.primitiveIDs[YOLOregionLayer->name] = YOLOregionLayer->name;
2282 m_topology->add(regionPrim);
2283 m_env.profilingIDs.insert(YOLOregionLayer->name);
// Maps a YOLOv2 ReorgYolo layer to a clDNN reorg_yolo primitive using the
// layer's "stride" parameter.
2286 void CLDNNGraph::CreateYOLO2ReorgPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2287 ValidateLayer(layer, 1);
2288 auto inputPrimitives = GetPrevLayersPrimitives(layer);
2289 auto YOLOreorgLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2290 uint32_t stride = YOLOreorgLayer->GetParamAsUInt("stride");
2292 auto reorgPrim = cldnn::reorg_yolo(
2293 YOLOreorgLayer->name,
// Remaining arguments (input, stride) are on elided lines.
2297 m_env.primitiveIDs[YOLOreorgLayer->name] = YOLOreorgLayer->name;
2298 m_topology->add(reorgPrim);
2299 m_env.profilingIDs.insert(YOLOreorgLayer->name);
// Maps an IE ArgMax layer to a clDNN arg_max_min primitive (max mode only).
// Unsupported: out_max_val != 0. "top_k" defaults to 1; "axis" (if present)
// selects batch/feature/y/x, otherwise the combined xyf axis is used.
2302 void CLDNNGraph::CreateArgMaxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2303 ValidateLayer(layer, 1);
2304 auto inputPrimitives = GetPrevLayersPrimitives(layer);
2305 auto ArgMaxLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2306 const cldnn::arg_max_min::out_type otype = cldnn::arg_max_min::out_type::max;
2308 if (HasParam(ArgMaxLayer->params, "out_max_val")) {
2309 int32_t out_max_val_flag = ArgMaxLayer->GetParamAsInt("out_max_val");
2310 if (out_max_val_flag != 0) {
2311 THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str << "ArgMax: out_max_val param is not supported for layer: " << layer->name;
2315 uint32_t top_k = ArgMaxLayer->GetParamAsUInt("top_k", 1);
2317 cldnn::arg_max_min::axis_name chosen_axis = cldnn::arg_max_min::axis_name::xyf;
2319 if (HasParam(ArgMaxLayer->params, "axis")) {
2320 int32_t axis_param = ArgMaxLayer->GetParamAsInt("axis", 1);
2322 int32_t axis = axis_param;
// Negative axes are normalized to [0..3]; the adjustment statement itself
// is on an elided line (presumably axis += 4 — TODO confirm).
2323 if (-4 <= axis && axis <= -1)
2327 case 0: chosen_axis = cldnn::arg_max_min::axis_name::batch; break;
2328 case 1: chosen_axis = cldnn::arg_max_min::axis_name::feature; break;
2329 case 2: chosen_axis = cldnn::arg_max_min::axis_name::y; break;
2330 case 3: chosen_axis = cldnn::arg_max_min::axis_name::x; break;
2334 auto argmaxPrim = cldnn::arg_max_min(
// Remaining arguments (name, input, otype, top_k, chosen_axis) are on
// elided lines.
2341 m_env.primitiveIDs[ArgMaxLayer->name] = ArgMaxLayer->name;
2342 m_topology->add(argmaxPrim);
2343 m_env.profilingIDs.insert(ArgMaxLayer->name);
// Maps an IE MaxUnpooling layer (2 inputs) to a clDNN max_unpooling primitive.
// One input is the pooled data, the other is the argmax output of a previous
// Pooling layer; the matching "<pool>_argmax_mutable" buffer created in
// CreatePoolingPrimitive is located by walking both producers.
2346 void CLDNNGraph::CreateMaxUnpoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2347 ValidateLayer(layer, 2);
2349 auto UnpoolingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2351 cldnn::primitive_id real_input, argmax_mutable;
2353 // locate ArgMax primitive
2355 for (auto inputData : layer->insData) {
2356 auto prevData = inputData.lock();
2358 if (prevData == nullptr) {
2359 THROW_CLDNN_EXCEPTION("MaxUnpooling: nonexistent input for layer: " << layer->name);
2362 auto prevCreator = prevData->creatorLayer.lock();
// A producer that is a multi-output Pooling layer supplies the argmax
// buffer; any other producer supplies the real data input.
// (Part of this condition is on elided lines.)
2365 (LayerTypeFromStr(prevCreator->type) == Pooling) &&
2366 prevCreator->outData.size() > 1 &&
2368 argmax_mutable = m_env.primitiveIDs.at(prevCreator->name + "_argmax_mutable");
2370 real_input = m_env.primitiveIDs.at(prevData->name);
2375 uint32_t stride = UnpoolingLayer->GetParamAsUInt("stride");
2376 uint32_t kernel_size = UnpoolingLayer->GetParamAsUInt("kernel_size");
2378 auto unpoolingPrim = cldnn::max_unpooling(
2379 UnpoolingLayer->name,
// Input ids (real_input, argmax_mutable) are on elided lines.
2382 cldnn::spatial(kernel_size, kernel_size), // size
2383 cldnn::spatial(stride, stride) ); // stride
2385 m_env.primitiveIDs[UnpoolingLayer->name] = UnpoolingLayer->name;
2386 m_topology->add(unpoolingPrim);
2387 m_env.profilingIDs.insert(UnpoolingLayer->name);
// Maps an IE MVN (mean-variance normalization) layer to a clDNN mvn primitive.
// Parameters: across_channels (default false), normalize_variance (default
// true), eps (default 1e-10).
2390 void CLDNNGraph::CreateMVNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2391 ValidateLayer(layer, 1);
2392 auto inputPrimitives = GetPrevLayersPrimitives(layer);
2393 auto MvnLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2395 bool across_channels = MvnLayer->GetParamsAsBool("across_channels", false);
2396 bool normalize_variance = MvnLayer->GetParamsAsBool("normalize_variance", true);
2397 float eps = MvnLayer->GetParamAsFloat("eps", 1e-10f);
2399 auto mvnPrim = cldnn::mvn(
// Arguments (name, input, across_channels, normalize_variance, eps) are on
// elided lines.
2406 m_env.primitiveIDs[MvnLayer->name] = MvnLayer->name;
2407 m_topology->add(mvnPrim);
2408 m_env.profilingIDs.insert(MvnLayer->name);
// Registers a Const layer as a network input: allocates a cldnn memory of the
// blob's layout, copies the blob bytes into it, adds an input_layout to the
// topology, and records the memory in m_env.constBlobs so it can be bound as
// input data after network compilation.
2412 void CLDNNGraph::AddConstantBlobInput(InferenceEngine::CNNLayerPtr &layer) {
2413 auto constBlob = layer->blobs.begin()->second;
2414 auto constDims = layer->outData[0]->dims;
2416 cldnn::tensor constTensor;
// IE dims are innermost-first; map them to the bfyx-ordered cldnn tensor.
2417 switch (constDims.size()) {
2418 case 4: constTensor = cldnn::tensor(TensorValue(constDims[3]), TensorValue(constDims[2]),
2419 TensorValue(constDims[0]), TensorValue(constDims[1]));
2421 case 3: constTensor = cldnn::tensor(TensorValue(constDims[2]), TensorValue(constDims[1]),
2422 1, TensorValue(constDims[0]));
2424 case 2: constTensor = cldnn::tensor(TensorValue(constDims[1]), 1, TensorValue(constDims[0]), 1);
2426 case 1: // not implemented yet.
2427 default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions");
2430 cldnn::layout constLayout = cldnn::layout(
2431 DataTypeFromPrecision(layer->blobs.begin()->second->precision()),
2435 size_t bytes = constLayout.bytes_count();
2436 cldnn::primitive_id constPrimID = layer->name;
2438 auto mem = cldnn::memory::allocate(*(m_env.engine), constLayout);
2439 auto tmpPointer = mem.pointer<char>(); // implicitly maps buffer - unmap in destructor
2440 auto buf = tmpPointer.data();
2442 // fill cldnn::memory from blob
2443 auto bufSize = constLayout.bytes_count();
2444 auto data = static_cast<const char *>(constBlob->buffer());
// Byte-wise copy; the loop body (buf[i] = data[i]) is on an elided line.
2445 for (size_t i = 0; i < bufSize; i++) {
2449 // add new input to topology
2450 // and put it in const blob map
2451 // (to set input memory after network compilation)
2452 m_topology->add(cldnn::input_layout(constPrimID, constLayout));
2453 m_env.primitiveIDs[layer->name] = constPrimID;
2454 m_env.constBlobs.insert({ layer->name, mem });
// Maps an IE Convolution layer to a clDNN convolution primitive: creates
// weight/bias data primitives from the layer blobs, then builds the
// convolution with the layer's stride/padding/dilation and output shape.
2457 void CLDNNGraph::CreateConvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2458 ValidateLayer(layer, 1);
2459 auto inputPrimitives = GetPrevLayersPrimitives(layer);
2460 auto convLayer = dynamic_cast<InferenceEngine::ConvolutionLayer *> (layer.get());
2462 std::vector<cldnn::primitive_id> weightPrimID;
2463 std::vector<cldnn::primitive_id> biasPrimID;
2464 CreateWeightAndBiasPrimitives(layer, weightPrimID, biasPrimID);
// clDNN expects padding as a negative input offset, hence the minus signs.
2466 cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
2467 cldnn::spatial(convLayer->_stride_x, convLayer->_stride_y));
2468 cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
2469 cldnn::spatial(-convLayer->_padding_x, -convLayer->_padding_y));
2470 cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
2471 cldnn::spatial(convLayer->_dilation_x, convLayer->_dilation_y));
2473 auto convPrim = cldnn::convolution(convLayer->name,
// Remaining arguments (input, weights, biases, stride, padding, dilation)
// are on elided lines.
2482 CldnnTensorFromIEDims(convLayer->outData[0]->dims));
2484 m_env.primitiveIDs[convLayer->name] = convLayer->name;
2485 m_topology->add(convPrim);
2486 m_env.profilingIDs.insert(convLayer->name);
// Returns true when a Split layer matches the fusable split->conv->merge
// pattern handled by CreateFusedSplitConvMergePrimitive: exactly two outputs,
// each feeding a single non-fused convolution with identical precision,
// padding, stride and dilation, both convolutions converging on the same
// single-output Concat over axis 1, with none of the intermediate layers
// being network outputs or overridden by custom layers.
2489 bool CLDNNGraph::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const {
2490 if (splitLayer->outData.size() != 2) return false; // split into 2
// convLayer1/convLayer2 declarations are on elided lines.
2492 dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[0]).get());
2494 dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[1]).get());
2495 if (!convLayer1 || !convLayer2 // outputs aren't convolutions
2496 || convLayer1->precision != convLayer2->precision // wrong precision
2497 || convLayer1->_fusedWith || convLayer2->_fusedWith // convolutions are fused
2498 || convLayer1->outData.size() != 1 || convLayer2->outData.size() != 1 // more than 1 output for convolutions
2499 || convLayer1->_padding_x != convLayer2->_padding_x // different padding
2500 || convLayer1->_padding_y != convLayer2->_padding_y // different padding
2501 || convLayer1->_stride_x != convLayer2->_stride_x // different strides
2502 || convLayer1->_stride_y != convLayer2->_stride_y // different strides
2503 || convLayer1->_dilation_x != convLayer2->_dilation_x // different dilation
2504 || convLayer1->_dilation_y != convLayer2->_dilation_y // different dilation
2505 || (GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0])) // no merge after convolutions
2506 != GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[1])))
2507 || (p_currentOutputs->find(convLayer1->name) != p_currentOutputs->end())
2508 || (p_currentOutputs->find(convLayer2->name) != p_currentOutputs->end())) {
// The layer after both convolutions must be a single-output Concat on axis 1.
2512 dynamic_cast<InferenceEngine::ConcatLayer *> (
2513 GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0])).get());
2514 if (!concatLayer || // not a merge layer
2515 concatLayer->_axis != 1 || // merge on unsupported axis
2516 concatLayer->outData.size() != 1) { // too many outputs
2519 if (m_config.customLayers.find(convLayer1->type) != m_config.customLayers.end() ||
2520 m_config.customLayers.find(concatLayer->type) != m_config.customLayers.end()) {
2521 return false; // convolution or concat were overwritten by a custom layer
// Creates the clDNN input_layout for a network input plus a preprocessing
// reorder primitive. Supported layouts: NCHW/NHWC (4-D) and NC (2-D).
// Mean preprocessing is handled per variant: per-channel mean values are
// passed directly to the reorder; a mean image is merged into a single blob
// and subtracted via a data primitive. stdScale != 1 is not supported.
2527 void CLDNNGraph::AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo) {
2528 // first create and add the input layout
2529 auto inputDims = inputInfo->getDims();
2530 InferenceEngine::Layout l = inputInfo->getTensorDesc().getLayout();
2532 cldnn::tensor dataTensor;
2533 switch (inputDims.size()) {
// Batch dim is overridden by m_curBatch when dynamic batching is enabled
// (m_max_batch > 1); IE dims are innermost-first, hence dims[3] is batch.
2536 cldnn::tensor::value_type batch = (m_env.m_max_batch <= 1) ? TensorValue(inputDims[3]) : TensorValue(m_curBatch);
2538 if (InferenceEngine::Layout::NCHW == l) {
2539 dataTensor = cldnn::tensor(batch, TensorValue(inputDims[2]),
2540 TensorValue(inputDims[0]), TensorValue(inputDims[1]));
2541 } else if (InferenceEngine::Layout::NHWC == l) {
2542 dataTensor = cldnn::tensor(batch,
2543 TensorValue(inputDims[2]), TensorValue(inputDims[0]),
2544 TensorValue(inputDims[1]));
2546 THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in input " + inputInfo->name());
2551 if (InferenceEngine::NC == l)
2552 dataTensor = cldnn::tensor(TensorValue(inputDims[1]), 1, TensorValue(inputDims[0]), 1);
2554 THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in input " + inputInfo->name());
2556 case 3: // not implemented yet.
2557 case 1: // not implemented yet.
2558 default: THROW_CLDNN_EXCEPTION("Invalid data dimensions");
2561 cldnn::layout inputLayout(DataTypeFromPrecision(inputInfo->getInputPrecision()),
2562 FormatFromLayout(l),
2564 auto inputName = inputInfo->name();
2565 m_topology->add(cldnn::input_layout(inputName, inputLayout));
2567 // save the input dims
2568 m_env.inputLayouts.insert({ inputName, inputLayout });
2570 // create preprocess primitive for this input
2571 auto preProcess = inputInfo->getPreProcess();
2573 size_t meanChannels = preProcess.getNumberOfChannels();
// The reorder target is the internal layout: default format, network precision.
2574 auto internalInputLayout = m_env.inputLayouts.at(inputName);
2575 internalInputLayout.format = m_defaultFormat;
2576 internalInputLayout.size = internalInputLayout.size.transform(m_defaultFormat, 1);
2577 internalInputLayout.data_type = m_networkPrecision;
2578 auto preprocessPrimID = inputName + m_preProcessTag;
2580 if ((meanChannels > 0) &&
2581 (meanChannels != internalInputLayout.size.feature[0])) {
2582 THROW_CLDNN_EXCEPTION("Mismatched mean values channels in input " + inputName);
2585 switch (preProcess.getMeanVariant()) {
// Per-channel mean values: pass them directly to the reorder primitive.
2588 std::vector<float> meanValues;
2589 if (meanChannels > 0) {
2590 for (size_t c = 0; c < meanChannels; c++) {
2591 if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10)
2592 THROW_CLDNN_EXCEPTION("not supporting stdScale yet in input " + inputName);
2593 meanValues.push_back(preProcess[c]->meanValue);
2596 m_topology->add(cldnn::reorder(preprocessPrimID, inputName, internalInputLayout, meanValues));
2597 m_env.profilingIDs.insert(preprocessPrimID);
2598 InitProfileInfo(preprocessPrimID, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
// Mean-image variant: merge per-channel mean blobs into one FP32 blob and
// subtract it through a data primitive referenced by the reorder.
2603 IE_ASSERT(meanChannels);
2604 // first merge all mean values to a single blob
2605 // todo make sure mean blob precision is the same as the input precision
2606 auto meanDims = inputInfo->getDims();
2607 // overwrite batches with 1
2608 switch (meanDims.size()) {
2609 case 4: meanDims[3] = 1;
2612 THROW_CLDNN_EXCEPTION("Missing batch dimensions in input image");
2614 InferenceEngine::TBlob<float> meanBlob(Precision(Precision::FP32), TensorDesc::getLayoutByDims(meanDims), meanDims);
2615 meanBlob.allocate();
2616 auto meanBlobData = meanBlob.data();
2617 for (size_t c = 0; c < meanChannels; c++) {
2618 if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10)
2619 THROW_CLDNN_EXCEPTION("not supporting stdScale yet in input " + inputName);
2620 auto channelMeanBlob = std::dynamic_pointer_cast<TBlob<float>>(preProcess[c]->meanData);
2621 auto channelSize = channelMeanBlob->size();
2622 auto channelBlobData = channelMeanBlob->data();
2623 for (size_t i = 0; i < channelSize; i++) {
2624 meanBlobData[(c * channelSize) + i] = channelBlobData[i];
2627 // then create a data primitive for the mean values
2628 auto meanBlobPtr = std::make_shared<InferenceEngine::TBlob<float>>(meanBlob);
2630 // mean values will use external format (sub in the input format before convert to new format)
2631 cldnn::tensor meanBlobTensor(internalInputLayout.size);
2632 meanBlobTensor.batch[0] = 1; // mean values have no batches
2633 cldnn::layout meanBlobLayout(cldnn::data_types::f32, m_defaultFormat, meanBlobTensor);
2634 CreatePrimitiveFromBlob(
2635 inputName + m_meanValuesTag,
2638 m_topology->add(cldnn::reorder(preprocessPrimID,
2640 internalInputLayout,
2641 inputName + m_meanValuesTag));
2642 m_env.profilingIDs.insert(preprocessPrimID);
2643 InitProfileInfo(preprocessPrimID, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
2647 default: THROW_CLDNN_EXCEPTION("Invalid mean variant in input " + inputName);
// Subsequent layers consume the preprocessed (reordered) primitive.
2650 m_env.primitiveIDs[inputName] = preprocessPrimID;
2651 m_env.primitiveIDs[preprocessPrimID] = preprocessPrimID;
// Resolves the clDNN primitive ids that feed the given layer, one per input.
// For a multi-output producer the id is looked up by the Data (edge) name so
// the correct output port is selected; otherwise by the producer layer's name
// (falling back to the Data name when there is no creator, e.g. inputs).
2654 std::vector<cldnn::primitive_id> CLDNNGraph::GetPrevLayersPrimitives(const InferenceEngine::CNNLayerPtr layer) const {
2655 if (layer == nullptr) {
2658 std::vector<cldnn::primitive_id> inputPrimitives;
2659 for (auto inputData : layer->insData) {
2660 auto prevData = inputData.lock();
2661 if (prevData == nullptr) {
2662 THROW_CLDNN_EXCEPTION("Nonexistent input for layer: " << layer->name);
2664 auto prevCreator = prevData->creatorLayer.lock();
2665 auto prevName = prevCreator ? prevCreator->name : prevData->name;
2666 if (prevCreator && prevCreator->outData.size() > 1) {
2667 inputPrimitives.push_back(m_env.primitiveIDs.at(prevData->name));
2669 inputPrimitives.push_back(m_env.primitiveIDs.at(prevName));
2672 return inputPrimitives;
// Adds an output reorder primitive converting the network result for
// `outputName` into the requested layout/precision. Supported output layouts:
// NCHW, NHWC, CHW, NC. When outputPrecision is UNSPECIFIED, the Data's own
// precision is used. Also records output dims and the reorder's predecessor.
2675 void CLDNNGraph::AddOutputPrimitive(std::string outputName, const InferenceEngine::DataPtr outputData, Precision outputPrecision) {
2676 // TODO: add precision check once there's an outputInfo object
2677 if (outputData->layout != InferenceEngine::NCHW &&
2678 outputData->layout != InferenceEngine::NHWC &&
2679 outputData->layout != InferenceEngine::CHW &&
2680 outputData->layout != InferenceEngine::NC) {
2681 THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(outputData->layout) << ") in output: " << outputName);
2683 auto outputReorderID = outputName + m_postProcessTag;
2684 Precision precision = outputPrecision == Precision::UNSPECIFIED ? outputData->getPrecision() : outputPrecision;
2686 // Find correct output ID. Start with name stored in IR.
2687 std::string outputID = outputName;
2688 std::string finalID = m_env.primitiveIDs.at(outputName);
// Follow the alias chain in primitiveIDs until it reaches a fixed point
// (an id that maps to itself). The chain-advance statement is on an
// elided line — presumably outputID = finalID before re-lookup.
2690 while (outputID != finalID) {
2691 auto prim = m_env.primitiveIDs.find(finalID);
2693 if (prim == m_env.primitiveIDs.end()) {
2694 THROW_IE_EXCEPTION << "Unknown output primitive id " << outputID;
2697 finalID = prim->second;
2700 m_topology->add(cldnn::reorder(outputReorderID, outputID,
2701 FormatFromLayout(outputData->getLayout()),
2702 DataTypeFromPrecision(precision)));
2703 m_env.primitiveIDs[outputName] = outputReorderID;
2704 m_env.profilingIDs.insert(outputReorderID);
2705 InitProfileInfo(outputReorderID, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
2706 m_env.outputDims[outputName] = outputData->dims;
2707 m_env.prevPrimitiveIDs[outputReorderID] = {outputName};
// Creates a 1x1x1x1 data primitive holding a single scalar value (used for
// scale/bias constants). Supports f32 directly and f16 via cldnn's
// float-to-half conversion; any other data type throws.
2710 void CLDNNGraph::AddSingleValuePrimitive(cldnn::primitive_id valPrimID, cldnn::data_types dataType, float value) {
2711 cldnn::layout primLayout(dataType, m_defaultFormat, { 1, 1, 1, 1 });
2712 auto primMem = cldnn::memory::allocate(*(m_env.engine), primLayout);
// switch (dataType) — the switch header is on an elided line.
2714 case cldnn::data_types::f32:
2716 auto tmpPointer = primMem.pointer<float>(); // implicitly maps buffer - unmap in destructor
2717 tmpPointer[0] = value;
2720 case cldnn::data_types::f16:
2722 auto tmpPointer = primMem.pointer<uint16_t>(); // implicitly maps buffer - unmap in destructor
2723 cldnn_status status = CLDNN_SUCCESS;
2724 tmpPointer[0] = cldnn_float_to_half(value, &status);
2725 if (status != CLDNN_SUCCESS) {
2726 THROW_CLDNN_EXCEPTION("Error converting value to fp16.");
2731 THROW_CLDNN_EXCEPTION("Unhandled data type (precision)");
2734 m_topology->add(cldnn::data(valPrimID, primMem));
2737 cldnn::data_types CLDNNGraph::DataTypeFromPrecision(InferenceEngine::Precision p) {
2739 case Precision::I16:
2740 case Precision::FP32:
2741 return cldnn::data_types::f32;
2742 case Precision::FP16:
2743 return cldnn::data_types::f16;
2745 return cldnn::data_types::u8;
2747 THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << p.name() << " precision";
2752 cldnn::format CLDNNGraph::FormatFromLayout(InferenceEngine::Layout l) {
2754 case InferenceEngine::Layout::NCHW:
2755 case InferenceEngine::Layout::NC:
2756 case InferenceEngine::Layout::CHW:
2757 return cldnn::format::bfyx;
2758 case InferenceEngine::Layout::NHWC:
2759 return cldnn::format::byxf;
2761 THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << l << " layout";
2766 cldnn::upsampling_sample_type CLDNNGraph::UpsamplingTypeFromString(const std::string& str) {
2767 static const caseless_map<std::string, cldnn::upsampling_sample_type> UpsamplingTypeNameToType = {
2768 { "Bilinear" , cldnn::upsampling_sample_type::bilinear },
2769 { "Nearest" , cldnn::upsampling_sample_type::nearest },
2771 auto it = UpsamplingTypeNameToType.find(str);
2772 if (it != UpsamplingTypeNameToType.end())
2775 THROW_CLDNN_EXCEPTION("Unknown Upsampling type: " << str);
2778 cldnn::softmax::dimension_t CLDNNGraph::SoftmaxDimensionFromIEAxis(const InferenceEngine::SoftMaxLayer* softmaxLayer, bool isPrevFC) {
2779 // WA for default softmax dimension in cldnn for fyx
2780 // todo: remove this once clDNN changes FC output to BF instead of BX
2781 auto dims = softmaxLayer->outData[0]->dims;
2782 unsigned non1Dims = 0;
2783 for (size_t i = 0; i < dims.size(); i++) {
2788 if (non1Dims == 1 || isPrevFC) {
2789 return cldnn::softmax::normalize_fyx;
2793 switch (softmaxLayer->axis) {
2794 case 1: return cldnn::softmax::normalize_f;
2795 case 2: return cldnn::softmax::normalize_y;
2796 case 3: return cldnn::softmax::normalize_x;
2797 default: THROW_CLDNN_EXCEPTION("Invalid softmax axis " << softmaxLayer->axis);
2799 return cldnn::softmax::normalize_fyx;
2802 cldnn::prior_box_code_type CLDNNGraph::PriorBoxCodeFromString(const std::string& str) {
2803 static const std::map<std::string, cldnn::prior_box_code_type> CodeNameToType = {
2804 { "caffe.PriorBoxParameter.CORNER" , cldnn::prior_box_code_type::corner },
2805 { "caffe.PriorBoxParameter.CENTER_SIZE" , cldnn::prior_box_code_type::center_size },
2806 { "caffe.PriorBoxParameter.CORNER_SIZE" , cldnn::prior_box_code_type::corner_size },
2808 auto it = CodeNameToType.find(str);
2809 if (it != CodeNameToType.end()) {
2812 THROW_CLDNN_EXCEPTION("Unknown Prior-Box code type: " + str);
2813 return cldnn::prior_box_code_type::corner;
2817 void CLDNNGraph::CreateGenericLayerBlobPrimitives(const InferenceEngine::GenericLayer* layer) {
2819 for (auto& blob : layer->blobs) {
2820 if (blob.second->dims().size() != 1) {
2821 THROW_CLDNN_EXCEPTION("Unhandled blob dim in layer " + layer->name);
2823 CreatePrimitiveFromBlob(
2824 layer->name + "_" + blob.first + m_weightsTag,
2827 DataTypeFromPrecision(blob.second->precision()),
2828 m_defaultFormat, cldnn::spatial(TensorValue(blob.second->dims()[0]))));
2832 void CLDNNGraph::ValidateGenericLayerBlobs(const InferenceEngine::GenericLayer* layer, const std::vector<std::string>& blobNames) {
2834 for (auto& name : blobNames) {
2835 if (layer->blobs.find(name) == layer->blobs.end()) {
2836 THROW_CLDNN_EXCEPTION("Missing blob " + name + " in layer " + layer->name);
2841 cldnn::tensor CLDNNGraph::CldnnTensorFromIEDims(const InferenceEngine::SizeVector& dims) {
2842 auto numDims = dims.size();
2843 std::vector<cldnn::tensor::value_type> outputTensor({ 1, 1, 1, 1 });
2844 for (size_t i = 0; i < numDims; i++) {
2845 outputTensor[i] = TensorValue(dims[numDims - i - 1]);
2847 // swap x,y for cldnn tensor taking bfxy instead of bfyx
2848 auto tmp = outputTensor[2];
2849 outputTensor[2] = outputTensor[3];
2850 outputTensor[3] = tmp;
2852 return outputTensor;
2855 InferRequestInternal::Ptr
2856 CLDNNGraph::CreateInferRequestImpl(InputsDataMap networkInputs, OutputsDataMap networkOutputs) {
2857 if (m_env.network == nullptr) {
2858 THROW_IE_EXCEPTION << NETWORK_NOT_LOADED_str;
2860 return std::make_shared<CLDNNInferRequest>(m_env, m_config.useProfiling, networkInputs, networkOutputs);
2863 void CLDNNGraph::InitProfileInfo(const std::string& layerName,
2864 const std::string& layerType,
2865 const std::string& execType,
2866 InferenceEngine::InferenceEngineProfileInfo::LayerStatus status) {
2867 m_env.perfMap[layerName].status = status;
2868 m_env.perfMap[layerName].cpu_uSec = m_env.perfMap[layerName].realTime_uSec = 0;
2869 layerType.copy(m_env.perfMap[layerName].layer_type, layerType.length());
2870 execType.copy(m_env.perfMap[layerName].exec_type, execType.length());
2873 }; // namespace CLDNNPlugin