1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
7 #include <unordered_set>
9 #include <CPP/cldnn_defs.h>
10 #include <CPP/data.hpp>
11 #include <CPP/input_layout.hpp>
12 #include <CPP/reorder.hpp>
13 #include <CPP/convolution.hpp>
14 #include <CPP/pooling.hpp>
15 #include <CPP/lrn.hpp>
16 #include <CPP/fully_connected.hpp>
17 #include <CPP/softmax.hpp>
18 #include <CPP/activation.hpp>
19 #include <CPP/concatenation.hpp>
20 #include <CPP/proposal.hpp>
21 #include <CPP/roi_pooling.hpp>
22 #include <CPP/scale.hpp>
23 #include <CPP/crop.hpp>
24 #include <CPP/deconvolution.hpp>
25 #include <CPP/prior_box.hpp>
26 #include <CPP/detection_output.hpp>
27 #include <CPP/normalize.hpp>
28 #include <CPP/reshape.hpp>
29 #include <CPP/batch_norm.hpp>
30 #include <CPP/permute.hpp>
31 #include <CPP/split.hpp>
32 #include <CPP/upsampling.hpp>
33 #include <CPP/network.hpp>
34 #include <CPP/profiling.hpp>
35 #include <CPP/custom_gpu_primitive.hpp>
36 #include <CPP/reorg_yolo.hpp>
37 #include <CPP/region_yolo.hpp>
38 #include <CPP/mutable_data.hpp>
39 #include <CPP/max_unpooling.hpp>
40 #include <CPP/arg_max_min.hpp>
41 #include <CPP/mvn.hpp>
42 #include <CPP/tile.hpp>
43 #include <CPP/border.hpp>
44 #include <CPP/lstm.hpp>
45 #include <CPP/gather.hpp>
46 #include <CPP/depth_to_space.hpp>
47 #include <CPP/shuffle_channels.hpp>
48 #include <CPP/strided_slice.hpp>
49 #include <CPP/reverse_sequence.hpp>
53 #include "cldnn_graph.h"
54 #include "simple_math.h"
55 #include <description_buffer.hpp>
56 #include <cldnn/cldnn_config.hpp>
57 #include <graph_tools.hpp>
58 #include <ie_layers_internal.hpp>
60 #include "cldnn_infer_request.h"
61 #include <cpp_interfaces/ie_executor_manager.hpp>
62 #include "details/caseless.hpp"
65 #include <sys/types.h>
68 using namespace InferenceEngine;
69 using namespace InferenceEngine::details;
// Debug flavor of the plugin exception macro: prints the message and the throw
// site (file/line/function) to stdout before raising the IE exception.
// NOTE(review): the preprocessor conditional (e.g. #ifndef NDEBUG / #else /
// #endif) that originally selected between this definition and the release
// one below appears elided in this view; as written the second #define
// redefines the first.  The opening "do {" of this macro also appears elided.
#define THROW_CLDNN_EXCEPTION(desc)\
InferenceEngineException ex(__FILE__, __LINE__);\
std::cout << desc << "\n---\nException detected at " << __FILE__ << ":" << \
__LINE__ << " (" << __FUNCTION__ << ")\n---\n" << std::endl; THROW_IE_EXCEPTION << desc; } while (0);
// Release flavor: just forward the message to the IE exception stream.
#define THROW_CLDNN_EXCEPTION(desc) THROW_IE_EXCEPTION << desc;
// Shorthand cast of any integral value to cldnn's tensor value type.
#define TensorValue(val) static_cast<cldnn::tensor::value_type>(val)
namespace CLDNNPlugin {

// Suffixes appended to layer names when forming unique cldnn::primitive_id
// strings for the auxiliary primitives the plugin creates around each IE
// layer (pre/post-processing reorders, weights/biases data, etc.).
const cldnn::primitive_id CLDNNGraph::m_preProcessTag("_cldnn_input_preprocess");
const cldnn::primitive_id CLDNNGraph::m_weightsTag("_cldnn_weights");
const cldnn::primitive_id CLDNNGraph::m_biasesTag("_cldnn_biases");
const cldnn::primitive_id CLDNNGraph::m_meanValuesTag("_cldnn_mean_values");
const cldnn::primitive_id CLDNNGraph::m_postProcessTag("_cldnn_output_postprocess");
const cldnn::primitive_id CLDNNGraph::m_scalesTag("_cldnn_scales");
const cldnn::primitive_id CLDNNGraph::m_workaroundTag("_cldnn_workaround");
const cldnn::primitive_id CLDNNGraph::m_preCustomLayerTag("_cldnn_custom_preprocess");
const cldnn::primitive_id CLDNNGraph::m_postCustomLayerTag("_cldnn_custom_postprocess");
// Sanity-checks a layer before conversion: when 'inputs' is non-zero, verifies
// the layer has exactly that many inputs, and rejects layers fused with
// another layer (fusion is unsupported by this plugin).
// NOTE(review): the closing braces of the two 'if' blocks and of the function
// appear elided in this view of the file.
static void ValidateLayer(const InferenceEngine::CNNLayerPtr& layer, unsigned inputs) {  // todo: add more checks
    if (inputs && layer->insData.size() != inputs) {
        THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name);
    if (layer->_fusedWith) {
        THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
// Eltwise-specific validation: only rejects fused layers; input count is not
// checked because Eltwise accepts a variable number of inputs.
// NOTE(review): closing braces appear elided in this view.
static void ValidateEltwiseLayer(const InferenceEngine::CNNLayerPtr& layer) {
    if (layer->_fusedWith) {
        THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);

// Windows shim: map the POSIX-style mkdir(dir, mode) call used below onto
// _mkdir(dir); the mode argument is discarded on Windows.
// NOTE(review): the surrounding platform #ifdef appears elided in this view.
#define mkdir(dir, mode) _mkdir(dir)
// Parses the plugin configuration map and applies each recognized key/value
// pair to this Config object.  Unknown keys, or unsupported values for a
// known key, raise an InferenceEngine exception.
// NOTE(review): a number of lines appear elided in this view (some YES-branch
// assignments, else/closing braces, and the integer parsing that feeds 'uVal'
// in the priority/throttle switches) — the reproduced lines below keep the
// visible order only.
void CLDNNGraph::Config::LoadFromMap(const std::map<std::string, std::string>& configMap) {
    for (auto& kvp : configMap) {
        std::string key = kvp.first;
        std::string val = kvp.second;

        // TODO: refactor if-else to map?
        if (key.compare(PluginConfigParams::KEY_PERF_COUNT) == 0) {
            // Per-primitive profiling counters on/off.
            if (val.compare(PluginConfigParams::YES) == 0) {
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                useProfiling = false;
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
        } else if (key.compare(PluginConfigParams::KEY_DYN_BATCH_ENABLED) == 0) {
            // Dynamic-batch support (multiple compiled networks).
            if (val.compare(PluginConfigParams::YES) == 0) {
                enableDynamicBatch = true;
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                enableDynamicBatch = false;
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
        } else if (key.compare(PluginConfigParams::KEY_DUMP_KERNELS) == 0) {
            // Dump generated custom-layer kernels to disk.
            if (val.compare(PluginConfigParams::YES) == 0) {
                dumpCustomKernels = true;
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                dumpCustomKernels = false;
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY) == 0) {
            // OpenCL queue priority, parsed from the string value.
            std::stringstream ss(val);
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
                    queuePriority = cldnn::priority_mode_types::disabled;
                    queuePriority = cldnn::priority_mode_types::low;
                    queuePriority = cldnn::priority_mode_types::med;
                    queuePriority = cldnn::priority_mode_types::high;
                    THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported queue priority value: " << uVal;
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) == 0) {
            // OpenCL queue throttle, same parsing scheme as priority above.
            std::stringstream ss(val);
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
                    queueThrottle = cldnn::throttle_mode_types::disabled;
                    queueThrottle = cldnn::throttle_mode_types::low;
                    queueThrottle = cldnn::throttle_mode_types::med;
                    queueThrottle = cldnn::throttle_mode_types::high;
                    THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported queue throttle value: " << uVal;
        } else if (key.compare(PluginConfigParams::KEY_CONFIG_FILE) == 0) {
            // Whitespace-separated list of custom-layer XML config files.
            std::stringstream ss(val);
            std::istream_iterator<std::string> begin(ss);
            std::istream_iterator<std::string> end;
            std::vector<std::string> configFiles(begin, end);
            for (auto& file : configFiles) {
                CLDNNCustomLayer::LoadFromFile(file, customLayers);
        } else if (key.compare(PluginConfigParams::KEY_TUNING_MODE) == 0) {
            // Kernel auto-tuning: disabled / create cache / use existing cache.
            if (val.compare(PluginConfigParams::TUNING_DISABLED) == 0) {
                tuningConfig.mode = cldnn::tuning_mode::tuning_disabled;
            } else if (val.compare(PluginConfigParams::TUNING_CREATE) == 0) {
                tuningConfig.mode = cldnn::tuning_mode::tuning_tune_and_cache;
            } else if (val.compare(PluginConfigParams::TUNING_USE_EXISTING) == 0) {
                tuningConfig.mode = cldnn::tuning_mode::tuning_use_cache;
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported tuning mode value by plugin: " << val;
        } else if (key.compare(PluginConfigParams::KEY_TUNING_FILE) == 0) {
            tuningConfig.cache_file_path = val;
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_MEM_POOL) == 0) {
            if (val.compare(PluginConfigParams::YES) == 0) {
                memory_pool_on = true;
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                memory_pool_on = false;
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported memory pool flag value: " << val;
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_GRAPH_DUMPS_DIR) == 0) {
            // Create the dump directory up front so later writes succeed.
            graph_dumps_dir = val;
            mkdir(graph_dumps_dir.c_str(), 0755);
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_SOURCES_DUMPS_DIR) == 0) {
            sources_dumps_dir = val;
            mkdir(sources_dumps_dir.c_str(), 0755);
        } else if (key.compare(PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS) == 0) {
            // Serialize async infer requests across networks on one executor.
            if (val.compare(PluginConfigParams::YES) == 0) {
                exclusiveAsyncRequests = true;
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                exclusiveAsyncRequests = false;
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
            THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property key by plugin: " << key;
246 void CLDNNGraph::changeInputBatch(size_t batch) {
// Returns false when the topology contains layers that are incompatible with
// dynamic batching: detection-related layers (whose output shapes depend on
// actual data) or any user-registered custom layer.
// NOTE(review): early-return statements, some case types in the big 'if', and
// the closing braces/final return of this function appear elided in this view.
bool CLDNNGraph::CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const {
    InputsDataMap inputs;
    network.getInputsInfo(inputs);

    CNNLayerSet inputLayers;
    std::unordered_set<CNNLayer *> allLayers;

    // Start the DFS traversal from the consumers of the first network input.
    auto & secondLayers = inputs.begin()->second->getInputData()->getInputTo();
    if (secondLayers.empty())

    bool check_result = true;
    details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
        // Detection-style layers cannot be re-batched safely.
        auto type = LayerTypeFromStr(layer->type);
        if (SimplerNMS == type ||
            ROIPooling == type ||
            DetectionOutput == type ||
            PSROIPooling == type ) {
            check_result = false;

        // check for custom layer
        auto customLayer = m_config.customLayers.find(layer->type);
        if (customLayer != m_config.customLayers.end()) {
            check_result = false;
// Constructor: creates the clDNN engine from the plugin configuration, applies
// the TensorIterator/RNN network transformation passes, then builds either a
// single network or one network per power-of-two batch size up to max_batch
// (dynamic batch support).
// NOTE(review): many interior lines appear elided in this view — the tail of
// the member-initializer list, engine_configuration arguments, several
// closing braces, the returns inside the UnrollRNN_if predicate, and the
// Load()/CompileNetwork() calls inside the per-batch loop.
CLDNNGraph::CLDNNGraph(InferenceEngine::ICNNNetwork& network, const Config& config, int max_batch) : m_config(config),
    m_defaultFormat(cldnn::format::bfyx),
    // Engine is created with profiling enabled when requested or when tuning
    // needs timing data.
    m_env.engine = std::make_shared<cldnn::engine>(cldnn::engine_configuration(
        (config.useProfiling || (config.tuningConfig.mode != cldnn::tuning_mode::tuning_disabled)),
        config.dumpCustomKernels,
        config.sources_dumps_dir,
        config.queuePriority,
        config.queueThrottle,
        config.memory_pool_on));

    m_env.debugOptions.PrintOptions();

    // Route all async requests through one shared GPU executor if requested.
    if (config.exclusiveAsyncRequests) {
        ExecutorManager *executorManager = ExecutorManager::getInstance();
        _taskExecutor = executorManager->getExecutor(TargetDeviceInfo::name(TargetDevice::eGPU));

    // Try to fold TensorIterator/RNN constructs into forms the plugin supports.
    bool res = !NetPass::CombineRNNSeq(network) ? NetPass::UnrollTI(network) : true;
    res &= NetPass::UnrollRNN_if(network, [] (RNNCellBase rnn) -> bool {
        // Predicate: which RNN cells must be unrolled (clipping, GRU/RNN cell
        // types, or LSTM with non-default activations).
        if (rnn.clip != 0.0f)
        if (rnn.type == "GRUCell" ||
            rnn.type == "GRUSequence" ||
            rnn.type == "RNNCell" ||
            rnn.type == "RNNSequence")
        if (!(rnn.type == "LSTMCell" || rnn.type == "LSTMSequence") ||
            rnn.activations == std::vector<std::string>{"sigmoid", "tanh", "tanh"})

        THROW_CLDNN_EXCEPTION("Plugin doesn't support Tensor Iterator in pure form. "
                              "No one TI optimization pattern was not applied successfully");

    // check topology for applicability
    if (!CanProcessDynBatch(network)) {
        THROW_CLDNN_EXCEPTION("Such topology cannot be compiled for dynamic batch!");

    // calculate number of networks necessary based on binary log
    unsigned int tmp = max_batch;
    unsigned int mask = 1 << 31;
    unsigned int ldigit = 31;

    // Locate the highest set bit of max_batch (its binary log).
    while (!(tmp & mask)) {

    m_env.m_bv_sz = ldigit + 1;

    m_env.m_max_batch = max_batch;

    // Handle workarounds
    char networkName[128] = { 0 };
    network.getName(networkName, 127);
    m_env.debugOptions.EnableWA(networkName);
    m_env.debugOptions.AddTimedEvent("Loading Begin");

    // Build one network per batch size 2^b, largest first; each iteration
    // resets per-network state before rebuilding the topology.
    for (int b = m_env.m_bv_sz - 1; b >= 0; b--) {
        m_topology = std::make_shared<cldnn::topology>(cldnn::topology());
        m_env.network.reset();
        m_env.inputLayouts.clear();
        m_env.outputDims.clear();
        m_env.primitiveIDs.clear();

        changeInputBatch(1 << b);

        // Keep networks ordered smallest-batch-first.
        m_env.batchNetworks.insert(m_env.batchNetworks.begin(), m_env.network);

        m_env.engine->release_pending_memory();

    m_topology = std::make_shared<cldnn::topology>(cldnn::topology());

    m_env.engine->release_pending_memory();

    m_env.debugOptions.AddTimedEvent("Loading", "Loading Begin");
    m_env.debugOptions.PrintTimedEvents();
    m_env.debugOptions.ClearTimedEvents();
389 inline std::string layer_type_name_ID(InferenceEngine::CNNLayer* layer) {
390 return layer->type + ":" + layer->name;
393 inline std::string layer_type_name_ID(InferenceEngine::CNNLayerPtr layer) {
394 return layer_type_name_ID(layer.get());
397 std::vector<InferenceEngine::CNNLayerPtr> CLDNNGraph::GetNextLayers(const InferenceEngine::DataPtr data) {
398 std::vector<InferenceEngine::CNNLayerPtr> nextLayers;
399 if (data == nullptr) {
402 for (auto nl : data->getInputTo()) {
403 nextLayers.push_back(nl.second);
408 std::vector<InferenceEngine::CNNLayerPtr> CLDNNGraph::GetNextLayers(const InferenceEngine::CNNLayerPtr layer) {
409 std::vector<InferenceEngine::CNNLayerPtr> nextLayers;
410 if (layer == nullptr) {
413 for (auto od : layer->outData) {
414 auto nextLayersVec = GetNextLayers(od);
415 for (auto nl : nextLayersVec) {
416 nextLayers.push_back(nl);
422 InferenceEngine::CNNLayerPtr CLDNNGraph::GetNextSingleLayer(const InferenceEngine::DataPtr data) {
423 if (data == nullptr) {
426 auto nextLayers = GetNextLayers(data);
427 IE_ASSERT(nextLayers.size() == 1);
428 return nextLayers[0];
431 InferenceEngine::CNNLayerPtr CLDNNGraph::GetNextSingleLayer(const InferenceEngine::CNNLayerPtr layer) {
432 if (layer == nullptr) {
435 auto nextLayers = GetNextLayers(layer);
436 IE_ASSERT(nextLayers.size() == 1);
437 return nextLayers[0];
440 void CLDNNGraph::InitFormat(InferenceEngine::ICNNNetwork &network) {
441 m_defaultFormat = FormatFromLayout(InferenceEngine::Layout::NCHW);
444 void CLDNNGraph::CompileNetwork() {
445 m_env.debugOptions.AddTimedEvent("Network Build Begin");
446 cldnn::build_options options;
447 if (!m_config.graph_dumps_dir.empty()) {
448 options.set_option(cldnn::build_option::graph_dumps_dir(m_config.graph_dumps_dir));
450 options.set_option(cldnn::build_option::optimize_data(true));
451 options.set_option(cldnn::build_option::tuning_config(m_config.tuningConfig));
453 m_env.network.reset();
454 m_env.network = std::make_shared<cldnn::network>(cldnn::network(*(m_env.engine), *m_topology, options));
455 m_env.debugOptions.AddTimedEvent("Network Build", "Network Build Begin");
// Converts the IE network into cldnn primitives: registers inputs, traverses
// all layers creating one primitive per layer (deferring layers whose inputs
// are not yet created), then adds output reorders.
// NOTE(review): several interior lines appear elided in this view (else
// branches in the input loop, the missing-input catch body, and various
// closing braces).
void CLDNNGraph::Load(InferenceEngine::ICNNNetwork &network) {
    auto _networkPrecision = network.getPrecision();

    // Cache input/output maps for use by the per-layer creation helpers.
    InferenceEngine::InputsDataMap networkInputs;
    network.getInputsInfo(networkInputs);
    p_currentInputs = &networkInputs;

    InferenceEngine::OutputsDataMap networkOutputs;
    network.getOutputsInfo(networkOutputs);
    p_currentOutputs = &networkOutputs;

    if (networkInputs.size() == 0) {
        THROW_CLDNN_EXCEPTION("No inputs detected.");

    using LayerVect = std::vector<InferenceEngine::CNNLayerPtr>;
    std::list<InferenceEngine::CNNLayerPtr> layersToHandle;

    // Queue layers for processing, de-duplicated by their type:name id.
    auto push_if = [&](const LayerVect& clist) {
        for (auto& l : clist) {
            if ( (std::find_if( layersToHandle.begin(),
                                layersToHandle.end(),
                                [&](const CNNLayerPtr& x) { return layer_type_name_ID(x) == layer_type_name_ID(l); } )) == layersToHandle.end() )
                layersToHandle.push_back(l);

    // 1. Register all network inputs (constants and regular inputs).
    auto allInputs = CNNNetGetAllInputLayers(network);
    for (auto input : allInputs) {
        if (LayerTypeFromStr(input->type) == ConstantBlob) {
            AddConstantBlobInput(input);
            auto iter = networkInputs.find(input->name);    // regular input
            if (iter != networkInputs.end()) {
                AddInputPrimitive(iter->second, input->precision);

        // collect next layers to process
        push_if(GetNextLayers(input));

    // 2. traverse layers
    unsigned infLoopProtection = 0;
    while (!layersToHandle.empty()) {
        // If every queued layer was requeued without progress, bail out.
        if (infLoopProtection++ >= layersToHandle.size()) {
            THROW_CLDNN_EXCEPTION("Infinite loop during network creation");

        InferenceEngine::CNNLayerPtr currLayer = layersToHandle.front();
        layersToHandle.pop_front();
        auto layerName = layer_type_name_ID(currLayer);

        if (m_env.primitiveIDs.find(layerName) != m_env.primitiveIDs.end()) {
            infLoopProtection = 0;
            continue;  // this layer was already added (had multiple inputs)

        bool missingInput = false;
            GetPrevLayersPrimitives(currLayer);
        } catch (std::exception) {

        if (missingInput) {  // some inputs aren't created yet
            layersToHandle.push_back(currLayer);  // push the current layer to the end of the line
            continue;  // move on to the next layer

        infLoopProtection = 0;  // found a layer with all inputs already existing
        CreateSingleLayerPrimitive(currLayer);  // currLayer will be advanced if layer was skipped or merged
        m_env.prevPrimitiveIDs[layerName] = GetPrevLayersPrimitives(currLayer);

        push_if(GetNextLayers(currLayer));

    // 3. Handle output reordering
    for (auto output : networkOutputs) {
        // always reorder and let clDNN remove unneeded reorders
        AddOutputPrimitive(output.first, output.second);

    // Clear cached maps — they point at locals of this function.
    p_currentInputs = nullptr;
    p_currentOutputs = nullptr;
// Maps an IE layer type string (case-insensitive) to the plugin's LayerType
// enum value.
// NOTE(review): some map entries, the closing of the map initializer, and the
// final return statements (matched value / not-found fallback) appear elided
// in this view.
CLDNNGraph::LayerType CLDNNGraph::LayerTypeFromStr(const std::string &str) {
    static const caseless_map<std::string, CLDNNGraph::LayerType> LayerNameToType = {
        { "Convolution" , Convolution },
        { "Sigmoid" , Sigmoid },
        { "Logistic" , Sigmoid },            // alias of Sigmoid
        { "Activation" , Activation },
        { "Pooling" , Pooling },
        { "FullyConnected" , FullyConnected },
        { "SoftMax" , SoftMax },
        { "Concat" , Concatenate },
        { "Eltwise" , Eltwise },
        { "SimplerNMS" , SimplerNMS },
        { "ROIPooling" , ROIPooling },
        { "Deconvolution" , Deconvolution },
        { "PriorBox" , PriorBox },
        { "DetectionOutput" , DetectionOutput },
        { "Normalize" , Normalize },
        { "Reshape" , Reshape },
        { "Permute" , Permute },
        { "Flatten" , Flatten },
        { "BatchNormalization" , BatchNormalization },
        { "ScaleShift" , ScaleShift },
        { "Proposal" , Proposal },
        { "PSROIPooling" , PSROIPooling },
        { "Upsampling" , Upsampling },
        { "Resample" , Resample },
        { "RegionYolo" , RegionYolo },
        { "ReorgYolo" , ReorgYolo },
        { "Const" , ConstantBlob },
        { "ArgMax" , ArgMax },
        { "Unpooling" , Unpooling },
        { "LSTMCell" , LSTMCell },
        { "LSTMSequence" , RNN },            // both sequence types share RNN handling
        { "RNNSequence" , RNN },
        { "Gather" , Gather },
        { "DepthToSpace" , DepthToSpace },
        { "ShuffleChannels" , ShuffleChannels },
        { "StridedSlice" , StridedSlice },
        { "ReverseSequence" , ReverseSequence }
    auto it = LayerNameToType.find(str);
    if (it != LayerNameToType.end())
612 cldnn::pooling_mode CLDNNGraph::PoolingModeFromIEPooling(InferenceEngine::PoolingLayer::PoolType pt, bool excludePadding) {
614 case InferenceEngine::PoolingLayer::PoolType::MAX:
615 return cldnn::pooling_mode::max;
616 case InferenceEngine::PoolingLayer::PoolType::AVG:
617 return excludePadding ? cldnn::pooling_mode::average_no_padding : cldnn::pooling_mode::average;
618 default: IE_ASSERT(0); // unhandled pool mode
619 THROW_CLDNN_EXCEPTION("Unsupported pooling type: " << pt);
623 return cldnn::pooling_mode::max; // shouldn't get here
626 cldnn::eltwise_mode CLDNNGraph::EltwiseModeFromIEEltwise(InferenceEngine::EltwiseLayer::eOperation op) {
628 case InferenceEngine::EltwiseLayer::Sum:
629 return cldnn::eltwise_mode::sum;
630 case InferenceEngine::EltwiseLayer::Prod:
631 return cldnn::eltwise_mode::prod;
632 case InferenceEngine::EltwiseLayer::Max:
633 return cldnn::eltwise_mode::max;
634 case InferenceEngine::EltwiseLayer::Sub:
635 return cldnn::eltwise_mode::sub;
636 case InferenceEngine::EltwiseLayer::Min:
637 return cldnn::eltwise_mode::min;
638 case InferenceEngine::EltwiseLayer::Div:
639 return cldnn::eltwise_mode::div;
640 case InferenceEngine::EltwiseLayer::Squared_diff:
641 return cldnn::eltwise_mode::squared_diff;
642 case InferenceEngine::EltwiseLayer::Equal:
643 return cldnn::eltwise_mode::eq;
644 case InferenceEngine::EltwiseLayer::Not_equal:
645 return cldnn::eltwise_mode::ne;
646 case InferenceEngine::EltwiseLayer::Less:
647 return cldnn::eltwise_mode::lt;
648 case InferenceEngine::EltwiseLayer::Less_equal:
649 return cldnn::eltwise_mode::le;
650 case InferenceEngine::EltwiseLayer::Greater:
651 return cldnn::eltwise_mode::gt;
652 case InferenceEngine::EltwiseLayer::Greater_equal:
653 return cldnn::eltwise_mode::ge;
654 case InferenceEngine::EltwiseLayer::Logical_AND:
655 return cldnn::eltwise_mode::logic_and;
656 case InferenceEngine::EltwiseLayer::Logical_OR:
657 return cldnn::eltwise_mode::logic_or;
658 case InferenceEngine::EltwiseLayer::Logical_XOR:
659 return cldnn::eltwise_mode::logic_xor;
660 default: THROW_CLDNN_EXCEPTION("Unsupported eltwise operation: " << op);
664 return cldnn::eltwise_mode::max; // shouldn't get here
667 cldnn::concatenation::concatenation_axis CLDNNGraph::ConcatAxisFromIEAxis(unsigned axis) {
670 return cldnn::concatenation::concatenation_axis::along_b;
672 return cldnn::concatenation::concatenation_axis::along_f;
674 return cldnn::concatenation::concatenation_axis::along_y;
676 return cldnn::concatenation::concatenation_axis::along_x;
677 default: THROW_CLDNN_EXCEPTION("Unsupported concatenation axis: " << axis);
681 return cldnn::concatenation::concatenation_axis::along_f; // shouldn't get here
// Allocates a cldnn memory buffer with the given layout, copies the IE blob's
// contents into it (optionally rearranging weight data), and registers the
// buffer in the topology as a cldnn::data primitive under 'primID'.
// @param blobByteOffset  byte offset into the blob for the plain-copy path.
// @param rearrange       NO_REARRANGE (plain copy), BroadcastFeatures, or
//                        FlipDeconvDims.
// NOTE(review): a few closing braces and the final 'else' line appear elided
// in this view.
void CLDNNGraph::CreatePrimitiveFromBlob(cldnn::primitive_id primID,
                                         const InferenceEngine::Blob::Ptr pBlob,
                                         cldnn::layout blobLayout,
                                         size_t blobByteOffset,
                                         WeightRearrangeType rearrange) {
    auto mem = cldnn::memory::allocate(*(m_env.engine), blobLayout);
    auto tmpPointer = mem.pointer<char>();  // implicitly maps buffer - unmap in destructor
    auto buf = tmpPointer.data();
    auto bufSize = blobLayout.bytes_count();
// The condition below is not valid once we use groups - todo: think of some other size check here
// if ((pBlob != nullptr) &&
//     (pBlob->size() * (broadcastFeatures ? blobLayout.size.feature[0] : 1)) != blobLayout.count()) {
//     THROW_CLDNN_EXCEPTION("Unexpected blob size");
    if (pBlob == nullptr) {
        THROW_CLDNN_EXCEPTION("Missing blob data: " << primID);
    } else if ((pBlob->layout() != InferenceEngine::OIHW) &&
               (pBlob->layout() != InferenceEngine::NCHW) &&
               (pBlob->layout() != InferenceEngine::CHW) &&
               (pBlob->layout() != InferenceEngine::NC) &&
               (pBlob->layout() != InferenceEngine::C)) {
        // TODO: support more layouts
        THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(pBlob->layout()) << ") in blob: " << primID);
    } else if (rearrange == BroadcastFeatures) {
        // Replicate a per-feature vector across every element of its feature
        // map (e.g. per-channel scales broadcast over spatial dims).
        size_t features = static_cast<size_t>(blobLayout.size.feature[0]);
        if (pBlob->size() != features) {
            THROW_CLDNN_EXCEPTION("Invalid blob dimensions to broadcast: " << primID);

        auto data = static_cast<const char *>(pBlob->buffer());
        auto elementSize = cldnn::data_type_traits::size_of(blobLayout.data_type);
        size_t featureElements = blobLayout.count() / static_cast<size_t>(blobLayout.size.feature[0]);
        IE_ASSERT(blobLayout.format == cldnn::format::bfyx);
        // Byte-wise copy: every element of feature f receives the blob's f-th value.
        for (size_t f = 0; f < features; f++) {
            for (size_t e = 0; e < featureElements; e++) {
                for (size_t b = 0; b < elementSize; b++) {
                    buf[(f*featureElements + e)*elementSize + b] = data[f*elementSize + b];

    } else if (rearrange == FlipDeconvDims) {
        // Swap the input/output feature dimensions of deconvolution weights
        // (IE stores them transposed relative to what cldnn expects here).
        auto data = static_cast<const char *>(pBlob->buffer());
        auto elementSize = cldnn::data_type_traits::size_of(blobLayout.data_type);

        size_t inputFeatureElements = static_cast<size_t>(blobLayout.size.feature[0]);
        size_t outputFeatureElements = static_cast<size_t>(blobLayout.size.batch[0]);

        size_t featureSize = elementSize * static_cast<size_t>(blobLayout.size.spatial[0] * blobLayout.size.spatial[1]);

        for (size_t i = 0; i < inputFeatureElements; i++) {
            for (size_t o = 0; o < outputFeatureElements; o++) {
                size_t outputShift = (o*inputFeatureElements + i)*featureSize;
                size_t inputShift = (i*outputFeatureElements + o)*featureSize;

                for (size_t b = 0; b < featureSize; b++) {
                    buf[outputShift + b] = data[inputShift + b];

        // Default path: plain byte copy starting at the requested offset.
        auto data = static_cast<const char *>(pBlob->buffer());
        for (size_t i = 0; i < bufSize; i++) {
            buf[i] = data[i + blobByteOffset];

    m_topology->add(cldnn::data(primID, mem));
// Creates cldnn::data primitives for the weights (and biases, when present)
// of a Convolution or Deconvolution layer, splitting them per group, and
// appends the resulting primitive ids to the output vectors.
// NOTE(review): several lines appear elided in this view — the Convolution
// case label, the weightDimsVec initializer openers, group-size clamping in
// the >=16 branches, closing braces, and the offset arguments of the
// CreatePrimitiveFromBlob calls.
void CLDNNGraph::CreateWeightAndBiasPrimitives(const InferenceEngine::CNNLayerPtr& layer,
                                               std::vector<cldnn::primitive_id>& weightsPrimID,
                                               std::vector<cldnn::primitive_id>& biasesPrimID) {
    cldnn::tensor::value_type inFeatures = 1;  // todo: workaround for xyf input, handle general case (xf, xyzf etc...)
    std::shared_ptr<Data> insData0 = layer->insData[0].lock();
    IE_ASSERT(insData0 != nullptr);
    // Input feature count sits at dims[2] for >2D inputs.
    if (insData0->dims.size() > 2) {
        inFeatures = TensorValue(insData0->dims[2]);

    cldnn::tensor::value_type outFeatures(0);
    std::vector<cldnn::tensor::value_type> weightDimsVec;
    InferenceEngine::Blob::Ptr pWeightsBlob, pBiasBlob;
    unsigned groupSize = 1;
    WeightRearrangeType rearrange = NO_REARRANGE;

    switch (LayerTypeFromStr(layer->type)) {
        auto convLayer = dynamic_cast<InferenceEngine::ConvolutionLayer *> (layer.get());
        // NOTE(review): this divisibility check reads groupSize before the
        // assignment below (groupSize is still 1 here) — presumably the
        // intended order upstream; verify against the original source.
        if ((inFeatures % groupSize) || (convLayer->_out_depth % groupSize)) {
            THROW_CLDNN_EXCEPTION("Invalid group size in layer " << convLayer->name);
        groupSize = convLayer->_group;
        if (groupSize >= 16)  // cldnn optimization for 16 and more groups
        // Per-group weight dims: [out/groups, in/groups, kx, ky].
        TensorValue(convLayer->_out_depth / groupSize),
        TensorValue(inFeatures / convLayer->_group),
        TensorValue(convLayer->_kernel[X_AXIS]),
        TensorValue(convLayer->_kernel[Y_AXIS])
        outFeatures = convLayer->_out_depth;
        pWeightsBlob = convLayer->_weights;
        pBiasBlob = convLayer->_biases;

    case Deconvolution: {
        auto deconvLayer = dynamic_cast<InferenceEngine::DeconvolutionLayer *> (layer.get());
        if ((inFeatures % groupSize) || (deconvLayer->_out_depth % groupSize)) {
            THROW_CLDNN_EXCEPTION("Invalid group size in layer " << deconvLayer->name);
        groupSize = deconvLayer->_group;
        if (groupSize >= 16)  // cldnn optimization for 16 and more groups
        TensorValue(deconvLayer->_out_depth / groupSize),
        TensorValue(inFeatures / deconvLayer->_group),
        TensorValue(deconvLayer->_kernel[X_AXIS]),
        TensorValue(deconvLayer->_kernel[Y_AXIS])
        outFeatures = deconvLayer->_out_depth;
        pWeightsBlob = deconvLayer->_weights;
        pBiasBlob = deconvLayer->_biases;

        // Deconv weights need their in/out feature dims swapped unless fully grouped.
        if ((groupSize < outFeatures) || (groupSize < inFeatures))
            rearrange = FlipDeconvDims;

        IE_ASSERT("Wrong weightable layer type");  // shouldn't get here

    // create weights primitive
    cldnn::layout weightsLayout = cldnn::layout(
        DataTypeFromPrecision(layer->precision),
        cldnn::tensor(weightDimsVec));
    size_t bytesPerGroup = weightsLayout.bytes_count();

    // One data primitive per group, sliced out of the single IE blob.
    for (unsigned g = 0; g < groupSize; g++) {
        cldnn::primitive_id weightID = layer_type_name_ID(layer) + m_weightsTag + std::to_string(g);
        CreatePrimitiveFromBlob(
        weightsPrimID.push_back(weightID);

    // create bias primitive
    if (pBiasBlob != nullptr) {
        cldnn::layout biasesLayout = cldnn::layout(
            DataTypeFromPrecision(layer->precision),
            cldnn::spatial(TensorValue(outFeatures / groupSize)));
        size_t bytesPerGroup = biasesLayout.bytes_count();
        for (unsigned g = 0; g < groupSize; g++) {
            cldnn::primitive_id biasID = layer_type_name_ID(layer) + m_biasesTag + std::to_string(g);
            CreatePrimitiveFromBlob(
            biasesPrimID.push_back(biasID);
// Converts BatchNormalization statistics into the inputs of an equivalent
// Scale primitive: weights = 1/sqrt(variance + epsilon) and
// biases = -mean * weights, creating the data primitives under the given ids.
// NOTE(review): per the casts below, this layer convention stores variance in
// _weights and mean in _biases.  Several case labels, closing braces and the
// blobTensor argument of the layout appear elided in this view.
void CLDNNGraph::CreateScaleWeightsAndBiasesFromBN(
    const InferenceEngine::BatchNormalizationLayer* bnLayer,
    cldnn::primitive_id weightsPrimID,
    cldnn::primitive_id biasesPrimID) {

    if (bnLayer->_weights->dims() != bnLayer->_biases->dims()) {
        THROW_CLDNN_EXCEPTION("mean/variance dimensions mismatch in " << bnLayer->name);
    if (bnLayer->_weights->precision() != bnLayer->_biases->precision()) {
        THROW_CLDNN_EXCEPTION("mean/variance precision mismatch in " << bnLayer->name);

    // Derive the per-feature tensor from the output dimensionality.
    cldnn::tensor blobTensor(0);
    switch (bnLayer->outData[0]->dims.size()) {
        blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[0]));
        blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[2]));
        THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name);
    cldnn::layout blobLayout(
        DataTypeFromPrecision(bnLayer->precision),

    switch (bnLayer->_weights->precision()) {
    case Precision::FP16: {
        // FP16 path: convert via the cldnn half<->float helpers, checking the
        // conversion status at every step.
        InferenceEngine::TBlob<uint16_t> weightsBlob(bnLayer->_weights->precision(), bnLayer->_weights->layout(), bnLayer->_weights->dims());
        weightsBlob.allocate();
        InferenceEngine::TBlob<uint16_t> biasesBlob(bnLayer->_biases->precision(), bnLayer->_weights->layout(), bnLayer->_biases->dims());
        biasesBlob.allocate();

        auto weightsData = weightsBlob.data();
        auto biasesData = biasesBlob.data();
        auto varianceData = static_cast<const uint16_t *>(bnLayer->_weights->buffer());
        auto meanData = static_cast<const uint16_t *>(bnLayer->_biases->buffer());

        cldnn_status status = CLDNN_SUCCESS;
        for (size_t i = 0; i < weightsBlob.size(); i++) {
            auto variance = cldnn_half_to_float(varianceData[i], &status);
            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
            auto mean = cldnn_half_to_float(meanData[i], &status);
            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);

            float scale = 1.0f / sqrt(variance + bnLayer->epsilon);
            weightsData[i] = cldnn_float_to_half(scale, &status);
            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
            biasesData[i] = cldnn_float_to_half((-mean) * scale, &status);
            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);

        CreatePrimitiveFromBlob(weightsPrimID, std::make_shared<InferenceEngine::TBlob<uint16_t>>(weightsBlob), blobLayout);
        CreatePrimitiveFromBlob(biasesPrimID, std::make_shared<InferenceEngine::TBlob<uint16_t>>(biasesBlob), blobLayout);

    case Precision::FP32: {
        // FP32 path: direct arithmetic, no conversion needed.
        InferenceEngine::TBlob<float> weightsBlob(bnLayer->_weights->precision(), bnLayer->_weights->layout(), bnLayer->_weights->dims());
        weightsBlob.allocate();
        InferenceEngine::TBlob<float> biasesBlob(bnLayer->_biases->precision(), bnLayer->_weights->layout(), bnLayer->_biases->dims());
        biasesBlob.allocate();

        auto weightsData = weightsBlob.data();
        auto biasesData = biasesBlob.data();
        auto varianceData = static_cast<const float *>(bnLayer->_weights->buffer());
        auto meanData = static_cast<const float *>(bnLayer->_biases->buffer());

        for (size_t i = 0; i < weightsBlob.size(); i++) {
            auto variance = varianceData[i];
            auto mean = meanData[i];
            weightsData[i] = 1.0f / sqrt(variance + bnLayer->epsilon);
            biasesData[i] = (-mean) * weightsData[i];

        CreatePrimitiveFromBlob(weightsPrimID, std::make_shared<InferenceEngine::TBlob<float>>(weightsBlob), blobLayout);
        CreatePrimitiveFromBlob(biasesPrimID, std::make_shared<InferenceEngine::TBlob<float>>(biasesBlob), blobLayout);

        THROW_CLDNN_EXCEPTION("Unhandled mean/variance precision in " << bnLayer->name);
// Dispatches a single IE layer to the matching Create*Primitive() helper.
// Custom layers (registered via the config file) take precedence over the
// built-in type dispatch.
// NOTE(review): the 'break;' statements between cases, the activation-type
// case labels (ReLU/Sigmoid/etc.), and the return after the custom-layer
// branch appear elided in this view.
void CLDNNGraph::CreateSingleLayerPrimitive(InferenceEngine::CNNLayerPtr &layer) {
    // Initialize a profiling entry
    InitProfileInfo(layer->name, layer->type);

    // First check for custom layer
    auto customLayer = m_config.customLayers.find(layer->type);
    if (customLayer != m_config.customLayers.end()) {
        CreateCustomLayerPrimitive(layer, customLayer->second);

    // Otherwise move on to built-in layer types
    switch (LayerTypeFromStr(layer->type)) {
        case Convolution: CreateConvolutionPrimitive(layer);
            // Activation-style types funnel into one helper, parameterized by type.
            CreateActivationPrimitive(layer, LayerTypeFromStr(layer->type));
        case LRN: CreateLRNPrimitive(layer);
        case Pooling: CreatePoolingPrimitive(layer);
        case Unpooling: CreateMaxUnpoolingPrimitive(layer);
        case FullyConnected: CreateFullyConnectedPrimitive(layer);
        case SoftMax: CreateSoftMaxPrimitive(layer);
        case Power: CreatePowerPrimitive(layer);
        case Split: CreateSplitPrimitive(layer);
        case Concatenate: CreateConcatenatePrimitive(layer);
        case Eltwise: CreateEltwisePrimitive(layer);
        case SimplerNMS: CreateSimplerNMSPrimitive(layer);
        case ROIPooling: CreateROIPoolingPrimitive(layer);
        case Crop: CreateCropPrimitive(layer);
        case Deconvolution: CreateDeconvolutionPrimitive(layer);
        case PriorBox: CreatePriorBoxPrimitive(layer);
        case DetectionOutput: CreateDetectionOutputPrimitive(layer);
        case Normalize: CreateNormalizePrimitive(layer);
        case Reshape: CreateReshapePrimitive(layer);
        case Permute: CreatePermutePrimitive(layer);
        case Flatten: CreateFlattenPrimitive(layer);
        case BatchNormalization: CreateBatchNormalizationPrimitive(layer);
        case PReLU: CreatePReLUPrimitive(layer);
        case ScaleShift: CreateScaleShiftPrimitive(layer);
        case Proposal: CreateProposalPrimitive(layer);
        case PSROIPooling: CreatePSROIPoolingPrimitive(layer);
        case Copy: CreateCopyPrimitive(layer);
        case Upsampling: CreateUpsamplingPrimitive(layer);
        case Resample: CreateResamplePrimitive(layer);
        case ArgMax: CreateArgMaxPrimitive(layer);
        case MVN: CreateMVNPrimitive(layer);
        case LSTMCell: CreateLSTMCellPrimitive(layer);
        case RNN: CreateRNNPrimitive(layer);
        case RegionYolo: CreateYOLO2RegionPrimitive(layer);
        case ReorgYolo: CreateYOLO2ReorgPrimitive(layer);
        case Tile: CreateTilePrimitive(layer);
        case Pad: CreatePadPrimitive(layer);
        case Gather: CreateGatherPrimitive(layer);
        case DepthToSpace: CreateDepthToSpacePrimitive(layer);
        case ShuffleChannels: CreateShuffleChannelsPrimitive(layer);
        case StridedSlice: CreateStridedSlicePrimitive(layer);
        case ReverseSequence: CreateReverseSequencePrimitive(layer);
        default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type);
// Builds a cldnn::scale primitive from an IE ScaleShift layer.
// Weights become the per-feature (or per-pixel) scale blob; an optional
// bias blob of matching dimensions becomes the shift.
1044 void CLDNNGraph::CreateScaleShiftPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1045 ValidateLayer(layer, 1);
1046 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1047 auto scaleShiftLayer = dynamic_cast<InferenceEngine::ScaleShiftLayer*> (layer.get());
1049 // create scales and biases
1050 cldnn::primitive_id scalePrimID = scaleShiftLayer->name + m_scalesTag;
1051 cldnn::primitive_id biasPrimID = scaleShiftLayer->name + m_biasesTag;
1053 const auto& dims = scaleShiftLayer->_weights->dims();
1054 cldnn::tensor weightTensor(1);
// Map IE weight-blob dimensionality onto a clDNN tensor. Note the 4D case
// swaps the last two dims (IE bfyx vs clDNN spatial order).
1055 switch (dims.size()) {
1056 case 1: weightTensor = cldnn::feature(TensorValue(dims[0])); // value per feature (or 1 global value)
1058 case 4: weightTensor = cldnn::tensor(TensorValue(dims[0]), TensorValue(dims[1]), TensorValue(dims[3]), TensorValue(dims[2])); // value per pixel
1060 default: THROW_CLDNN_EXCEPTION("Invalid weights dimensions in layer " << layer->name);
1063 cldnn::layout blobLayout(DataTypeFromPrecision(layer->precision), m_defaultFormat, weightTensor);
1064 CreatePrimitiveFromBlob(scalePrimID, scaleShiftLayer->_weights, blobLayout);
// Biases are optional; they must match the weight dims exactly.
1065 if (scaleShiftLayer->_biases != nullptr) {
1066 if (scaleShiftLayer->_biases->dims() != dims) {
1067 THROW_CLDNN_EXCEPTION("Invalid bias blob dimensions in layer " << layer->name);
1069 CreatePrimitiveFromBlob(biasPrimID, scaleShiftLayer->_biases, blobLayout);
1071 biasPrimID = ""; // 0-bias
1074 std::string scaleShiftLayerName = layer_type_name_ID(layer);
1075 auto scaleShiftPrim = cldnn::scale(
1076 scaleShiftLayerName,
// Register the primitive and its profiling entry under the layer's ID.
1081 m_env.primitiveIDs[scaleShiftLayerName] = scaleShiftLayerName;
1082 m_topology->add(scaleShiftPrim);
1083 m_env.profilingIDs.push_back(scaleShiftLayerName);
// Builds a cldnn::proposal primitive (RPN proposal generation) from an IE
// Proposal layer. Most parameters come straight from the layer's params;
// a few behavioral flags are selected based on the originating framework.
1086 void CLDNNGraph::CreateProposalPrimitive(InferenceEngine::CNNLayerPtr & layer) {
1087 ValidateLayer(layer, 3);
1088 auto proposalLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1090 float nms_thresh = proposalLayer->GetParamAsFloat("nms_thresh", 0.7f);
1091 int min_size = proposalLayer->GetParamAsInt("min_size", 16);
1092 int feature_stride = proposalLayer->GetParamAsInt("feat_stride", 16);
1093 int pre_nms_topn = proposalLayer->GetParamAsInt("pre_nms_topn", 6000);
1094 int post_nms_topn = proposalLayer->GetParamAsInt("post_nms_topn", 300);
1095 const std::vector<float> ratio = proposalLayer->GetParamAsFloats("ratio");
1096 const std::vector<float> scale = proposalLayer->GetParamAsFloats("scale");
1097 float box_coordinate_scale = proposalLayer->GetParamAsFloat("box_coordinate_scale", 1.0f);
1098 float box_size_scale = proposalLayer->GetParamAsFloat("box_size_scale", 1.0f);
1099 int base_size = proposalLayer->GetParamAsInt("base_size", 16);
1100 std::string framework = proposalLayer->GetParamAsString("framework", "");
1101 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1102 bool normalize = layer->GetParamsAsBool("normalize", false);
1103 bool clip_before_nms = layer->GetParamsAsBool("clip_before_nms", true);
1104 bool clip_after_nms = layer->GetParamsAsBool("clip_after_nms", false);
1106 float coordinates_offset;
// TensorFlow-style proposals use 0-based box coordinates, clip boxes up
// front, shift anchors and keep exact (unrounded) aspect ratios; the
// default (Caffe-style) path uses the opposite settings.
1112 if (framework == "tensorflow") {
1113 coordinates_offset = 0.0f;
1114 initial_clip = true;
1115 shift_anchors = true;
1116 round_ratios = false;
1119 coordinates_offset = 1.0f;
1120 initial_clip = false;
1121 shift_anchors = false;
1122 round_ratios = true;
1126 std::string proposalLayerName = layer_type_name_ID(layer);
1127 auto proposalPrim = cldnn::proposal(
1129 inputPrimitives[0], // cls_score
1130 inputPrimitives[1], // bbox_pred
1131 inputPrimitives[2], // im_info
1132 0, // max_num_proposals is unused
1142 box_coordinate_scale,
1152 m_env.primitiveIDs[proposalLayerName] = proposalLayerName;
1153 m_topology->add(proposalPrim);
1154 m_env.profilingIDs.push_back(proposalLayerName);
// Builds a PReLU as a cldnn::activation with negative-slope semantics.
// If the slope is channel-shared (a single scalar), it is read out of the
// blob directly; otherwise the per-channel slope blob becomes a separate
// weights primitive referenced by the activation.
1157 void CLDNNGraph::CreatePReLUPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1158 ValidateLayer(layer, 1);
1159 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1160 auto preluLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1162 std::string preluLayerName = layer_type_name_ID(layer);
1163 auto inDataPtr = preluLayer->insData[0].lock();
1165 THROW_CLDNN_EXCEPTION("Data inserted into PreLu " << preluLayer->name << " is nullptr");
1167 auto inputDims = inDataPtr->dims;
1168 static const std::string blobName("weights");
1169 ValidateGenericLayerBlobs(preluLayer, { blobName });
1171 bool channel_shared = preluLayer->GetParamsAsBool("channel_shared", false);
1173 auto slopeBlob = preluLayer->blobs.at(blobName);
1174 if (channel_shared) {
// Channel-shared slope must be a single value.
1175 if (slopeBlob->dims()[0] != 1) {
1176 THROW_CLDNN_EXCEPTION("PReLU slope blob with wrong dimensions in " << preluLayer->name);
// Read the scalar slope, converting from FP16 when necessary.
1179 switch (slopeBlob->precision()) {
1180 case InferenceEngine::Precision::FP32:
1181 slope = *static_cast<const float *>(slopeBlob->buffer());
1183 case InferenceEngine::Precision::FP16:
1185 cldnn_status status = CLDNN_SUCCESS;
1186 slope = cldnn_half_to_float(*static_cast<const uint16_t *>(slopeBlob->buffer()), &status);
1187 if (status != CLDNN_SUCCESS) {
1188 THROW_CLDNN_EXCEPTION("Error converting fp16 value in " << preluLayer->name);
1192 default: THROW_CLDNN_EXCEPTION("Invalid PReLU slope blob precision in " << preluLayer->name);
1194 m_topology->add(cldnn::activation(preluLayerName, inputPrimitives[0], activation_relu_negative_slope, { slope, 0.f }));
// Per-channel slope: materialize the blob as a primitive and reference it.
1196 CreateGenericLayerBlobPrimitives(preluLayer);
1197 cldnn::primitive_id slopePrimID(preluLayerName + "_" + blobName + m_weightsTag);
1198 m_topology->add(cldnn::activation(preluLayerName, inputPrimitives[0], slopePrimID, activation_relu_negative_slope));
1201 m_env.primitiveIDs[preluLayerName] = preluLayerName;
1202 m_env.profilingIDs.push_back(preluLayerName);
// Builds a batch-normalization primitive. By default (_SCALE_BN_OPT) BN is
// folded into a cldnn::scale (precomputed scale/shift from mean/variance),
// which needs 1 mad instead of mad+rsq per element; the plain
// cldnn::batch_norm path below is kept for the non-optimized build.
1205 void CLDNNGraph::CreateBatchNormalizationPrimitive(InferenceEngine::CNNLayerPtr & layer) {
1206 ValidateLayer(layer, 1);
1207 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1208 std::string bnLayerName = layer_type_name_ID(layer);
1210 auto bnLayer = dynamic_cast<InferenceEngine::BatchNormalizationLayer *> (layer.get());
1211 cldnn::primitive_id weightID = bnLayerName + "_" + m_scalesTag;
1212 cldnn::primitive_id biasID = bnLayerName + "_" + m_biasesTag;
1214 #define _SCALE_BN_OPT
1215 #ifdef _SCALE_BN_OPT
1216 // Using scale as an optimization (1 mad instead of mad+rsq)
1217 // create new blobs for scale shift
1218 CreateScaleWeightsAndBiasesFromBN(bnLayer, weightID, biasID);
1219 auto scalePrim = cldnn::scale(bnLayerName, inputPrimitives[0], weightID, biasID);
1221 m_env.primitiveIDs[bnLayerName] = bnLayerName;
1222 m_topology->add(scalePrim);
1223 m_env.profilingIDs.push_back(bnLayerName);
1225 #endif // _SCALE_BN_OPT
// Fallback: true batch_norm primitive fed by mean/variance blobs.
1227 cldnn::tensor blobTensor(0);
// Pick the feature dimension from the output dims (2D vs 4D layouts).
1228 switch (bnLayer->outData[0]->dims.size()) {
1230 blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[0]));
1233 blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[2]));
1236 THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name);
1238 cldnn::layout blobLayout(
1239 DataTypeFromPrecision(layer->precision),
1243 // Create variance primitive
1244 cldnn::primitive_id varianceID = bnLayerName + "_" + m_weightsTag;
1245 CreatePrimitiveFromBlob(varianceID, bnLayer->_weights, blobLayout);
1247 // Create mean primitive
1248 cldnn::primitive_id meanID = bnLayerName + "_" + m_biasesTag;
1249 CreatePrimitiveFromBlob(meanID, bnLayer->_biases, blobLayout);
1251 auto bnPrim = cldnn::batch_norm(
1258 m_env.primitiveIDs[bnLayerName] = bnLayerName;
1259 m_topology->add(bnPrim);
1260 m_env.profilingIDs.push_back(bnLayerName);
// Implements IE Flatten as a cldnn::reshape to the layer's output dims
// (clDNN has no dedicated flatten primitive).
1263 void CLDNNGraph::CreateFlattenPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1264 ValidateLayer(layer, 1);
1265 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1266 auto flattenLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1267 std::string flattenLayerName = layer_type_name_ID(layer);
1269 auto flattenPrim = cldnn::reshape(
1272 CldnnTensorFromIEDims(flattenLayer->outData[0]->dims));
1274 m_env.primitiveIDs[flattenLayerName] = flattenLayerName;
1275 m_topology->add(flattenPrim);
1276 m_env.profilingIDs.push_back(flattenLayerName);
// Builds a cldnn::permute from an IE Permute layer, translating the IE
// "order" attribute into clDNN's axis numbering.
1279 void CLDNNGraph::CreatePermutePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1280 ValidateLayer(layer, 1);
1281 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1282 auto permuteLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1283 std::vector<uint16_t> ie_order;
1284 for (auto& a : permuteLayer->GetParamAsInts("order"))
1285 ie_order.push_back(static_cast<uint16_t>(a));
1287 // if order size is less than 4 - fill the rest with just copy
1288 for (auto o = ie_order.size(); o < 4; o++)
1289 ie_order.push_back((uint16_t)o);
1292 Because of the cldnn ordering: bfxy, and IE ordering: bfyx
1293 we need to adjust the permute order.
1295 std::vector<uint16_t> cldnn_permute_order;
1296 // 1. Switch permute order values (x and y)
1297 for (auto const& o : ie_order) {
1299 cldnn_permute_order.push_back(3);
1301 cldnn_permute_order.push_back(2);
1303 cldnn_permute_order.push_back(o);
1305 // 2. Swap x and y positions
1306 std::swap(cldnn_permute_order[2], cldnn_permute_order[3]);
1308 std::string permuteLayerName = layer_type_name_ID(layer);
1310 auto permutePrim = cldnn::permute(
1313 cldnn_permute_order);
1315 m_env.primitiveIDs[permuteLayerName] = permuteLayerName;
1316 m_topology->add(permutePrim);
1317 m_env.profilingIDs.push_back(permuteLayerName);
// Builds a cldnn::reshape from an IE Reshape layer using the layer's first
// output dims as the target shape.
1320 void CLDNNGraph::CreateReshapePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1321 ValidateLayer(layer, 1);
1322 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1323 auto reshapeLayer = dynamic_cast<InferenceEngine::ReshapeLayer*> (layer.get());
1324 IE_ASSERT(reshapeLayer->outData.size());
1325 std::string reshapeLayerName = layer_type_name_ID(layer);
1327 auto reshapePrim = cldnn::reshape(
1330 CldnnTensorFromIEDims(reshapeLayer->outData[0]->dims));
1332 m_env.primitiveIDs[reshapeLayerName] = reshapeLayerName;
1333 m_topology->add(reshapePrim);
1334 m_env.profilingIDs.push_back(reshapeLayerName);
// Builds a cldnn::normalize (L2 normalization with learned scale) from an
// IE Normalize layer; the "weights" blob becomes the scale primitive.
1337 void CLDNNGraph::CreateNormalizePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1338 ValidateLayer(layer, 1);
1339 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1340 auto normLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1341 ValidateGenericLayerBlobs(normLayer, { "weights" });
1342 CreateGenericLayerBlobPrimitives(normLayer);
1345 bool across_spatial = normLayer->GetParamsAsBool("across_spatial", true);
1346 float eps = normLayer->GetParamAsFloat("eps", 0.0f);
1348 // WA for MO outputting %.6f
1353 std::string normLayerName = layer_type_name_ID(layer);
1354 auto normPrim = cldnn::normalize(
1357 normLayerName + "_weights" + m_weightsTag,
1361 m_env.primitiveIDs[normLayerName] = normLayerName;
1362 m_topology->add(normPrim);
1363 m_env.profilingIDs.push_back(normLayerName);
// Builds a cldnn::detection_output (SSD-style post-processing: decode boxes,
// NMS, top-k) from an IE DetectionOutput layer.
1366 void CLDNNGraph::CreateDetectionOutputPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1367 ValidateLayer(layer, 3);
1368 auto detectionLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1370 uint32_t num_classes = detectionLayer->GetParamAsUInt("num_classes", 1);
1371 bool share_location = detectionLayer->GetParamsAsBool("share_location", true);
1372 int background_label_id = detectionLayer->GetParamAsInt("background_label_id", 0);
1373 float nms_threshold = detectionLayer->GetParamAsFloat("nms_threshold", 0.3f);
1374 int top_k = detectionLayer->GetParamAsInt("top_k", -1);
1375 float confidence_threshold = detectionLayer->GetParamAsFloat("confidence_threshold", -FLT_MAX);
1376 float eta = detectionLayer->GetParamAsFloat("eta", 1.0f);
1377 int keep_top_k = detectionLayer->GetParamAsInt("keep_top_k", -1);
1378 bool variance_encoded_in_target = detectionLayer->GetParamsAsBool("variance_encoded_in_target", false);
1379 int input_width = detectionLayer->GetParamAsInt("input_width", -1);
1380 int input_height = detectionLayer->GetParamAsInt("input_height", -1);
1381 bool normalized = detectionLayer->GetParamsAsBool("normalized", true);
1382 std::string code_type = detectionLayer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER");
1383 bool clip_before_nms = detectionLayer->GetParamsAsBool("clip_before_nms", false) ||
1384 detectionLayer->GetParamsAsBool("clip", false); // For backward compatibility
1385 bool clip_after_nms = detectionLayer->GetParamsAsBool("clip_after_nms", false);
1386 bool decrease_label_id = detectionLayer->GetParamsAsBool("decrease_label_id", false);
1388 cldnn::prior_box_code_type cldnnCodeType = PriorBoxCodeFromString(code_type);
// Non-normalized priors carry an extra leading element per box, hence the
// larger record size and the 1-element coordinate offset.
1389 int32_t prior_info_size = normalized != 0 ? 4 : 5;
1390 int32_t prior_coordinates_offset = normalized != 0 ? 0 : 1;
1392 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1393 std::string detectionLayerName = layer_type_name_ID(layer);
1394 auto detectionPrim = cldnn::detection_output(detectionLayerName,
1401 background_label_id,
1406 variance_encoded_in_target,
1407 confidence_threshold,
1409 prior_coordinates_offset,
1417 m_env.primitiveIDs[detectionLayerName] = detectionLayerName;
1418 m_topology->add(detectionPrim);
1419 m_env.profilingIDs.push_back(detectionLayerName);
// Builds a cldnn::prior_box from an IE PriorBox layer. Image dimensions are
// taken from the second input's shape (not its data); explicit img_size
// parameters are rejected.
1422 void CLDNNGraph::CreatePriorBoxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1423 ValidateLayer(layer, 2);
1424 auto priorBoxLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1427 std::vector<float> min_size = priorBoxLayer->GetParamAsFloats("min_size");
1428 std::vector<float> max_size = priorBoxLayer->GetParamAsFloats("max_size", {});
1429 std::vector<float> aspect_ratio = priorBoxLayer->GetParamAsFloats("aspect_ratio", {});
1430 std::vector<float> variance = priorBoxLayer->GetParamAsFloats("variance");
1431 bool flip = priorBoxLayer->GetParamsAsBool("flip", true);
1432 bool clip = priorBoxLayer->GetParamsAsBool("clip", false);
1433 bool scale_all_sizes = priorBoxLayer->GetParamsAsBool("scale_all_sizes", true);
1434 float offset = priorBoxLayer->GetParamAsFloat("offset", 0.5f);
1436 auto step_w = priorBoxLayer->GetParamAsFloat("step_w", 0.0f);
1437 auto step_h = priorBoxLayer->GetParamAsFloat("step_h", 0.0f);
1438 auto step = priorBoxLayer->GetParamAsFloat("step", 0.0f);
// Resolve the effective step: explicit step_w/step_h pair wins, then the
// single "step" parameter; otherwise derived from image dims below.
1440 float _step_w = 0.0f;
1441 float _step_h = 0.0f;
1442 if (HasParam(priorBoxLayer->params, "step_w") && step_w != 0.0f &&
1443 HasParam(priorBoxLayer->params, "step_h") && step_h != 0.0f) {
1446 } else if (HasParam(priorBoxLayer->params, "step") && step != 0.0f) {
1451 int img = priorBoxLayer->GetParamAsInt("img_size", 0);
1452 int img_w = priorBoxLayer->GetParamAsInt("img_w", 0);
1453 int img_h = priorBoxLayer->GetParamAsInt("img_h", 0);
1454 if ((img != 0) || (img_w != 0) || (img_h != 0)) {
1456 THROW_CLDNN_EXCEPTION("Unsupported image sizes in prior box " + layer->name + " (use an image blob instead of dimensions)");
1459 IE_ASSERT(layer->insData[1].lock());
1460 auto img_dims = layer->insData[1].lock()->dims;
1461 cldnn::tensor img_size = cldnn::spatial(TensorValue(img_dims[0]), TensorValue(img_dims[1]));
1462 std::vector<cldnn::primitive_id> inputPrimitives = GetPrevLayersPrimitives(layer);
1463 // second input isn't used by value - only dimensions taken from the layer input
1465 if (_step_w == 0.0f || _step_h == 0.0f) {
1466 _step_w = static_cast<float>(img_w) / static_cast<float>(img_dims[0]);
1467 _step_h = static_cast<float>(img_h) / static_cast<float>(img_dims[1]);
1470 std::string priorBoxLayerName = layer_type_name_ID(layer);
1471 auto priorBoxPrim = cldnn::prior_box(
1486 m_env.primitiveIDs[priorBoxLayerName] = priorBoxLayerName;
1487 m_topology->add(priorBoxPrim);
1488 m_env.profilingIDs.push_back(priorBoxLayerName);
// Builds a cldnn::deconvolution from an IE Deconvolution layer. Dilation is
// not supported. Large group counts (>= 16) use the grouped-deconvolution
// constructor; smaller ones use the per-group split path.
1491 void CLDNNGraph::CreateDeconvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1492 ValidateLayer(layer, 1);
1493 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1494 auto deconvLayer = dynamic_cast<InferenceEngine::DeconvolutionLayer *> (layer.get());
1496 if (deconvLayer->_dilation[X_AXIS] != 1 || deconvLayer->_dilation[Y_AXIS] != 1) {
1497 THROW_CLDNN_EXCEPTION("Unsupported dilation in deconvolution " << layer->name);
1500 std::vector<cldnn::primitive_id> weightPrimID;
1501 std::vector<cldnn::primitive_id> biasPrimID;
1502 CreateWeightAndBiasPrimitives(layer, weightPrimID, biasPrimID);
1503 auto allPads = getPaddings(*deconvLayer);
1504 cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
1505 cldnn::spatial(deconvLayer->_stride[X_AXIS], deconvLayer->_stride[Y_AXIS]));
// clDNN expects negative padding offsets for deconvolution.
1506 cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
1507 cldnn::spatial(-allPads.begin[X_AXIS], -allPads.begin[Y_AXIS]));
1509 std::string deconvLayerName = layer_type_name_ID(layer);
1511 if (deconvLayer->_group >= 16) {
1512 auto deconvPrim = cldnn::deconvolution(deconvLayerName,
1516 deconvLayer->_group,
1521 CldnnTensorFromIEDims(deconvLayer->outData[0]->dims));
1522 m_topology->add(deconvPrim);
1524 auto deconvPrim = cldnn::deconvolution(deconvLayerName,
1532 CldnnTensorFromIEDims(deconvLayer->outData[0]->dims));
1533 m_topology->add(deconvPrim);
1535 m_env.primitiveIDs[deconvLayerName] = deconvLayerName;
1536 m_env.profilingIDs.push_back(deconvLayerName);
// Builds a cldnn::crop from an IE Crop layer: translates per-axis offsets
// into a clDNN offset tensor and uses the output dims as the reference size.
1539 void CLDNNGraph::CreateCropPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1540 if (layer->insData.size() != 1 && layer->insData.size() != 2) {
1541 THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name);
1543 if (layer->_fusedWith) {
1544 THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
1546 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1547 auto cropLayer = dynamic_cast<InferenceEngine::CropLayer*> (layer.get());
1548 IE_ASSERT(cropLayer->axis.size() == cropLayer->offset.size());
1549 // IE_ASSERT(cropLayer->outData[0] && cropLayer->outData[0]->dims.size() == 4);
// Scatter the layer's (axis, offset) pairs into a dense 4-element offset;
// only axes 0..3 are representable.
1551 std::vector<cldnn::tensor::value_type> offset{ 0, 0, 0, 0 };
1552 for (size_t i = 0; i < cropLayer->axis.size(); i++) {
1553 if (cropLayer->axis[i] < 0 || cropLayer->axis[i] > 3) {
1554 THROW_CLDNN_EXCEPTION("Invalid crop axis: " + std::to_string(cropLayer->axis[i]) + " in layer " + cropLayer->name);
1556 offset[cropLayer->axis[i]] = cropLayer->offset[i];
1558 auto outputDims = cropLayer->outData[0]->dims;
1559 size_t ods = outputDims.size();
// Note the IE->clDNN dim reordering in both tensors below (bfyx vs bfxy).
1560 cldnn::tensor refSize(
1561 TensorValue(ods > 3 ? outputDims[3] : 1),
1562 TensorValue(ods > 2 ? outputDims[2] : 1),
1563 TensorValue(outputDims[0]),
1564 TensorValue(outputDims[1]));
1566 cldnn::tensor offSize(
1567 TensorValue(offset[0]),
1568 TensorValue(offset[1]),
1569 TensorValue(offset[3]),
1570 TensorValue(offset[2]));
1572 std::string cropLayerName = layer_type_name_ID(layer);
1573 auto cropPrim = cldnn::crop(
1578 m_env.primitiveIDs[cropLayerName] = cropLayerName;
1579 m_topology->add(cropPrim);
1580 m_env.profilingIDs.push_back(cropLayerName);
// Builds a cldnn::roi_pooling from an IE ROIPooling layer (max pooling by
// default, bilinear when requested; not position-sensitive).
1583 void CLDNNGraph::CreateROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1584 ValidateLayer(layer, 2);
1585 auto roiPoolingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1588 int pooled_width = roiPoolingLayer->GetParamAsInt("pooled_w", 0);
1589 int pooled_height = roiPoolingLayer->GetParamAsInt("pooled_h", 0);
1590 float spatial_scale = roiPoolingLayer->GetParamAsFloat("spatial_scale", 1.0f);
1591 std::string method = roiPoolingLayer->GetParamAsString("method", "max");
1592 bool position_sensitive = false;
1594 cldnn::pooling_mode mode = cldnn::pooling_mode::max;
1595 if (method == "bilinear") {
1596 mode = cldnn::pooling_mode::bilinear;
1598 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1600 std::string roiPoolingLayerName = layer_type_name_ID(layer);
1601 auto roiPoolingPrim = cldnn::roi_pooling(roiPoolingLayerName,
1602 inputPrimitives[0], // input data
1603 inputPrimitives[1], // input rois
1609 m_env.primitiveIDs[roiPoolingLayerName] = roiPoolingLayerName;
1610 m_topology->add(roiPoolingPrim);
1611 m_env.profilingIDs.push_back(roiPoolingLayerName);
// Builds a position-sensitive ROI pooling (PSROIPooling) as a
// cldnn::roi_pooling with position_sensitive = true; mode is average
// unless "bilinear" is requested.
1614 void CLDNNGraph::CreatePSROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1615 ValidateLayer(layer, 2);
1616 auto psROIPoolingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1619 int group_size = psROIPoolingLayer->GetParamAsInt("group_size");
1620 int output_dim = psROIPoolingLayer->GetParamAsInt("output_dim");
1621 float spatial_scale = psROIPoolingLayer->GetParamAsFloat("spatial_scale");
1622 size_t spatial_bins_x = static_cast<size_t>(psROIPoolingLayer->GetParamAsInt("spatial_bins_x", 1));
1623 size_t spatial_bins_y = static_cast<size_t>(psROIPoolingLayer->GetParamAsInt("spatial_bins_y", 1));
1624 std::string mode_str = psROIPoolingLayer->GetParamAsString("mode", "average");
1625 bool position_sensitive = true;
1627 cldnn::pooling_mode mode = mode_str == "average" ? cldnn::pooling_mode::average
1628 : cldnn::pooling_mode::bilinear;
1630 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1632 std::string psROIPoolingLayerName = layer_type_name_ID(layer);
1633 auto psROIPoolingPrim = cldnn::roi_pooling(psROIPoolingLayerName,
1634 inputPrimitives[0], // input data
1635 inputPrimitives[1], // input rois
1645 m_env.primitiveIDs[psROIPoolingLayerName] = psROIPoolingLayerName;
1646 m_topology->add(psROIPoolingPrim);
1647 m_env.profilingIDs.push_back(psROIPoolingLayerName);
// Builds a cldnn::custom_gpu_primitive from a user-registered custom-layer
// config: assembles OpenCL #defines from layer params, materializes blobs,
// maps kernel arguments (inputs/outputs/data) with optional input reorders,
// evaluates global/local work-size expressions, and optionally appends an
// output reorder when the kernel's output format isn't the graph default.
1650 void CLDNNGraph::CreateCustomLayerPrimitive(InferenceEngine::CNNLayerPtr & layer, CLDNNCustomLayerPtr customLayer) {
1651 ValidateLayer(layer, 0);
1652 // todo: handling fusing
1653 auto genericLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1654 auto inputPrimitives = GetPrevLayersPrimitives(layer);
// Build "#define <name> <prefix><value><postfix>" lines; the value comes
// from the layer's params when present, else the configured default.
1657 std::string layerDefines;
1658 for (const auto& def : customLayer->Defines()) {
1659 std::string singleDefine("#define " + def.name + " " + def.prefix);
1660 if (genericLayer->params.find(def.param) != genericLayer->params.end()) {
1661 singleDefine += genericLayer->params.at(def.param);
1663 singleDefine += def.default_value;
1665 singleDefine += def.postfix + "\n";
1666 layerDefines.append(singleDefine);
// reorderedInputs starts as empty slots for the real inputs; blob
// primitives are appended after them and addressed via blobIndex.
1670 std::vector<cldnn::primitive_id> reorderedInputs;
1671 reorderedInputs.resize(inputPrimitives.size());
1674 std::map<std::string, size_t> blobIndex;
1675 for (auto& blob : genericLayer->blobs) {
1676 // create primitive from blob (always 1d)
1677 cldnn::primitive_id blobId = genericLayer->name + "_" + blob.first;
1678 if (blob.second->dims().size() != 1) {
1679 THROW_CLDNN_EXCEPTION("Invalid dimensions for blob " << blob.first << " in layer " << genericLayer->name);
1681 CreatePrimitiveFromBlob(blobId, blob.second, cldnn::layout(
1682 DataTypeFromPrecision(blob.second->precision()),
1684 cldnn::tensor(1, 1, TensorValue(blob.second->dims()[0]), 1)));
1685 // save index in blobIndex
1686 blobIndex[blob.first] = reorderedInputs.size();
1687 // add to reorderedInputs
1688 reorderedInputs.push_back(blobId);
1691 // Handle kernel parameters
1692 std::vector<cldnn_arg> kernelParameters;
1693 cldnn::format outputFormat(cldnn::format::any);
1694 for (const auto& param : customLayer->KernelParams()) {
1695 switch (param.type) {
1696 case CLDNNCustomLayer::ParamType::Input: {
// Grow the argument table as needed; out-of-range ports map to index -1.
1697 kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
1698 kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input;
1699 kernelParameters[param.paramIndex].index = static_cast<cldnn_arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
1701 // Handle input reorder
1702 if (param.portIndex < inputPrimitives.size() && reorderedInputs[param.portIndex].empty()) {
1703 // todo: add support for multiple reorders of the same input? (read as bfyx for one arg and yxfb for another)
1704 if (param.format != cldnn::format::any) {
// Kernel wants a specific format: insert a reorder in front of this input.
1705 auto reorderPrimName = inputPrimitives[param.portIndex] + "_" + layer->name + m_preCustomLayerTag;
1706 auto preprocessPrim = cldnn::reorder(
1708 inputPrimitives[param.portIndex],
1710 DataTypeFromPrecision(layer->precision));
1711 m_topology->add(preprocessPrim);
1712 m_env.profilingIDs.push_back(reorderPrimName);
1713 InitProfileInfo(reorderPrimName, "Reorder");
1714 reorderedInputs[param.portIndex] = (reorderPrimName);
1716 reorderedInputs[param.portIndex] = inputPrimitives[param.portIndex];
1721 case CLDNNCustomLayer::ParamType::Output: {
1722 kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
1723 kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_output;
1724 kernelParameters[param.paramIndex].index =
1725 static_cast<cldnn_arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
1726 outputFormat = param.format;
1729 case CLDNNCustomLayer::ParamType::Data: {
// Data params refer to named blobs registered above; unknown names -> -1.
1730 kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
1731 kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input;
1732 kernelParameters[param.paramIndex].index =
1733 static_cast<cldnn_arg_index>((blobIndex.find(param.blobName) == blobIndex.end()) ? -1 : blobIndex.at(param.blobName));
1737 THROW_CLDNN_EXCEPTION("Invalid custom layer param type: " << param.type << " in layer: " << genericLayer->name);
1740 const std::string layerTitle("\n// Layer " + layer->name + " using Custom Layer " + customLayer->Name() + "\n");
1741 const std::string defineTitle("// Custom Layer User Defines\n");
// Derive the output tensor (N, C, H, W) from the layer's output dims,
// defaulting missing dims to 1.
1743 auto dims = genericLayer->outData[0]->dims;
1744 std::reverse(dims.begin(), dims.end());
1746 size_t N = (dims.size() > 0) ? dims[0] : 1;
1747 size_t C = (dims.size() > 1) ? dims[1] : 1;
1748 size_t H = (dims.size() > 2) ? dims[2] : 1;
1749 size_t W = (dims.size() > 3) ? dims[3] : 1;
1750 cldnn::tensor outputTensor = cldnn::tensor(cldnn::batch(N), cldnn::feature(C), cldnn::spatial(W, H));
1752 cldnn::layout outputLayout = cldnn::layout(DataTypeFromPrecision(genericLayer->precision), outputFormat, outputTensor);
1754 // evaluate work sizes rules
1755 std::vector<size_t> gws, lws;
1757 // assume output tensor is dimension source by default
1758 int batchDim = outputTensor.batch[0];
1759 int featureDim = outputTensor.feature[0];
1760 int yDim = outputTensor.spatial[1];
1761 int xDim = outputTensor.spatial[0];
1762 int iidx = customLayer->InputDimSourceIndex();
1764 std::string genericLayerName = layer_type_name_ID(layer);
1765 // if input index is greater than -1, take dimension from input
1767 if (iidx >= genericLayer->insData.size())
1768 THROW_CLDNN_EXCEPTION("Invalid input tensor for index: " << iidx);
1769 // get dimensions from one of the input tensors
1770 auto inDataPtr = genericLayer->insData[iidx].lock();
1772 THROW_CLDNN_EXCEPTION("Data inserted into generic layer " << genericLayer->name << " is nullptr");
1774 auto inputDims = inDataPtr->dims;
// Overwrite the defaults with the selected input's dims (innermost first).
1776 batchDim = featureDim = yDim = 0;
1777 xDim = inputDims[0];
1779 if (dims.size() > 1)
1780 yDim = inputDims[1];
1781 if (dims.size() > 2)
1782 featureDim = inputDims[2];
1783 if (dims.size() > 3)
1784 batchDim = inputDims[3];
// Variables available to the user's work-size expressions (b/f/y/x in
// either case).
1786 const std::map<char, int> vars = {
1787 { 'b', batchDim } , { 'B', batchDim },
1788 { 'f', featureDim }, { 'F', featureDim },
1789 { 'y', yDim }, { 'Y', yDim },
1790 { 'x', xDim }, { 'X', xDim },
1792 for (auto rule : customLayer->GlobalSizeRules()) {
1793 SimpleMathExpression expr;
1794 expr.SetVariables(vars);
1795 expr.SetExpression(rule);
1796 gws.push_back(expr.Evaluate());
1798 for (auto rule : customLayer->LocalSizeRules()) {
1799 SimpleMathExpression expr;
1800 expr.SetVariables(vars);
1801 expr.SetExpression(rule);
1802 lws.push_back(expr.Evaluate());
1805 auto customPrim = cldnn::custom_gpu_primitive(
1808 { layerTitle, defineTitle, layerDefines, customLayer->KernelSource() },
1809 customLayer->KernelEntry(),
1811 customLayer->CompilerOptions(),
// If the kernel produces a specific format and this layer isn't a network
// output, reorder back so downstream primitives see the expected layout.
1816 if (outputLayout.format != cldnn::format::any &&
1817 p_currentOutputs->find(genericLayerName) == p_currentOutputs->end()) {
1818 // Handle output reorder
1819 auto reorderPrimName = genericLayerName + m_postCustomLayerTag;
1825 customPrim.output_layout.data_type));
1826 m_env.primitiveIDs[genericLayerName] = reorderPrimName;
1827 m_env.primitiveIDs[reorderPrimName] = reorderPrimName;
1828 m_env.profilingIDs.push_back(reorderPrimName);
1829 InitProfileInfo(reorderPrimName, "Reorder");
1831 m_env.primitiveIDs[genericLayerName] = genericLayerName;
1833 m_topology->add(customPrim);
1834 m_env.profilingIDs.push_back(genericLayerName);
// Builds a cldnn::proposal for the SimplerNMS layer variant. Only batch
// size 1 is supported, and the anchor aspect ratios are fixed.
1837 void CLDNNGraph::CreateSimplerNMSPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1838 ValidateLayer(layer, 3);
1839 IE_ASSERT(layer->insData[0].lock()->dims[3] == 1); // only handling input batch size 1
1840 IE_ASSERT(layer->insData[1].lock()->dims[3] == 1); // only handling input batch size 1
1841 auto simpleNMSLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1843 int max_num_proposals = simpleNMSLayer->GetParamAsInt("max_num_proposals");
1844 float iou_threshold = simpleNMSLayer->GetParamAsFloat("iou_threshold", 0.7f);
1845 int min_bbox_size = simpleNMSLayer->GetParamAsInt("min_bbox_size", 16);
1846 int feature_stride = simpleNMSLayer->GetParamAsInt("feat_stride", 16);
1847 int pre_nms_topn = simpleNMSLayer->GetParamAsInt("pre_nms_topn");
1848 int post_nms_topn = simpleNMSLayer->GetParamAsInt("post_nms_topn");
1849 std::vector<float> scale = simpleNMSLayer->GetParamAsFloats("scale");
1850 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1852 std::string simpleNMSLayerName = layer_type_name_ID(layer);
1853 auto simpleNMSPrim = cldnn::proposal(
1855 inputPrimitives[0], // cls_score
1856 inputPrimitives[1], // bbox_pred
1857 inputPrimitives[2], // im_info
1864 { 0.5f, 1.0f, 2.0f }, // ratios for the SimplerNMS variant
1867 m_env.primitiveIDs[simpleNMSLayerName] = simpleNMSLayerName;
1868 m_topology->add(simpleNMSPrim);
1869 m_env.profilingIDs.push_back(simpleNMSLayerName);
// Builds a cldnn::eltwise from an IE Eltwise layer. Operand coefficients
// are only valid for the Sum operation and must match the operand count.
1872 void CLDNNGraph::CreateEltwisePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1873 ValidateEltwiseLayer(layer);
1875 auto eltwiseLayer = dynamic_cast<InferenceEngine::EltwiseLayer *> (layer.get());
1876 auto inputPrimitives = GetPrevLayersPrimitives(layer);
1878 std::vector<float> coefficients = eltwiseLayer->coeff;
1879 if (eltwiseLayer->_operation != InferenceEngine::EltwiseLayer::Sum && !coefficients.empty()) {
1880 THROW_IE_EXCEPTION << "Only sum operation supports operands coefficients";
1883 if (!coefficients.empty() && coefficients.size() != inputPrimitives.size()) {
1884 THROW_IE_EXCEPTION << "Number of provided coefficients is not equal to number of operands";
1887 std::string eltwiseLayerName = layer_type_name_ID(layer);
1888 auto eltwisePrim = cldnn::eltwise(
1891 EltwiseModeFromIEEltwise(eltwiseLayer->_operation),
1893 m_env.primitiveIDs[eltwiseLayerName] = eltwiseLayerName;
1894 m_topology->add(eltwisePrim);
1895 m_env.profilingIDs.push_back(eltwiseLayerName);
// Builds a cldnn::concatenation primitive from an IE Concat layer.
// The IE axis index is translated to a clDNN concatenation axis via
// ConcatAxisFromIEAxis.
1898 void CLDNNGraph::CreateConcatenatePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1899     ValidateLayer(layer, 0);
1900     auto concatLayer = dynamic_cast<InferenceEngine::ConcatLayer *> (layer.get());
1901     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1902     std::string concatLayerName = layer_type_name_ID(layer);
1903     auto concatPrim = cldnn::concatenation(
1906         ConcatAxisFromIEAxis(concatLayer->_axis));
1907     m_env.primitiveIDs[concatLayerName] = concatLayerName;
1908     m_topology->add(concatPrim);
1909     m_env.profilingIDs.push_back(concatLayerName);
// Handles an IE Split layer. Two paths:
//  * If the split feeds the AlexNet-style split->conv*2->merge pattern,
//    delegate to CreateFusedSplitConvMergePrimitive and emit one fused conv.
//  * Otherwise (the _USE_SPLIT_PRIMITIVE branch is compiled out by default)
//    emulate the split with one cldnn::crop per output, advancing a running
//    start offset along the split dimension, and mark the Split layer itself
//    as OPTIMIZED_OUT.
// NOTE(review): several guard/closing lines of this body are elided in this view.
1912 void CLDNNGraph::CreateSplitPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1913     ValidateLayer(layer, 1);
1914     auto splitLayer = dynamic_cast<InferenceEngine::SplitLayer *> (layer.get());
1915     if (IsValidSplitConvMerge(splitLayer)) {
1916         // AlextNet style split->conv*2->merge
1917         CreateFusedSplitConvMergePrimitive(layer);
1919 #ifdef _USE_SPLIT_PRIMITIVE
1920         auto inputPrimitives = GetPrevLayersPrimitives(layer);
1921         auto inputDims = splitLayer->insData[0].lock()->dims;
1922         InferenceEngine::SizeVector startOffset(inputDims.size());
1923         std::vector<std::pair<cldnn::primitive_id, cldnn::tensor>> outputOffsets;
1924         std::cout << "Splitting layer: " << layer->name << "\n\tSize:" << CldnnTensorFromIEDims(inputDims) << std::endl;
1925         for (auto& outLayer : splitLayer->outData) {
// Each output must have the same rank as the input and fit inside it.
1926             if (outLayer->dims.size() != startOffset.size()) {
1927                 THROW_CLDNN_EXCEPTION("Invalid dimesions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1929             for (size_t i = 0; i < inputDims.size(); i++) {
1930                 if ((outLayer->dims[i] + startOffset[i]) > inputDims[i]) {
1931                     THROW_CLDNN_EXCEPTION("Invalid dimesions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1934             auto outTensor = CldnnTensorFromIEDims(outLayer->dims);
1935             auto cropPrim = cldnn::crop(outLayer->name, inputPrimitives[0], outTensor, CldnnTensorFromIEDims(startOffset));
1936             m_topology->add(cropPrim);
1937             m_env.primitiveIDs[outLayer->name] = outLayer->name;
1938             m_env.profilingIDs.push_back(outLayer->name);
1939             outputOffsets.push_back({ outLayer->name, CldnnTensorFromIEDims(startOffset) });
// Advance the running offset only along dimensions that are actually split.
1940             for (size_t i = 0; i < inputDims.size(); i++) {
1941                 if (outLayer->dims[i] != inputDims[i]) {
1942                     startOffset[i] += outLayer->dims[i];
1947         auto splitPrim = cldnn::split(
1951         m_topology->add(splitPrim);
1954         // set split as not_run
1955         InitProfileInfo(layer->name, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);  // Mark this layer as optimized out
1957 #else  // _USE_SPLIT_PRIMITIVE
1958         // TODO: replace with clDNN split when it's implemented
1959         auto inputPrimitives = GetPrevLayersPrimitives(layer);
1960         auto inDataPtr = splitLayer->insData[0].lock();
1962             THROW_CLDNN_EXCEPTION("Data inserts into split layer " << splitLayer->name << " is nullptr");
1964         auto inputDims = inDataPtr->dims;
1965         InferenceEngine::SizeVector startOffset(inputDims.size());
// Local helper: converts an IE dims vector (already reversed to bfyx order by
// the caller) to a clDNN tensor, padding missing dimensions with `def`
// (1 for sizes, 0 for offsets).
1967         auto TensorFromIEDims = [](const InferenceEngine::SizeVector& dims, int def) {
1968             switch (dims.size()) {
1969             case 1: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(def, def));
1970             case 2: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(def, def));
1971             case 3: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(def, dims[2]));
1972             case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
1973             default: THROW_CLDNN_EXCEPTION("Invalid dimensions size(" << dims.size() << ") in split layer");
1977         for (auto& outLayer : splitLayer->outData) {
1978             std::string outLayerName = splitLayer->type + ":" + outLayer->name;
1979             if (outLayer->dims.size() != startOffset.size()) {
1980                 THROW_CLDNN_EXCEPTION("Invalid dimesions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1982             for (size_t i = 0; i < inputDims.size(); i++) {
1983                 if ((outLayer->dims[i] + startOffset[i]) > inputDims[i]) {
1984                     THROW_CLDNN_EXCEPTION("Invalid dimesions in split layer: " << splitLayer->name << " output: " << outLayer->name);
// IE dims are stored innermost-first; reverse to match the helper's layout.
1987             SizeVector reverseDims = outLayer->dims;
1988             std::reverse(reverseDims.begin(), reverseDims.end());
1989             auto outTensor = TensorFromIEDims(reverseDims, 1);
1991             SizeVector reverseOffset = startOffset;
1992             std::reverse(reverseOffset.begin(), reverseOffset.end());
1993             auto offsetTensor = TensorFromIEDims(reverseOffset, 0);
// Each split output becomes an independent crop of the shared input.
1995             auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor);
1996             m_env.primitiveIDs[outLayerName] = outLayerName;
1997             m_topology->add(cropPrim);
1998             m_env.profilingIDs.push_back(outLayerName);
1999             InitProfileInfo(outLayerName, "Crop");
2001             for (size_t i = 0; i < inputDims.size(); i++) {
2002                 if (outLayer->dims[i] != inputDims[i]) {
2003                     startOffset[i] += outLayer->dims[i];
2008         // set split as not_run
2009         InitProfileInfo(layer->name, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);  // Mark this layer as optimized out
2010 #endif  // _USE_SPLIT_PRIMITIVE
// Fuses the AlexNet-style split -> conv x2 -> merge subgraph into a single
// cldnn::convolution primitive (grouped convolution with the two convs'
// weights/biases concatenated). The conv and concat layers are marked
// OPTIMIZED_OUT, and all of their primitive IDs are redirected to the one
// fused primitive so downstream layers resolve their input correctly.
// Precondition: IsValidSplitConvMerge(splitLayer) holds (asserted below).
2014 void CLDNNGraph::CreateFusedSplitConvMergePrimitive(InferenceEngine::CNNLayerPtr &layer) {
2015     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2016     // only handle the split->conv->merge topology for now
2017     auto splitLayer = dynamic_cast<InferenceEngine::SplitLayer *> (layer.get());
2018     IE_ASSERT(IsValidSplitConvMerge(splitLayer));
2021         dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[0]).get());
2023         dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[1]).get());
2025         dynamic_cast<InferenceEngine::ConcatLayer *> (GetNextSingleLayer(
2026             GetNextSingleLayer(splitLayer->outData[0])).get());
2028     if (convLayer1 == nullptr ||
2029         convLayer2 == nullptr ||
2030         concatLayer == nullptr) {
2031         THROW_CLDNN_EXCEPTION("Expected single layer does not exist");
2033     // Mark these layers as optimized out
2034     InitProfileInfo(convLayer1->name, convLayer1->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
2035     InitProfileInfo(convLayer2->name, convLayer2->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
2036     InitProfileInfo(concatLayer->name, concatLayer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
2038     // build the split conv primitive
// Weight/bias primitives for both conv branches are appended to the same
// vectors, producing the fused primitive's weight list.
2039     std::vector<cldnn::primitive_id> weightPrimID;
2040     std::vector<cldnn::primitive_id> biasPrimID;
2041     CreateWeightAndBiasPrimitives(GetNextSingleLayer(splitLayer->outData[0]), weightPrimID, biasPrimID);
2042     CreateWeightAndBiasPrimitives(GetNextSingleLayer(splitLayer->outData[1]), weightPrimID, biasPrimID);
2044     auto concatLayerPtr = std::make_shared<InferenceEngine::CNNLayer>(*concatLayer);
// Stride/padding/dilation are taken from the first conv branch; the pattern
// validation guarantees both branches match.
2046     cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
2047                                          cldnn::spatial(convLayer1->_stride[X_AXIS], convLayer1->_stride[Y_AXIS]));
2048     auto allPad = getPaddings(*convLayer1);
2049     cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
2050                                           cldnn::spatial(-allPad.begin[X_AXIS], -allPad.begin[Y_AXIS]));
2051     cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
2052                                            cldnn::spatial(convLayer1->_dilation[X_AXIS], convLayer1->_dilation[Y_AXIS]));
2054     std::string splitLayerName = layer_type_name_ID(layer);
2055     auto splitPrim = cldnn::convolution(splitLayerName,
2064                                         CldnnTensorFromIEDims(concatLayer->outData[0]->dims));
// Replace the in-flight layer pointer with the concat so graph traversal
// continues from the end of the fused subgraph.
2066     layer = concatLayerPtr;
2068     m_env.primitiveIDs[splitLayerName] = splitLayerName;
2069     m_env.primitiveIDs[layer_type_name_ID(convLayer1)] = splitLayerName;
2070     m_env.primitiveIDs[layer_type_name_ID(convLayer2)] = splitLayerName;
2071     m_env.primitiveIDs[layer_type_name_ID(concatLayer)] = splitLayerName;  // pair the last merged layer (concat or relu) with
2072                                                                            // this primitive name to be used as
2073                                                                            // input prim for subsequent layers
2074     m_topology->add(splitPrim);
2075     m_env.profilingIDs.push_back(splitLayerName);
// Builds primitives for an IE Power layer (out = (scale*x + offset)^power).
// Only power values 1.0 and 0.5 are supported. Special cases:
//  * scale==1 && offset==0 && power==0.5 -> a single sqrt activation;
//  * scale==1 && offset==0 && power==1.0 -> pass-through (layer NOT_RUN,
//    primitive ID redirected to the input);
//  * otherwise a cldnn::scale primitive (with optional bias constant),
//    followed by a sqrt activation when power==0.5.
2078 void CLDNNGraph::CreatePowerPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2079     ValidateLayer(layer, 1);
2080     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2081     auto powerLayer = dynamic_cast<InferenceEngine::PowerLayer *> (layer.get());
2082     if (powerLayer->power != 1.0f && powerLayer->power != 0.5f) {
2083         THROW_CLDNN_EXCEPTION("Power Layer " << layer->name << "uses unsupported power value");
2086     std::string powerLayerName = layer_type_name_ID(layer);
2087     if ((powerLayer->scale == 1.0f) && (powerLayer->offset == 0.0f)) {
2088         if (powerLayer->power == 0.5f) {
2089             auto activationPrim = cldnn::activation(powerLayerName, inputPrimitives[0], activation_sqrt);
2090             m_topology->add(activationPrim);
2091             m_env.profilingIDs.push_back(powerLayerName);
2092             m_env.primitiveIDs[powerLayerName] = powerLayerName;
2095             m_env.primitiveIDs[powerLayerName] = inputPrimitives[0];  // register the previous primID for this layer too
2096             InitProfileInfo(layer->name, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::NOT_RUN);  // Mark this layer as not run
2099         // create scale primitive
// Scale factor is materialized as a single-value constant primitive.
2100         auto scaleValuePrimName = powerLayerName + m_scalesTag;
2101         AddSingleValuePrimitive(scaleValuePrimName,
2102                                 DataTypeFromPrecision(powerLayer->precision),
// Bias constant is only created when offset is non-zero.
2105         cldnn::primitive_id biasValuePrimName = "";
2106         if (powerLayer->offset != 0.0f) {
2107             biasValuePrimName = powerLayerName + m_biasesTag;
2108             AddSingleValuePrimitive(biasValuePrimName,
2109                                     DataTypeFromPrecision(powerLayer->precision),
2110                                     powerLayer->offset);
2112         auto scalePrim = cldnn::scale(
2118         m_env.primitiveIDs[powerLayerName] = powerLayerName;
2119         m_topology->add(scalePrim);
2120         m_env.profilingIDs.push_back(powerLayerName);
2122         if (powerLayer->power == 0.5f) {
// sqrt is applied as a follow-up activation chained to the scale output.
2123             auto activationPrim = cldnn::activation(powerLayerName+"_sqrt", powerLayerName, activation_sqrt);
2124             m_topology->add(activationPrim);
2125             m_env.profilingIDs.push_back(powerLayerName+"_sqrt");
// Builds a cldnn::softmax primitive from an IE SoftMax layer.
// Workaround: clDNN FullyConnected outputs are laid out as BX rather than BF,
// so the softmax dimension mapping needs to know whether the producer of the
// input is a FullyConnected layer (isPrevFC).
2130 void CLDNNGraph::CreateSoftMaxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2131     ValidateLayer(layer, 1);
2132     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2133     auto softmaxLayer = dynamic_cast<InferenceEngine::SoftMaxLayer *> (layer.get());
2135     // additional WA for clDNN FullyConnected output in BX instead of BF
2137     auto prevData = layer->insData[0].lock();
2139     if (prevData == nullptr) {
2140         THROW_CLDNN_EXCEPTION("SoftMax: nonexistent input for layer: " << layer->name);
2143     auto prevCreator = prevData->creatorLayer.lock();
2144     bool isPrevFC = false;
2146     if (prevCreator && (LayerTypeFromStr(prevCreator->type) == FullyConnected))
2150     std::string softmaxLayerName = layer_type_name_ID(layer);
2151     auto softmaxPrim = cldnn::softmax(softmaxLayerName, inputPrimitives[0], SoftmaxDimensionFromIEAxis(softmaxLayer, isPrevFC));
2152     m_env.primitiveIDs[softmaxLayerName] = softmaxLayerName;
2153     m_topology->add(softmaxPrim);
2154     m_env.profilingIDs.push_back(softmaxLayerName);
// Builds a cldnn::fully_connected primitive from an IE FullyConnected layer.
// Creates the bias constant (when the layer has biases) and the weights
// constant; the weights tensor layout depends on the input data rank
// (4D conv-style input vs. already-flattened 2D input).
2157 void CLDNNGraph::CreateFullyConnectedPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2158     ValidateLayer(layer, 1);
2159     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2160     auto fcLayer = dynamic_cast<InferenceEngine::FullyConnectedLayer *> (layer.get());
2162     std::string fcLayerName = layer_type_name_ID(layer);
2163     // create bias primitive
2164     cldnn::primitive_id biasesPrimID = "";
2165     if (fcLayer->_biases != nullptr) {
2166         biasesPrimID = fcLayerName + m_biasesTag;
2167         CreatePrimitiveFromBlob(biasesPrimID,
// Bias layout: one spatial element per output neuron (_out_num).
2169                                 cldnn::layout(DataTypeFromPrecision(fcLayer->precision), m_defaultFormat,
2170                                               cldnn::spatial(TensorValue(fcLayer->_out_num))));
2173     // create weights primitive
2174     // gcc bug to resolve auto, at least for 5.4 version
2175     std::shared_ptr<Data> insData0 = fcLayer->insData[0].lock();
2176     IE_ASSERT(insData0 != nullptr);
2177     cldnn::primitive_id weightsPrimID = fcLayerName + m_weightsTag;
2178     cldnn::tensor weightsDims;
// Weight dims: [out_num, in_feature, in_y, in_x] for 4D inputs,
// [out_num, in_size, 1, 1] for flattened inputs; other ranks unsupported.
2179     switch (insData0->dims.size()) {
2181         weightsDims = { TensorValue(fcLayer->outData[0]->dims[0]),
2182                         TensorValue(insData0->dims[2]),
2183                         TensorValue(insData0->dims[0]),
2184                         TensorValue(insData0->dims[1]) };
2187         weightsDims = { TensorValue(fcLayer->outData[0]->dims[0]), TensorValue(insData0->dims[0]), 1, 1 };
2189     default: THROW_CLDNN_EXCEPTION("Invalid data dimensions");
2191     CreatePrimitiveFromBlob(weightsPrimID,
2193                             cldnn::layout(DataTypeFromPrecision(fcLayer->precision), m_defaultFormat, weightsDims));
2195     auto fcPrim = cldnn::fully_connected(fcLayerName,
2202     m_env.primitiveIDs[fcLayerName] = fcLayerName;
2203     m_topology->add(fcPrim);
2204     m_env.profilingIDs.push_back(fcLayerName);
// Builds a cldnn::pooling primitive from an IE Pooling layer.
// Two paths:
//  * Two outputs -> max pooling with argmax: the second output (the one that
//    feeds an Unpooling layer) is backed by a mutable_data buffer that stores
//    the argmax indices, and the pooling mode is max_with_argmax.
//  * Single output -> plain pooling with mode derived from the IE pool type.
2207 void CLDNNGraph::CreatePoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2208     ValidateLayer(layer, 1);
2209     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2210     auto poolLayer = dynamic_cast<InferenceEngine::PoolingLayer *> (layer.get());
2212     std::string poolLayerName = layer_type_name_ID(layer);
2213     auto allPads = getPaddings(*poolLayer);
2214     if (poolLayer->outData.size() > 1) {
2215         // max pooling with argmax
2216         SizeVector argmaxDims;
2218         std::string realOutputID, argmaxOutputID;
2219         int outputOrder = 0;
// Identify which of the two outputs is the argmax path: it is the second
// output that is consumed by an Unpooling layer.
2221         for (auto out : poolLayer->outData) {
2222             auto layersMap = out->getInputTo();
2224             for (auto item : layersMap) {
2225                 bool isUpooling = (LayerTypeFromStr(item.second->type) == Unpooling);
2226                 if (outputOrder == 1 && isUpooling) {
2227                     argmaxDims = out->dims;
2228                     argmaxOutputID = out->name;
2230                     realOutputID = out->name;
2236         // create mutable_data primitive for storing argmax data
// Build the bfyx tensor for the argmax buffer from the IE dims
// (innermost-first order), padding missing dims with 1.
2237         cldnn::tensor mutableTensor;
2238         switch (argmaxDims.size()) {
2239         case 4: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[3]), TensorValue(argmaxDims[2]),
2240                                               TensorValue(argmaxDims[0]), TensorValue(argmaxDims[1]));
2242         case 3: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[2]), TensorValue(argmaxDims[1]),
2243                                               1, TensorValue(argmaxDims[0]));
2245         case 2: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[1]), TensorValue(argmaxDims[0]), 1, 1);
2247         case 1:  // not implemented yet.
2248         default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions");
2251         cldnn::layout mutableLayout = cldnn::layout(
// Argmax indices are stored as f32 in the mutable buffer.
2252             cldnn::data_types::f32,
2256         cldnn::primitive_id argmaxPrimID = layer->name + "_argmax_mutable";
2258         auto mem = cldnn::memory::allocate(*(m_env.engine), mutableLayout);
2259         auto argmax_mutable_prim = cldnn::mutable_data(argmaxPrimID, mem);
2260         m_topology->add(argmax_mutable_prim);
2261         m_env.primitiveIDs[argmaxPrimID] = argmaxPrimID;
// The argmax output of the IE layer resolves to the mutable buffer; the
// unpooling layer later looks it up via "<name>_argmax_mutable".
2262         m_env.primitiveIDs[argmaxOutputID] = argmaxPrimID;
2264         // create pooling primitive itself
2265         auto poolPrim = cldnn::pooling(poolLayerName,
2268                                        cldnn::pooling_mode::max_with_argmax,
2269                                        cldnn::spatial(TensorValue(poolLayer->_kernel[X_AXIS]), TensorValue(poolLayer->_kernel[Y_AXIS])),  // size
2270                                        cldnn::spatial(TensorValue(poolLayer->_stride[X_AXIS]), TensorValue(poolLayer->_stride[Y_AXIS])),  // stride
2271                                        // input offset (padding) - explicit tensor for 0 bf
2272                                        { 0, 0, -TensorValue(allPads.begin[X_AXIS]), -TensorValue(allPads.begin[Y_AXIS]) },
2273                                        CldnnTensorFromIEDims(poolLayer->outData[0]->dims));
2274         m_topology->add(poolPrim);
2275         m_env.primitiveIDs[realOutputID] = poolLayerName;
2278         auto poolPrim = cldnn::pooling(poolLayerName,
2280                                        PoolingModeFromIEPooling(poolLayer->_type, poolLayer->_exclude_pad),
2281                                        cldnn::spatial(TensorValue(poolLayer->_kernel[X_AXIS]), TensorValue(poolLayer->_kernel[Y_AXIS])),  // size
2282                                        cldnn::spatial(TensorValue(poolLayer->_stride[X_AXIS]), TensorValue(poolLayer->_stride[Y_AXIS])),  // stride
2283                                        // input offset (padding) - explicit tensor for 0 bf
2284                                        { 0, 0, -TensorValue(allPads.begin[X_AXIS]), -TensorValue(allPads.begin[Y_AXIS]) },
2285                                        CldnnTensorFromIEDims(poolLayer->outData[0]->dims));
2286         m_topology->add(poolPrim);
2287         m_env.primitiveIDs[poolLayerName] = poolLayerName;
2290     m_env.profilingIDs.push_back(poolLayerName);
// Builds a cldnn::lrn primitive from an IE Norm (LRN) layer.
// _isAcrossMaps selects the across-channel vs. within-channel norm region.
2293 void CLDNNGraph::CreateLRNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2294     ValidateLayer(layer, 1);
2295     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2296     auto lrnLayer = dynamic_cast<InferenceEngine::NormLayer *> (layer.get());
2297     std::string lrnLayerName = layer_type_name_ID(layer);
2298     auto lrnPrim = cldnn::lrn(
2302         static_cast<float>(lrnLayer->_k),
2305         lrnLayer->_isAcrossMaps ? cldnn_lrn_norm_region_across_channel : cldnn_lrn_norm_region_within_channel);
2307     m_env.primitiveIDs[lrnLayerName] = lrnLayerName;
2308     m_topology->add(lrnPrim);
2309     m_env.profilingIDs.push_back(lrnLayerName);
// Builds a cldnn::activation primitive for an IE activation-style layer.
// When `type` is the generic Activation layer, the concrete function is read
// from the layer's "type" param; otherwise `type` already names the function
// (ReLU, TanH, ...). The switch below maps each function to a clDNN
// activation func plus its additional parameters (alpha/slope/clamp bounds).
2312 void CLDNNGraph::CreateActivationPrimitive(InferenceEngine::CNNLayerPtr &layer, const LayerType type) {
2313     ValidateLayer(layer, 1);
2314     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2315     cldnn_activation_additional_params params{ 0.0f, 0.0f };
2316     cldnn_activation_func func = cldnn_activation_func_t::activation_none;
2318     LayerType activationType;
2319     if (type == Activation) {
// Generic "Activation" layer: decode the concrete function from the param.
2320         std::string activation_type = layer->GetParamAsString("type");
2321         if (activation_type == "tanh") {
2322             activationType = TanH;
2323         } else if (activation_type == "sigmoid" || activation_type == "logistic")  {
2324             activationType = Sigmoid;
2325         } else if (activation_type == "elu")  {
2326             activationType = ELU;
2327         } else if (activation_type == "relu")  {
2328             activationType = ReLU;
2329         } else if (activation_type == "relu6")  {
2330             activationType = ReLU6;
2331         } else if (activation_type == "clamp")  {
2332             activationType = Clamp;
2333         } else if (activation_type == "exp")  {
2334             activationType = Exp;
2335         } else if (activation_type == "not")  {
2336             activationType = Not;
2338             THROW_CLDNN_EXCEPTION("Unsupported activation type (" + activation_type +
2339                                   ") in layer " + layer->name);
2342         activationType = type;
2345     switch (activationType) {
2348         func = cldnn_activation_func_t::activation_hyperbolic_tan;
2353         func = cldnn_activation_func_t::activation_elu;
2354         params.a = layer->GetParamAsFloat("alpha", 1.0f);
2359         func = cldnn_activation_func_t::activation_logistic;
2364         func = cldnn_activation_func_t::activation_relu_negative_slope;
2365         params.a = layer->GetParamAsFloat("negative_slope", 0.0f);
// ReLU6 is expressed as a clamp with upper bound "n" (default 6).
2370         func = cldnn_activation_func_t::activation_clamp;
2371         params.b = layer->GetParamAsFloat("n", 6.0f);
2376         func = cldnn_activation_func_t::activation_clamp;
2377         params.a = layer->GetParamAsFloat("min");
2378         params.b = layer->GetParamAsFloat("max");
2383         func = cldnn_activation_func_t::activation_exp;
2388         func = cldnn_activation_func_t::activation_not;
2392         THROW_CLDNN_EXCEPTION("Unsupported activation type (" + layer->type +
2393                               ") in layer " + layer->name);
2396     std::string layerName = layer_type_name_ID(layer);
2397     auto activationPrimitive = cldnn::activation(layerName, inputPrimitives[0], func, params);
2398     m_env.primitiveIDs[layerName] = layerName;
2399     m_topology->add(activationPrimitive);
2400     m_env.profilingIDs.push_back(layerName);
// Handles an IE Copy layer by optimizing it out: no clDNN primitive is
// created; the layer's primitive ID is simply redirected to its input and the
// layer is marked OPTIMIZED_OUT in the profiling info.
2403 void CLDNNGraph::CreateCopyPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2404     ValidateLayer(layer, 1);
2405     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2406     auto copyLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2408     // Optimize out and just update references
2409     std::string layerName = layer_type_name_ID(layer);
2410     m_env.primitiveIDs[layerName] = inputPrimitives[0];
2411     InitProfileInfo(layerName, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);  // Mark this layer as optimized out
// Builds a cldnn::upsampling primitive from a generic IE Upsampling layer.
// Reads scale / num_filter / sample_type params; the sample type string is
// translated via UpsamplingTypeFromString.
2414 void CLDNNGraph::CreateUpsamplingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2415     // Assuming multi-input will be handled by prev concat/eltwise layers
2416     ValidateLayer(layer, 1);
2417     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2418     auto upsamplingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2419     uint32_t scale = upsamplingLayer->GetParamAsUInt("scale");
2420     uint32_t numFilter = upsamplingLayer->GetParamAsUInt("num_filter");
2421     std::string sampleType = upsamplingLayer->GetParamAsString("sample_type");
2423     std::string upsamplingLayerName = layer_type_name_ID(layer);
2424     auto upsamplingPrim = cldnn::upsampling(
2425         upsamplingLayerName,
2429         UpsamplingTypeFromString(sampleType));
2431     m_env.primitiveIDs[upsamplingLayerName] = upsamplingLayerName;
2432     m_topology->add(upsamplingPrim);
2433     m_env.profilingIDs.push_back(upsamplingLayerName);
// Builds a cldnn::upsampling primitive (nearest mode) from a generic IE
// Resample layer. The scale factor is derived from the ratio of output to
// input spatial dims; only "caffe.ResampleParameter.NEAREST" is supported.
2436 void CLDNNGraph::CreateResamplePrimitive(InferenceEngine::CNNLayerPtr &layer) {
2437     ValidateLayer(layer, 1);
2438     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2439     auto resampleLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2441     auto outDims = layer->outData[0]->dims;
2442     size_t inFeatures = 1;
2443     unsigned int scale = 1;
2444     std::shared_ptr<Data> insData0 = layer->insData[0].lock();
2445     IE_ASSERT(insData0 != nullptr);
2446     if (insData0->dims.size() > 2) {
// IE dims are innermost-first: dims[2] is the feature count, dims[0] the
// innermost spatial dim used to compute the integer scale factor.
2447         inFeatures = insData0->dims[2];
2448         scale = outDims[0]/insData0->dims[0];
2450             THROW_CLDNN_EXCEPTION("Unsupported scale in layer " + layer->name);
2453     std::string sampleType = resampleLayer->GetParamAsString("type");
2455     if (sampleType != "caffe.ResampleParameter.NEAREST") {
2456         THROW_CLDNN_EXCEPTION("Unsupported resampling type (" + sampleType + ") in layer " + layer->name);
2459     std::string resampleLayerName = layer_type_name_ID(layer);
2460     auto upsamplingPrim = cldnn::upsampling(
2465         cldnn::upsampling_sample_type::nearest);
2467     m_env.primitiveIDs[resampleLayerName] = resampleLayerName;
2468     m_topology->add(upsamplingPrim);
2469     m_env.profilingIDs.push_back(resampleLayerName);
// Builds a cldnn::region_yolo primitive from a generic IE RegionYolo layer
// (YOLOv2-style region layer). Reads coords/classes/num/do_softmax params and
// the optional "mask" list, whose length is passed as mask_size.
2472 void CLDNNGraph::CreateYOLO2RegionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2473     ValidateLayer(layer, 1);
2474     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2475     auto YOLOregionLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2477     uint32_t coords = YOLOregionLayer->GetParamAsUInt("coords", 4);
2478     uint32_t classes = YOLOregionLayer->GetParamAsUInt("classes", 20);
2479     uint32_t num = YOLOregionLayer->GetParamAsUInt("num", 1);
2480     bool do_softmax = YOLOregionLayer->GetParamsAsBool("do_softmax", true);
2482     uint32_t mask_size = 0;
2483     if (HasParam(YOLOregionLayer->params, "mask")) {
2484         const auto mask = YOLOregionLayer->GetParamAsInts("mask");
2485         mask_size = static_cast<uint32_t>(mask.size());
2488     std::string YOLOregionLayerName = layer_type_name_ID(layer);
2489     auto regionPrim = cldnn::region_yolo(
2490         YOLOregionLayerName,
2498     m_env.primitiveIDs[YOLOregionLayerName] = YOLOregionLayerName;
2499     m_topology->add(regionPrim);
2500     m_env.profilingIDs.push_back(YOLOregionLayerName);
// Builds a cldnn::reorg_yolo primitive from a generic IE ReorgYolo layer
// (YOLOv2-style reorg), parameterized only by its "stride".
2503 void CLDNNGraph::CreateYOLO2ReorgPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2504     ValidateLayer(layer, 1);
2505     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2506     auto YOLOreorgLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2507     uint32_t stride = YOLOreorgLayer->GetParamAsUInt("stride");
2509     std::string YOLOreorgLayerName = layer_type_name_ID(layer);
2510     auto reorgPrim = cldnn::reorg_yolo(
2515     m_env.primitiveIDs[YOLOreorgLayerName] = YOLOreorgLayerName;
2516     m_topology->add(reorgPrim);
2517     m_env.profilingIDs.push_back(YOLOreorgLayerName);
// Builds a cldnn::arg_max_min primitive from a generic IE ArgMax layer.
// out_max_val=1 is rejected (not implemented). top_k defaults to 1.
// Without an "axis" param the reduction spans x, y and feature (xyf);
// otherwise the IE axis (negative values normalized first) maps to
// batch/feature/y/x.
2520 void CLDNNGraph::CreateArgMaxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2521     ValidateLayer(layer, 1);
2522     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2523     auto ArgMaxLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2524     const cldnn::arg_max_min::out_type otype = cldnn::arg_max_min::out_type::max;
2526     if (HasParam(ArgMaxLayer->params, "out_max_val")) {
2527         int32_t out_max_val_flag = ArgMaxLayer->GetParamAsInt("out_max_val");
2528         if (out_max_val_flag != 0) {
2529             THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str << "ArgMax: out_max_val param is not supported for layer: " << layer->name;
2533     uint32_t top_k = ArgMaxLayer->GetParamAsUInt("top_k", 1);
2535     cldnn::arg_max_min::axis_name chosen_axis = cldnn::arg_max_min::axis_name::xyf;
2537     if (HasParam(ArgMaxLayer->params, "axis")) {
2538         int32_t axis_param = ArgMaxLayer->GetParamAsInt("axis", 1);
2540         int32_t axis = axis_param;
// Negative axes count from the end; normalize into [0, 3].
2541         if (-4 <= axis && axis <= -1)
2545         case 0: chosen_axis = cldnn::arg_max_min::axis_name::batch; break;
2546         case 1: chosen_axis = cldnn::arg_max_min::axis_name::feature; break;
2547         case 2: chosen_axis = cldnn::arg_max_min::axis_name::y; break;
2548         case 3: chosen_axis = cldnn::arg_max_min::axis_name::x; break;
2552     std::string ArgMaxLayerName = layer_type_name_ID(layer);
2553     auto argmaxPrim = cldnn::arg_max_min(
2560     m_env.primitiveIDs[ArgMaxLayerName] = ArgMaxLayerName;
2561     m_topology->add(argmaxPrim);
2562     m_env.profilingIDs.push_back(ArgMaxLayerName);
// Builds a cldnn::max_unpooling primitive from a generic IE Unpooling layer.
// The layer has two inputs: the real data input and the argmax indices
// produced by a matching max-pooling-with-argmax layer. The loop below
// distinguishes them by inspecting each input's creator: a multi-output
// Pooling producer supplies the "<name>_argmax_mutable" buffer (registered by
// CreatePoolingPrimitive), anything else is the real data input.
2565 void CLDNNGraph::CreateMaxUnpoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2566     ValidateLayer(layer, 2);
2568     auto UnpoolingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2570     cldnn::primitive_id real_input, argmax_mutable;
2572     // locate ArgMax primitive
2574     for (auto inputData : layer->insData) {
2575         auto prevData = inputData.lock();
2577         if (prevData == nullptr) {
2578             THROW_CLDNN_EXCEPTION("MaxUnpooling: nonexistent input for layer: " << layer->name);
2581         auto prevCreator = prevData->creatorLayer.lock();
2584             (LayerTypeFromStr(prevCreator->type) == Pooling) &&
2585             prevCreator->outData.size() > 1 &&
2587             argmax_mutable = m_env.primitiveIDs.at(prevCreator->name + "_argmax_mutable");
2589             real_input = m_env.primitiveIDs.at(prevData->name);
2594     uint32_t stride = UnpoolingLayer->GetParamAsUInt("stride");
2595     uint32_t kernel_size = UnpoolingLayer->GetParamAsUInt("kernel_size");
2597     std::string UnpoolingLayerName = layer_type_name_ID(layer);
2598     auto unpoolingPrim = cldnn::max_unpooling(
// Square kernel/stride: the IE params are single scalars.
2602         cldnn::spatial(kernel_size, kernel_size),  // size
2603         cldnn::spatial(stride, stride) );  // stride
2605     m_env.primitiveIDs[UnpoolingLayerName] = UnpoolingLayerName;
2606     m_topology->add(unpoolingPrim);
2607     m_env.profilingIDs.push_back(UnpoolingLayerName);
// Builds a cldnn::mvn (mean-variance normalization) primitive from a generic
// IE MVN layer, reading across_channels / normalize_variance / eps params
// with the same defaults the IE reference uses.
2610 void CLDNNGraph::CreateMVNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2611     ValidateLayer(layer, 1);
2612     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2613     auto MvnLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2615     bool across_channels = MvnLayer->GetParamsAsBool("across_channels", false);
2616     bool normalize_variance = MvnLayer->GetParamsAsBool("normalize_variance", true);
2617     float eps = MvnLayer->GetParamAsFloat("eps", 1e-10f);
2619     std::string MvnLayerName = layer_type_name_ID(layer);
2620     auto mvnPrim = cldnn::mvn(
2627     m_env.primitiveIDs[MvnLayerName] = MvnLayerName;
2628     m_topology->add(mvnPrim);
2629     m_env.profilingIDs.push_back(MvnLayerName);
// Builds a cldnn::tile primitive from a generic IE Tile layer. The IE axis
// index (0..3) is mapped to the clDNN tile axis (b/f/y/x) by a local helper.
2632 void CLDNNGraph::CreateTilePrimitive(InferenceEngine::CNNLayerPtr &layer) {
2633     ValidateLayer(layer, 1);
2634     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2635     auto tileLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2637     int axis = tileLayer->GetParamAsInt("axis", 1);
2638     int tiles = tileLayer->GetParamAsInt("tiles");
// Local helper: IE axis index -> clDNN tile axis enum.
2640     auto cldnnAxisFromIE = [](int axis) {
2642         case 0: return cldnn::tile::tile_axis::along_b;
2643         case 1: return cldnn::tile::tile_axis::along_f;
2644         case 2: return cldnn::tile::tile_axis::along_y;
2645         case 3: return cldnn::tile::tile_axis::along_x;
2646         default: THROW_CLDNN_EXCEPTION("Unsupported tile axis: " << axis);
2649     std::string tileLayerName = layer_type_name_ID(layer);
2650     auto tilePrim = cldnn::tile(
2653         cldnnAxisFromIE(axis),
2656     m_env.primitiveIDs[tileLayerName] = tileLayerName;
2657     m_topology->add(tilePrim);
2658     m_env.profilingIDs.push_back(tileLayerName);
// Builds a cldnn::border primitive from a generic IE Pad layer.
// pads_begin/pads_end are comma-separated lists parsed into clDNN tensors;
// pad_mode selects the border type (constant/edge/symmetric/reflect map to
// constant/edge/mirror/mirror_101); pad_value is used for constant mode.
2661 void CLDNNGraph::CreatePadPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2662     ValidateLayer(layer, 1);
2663     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2664     auto padLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
// Local helper: parse "a,b,c,d" into a cldnn::tensor, padding short lists
// and swapping the two spatial entries into clDNN's expected order.
2666     auto PadTensorFromArgs = [](const std::string &s) -> cldnn::tensor {
2667         std::stringstream ss(s);
2669         std::vector<cldnn::tensor::value_type> elems;
2670         while (std::getline(ss, item, ',')) {
2671             elems.push_back(static_cast<cldnn::tensor::value_type>(std::atoll(item.c_str())));
2674         while (elems.size() < 4) {
// Swap the y/x entries (elements 2 and 3).
2679         auto tmp = elems[2];
2680         elems[2] = elems[3];
2683         return cldnn::tensor(elems, 0);
2686     auto pads_begin = PadTensorFromArgs(padLayer->GetParamAsString("pads_begin"));
2687     auto pads_end = PadTensorFromArgs(padLayer->GetParamAsString("pads_end"));
2688     std::string mode = padLayer->GetParamAsString("pad_mode");
2689     float pad_value = padLayer->GetParamAsFloat("pad_value", 0.0f);
2691     cldnn::border_type border_mode;
2692     if (mode == "constant")
2693         border_mode = cldnn::border_type::constant;
2694     else if (mode == "edge")
2695         border_mode = cldnn::border_type::edge;
2696     else if (mode == "symmetric")
2697         border_mode = cldnn::border_type::mirror;
2698     else if (mode == "reflect")
2699         border_mode = cldnn::border_type::mirror_101;
2701         THROW_CLDNN_EXCEPTION("Invalid border mode " << mode << " in layer " << padLayer->name);
2703     std::string padLayerName = layer_type_name_ID(layer);
2704     auto tilePrim = cldnn::border(
2712     m_env.primitiveIDs[padLayerName] = padLayerName;
2713     m_topology->add(tilePrim);
2714     m_env.profilingIDs.push_back(padLayerName);
// Formats an index as a zero-padded, fixed-width (5-digit) decimal string,
// e.g. 7 -> "00007". Used to build stable, sortable primitive-ID suffixes.
2717 std::string get_string_id(size_t i) {
2718     std::stringstream ss;
2719     ss << std::setw(5) << std::setfill('0') << i;
2723 void CLDNNGraph::CreateLSTMCellPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2724 int lstm_batch_size, lstm_sequence_len, lstm_input_size, lstm_hidden_size;
2725 SizeVector in_dims1, in_dims2;
2726 bool hasBias = false;
2727 auto inputPrimitives = GetPrevLayersPrimitives(layer);
2729 auto elementSize = cldnn::data_type_traits::size_of(DataTypeFromPrecision(layer->precision));
2730 std::string layerName = layer_type_name_ID(layer);
2731 cldnn::primitive_id weightID = layerName + m_weightsTag;
2732 cldnn::primitive_id recurrentID = layerName + "_recurrent" + m_weightsTag;
2733 cldnn::primitive_id biasID = layerName + m_biasesTag;
2734 auto cellLayer = dynamic_cast<InferenceEngine::LSTMCell*> (layer.get());
2736 /* check incoming CNN layer and setup required variables */
2738 auto in_data0 = layer->insData[0].lock();
2740 THROW_IE_EXCEPTION << "Missing first input for LSTMCell layer " << layer->name;
2742 auto in_dims0 = in_data0->dims;
2743 auto out_dims0 = layer->outData[0]->dims;
2745 lstm_input_size = in_dims0[0];
2746 lstm_batch_size = in_dims0[1];
2747 lstm_hidden_size = out_dims0[0];
2749 /* do we have initial hidden and cell?
2750 if blobs are not null, direct the data from them
2751 into corresponding LSTM inputs */
2753 auto in_data1 = layer->insData[1].lock();
2755 THROW_IE_EXCEPTION << "Missing second input for LSTMCell layer " << layer->name;
2756 in_dims1 = in_data1->dims;
2759 auto in_data2 = layer->insData[2].lock();
2761 THROW_IE_EXCEPTION << "Missing third input for LSTMCell layer " << layer->name;
2762 in_dims2 = in_data2->dims;
2765 if (in_dims0.size() != 2 || in_dims1.size() != 2 || in_dims2.size() != 2)
2766 THROW_IE_EXCEPTION << "Wrong input shapes for LSTMCell Layer " << layer->name;
2769 /* Prepare weight/bias memory primitives - split weight blob into W and R */
2771 cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_input_size, 4 * lstm_hidden_size));
2772 cldnn::tensor rTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_hidden_size, 4 * lstm_hidden_size));
2773 cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, wTensor);
2774 cldnn::layout RLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor);
2776 auto wmem = cldnn::memory::allocate(*(m_env.engine), WLayout);
2777 auto wtmpPointer = wmem.pointer<char>(); // implicitly maps buffer - unmap in destructor
2779 auto rmem = cldnn::memory::allocate(*(m_env.engine), RLayout);
2780 auto rtmpPointer = rmem.pointer<char>();
2782 auto wLayer = dynamic_cast<InferenceEngine::WeightableLayer *> (layer.get());
2783 auto pWeightsBlob = wLayer->_weights;
2784 auto blobBytes = static_cast<const char *>(pWeightsBlob->buffer());
2785 const size_t WchunkSz = lstm_input_size * elementSize;
2786 const size_t RchunkSz = lstm_hidden_size * elementSize;
2788 auto wBytes = wtmpPointer.data();
2789 auto rBytes = rtmpPointer.data();
2791 for (int h = 0; h < 4 * lstm_hidden_size; h++) {
2792 // copy "input size" elements to W
2793 for (size_t b = 0; b < WchunkSz; b++)
2794 *wBytes++ = *blobBytes++;
2796 // copy "lstm_hidden_size" elements to R
2797 for (size_t b = 0; b < RchunkSz; b++)
2798 *rBytes++ = *blobBytes++;
2801 m_topology->add(cldnn::data(weightID, wmem));
2802 m_topology->add(cldnn::data(recurrentID, rmem));
2804 /* create bias memory primitive */
2805 auto pBiasBlob = wLayer->_biases;
2806 if (pBiasBlob != nullptr) {
2807 cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1));
2808 cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor);
2810 auto bmem = cldnn::memory::allocate(*(m_env.engine), BLayout);
2811 auto btmpPointer = bmem.pointer<char>();
2813 auto blobBytes = static_cast<const char *>(pBiasBlob->buffer());
2814 const size_t BchunkSz = lstm_hidden_size * elementSize;
2815 auto bBytes = btmpPointer.data();
2817 for (size_t b = 0; b < 4 * BchunkSz; b++)
2818 *bBytes++ = *blobBytes++;
2820 m_topology->add(cldnn::data(biasID, bmem));
2825 cldnn::primitive_id inReshapeID = layerName + "_inReshape";
2826 cldnn::primitive_id permuteID = layerName + "_inputReorder";
2827 cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
2829 cldnn::tensor inputShape = { lstm_batch_size, 1, lstm_input_size, 1 };
2830 cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
2831 cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, inputShape);
2832 m_topology->add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
2833 m_topology->add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
2835 std::string hiddenInStr = inHiddenReshapeID + "_1";
2836 std::string cellInStr = inHiddenReshapeID + "_2";
2837 m_topology->add(cldnn::reshape(hiddenInStr, inputPrimitives[1], hiddenStateShape));
2838 m_topology->add(cldnn::reshape(cellInStr, inputPrimitives[2], hiddenStateShape));
2840 cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
2841 cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};
2843 std::string lstm_gemm_id = layerName + "_lstm_gemm";
2844 std::string lstm_elt_id = layerName + "_lstm_elt";
2845 std::string crop_id = layerName + "_crop";
2847 m_topology->add(cldnn::lstm_gemm(lstm_gemm_id, permuteID,
2848 weightID, recurrentID,
2849 hasBias ? biasID : "",
2851 m_topology->add(cldnn::lstm_elt(lstm_elt_id, lstm_gemm_id, cellInStr,
2852 0, 0, {}, {}, cldnn_lstm_offset_order_fizo));
2854 cldnn::primitive_id outputHiddenID = layerName;
2855 m_topology->add(cldnn::crop(outputHiddenID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
2856 cldnn::primitive_id outputCellID = layer->type + ":" + layer->outData[1]->name;
2857 m_topology->add(cldnn::crop(outputCellID, lstm_elt_id, hiddenSz, cellCropSz));
2859 // output primitive IDs
2860 m_env.primitiveIDs[outputHiddenID] = outputHiddenID; // LSTMCell:LSTMCell - "concat hidden"
2861 m_env.primitiveIDs[layer->type + ":" + layer->outData[0]->name] = outputHiddenID; // LSTMCell:LSTMCell:0 - hidden state
2862 m_env.primitiveIDs[outputCellID] = outputCellID; // LSTMCell:LSTMCell:1 - cell state
2864 m_env.profilingIDs.push_back(layerName);
// Builds an unrolled RNN sequence (RNNSequenceLayer with an LSTM cell) as a chain of
// lstm_gemm -> lstm_elt -> crop primitives, one group per time step, then concatenates
// the per-step hidden outputs.
// NOTE(review): this listing elides some lines of the original file (guards, closing
// braces, some argument lists); comments below only describe what the visible
// statements establish.
2867 void CLDNNGraph::CreateRNNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2868 int lstm_batch_size, lstm_sequence_len, lstm_input_size, lstm_hidden_size;
2869 SizeVector in_dims1, in_dims2;
2870 bool hasInitialHidden = false, hasInitialCell = false, hasBias = false, isForward = true;
2871 auto inputPrimitives = GetPrevLayersPrimitives(layer);
// element size (bytes) of the layer precision; used when slicing the weight blob below
2873 auto elementSize = cldnn::data_type_traits::size_of(DataTypeFromPrecision(layer->precision));
2874 std::string layerName = layer_type_name_ID(layer);
2875 cldnn::primitive_id weightID = layerName + m_weightsTag;
2876 cldnn::primitive_id recurrentID = layerName + "_recurrent" + m_weightsTag;
2877 cldnn::primitive_id biasID = layerName + m_biasesTag;
2878 auto rnnLayer = dynamic_cast<InferenceEngine::RNNSequenceLayer*> (layer.get());
// axis == 1 means the sequence axis is already where cldnn expects it;
// otherwise the input (and output) must be permuted around the unrolled loop
2879 bool permute_input = (1 != rnnLayer->axis);
2881 /* check incoming CNN layer and setup required variables */
2883 if (rnnLayer->cellType != RNNSequenceLayer::LSTM)
2884 THROW_IE_EXCEPTION << "RNN layer supports only LSTM like cell";
2886 auto in_data0 = layer->insData[0].lock();
// thrown when the first input's weak_ptr is expired (guard condition elided in this listing)
2888 THROW_IE_EXCEPTION << "Missing first input for RNN layer " << layer->name;
2890 auto in_dims0 = in_data0->dims;
2891 auto out_dims0 = layer->outData[0]->dims;
// dims here are minor-to-major: index 0 is the innermost (input-size) dimension
2893 if (!permute_input) {
2894 lstm_batch_size = in_dims0[2];
2895 lstm_sequence_len = in_dims0[1];
2897 lstm_batch_size = in_dims0[1];
2898 lstm_sequence_len = in_dims0[2];
2901 lstm_input_size = in_dims0[0];
2902 lstm_hidden_size = out_dims0[0];
2904 /* do we have initial hidden and cell?
2905 if blobs are not null, direct the data from them
2906 into corresponding LSTM inputs */
2908 auto in_data1 = layer->insData[1].lock();
2910 in_dims1 = in_data1->dims;
2911 hasInitialHidden = true;
2914 auto in_data2 = layer->insData[2].lock();
2916 in_dims2 = in_data2->dims;
2917 hasInitialCell = true;
2920 if (rnnLayer->direction != RNNSequenceLayer::FWD && rnnLayer->direction != RNNSequenceLayer::BWD)
2921 THROW_IE_EXCEPTION << "Support only forward and backward direction for RNN Layer " << layer->name;
2922 isForward = rnnLayer->direction == RNNSequenceLayer::FWD;
2924 if (in_dims0.size() != 3 || in_dims1.size() != 2 || in_dims2.size() != 2)
2925 THROW_IE_EXCEPTION << "Wrong input shapes for RNN Layer " << layer->name;
2928 /* Prepare weight/bias memory primitives - split weight blob into W and R */
// W holds the input weights (4*hidden x input), R the recurrent weights
// (4*hidden x hidden); the IE blob interleaves their rows, so the copy loop
// below peels one W row chunk and one R row chunk per gate-row
2930 cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_input_size, 4 * lstm_hidden_size));
2931 cldnn::tensor rTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_hidden_size, 4 * lstm_hidden_size));
2932 cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, wTensor);
2933 cldnn::layout RLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor);
2935 auto wmem = cldnn::memory::allocate(*(m_env.engine), WLayout);
2936 auto wtmpPointer = wmem.pointer<char>(); // implicitly maps buffer - unmap in destructor
2938 auto rmem = cldnn::memory::allocate(*(m_env.engine), RLayout);
2939 auto rtmpPointer = rmem.pointer<char>();
2941 auto wLayer = dynamic_cast<InferenceEngine::WeightableLayer *> (layer.get());
2942 auto pWeightsBlob = wLayer->_weights;
2943 auto blobBytes = static_cast<const char *>(pWeightsBlob->buffer());
// per-row byte counts for the W part and the R part of one interleaved row
2944 const size_t WchunkSz = lstm_input_size * elementSize;
2945 const size_t RchunkSz = lstm_hidden_size * elementSize;
2947 auto wBytes = wtmpPointer.data();
2948 auto rBytes = rtmpPointer.data();
2950 for (int h = 0; h < 4 * lstm_hidden_size; h++) {
2951 // copy "input size" elements to W
2952 for (size_t b = 0; b < WchunkSz; b++)
2953 *wBytes++ = *blobBytes++;
2955 // copy "lstm_hidden_size" elements to R
2956 for (size_t b = 0; b < RchunkSz; b++)
2957 *rBytes++ = *blobBytes++;
2960 m_topology->add(cldnn::data(weightID, wmem));
2961 m_topology->add(cldnn::data(recurrentID, rmem));
2963 /* create bias memory primitive */
2964 auto pBiasBlob = wLayer->_biases;
2965 if (pBiasBlob != nullptr) {
2966 cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1));
// NOTE(review): BLayout is built from rTensor while bTensor (computed just above)
// is unused — looks like a copy/paste defect; the bias layout should presumably
// use bTensor (4*hidden x 1). Verify against the full file before relying on it.
2967 cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor);
2969 auto bmem = cldnn::memory::allocate(*(m_env.engine), BLayout);
2970 auto btmpPointer = bmem.pointer<char>();
2972 auto blobBytes = static_cast<const char *>(pBiasBlob->buffer());
2973 const size_t BchunkSz = lstm_hidden_size * elementSize;
// copies all 4*hidden bias values in one flat pass
2974 auto bBytes = btmpPointer.data();
2976 for (size_t b = 0; b < 4 * BchunkSz; b++)
2977 *bBytes++ = *blobBytes++;
2979 m_topology->add(cldnn::data(biasID, bmem));
2984 std::vector<std::pair<cldnn::primitive_id, cldnn::tensor>> input_ids_offsets;
2985 std::vector<cldnn::primitive_id> output_ids_offsets;
2987 cldnn::primitive_id inReshapeID = layerName + "_inReshape";
2988 cldnn::primitive_id permuteID = layerName + "_inputReorder";
2989 cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
2991 cldnn::tensor inputShape;
2993 if (permute_input) {
2994 inputShape = { lstm_sequence_len, lstm_batch_size, lstm_input_size, 1 };
2996 inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, 1 };
2998 cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
2999 cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, inputShape);
// normalize the input to bfyx before splitting it per time step
3000 m_topology->add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
3001 m_topology->add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
// inputs 1 and 2 are the initial hidden and cell states
3003 m_topology->add(cldnn::reshape(inHiddenReshapeID+"_1", inputPrimitives[1], hiddenStateShape));
3004 m_topology->add(cldnn::reshape(inHiddenReshapeID+"_2", inputPrimitives[2], hiddenStateShape));
// split along the feature axis: one slice per time step
3006 for (int i = 0; i < lstm_sequence_len; ++i)
3007 input_ids_offsets.push_back({ get_string_id(i), {0, i, 0, 0} });
3009 cldnn::primitive_id inputSplitID = layerName + "_inputSplit";
3011 if (permute_input) {
// swap batch and sequence axes before the split when the IR put sequence first
3012 m_topology->add(cldnn::permute(layerName + "_inputSwap", permuteID, { 1, 0, 2, 3 }));
3013 m_topology->add(cldnn::split(inputSplitID, layerName + "_inputSwap", input_ids_offsets));
3015 m_topology->add(cldnn::split(inputSplitID, permuteID, input_ids_offsets));
3018 cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
// the cell state lives at feature offset 1 of the lstm_elt output
3019 cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};
3020 std::string hiddenStr = hasInitialHidden ? inHiddenReshapeID+"_1" : "";
3021 std::string cellStr = hasInitialCell ? inHiddenReshapeID+"_2" : "";
// unrolled loop: each step consumes the previous step's hidden/cell crops
3023 for (int i = 0; i < lstm_sequence_len; ++i) {
3024 std::string lstm_gemm_id = layerName + "_lstm_gemm" + get_string_id(i);
3025 std::string lstm_elt_id = layerName + "_lstm_elt" + get_string_id(i);
3026 std::string crop_id = layerName + "_crop" + get_string_id(i);
// backward direction walks the time steps in reverse
3028 int seqIdx = isForward ? i : lstm_sequence_len - 1 - i;
// NOTE(review): hasBias is never visibly set to true even though the bias data
// primitive is added above — the assignment may be elided in this listing; verify.
3029 m_topology->add(cldnn::lstm_gemm(lstm_gemm_id, inputSplitID + ":" + get_string_id(seqIdx),
3030 weightID, recurrentID,
3031 hasBias ? biasID : "",
3033 m_topology->add(cldnn::lstm_elt(lstm_elt_id, lstm_gemm_id,
3034 cellStr, 0, 0, {}, {},
3035 cldnn_lstm_offset_order_fizo));
3037 hiddenStr = crop_id + ":hidden";
3038 cellStr = crop_id + ":cell";
3039 m_topology->add(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }));
3040 output_ids_offsets.push_back(hiddenStr);
3042 if (i < lstm_sequence_len - 1) {
3043 m_topology->add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
3045 // last hidden state crop (output 2)
3046 if (layer->outData.size() > 1) {
3047 cldnn::primitive_id outputHiddenID = layer->type + ":" + layer->outData[1]->name;
3048 m_env.primitiveIDs[hiddenStr] = hiddenStr;
3049 m_env.primitiveIDs[outputHiddenID] = hiddenStr;
3052 // last cell state crop (output 3)
3053 if (layer->outData.size() > 2) {
3054 m_topology->add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
3055 cldnn::primitive_id outputCellID = layer->type + ":" + layer->outData[2]->name;
3056 m_env.primitiveIDs[cellStr] = cellStr;
3057 m_env.primitiveIDs[outputCellID] = cellStr;
// backward runs collected hidden states in reverse time order; restore it for output
3062 if (!isForward) std::reverse(output_ids_offsets.begin(), output_ids_offsets.end());
3064 if (permute_input) {
// undo the earlier batch/sequence swap on the concatenated output
3065 m_topology->add(cldnn::concatenation(layerName + "_outputConcat", output_ids_offsets, cldnn::concatenation::along_f));
3066 m_topology->add(cldnn::permute(layerName, layerName + "_outputConcat", { 1, 0, 2, 3 }));
3068 m_topology->add(cldnn::concatenation(layerName, output_ids_offsets, cldnn::concatenation::along_f));
3071 m_env.primitiveIDs[layerName] = layerName;
3072 m_env.primitiveIDs[layer->type + ":" + layer->outData[0]->name] = layerName;
3073 m_env.profilingIDs.push_back(layerName);
// Turns a Const layer's blob into a cldnn::data primitive. The IE dims (minor-to-major)
// are mapped into a 4D cldnn tensor according to rank; unsupported ranks throw.
3076 void CLDNNGraph::AddConstantBlobInput(InferenceEngine::CNNLayerPtr &layer) {
// Const layers carry exactly one blob; its precision drives the cldnn data type below
3077 auto constBlob = layer->blobs.begin()->second;
3078 auto constDims = layer->outData[0]->dims;
3080 cldnn::tensor constTensor;
3081 switch (constDims.size()) {
// NOTE(review): the 4D mapping uses dims [3],[2],[0],[1] — an unusual order compared
// with the other rank cases; presumably intentional for bfyx, but confirm against
// callers before changing.
3082 case 4: constTensor = cldnn::tensor(TensorValue(constDims[3]), TensorValue(constDims[2]),
3083 TensorValue(constDims[0]), TensorValue(constDims[1]));
3085 case 3: constTensor = cldnn::tensor(TensorValue(constDims[2]), TensorValue(constDims[1]),
3086 1, TensorValue(constDims[0]));
3088 case 2: constTensor = cldnn::tensor(TensorValue(constDims[1]), TensorValue(constDims[0]), 1, 1);
3090 case 1: constTensor = cldnn::tensor(TensorValue(constDims[0]), 1, 1, 1);
3092 default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions");
3095 cldnn::layout constLayout = cldnn::layout(
3096 DataTypeFromPrecision(layer->blobs.begin()->second->precision()),
// NOTE(review): 'bytes' is not used in the visible lines — possibly dead, or used in
// a line elided from this listing; verify before removing.
3100 size_t bytes = constLayout.bytes_count();
3101 cldnn::primitive_id constPrimID = layer_type_name_ID(layer);
3103 CreatePrimitiveFromBlob(constPrimID, constBlob, constLayout);
3104 m_env.primitiveIDs[constPrimID] = constPrimID;
// Creates a cldnn::convolution primitive from an IE ConvolutionLayer, including its
// weight/bias data primitives, stride/padding/dilation tensors, and registers the
// result for profiling.
3107 void CLDNNGraph::CreateConvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
3108 ValidateLayer(layer, 1);
3109 auto inputPrimitives = GetPrevLayersPrimitives(layer);
3110 auto convLayer = dynamic_cast<InferenceEngine::ConvolutionLayer *> (layer.get());
3112 std::vector<cldnn::primitive_id> weightPrimID;
3113 std::vector<cldnn::primitive_id> biasPrimID;
// materializes the layer's weight/bias blobs as cldnn data primitives
3114 CreateWeightAndBiasPrimitives(layer, weightPrimID, biasPrimID);
3116 cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
3117 cldnn::spatial(convLayer->_stride[X_AXIS], convLayer->_stride[Y_AXIS]));
3118 auto allPad = getPaddings(*convLayer);
// cldnn expects padding as a negative input offset, hence the sign flip
3119 cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
3120 cldnn::spatial(-allPad.begin[X_AXIS], -allPad.begin[Y_AXIS]));
3121 cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
3122 cldnn::spatial(convLayer->_dilation[X_AXIS], convLayer->_dilation[Y_AXIS]));
3124 std::string convLayerName = layer_type_name_ID(layer);
// large group counts take a different primitive configuration (argument lists are
// elided in this listing); both branches build a convolution with the same output dims
3125 if (convLayer->_group >= 16) {
3126 auto convPrim = cldnn::convolution(convLayerName,
3136 CldnnTensorFromIEDims(convLayer->outData[0]->dims));
3137 m_topology->add(convPrim);
3139 auto convPrim = cldnn::convolution(convLayerName,
3148 CldnnTensorFromIEDims(convLayer->outData[0]->dims));
3149 m_topology->add(convPrim);
3151 m_env.primitiveIDs[convLayerName] = convLayerName;
3152 m_env.profilingIDs.push_back(convLayerName);
// Creates a cldnn::gather primitive. Maps the IE 'axis' parameter (which may be
// negative, TensorFlow-style) onto cldnn's gather_axis enum.
3155 void CLDNNGraph::CreateGatherPrimitive(InferenceEngine::CNNLayerPtr &layer) {
3156 ValidateLayer(layer, 2);
3158 auto inputPrimitives = GetPrevLayersPrimitives(layer);
3159 auto gatherLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
3161 int axis = gatherLayer->GetParamAsInt("axis", 0);
3163 // Be careful, TensorFlow consist negative axis interpretation bug. Here: -3 = b, -2 = f, -1 = y, but must be -3 = f, -2 = y, -1 = x
// Local translation table IE axis -> cldnn gather_axis; anything outside [-3, 3] throws.
3164 auto cldnnAxisFromIE = [](int axis) {
3166 case 0: return cldnn::gather::gather_axis::along_b;
3167 case 1: return cldnn::gather::gather_axis::along_f;
3168 case 2: return cldnn::gather::gather_axis::along_y;
3169 case 3: return cldnn::gather::gather_axis::along_x;
3170 case -1: return cldnn::gather::gather_axis::along_y;
3171 case -2: return cldnn::gather::gather_axis::along_f;
3172 case -3: return cldnn::gather::gather_axis::along_b;
3173 default: THROW_CLDNN_EXCEPTION("Unsupported gather axis: " << axis);
3177 std::string gatherLayerName = layer_type_name_ID(layer);
3178 auto gatherPrim = cldnn::gather(
3182 cldnnAxisFromIE(axis),
3183 CldnnTensorFromIEDims(gatherLayer->outData[0]->dims));
3185 m_env.primitiveIDs[gatherLayerName] = gatherLayerName;
3186 m_topology->add(gatherPrim);
3187 m_env.profilingIDs.push_back(gatherLayerName);
// Creates a cldnn::depth_to_space primitive. Requires a 4D input whose channel
// dimension is divisible by block_size^2.
3190 void CLDNNGraph::CreateDepthToSpacePrimitive(InferenceEngine::CNNLayerPtr &layer) {
3191 ValidateLayer(layer, 1);
3193 auto inputPrimitives = GetPrevLayersPrimitives(layer);
3194 auto depthToSpace = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
// block_size defaults to 2 when absent from the IR
3196 size_t blockSize = depthToSpace->GetParamAsInt("block_size", 2);
3198 if (depthToSpace->input().get()->dims.size() != 4)
3199 THROW_CLDNN_EXCEPTION("Unsupported size of tensor " << depthToSpace->input().get()->dims.size());
3201 size_t blockSizeSquare = blockSize * blockSize;
// dims[2] here is the channel dimension (minor-to-major dims ordering)
3203 if (depthToSpace->input().get()->dims[2] % blockSizeSquare != 0)
3204 THROW_CLDNN_EXCEPTION("The depth of the input tensor must be divisible by squared block size = " << blockSizeSquare);
3206 std::string depthToSpaceName = layer_type_name_ID(layer);
3207 auto depthToSpacePrim = cldnn::depth_to_space(
3212 m_env.primitiveIDs[depthToSpaceName] = depthToSpaceName;
3213 m_topology->add(depthToSpacePrim);
3214 m_env.profilingIDs.push_back(depthToSpaceName);
// Creates a cldnn::shuffle_channels primitive. Normalizes a negative axis, then
// validates axis range, group size, and divisibility of the shuffled dimension.
3217 void CLDNNGraph::CreateShuffleChannelsPrimitive(InferenceEngine::CNNLayerPtr &layer) {
3218 ValidateLayer(layer, 1);
3220 auto inputPrimitives = GetPrevLayersPrimitives(layer);
3221 auto shuffleChannels = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
3222 const int32_t numberOfDims = shuffleChannels->input()->getDims().size();
3224 int32_t group = shuffleChannels->GetParamAsInt("group", 1);
3225 int32_t axis = shuffleChannels->GetParamAsInt("axis", 1);
// negative axis counts from the end (guard condition elided in this listing)
3228 axis += numberOfDims;
3230 if (axis < 0 || axis >= numberOfDims)
// NOTE(review): this message reports 'group' but the failed check is on 'axis'
// (and a space is missing after "is") — the variables look swapped; fix to use axis.
3231 THROW_CLDNN_EXCEPTION("Incorrect axis value! Actual axis is" + std::to_string(group));
// NOTE(review): conversely, this group-size message reports 'axis' instead of 'group'.
3234 THROW_CLDNN_EXCEPTION("Invalid group size value (should equal at least one). Actual block size is" +
3235 std::to_string(group));
3237 if (shuffleChannels->input().get()->getDims()[axis] % group != 0)
// NOTE(review): "group size is" followed by std::to_string(axis) — again the wrong
// variable is printed in the diagnostic.
3238 THROW_CLDNN_EXCEPTION("Group parameter must evenly divide the channel dimension. Actual group size is " +
3239 std::to_string(axis));
3241 std::string shuffleChannelsName = layer_type_name_ID(layer);
3242 auto shuffleChannelsPrim = cldnn::shuffle_channels(
3243 shuffleChannelsName,
3248 m_env.primitiveIDs[shuffleChannelsName] = shuffleChannelsName;
3249 m_topology->add(shuffleChannelsPrim);
3250 m_env.profilingIDs.push_back(shuffleChannelsName);
// Creates a cldnn::strided_slice primitive. The four inputs are data, begin, end and
// strides; the four masks come from IR parameters and are narrowed to uint8_t vectors.
3253 void CLDNNGraph::CreateStridedSlicePrimitive(InferenceEngine::CNNLayerPtr &layer) {
3254 auto inputPrimitives = GetPrevLayersPrimitives(layer);
3255 auto stridedSliceLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
// GetParamAsUInts returns unsigned ints; cldnn wants uint8_t per-dimension flags
3257 auto tmp = stridedSliceLayer->GetParamAsUInts("end_mask");
3258 std::vector<uint8_t> end_mask(tmp.begin(), tmp.end());
3259 tmp = stridedSliceLayer->GetParamAsUInts("begin_mask");
3260 std::vector<uint8_t> begin_mask(tmp.begin(), tmp.end());
3261 tmp = stridedSliceLayer->GetParamAsUInts("new_axis_mask");
3262 std::vector<uint8_t> new_axis_mask(tmp.begin(), tmp.end());
3263 tmp = stridedSliceLayer->GetParamAsUInts("shrink_axis_mask");
3264 std::vector<uint8_t> shrink_axis_mask(tmp.begin(), tmp.end());
3266 std::string stridedSliceLayerName = layer_type_name_ID(layer);
3267 auto stridedSlicePrim = cldnn::strided_slice(
3268 stridedSliceLayerName,
3269 inputPrimitives[0], inputPrimitives[1], inputPrimitives[2], inputPrimitives[3],
3270 begin_mask, end_mask, new_axis_mask, shrink_axis_mask);
3272 m_env.primitiveIDs[stridedSliceLayerName] = stridedSliceLayerName;
3273 m_topology->add(stridedSlicePrim);
3274 m_env.profilingIDs.push_back(stridedSliceLayerName);
// Creates a cldnn::reverse_sequence primitive. Validates batch_axis/seq_axis
// (normalizing negatives) and that the sequence-lengths vector matches the batch dim.
3277 void CLDNNGraph::CreateReverseSequencePrimitive(InferenceEngine::CNNLayerPtr &layer) {
3278 ValidateLayer(layer, 2);
3280 auto inputPrimitives = GetPrevLayersPrimitives(layer);
3281 auto reverseSequence = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
3282 const int32_t numberOfDims = reverseSequence->input()->getDims().size();
// input 0: data tensor; input 1: per-batch sequence lengths
3284 const auto input = reverseSequence->insData[0].lock()->getDims();
3285 const auto sequence_lengths = reverseSequence->insData[1].lock()->getDims();
3287 int32_t batch_axis = reverseSequence->GetParamAsInt("batch_axis", 0);
3288 int32_t seq_axis = reverseSequence->GetParamAsInt("seq_axis", 1);
// negative axes count from the end (guard conditions elided in this listing)
3291 batch_axis += input.size();
3294 seq_axis += input.size();
3296 if (batch_axis == seq_axis)
3297 THROW_CLDNN_EXCEPTION("Batch axis and sequence axis should not be equal\n");
3299 if (seq_axis < 0 || seq_axis >= input.size())
3300 THROW_CLDNN_EXCEPTION("Incorrect Sequence axis value! Actual axis is " + std::to_string(seq_axis));
3302 if (batch_axis < 0 || batch_axis >= input.size())
// NOTE(review): this diagnostic says "Sequence axis" but the failed check is on
// batch_axis — the message should say "Batch axis".
3303 THROW_CLDNN_EXCEPTION("Incorrect Sequence axis value! Actual axis is " + std::to_string(batch_axis));
3305 if (sequence_lengths[0] != input[batch_axis])
3306 THROW_CLDNN_EXCEPTION("Sequence lengths must be a vector of length " + std::to_string(input[batch_axis])
3307 + "! Actual axis is " + std::to_string(sequence_lengths[0]))
3309 std::string reverseSequenceLayerName = layer_type_name_ID(layer);
3310 auto reverseSequencePrim = cldnn::reverse_sequence(
3311 reverseSequenceLayerName,
3317 m_env.primitiveIDs[reverseSequenceLayerName] = reverseSequenceLayerName;
3318 m_topology->add(reverseSequencePrim);
// NOTE(review): sibling Create*Primitive functions push the layer_type_name_ID here;
// this one pushes the raw layer name — likely an inconsistency worth aligning.
3319 m_env.profilingIDs.push_back(reverseSequence->name);
// Recognizes the Split -> (Conv, Conv) -> Concat fusion pattern (AlexNet-style grouped
// convolution emulation). Returns true only when both branches are plain, identically
// configured convolutions feeding a single axis-1 concat, and nothing is overridden
// by custom layers or network outputs.
// NOTE(review): several 'return false;' lines are elided in this listing; the visible
// conditions describe the rejection criteria.
3322 bool CLDNNGraph::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const {
3323 if (splitLayer->outData.size() != 2) return false; // split into 2
// each split output must feed exactly one consumer
3325 for (auto out : splitLayer->outData) {
3326 if (out->getInputTo().size() != 1) {
3332 dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[0]).get());
3334 dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[1]).get());
3335 if (!convLayer1 || !convLayer2) { // outputs aren't convolutions
// both branches must agree on precision, padding, stride and dilation, merge into the
// same layer, and neither may be a network output
3338 auto allPad1 = getPaddings(*convLayer1);
3339 auto allPad2 = getPaddings(*convLayer2);
3340 if (convLayer1->precision != convLayer2->precision // wrong precision
3341 || convLayer1->_fusedWith || convLayer2->_fusedWith // convolutions are fused
3342 || convLayer1->outData.size() != 1 || convLayer2->outData.size() != 1 // more than 1 output for convolutions
3343 || allPad1.begin[X_AXIS] != allPad2.begin[X_AXIS] // different padding
3344 || allPad1.begin[Y_AXIS] != allPad2.begin[Y_AXIS] // different padding
3345 || convLayer1->_stride[X_AXIS] != convLayer2->_stride[X_AXIS] // different strides
3346 || convLayer1->_stride[Y_AXIS] != convLayer2->_stride[Y_AXIS] // different strides
3347 || convLayer1->_dilation[X_AXIS] != convLayer2->_dilation[X_AXIS] // different dilation
3348 || convLayer1->_dilation[Y_AXIS] != convLayer2->_dilation[Y_AXIS] // different dilation
3349 || (GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0])) // no merge after convolutions
3350 != GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[1])))
3351 || (p_currentOutputs->find(convLayer1->name) != p_currentOutputs->end())
3352 || (p_currentOutputs->find(convLayer2->name) != p_currentOutputs->end())) {
// the common successor must be a single-output concat on axis 1
3356 dynamic_cast<InferenceEngine::ConcatLayer *> (
3357 GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0])).get());
3358 if (!concatLayer || // not a merge layer
3359 concatLayer->_axis != 1 || // merge on unsupported axis
3360 concatLayer->outData.size() != 1) { // too many outputs
3363 if (m_config.customLayers.find(convLayer1->type) != m_config.customLayers.end() ||
3364 m_config.customLayers.find(concatLayer->type) != m_config.customLayers.end()) {
3365 return false; // convolution or concat were overwritten by a custom layer
// Creates the cldnn::input_layout for a network input plus its preprocessing reorder
// (mean-value subtraction or mean-image subtraction), and records the resulting
// primitive IDs. The reorder also converts to the requested inputPrecision.
3371 void CLDNNGraph::AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo, Precision inputPrecision) {
3372 // first create and add the input layout
3373 auto inputDims = inputInfo->getDims();
3374 InferenceEngine::Layout l = inputInfo->getTensorDesc().getLayout();
3375 auto consumers = inputInfo->getInputData()->getInputTo();
3376 bool single_consumer = consumers.size() == 1;
3377 CLDNNGraph::LayerType consumerType = LayerTypeFromStr(consumers.begin()->second->type);
3379 cldnn::tensor dataTensor;
// with dynamic batching enabled, use the current batch instead of the IR batch
3380 cldnn::tensor::value_type batch = (m_env.m_max_batch <= 1)
3381 ? (inputDims.size() == 4 ? TensorValue(inputDims[3]) : 1)
3382 : TensorValue(m_curBatch);
// inputDims is minor-to-major: [0]=W, [1]=H, [2]=C, [3]=N for a 4D input
3383 switch (inputDims.size()) {
3385 if (InferenceEngine::Layout::NCHW == l || InferenceEngine::Layout::CHW == l) {
3386 dataTensor = cldnn::tensor(batch,
3387 TensorValue(inputDims[2]), TensorValue(inputDims[0]),
3388 TensorValue(inputDims[1]));
// NHWC builds the same tensor sizes as NCHW — the element ordering difference is
// carried by the cldnn format chosen below, not by the tensor
3389 } else if (InferenceEngine::Layout::NHWC == l) {
3390 dataTensor = cldnn::tensor(batch,
3391 TensorValue(inputDims[2]), TensorValue(inputDims[0]),
3392 TensorValue(inputDims[1]));
3394 THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in 4D input " + inputInfo->name());
3398 if (InferenceEngine::Layout::CHW == l) {
3399 dataTensor = cldnn::tensor(TensorValue(inputDims[2]), TensorValue(inputDims[1]), 1, TensorValue(inputDims[0]));
3401 THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in 3D input " + inputInfo->name());
3405 if (InferenceEngine::Layout::NCHW == l) {
3406 dataTensor = cldnn::tensor(1, 1, TensorValue(inputDims[1]), TensorValue(inputDims[0]));
3407 } else if (InferenceEngine::NC == l) {
3408 dataTensor = cldnn::tensor(TensorValue(inputDims[1]), TensorValue(inputDims[0]), 1, 1);
3410 THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in 2D input " + inputInfo->name());
3414 dataTensor = cldnn::tensor(TensorValue(inputDims[0]), 1, 1, 1);
3416 default: THROW_CLDNN_EXCEPTION("Invalid data dimensions");
// the input_layout keeps the IR precision; conversion to inputPrecision happens in
// the preprocessing reorder below
3419 cldnn::layout inputLayout(DataTypeFromPrecision(inputInfo->getInputPrecision()),
3420 FormatFromLayout(l),
3423 // save the input dims
3424 m_env.inputLayouts.insert({ inputInfo->name(), inputLayout });
3426 auto inputName = "Input:" + inputInfo->name();
3427 m_topology->add(cldnn::input_layout(inputName, inputLayout));
3429 // create preprocess primitive for this input
3430 auto preProcess = inputInfo->getPreProcess();
3432 size_t meanChannels = preProcess.getNumberOfChannels();
// retarget the layout to the plugin's default format/precision for the reorder output
3433 inputLayout.format = m_defaultFormat;
3434 inputLayout.size = inputLayout.size.transform(m_defaultFormat, 1);
3435 inputLayout.data_type = DataTypeFromPrecision(inputPrecision);
3436 auto preprocessPrimID = inputName + m_preProcessTag;
3438 if ((meanChannels > 0) &&
3439 (meanChannels != inputLayout.size.feature[0])) {
3440 THROW_CLDNN_EXCEPTION("Mismatched mean values channels in input " + inputName);
3443 switch (preProcess.getMeanVariant()) {
// MEAN_VALUE (and the no-mean case): pass per-channel scalars to the reorder
3446 std::vector<float> meanValues;
3447 if (meanChannels > 0) {
3448 for (size_t c = 0; c < meanChannels; c++) {
3449 if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10)
3450 THROW_CLDNN_EXCEPTION("not supporting stdScale yet in input " + inputName);
3451 meanValues.push_back(preProcess[c]->meanValue);
3454 m_topology->add(cldnn::reorder(preprocessPrimID, inputName, inputLayout, meanValues));
3455 m_env.profilingIDs.push_back(preprocessPrimID);
3456 InitProfileInfo(preprocessPrimID, "Reorder");
// MEAN_IMAGE: merge the per-channel mean blobs into one FP32 blob and subtract it
3461 IE_ASSERT(meanChannels);
3462 // first merge all mean values to a single blob
3463 // todo make sure mean blob precision is the same as the input precision
3464 auto meanDims = inputInfo->getDims();
3465 // overwrite batches with 1
3466 switch (meanDims.size()) {
3467 case 4: meanDims[3] = 1;
3470 THROW_CLDNN_EXCEPTION("Missing batch dimensions in input image");
3472 InferenceEngine::TBlob<float> meanBlob(Precision(Precision::FP32), TensorDesc::getLayoutByDims(meanDims), meanDims);
3473 meanBlob.allocate();
3474 auto meanBlobData = meanBlob.data();
3475 for (size_t c = 0; c < meanChannels; c++) {
3476 if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10)
3477 THROW_CLDNN_EXCEPTION("not supporting stdScale yet in input " + inputName);
3478 auto channelMeanBlob = std::dynamic_pointer_cast<TBlob<float>>(preProcess[c]->meanData);
3479 auto channelSize = channelMeanBlob->size();
3480 auto channelBlobData = channelMeanBlob->data();
3481 for (size_t i = 0; i < channelSize; i++) {
// channels are laid out contiguously in the merged blob
3482 meanBlobData[(c * channelSize) + i] = channelBlobData[i];
3485 // then create a data primitive for the mean values
3486 auto meanBlobPtr = std::make_shared<InferenceEngine::TBlob<float>>(meanBlob);
3488 // mean values will use external format (sub in the input format before convert to new format)
3489 cldnn::tensor meanBlobTensor(inputLayout.size);
3490 meanBlobTensor.batch[0] = 1; // mean values have no batches
3491 cldnn::layout meanBlobLayout(cldnn::data_types::f32, m_defaultFormat, meanBlobTensor);
3492 CreatePrimitiveFromBlob(
3493 inputName + m_meanValuesTag,
3496 m_topology->add(cldnn::reorder(preprocessPrimID,
3499 inputName + m_meanValuesTag));
3500 m_env.profilingIDs.push_back(preprocessPrimID);
3501 InitProfileInfo(preprocessPrimID, "Reorder");
3505 default: THROW_CLDNN_EXCEPTION("Invalid mean variant in input " + inputName);
// consumers of this input resolve to the preprocessing reorder, not the raw layout
3508 m_env.primitiveIDs[inputName] = preprocessPrimID;
3509 m_env.primitiveIDs[preprocessPrimID] = preprocessPrimID;
// Resolves each of a layer's inputs to the cldnn primitive id registered for its
// producer. Producer-less inputs (network inputs) resolve by the data name itself.
// Throws if an input's DataPtr is already expired.
3512 std::vector<cldnn::primitive_id> CLDNNGraph::GetPrevLayersPrimitives(const InferenceEngine::CNNLayerPtr layer) const {
3513 if (layer == nullptr) {
3516 std::vector<cldnn::primitive_id> inputPrimitives;
3517 for (auto inputData : layer->insData) {
3518 auto prevData = inputData.lock();
3519 if (prevData == nullptr) {
3520 THROW_CLDNN_EXCEPTION("Nonexistent input for layer: " << layer->name);
3522 auto prevCreator = prevData->creatorLayer.lock();
3523 std::string prevName;
// key format is "<type>:<name>"; multi-output producers are disambiguated by
// the data blob name instead of the layer name
3526 prevName = prevCreator->type + ":";
3527 if (prevCreator->outData.size() > 1)
3528 prevName += prevData->name;
3530 prevName += prevCreator->name;
3532 prevName = prevData->name;
// .at() throws if the producer was never registered — an ordering bug upstream
3534 inputPrimitives.push_back(m_env.primitiveIDs.at(prevName));
3536 return inputPrimitives;
// Appends a reorder that converts a network output to its IE layout/precision, and
// records the output's dims and primitive-id mappings.
3539 void CLDNNGraph::AddOutputPrimitive(std::string outputName, const InferenceEngine::DataPtr outputData, Precision outputPrecision) {
3540 // TODO: add precision check once there's an outputInfo object
3541 if (outputData->layout != InferenceEngine::NCHW &&
3542 outputData->layout != InferenceEngine::NHWC &&
3543 outputData->layout != InferenceEngine::CHW &&
3544 outputData->layout != InferenceEngine::NC) {
3545 THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(outputData->layout) << ") in output: " << outputName);
// reconstruct the "<type>:<name>" key used when the producer was registered
3548 auto outputCreator = outputData->getCreatorLayer().lock();
3549 std::string outLayerName = outputCreator->type + ":";
3551 if (outputCreator->outData.size() > 1)
3552 outLayerName += outputName;
3554 outLayerName += outputCreator->name;
3556 auto outputReorderID = outputName + m_postProcessTag;
// UNSPECIFIED means "keep whatever precision the output data already has"
3557 Precision precision = outputPrecision == Precision::UNSPECIFIED ? outputData->getPrecision() : outputPrecision;
3559 // Find correct output ID. Start with name stored in IR.
3560 std::string outputID = outLayerName;
3561 std::string finalID = m_env.primitiveIDs.at(outLayerName);
// chase the primitiveIDs remapping chain to its fixed point
// NOTE(review): in the visible lines outputID is never advanced inside this loop,
// which could not terminate once a remap exists — the updating assignment appears
// to be elided from this listing; verify against the full file.
3563 while (outputID != finalID) {
3564 auto prim = m_env.primitiveIDs.find(finalID);
3566 if (prim == m_env.primitiveIDs.end()) {
3567 THROW_IE_EXCEPTION << "Unknown output primitive id " << outputID;
3570 finalID = prim->second;
3573 m_topology->add(cldnn::reorder(outputReorderID, outputID,
3574 FormatFromLayout(outputData->getLayout()),
3575 DataTypeFromPrecision(precision)));
3576 m_env.primitiveIDs[outputName] = outputReorderID;
3577 m_env.profilingIDs.push_back(outputReorderID);
3578 InitProfileInfo(outputReorderID, "Reorder");
3579 m_env.outputDims[outputName] = outputData->dims;
3580 m_env.prevPrimitiveIDs[outputReorderID] = {outputName};
3583 void CLDNNGraph::AddSingleValuePrimitive(cldnn::primitive_id valPrimID, cldnn::data_types dataType, float value) {
// Adds a constant cldnn::data primitive holding a single scalar `value`,
// stored as a 1x1x1x1 tensor converted to the requested data type.
// NOTE(review): the `switch (dataType) {` header (presumably line 3586) and
// the per-case braces/breaks are elided from this view.
3584 cldnn::layout primLayout(dataType, m_defaultFormat, { 1, 1, 1, 1 });
3585 auto primMem = cldnn::memory::allocate(*(m_env.engine), primLayout);
3587 case cldnn::data_types::f32:
// Store the value directly as a 32-bit float.
3589 auto tmpPointer = primMem.pointer<float>(); // implicitly maps buffer - unmap in destructor
3590 tmpPointer[0] = value;
3593 case cldnn::data_types::f16:
// fp16 is written through a uint16_t view; conversion may fail (e.g. overflow).
3595 auto tmpPointer = primMem.pointer<uint16_t>(); // implicitly maps buffer - unmap in destructor
3596 cldnn_status status = CLDNN_SUCCESS;
3597 tmpPointer[0] = cldnn_float_to_half(value, &status);
3598 if (status != CLDNN_SUCCESS) {
3599 THROW_CLDNN_EXCEPTION("Error converting value to fp16.");
// Any other data type is not supported for scalar constants.
3604 THROW_CLDNN_EXCEPTION("Unhandled data type (precision)");
// Register the filled memory as a constant data primitive in the topology.
3607 m_topology->add(cldnn::data(valPrimID, primMem));
3610 cldnn::data_types CLDNNGraph::DataTypeFromPrecision(InferenceEngine::Precision p) {
// Maps an InferenceEngine precision to the corresponding cldnn data type.
// NOTE(review): the `switch (p) {` header and the `case Precision::U8:` label
// (presumably line 3617, feeding the u8 return below) are elided from this view.
// I16 falls through to f32: I16 data is promoted to 32-bit float on the device.
3612 case Precision::I16:
3613 case Precision::FP32:
3614 return cldnn::data_types::f32;
3615 case Precision::FP16:
3616 return cldnn::data_types::f16;
3618 return cldnn::data_types::u8;
3619 case Precision::I32:
3620 return cldnn::data_types::i32;
// Any other precision is rejected with a parameter-mismatch error.
3622 THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << p.name() << " precision";
3627 cldnn::format CLDNNGraph::FormatFromLayout(InferenceEngine::Layout l) {
// Maps an InferenceEngine layout to the matching cldnn memory format.
// NOTE(review): the `switch (l) {` header (presumably line 3628) is elided.
// All channel-major layouts (and lower-rank NC/CHW/C, padded to 4D) map to bfyx.
3629 case InferenceEngine::Layout::NCHW:
3630 case InferenceEngine::Layout::NC:
3631 case InferenceEngine::Layout::CHW:
3632 case InferenceEngine::Layout::C:
3633 return cldnn::format::bfyx;
// Channel-last maps to byxf.
3634 case InferenceEngine::Layout::NHWC:
3635 return cldnn::format::byxf;
// Any other layout is rejected with a parameter-mismatch error.
3637 THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << l << " layout";
3642 cldnn::upsampling_sample_type CLDNNGraph::UpsamplingTypeFromString(const std::string& str) {
// Translates an IR "type" attribute string into a cldnn upsampling mode.
// Lookup is case-insensitive via caseless_map.
// NOTE(review): the map's closing `};` and the `return it->second;` inside the
// found-branch (between 3648 and 3651) are elided from this view.
3643 static const caseless_map<std::string, cldnn::upsampling_sample_type> UpsamplingTypeNameToType = {
3644 { "Bilinear" , cldnn::upsampling_sample_type::bilinear },
3645 { "Nearest" , cldnn::upsampling_sample_type::nearest },
3647 auto it = UpsamplingTypeNameToType.find(str);
3648 if (it != UpsamplingTypeNameToType.end())
// Unknown strings are a model error.
3651 THROW_CLDNN_EXCEPTION("Unknown Upsampling type: " << str);
3654 cldnn::softmax::dimension_t CLDNNGraph::SoftmaxDimensionFromIEAxis(const InferenceEngine::SoftMaxLayer* softmaxLayer, bool isPrevFC) {
// Converts the IE softmax `axis` attribute into a cldnn normalization dimension.
3655 // WA for default softmax dimension in cldnn for fyx
3656 // todo: remove this once clDNN changes FC output to BF instead of BX
// Count output dimensions that are not 1; if only one dim is meaningful (or
// the producer is a fully-connected layer, whose clDNN output is BX not BF),
// normalize over the whole fyx block regardless of the requested axis.
// NOTE(review): the loop body that increments non1Dims (lines 3660-3663) is
// elided from this view.
3657 auto dims = softmaxLayer->outData[0]->dims;
3658 unsigned non1Dims = 0;
3659 for (size_t i = 0; i < dims.size(); i++) {
3664 if (non1Dims == 1 || isPrevFC) {
3665 return cldnn::softmax::normalize_fyx;
// Otherwise map the IE axis (1=channels, 2=height, 3=width) directly.
3669 switch (softmaxLayer->axis) {
3670 case 1: return cldnn::softmax::normalize_f;
3671 case 2: return cldnn::softmax::normalize_y;
3672 case 3: return cldnn::softmax::normalize_x;
3673 default: THROW_CLDNN_EXCEPTION("Invalid softmax axis " << softmaxLayer->axis);
// Unreachable after the throw above; kept to satisfy the compiler's
// return-path analysis.
3675 return cldnn::softmax::normalize_fyx;
3678 cldnn::prior_box_code_type CLDNNGraph::PriorBoxCodeFromString(const std::string& str) {
// Translates a Caffe PriorBoxParameter code-type string into the cldnn enum.
// Lookup here is case-SENSITIVE (plain std::map), unlike UpsamplingTypeFromString.
3679 static const std::map<std::string, cldnn::prior_box_code_type> CodeNameToType = {
3680 { "caffe.PriorBoxParameter.CORNER" , cldnn::prior_box_code_type::corner },
3681 { "caffe.PriorBoxParameter.CENTER_SIZE" , cldnn::prior_box_code_type::center_size },
3682 { "caffe.PriorBoxParameter.CORNER_SIZE" , cldnn::prior_box_code_type::corner_size },
3684 auto it = CodeNameToType.find(str);
3685 if (it != CodeNameToType.end()) {
// NOTE(review): the `return it->second;` inside the found-branch (between
// 3685 and 3688) is elided from this view.
3688 THROW_CLDNN_EXCEPTION("Unknown Prior-Box code type: " + str);
// Unreachable after the throw; defensive default to satisfy return-path analysis.
3689 return cldnn::prior_box_code_type::corner;
3693 void CLDNNGraph::CreateGenericLayerBlobPrimitives(const InferenceEngine::GenericLayer* layer) {
// Creates one constant weights primitive per blob attached to a generic
// (custom) layer. Only 1-D blobs are supported; each primitive is named
// "<type>:<name>_<blobName><weightsTag>" and laid out as a spatial vector.
// NOTE(review): argument lines between 3700 and 3703 (presumably the blob
// pointer and the cldnn::layout opening) are elided from this view.
3695 for (auto& blob : layer->blobs) {
3696 if (blob.second->dims().size() != 1) {
3697 THROW_CLDNN_EXCEPTION("Unhandled blob dim in layer " + layer->name);
3699 CreatePrimitiveFromBlob(
3700 layer->type + ":" + layer->name + "_" + blob.first + m_weightsTag,
3703 DataTypeFromPrecision(blob.second->precision()),
3704 m_defaultFormat, cldnn::spatial(TensorValue(blob.second->dims()[0]))));
3708 void CLDNNGraph::ValidateGenericLayerBlobs(const InferenceEngine::GenericLayer* layer, const std::vector<std::string>& blobNames) {
// Verifies that every blob name required by a custom-layer configuration is
// actually present on the layer; throws naming the first missing blob.
3710 for (auto& name : blobNames) {
3711 if (layer->blobs.find(name) == layer->blobs.end()) {
3712 THROW_CLDNN_EXCEPTION("Missing blob " + name + " in layer " + layer->name);
3717 cldnn::tensor CLDNNGraph::CldnnTensorFromIEDims(const InferenceEngine::SizeVector& dims) {
// Converts IE dims (outermost-first, up to 4 entries) into a cldnn::tensor.
// Missing dims default to 1. IE stores dims as [(N,)(C,)H,W]; copying them
// reversed yields b,f,x,y order, then x and y are swapped to produce the
// b,f,y,x ordering cldnn::tensor's 4-element constructor expects.
// NOTE(review): assumes dims.size() <= 4 — a longer vector would overflow
// outputTensor; presumably guaranteed by callers. TODO confirm.
3718 auto numDims = dims.size();
3719 std::vector<cldnn::tensor::value_type> outputTensor({ 1, 1, 1, 1 });
3720 for (size_t i = 0; i < numDims; i++) {
3721 outputTensor[i] = TensorValue(dims[numDims - i - 1]);
3723 // swap x,y for cldnn tensor taking bfxy instead of bfyx
3724 auto tmp = outputTensor[2];
3725 outputTensor[2] = outputTensor[3];
3726 outputTensor[3] = tmp;
// Implicit conversion from the 4-element vector to cldnn::tensor.
3728 return outputTensor;
3731 InferRequestInternal::Ptr
3732 CLDNNGraph::CreateInferRequestImpl(InputsDataMap networkInputs, OutputsDataMap networkOutputs) {
// Factory for infer requests: refuses to create one before the cldnn network
// has been built, then hands the shared environment and profiling flag to a
// new CLDNNInferRequest.
3733 if (m_env.network == nullptr) {
3734 THROW_IE_EXCEPTION << NETWORK_NOT_LOADED_str;
3736 return std::make_shared<CLDNNInferRequest>(m_env, m_config.useProfiling, networkInputs, networkOutputs);
3739 void CLDNNGraph::InitProfileInfo(const std::string& layerName,
3740 const std::string& layerType,
// NOTE(review): a parameter line between 3740 and 3742 is elided from this
// view — presumably `bool isCPU` (with a default), since the body reads it.
3742 InferenceEngine::InferenceEngineProfileInfo::LayerStatus status) {
// Seeds the per-layer profiling entry (keyed "<type>:<name>") with zeroed
// timers and the given execution status / CPU flag.
3743 m_env.perfMap[layerType + ":" + layerName].first = layerName;
3744 auto& perfEntry = m_env.perfMap[layerType + ":" + layerName].second;
3745 perfEntry.layerType = layerType;
3746 perfEntry.status = status;
3747 perfEntry.cpu_uSec = perfEntry.realTime_uSec = 0;
3748 perfEntry.isCPU = isCPU;
// FIXME(review): duplicate assignment — `status` was already set at 3746;
// this second write is redundant and should be removed.
3749 perfEntry.status = status;
3752 }; // namespace CLDNNPlugin