platform/upstream/dldt.git: inference-engine/src/cldnn_engine/cldnn_graph.cpp
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #include <list>
6 #include <set>
7 #include <unordered_set>
8 #include <sstream>
9 #include <CPP/cldnn_defs.h>
10 #include <CPP/data.hpp>
11 #include <CPP/input_layout.hpp>
12 #include <CPP/reorder.hpp>
13 #include <CPP/convolution.hpp>
14 #include <CPP/pooling.hpp>
15 #include <CPP/lrn.hpp>
16 #include <CPP/fully_connected.hpp>
17 #include <CPP/softmax.hpp>
18 #include <CPP/activation.hpp>
19 #include <CPP/concatenation.hpp>
20 #include <CPP/proposal.hpp>
21 #include <CPP/roi_pooling.hpp>
22 #include <CPP/scale.hpp>
23 #include <CPP/crop.hpp>
24 #include <CPP/deconvolution.hpp>
25 #include <CPP/prior_box.hpp>
26 #include <CPP/detection_output.hpp>
27 #include <CPP/normalize.hpp>
28 #include <CPP/reshape.hpp>
29 #include <CPP/batch_norm.hpp>
30 #include <CPP/permute.hpp>
31 #include <CPP/split.hpp>
32 #include <CPP/upsampling.hpp>
33 #include <CPP/network.hpp>
34 #include <CPP/profiling.hpp>
35 #include <CPP/custom_gpu_primitive.hpp>
36 #include <CPP/reorg_yolo.hpp>
37 #include <CPP/region_yolo.hpp>
38 #include <CPP/mutable_data.hpp>
39 #include <CPP/max_unpooling.hpp>
40 #include <CPP/arg_max_min.hpp>
41 #include <CPP/mvn.hpp>
42 #include <CPP/tile.hpp>
43 #include <CPP/border.hpp>
44 #include <CPP/lstm.hpp>
45 #include <CPP/gather.hpp>
46 #include <CPP/depth_to_space.hpp>
47 #include <CPP/shuffle_channels.hpp>
48 #include <CPP/strided_slice.hpp>
49 #include <CPP/reverse_sequence.hpp>
50 #include <chrono>
51 #include <cmath>
52 #include <algorithm>
53 #include "cldnn_graph.h"
54 #include "simple_math.h"
55 #include <description_buffer.hpp>
56 #include <cldnn/cldnn_config.hpp>
57 #include <graph_tools.hpp>
58 #include <ie_layers_internal.hpp>
59 #include <net_pass.h>
60 #include "cldnn_infer_request.h"
61 #include <cpp_interfaces/ie_executor_manager.hpp>
62 #include "details/caseless.hpp"
63 #include <fstream>
64 #include <utility>
65 #include <sys/types.h>
66 #include <sys/stat.h>
67
68 using namespace InferenceEngine;
69 using namespace InferenceEngine::details;
70
71 #ifndef NDEBUG
72 #include <iostream>
73 #include <iomanip>
74 #define THROW_CLDNN_EXCEPTION(desc)\
75 do { \
76 InferenceEngineException ex(__FILE__, __LINE__);\
77 std::cout << desc << "\n---\nException detected at " << __FILE__ << ":" << \
78 __LINE__ << " (" << __FUNCTION__ << ")\n---\n" << std::endl; THROW_IE_EXCEPTION << desc; } while (0)
79 #else
80 #define THROW_CLDNN_EXCEPTION(desc) THROW_IE_EXCEPTION << desc
81 #endif  // NDEBUG
82 #define TensorValue(val) static_cast<cldnn::tensor::value_type>(val)
83
84 namespace CLDNNPlugin {
85
86 const cldnn::primitive_id CLDNNGraph::m_preProcessTag("_cldnn_input_preprocess");
87 const cldnn::primitive_id CLDNNGraph::m_weightsTag("_cldnn_weights");
88 const cldnn::primitive_id CLDNNGraph::m_biasesTag("_cldnn_biases");
89 const cldnn::primitive_id CLDNNGraph::m_meanValuesTag("_cldnn_mean_values");
90 const cldnn::primitive_id CLDNNGraph::m_postProcessTag("_cldnn_output_postprocess");
91 const cldnn::primitive_id CLDNNGraph::m_scalesTag("_cldnn_scales");
92 const cldnn::primitive_id CLDNNGraph::m_workaroundTag("_cldnn_workaround");
93 const cldnn::primitive_id CLDNNGraph::m_preCustomLayerTag("_cldnn_custom_preprocess");
94 const cldnn::primitive_id CLDNNGraph::m_postCustomLayerTag("_cldnn_custom_postprocess");
95
96 static void ValidateLayer(const InferenceEngine::CNNLayerPtr& layer, unsigned inputs) {  // todo: add more checks
97     if (inputs && layer->insData.size() != inputs) {
98         THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name);
99     }
100     if (layer->_fusedWith) {
101         THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
102     }
103 }
104
105 static void ValidateEltwiseLayer(const InferenceEngine::CNNLayerPtr& layer) {
106     if (layer->_fusedWith) {
107         THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
108     }
109 }
110
111 #if defined(_WIN32)
112 #define mkdir(dir, mode) _mkdir(dir)
113 #endif
114
115 void CLDNNGraph::Config::LoadFromMap(const std::map<std::string, std::string>& configMap) {
116     for (auto& kvp : configMap) {
117         std::string key = kvp.first;
118         std::string val = kvp.second;
119
120         // TODO: refactor if-else to map?
121         if (key.compare(PluginConfigParams::KEY_PERF_COUNT) == 0) {
122             if (val.compare(PluginConfigParams::YES) == 0) {
123                 useProfiling = true;
124             } else if (val.compare(PluginConfigParams::NO) == 0) {
125                 useProfiling = false;
126             } else {
127                 THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
128             }
129         } else if (key.compare(PluginConfigParams::KEY_DYN_BATCH_ENABLED) == 0) {
130             if (val.compare(PluginConfigParams::YES) == 0) {
131                 enableDynamicBatch = true;
132             } else if (val.compare(PluginConfigParams::NO) == 0) {
133                 enableDynamicBatch = false;
134             } else {
135                 THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
136             }
137         } else if (key.compare(PluginConfigParams::KEY_DUMP_KERNELS) == 0) {
138             if (val.compare(PluginConfigParams::YES) == 0) {
139                 dumpCustomKernels = true;
140             } else if (val.compare(PluginConfigParams::NO) == 0) {
141                 dumpCustomKernels = false;
142             } else {
143                 THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
144             }
145         } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY) == 0) {
146             std::stringstream ss(val);
147             uint32_t uVal(0);
148             ss >> uVal;
149             if (ss.fail()) {
150                 THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
151             }
152             switch (uVal) {
153             case 0:
154                 queuePriority = cldnn::priority_mode_types::disabled;
155                 break;
156             case 1:
157                 queuePriority = cldnn::priority_mode_types::low;
158                 break;
159             case 2:
160                 queuePriority = cldnn::priority_mode_types::med;
161                 break;
162             case 3:
163                 queuePriority = cldnn::priority_mode_types::high;
164                 break;
165             default:
166                 THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported queue priority value: " << uVal;
167                 break;
168             }
169
170         } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) == 0) {
171             std::stringstream ss(val);
172             uint32_t uVal(0);
173             ss >> uVal;
174             if (ss.fail()) {
175                 THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
176             }
177             switch (uVal) {
178             case 0:
179                 queueThrottle = cldnn::throttle_mode_types::disabled;
180                 break;
181             case 1:
182                 queueThrottle = cldnn::throttle_mode_types::low;
183                 break;
184             case 2:
185                 queueThrottle = cldnn::throttle_mode_types::med;
186                 break;
187             case 3:
188                 queueThrottle = cldnn::throttle_mode_types::high;
189                 break;
190             default:
191                 THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported queue throttle value: " << uVal;
192                 break;
193             }
194         } else if (key.compare(PluginConfigParams::KEY_CONFIG_FILE) == 0) {
195             std::stringstream ss(val);
196             std::istream_iterator<std::string> begin(ss);
197             std::istream_iterator<std::string> end;
198             std::vector<std::string> configFiles(begin, end);
199             for (auto& file : configFiles) {
200                 CLDNNCustomLayer::LoadFromFile(file, customLayers);
201             }
202         } else if (key.compare(PluginConfigParams::KEY_TUNING_MODE) == 0) {
203             if (val.compare(PluginConfigParams::TUNING_DISABLED) == 0) {
204                 tuningConfig.mode = cldnn::tuning_mode::tuning_disabled;
205             } else if (val.compare(PluginConfigParams::TUNING_CREATE) == 0) {
206                 tuningConfig.mode = cldnn::tuning_mode::tuning_tune_and_cache;
207             } else if (val.compare(PluginConfigParams::TUNING_USE_EXISTING) == 0) {
208                 tuningConfig.mode = cldnn::tuning_mode::tuning_use_cache;
209             } else {
210                 THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported tuning mode value by plugin: " << val;
211             }
212         } else if (key.compare(PluginConfigParams::KEY_TUNING_FILE) == 0) {
213             tuningConfig.cache_file_path = val;
214         } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_MEM_POOL) == 0) {
215             if (val.compare(PluginConfigParams::YES) == 0) {
216                 memory_pool_on = true;
217             } else if (val.compare(PluginConfigParams::NO) == 0) {
218                 memory_pool_on = false;
219             } else {
220                 THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported memory pool flag value: " << val;
221             }
222         } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_GRAPH_DUMPS_DIR) == 0) {
223             if (!val.empty()) {
224                 graph_dumps_dir = val;
225                 mkdir(graph_dumps_dir.c_str(), 0755);
226             }
227         } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_SOURCES_DUMPS_DIR) == 0) {
228             if (!val.empty()) {
229                 sources_dumps_dir = val;
230                 mkdir(sources_dumps_dir.c_str(), 0755);
231             }
232         } else if (key.compare(PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS) == 0) {
233             if (val.compare(PluginConfigParams::YES) == 0) {
234                 exclusiveAsyncRequests = true;
235             } else if (val.compare(PluginConfigParams::NO) == 0) {
236                 exclusiveAsyncRequests = false;
237             } else {
238                 THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
239             }
240         } else {
241             THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property key by plugin: " << key;
242         }
243     }
244 }
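// Illustrative usage sketch (assumes a CLDNNGraph::Config instance named `config`;
// not part of the original file). Keys map directly onto the fields set above:
//
//     std::map<std::string, std::string> cfg = {
//         { PluginConfigParams::KEY_PERF_COUNT, PluginConfigParams::YES },   // useProfiling = true
//         { CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY, "2" }              // queuePriority = med
//     };
//     config.LoadFromMap(cfg);  // throws on any unrecognized key or value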
245
246 void CLDNNGraph::changeInputBatch(size_t batch) {
247     m_curBatch = batch;
248 }
249
250 bool CLDNNGraph::CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const {
251     InputsDataMap inputs;
252     network.getInputsInfo(inputs);
253
254     CNNLayerSet inputLayers;
255     std::unordered_set<CNNLayer *> allLayers;
256
257     if (inputs.empty())
258         return false;
259
260     auto & secondLayers = inputs.begin()->second->getInputData()->getInputTo();
261     if (secondLayers.empty())
262         return false;
263
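    // The traversal below rejects dynamic batching if it reaches a layer whose
    // semantics depend on the batch dimension (NMS, ROI pooling, prior boxes,
    // reshapes, permutes, ...) or any user-supplied custom layer.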
264     bool check_result = true;
265     details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
266         auto type = LayerTypeFromStr(layer->type);
267         if (SimplerNMS == type ||
268             ROIPooling == type ||
269             PriorBox == type ||
270             DetectionOutput == type ||
271             Reshape == type ||
272             Permute == type ||
273             Flatten == type ||
274             Proposal == type ||
275             PSROIPooling == type ) {
276             check_result = false;
277         }
278
279         // check for custom layer
280         auto customLayer = m_config.customLayers.find(layer->type);
281         if (customLayer != m_config.customLayers.end()) {
282             check_result = false;
283         }
284     }, false);
285
286     return check_result;
287 }
288
289 CLDNNGraph::CLDNNGraph(InferenceEngine::ICNNNetwork& network, const Config& config, int max_batch) : m_config(config),
290     m_defaultFormat(cldnn::format::bfyx),
291     m_curBatch(-1) {
292     m_env.engine = std::make_shared<cldnn::engine>(cldnn::engine_configuration(
293         (config.useProfiling || (config.tuningConfig.mode != cldnn::tuning_mode::tuning_disabled)),
294         false,
295         config.dumpCustomKernels,
296         std::string(),
297         std::string(),
298         true,
299         std::string(),
300         config.sources_dumps_dir,
301         config.queuePriority,
302         config.queueThrottle,
303         config.memory_pool_on));
304 #if 0
305         m_env.debugOptions.PrintOptions();
306 #endif
307     if (config.exclusiveAsyncRequests) {
308         ExecutorManager *executorManager = ExecutorManager::getInstance();
309         _taskExecutor = executorManager->getExecutor(TargetDeviceInfo::name(TargetDevice::eGPU));
310     }
311
312     bool res = NetPass::CombineRNNSeq(network) || NetPass::UnrollTI(network);  // combine RNN sequences first, else unroll TensorIterator
313     res &= NetPass::UnrollRNN_if(network, [] (RNNCellBase rnn) -> bool {
314         if (rnn.clip != 0.0f)
315             return true;
316         if (rnn.type == "GRUCell" ||
317             rnn.type == "GRUSequence" ||
318             rnn.type == "RNNCell" ||
319             rnn.type == "RNNSequence")
320             return true;
321         if (!(rnn.type == "LSTMCell" || rnn.type == "LSTMSequence") ||
322             rnn.activations == std::vector<std::string>{"sigmoid", "tanh", "tanh"})
323             return false;
324         return true;
325     });
326
327     if (!res)
328         THROW_CLDNN_EXCEPTION("Plugin doesn't support Tensor Iterator in pure form. "
329                               "None of the TI optimization patterns was applied successfully");
330
331     if (max_batch > 1) {
332         // check topology for applicability
333         if (!CanProcessDynBatch(network)) {
334             THROW_CLDNN_EXCEPTION("This topology cannot be compiled for dynamic batch!");
335         }
336
337         // calculate number of networks necessary based on binary log
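        // e.g. max_batch = 5 (binary 101) -> highest set bit is 2 -> m_bv_sz = 3,
        // so networks are compiled below for batch sizes 4, 2 and 1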
338         unsigned int tmp = max_batch;
339         unsigned int mask = 1 << 31;
340         unsigned int ldigit = 31;
341
342         while (!(tmp & mask)) {
343             mask >>= 1;
344             ldigit--;
345         }
346
347         m_env.m_bv_sz = ldigit + 1;
348     } else {
349         m_env.m_bv_sz = 0;
350     }
351
352     m_env.m_max_batch = max_batch;
353
354     // Handle workarounds
355     char networkName[128] = { 0 };
356     network.getName(networkName, 127);
357     m_env.debugOptions.EnableWA(networkName);
358     m_env.debugOptions.AddTimedEvent("Loading Begin");
359
360     if (max_batch > 1) {
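        // one network is compiled per power-of-two batch size; inserting each new
        // network at the front keeps m_env.batchNetworks ordered from batch 1 up
        // to the largest power of two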
361         for (int b = m_env.m_bv_sz - 1; b >= 0; b--) {
362             m_topology = std::make_shared<cldnn::topology>();
363             m_env.network.reset();
364             m_env.inputLayouts.clear();
365             m_env.outputDims.clear();
366             m_env.primitiveIDs.clear();
367
368             changeInputBatch(1 << b);
369             Load(network);
370             CompileNetwork();
371             m_env.batchNetworks.insert(m_env.batchNetworks.begin(), m_env.network);
372
373             m_topology.reset();
374             m_env.engine->release_pending_memory();
375         }
376     } else {
377         m_topology = std::make_shared<cldnn::topology>();
378         Load(network);
379         CompileNetwork();
380         m_topology.reset();
381         m_env.engine->release_pending_memory();
382     }
383
384     m_env.debugOptions.AddTimedEvent("Loading", "Loading Begin");
385     m_env.debugOptions.PrintTimedEvents();
386     m_env.debugOptions.ClearTimedEvents();
387 }
388
389 inline std::string layer_type_name_ID(InferenceEngine::CNNLayer* layer) {
390     return layer->type + ":" + layer->name;
391 }
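// e.g. a Convolution layer named "conv1" yields the primitive id "Convolution:conv1"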
392
393 inline std::string layer_type_name_ID(InferenceEngine::CNNLayerPtr layer) {
394     return layer_type_name_ID(layer.get());
395 }
396
397 std::vector<InferenceEngine::CNNLayerPtr> CLDNNGraph::GetNextLayers(const InferenceEngine::DataPtr data) {
398     std::vector<InferenceEngine::CNNLayerPtr> nextLayers;
399     if (data == nullptr) {
400         return nextLayers;
401     }
402     for (auto nl : data->getInputTo()) {
403         nextLayers.push_back(nl.second);
404     }
405     return nextLayers;
406 }
407
408 std::vector<InferenceEngine::CNNLayerPtr> CLDNNGraph::GetNextLayers(const InferenceEngine::CNNLayerPtr layer) {
409     std::vector<InferenceEngine::CNNLayerPtr> nextLayers;
410     if (layer == nullptr) {
411         return nextLayers;
412     }
413     for (auto od : layer->outData) {
414         auto nextLayersVec = GetNextLayers(od);
415         for (auto nl : nextLayersVec) {
416             nextLayers.push_back(nl);
417         }
418     }
419     return nextLayers;
420 }
421
422 InferenceEngine::CNNLayerPtr CLDNNGraph::GetNextSingleLayer(const InferenceEngine::DataPtr data) {
423     if (data == nullptr) {
424         return nullptr;
425     }
426     auto nextLayers = GetNextLayers(data);
427     IE_ASSERT(nextLayers.size() == 1);
428     return nextLayers[0];
429 }
430
431 InferenceEngine::CNNLayerPtr CLDNNGraph::GetNextSingleLayer(const InferenceEngine::CNNLayerPtr layer) {
432     if (layer == nullptr) {
433         return nullptr;
434     }
435     auto nextLayers = GetNextLayers(layer);
436     IE_ASSERT(nextLayers.size() == 1);
437     return nextLayers[0];
438 }
439
440 void CLDNNGraph::InitFormat(InferenceEngine::ICNNNetwork &network) {
441     m_defaultFormat    = FormatFromLayout(InferenceEngine::Layout::NCHW);
442 }
443
444 void CLDNNGraph::CompileNetwork() {
445     m_env.debugOptions.AddTimedEvent("Network Build Begin");
446     cldnn::build_options options;
447     if (!m_config.graph_dumps_dir.empty()) {
448         options.set_option(cldnn::build_option::graph_dumps_dir(m_config.graph_dumps_dir));
449     }
450     options.set_option(cldnn::build_option::optimize_data(true));
451     options.set_option(cldnn::build_option::tuning_config(m_config.tuningConfig));
452
453     m_env.network.reset();
454     m_env.network = std::make_shared<cldnn::network>(*(m_env.engine), *m_topology, options);
455     m_env.debugOptions.AddTimedEvent("Network Build", "Network Build Begin");
456 }
457
458 void CLDNNGraph::Load(InferenceEngine::ICNNNetwork &network) {
459     InitFormat(network);
460     auto _networkPrecision = network.getPrecision();
461
462     // 1. create inputs
463     InferenceEngine::InputsDataMap networkInputs;
464     network.getInputsInfo(networkInputs);
465     p_currentInputs = &networkInputs;
466
467     InferenceEngine::OutputsDataMap networkOutputs;
468     network.getOutputsInfo(networkOutputs);
469     p_currentOutputs = &networkOutputs;
470
471     if (networkInputs.size() == 0) {
472         THROW_CLDNN_EXCEPTION("No inputs detected.");
473     }
474
475     using LayerVect = std::vector<InferenceEngine::CNNLayerPtr>;
476     std::list<InferenceEngine::CNNLayerPtr> layersToHandle;
477
478     auto push_if = [&](const LayerVect& clist) {
479         for (auto& l : clist) {
480             if (std::find_if(layersToHandle.begin(),
481                              layersToHandle.end(),
482                              [&](const CNNLayerPtr& x) { return layer_type_name_ID(x) == layer_type_name_ID(l); }) == layersToHandle.end())
483                 layersToHandle.push_back(l);
484         }
485     };
486
487     auto allInputs = CNNNetGetAllInputLayers(network);
488     for (auto input : allInputs) {
489         if (LayerTypeFromStr(input->type) == ConstantBlob) {
490             AddConstantBlobInput(input);
491         } else {
492             auto iter = networkInputs.find(input->name);    // regular input
493             if (iter != networkInputs.end()) {
494                 AddInputPrimitive(iter->second, input->precision);
495             }
496         }
497         // collect next layers to process
498         push_if(GetNextLayers(input));
499     }
500
501     // 2. traverse layers
502     unsigned infLoopProtection = 0;
503     while (!layersToHandle.empty()) {
504         if (infLoopProtection++ >= layersToHandle.size()) {
505             THROW_CLDNN_EXCEPTION("Infinite loop during network creation");
506             break;
507         }
508         InferenceEngine::CNNLayerPtr currLayer = layersToHandle.front();
509         layersToHandle.pop_front();
510         auto layerName = layer_type_name_ID(currLayer);
511
512         if (m_env.primitiveIDs.find(layerName) != m_env.primitiveIDs.end()) {
513             infLoopProtection = 0;
514             continue;  // this layer was already added (had multiple inputs)
515         }
516
517         bool missingInput = false;
518         try {
519             GetPrevLayersPrimitives(currLayer);
520         } catch (const std::exception&) {
521             missingInput = true;
522         }
523
524         if (missingInput) {  // some inputs aren't created yet
525             layersToHandle.push_back(currLayer);  // push the current layer to the end of the line
526             continue;  // move on to the next layer
527         }
528
529         infLoopProtection = 0;  // found a layer with all inputs already existing
530         CreateSingleLayerPrimitive(currLayer);  // currLayer will be advanced if layer was skipped or merged
531         m_env.prevPrimitiveIDs[layerName] = GetPrevLayersPrimitives(currLayer);
532
533         push_if(GetNextLayers(currLayer));
534     }
535
536     // 3. Handle output reordering
537     for (auto output : networkOutputs) {
538         // always reorder and let clDNN remove unneeded reorders
539         AddOutputPrimitive(output.first, output.second);
540     }
541
542     // 4. reset the temporary input/output maps; they point at stack locals
543     //    and are only valid while Load() is running
544     p_currentInputs = nullptr;
545     p_currentOutputs = nullptr;
546 }
547
548 CLDNNGraph::LayerType CLDNNGraph::LayerTypeFromStr(const std::string &str) {
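    // the lookup below is case-insensitive (caseless_map), so e.g. "softmax" and
    // "SoftMax" resolve to the same LayerType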
549     static const caseless_map<std::string, CLDNNGraph::LayerType> LayerNameToType = {
550         { "Convolution" , Convolution },
551         { "ReLU" , ReLU },
552         { "ReLU6" , ReLU6 },
553         { "Sigmoid" , Sigmoid },
554         { "Logistic" , Sigmoid },
555         { "TanH" , TanH },
556         { "ELU" , ELU },
557         { "Activation" , Activation },
558         { "Exp" , Exp },
559         { "Not" , Not },
560         { "Norm" , LRN },
561         { "Pooling" , Pooling },
562         { "FullyConnected" , FullyConnected },
563         { "SoftMax" , SoftMax },
564         { "Power" , Power },
565         { "Split" , Split },
566         { "Slice" , Split },
567         { "Concat" , Concatenate },
568         { "Eltwise" , Eltwise },
569         { "SimplerNMS" , SimplerNMS },
570         { "ROIPooling" , ROIPooling },
571         { "Crop" , Crop },
572         { "Deconvolution" , Deconvolution },
573         { "PriorBox" , PriorBox },
574         { "DetectionOutput" , DetectionOutput },
575         { "Normalize" , Normalize },
576         { "Reshape" , Reshape },
577         { "Permute" , Permute },
578         { "Flatten" , Flatten },
579         { "BatchNormalization" , BatchNormalization },
580         { "PReLU" , PReLU },
581         { "ScaleShift" , ScaleShift },
582         { "Proposal" , Proposal },
583         { "PSROIPooling" , PSROIPooling },
584         { "Clamp" , Clamp },
585         { "Copy" , Copy },
586         { "Upsampling" , Upsampling },
587         { "Resample" , Resample },
588         { "RegionYolo" , RegionYolo },
589         { "ReorgYolo" , ReorgYolo },
590         { "Const" , ConstantBlob },
591         { "ArgMax" , ArgMax },
592         { "MVN" , MVN },
593         { "Unpooling" , Unpooling },
594         { "Tile" , Tile },
595         { "Pad" , Pad },
596         { "LSTMCell" , LSTMCell },
597         { "LSTMSequence" , RNN },
598         { "RNNSequence" , RNN },
599         { "Gather" , Gather },
600         { "DepthToSpace" , DepthToSpace },
601         { "ShuffleChannels" , ShuffleChannels },
602         { "StridedSlice" , StridedSlice },
603         { "ReverseSequence" , ReverseSequence }
604     };
605     auto it = LayerNameToType.find(str);
606     if (it != LayerNameToType.end())
607         return it->second;
608     else
609         return NO_TYPE;
610 }
611
612 cldnn::pooling_mode CLDNNGraph::PoolingModeFromIEPooling(InferenceEngine::PoolingLayer::PoolType pt, bool excludePadding) {
613     switch (pt) {
614         case InferenceEngine::PoolingLayer::PoolType::MAX:
615             return cldnn::pooling_mode::max;
616         case InferenceEngine::PoolingLayer::PoolType::AVG:
617             return excludePadding ? cldnn::pooling_mode::average_no_padding : cldnn::pooling_mode::average;
618         default: IE_ASSERT(0);  // unhandled pool mode
619             THROW_CLDNN_EXCEPTION("Unsupported pooling type: " << pt);
620             break;
621     }
622
623     return cldnn::pooling_mode::max;  // shouldn't get here
624 }
625
626 cldnn::eltwise_mode CLDNNGraph::EltwiseModeFromIEEltwise(InferenceEngine::EltwiseLayer::eOperation op) {
627     switch (op) {
628         case InferenceEngine::EltwiseLayer::Sum:
629             return cldnn::eltwise_mode::sum;
630         case InferenceEngine::EltwiseLayer::Prod:
631             return cldnn::eltwise_mode::prod;
632         case InferenceEngine::EltwiseLayer::Max:
633             return cldnn::eltwise_mode::max;
634         case InferenceEngine::EltwiseLayer::Sub:
635             return cldnn::eltwise_mode::sub;
636         case InferenceEngine::EltwiseLayer::Min:
637             return cldnn::eltwise_mode::min;
638         case InferenceEngine::EltwiseLayer::Div:
639             return cldnn::eltwise_mode::div;
640         case InferenceEngine::EltwiseLayer::Squared_diff:
641             return cldnn::eltwise_mode::squared_diff;
642         case InferenceEngine::EltwiseLayer::Equal:
643             return cldnn::eltwise_mode::eq;
644         case InferenceEngine::EltwiseLayer::Not_equal:
645             return cldnn::eltwise_mode::ne;
646         case InferenceEngine::EltwiseLayer::Less:
647             return cldnn::eltwise_mode::lt;
648         case InferenceEngine::EltwiseLayer::Less_equal:
649             return cldnn::eltwise_mode::le;
650         case InferenceEngine::EltwiseLayer::Greater:
651             return cldnn::eltwise_mode::gt;
652         case InferenceEngine::EltwiseLayer::Greater_equal:
653             return cldnn::eltwise_mode::ge;
654         case InferenceEngine::EltwiseLayer::Logical_AND:
655             return cldnn::eltwise_mode::logic_and;
656         case InferenceEngine::EltwiseLayer::Logical_OR:
657             return cldnn::eltwise_mode::logic_or;
658         case InferenceEngine::EltwiseLayer::Logical_XOR:
659             return cldnn::eltwise_mode::logic_xor;
660         default: THROW_CLDNN_EXCEPTION("Unsupported eltwise operation: " << op);
661             break;
662     }
663
664     return cldnn::eltwise_mode::max;  // shouldn't get here
665 }
666
667 cldnn::concatenation::concatenation_axis CLDNNGraph::ConcatAxisFromIEAxis(unsigned axis) {
668     switch (axis) {
669     case 0:
670         return cldnn::concatenation::concatenation_axis::along_b;
671     case 1:
672         return cldnn::concatenation::concatenation_axis::along_f;
673     case 2:
674         return cldnn::concatenation::concatenation_axis::along_y;
675     case 3:
676         return cldnn::concatenation::concatenation_axis::along_x;
677     default: THROW_CLDNN_EXCEPTION("Unsupported concatenation axis: " << axis);
678         break;
679     }
680
681     return cldnn::concatenation::concatenation_axis::along_f;  // shouldn't get here
682 }
683
684 void CLDNNGraph::CreatePrimitiveFromBlob(cldnn::primitive_id primID,
685                                          const InferenceEngine::Blob::Ptr pBlob,
686                                          cldnn::layout blobLayout,
687                                          size_t blobByteOffset,
688                                          WeightRearrangeType rearrange) {
689     auto mem = cldnn::memory::allocate(*(m_env.engine), blobLayout);
690     auto tmpPointer = mem.pointer<char>();  // implicitly maps buffer - unmap in destructor
691     auto buf = tmpPointer.data();
692     auto bufSize = blobLayout.bytes_count();
693 // The condition below is not valid once we use groups - todo: think of some other size check here
694 //     if ((pBlob != nullptr) &&
695 //         (pBlob->size() * (broadcastFeatures ? blobLayout.size.feature[0] : 1)) != blobLayout.count()) {
696 //         THROW_CLDNN_EXCEPTION("Unexpected blob size");
697 //     }
698     if (pBlob == nullptr) {
699         THROW_CLDNN_EXCEPTION("Missing blob data: " << primID);
700     } else if ((pBlob->layout() != InferenceEngine::OIHW) &&
701                (pBlob->layout() != InferenceEngine::NCHW) &&
702                (pBlob->layout() != InferenceEngine::CHW) &&
703                (pBlob->layout() != InferenceEngine::NC) &&
704                (pBlob->layout() != InferenceEngine::C)) {
705         // TODO: support more layouts
706         THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(pBlob->layout()) << ") in blob: " << primID);
707     } else if (rearrange == BroadcastFeatures) {
708         size_t features = static_cast<size_t>(blobLayout.size.feature[0]);
709         if (pBlob->size() != features) {
710             THROW_CLDNN_EXCEPTION("Invalid blob dimensions to broadcast: " << primID);
711         }
712         auto data = static_cast<const char *>(pBlob->buffer());
713         auto elementSize = cldnn::data_type_traits::size_of(blobLayout.data_type);
714         size_t featureElements = blobLayout.count() / static_cast<size_t>(blobLayout.size.feature[0]);
715         IE_ASSERT(blobLayout.format == cldnn::format::bfyx);
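        // replicates each per-feature scalar across all elements of that feature;
        // e.g. features = 2, featureElements = 3 turn an input {a, b} into
        // {a, a, a, b, b, b} in the destination buffer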
716         for (size_t f = 0; f < features; f++) {
717             for (size_t e = 0; e < featureElements; e++) {
718                 for (size_t b = 0; b < elementSize; b++) {
719                     buf[(f*featureElements + e)*elementSize + b] = data[f*elementSize + b];
720                 }
721             }
722         }
723     } else if (rearrange == FlipDeconvDims) {
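        // swaps the two outermost (feature) dimensions: the source blob is read
        // i-major and written o-major, so an I x O x Y x X weight arrangement
        // becomes O x I x Y x X while each Y x X slice stays intact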
724         auto data = static_cast<const char *>(pBlob->buffer());
725         auto elementSize = cldnn::data_type_traits::size_of(blobLayout.data_type);
726
727         size_t inputFeatureElements = static_cast<size_t>(blobLayout.size.feature[0]);
728         size_t outputFeatureElements = static_cast<size_t>(blobLayout.size.batch[0]);
729
730         size_t featureSize = elementSize * static_cast<size_t>(blobLayout.size.spatial[0] * blobLayout.size.spatial[1]);
731
732         for (size_t i = 0; i < inputFeatureElements; i++) {
733             for (size_t o = 0; o < outputFeatureElements; o++) {
734                 size_t outputShift = (o*inputFeatureElements + i)*featureSize;
735                 size_t inputShift = (i*outputFeatureElements + o)*featureSize;
736
737                 for (size_t b = 0; b < featureSize; b++) {
738                     buf[outputShift + b] = data[inputShift + b];
739                 }
740             }
741         }
742     } else {
743         auto data = static_cast<const char *>(pBlob->buffer());
744         for (size_t i = 0; i < bufSize; i++) {
745             buf[i] = data[i + blobByteOffset];
746         }
747     }
748     m_topology->add(cldnn::data(primID, mem));
749 }
750
751 void CLDNNGraph::CreateWeightAndBiasPrimitives(const InferenceEngine::CNNLayerPtr& layer,
752                                                    std::vector<cldnn::primitive_id>& weightsPrimID,
753                                                    std::vector<cldnn::primitive_id>& biasesPrimID) {
754     cldnn::tensor::value_type inFeatures = 1;  // todo: workaround for xyf input, handle general case (xf, xyzf etc...)
755     std::shared_ptr<Data> insData0 = layer->insData[0].lock();
756     IE_ASSERT(insData0 != nullptr);
757     if (insData0->dims.size() > 2) {
758         inFeatures = TensorValue(insData0->dims[2]);
759     }
760     cldnn::tensor::value_type outFeatures(0);
761     std::vector<cldnn::tensor::value_type> weightDimsVec;
762     InferenceEngine::Blob::Ptr pWeightsBlob, pBiasBlob;
763     unsigned groupSize = 1;
764     WeightRearrangeType rearrange = NO_REARRANGE;
765
766     switch (LayerTypeFromStr(layer->type)) {
767     case Convolution: {
768         auto convLayer = dynamic_cast<InferenceEngine::ConvolutionLayer *> (layer.get());
769         groupSize = convLayer->_group;
770         if ((inFeatures % groupSize) || (convLayer->_out_depth % groupSize)) {
771             THROW_CLDNN_EXCEPTION("Invalid group size in layer " << convLayer->name);
772         }
773         if (groupSize >= 16)  // cldnn optimization for 16 and more groups
774             groupSize = 1;
775         weightDimsVec = {
776             TensorValue(convLayer->_out_depth / groupSize),
777             TensorValue(inFeatures / convLayer->_group),
778             TensorValue(convLayer->_kernel[X_AXIS]),
779             TensorValue(convLayer->_kernel[Y_AXIS])
780         };
781         outFeatures = convLayer->_out_depth;
782         pWeightsBlob = convLayer->_weights;
783         pBiasBlob = convLayer->_biases;
784     }
785         break;
786     case Deconvolution: {
787         auto deconvLayer = dynamic_cast<InferenceEngine::DeconvolutionLayer *> (layer.get());
788         groupSize = deconvLayer->_group;
789         if ((inFeatures % groupSize) || (deconvLayer->_out_depth % groupSize)) {
790             THROW_CLDNN_EXCEPTION("Invalid group size in layer " << deconvLayer->name);
791         }
792         if (groupSize >= 16)  // cldnn optimization for 16 and more groups
793             groupSize = 1;
794         weightDimsVec = {
795             TensorValue(deconvLayer->_out_depth / groupSize),
796             TensorValue(inFeatures / deconvLayer->_group),
797             TensorValue(deconvLayer->_kernel[X_AXIS]),
798             TensorValue(deconvLayer->_kernel[Y_AXIS])
799         };
800         outFeatures = deconvLayer->_out_depth;
801         pWeightsBlob = deconvLayer->_weights;
802         pBiasBlob = deconvLayer->_biases;
803
804         if ((groupSize < outFeatures) || (groupSize < inFeatures))
805             rearrange = FlipDeconvDims;
806     }
807         break;
808     default:
809         IE_ASSERT(!"Wrong weightable layer type");  // shouldn't get here
810         break;
811     }
812
813     // create weights primitive
814     cldnn::layout weightsLayout = cldnn::layout(
815         DataTypeFromPrecision(layer->precision),
816         m_defaultFormat,
817         cldnn::tensor(weightDimsVec));
818     size_t bytesPerGroup = weightsLayout.bytes_count();
819
820     for (unsigned g = 0; g < groupSize; g++) {
821         cldnn::primitive_id weightID = layer_type_name_ID(layer) + m_weightsTag + std::to_string(g);
822         CreatePrimitiveFromBlob(
823             weightID,
824             pWeightsBlob,
825             weightsLayout,
826             g * bytesPerGroup,
827             rearrange);
828         weightsPrimID.push_back(weightID);
829     }
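    // each group g becomes its own cldnn::data primitive, read from the IE blob at
    // offset g * bytesPerGroup and named "<type>:<name>_cldnn_weights<g>"
    // (see layer_type_name_ID and m_weightsTag above)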
830
831     // create bias primitive
832     if (pBiasBlob != nullptr) {
833         cldnn::layout biasesLayout = cldnn::layout(
834             DataTypeFromPrecision(layer->precision),
835             m_defaultFormat,
836             cldnn::spatial(TensorValue(outFeatures / groupSize)));
837         size_t bytesPerGroup = biasesLayout.bytes_count();
838         for (unsigned g = 0; g < groupSize; g++) {
839             cldnn::primitive_id biasID = layer_type_name_ID(layer) + m_biasesTag + std::to_string(g);
840             CreatePrimitiveFromBlob(
841                 biasID,
842                 pBiasBlob,
843                 biasesLayout,
844                 g * bytesPerGroup);
845             biasesPrimID.push_back(biasID);
846         }
847     }
848 }
849
850 void CLDNNGraph::CreateScaleWeightsAndBiasesFromBN(
851     const InferenceEngine::BatchNormalizationLayer* bnLayer,
852     cldnn::primitive_id weightsPrimID,
853     cldnn::primitive_id biasesPrimID) {
854
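    // Folds batch normalization into a single scale primitive. With _weights
    // holding the variance and _biases holding the mean (as read below):
    //     bn(x) = (x - mean) / sqrt(variance + eps) = scale * x + bias,
    //     scale = 1 / sqrt(variance + eps),  bias = -mean * scale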
855     if (bnLayer->_weights->dims() != bnLayer->_biases->dims()) {
856         THROW_CLDNN_EXCEPTION("mean/variance dimensions mismatch in " << bnLayer->name);
857     }
858     if (bnLayer->_weights->precision() != bnLayer->_biases->precision()) {
859         THROW_CLDNN_EXCEPTION("mean/variance precision mismatch in " << bnLayer->name);
860     }
861
862     cldnn::tensor blobTensor(0);
863     switch (bnLayer->outData[0]->dims.size()) {
864     case 2:
865         blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[0]));
866         break;
867     case 4:
868         blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[2]));
869         break;
870     default:
871         THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name);
872     }
873     cldnn::layout blobLayout(
874         DataTypeFromPrecision(bnLayer->precision),
875         m_defaultFormat,
876         blobTensor);
877
878     switch (bnLayer->_weights->precision()) {
879     case Precision::FP16: {
880         InferenceEngine::TBlob<uint16_t> weightsBlob(bnLayer->_weights->precision(), bnLayer->_weights->layout(),  bnLayer->_weights->dims());
881         weightsBlob.allocate();
882         InferenceEngine::TBlob<uint16_t> biasesBlob(bnLayer->_biases->precision(), bnLayer->_biases->layout(), bnLayer->_biases->dims());
883         biasesBlob.allocate();
884
885         auto weightsData = weightsBlob.data();
886         auto biasesData = biasesBlob.data();
887         auto varianceData = static_cast<const uint16_t *>(bnLayer->_weights->buffer());
888         auto meanData = static_cast<const uint16_t *>(bnLayer->_biases->buffer());
889
890         cldnn_status status = CLDNN_SUCCESS;
891         for (size_t i = 0; i < weightsBlob.size(); i++) {
892             auto variance = cldnn_half_to_float(varianceData[i], &status);
893             if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
894             auto mean = cldnn_half_to_float(meanData[i], &status);
895             if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
896
897             float scale = 1.0f / sqrt(variance + bnLayer->epsilon);
898             weightsData[i] = cldnn_float_to_half(scale, &status);
899             if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
900             biasesData[i] = cldnn_float_to_half((-mean) * scale, &status);
901             if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
902         }
903         CreatePrimitiveFromBlob(weightsPrimID, std::make_shared<InferenceEngine::TBlob<uint16_t>>(weightsBlob), blobLayout);
904         CreatePrimitiveFromBlob(biasesPrimID, std::make_shared<InferenceEngine::TBlob<uint16_t>>(biasesBlob), blobLayout);
905     }
906         break;
907     case Precision::FP32: {
908         InferenceEngine::TBlob<float> weightsBlob(bnLayer->_weights->precision(), bnLayer->_weights->layout(), bnLayer->_weights->dims());
909         weightsBlob.allocate();
910         InferenceEngine::TBlob<float> biasesBlob(bnLayer->_biases->precision(), bnLayer->_biases->layout(), bnLayer->_biases->dims());
911         biasesBlob.allocate();
912
913         auto weightsData = weightsBlob.data();
914         auto biasesData = biasesBlob.data();
915         auto varianceData = static_cast<const float *>(bnLayer->_weights->buffer());
916         auto meanData = static_cast<const float *>(bnLayer->_biases->buffer());
917
918         for (size_t i = 0; i < weightsBlob.size(); i++) {
919             auto variance = varianceData[i];
920             auto mean = meanData[i];
921             weightsData[i] = 1.0f / sqrt(variance + bnLayer->epsilon);
922             biasesData[i] = (-mean) * weightsData[i];
923         }
924         CreatePrimitiveFromBlob(weightsPrimID, std::make_shared<InferenceEngine::TBlob<float>>(weightsBlob), blobLayout);
925         CreatePrimitiveFromBlob(biasesPrimID, std::make_shared<InferenceEngine::TBlob<float>>(biasesBlob), blobLayout);
926     }
927         break;
928     default:
929         THROW_CLDNN_EXCEPTION("Unhandled mean/variance precision in " << bnLayer->name);
930         break;
931     }
932 }
933
934 void CLDNNGraph::CreateSingleLayerPrimitive(InferenceEngine::CNNLayerPtr &layer) {
935     // Initialize a profiling entry
936     InitProfileInfo(layer->name, layer->type);
937
938     // First check for custom layer
939     auto customLayer = m_config.customLayers.find(layer->type);
940     if (customLayer != m_config.customLayers.end()) {
941         CreateCustomLayerPrimitive(layer, customLayer->second);
942         return;
943     }
944
945     // Otherwise move on to built-in layer types
946     switch (LayerTypeFromStr(layer->type)) {
947         case Convolution: CreateConvolutionPrimitive(layer);
948             break;
949         case ReLU:
950         case ReLU6:
951         case Sigmoid:
952         case TanH:
953         case ELU:
954         case Clamp:
955         case Activation:
956         case Exp:
957         case Not:
958             CreateActivationPrimitive(layer, LayerTypeFromStr(layer->type));
959             break;
960         case LRN: CreateLRNPrimitive(layer);
961             break;
962         case Pooling: CreatePoolingPrimitive(layer);
963             break;
964         case Unpooling: CreateMaxUnpoolingPrimitive(layer);
965             break;
966         case FullyConnected: CreateFullyConnectedPrimitive(layer);
967             break;
968         case SoftMax: CreateSoftMaxPrimitive(layer);
969             break;
970         case Power: CreatePowerPrimitive(layer);
971             break;
972         case Split: CreateSplitPrimitive(layer);
973             break;
974         case Concatenate: CreateConcatenatePrimitive(layer);
975             break;
976         case Eltwise: CreateEltwisePrimitive(layer);
977             break;
978         case SimplerNMS: CreateSimplerNMSPrimitive(layer);
979             break;
980         case ROIPooling: CreateROIPoolingPrimitive(layer);
981             break;
982         case Crop: CreateCropPrimitive(layer);
983             break;
984         case Deconvolution: CreateDeconvolutionPrimitive(layer);
985             break;
986         case PriorBox: CreatePriorBoxPrimitive(layer);
987             break;
988         case DetectionOutput: CreateDetectionOutputPrimitive(layer);
989             break;
990         case Normalize: CreateNormalizePrimitive(layer);
991             break;
992         case Reshape: CreateReshapePrimitive(layer);
993             break;
994         case Permute: CreatePermutePrimitive(layer);
995             break;
996         case Flatten: CreateFlattenPrimitive(layer);
997             break;
998         case BatchNormalization: CreateBatchNormalizationPrimitive(layer);
999             break;
1000         case PReLU: CreatePReLUPrimitive(layer);
1001             break;
1002         case ScaleShift: CreateScaleShiftPrimitive(layer);
1003             break;
1004         case Proposal: CreateProposalPrimitive(layer);
1005             break;
1006         case PSROIPooling: CreatePSROIPoolingPrimitive(layer);
1007             break;
1008         case Copy: CreateCopyPrimitive(layer);
1009             break;
1010         case Upsampling: CreateUpsamplingPrimitive(layer);
1011             break;
1012         case Resample: CreateResamplePrimitive(layer);
1013             break;
1014         case ArgMax: CreateArgMaxPrimitive(layer);
1015             break;
1016         case MVN: CreateMVNPrimitive(layer);
1017             break;
1018         case LSTMCell: CreateLSTMCellPrimitive(layer);
1019             break;
1020         case RNN: CreateRNNPrimitive(layer);
1021             break;
1022         case RegionYolo: CreateYOLO2RegionPrimitive(layer);
1023             break;
1024         case ReorgYolo: CreateYOLO2ReorgPrimitive(layer);
1025             break;
1026         case Tile: CreateTilePrimitive(layer);
1027             break;
1028         case Pad: CreatePadPrimitive(layer);
1029             break;
1030         case Gather: CreateGatherPrimitive(layer);
1031             break;
1032         case DepthToSpace: CreateDepthToSpacePrimitive(layer);
1033             break;
1034         case ShuffleChannels: CreateShuffleChannelsPrimitive(layer);
1035             break;
1036         case StridedSlice: CreateStridedSlicePrimitive(layer);
1037             break;
1038         case ReverseSequence: CreateReverseSequencePrimitive(layer);
1039             break;
1040         default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type);
1041     }
1042 }
1043
1044 void CLDNNGraph::CreateScaleShiftPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1045     ValidateLayer(layer, 1);
1046     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1047     auto scaleShiftLayer = dynamic_cast<InferenceEngine::ScaleShiftLayer*> (layer.get());
1048
1049     // create scales and biases
1050     cldnn::primitive_id scalePrimID = scaleShiftLayer->name + m_scalesTag;
1051     cldnn::primitive_id biasPrimID = scaleShiftLayer->name + m_biasesTag;
1052
1053     const auto& dims = scaleShiftLayer->_weights->dims();
1054     cldnn::tensor weightTensor(1);
1055     switch (dims.size()) {
1056     case 1: weightTensor = cldnn::feature(TensorValue(dims[0]));  // value per feature (or 1 global value)
1057         break;
1058     case 4: weightTensor = cldnn::tensor(TensorValue(dims[0]), TensorValue(dims[1]), TensorValue(dims[3]), TensorValue(dims[2]));  // value per pixel
1059         break;
1060     default: THROW_CLDNN_EXCEPTION("Invalid weights dimensions in layer " << layer->name);
1061         break;
1062     }
1063     cldnn::layout blobLayout(DataTypeFromPrecision(layer->precision), m_defaultFormat, weightTensor);
1064     CreatePrimitiveFromBlob(scalePrimID, scaleShiftLayer->_weights, blobLayout);
1065     if (scaleShiftLayer->_biases != nullptr) {
1066         if (scaleShiftLayer->_biases->dims() != dims) {
1067             THROW_CLDNN_EXCEPTION("Invalid bias blob dimensions in layer " << layer->name);
1068         }
1069         CreatePrimitiveFromBlob(biasPrimID, scaleShiftLayer->_biases, blobLayout);
1070     } else {
1071         biasPrimID = "";  // 0-bias
1072     }
1073
1074     std::string scaleShiftLayerName = layer_type_name_ID(layer);
1075     auto scaleShiftPrim = cldnn::scale(
1076         scaleShiftLayerName,
1077         inputPrimitives[0],
1078         scalePrimID,
1079         biasPrimID);
1080
1081     m_env.primitiveIDs[scaleShiftLayerName] = scaleShiftLayerName;
1082     m_topology->add(scaleShiftPrim);
1083     m_env.profilingIDs.push_back(scaleShiftLayerName);
1084 }
1085
1086 void CLDNNGraph::CreateProposalPrimitive(InferenceEngine::CNNLayerPtr & layer) {
1087     ValidateLayer(layer, 3);
1088     auto proposalLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1089
1090     float nms_thresh = proposalLayer->GetParamAsFloat("nms_thresh", 0.7f);
1091     int min_size = proposalLayer->GetParamAsInt("min_size", 16);
1092     int feature_stride = proposalLayer->GetParamAsInt("feat_stride", 16);
1093     int pre_nms_topn = proposalLayer->GetParamAsInt("pre_nms_topn", 6000);
1094     int post_nms_topn = proposalLayer->GetParamAsInt("post_nms_topn", 300);
1095     const std::vector<float> ratio = proposalLayer->GetParamAsFloats("ratio");
1096     const std::vector<float> scale = proposalLayer->GetParamAsFloats("scale");
1097     float box_coordinate_scale = proposalLayer->GetParamAsFloat("box_coordinate_scale", 1.0f);
1098     float box_size_scale = proposalLayer->GetParamAsFloat("box_size_scale", 1.0f);
1099     int base_size = proposalLayer->GetParamAsInt("base_size", 16);
1100     std::string framework = proposalLayer->GetParamAsString("framework", "");
1101     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1102     bool normalize = layer->GetParamsAsBool("normalize", false);
1103     bool clip_before_nms = layer->GetParamsAsBool("clip_before_nms", true);
1104     bool clip_after_nms = layer->GetParamsAsBool("clip_after_nms", false);
1105
1106     float coordinates_offset;
1107     bool swap_xy;
1108     bool initial_clip;
1109     bool round_ratios;
1110     bool shift_anchors;
1111
1112     if (framework == "tensorflow") {
1113         coordinates_offset = 0.0f;
1114         initial_clip = true;
1115         shift_anchors = true;
1116         round_ratios = false;
1117         swap_xy = true;
1118     } else {
1119         coordinates_offset = 1.0f;
1120         initial_clip = false;
1121         shift_anchors = false;
1122         round_ratios = true;
1123         swap_xy = false;
1124     }
1125
1126     std::string proposalLayerName = layer_type_name_ID(layer);
1127     auto proposalPrim = cldnn::proposal(
1128         proposalLayerName,
1129         inputPrimitives[0],  // cls_score
1130         inputPrimitives[1],  // bbox_pred
1131         inputPrimitives[2],  // im_info
1132         0,                   // max_num_proposals is unused
1133         nms_thresh,
1134         base_size,
1135         min_size,
1136         feature_stride,
1137         pre_nms_topn,
1138         post_nms_topn,
1139         ratio,
1140         scale,
1141         coordinates_offset,
1142         box_coordinate_scale,
1143         box_size_scale,
1144         swap_xy,
1145         initial_clip,
1146         clip_before_nms,
1147         clip_after_nms,
1148         round_ratios,
1149         shift_anchors,
1150         normalize);
1151
1152     m_env.primitiveIDs[proposalLayerName] = proposalLayerName;
1153     m_topology->add(proposalPrim);
1154     m_env.profilingIDs.push_back(proposalLayerName);
1155 }
1156
1157 void CLDNNGraph::CreatePReLUPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1158     ValidateLayer(layer, 1);
1159     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1160     auto preluLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1161
1162     std::string preluLayerName = layer_type_name_ID(layer);
1163     auto inDataPtr = preluLayer->insData[0].lock();
1164     if (!inDataPtr) {
1165         THROW_CLDNN_EXCEPTION("Data inserted into PReLU " << preluLayer->name << " is nullptr");
1166     }
1167     auto inputDims = inDataPtr->dims;
1168     static const std::string blobName("weights");
1169     ValidateGenericLayerBlobs(preluLayer, { blobName });
1170
1171     bool channel_shared = preluLayer->GetParamsAsBool("channel_shared", false);
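    // channel_shared means a single slope value for the whole tensor (a one-element
    // blob folded into the activation below); otherwise one slope per channel is
    // uploaded as a separate weights primitive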
1172
1173     auto slopeBlob = preluLayer->blobs.at(blobName);
1174     if (channel_shared) {
1175         if (slopeBlob->dims()[0] != 1) {
1176             THROW_CLDNN_EXCEPTION("PReLU slope blob with wrong dimensions in " << preluLayer->name);
1177         }
1178         float slope(0.0f);
1179         switch (slopeBlob->precision()) {
1180         case InferenceEngine::Precision::FP32:
1181             slope = *static_cast<const float *>(slopeBlob->buffer());
1182             break;
1183         case InferenceEngine::Precision::FP16:
1184         {
1185             cldnn_status status = CLDNN_SUCCESS;
1186             slope = cldnn_half_to_float(*static_cast<const uint16_t *>(slopeBlob->buffer()), &status);
1187             if (status != CLDNN_SUCCESS) {
1188                 THROW_CLDNN_EXCEPTION("Error converting fp16 value in " << preluLayer->name);
1189             }
1190         }
1191             break;
1192         default: THROW_CLDNN_EXCEPTION("Invalid PReLU slope blob precision in " << preluLayer->name);
1193         }
1194         m_topology->add(cldnn::activation(preluLayerName, inputPrimitives[0], activation_relu_negative_slope, { slope, 0.f }));
1195     } else {
1196         CreateGenericLayerBlobPrimitives(preluLayer);
1197         cldnn::primitive_id slopePrimID(preluLayerName + "_" + blobName + m_weightsTag);
1198         m_topology->add(cldnn::activation(preluLayerName, inputPrimitives[0], slopePrimID, activation_relu_negative_slope));
1199     }
1200
1201     m_env.primitiveIDs[preluLayerName] = preluLayerName;
1202     m_env.profilingIDs.push_back(preluLayerName);
1203 }
1204
1205 void CLDNNGraph::CreateBatchNormalizationPrimitive(InferenceEngine::CNNLayerPtr & layer) {
1206     ValidateLayer(layer, 1);
1207     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1208     std::string bnLayerName = layer_type_name_ID(layer);
1209
1210     auto bnLayer = dynamic_cast<InferenceEngine::BatchNormalizationLayer *> (layer.get());
1211     cldnn::primitive_id weightID = bnLayerName + "_" + m_scalesTag;
1212     cldnn::primitive_id biasID = bnLayerName + "_" + m_biasesTag;
1213
1214 #define _SCALE_BN_OPT
1215 #ifdef _SCALE_BN_OPT
1216     // Using scale as an optimization (1 mad instead of mad+rsq)
1217     // create new blobs for scale shift
1218     CreateScaleWeightsAndBiasesFromBN(bnLayer, weightID, biasID);
1219     auto scalePrim = cldnn::scale(bnLayerName, inputPrimitives[0], weightID, biasID);
1220
1221     m_env.primitiveIDs[bnLayerName] = bnLayerName;
1222     m_topology->add(scalePrim);
1223     m_env.profilingIDs.push_back(bnLayerName);
1224     return;
1225 #endif  // _SCALE_BN_OPT
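    // the direct batch_norm path below is compiled out while _SCALE_BN_OPT is
    // defined (note the early return above); it is kept as the reference
    // implementation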
1226
1227     cldnn::tensor blobTensor(0);
1228     switch (bnLayer->outData[0]->dims.size()) {
1229     case 2:
1230         blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[0]));
1231         break;
1232     case 4:
1233         blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[2]));
1234         break;
1235     default:
1236         THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name);
1237     }
1238     cldnn::layout blobLayout(
1239         DataTypeFromPrecision(layer->precision),
1240         m_defaultFormat,
1241         blobTensor);
1242
1243     // Create variance primitive
1244     cldnn::primitive_id varianceID = bnLayerName + "_" + m_weightsTag;
1245     CreatePrimitiveFromBlob(varianceID, bnLayer->_weights, blobLayout);
1246
1247     // Create mean primitive
1248     cldnn::primitive_id meanID = bnLayerName + "_" + m_biasesTag;
1249     CreatePrimitiveFromBlob(meanID, bnLayer->_biases, blobLayout);
1250
1251     auto bnPrim = cldnn::batch_norm(
1252         bnLayerName,
1253         inputPrimitives[0],
1254         meanID,
1255         varianceID,
1256         bnLayer->epsilon);
1257
1258     m_env.primitiveIDs[bnLayerName] = bnLayerName;
1259     m_topology->add(bnPrim);
1260     m_env.profilingIDs.push_back(bnLayerName);
1261 }
1262
1263 void CLDNNGraph::CreateFlattenPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1264     ValidateLayer(layer, 1);
1265     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1266     auto flattenLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1267     std::string flattenLayerName = layer_type_name_ID(layer);
1268
1269     auto flattenPrim = cldnn::reshape(
1270         flattenLayerName,
1271         inputPrimitives[0],
1272         CldnnTensorFromIEDims(flattenLayer->outData[0]->dims));
1273
1274     m_env.primitiveIDs[flattenLayerName] = flattenLayerName;
1275     m_topology->add(flattenPrim);
1276     m_env.profilingIDs.push_back(flattenLayerName);
1277 }
1278
1279 void CLDNNGraph::CreatePermutePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1280     ValidateLayer(layer, 1);
1281     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1282     auto permuteLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1283     std::vector<uint16_t> ie_order;
1284     for (auto& a : permuteLayer->GetParamAsInts("order"))
1285         ie_order.push_back(static_cast<uint16_t>(a));
1286
1287     // if order size is less than 4 - fill the rest with just copy
1288     for (auto o = ie_order.size(); o < 4; o++)
1289         ie_order.push_back((uint16_t)o);
1290
1291     /*
1292         Because the cldnn ordering is bfxy while the IE ordering is bfyx,
1293         we need to adjust the permute order.
1294     */
1295     std::vector<uint16_t> cldnn_permute_order;
1296     // 1. Switch permute order values (x and y)
1297     for (auto const& o : ie_order) {
1298         if (o == 2)
1299             cldnn_permute_order.push_back(3);
1300         else if (o == 3)
1301             cldnn_permute_order.push_back(2);
1302         else
1303             cldnn_permute_order.push_back(o);
1304     }
1305     // 2. Swap x and y positions
1306     std::swap(cldnn_permute_order[2], cldnn_permute_order[3]);
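    // Worked example (illustration): IE order {0, 2, 3, 1}
    //   after swapping the values 2 and 3:     {0, 3, 2, 1}
    //   after swapping positions [2] and [3]:  {0, 3, 1, 2}
    // which is the same permutation expressed in clDNN's bfxy ordering.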
1307
1308     std::string permuteLayerName = layer_type_name_ID(layer);
1309
1310     auto permutePrim = cldnn::permute(
1311         permuteLayerName,
1312         inputPrimitives[0],
1313         cldnn_permute_order);
1314
1315     m_env.primitiveIDs[permuteLayerName] = permuteLayerName;
1316     m_topology->add(permutePrim);
1317     m_env.profilingIDs.push_back(permuteLayerName);
1318 }
1319
1320 void CLDNNGraph::CreateReshapePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1321     ValidateLayer(layer, 1);
1322     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1323     auto reshapeLayer = dynamic_cast<InferenceEngine::ReshapeLayer*> (layer.get());
1324     IE_ASSERT(reshapeLayer->outData.size());
1325     std::string reshapeLayerName = layer_type_name_ID(layer);
1326
1327     auto reshapePrim = cldnn::reshape(
1328         reshapeLayerName,
1329         inputPrimitives[0],
1330         CldnnTensorFromIEDims(reshapeLayer->outData[0]->dims));
1331
1332     m_env.primitiveIDs[reshapeLayerName] = reshapeLayerName;
1333     m_topology->add(reshapePrim);
1334     m_env.profilingIDs.push_back(reshapeLayerName);
1335 }
1336
1337 void CLDNNGraph::CreateNormalizePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1338     ValidateLayer(layer, 1);
1339     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1340     auto normLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1341     ValidateGenericLayerBlobs(normLayer, { "weights" });
1342     CreateGenericLayerBlobPrimitives(normLayer);
1343
1344     // params
1345     bool across_spatial = normLayer->GetParamsAsBool("across_spatial", true);
1346     float eps = normLayer->GetParamAsFloat("eps", 0.0f);
1347
1348     // WA for Model Optimizer serializing eps with %.6f precision, which truncates small values to 0
1349     if (eps == 0.0f) {
1350         eps = 1e-10f;
1351     }
1352
1353     std::string normLayerName = layer_type_name_ID(layer);
1354     auto normPrim = cldnn::normalize(
1355         normLayerName,
1356         inputPrimitives[0],
1357         normLayerName + "_weights" + m_weightsTag,
1358         across_spatial,
1359         eps);
1360
1361     m_env.primitiveIDs[normLayerName] = normLayerName;
1362     m_topology->add(normPrim);
1363     m_env.profilingIDs.push_back(normLayerName);
1364 }
1365
1366 void CLDNNGraph::CreateDetectionOutputPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1367     ValidateLayer(layer, 3);
1368     auto detectionLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1369
1370     uint32_t num_classes            = detectionLayer->GetParamAsUInt("num_classes", 1);
1371     bool share_location             = detectionLayer->GetParamsAsBool("share_location", true);
1372     int background_label_id         = detectionLayer->GetParamAsInt("background_label_id", 0);
1373     float nms_threshold             = detectionLayer->GetParamAsFloat("nms_threshold", 0.3f);
1374     int top_k                       = detectionLayer->GetParamAsInt("top_k", -1);
1375     float confidence_threshold      = detectionLayer->GetParamAsFloat("confidence_threshold", -FLT_MAX);
1376     float eta                       = detectionLayer->GetParamAsFloat("eta", 1.0f);
1377     int keep_top_k                  = detectionLayer->GetParamAsInt("keep_top_k", -1);
1378     bool variance_encoded_in_target = detectionLayer->GetParamsAsBool("variance_encoded_in_target", false);
1379     int input_width                 = detectionLayer->GetParamAsInt("input_width", -1);
1380     int input_height                = detectionLayer->GetParamAsInt("input_height", -1);
1381     bool normalized                 = detectionLayer->GetParamsAsBool("normalized", true);
1382     std::string code_type           = detectionLayer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER");
1383     bool clip_before_nms            = detectionLayer->GetParamsAsBool("clip_before_nms", false) ||
1384                                       detectionLayer->GetParamsAsBool("clip", false);  // For backward compatibility
1385     bool clip_after_nms             = detectionLayer->GetParamsAsBool("clip_after_nms", false);
1386     bool decrease_label_id          = detectionLayer->GetParamsAsBool("decrease_label_id", false);
1387
1388     cldnn::prior_box_code_type cldnnCodeType = PriorBoxCodeFromString(code_type);
1389     int32_t prior_info_size = normalized != 0 ? 4 : 5;
1390     int32_t prior_coordinates_offset = normalized != 0 ? 0 : 1;
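    // Non-normalized priors are stored with one extra leading element per box,
    // hence 5 values per prior and a coordinate offset of 1; normalized priors
    // hold just the 4 box coordinates starting at offset 0.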
1391
1392     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1393     std::string detectionLayerName = layer_type_name_ID(layer);
1394     auto detectionPrim = cldnn::detection_output(detectionLayerName,
1395                                                  inputPrimitives[0],
1396                                                  inputPrimitives[1],
1397                                                  inputPrimitives[2],
1398                                                  num_classes,
1399                                                  keep_top_k,
1400                                                  share_location,
1401                                                  background_label_id,
1402                                                  nms_threshold,
1403                                                  top_k,
1404                                                  eta,
1405                                                  cldnnCodeType,
1406                                                  variance_encoded_in_target,
1407                                                  confidence_threshold,
1408                                                  prior_info_size,
1409                                                  prior_coordinates_offset,
1410                                                  normalized,
1411                                                  input_width,
1412                                                  input_height,
1413                                                  decrease_label_id,
1414                                                  clip_before_nms,
1415                                                  clip_after_nms);
1416
1417     m_env.primitiveIDs[detectionLayerName] = detectionLayerName;
1418     m_topology->add(detectionPrim);
1419     m_env.profilingIDs.push_back(detectionLayerName);
1420 }
1421
1422 void CLDNNGraph::CreatePriorBoxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1423     ValidateLayer(layer, 2);
1424     auto priorBoxLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1425
1426     // params
1427     std::vector<float> min_size = priorBoxLayer->GetParamAsFloats("min_size");
1428     std::vector<float> max_size = priorBoxLayer->GetParamAsFloats("max_size", {});
1429     std::vector<float> aspect_ratio = priorBoxLayer->GetParamAsFloats("aspect_ratio", {});
1430     std::vector<float> variance = priorBoxLayer->GetParamAsFloats("variance");
1431     bool flip = priorBoxLayer->GetParamsAsBool("flip", true);
1432     bool clip = priorBoxLayer->GetParamsAsBool("clip", false);
1433     bool scale_all_sizes = priorBoxLayer->GetParamsAsBool("scale_all_sizes", true);
1434     float offset = priorBoxLayer->GetParamAsFloat("offset", 0.5f);
1435
1436     auto step_w = priorBoxLayer->GetParamAsFloat("step_w", 0.0f);
1437     auto step_h = priorBoxLayer->GetParamAsFloat("step_h", 0.0f);
1438     auto step   = priorBoxLayer->GetParamAsFloat("step", 0.0f);
1439
1440     float _step_w = 0.0f;
1441     float _step_h = 0.0f;
1442     if (HasParam(priorBoxLayer->params, "step_w") && step_w != 0.0f &&
1443         HasParam(priorBoxLayer->params, "step_h") && step_h != 0.0f) {
1444         _step_w = step_w;
1445         _step_h = step_h;
1446     } else if (HasParam(priorBoxLayer->params, "step") && step != 0.0f) {
1447         _step_w = step;
1448         _step_h = step;
1449     }
1450
1451     int img = priorBoxLayer->GetParamAsInt("img_size", 0);
1452     int img_w = priorBoxLayer->GetParamAsInt("img_w", 0);
1453     int img_h = priorBoxLayer->GetParamAsInt("img_h", 0);
1454     if ((img != 0) || (img_w != 0) || (img_h != 0)) {
1455         // unsupported mode
1456         THROW_CLDNN_EXCEPTION("Unsupported image sizes in prior box " + layer->name + " (use an image blob instead of dimensions)");
1457     }
1458
1459     IE_ASSERT(layer->insData[1].lock());
1460     auto img_dims = layer->insData[1].lock()->dims;
1461     cldnn::tensor img_size = cldnn::spatial(TensorValue(img_dims[0]), TensorValue(img_dims[1]));
1462     std::vector<cldnn::primitive_id> inputPrimitives = GetPrevLayersPrimitives(layer);
1463     // the second input is not used by value - only its dimensions are taken from the layer input
1464
1465     if (_step_w == 0.0f || _step_h == 0.0f) {
1466         _step_w = static_cast<float>(img_w) / static_cast<float>(img_dims[0]);
1467         _step_h = static_cast<float>(img_h) / static_cast<float>(img_dims[1]);
1468     }
1469
1470     std::string priorBoxLayerName = layer_type_name_ID(layer);
1471     auto priorBoxPrim = cldnn::prior_box(
1472         priorBoxLayerName,
1473         inputPrimitives[0],
1474         img_size,
1475         min_size,
1476         max_size,
1477         aspect_ratio,
1478         flip,
1479         clip,
1480         variance,
1481         _step_w,
1482         _step_h,
1483         offset,
1484         scale_all_sizes);
1485
1486     m_env.primitiveIDs[priorBoxLayerName] = priorBoxLayerName;
1487     m_topology->add(priorBoxPrim);
1488     m_env.profilingIDs.push_back(priorBoxLayerName);
1489 }
1490
1491 void CLDNNGraph::CreateDeconvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1492     ValidateLayer(layer, 1);
1493     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1494     auto deconvLayer = dynamic_cast<InferenceEngine::DeconvolutionLayer *> (layer.get());
1495
1496     if (deconvLayer->_dilation[X_AXIS] != 1 || deconvLayer->_dilation[Y_AXIS] != 1) {
1497         THROW_CLDNN_EXCEPTION("Unsupported dilation in deconvolution " << layer->name);
1498     }
1499
1500     std::vector<cldnn::primitive_id> weightPrimID;
1501     std::vector<cldnn::primitive_id> biasPrimID;
1502     CreateWeightAndBiasPrimitives(layer, weightPrimID, biasPrimID);
1503     auto allPads = getPaddings(*deconvLayer);
1504     cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
1505         cldnn::spatial(deconvLayer->_stride[X_AXIS], deconvLayer->_stride[Y_AXIS]));
1506     cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
1507         cldnn::spatial(-allPads.begin[X_AXIS], -allPads.begin[Y_AXIS]));
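    // Note: clDNN expresses spatial padding as a negative input offset,
    // which is why the begin paddings are negated here.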
1508
1509     std::string deconvLayerName = layer_type_name_ID(layer);
1510
1511     if (deconvLayer->_group >= 16) {
1512         auto deconvPrim = cldnn::deconvolution(deconvLayerName,
1513             inputPrimitives[0],
1514             weightPrimID,
1515             biasPrimID,
1516             deconvLayer->_group,
1517             stride,
1518             padding,
1519             false,
1520             0.0f,
1521             CldnnTensorFromIEDims(deconvLayer->outData[0]->dims));
1522         m_topology->add(deconvPrim);
1523     } else {
1524         auto deconvPrim = cldnn::deconvolution(deconvLayerName,
1525             inputPrimitives[0],
1526             weightPrimID,
1527             biasPrimID,
1528             stride,
1529             padding,
1530             false,
1531             0.0f,
1532             CldnnTensorFromIEDims(deconvLayer->outData[0]->dims));
1533         m_topology->add(deconvPrim);
1534     }
1535     m_env.primitiveIDs[deconvLayerName] = deconvLayerName;
1536     m_env.profilingIDs.push_back(deconvLayerName);
1537 }
1538
1539 void CLDNNGraph::CreateCropPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1540     if (layer->insData.size() != 1 && layer->insData.size() != 2) {
1541         THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name);
1542     }
1543     if (layer->_fusedWith) {
1544         THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
1545     }
1546     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1547     auto cropLayer = dynamic_cast<InferenceEngine::CropLayer*> (layer.get());
1548     IE_ASSERT(cropLayer->axis.size() == cropLayer->offset.size());
1549     // IE_ASSERT(cropLayer->outData[0] && cropLayer->outData[0]->dims.size() == 4);
1550
1551     std::vector<cldnn::tensor::value_type> offset{ 0, 0, 0, 0 };
1552     for (size_t i = 0; i < cropLayer->axis.size(); i++) {
1553         if (cropLayer->axis[i] < 0 || cropLayer->axis[i] > 3) {
1554             THROW_CLDNN_EXCEPTION("Invalid crop axis: " + std::to_string(cropLayer->axis[i]) + " in layer " + cropLayer->name);
1555         }
1556         offset[cropLayer->axis[i]] = cropLayer->offset[i];
1557     }
1558     auto outputDims = cropLayer->outData[0]->dims;
1559     size_t ods = outputDims.size();
1560     cldnn::tensor refSize(
1561         TensorValue(ods > 3 ? outputDims[3] : 1),
1562         TensorValue(ods > 2 ? outputDims[2] : 1),
1563         TensorValue(outputDims[0]),
1564         TensorValue(outputDims[1]));
1565
1566     cldnn::tensor offSize(
1567         TensorValue(offset[0]),
1568         TensorValue(offset[1]),
1569         TensorValue(offset[3]),
1570         TensorValue(offset[2]));
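    // Illustration: axis = {2, 3} (H, W in IE order) with offset = {2, 4} yields
    // offset[] = {0, 0, 2, 4}, which maps to offSize (batch 0, feature 0, x 4, y 2)
    // because the cldnn::tensor constructor takes (b, f, x, y).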
1571
1572     std::string cropLayerName = layer_type_name_ID(layer);
1573     auto cropPrim = cldnn::crop(
1574         cropLayerName,
1575         inputPrimitives[0],
1576         refSize,
1577         offSize);
1578     m_env.primitiveIDs[cropLayerName] = cropLayerName;
1579     m_topology->add(cropPrim);
1580     m_env.profilingIDs.push_back(cropLayerName);
1581 }
1582
1583 void CLDNNGraph::CreateROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1584     ValidateLayer(layer, 2);
1585     auto roiPoolingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1586
1587     // params
1588     int pooled_width = roiPoolingLayer->GetParamAsInt("pooled_w", 0);
1589     int pooled_height = roiPoolingLayer->GetParamAsInt("pooled_h", 0);
1590     float spatial_scale = roiPoolingLayer->GetParamAsFloat("spatial_scale", 1.0f);
1591     std::string method = roiPoolingLayer->GetParamAsString("method", "max");
1592     bool position_sensitive = false;
1593
1594     cldnn::pooling_mode mode = cldnn::pooling_mode::max;
1595     if (method == "bilinear") {
1596         mode = cldnn::pooling_mode::bilinear;
1597     }
1598     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1599
1600     std::string roiPoolingLayerName = layer_type_name_ID(layer);
1601     auto roiPoolingPrim = cldnn::roi_pooling(roiPoolingLayerName,
1602                                              inputPrimitives[0],  // input data
1603                                              inputPrimitives[1],  // input rois
1604                                              mode,
1605                                              position_sensitive,
1606                                              pooled_width,
1607                                              pooled_height,
1608                                              spatial_scale);
1609     m_env.primitiveIDs[roiPoolingLayerName] = roiPoolingLayerName;
1610     m_topology->add(roiPoolingPrim);
1611     m_env.profilingIDs.push_back(roiPoolingLayerName);
1612 }
1613
1614 void CLDNNGraph::CreatePSROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1615     ValidateLayer(layer, 2);
1616     auto psROIPoolingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1617
1618     // params
1619     int group_size = psROIPoolingLayer->GetParamAsInt("group_size");
1620     int output_dim = psROIPoolingLayer->GetParamAsInt("output_dim");
1621     float spatial_scale = psROIPoolingLayer->GetParamAsFloat("spatial_scale");
1622     size_t spatial_bins_x = static_cast<size_t>(psROIPoolingLayer->GetParamAsInt("spatial_bins_x", 1));
1623     size_t spatial_bins_y = static_cast<size_t>(psROIPoolingLayer->GetParamAsInt("spatial_bins_y", 1));
1624     std::string mode_str = psROIPoolingLayer->GetParamAsString("mode", "average");
1625     bool position_sensitive = true;
1626
1627     cldnn::pooling_mode mode = mode_str == "average" ? cldnn::pooling_mode::average
1628                                                      : cldnn::pooling_mode::bilinear;
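    // Position-sensitive (R-FCN style) ROI pooling reuses the same roi_pooling
    // primitive with position_sensitive = true: the pooled grid is
    // group_size x group_size, and each bin is pooled from its own group of
    // input channels, producing output_dim channels per ROI.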
1629
1630     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1631
1632     std::string psROIPoolingLayerName = layer_type_name_ID(layer);
1633     auto psROIPoolingPrim = cldnn::roi_pooling(psROIPoolingLayerName,
1634                                                inputPrimitives[0],  // input data
1635                                                inputPrimitives[1],  // input rois
1636                                                mode,
1637                                                position_sensitive,
1638                                                group_size,
1639                                                group_size,
1640                                                spatial_scale,
1641                                                output_dim,
1642                                                spatial_bins_x,
1643                                                spatial_bins_y);
1644
1645     m_env.primitiveIDs[psROIPoolingLayerName] = psROIPoolingLayerName;
1646     m_topology->add(psROIPoolingPrim);
1647     m_env.profilingIDs.push_back(psROIPoolingLayerName);
1648 }
1649
1650 void CLDNNGraph::CreateCustomLayerPrimitive(InferenceEngine::CNNLayerPtr & layer, CLDNNCustomLayerPtr customLayer) {
1651     ValidateLayer(layer, 0);
1652     // todo: handle fusing
1653     auto genericLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1654     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1655
1656     // Handle defines
1657     std::string layerDefines;
1658     for (const auto& def : customLayer->Defines()) {
1659         std::string singleDefine("#define " + def.name + " " + def.prefix);
1660         if (genericLayer->params.find(def.param) != genericLayer->params.end()) {
1661             singleDefine += genericLayer->params.at(def.param);
1662         } else {
1663             singleDefine += def.default_value;
1664         }
1665         singleDefine += def.postfix + "\n";
1666         layerDefines.append(singleDefine);
1667     }
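    // Illustration (hypothetical define): an entry
    //   { name = "MAX_VALUE", param = "max", prefix = "(", postfix = ")", default_value = "0" }
    // on a layer with params["max"] == "6.0" produces the line:
    //   #define MAX_VALUE (6.0)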
1668
1669     // pre-size with one (initially empty) slot per input; blob inputs are appended below
1670     std::vector<cldnn::primitive_id> reorderedInputs;
1671     reorderedInputs.resize(inputPrimitives.size());
1672
1673     // Handle Blobs
1674     std::map<std::string, size_t> blobIndex;
1675     for (auto& blob : genericLayer->blobs) {
1676         // create primitive from blob (always 1d)
1677         cldnn::primitive_id blobId = genericLayer->name + "_" + blob.first;
1678         if (blob.second->dims().size() != 1) {
1679             THROW_CLDNN_EXCEPTION("Invalid dimensions for blob " << blob.first << " in layer " << genericLayer->name);
1680         }
1681         CreatePrimitiveFromBlob(blobId, blob.second, cldnn::layout(
1682             DataTypeFromPrecision(blob.second->precision()),
1683             m_defaultFormat,
1684             cldnn::tensor(1, 1, TensorValue(blob.second->dims()[0]), 1)));
1685         // save index in blobIndex
1686         blobIndex[blob.first] = reorderedInputs.size();
1687         // add to reorderedInputs
1688         reorderedInputs.push_back(blobId);
1689     }
1690
1691     // Handle kernel parameters
1692     std::vector<cldnn_arg> kernelParameters;
1693     cldnn::format outputFormat(cldnn::format::any);
1694     for (const auto& param : customLayer->KernelParams()) {
1695         switch (param.type) {
1696         case CLDNNCustomLayer::ParamType::Input: {
1697             kernelParameters.resize(std::max(kernelParameters.size(), size_t(param.paramIndex + 1)));
1698             kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input;
1699             kernelParameters[param.paramIndex].index = static_cast<cldnn_arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
1700
1701             // Handle input reorder
1702             if (param.portIndex < inputPrimitives.size() && reorderedInputs[param.portIndex].empty()) {
1703                 // todo: add support for multiple reorders of the same input? (read as bfyx for one arg and yxfb for another)
1704                 if (param.format != cldnn::format::any) {
1705                     auto reorderPrimName = inputPrimitives[param.portIndex] + "_" + layer->name + m_preCustomLayerTag;
1706                     auto preprocessPrim = cldnn::reorder(
1707                         reorderPrimName,
1708                         inputPrimitives[param.portIndex],
1709                         param.format,
1710                         DataTypeFromPrecision(layer->precision));
1711                     m_topology->add(preprocessPrim);
1712                     m_env.profilingIDs.push_back(reorderPrimName);
1713                     InitProfileInfo(reorderPrimName, "Reorder");
1714                     reorderedInputs[param.portIndex] = (reorderPrimName);
1715                 } else {
1716                     reorderedInputs[param.portIndex] = inputPrimitives[param.portIndex];
1717                 }
1718             }
1719         }
1720             break;
1721         case CLDNNCustomLayer::ParamType::Output: {
1722             kernelParameters.resize(std::max(kernelParameters.size(), size_t(param.paramIndex + 1)));
1723             kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_output;
1724             kernelParameters[param.paramIndex].index =
1725                 static_cast<cldnn_arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
1726             outputFormat = param.format;
1727         }
1728             break;
1729         case CLDNNCustomLayer::ParamType::Data: {
1730             kernelParameters.resize(std::max(kernelParameters.size(), size_t(param.paramIndex + 1)));
1731             kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input;
1732             kernelParameters[param.paramIndex].index =
1733                 static_cast<cldnn_arg_index>((blobIndex.find(param.blobName) == blobIndex.end()) ? -1 : blobIndex.at(param.blobName));
1734         }
1735             break;
1736         default:
1737             THROW_CLDNN_EXCEPTION("Invalid custom layer param type: " << param.type << " in layer: " << genericLayer->name);
1738         }
1739     }
1740     const std::string layerTitle("\n// Layer " + layer->name + " using Custom Layer " + customLayer->Name() + "\n");
1741     const std::string defineTitle("// Custom Layer User Defines\n");
1742
1743     auto dims = genericLayer->outData[0]->dims;
1744     std::reverse(dims.begin(), dims.end());
1745
1746     size_t N = (dims.size() > 0) ? dims[0] : 1;
1747     size_t C = (dims.size() > 1) ? dims[1] : 1;
1748     size_t H = (dims.size() > 2) ? dims[2] : 1;
1749     size_t W = (dims.size() > 3) ? dims[3] : 1;
1750     cldnn::tensor outputTensor = cldnn::tensor(cldnn::batch(N), cldnn::feature(C), cldnn::spatial(W, H));
1751
1752     cldnn::layout outputLayout = cldnn::layout(DataTypeFromPrecision(genericLayer->precision), outputFormat, outputTensor);
1753
1754     // evaluate work sizes rules
1755     std::vector<size_t> gws, lws;
1756
1757     // assume output tensor is dimension source by default
1758     int batchDim = outputTensor.batch[0];
1759     int featureDim = outputTensor.feature[0];
1760     int yDim = outputTensor.spatial[1];
1761     int xDim = outputTensor.spatial[0];
1762     int iidx = customLayer->InputDimSourceIndex();
1763
1764     std::string genericLayerName = layer_type_name_ID(layer);
1765     // if input index is greater than -1, take dimension from input
1766     if (iidx >= 0) {
1767         if (iidx >= genericLayer->insData.size())
1768             THROW_CLDNN_EXCEPTION("Invalid input tensor for index: " << iidx);
1769         // get dimensions from one of the input tensors
1770         auto inDataPtr = genericLayer->insData[iidx].lock();
1771         if (!inDataPtr) {
1772             THROW_CLDNN_EXCEPTION("Data inserted into generic layer " << genericLayer->name << " is nullptr");
1773         }
1774         auto inputDims = inDataPtr->dims;
1775
1776         batchDim = featureDim = yDim = 0;
1777         xDim = inputDims[0];
1778
1779         if (inputDims.size() > 1)
1780             yDim = inputDims[1];
1781         if (inputDims.size() > 2)
1782             featureDim = inputDims[2];
1783         if (inputDims.size() > 3)
1784             batchDim = inputDims[3];
1785     }
1786     const std::map<char, int> vars = {
1787         { 'b', batchDim }  , { 'B', batchDim },
1788         { 'f', featureDim }, { 'F', featureDim },
1789         { 'y', yDim },       { 'Y', yDim },
1790         { 'x', xDim },       { 'X', xDim },
1791     };
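    // Illustration (hypothetical rules, assuming SimpleMathExpression supports '*'):
    // with b = 1, f = 32, y = 16, x = 16, GlobalSizeRules() of { "X*Y", "F" }
    // evaluates to gws = { 256, 32 }.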
1792     for (auto rule : customLayer->GlobalSizeRules()) {
1793         SimpleMathExpression expr;
1794         expr.SetVariables(vars);
1795         expr.SetExpression(rule);
1796         gws.push_back(expr.Evaluate());
1797     }
1798     for (auto rule : customLayer->LocalSizeRules()) {
1799         SimpleMathExpression expr;
1800         expr.SetVariables(vars);
1801         expr.SetExpression(rule);
1802         lws.push_back(expr.Evaluate());
1803     }
1804
1805     auto customPrim = cldnn::custom_gpu_primitive(
1806         genericLayerName,
1807         reorderedInputs,
1808         { layerTitle, defineTitle, layerDefines, customLayer->KernelSource() },
1809         customLayer->KernelEntry(),
1810         kernelParameters,
1811         customLayer->CompilerOptions(),
1812         outputLayout,
1813         gws,
1814         lws);
1815
1816     if (outputLayout.format != cldnn::format::any &&
1817         p_currentOutputs->find(genericLayerName) == p_currentOutputs->end()) {
1818         // Handle output reorder
1819         auto reorderPrimName = genericLayerName + m_postCustomLayerTag;
1820         m_topology->add(
1821             cldnn::reorder(
1822                 reorderPrimName,
1823                 genericLayerName,
1824                 m_defaultFormat,
1825                 customPrim.output_layout.data_type));
1826         m_env.primitiveIDs[genericLayerName] = reorderPrimName;
1827         m_env.primitiveIDs[reorderPrimName] = reorderPrimName;
1828         m_env.profilingIDs.push_back(reorderPrimName);
1829         InitProfileInfo(reorderPrimName, "Reorder");
1830     } else {
1831         m_env.primitiveIDs[genericLayerName] = genericLayerName;
1832     }
1833     m_topology->add(customPrim);
1834     m_env.profilingIDs.push_back(genericLayerName);
1835 }
1836
1837 void CLDNNGraph::CreateSimplerNMSPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1838     ValidateLayer(layer, 3);
1839     IE_ASSERT(layer->insData[0].lock()->dims[3] == 1);  // only handling input batch size 1
1840     IE_ASSERT(layer->insData[1].lock()->dims[3] == 1);  // only handling input batch size 1
1841     auto simpleNMSLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1842
1843     int max_num_proposals = simpleNMSLayer->GetParamAsInt("max_num_proposals");
1844     float iou_threshold = simpleNMSLayer->GetParamAsFloat("iou_threshold", 0.7f);
1845     int min_bbox_size = simpleNMSLayer->GetParamAsInt("min_bbox_size", 16);
1846     int feature_stride = simpleNMSLayer->GetParamAsInt("feat_stride", 16);
1847     int pre_nms_topn = simpleNMSLayer->GetParamAsInt("pre_nms_topn");
1848     int post_nms_topn = simpleNMSLayer->GetParamAsInt("post_nms_topn");
1849     std::vector<float> scale = simpleNMSLayer->GetParamAsFloats("scale");
1850     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1851
1852     std::string simpleNMSLayerName = layer_type_name_ID(layer);
1853     auto simpleNMSPrim = cldnn::proposal(
1854         simpleNMSLayerName,
1855         inputPrimitives[0],  // cls_score
1856         inputPrimitives[1],  // bbox_pred
1857         inputPrimitives[2],  // im_info
1858         max_num_proposals,
1859         iou_threshold,
1860         min_bbox_size,
1861         feature_stride,
1862         pre_nms_topn,
1863         post_nms_topn,
1864         { 0.5f, 1.0f, 2.0f },  // ratios for the SimplerNMS variant
1865         scale);
1866
1867     m_env.primitiveIDs[simpleNMSLayerName] = simpleNMSLayerName;
1868     m_topology->add(simpleNMSPrim);
1869     m_env.profilingIDs.push_back(simpleNMSLayerName);
1870 }
1871
1872 void CLDNNGraph::CreateEltwisePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1873     ValidateEltwiseLayer(layer);
1874
1875     auto eltwiseLayer = dynamic_cast<InferenceEngine::EltwiseLayer *> (layer.get());
1876     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1877
1878     std::vector<float> coefficients = eltwiseLayer->coeff;
1879     if (eltwiseLayer->_operation != InferenceEngine::EltwiseLayer::Sum && !coefficients.empty()) {
1880         THROW_IE_EXCEPTION << "Only sum operation supports operands coefficients";
1881     }
1882
1883     if (!coefficients.empty() && coefficients.size() != inputPrimitives.size()) {
1884         THROW_IE_EXCEPTION << "Number of provided coefficients is not equal to number of operands";
1885     }
1886
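    // With coefficients, a Sum eltwise computes out = c[0]*in[0] + c[1]*in[1] + ...
    // e.g. coeff = { 0.5f, 0.5f } averages two inputs.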
1887     std::string eltwiseLayerName = layer_type_name_ID(layer);
1888     auto eltwisePrim = cldnn::eltwise(
1889         eltwiseLayerName,
1890         inputPrimitives,
1891         EltwiseModeFromIEEltwise(eltwiseLayer->_operation),
1892         coefficients);
1893     m_env.primitiveIDs[eltwiseLayerName] = eltwiseLayerName;
1894     m_topology->add(eltwisePrim);
1895     m_env.profilingIDs.push_back(eltwiseLayerName);
1896 }
1897
1898 void CLDNNGraph::CreateConcatenatePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1899     ValidateLayer(layer, 0);
1900     auto concatLayer = dynamic_cast<InferenceEngine::ConcatLayer *> (layer.get());
1901     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1902     std::string concatLayerName = layer_type_name_ID(layer);
1903     auto concatPrim = cldnn::concatenation(
1904         concatLayerName,
1905         inputPrimitives,
1906         ConcatAxisFromIEAxis(concatLayer->_axis));
1907     m_env.primitiveIDs[concatLayerName] = concatLayerName;
1908     m_topology->add(concatPrim);
1909     m_env.profilingIDs.push_back(concatLayerName);
1910 }
1911
1912 void CLDNNGraph::CreateSplitPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1913     ValidateLayer(layer, 1);
1914     auto splitLayer = dynamic_cast<InferenceEngine::SplitLayer *> (layer.get());
1915     if (IsValidSplitConvMerge(splitLayer)) {
1916         // AlexNet-style split->conv*2->merge
1917         CreateFusedSplitConvMergePrimitive(layer);
1918     } else {
1919 #ifdef _USE_SPLIT_PRIMITIVE
1920         auto inputPrimitives = GetPrevLayersPrimitives(layer);
1921         auto inputDims = splitLayer->insData[0].lock()->dims;
1922         InferenceEngine::SizeVector startOffset(inputDims.size());
1923         std::vector<std::pair<cldnn::primitive_id, cldnn::tensor>> outputOffsets;
1924         std::cout << "Splitting layer: " << layer->name << "\n\tSize:" << CldnnTensorFromIEDims(inputDims) << std::endl;
1925         for (auto& outLayer : splitLayer->outData) {
1926             if (outLayer->dims.size() != startOffset.size()) {
1927                 THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1928             }
1929             for (size_t i = 0; i < inputDims.size(); i++) {
1930                 if ((outLayer->dims[i] + startOffset[i]) > inputDims[i]) {
1931                     THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1932                 }
1933             }
1934             auto outTensor = CldnnTensorFromIEDims(outLayer->dims);
1935             auto cropPrim = cldnn::crop(outLayer->name, inputPrimitives[0], outTensor, CldnnTensorFromIEDims(startOffset));
1936             m_topology->add(cropPrim);
1937             m_env.primitiveIDs[outLayer->name] = outLayer->name;
1938             m_env.profilingIDs.push_back(outLayer->name);
1939             outputOffsets.push_back({ outLayer->name, CldnnTensorFromIEDims(startOffset) });
1940             for (size_t i = 0; i < inputDims.size(); i++) {
1941                 if (outLayer->dims[i] != inputDims[i]) {
1942                     startOffset[i] += outLayer->dims[i];
1943                 }
1944             }
1945         }
1946
1947         auto splitPrim = cldnn::split(
1948             splitLayer->name,
1949             inputPrimitives[0],
1950             outputOffsets);
1951         m_topology->add(splitPrim);
1952
1953
1954         // mark the split itself as optimized out (it is realized via crop primitives)
1955         InitProfileInfo(layer->name, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
1956
1957 #else  // _USE_SPLIT_PRIMITIVE
1958         // TODO: replace with clDNN split when it's implemented
1959         auto inputPrimitives = GetPrevLayersPrimitives(layer);
1960         auto inDataPtr = splitLayer->insData[0].lock();
1961         if (!inDataPtr) {
1962             THROW_CLDNN_EXCEPTION("Data inserted into split layer " << splitLayer->name << " is nullptr");
1963         }
1964         auto inputDims = inDataPtr->dims;
1965         InferenceEngine::SizeVector startOffset(inputDims.size());
1966
1967         auto TensorFromIEDims = [](const InferenceEngine::SizeVector& dims, int def) {
1968             switch (dims.size()) {
1969             case 1: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(def, def));
1970             case 2: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(def, def));
1971             case 3: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(def, dims[2]));
1972             case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
1973             default: THROW_CLDNN_EXCEPTION("Invalid dimensions size(" << dims.size() << ") in split layer");
1974             }
1975         };
1976
1977         for (auto& outLayer : splitLayer->outData) {
1978             std::string outLayerName = splitLayer->type + ":" + outLayer->name;
1979             if (outLayer->dims.size() != startOffset.size()) {
1980                 THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1981             }
1982             for (size_t i = 0; i < inputDims.size(); i++) {
1983                 if ((outLayer->dims[i] + startOffset[i]) > inputDims[i]) {
1984                     THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1985                 }
1986             }
1987             SizeVector reverseDims = outLayer->dims;
1988             std::reverse(reverseDims.begin(), reverseDims.end());
1989             auto outTensor = TensorFromIEDims(reverseDims, 1);
1990
1991             SizeVector reverseOffset = startOffset;
1992             std::reverse(reverseOffset.begin(), reverseOffset.end());
1993             auto offsetTensor = TensorFromIEDims(reverseOffset, 0);
1994
1995             auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor);
1996             m_env.primitiveIDs[outLayerName] = outLayerName;
1997             m_topology->add(cropPrim);
1998             m_env.profilingIDs.push_back(outLayerName);
1999             InitProfileInfo(outLayerName, "Crop");
2000
2001             for (size_t i = 0; i < inputDims.size(); i++) {
2002                 if (outLayer->dims[i] != inputDims[i]) {
2003                     startOffset[i] += outLayer->dims[i];
2004                 }
2005             }
2006         }
2007
2008         // mark the split itself as optimized out (it is realized via crop primitives)
2009         InitProfileInfo(layer->name, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
2010 #endif  // _USE_SPLIT_PRIMITIVE
2011     }
2012 }
2013
2014 void CLDNNGraph::CreateFusedSplitConvMergePrimitive(InferenceEngine::CNNLayerPtr &layer) {
2015     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2016     // only handle the split->conv->merge topology for now
2017     auto splitLayer = dynamic_cast<InferenceEngine::SplitLayer *> (layer.get());
2018     IE_ASSERT(IsValidSplitConvMerge(splitLayer));
2019
2020     auto convLayer1 =
2021         dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[0]).get());
2022     auto convLayer2 =
2023         dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[1]).get());
2024     auto concatLayer =
2025         dynamic_cast<InferenceEngine::ConcatLayer *> (GetNextSingleLayer(
2026             GetNextSingleLayer(splitLayer->outData[0])).get());
2027
2028     if (convLayer1 == nullptr ||
2029         convLayer2 == nullptr ||
2030         concatLayer == nullptr) {
2031         THROW_CLDNN_EXCEPTION("Expected single layer does not exist");
2032     }
2033     // Mark these layers as optimized out
2034     InitProfileInfo(convLayer1->name, convLayer1->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
2035     InitProfileInfo(convLayer2->name, convLayer2->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
2036     InitProfileInfo(concatLayer->name, concatLayer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
2037
2038     // build the split conv primitive
2039     std::vector<cldnn::primitive_id> weightPrimID;
2040     std::vector<cldnn::primitive_id> biasPrimID;
2041     CreateWeightAndBiasPrimitives(GetNextSingleLayer(splitLayer->outData[0]), weightPrimID, biasPrimID);
2042     CreateWeightAndBiasPrimitives(GetNextSingleLayer(splitLayer->outData[1]), weightPrimID, biasPrimID);
2043
2044     auto concatLayerPtr = std::make_shared<InferenceEngine::CNNLayer>(*concatLayer);
2045
2046     cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
2047                                          cldnn::spatial(convLayer1->_stride[X_AXIS], convLayer1->_stride[Y_AXIS]));
2048     auto allPad = getPaddings(*convLayer1);
2049     cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
2050                                           cldnn::spatial(-allPad.begin[X_AXIS], -allPad.begin[Y_AXIS]));
2051     cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
2052                                            cldnn::spatial(convLayer1->_dilation[X_AXIS], convLayer1->_dilation[Y_AXIS]));
2053
2054     std::string splitLayerName = layer_type_name_ID(layer);
2055     auto splitPrim = cldnn::convolution(splitLayerName,
2056                                         inputPrimitives[0],
2057                                         weightPrimID,
2058                                         biasPrimID,
2059                                         stride,
2060                                         padding,
2061                                         dilation,
2062                                         false,
2063                                         0.0f,
2064                                         CldnnTensorFromIEDims(concatLayer->outData[0]->dims));
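    // Feeding both weight/bias sets into a single cldnn::convolution creates a
    // split (group = 2) convolution, which is equivalent to
    // split -> two convolutions -> concat along the feature axis.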
2065
2066     layer = concatLayerPtr;
2067
2068     m_env.primitiveIDs[splitLayerName]  = splitLayerName;
2069     m_env.primitiveIDs[layer_type_name_ID(convLayer1)]  = splitLayerName;
2070     m_env.primitiveIDs[layer_type_name_ID(convLayer2)]  = splitLayerName;
2071     m_env.primitiveIDs[layer_type_name_ID(concatLayer)] = splitLayerName;  // pair the last merged layer (concat or relu) with
2072                                                                            // this primitive name to be used as
2073                                                                            // input prim for subsequent layers
2074     m_topology->add(splitPrim);
2075     m_env.profilingIDs.push_back(splitLayerName);
2076 }
2077
2078 void CLDNNGraph::CreatePowerPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2079     ValidateLayer(layer, 1);
2080     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2081     auto powerLayer = dynamic_cast<InferenceEngine::PowerLayer *> (layer.get());
2082     if (powerLayer->power != 1.0f && powerLayer->power != 0.5f) {
2083         THROW_CLDNN_EXCEPTION("Power Layer " << layer->name << " uses unsupported power value");
2084     }
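    // IE Power computes out = (offset + scale * x) ^ power. It is decomposed here:
    // scale/offset become a cldnn::scale primitive and power == 0.5f becomes a
    // sqrt activation; power == 1.0f with identity scale/offset needs no op at all.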
2085
2086     std::string powerLayerName = layer_type_name_ID(layer);
2087     if ((powerLayer->scale == 1.0f) && (powerLayer->offset == 0.0f)) {
2088         if (powerLayer->power == 0.5f) {
2089             auto activationPrim = cldnn::activation(powerLayerName, inputPrimitives[0], activation_sqrt);
2090             m_topology->add(activationPrim);
2091             m_env.profilingIDs.push_back(powerLayerName);
2092             m_env.primitiveIDs[powerLayerName] = powerLayerName;
2093         } else {
2094             // skip this layer
2095             m_env.primitiveIDs[powerLayerName] = inputPrimitives[0];  // register the previous primID for this layer too
2096             InitProfileInfo(layer->name, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::NOT_RUN);  // Mark this layer as not run
2097         }
2098     } else {
2099         // create scale primitive
2100         auto scaleValuePrimName = powerLayerName + m_scalesTag;
2101         AddSingleValuePrimitive(scaleValuePrimName,
2102             DataTypeFromPrecision(powerLayer->precision),
2103             powerLayer->scale);
2104
2105         cldnn::primitive_id biasValuePrimName = "";
2106         if (powerLayer->offset != 0.0f) {
2107             biasValuePrimName = powerLayerName + m_biasesTag;
2108             AddSingleValuePrimitive(biasValuePrimName,
2109                 DataTypeFromPrecision(powerLayer->precision),
2110                 powerLayer->offset);
2111         }
2112         auto scalePrim = cldnn::scale(
2113             powerLayerName,
2114             inputPrimitives[0],
2115             scaleValuePrimName,
2116             biasValuePrimName);
2117
2118         m_env.primitiveIDs[powerLayerName] = powerLayerName;
2119         m_topology->add(scalePrim);
2120         m_env.profilingIDs.push_back(powerLayerName);
2121
2122         if (powerLayer->power == 0.5f) {
2123             auto activationPrim = cldnn::activation(powerLayerName+"_sqrt", powerLayerName, activation_sqrt);
2124             m_topology->add(activationPrim);
2125             m_env.profilingIDs.push_back(powerLayerName+"_sqrt");
2126         }
2127     }
2128 }
2129
2130 void CLDNNGraph::CreateSoftMaxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2131     ValidateLayer(layer, 1);
2132     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2133     auto softmaxLayer = dynamic_cast<InferenceEngine::SoftMaxLayer *> (layer.get());
2134
2135     // additional WA: clDNN FullyConnected produces output in BX layout instead of BF,
2136     // so the softmax axis has to be remapped below when the producer is an FC layer
2137     auto prevData = layer->insData[0].lock();
2138
2139     if (prevData == nullptr) {
2140         THROW_CLDNN_EXCEPTION("SoftMax: nonexistent input for layer: " << layer->name);
2141     }
2142
2143     auto prevCreator = prevData->creatorLayer.lock();
2144     bool isPrevFC = false;
2145
2146     if (prevCreator && (LayerTypeFromStr(prevCreator->type) == FullyConnected))
2147         isPrevFC = true;
2148     // end of WA
2149
2150     std::string softmaxLayerName = layer_type_name_ID(layer);
2151     auto softmaxPrim = cldnn::softmax(softmaxLayerName, inputPrimitives[0], SoftmaxDimensionFromIEAxis(softmaxLayer, isPrevFC));
2152     m_env.primitiveIDs[softmaxLayerName] = softmaxLayerName;
2153     m_topology->add(softmaxPrim);
2154     m_env.profilingIDs.push_back(softmaxLayerName);
2155 }
2156
2157 void CLDNNGraph::CreateFullyConnectedPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2158     ValidateLayer(layer, 1);
2159     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2160     auto fcLayer = dynamic_cast<InferenceEngine::FullyConnectedLayer *> (layer.get());
2161
2162     std::string fcLayerName = layer_type_name_ID(layer);
2163     // create bias primitive
2164     cldnn::primitive_id biasesPrimID = "";
2165     if (fcLayer->_biases != nullptr) {
2166         biasesPrimID = fcLayerName + m_biasesTag;
2167         CreatePrimitiveFromBlob(biasesPrimID,
2168             fcLayer->_biases,
2169             cldnn::layout(DataTypeFromPrecision(fcLayer->precision), m_defaultFormat,
2170                 cldnn::spatial(TensorValue(fcLayer->_out_num))));
2171     }
2172
2173     // create weights primitive
2174     // gcc (at least up to 5.4) fails to deduce auto here, so the type is spelled out
2175     std::shared_ptr<Data> insData0 = fcLayer->insData[0].lock();
2176     IE_ASSERT(insData0 != nullptr);
2177     cldnn::primitive_id weightsPrimID = fcLayerName + m_weightsTag;
2178     cldnn::tensor weightsDims;
2179     switch (insData0->dims.size()) {
2180     case 4:
2181         weightsDims = { TensorValue(fcLayer->outData[0]->dims[0]),
2182                         TensorValue(insData0->dims[2]),
2183                         TensorValue(insData0->dims[0]),
2184                         TensorValue(insData0->dims[1]) };
2185         break;
2186     case 2:
2187         weightsDims = { TensorValue(fcLayer->outData[0]->dims[0]), TensorValue(insData0->dims[0]), 1, 1 };
2188         break;
2189     default: THROW_CLDNN_EXCEPTION("Invalid data dimensions");
2190     }
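    // Illustration (hypothetical shapes): a 4D input of 1x256x6x6 arrives with
    // IE-reversed dims {6, 6, 256, 1}, so for _out_num = 4096 the weights tensor
    // becomes batch 4096, feature 256, spatial 6x6 - one 256x6x6 filter per
    // output neuron.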
2191     CreatePrimitiveFromBlob(weightsPrimID,
2192                             fcLayer->_weights,
2193                             cldnn::layout(DataTypeFromPrecision(fcLayer->precision), m_defaultFormat, weightsDims));
2194
2195     auto fcPrim = cldnn::fully_connected(fcLayerName,
2196                                          inputPrimitives[0],
2197                                          weightsPrimID,
2198                                          biasesPrimID,
2199                                          false,
2200                                          0.0f);
2201
2202     m_env.primitiveIDs[fcLayerName] = fcLayerName;
2203     m_topology->add(fcPrim);
2204     m_env.profilingIDs.push_back(fcLayerName);
2205 }
2206
2207 void CLDNNGraph::CreatePoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2208     ValidateLayer(layer, 1);
2209     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2210     auto poolLayer = dynamic_cast<InferenceEngine::PoolingLayer *> (layer.get());
2211
2212     std::string poolLayerName = layer_type_name_ID(layer);
2213     auto allPads = getPaddings(*poolLayer);
2214     if (poolLayer->outData.size() > 1) {
2215         // max pooling with argmax
2216         SizeVector argmaxDims;
2217
2218         std::string realOutputID, argmaxOutputID;
2219         int outputOrder = 0;
2220
2221         for (auto out : poolLayer->outData) {
2222             auto layersMap = out->getInputTo();
2223
2224             for (auto item : layersMap) {
2225                 bool isUnpooling = (LayerTypeFromStr(item.second->type) == Unpooling);
2226                 if (outputOrder == 1 && isUnpooling) {
2227                     argmaxDims = out->dims;
2228                     argmaxOutputID = out->name;
2229                 } else {
2230                     realOutputID = out->name;
2231                 }
2232                 outputOrder++;
2233             }
2234         }
2235
2236         // create mutable_data primitive for storing argmax data
2237         cldnn::tensor mutableTensor;
2238         switch (argmaxDims.size()) {
2239         case 4: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[3]), TensorValue(argmaxDims[2]),
2240             TensorValue(argmaxDims[0]), TensorValue(argmaxDims[1]));
2241             break;
2242         case 3: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[2]), TensorValue(argmaxDims[1]),
2243             1, TensorValue(argmaxDims[0]));
2244             break;
2245         case 2: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[1]), TensorValue(argmaxDims[0]), 1, 1);
2246             break;
2247         case 1:  // not implemented yet.
2248         default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions");
2249         }
2250
2251         cldnn::layout mutableLayout = cldnn::layout(
2252             cldnn::data_types::f32,
2253             m_defaultFormat,
2254             mutableTensor);
2255
2256         cldnn::primitive_id argmaxPrimID = layer->name + "_argmax_mutable";
2257
2258         auto mem = cldnn::memory::allocate(*(m_env.engine), mutableLayout);
2259         auto argmax_mutable_prim = cldnn::mutable_data(argmaxPrimID, mem);
2260         m_topology->add(argmax_mutable_prim);
2261         m_env.primitiveIDs[argmaxPrimID] = argmaxPrimID;
2262         m_env.primitiveIDs[argmaxOutputID] = argmaxPrimID;
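        // The mutable_data buffer is shared: max_with_argmax pooling writes the
        // selected indices into it, and the paired unpooling layer (created when
        // it is parsed) reads the same buffer to scatter values back.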
2263
2264         // create pooling primitive itself
2265         auto poolPrim = cldnn::pooling(poolLayerName,
2266             inputPrimitives[0],
2267             argmaxPrimID,
2268             cldnn::pooling_mode::max_with_argmax,
2269             cldnn::spatial(TensorValue(poolLayer->_kernel[X_AXIS]), TensorValue(poolLayer->_kernel[Y_AXIS])),  // size
2270             cldnn::spatial(TensorValue(poolLayer->_stride[X_AXIS]), TensorValue(poolLayer->_stride[Y_AXIS])),  // stride
2271                                                                                                    // input offset (padding) - explicit tensor with zero batch/feature components
2272             { 0, 0, -TensorValue(allPads.begin[X_AXIS]), -TensorValue(allPads.begin[Y_AXIS]) },
2273             CldnnTensorFromIEDims(poolLayer->outData[0]->dims));
2274         m_topology->add(poolPrim);
2275         m_env.primitiveIDs[realOutputID] = poolLayerName;
2276     } else {
2277         // regular pooling
2278         auto poolPrim = cldnn::pooling(poolLayerName,
2279             inputPrimitives[0],
2280             PoolingModeFromIEPooling(poolLayer->_type, poolLayer->_exclude_pad),
2281             cldnn::spatial(TensorValue(poolLayer->_kernel[X_AXIS]), TensorValue(poolLayer->_kernel[Y_AXIS])),  // size
2282             cldnn::spatial(TensorValue(poolLayer->_stride[X_AXIS]), TensorValue(poolLayer->_stride[Y_AXIS])),  // stride
2283                                                                                                    // input offset (padding) - explicit tensor with zero batch/feature components
2284             { 0, 0, -TensorValue(allPads.begin[X_AXIS]), -TensorValue(allPads.begin[Y_AXIS]) },
2285             CldnnTensorFromIEDims(poolLayer->outData[0]->dims));
2286         m_topology->add(poolPrim);
2287         m_env.primitiveIDs[poolLayerName] = poolLayerName;
2288     }
2289
2290     m_env.profilingIDs.push_back(poolLayerName);
2291 }
2292
2293 void CLDNNGraph::CreateLRNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2294     ValidateLayer(layer, 1);
2295     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2296     auto lrnLayer = dynamic_cast<InferenceEngine::NormLayer *> (layer.get());
2297     std::string lrnLayerName = layer_type_name_ID(layer);
2298     auto lrnPrim = cldnn::lrn(
2299         lrnLayerName,
2300         inputPrimitives[0],
2301         lrnLayer->_size,
2302         static_cast<float>(lrnLayer->_k),
2303         lrnLayer->_alpha,
2304         lrnLayer->_beta,
2305         lrnLayer->_isAcrossMaps ? cldnn_lrn_norm_region_across_channel : cldnn_lrn_norm_region_within_channel);
2306
2307     m_env.primitiveIDs[lrnLayerName] = lrnLayerName;
2308     m_topology->add(lrnPrim);
2309     m_env.profilingIDs.push_back(lrnLayerName);
2310 }
2311
2312 void CLDNNGraph::CreateActivationPrimitive(InferenceEngine::CNNLayerPtr &layer, const LayerType type) {
2313     ValidateLayer(layer, 1);
2314     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2315     cldnn_activation_additional_params params{ 0.0f, 0.0f };
2316     cldnn_activation_func func = cldnn_activation_func_t::activation_none;
2317
2318     LayerType activationType;
2319     if (type == Activation) {
2320         std::string activation_type = layer->GetParamAsString("type");
2321         if (activation_type == "tanh") {
2322             activationType = TanH;
2323         } else if (activation_type == "sigmoid" || activation_type == "logistic")  {
2324             activationType = Sigmoid;
2325         } else if (activation_type == "elu")  {
2326             activationType = ELU;
2327         } else if (activation_type == "relu")  {
2328             activationType = ReLU;
2329         } else if (activation_type == "relu6")  {
2330             activationType = ReLU6;
2331         } else if (activation_type == "clamp")  {
2332             activationType = Clamp;
2333         } else if (activation_type == "exp")  {
2334             activationType = Exp;
2335         } else if (activation_type == "not")  {
2336             activationType = Not;
2337         } else {
2338             THROW_CLDNN_EXCEPTION("Unsupported activation type (" + activation_type +
2339                                   ") in layer " + layer->name);
2340         }
2341     } else {
2342         activationType = type;
2343     }
2344
2345     switch (activationType) {
2346     case TanH:
2347     {
2348         func = cldnn_activation_func_t::activation_hyperbolic_tan;
2349         break;
2350     }
2351     case ELU:
2352     {
2353         func = cldnn_activation_func_t::activation_elu;
2354         params.a = layer->GetParamAsFloat("alpha", 1.0f);
2355         break;
2356     }
2357     case Sigmoid:
2358     {
2359         func = cldnn_activation_func_t::activation_logistic;
2360         break;
2361     }
2362     case ReLU:
2363     {
2364         func = cldnn_activation_func_t::activation_relu_negative_slope;
2365         params.a = layer->GetParamAsFloat("negative_slope", 0.0f);
2366         break;
2367     }
2368     case ReLU6:
2369     {
2370         func = cldnn_activation_func_t::activation_clamp;
2371         params.b = layer->GetParamAsFloat("n", 6.0f);
2372         break;
2373     }
2374     case Clamp:
2375     {
2376         func = cldnn_activation_func_t::activation_clamp;
2377         params.a = layer->GetParamAsFloat("min");
2378         params.b = layer->GetParamAsFloat("max");
2379         break;
2380     }
2381     case Exp:
2382     {
2383         func = cldnn_activation_func_t::activation_exp;
2384         break;
2385     }
2386     case Not:
2387     {
2388         func = cldnn_activation_func_t::activation_not;
2389         break;
2390     }
2391     default:
2392         THROW_CLDNN_EXCEPTION("Unsupported activation type (" + layer->type +
2393                               ") in layer " + layer->name);
2394     }
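    // Note: ReLU6 reuses activation_clamp; params.a keeps its zero-initialized
    // value, so the effective range is [0, n] (default [0, 6]).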
2395
2396     std::string layerName = layer_type_name_ID(layer);
2397     auto activationPrimitive = cldnn::activation(layerName, inputPrimitives[0], func, params);
2398     m_env.primitiveIDs[layerName] = layerName;
2399     m_topology->add(activationPrimitive);
2400     m_env.profilingIDs.push_back(layerName);
2401 }
2402
2403 void CLDNNGraph::CreateCopyPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2404     ValidateLayer(layer, 1);
2405     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2406     auto copyLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2407
2408     // Optimize out and just update references
2409     std::string layerName = layer_type_name_ID(layer);
2410     m_env.primitiveIDs[layerName] = inputPrimitives[0];
2411     InitProfileInfo(layerName, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);  // Mark this layer as optimized out
2412 }
2413
2414 void CLDNNGraph::CreateUpsamplingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2415     // Assuming multi-input will be handled by prev concat/eltwise layers
2416     ValidateLayer(layer, 1);
2417     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2418     auto upsamplingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2419     uint32_t scale = upsamplingLayer->GetParamAsUInt("scale");
2420     uint32_t numFilter = upsamplingLayer->GetParamAsUInt("num_filter");
2421     std::string sampleType = upsamplingLayer->GetParamAsString("sample_type");
2422
2423     std::string upsamplingLayerName = layer_type_name_ID(layer);
2424     auto upsamplingPrim = cldnn::upsampling(
2425         upsamplingLayerName,
2426         inputPrimitives[0],
2427         scale,
2428         numFilter,
2429         UpsamplingTypeFromString(sampleType));
2430
2431     m_env.primitiveIDs[upsamplingLayerName] = upsamplingLayerName;
2432     m_topology->add(upsamplingPrim);
2433     m_env.profilingIDs.push_back(upsamplingLayerName);
2434 }
2435
2436 void CLDNNGraph::CreateResamplePrimitive(InferenceEngine::CNNLayerPtr &layer) {
2437     ValidateLayer(layer, 1);
2438     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2439     auto resampleLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2440
2441     auto outDims = layer->outData[0]->dims;
2442     size_t inFeatures = 1;
2443     unsigned int scale = 1;
2444     std::shared_ptr<Data> insData0 = layer->insData[0].lock();
2445     IE_ASSERT(insData0 != nullptr);
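    // Note: in this IE version Data::dims is stored innermost-first (x, y, f, b),
    // so dims[2] is the feature count and outDims[0] / dims[0] compares spatial widths.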
2446     if (insData0->dims.size() > 2) {
2447         inFeatures = insData0->dims[2];
2448         scale = outDims[0] / insData0->dims[0];
2449         if (scale < 1) {
2450             THROW_CLDNN_EXCEPTION("Unsupported scale in layer " + layer->name);
2451         }
2452     }
2453     std::string sampleType = resampleLayer->GetParamAsString("type");
2454
2455     if (sampleType != "caffe.ResampleParameter.NEAREST") {
2456         THROW_CLDNN_EXCEPTION("Unsupported resampling type (" + sampleType + ") in layer " + layer->name);
2457     }
2458
2459     std::string resampleLayerName = layer_type_name_ID(layer);
2460     auto upsamplingPrim = cldnn::upsampling(
2461         resampleLayerName,
2462         inputPrimitives[0],
2463         scale,
2464         inFeatures,
2465         cldnn::upsampling_sample_type::nearest);
2466
2467     m_env.primitiveIDs[resampleLayerName] = resampleLayerName;
2468     m_topology->add(upsamplingPrim);
2469     m_env.profilingIDs.push_back(resampleLayerName);
2470 }
2471
2472 void CLDNNGraph::CreateYOLO2RegionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2473     ValidateLayer(layer, 1);
2474     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2475     auto YOLOregionLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2476
2477     uint32_t coords = YOLOregionLayer->GetParamAsUInt("coords", 4);
2478     uint32_t classes = YOLOregionLayer->GetParamAsUInt("classes", 20);
2479     uint32_t num = YOLOregionLayer->GetParamAsUInt("num", 1);
2480     bool do_softmax = YOLOregionLayer->GetParamsAsBool("do_softmax", true);
2481
2482     uint32_t mask_size = 0;
2483     if (HasParam(YOLOregionLayer->params, "mask")) {
2484         const auto mask = YOLOregionLayer->GetParamAsInts("mask");
2485         mask_size = static_cast<uint32_t>(mask.size());
2486     }
2487
2488     std::string YOLOregionLayerName = layer_type_name_ID(layer);
2489     auto regionPrim = cldnn::region_yolo(
2490         YOLOregionLayerName,
2491         inputPrimitives[0],
2492         coords,
2493         classes,
2494         num,
2495         mask_size,
2496         do_softmax);
2497
2498     m_env.primitiveIDs[YOLOregionLayerName] = YOLOregionLayerName;
2499     m_topology->add(regionPrim);
2500     m_env.profilingIDs.push_back(YOLOregionLayerName);
2501 }
2502
2503 void CLDNNGraph::CreateYOLO2ReorgPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2504     ValidateLayer(layer, 1);
2505     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2506     auto YOLOreorgLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2507     uint32_t stride = YOLOreorgLayer->GetParamAsUInt("stride");
2508
2509     std::string YOLOreorgLayerName = layer_type_name_ID(layer);
2510     auto reorgPrim = cldnn::reorg_yolo(
2511         YOLOreorgLayerName,
2512         inputPrimitives[0],
2513         stride);
2514
2515     m_env.primitiveIDs[YOLOreorgLayerName] = YOLOreorgLayerName;
2516     m_topology->add(reorgPrim);
2517     m_env.profilingIDs.push_back(YOLOreorgLayerName);
2518 }
2519
2520 void CLDNNGraph::CreateArgMaxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2521     ValidateLayer(layer, 1);
2522     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2523     auto ArgMaxLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2524     const cldnn::arg_max_min::out_type otype = cldnn::arg_max_min::out_type::max;
2525
2526     if (HasParam(ArgMaxLayer->params, "out_max_val")) {
2527         int32_t out_max_val_flag = ArgMaxLayer->GetParamAsInt("out_max_val");
2528         if (out_max_val_flag != 0) {
2529             THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str << "ArgMax: out_max_val param is not supported for layer: " << layer->name;
2530         }
2531     }
2532
2533     uint32_t top_k = ArgMaxLayer->GetParamAsUInt("top_k", 1);
2534
2535     cldnn::arg_max_min::axis_name chosen_axis = cldnn::arg_max_min::axis_name::xyf;
2536
2537     if (HasParam(ArgMaxLayer->params, "axis")) {
2538         int32_t axis_param = ArgMaxLayer->GetParamAsInt("axis", 1);
2539
2540         int32_t axis = axis_param;
2541         if (-4 <= axis && axis <= -1)
2542             axis += 4;
2543
2544         switch (axis) {
2545         case 0: chosen_axis = cldnn::arg_max_min::axis_name::batch; break;
2546         case 1: chosen_axis = cldnn::arg_max_min::axis_name::feature; break;
2547         case 2: chosen_axis = cldnn::arg_max_min::axis_name::y; break;
2548         case 3: chosen_axis = cldnn::arg_max_min::axis_name::x; break;
2549         }
2550     }
2551
2552     std::string ArgMaxLayerName = layer_type_name_ID(layer);
2553     auto argmaxPrim = cldnn::arg_max_min(
2554         ArgMaxLayerName,
2555         inputPrimitives[0],
2556         otype,
2557         top_k,
2558         chosen_axis);
2559
2560     m_env.primitiveIDs[ArgMaxLayerName] = ArgMaxLayerName;
2561     m_topology->add(argmaxPrim);
2562     m_env.profilingIDs.push_back(ArgMaxLayerName);
2563 }
2564
2565 void CLDNNGraph::CreateMaxUnpoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2566     ValidateLayer(layer, 2);
2567
2568     auto UnpoolingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2569
2570     cldnn::primitive_id real_input, argmax_mutable;
2571
2572     // locate ArgMax primitive
2573     int inputOrder = 0;
2574     for (auto inputData : layer->insData) {
2575         auto prevData = inputData.lock();
2576
2577         if (prevData == nullptr) {
2578             THROW_CLDNN_EXCEPTION("MaxUnpooling: nonexistent input for layer: " << layer->name);
2579         }
2580
2581         auto prevCreator = prevData->creatorLayer.lock();
2582
2583         if (prevCreator &&
2584             (LayerTypeFromStr(prevCreator->type) == Pooling) &&
2585             prevCreator->outData.size() > 1 &&
2586             inputOrder == 1) {
2587             argmax_mutable = m_env.primitiveIDs.at(prevCreator->name + "_argmax_mutable");
2588         } else {
2589             real_input = m_env.primitiveIDs.at(prevData->name);
2590         }
2591         inputOrder++;
2592     }
2593
2594     uint32_t stride = UnpoolingLayer->GetParamAsUInt("stride");
2595     uint32_t kernel_size = UnpoolingLayer->GetParamAsUInt("kernel_size");
2596
2597     std::string UnpoolingLayerName = layer_type_name_ID(layer);
2598     auto unpoolingPrim = cldnn::max_unpooling(
2599         UnpoolingLayerName,
2600         real_input,
2601         argmax_mutable,
2602         cldnn::spatial(kernel_size, kernel_size),  // size
2603         cldnn::spatial(stride, stride));           // stride
2604
2605     m_env.primitiveIDs[UnpoolingLayerName] = UnpoolingLayerName;
2606     m_topology->add(unpoolingPrim);
2607     m_env.profilingIDs.push_back(UnpoolingLayerName);
2608 }
2609
2610 void CLDNNGraph::CreateMVNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2611     ValidateLayer(layer, 1);
2612     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2613     auto MvnLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2614
2615     bool across_channels = MvnLayer->GetParamsAsBool("across_channels", false);
2616     bool normalize_variance = MvnLayer->GetParamsAsBool("normalize_variance", true);
2617     float eps = MvnLayer->GetParamAsFloat("eps", 1e-10f);
2618
2619     std::string MvnLayerName = layer_type_name_ID(layer);
2620     auto mvnPrim = cldnn::mvn(
2621         MvnLayerName,
2622         inputPrimitives[0],
2623         across_channels,
2624         normalize_variance,
2625         eps);
2626
2627     m_env.primitiveIDs[MvnLayerName] = MvnLayerName;
2628     m_topology->add(mvnPrim);
2629     m_env.profilingIDs.push_back(MvnLayerName);
2630 }
2631
2632 void CLDNNGraph::CreateTilePrimitive(InferenceEngine::CNNLayerPtr &layer) {
2633     ValidateLayer(layer, 1);
2634     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2635     auto tileLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2636
2637     int axis = tileLayer->GetParamAsInt("axis", 1);
2638     int tiles = tileLayer->GetParamAsInt("tiles");
2639
2640     auto cldnnAxisFromIE = [](int axis) {
2641         switch (axis) {
2642             case 0: return cldnn::tile::tile_axis::along_b;
2643             case 1: return cldnn::tile::tile_axis::along_f;
2644             case 2: return cldnn::tile::tile_axis::along_y;
2645             case 3: return cldnn::tile::tile_axis::along_x;
2646             default: THROW_CLDNN_EXCEPTION("Unsupported tile axis: " << axis);
2647         }
2648     };
2649     std::string tileLayerName = layer_type_name_ID(layer);
2650     auto tilePrim = cldnn::tile(
2651         tileLayerName,
2652         inputPrimitives[0],
2653         cldnnAxisFromIE(axis),
2654         tiles);
2655
2656     m_env.primitiveIDs[tileLayerName] = tileLayerName;
2657     m_topology->add(tilePrim);
2658     m_env.profilingIDs.push_back(tileLayerName);
2659 }
2660
2661 void CLDNNGraph::CreatePadPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2662     ValidateLayer(layer, 1);
2663     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2664     auto padLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2665
2666     auto PadTensorFromArgs = [](const std::string &s) -> cldnn::tensor {
2667         std::stringstream ss(s);
2668         std::string item;
2669         std::vector<cldnn::tensor::value_type> elems;
2670         while (std::getline(ss, item, ',')) {
2671             elems.push_back(static_cast<cldnn::tensor::value_type>(std::atoll(item.c_str())));
2672         }
2673
2674         while (elems.size() < 4) {
2675             elems.push_back(0);
2676         }
2677
2678         // Swap x and y
2679         auto tmp = elems[2];
2680         elems[2] = elems[3];
2681         elems[3] = tmp;
2682
2683         return cldnn::tensor(elems, 0);
2684     };
2685
2686     auto pads_begin = PadTensorFromArgs(padLayer->GetParamAsString("pads_begin"));
2687     auto pads_end = PadTensorFromArgs(padLayer->GetParamAsString("pads_end"));
2688     std::string mode = padLayer->GetParamAsString("pad_mode");
2689     float pad_value = padLayer->GetParamAsFloat("pad_value", 0.0f);
2690
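    // Map IE pad modes onto clDNN border types: "symmetric" mirrors including the
    // border element (mirror), while "reflect" excludes it (mirror_101).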
2691     cldnn::border_type border_mode;
2692     if (mode == "constant")
2693         border_mode = cldnn::border_type::constant;
2694     else if (mode == "edge")
2695         border_mode = cldnn::border_type::edge;
2696     else if (mode == "symmetric")
2697         border_mode = cldnn::border_type::mirror;
2698     else if (mode == "reflect")
2699         border_mode = cldnn::border_type::mirror_101;
2700     else
2701         THROW_CLDNN_EXCEPTION("Invalid border mode " << mode << " in layer " << padLayer->name);
2702
2703     std::string padLayerName = layer_type_name_ID(layer);
2704     auto tilePrim = cldnn::border(
2705             padLayerName,
2706             inputPrimitives[0],
2707             pads_begin,
2708             pads_end,
2709             border_mode,
2710             pad_value);
2711
2712     m_env.primitiveIDs[padLayerName] = padLayerName;
2713     m_topology->add(tilePrim);
2714     m_env.profilingIDs.push_back(padLayerName);
2715 }
2716
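// Zero-pads an index to five digits so that generated primitive IDs
// (e.g. per-timestep split outputs) keep a stable, sortable order.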
2717 std::string get_string_id(size_t i) {
2718     std::stringstream ss;
2719     ss << std::setw(5) << std::setfill('0') << i;
2720     return ss.str();
2721 }
2722
2723 void CLDNNGraph::CreateLSTMCellPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2724     int lstm_batch_size, lstm_input_size, lstm_hidden_size;  // no sequence dimension for a single cell
2725     SizeVector in_dims1, in_dims2;
2726     bool hasBias = false;
2727     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2728
2729     auto elementSize = cldnn::data_type_traits::size_of(DataTypeFromPrecision(layer->precision));
2730     std::string layerName = layer_type_name_ID(layer);
2731     cldnn::primitive_id weightID = layerName + m_weightsTag;
2732     cldnn::primitive_id recurrentID = layerName + "_recurrent" + m_weightsTag;
2733     cldnn::primitive_id biasID = layerName + m_biasesTag;
2734     auto cellLayer = dynamic_cast<InferenceEngine::LSTMCell*> (layer.get());
2735
2736     /* check incoming CNN layer and setup required variables */
2737     {
2738         auto in_data0 = layer->insData[0].lock();
2739         if (!in_data0)
2740             THROW_IE_EXCEPTION << "Missing first input for LSTMCell layer " << layer->name;
2741
2742         auto in_dims0 = in_data0->dims;
2743         auto out_dims0 = layer->outData[0]->dims;
2744
2745         lstm_input_size = in_dims0[0];
2746         lstm_batch_size = in_dims0[1];
2747         lstm_hidden_size = out_dims0[0];
2748
2749         /* the second and third inputs carry the initial hidden and cell state;
2750         unlike the RNN sequence case, both are mandatory for LSTMCell */
2752
2753         auto in_data1 = layer->insData[1].lock();
2754         if (!in_data1)
2755             THROW_IE_EXCEPTION << "Missing second input for LSTMCell layer " << layer->name;
2756         in_dims1 = in_data1->dims;
2757
2758
2759         auto in_data2 = layer->insData[2].lock();
2760         if (!in_data2)
2761             THROW_IE_EXCEPTION << "Missing third input for LSTMCell layer " << layer->name;
2762         in_dims2 = in_data2->dims;
2763
2764
2765         if (in_dims0.size() != 2 || in_dims1.size() != 2 || in_dims2.size() != 2)
2766             THROW_IE_EXCEPTION << "Wrong input shapes for LSTMCell Layer " << layer->name;
2767     }
2768
2769     /* Prepare weight/bias memory primitives - split weight blob into W and R */
2770     {
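        // The IR keeps the LSTM weights as a single blob of 4 * hidden_size rows, each
        // row holding the input weights (input_size values) followed by the recurrent
        // weights (hidden_size values); the copy loop below splits every row into the
        // separate W and R clDNN data primitives.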
2771         cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_input_size, 4 * lstm_hidden_size));
2772         cldnn::tensor rTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_hidden_size, 4 * lstm_hidden_size));
2773         cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, wTensor);
2774         cldnn::layout RLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor);
2775
2776         auto wmem = cldnn::memory::allocate(*(m_env.engine), WLayout);
2777         auto wtmpPointer = wmem.pointer<char>();  // implicitly maps buffer - unmap in destructor
2778
2779         auto rmem = cldnn::memory::allocate(*(m_env.engine), RLayout);
2780         auto rtmpPointer = rmem.pointer<char>();
2781
2782         auto wLayer = dynamic_cast<InferenceEngine::WeightableLayer *> (layer.get());
2783         auto pWeightsBlob = wLayer->_weights;
2784         auto blobBytes = static_cast<const char *>(pWeightsBlob->buffer());
2785         const size_t WchunkSz = lstm_input_size * elementSize;
2786         const size_t RchunkSz = lstm_hidden_size * elementSize;
2787
2788         auto wBytes = wtmpPointer.data();
2789         auto rBytes = rtmpPointer.data();
2790
2791         for (int h = 0; h < 4 * lstm_hidden_size; h++) {
2792             // copy "input size" elements to W
2793             for (size_t b = 0; b < WchunkSz; b++)
2794                 *wBytes++ = *blobBytes++;
2795
2796             // copy "lstm_hidden_size" elements to R
2797             for (size_t b = 0; b < RchunkSz; b++)
2798                 *rBytes++ = *blobBytes++;
2799         }
2800
2801         m_topology->add(cldnn::data(weightID, wmem));
2802         m_topology->add(cldnn::data(recurrentID, rmem));
2803
2804         /* create bias memory primitive */
2805         auto pBiasBlob = wLayer->_biases;
2806         if (pBiasBlob != nullptr) {
2807             cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1));
2808             cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, bTensor);
2809
2810             auto bmem = cldnn::memory::allocate(*(m_env.engine), BLayout);
2811             auto btmpPointer = bmem.pointer<char>();
2812
2813             auto blobBytes = static_cast<const char *>(pBiasBlob->buffer());
2814             const size_t BchunkSz = lstm_hidden_size * elementSize;
2815             auto bBytes = btmpPointer.data();
2816
2817             for (size_t b = 0; b < 4 * BchunkSz; b++)
2818                 *bBytes++ = *blobBytes++;
2819
2820             m_topology->add(cldnn::data(biasID, bmem));
2821             hasBias = true;
2822         }
2823     }
2824
2825     cldnn::primitive_id inReshapeID = layerName + "_inReshape";
2826     cldnn::primitive_id permuteID = layerName + "_inputReorder";
2827     cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
2828
2829     cldnn::tensor inputShape = { lstm_batch_size, 1, lstm_input_size, 1 };
2830     cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
2831     cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, inputShape);
2832     m_topology->add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
2833     m_topology->add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
2834
2835     std::string hiddenInStr = inHiddenReshapeID + "_1";
2836     std::string cellInStr = inHiddenReshapeID + "_2";
2837     m_topology->add(cldnn::reshape(hiddenInStr, inputPrimitives[1], hiddenStateShape));
2838     m_topology->add(cldnn::reshape(cellInStr, inputPrimitives[2], hiddenStateShape));
2839
2840     cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
2841     cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};
2842
2843     std::string lstm_gemm_id = layerName + "_lstm_gemm";
2844     std::string lstm_elt_id = layerName + "_lstm_elt";
2845     std::string crop_id = layerName + "_crop";
2846
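    // LSTMCell is decomposed into clDNN primitives: lstm_gemm computes the combined
    // input/recurrent GEMM (x*W + h*R + b), lstm_elt applies the gate nonlinearities
    // and the cell update, and the crops below split off the hidden and cell outputs.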
2847     m_topology->add(cldnn::lstm_gemm(lstm_gemm_id, permuteID,
2848                                      weightID, recurrentID,
2849                                      hasBias ? biasID : "",
2850                                      hiddenInStr));
2851     m_topology->add(cldnn::lstm_elt(lstm_elt_id, lstm_gemm_id, cellInStr,
2852                                     0, 0, {}, {}, cldnn_lstm_offset_order_fizo));
2853
2854     cldnn::primitive_id outputHiddenID = layerName;
2855     m_topology->add(cldnn::crop(outputHiddenID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
2856     cldnn::primitive_id outputCellID = layer->type + ":" + layer->outData[1]->name;
2857     m_topology->add(cldnn::crop(outputCellID, lstm_elt_id, hiddenSz, cellCropSz));
2858
2859     // output primitive IDs
2860     m_env.primitiveIDs[outputHiddenID] = outputHiddenID;                                // LSTMCell:LSTMCell - "concat hidden"
2861     m_env.primitiveIDs[layer->type + ":" + layer->outData[0]->name] = outputHiddenID;   // LSTMCell:LSTMCell:0 - hidden state
2862     m_env.primitiveIDs[outputCellID] = outputCellID;                                    // LSTMCell:LSTMCell:1 - cell state
2863
2864     m_env.profilingIDs.push_back(layerName);
2865 }
2866
2867 void CLDNNGraph::CreateRNNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2868     int lstm_batch_size, lstm_sequence_len, lstm_input_size, lstm_hidden_size;
2869     SizeVector in_dims1, in_dims2;
2870     bool hasInitialHidden = false, hasInitialCell = false, hasBias = false, isForward = true;
2871     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2872
2873     auto elementSize = cldnn::data_type_traits::size_of(DataTypeFromPrecision(layer->precision));
2874     std::string layerName = layer_type_name_ID(layer);
2875     cldnn::primitive_id weightID = layerName + m_weightsTag;
2876     cldnn::primitive_id recurrentID = layerName + "_recurrent" + m_weightsTag;
2877     cldnn::primitive_id biasID = layerName + m_biasesTag;
2878     auto rnnLayer = dynamic_cast<InferenceEngine::RNNSequenceLayer*> (layer.get());
2879     bool permute_input = (1 != rnnLayer->axis);
2880
2881     /* check incoming CNN layer and setup required variables */
2882     {
2883         if (rnnLayer->cellType != RNNSequenceLayer::LSTM)
2884             THROW_IE_EXCEPTION << "RNN layer supports only LSTM-like cells";
2885
2886         auto in_data0 = layer->insData[0].lock();
2887         if (!in_data0)
2888             THROW_IE_EXCEPTION << "Missing first input for RNN layer " << layer->name;
2889
2890         auto in_dims0 = in_data0->dims;
2891         auto out_dims0 = layer->outData[0]->dims;
2892
2893         if (!permute_input) {
2894             lstm_batch_size = in_dims0[2];
2895             lstm_sequence_len = in_dims0[1];
2896         } else {
2897             lstm_batch_size = in_dims0[1];
2898             lstm_sequence_len = in_dims0[2];
2899         }
2900
2901         lstm_input_size = in_dims0[0];
2902         lstm_hidden_size = out_dims0[0];
2903
2904         /* do we have initial hidden and cell?
2905         if blobs are not null, direct the data from them
2906         into corresponding LSTM inputs */
2907
2908         auto in_data1 = layer->insData[1].lock();
2909         if (in_data1) {
2910             in_dims1 = in_data1->dims;
2911             hasInitialHidden = true;
2912         }
2913
2914         auto in_data2 = layer->insData[2].lock();
2915         if (in_data2) {
2916             in_dims2 = in_data2->dims;
2917             hasInitialCell = true;
2918         }
2919
2920         if (rnnLayer->direction != RNNSequenceLayer::FWD && rnnLayer->direction != RNNSequenceLayer::BWD)
2921             THROW_IE_EXCEPTION << "Support only forward and backward direction for RNN Layer " << layer->name;
2922         isForward = rnnLayer->direction == RNNSequenceLayer::FWD;
2923
2924         if (in_dims0.size() != 3 || in_dims1.size() != 2 || in_dims2.size() != 2)
2925             THROW_IE_EXCEPTION << "Wrong input shapes for RNN Layer " << layer->name;
2926     }
2927
2928     /* Prepare weight/bias memory primitives - split weight blob into W and R */
2929     {
2930         cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_input_size, 4 * lstm_hidden_size));
2931         cldnn::tensor rTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_hidden_size, 4 * lstm_hidden_size));
2932         cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, wTensor);
2933         cldnn::layout RLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor);
2934
2935         auto wmem = cldnn::memory::allocate(*(m_env.engine), WLayout);
2936         auto wtmpPointer = wmem.pointer<char>();  // implicitly maps buffer - unmap in destructor
2937
2938         auto rmem = cldnn::memory::allocate(*(m_env.engine), RLayout);
2939         auto rtmpPointer = rmem.pointer<char>();
2940
2941         auto wLayer = dynamic_cast<InferenceEngine::WeightableLayer *> (layer.get());
2942         auto pWeightsBlob = wLayer->_weights;
2943         auto blobBytes = static_cast<const char *>(pWeightsBlob->buffer());
2944         const size_t WchunkSz = lstm_input_size * elementSize;
2945         const size_t RchunkSz = lstm_hidden_size * elementSize;
2946
2947         auto wBytes = wtmpPointer.data();
2948         auto rBytes = rtmpPointer.data();
2949
2950         for (int h = 0; h < 4 * lstm_hidden_size; h++) {
2951             // copy "input size" elements to W
2952             for (size_t b = 0; b < WchunkSz; b++)
2953                 *wBytes++ = *blobBytes++;
2954
2955             // copy "lstm_hidden_size" elements to R
2956             for (size_t b = 0; b < RchunkSz; b++)
2957                 *rBytes++ = *blobBytes++;
2958         }
2959
2960         m_topology->add(cldnn::data(weightID, wmem));
2961         m_topology->add(cldnn::data(recurrentID, rmem));
2962
2963         /* create bias memory primitive */
2964         auto pBiasBlob = wLayer->_biases;
2965         if (pBiasBlob != nullptr) {
2966             cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1));
2967             cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, bTensor);
2968
2969             auto bmem = cldnn::memory::allocate(*(m_env.engine), BLayout);
2970             auto btmpPointer = bmem.pointer<char>();
2971
2972             auto blobBytes = static_cast<const char *>(pBiasBlob->buffer());
2973             const size_t BchunkSz = lstm_hidden_size * elementSize;
2974             auto bBytes = btmpPointer.data();
2975
2976             for (size_t b = 0; b < 4 * BchunkSz; b++)
2977                 *bBytes++ = *blobBytes++;
2978
2979             m_topology->add(cldnn::data(biasID, bmem));
2980             hasBias = true;
2981         }
2982     }
2983
2984     std::vector<std::pair<cldnn::primitive_id, cldnn::tensor>> input_ids_offsets;
2985     std::vector<cldnn::primitive_id> output_ids_offsets;
2986
2987     cldnn::primitive_id inReshapeID = layerName + "_inReshape";
2988     cldnn::primitive_id permuteID = layerName + "_inputReorder";
2989     cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
2990
2991     cldnn::tensor inputShape;
2992
2993     if (permute_input) {
2994         inputShape = { lstm_sequence_len, lstm_batch_size, lstm_input_size, 1 };
2995     } else {
2996         inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, 1 };
2997     }
2998     cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
2999     cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, inputShape);
3000     m_topology->add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
3001     m_topology->add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
3002
3003     m_topology->add(cldnn::reshape(inHiddenReshapeID+"_1", inputPrimitives[1], hiddenStateShape));
3004     m_topology->add(cldnn::reshape(inHiddenReshapeID+"_2", inputPrimitives[2], hiddenStateShape));
3005
3006     for (int i = 0; i < lstm_sequence_len; ++i)
3007         input_ids_offsets.push_back({ get_string_id(i), {0, i, 0, 0} });
3008
3009     cldnn::primitive_id inputSplitID = layerName + "_inputSplit";
3010
3011     if (permute_input) {
3012         m_topology->add(cldnn::permute(layerName + "_inputSwap", permuteID, { 1, 0, 2, 3 }));
3013         m_topology->add(cldnn::split(inputSplitID, layerName + "_inputSwap", input_ids_offsets));
3014     } else {
3015         m_topology->add(cldnn::split(inputSplitID, permuteID, input_ids_offsets));
3016     }
3017
3018     cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
3019     cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};
3020     std::string hiddenStr = hasInitialHidden ? inHiddenReshapeID+"_1" : "";
3021     std::string cellStr = hasInitialCell ? inHiddenReshapeID+"_2" : "";
3022
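    // Unroll the sequence: each timestep gets its own lstm_gemm/lstm_elt pair, with the
    // hidden/cell crops of step i feeding step i + 1; for the BWD direction the inputs
    // are consumed in reverse order via seqIdx.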
3023     for (int i = 0; i < lstm_sequence_len; ++i) {
3024         std::string lstm_gemm_id = layerName + "_lstm_gemm" + get_string_id(i);
3025         std::string lstm_elt_id = layerName + "_lstm_elt" + get_string_id(i);
3026         std::string crop_id = layerName + "_crop" + get_string_id(i);
3027
3028         int seqIdx = isForward ? i : lstm_sequence_len - 1 - i;
3029         m_topology->add(cldnn::lstm_gemm(lstm_gemm_id, inputSplitID + ":" + get_string_id(seqIdx),
3030                                             weightID, recurrentID,
3031                                             hasBias ? biasID : "",
3032                                             hiddenStr));
3033         m_topology->add(cldnn::lstm_elt(lstm_elt_id, lstm_gemm_id,
3034                                             cellStr, 0, 0, {}, {},
3035                                             cldnn_lstm_offset_order_fizo));
3036
3037         hiddenStr = crop_id + ":hidden";
3038         cellStr = crop_id + ":cell";
3039         m_topology->add(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }));
3040         output_ids_offsets.push_back(hiddenStr);
3041
3042         if (i < lstm_sequence_len - 1) {
3043             m_topology->add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
3044         } else {
3045             // last hidden state crop (output 2)
3046             if (layer->outData.size() > 1) {
3047                 cldnn::primitive_id outputHiddenID = layer->type + ":" + layer->outData[1]->name;
3048                 m_env.primitiveIDs[hiddenStr] = hiddenStr;
3049                 m_env.primitiveIDs[outputHiddenID] = hiddenStr;
3050             }
3051
3052             // last cell state crop (output 3)
3053             if (layer->outData.size() > 2) {
3054                 m_topology->add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
3055                 cldnn::primitive_id outputCellID = layer->type + ":" + layer->outData[2]->name;
3056                 m_env.primitiveIDs[cellStr] = cellStr;
3057                 m_env.primitiveIDs[outputCellID] = cellStr;
3058             }
3059         }
3060     }
3061
3062     if (!isForward) std::reverse(output_ids_offsets.begin(), output_ids_offsets.end());
3063
3064     if (permute_input) {
3065         m_topology->add(cldnn::concatenation(layerName + "_outputConcat", output_ids_offsets, cldnn::concatenation::along_f));
3066         m_topology->add(cldnn::permute(layerName, layerName + "_outputConcat", { 1, 0, 2, 3 }));
3067     } else {
3068         m_topology->add(cldnn::concatenation(layerName, output_ids_offsets, cldnn::concatenation::along_f));
3069     }
3070
3071     m_env.primitiveIDs[layerName] = layerName;
3072     m_env.primitiveIDs[layer->type + ":" + layer->outData[0]->name] = layerName;
3073     m_env.profilingIDs.push_back(layerName);
3074 }
3075
3076 void CLDNNGraph::AddConstantBlobInput(InferenceEngine::CNNLayerPtr &layer) {
3077     auto constBlob = layer->blobs.begin()->second;
3078     auto constDims = layer->outData[0]->dims;
3079
3080     cldnn::tensor constTensor;
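    // constDims are innermost-first (x, y, f, b) while the cldnn::tensor constructor
    // takes (batch, feature, x, y), hence the per-rank index shuffling below.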
3081     switch (constDims.size()) {
3082     case 4: constTensor = cldnn::tensor(TensorValue(constDims[3]), TensorValue(constDims[2]),
3083             TensorValue(constDims[0]), TensorValue(constDims[1]));
3084             break;
3085     case 3: constTensor = cldnn::tensor(TensorValue(constDims[2]), TensorValue(constDims[1]),
3086             1, TensorValue(constDims[0]));
3087             break;
3088     case 2: constTensor = cldnn::tensor(TensorValue(constDims[1]), TensorValue(constDims[0]), 1, 1);
3089             break;
3090     case 1: constTensor = cldnn::tensor(TensorValue(constDims[0]), 1, 1, 1);
3091             break;
3092     default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions");
3093     }
3094
3095     cldnn::layout constLayout = cldnn::layout(
3096         DataTypeFromPrecision(layer->blobs.begin()->second->precision()),
3097         m_defaultFormat,
3098         constTensor);
3099
3101     cldnn::primitive_id constPrimID = layer_type_name_ID(layer);
3102
3103     CreatePrimitiveFromBlob(constPrimID, constBlob, constLayout);
3104     m_env.primitiveIDs[constPrimID] = constPrimID;
3105 }
3106
3107 void CLDNNGraph::CreateConvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
3108     ValidateLayer(layer, 1);
3109     auto inputPrimitives = GetPrevLayersPrimitives(layer);
3110     auto convLayer = dynamic_cast<InferenceEngine::ConvolutionLayer *> (layer.get());
3111
3112     std::vector<cldnn::primitive_id> weightPrimID;
3113     std::vector<cldnn::primitive_id> biasPrimID;
3114     CreateWeightAndBiasPrimitives(layer, weightPrimID, biasPrimID);
3115
3116     cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
3117                                          cldnn::spatial(convLayer->_stride[X_AXIS], convLayer->_stride[Y_AXIS]));
3118     auto allPad = getPaddings(*convLayer);
3119     cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
3120                                           cldnn::spatial(-allPad.begin[X_AXIS], -allPad.begin[Y_AXIS]));
3121     cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
3122                                            cldnn::spatial(convLayer->_dilation[X_AXIS], convLayer->_dilation[Y_AXIS]));
3123
3124     std::string convLayerName = layer_type_name_ID(layer);
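    // For group counts >= 16 the group count is passed to clDNN directly; for smaller
    // counts the weights are presumably already split per group by
    // CreateWeightAndBiasPrimitives, so the plain convolution overload is used.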
3125     if (convLayer->_group >= 16) {
3126         auto convPrim = cldnn::convolution(convLayerName,
3127                                            inputPrimitives[0],
3128                                            weightPrimID,
3129                                            biasPrimID,
3130                                            convLayer->_group,
3131                                            stride,
3132                                            padding,
3133                                            dilation,
3134                                            false,
3135                                            0.0f,
3136                                            CldnnTensorFromIEDims(convLayer->outData[0]->dims));
3137         m_topology->add(convPrim);
3138     } else {
3139         auto convPrim = cldnn::convolution(convLayerName,
3140                                            inputPrimitives[0],
3141                                            weightPrimID,
3142                                            biasPrimID,
3143                                            stride,
3144                                            padding,
3145                                            dilation,
3146                                            false,
3147                                            0.0f,
3148                                            CldnnTensorFromIEDims(convLayer->outData[0]->dims));
3149         m_topology->add(convPrim);
3150     }
3151     m_env.primitiveIDs[convLayerName] = convLayerName;
3152     m_env.profilingIDs.push_back(convLayerName);
3153 }
3154
3155 void CLDNNGraph::CreateGatherPrimitive(InferenceEngine::CNNLayerPtr &layer) {
3156     ValidateLayer(layer, 2);
3157
3158     auto inputPrimitives = GetPrevLayersPrimitives(layer);
3159     auto gatherLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
3160
3161     int axis = gatherLayer->GetParamAsInt("axis", 0);
3162
3163     // Note: this mirrors a TensorFlow negative-axis interpretation quirk: here -3 = b, -2 = f, -1 = y, whereas it should be -3 = f, -2 = y, -1 = x
3164     auto cldnnAxisFromIE = [](int axis) {
3165         switch (axis) {
3166             case 0: return cldnn::gather::gather_axis::along_b;
3167             case 1: return cldnn::gather::gather_axis::along_f;
3168             case 2: return cldnn::gather::gather_axis::along_y;
3169             case 3: return cldnn::gather::gather_axis::along_x;
3170             case -1: return cldnn::gather::gather_axis::along_y;
3171             case -2: return cldnn::gather::gather_axis::along_f;
3172             case -3: return cldnn::gather::gather_axis::along_b;
3173             default: THROW_CLDNN_EXCEPTION("Unsupported gather axis: " << axis);
3174         }
3175     };
3176
3177     std::string gatherLayerName = layer_type_name_ID(layer);
3178     auto gatherPrim = cldnn::gather(
3179             gatherLayerName,
3180             inputPrimitives[0],
3181             inputPrimitives[1],
3182             cldnnAxisFromIE(axis),
3183             CldnnTensorFromIEDims(gatherLayer->outData[0]->dims));
3184
3185     m_env.primitiveIDs[gatherLayerName] = gatherLayerName;
3186     m_topology->add(gatherPrim);
3187     m_env.profilingIDs.push_back(gatherLayerName);
3188 }
3189
3190 void CLDNNGraph::CreateDepthToSpacePrimitive(InferenceEngine::CNNLayerPtr &layer) {
3191     ValidateLayer(layer, 1);
3192
3193     auto inputPrimitives = GetPrevLayersPrimitives(layer);
3194     auto depthToSpace = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
3195
3196     size_t blockSize = depthToSpace->GetParamAsInt("block_size", 2);
3197
3198     if (depthToSpace->input().get()->dims.size() != 4)
3199         THROW_CLDNN_EXCEPTION("Unsupported input tensor rank " << depthToSpace->input().get()->dims.size());
3200
3201     size_t blockSizeSquare = blockSize * blockSize;
3202
3203     if (depthToSpace->input().get()->dims[2] % blockSizeSquare != 0)
3204         THROW_CLDNN_EXCEPTION("The depth of the input tensor must be divisible by the squared block size = " << blockSizeSquare);
3205
3206     std::string depthToSpaceName = layer_type_name_ID(layer);
3207     auto depthToSpacePrim = cldnn::depth_to_space(
3208             depthToSpaceName,
3209             inputPrimitives[0],
3210             blockSize);
3211
3212     m_env.primitiveIDs[depthToSpaceName] = depthToSpaceName;
3213     m_topology->add(depthToSpacePrim);
3214     m_env.profilingIDs.push_back(depthToSpaceName);
3215 }
3216
3217 void CLDNNGraph::CreateShuffleChannelsPrimitive(InferenceEngine::CNNLayerPtr &layer) {
3218     ValidateLayer(layer, 1);
3219
3220     auto inputPrimitives = GetPrevLayersPrimitives(layer);
3221     auto shuffleChannels = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
3222     const int32_t numberOfDims = shuffleChannels->input()->getDims().size();
3223
3224     int32_t group = shuffleChannels->GetParamAsInt("group", 1);
3225     int32_t axis = shuffleChannels->GetParamAsInt("axis", 1);
3226
3227     if (axis < 0)
3228         axis += numberOfDims;
3229
3230     if (axis < 0 || axis >= numberOfDims)
3231         THROW_CLDNN_EXCEPTION("Incorrect axis value! Actual axis is " + std::to_string(axis));
3232
3233     if (group < 1)
3234         THROW_CLDNN_EXCEPTION("Invalid group size value (should be at least 1). Actual group size is " +
3235                                        std::to_string(group));
3236
3237     if (shuffleChannels->input().get()->getDims()[axis] % group != 0)
3238         THROW_CLDNN_EXCEPTION("Group parameter must evenly divide the channel dimension. Actual group size is " +
3239                                        std::to_string(group));
3240
3241     std::string shuffleChannelsName = layer_type_name_ID(layer);
3242     auto shuffleChannelsPrim = cldnn::shuffle_channels(
3243             shuffleChannelsName,
3244             inputPrimitives[0],
3245             group,
3246             axis);
3247
3248     m_env.primitiveIDs[shuffleChannelsName] = shuffleChannelsName;
3249     m_topology->add(shuffleChannelsPrim);
3250     m_env.profilingIDs.push_back(shuffleChannelsName);
3251 }
3252
3253 void CLDNNGraph::CreateStridedSlicePrimitive(InferenceEngine::CNNLayerPtr &layer) {
3254     auto inputPrimitives = GetPrevLayersPrimitives(layer);
3255     auto stridedSliceLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
3256
3257     auto tmp = stridedSliceLayer->GetParamAsUInts("end_mask");
3258     std::vector<uint8_t> end_mask(tmp.begin(), tmp.end());
3259     tmp = stridedSliceLayer->GetParamAsUInts("begin_mask");
3260     std::vector<uint8_t> begin_mask(tmp.begin(), tmp.end());
3261     tmp = stridedSliceLayer->GetParamAsUInts("new_axis_mask");
3262     std::vector<uint8_t> new_axis_mask(tmp.begin(), tmp.end());
3263     tmp = stridedSliceLayer->GetParamAsUInts("shrink_axis_mask");
3264     std::vector<uint8_t> shrink_axis_mask(tmp.begin(), tmp.end());
3265
3266     std::string stridedSliceLayerName = layer_type_name_ID(layer);
3267     auto stridedSlicePrim = cldnn::strided_slice(
3268             stridedSliceLayerName,
3269             inputPrimitives[0], inputPrimitives[1], inputPrimitives[2], inputPrimitives[3],
3270             begin_mask, end_mask, new_axis_mask, shrink_axis_mask);
3271
3272     m_env.primitiveIDs[stridedSliceLayerName] = stridedSliceLayerName;
3273     m_topology->add(stridedSlicePrim);
3274     m_env.profilingIDs.push_back(stridedSliceLayerName);
3275 }
3276
3277 void CLDNNGraph::CreateReverseSequencePrimitive(InferenceEngine::CNNLayerPtr &layer) {
3278     ValidateLayer(layer, 2);
3279
3280     auto inputPrimitives = GetPrevLayersPrimitives(layer);
3281     auto reverseSequence = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
3282     const int32_t numberOfDims = reverseSequence->input()->getDims().size();
3283
3284     const auto input = reverseSequence->insData[0].lock()->getDims();
3285     const auto sequence_lengths = reverseSequence->insData[1].lock()->getDims();
3286
3287     int32_t batch_axis = reverseSequence->GetParamAsInt("batch_axis", 0);
3288     int32_t seq_axis = reverseSequence->GetParamAsInt("seq_axis", 1);
3289
3290     if (batch_axis < 0)
3291         batch_axis += input.size();
3292
3293     if (seq_axis < 0)
3294         seq_axis += input.size();
3295
3296     if (batch_axis == seq_axis)
3297         THROW_CLDNN_EXCEPTION("Batch axis and sequence axis should not be equal");
3298
3299     if (seq_axis < 0 || seq_axis >= static_cast<int32_t>(input.size()))
3300         THROW_CLDNN_EXCEPTION("Incorrect Sequence axis value! Actual axis is " + std::to_string(seq_axis));
3301
3302     if (batch_axis < 0 || batch_axis >= static_cast<int32_t>(input.size()))
3303         THROW_CLDNN_EXCEPTION("Incorrect Batch axis value! Actual axis is " + std::to_string(batch_axis));
3304
3305     if (sequence_lengths[0] != input[batch_axis])
3306         THROW_CLDNN_EXCEPTION("Sequence lengths must be a vector of length " + std::to_string(input[batch_axis])
3307                             + "! Actual length is " + std::to_string(sequence_lengths[0]));
3308
3309     std::string reverseSequenceLayerName = layer_type_name_ID(layer);
3310     auto reverseSequencePrim = cldnn::reverse_sequence(
3311             reverseSequenceLayerName,
3312             inputPrimitives[0],
3313             inputPrimitives[1],
3314             seq_axis,
3315             batch_axis);
3316
3317     m_env.primitiveIDs[reverseSequenceLayerName] = reverseSequenceLayerName;
3318     m_topology->add(reverseSequencePrim);
3319     m_env.profilingIDs.push_back(reverseSequenceLayerName);
3320 }
3321
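// Checks whether a Split layer forms the split -> two parallel convolutions -> concat
// pattern that the plugin can fuse back into a single convolution; both branches must
// agree on precision, padding, stride and dilation, and neither may be a network output.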
3322 bool CLDNNGraph::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const {
3323     if (splitLayer->outData.size() != 2) return false;  // split into 2
3324
3325     for (auto out : splitLayer->outData) {
3326         if (out->getInputTo().size() != 1) {
3327             return false;
3328         }
3329     }
3330
3331     auto convLayer1 =
3332         dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[0]).get());
3333     auto convLayer2 =
3334         dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[1]).get());
3335     if (!convLayer1 || !convLayer2) {   // outputs aren't convolutions
3336         return false;
3337     }
3338     auto allPad1 = getPaddings(*convLayer1);
3339     auto allPad2 = getPaddings(*convLayer2);
3340     if (convLayer1->precision != convLayer2->precision                       // wrong precision
3341         || convLayer1->_fusedWith || convLayer2->_fusedWith                     // convolutions are fused
3342         || convLayer1->outData.size() != 1 || convLayer2->outData.size() != 1   // more than 1 output for convolutions
3343         || allPad1.begin[X_AXIS] != allPad2.begin[X_AXIS]                     // different padding
3344         || allPad1.begin[Y_AXIS] != allPad2.begin[Y_AXIS]                     // different padding
3345         || convLayer1->_stride[X_AXIS] != convLayer2->_stride[X_AXIS]                       // different strides
3346         || convLayer1->_stride[Y_AXIS] != convLayer2->_stride[Y_AXIS]                       // different strides
3347         || convLayer1->_dilation[X_AXIS] != convLayer2->_dilation[X_AXIS]                   // different dilation
3348         || convLayer1->_dilation[Y_AXIS] != convLayer2->_dilation[Y_AXIS]                   // different dilation
3349         || (GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0]))      // no merge after convolutions
3350             != GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[1])))
3351         || (p_currentOutputs->find(convLayer1->name) != p_currentOutputs->end())
3352         || (p_currentOutputs->find(convLayer2->name) != p_currentOutputs->end())) {
3353         return false;
3354     }
3355     auto concatLayer =
3356         dynamic_cast<InferenceEngine::ConcatLayer *> (
3357                 GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0])).get());
3358     if (!concatLayer ||                         // not a merge layer
3359         concatLayer->_axis != 1 ||              // merge on unsupported axis
3360         concatLayer->outData.size() != 1) {     // too many outputs
3361         return false;
3362     }
3363     if (m_config.customLayers.find(convLayer1->type) != m_config.customLayers.end() ||
3364         m_config.customLayers.find(concatLayer->type) != m_config.customLayers.end()) {
3365         return false;  // convolution or concat were overwritten by a custom layer
3366     }
3367
3368     return true;
3369 }
3370
3371 void CLDNNGraph::AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo, Precision inputPrecision) {
3372     // first create and add the input layout
3373     auto inputDims = inputInfo->getDims();
3374     InferenceEngine::Layout l = inputInfo->getTensorDesc().getLayout();
3375     auto consumers = inputInfo->getInputData()->getInputTo();
3376     bool single_consumer = consumers.size() == 1;
3377     CLDNNGraph::LayerType consumerType = LayerTypeFromStr(consumers.begin()->second->type);
3378
3379     cldnn::tensor dataTensor;
3380     cldnn::tensor::value_type batch = (m_env.m_max_batch <= 1)
3381                                         ? (inputDims.size() == 4 ? TensorValue(inputDims[3]) : 1)
3382                                         : TensorValue(m_curBatch);
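    // With dynamic batching (m_max_batch > 1) the layout is built for the currently
    // selected batch (m_curBatch) rather than the batch recorded in the IR. Note that
    // the NCHW and NHWC branches below produce the same dimension order, since the
    // actual memory arrangement comes from FormatFromLayout() further down.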
3383     switch (inputDims.size()) {
3384         case 4:
3385             if (InferenceEngine::Layout::NCHW == l || InferenceEngine::Layout::CHW == l) {
3386                 dataTensor = cldnn::tensor(batch,
3387                     TensorValue(inputDims[2]), TensorValue(inputDims[0]),
3388                     TensorValue(inputDims[1]));
3389             } else if (InferenceEngine::Layout::NHWC == l) {
3390                 dataTensor = cldnn::tensor(batch,
3391                     TensorValue(inputDims[2]), TensorValue(inputDims[0]),
3392                     TensorValue(inputDims[1]));
3393             } else {
3394                 THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in 4D input " + inputInfo->name());
3395             }
3396             break;
3397         case 3:
3398             if (InferenceEngine::Layout::CHW == l) {
3399                 dataTensor = cldnn::tensor(TensorValue(inputDims[2]), TensorValue(inputDims[1]), 1, TensorValue(inputDims[0]));
3400             } else {
3401                 THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in 3D input " + inputInfo->name());
3402             }
3403             break;
3404         case 2:
3405             if (InferenceEngine::Layout::NCHW == l) {
3406                 dataTensor = cldnn::tensor(1, 1, TensorValue(inputDims[1]), TensorValue(inputDims[0]));
3407             } else if (InferenceEngine::NC == l) {
3408                 dataTensor = cldnn::tensor(TensorValue(inputDims[1]), TensorValue(inputDims[0]), 1, 1);
3409             } else {
3410                 THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in 2D input " + inputInfo->name());
3411             }
3412             break;
3413         case 1:
3414             dataTensor = cldnn::tensor(TensorValue(inputDims[0]), 1, 1, 1);
3415             break;
3416         default: THROW_CLDNN_EXCEPTION("Invalid data dimensions");
3417     }
3418
3419     cldnn::layout inputLayout(DataTypeFromPrecision(inputInfo->getInputPrecision()),
3420         FormatFromLayout(l),
3421         dataTensor);
3422
3423     // save the input dims
3424     m_env.inputLayouts.insert({ inputInfo->name(), inputLayout });
3425
3426     auto inputName = "Input:" + inputInfo->name();
3427     m_topology->add(cldnn::input_layout(inputName, inputLayout));
3428
3429     // create preprocess primitive for this input
3430     auto preProcess = inputInfo->getPreProcess();
3431
3432     size_t meanChannels = preProcess.getNumberOfChannels();
3433     inputLayout.format = m_defaultFormat;
3434     inputLayout.size = inputLayout.size.transform(m_defaultFormat, 1);
3435     inputLayout.data_type = DataTypeFromPrecision(inputPrecision);
3436     auto preprocessPrimID = inputName + m_preProcessTag;
3437
3438     if ((meanChannels > 0) &&
3439         (meanChannels != inputLayout.size.feature[0])) {
3440         THROW_CLDNN_EXCEPTION("Mismatched mean values channels in input " + inputName);
3441     }
3442
3443     switch (preProcess.getMeanVariant()) {
3444     case NONE:
3445     case MEAN_VALUE: {
3446         std::vector<float> meanValues;
3447         if (meanChannels > 0) {
3448             for (size_t c = 0; c < meanChannels; c++) {
3449                 if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10)
3450                     THROW_CLDNN_EXCEPTION("stdScale is not supported yet for input " + inputName);
3451                 meanValues.push_back(preProcess[c]->meanValue);
3452             }
3453         }
3454         m_topology->add(cldnn::reorder(preprocessPrimID, inputName, inputLayout, meanValues));
3455         m_env.profilingIDs.push_back(preprocessPrimID);
3456         InitProfileInfo(preprocessPrimID, "Reorder");
3457     }
3458     break;
3459
3460     case MEAN_IMAGE: {
3461         IE_ASSERT(meanChannels);
3462         // first merge all mean values to a single blob
3463         // todo make sure mean blob precision is the same as the input precision
3464         auto meanDims = inputInfo->getDims();
3465         // overwrite batches with 1
3466         switch (meanDims.size()) {
3467         case 4: meanDims[3] = 1;
3468             break;
3469         default:
3470             THROW_CLDNN_EXCEPTION("Missing batch dimensions in input image");
3471         }
3472         InferenceEngine::TBlob<float> meanBlob(Precision(Precision::FP32), TensorDesc::getLayoutByDims(meanDims), meanDims);
3473         meanBlob.allocate();
3474         auto meanBlobData = meanBlob.data();
3475         for (size_t c = 0; c < meanChannels; c++) {
3476             if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10)
3477                 THROW_CLDNN_EXCEPTION("stdScale is not supported yet for input " + inputName);
3478             auto channelMeanBlob = std::dynamic_pointer_cast<TBlob<float>>(preProcess[c]->meanData);
3479             auto channelSize = channelMeanBlob->size();
3480             auto channelBlobData = channelMeanBlob->data();
3481             for (size_t i = 0; i < channelSize; i++) {
3482                 meanBlobData[(c * channelSize) + i] = channelBlobData[i];
3483             }
3484         }
3485         // then create a data primitive for the mean values
3486         auto meanBlobPtr = std::make_shared<InferenceEngine::TBlob<float>>(meanBlob);
3487
3488         // mean values use the external format (subtracted in the input format before conversion to the new format)
3489         cldnn::tensor meanBlobTensor(inputLayout.size);
3490         meanBlobTensor.batch[0] = 1;  // mean values have no batches
3491         cldnn::layout meanBlobLayout(cldnn::data_types::f32, m_defaultFormat, meanBlobTensor);
3492         CreatePrimitiveFromBlob(
3493             inputName + m_meanValuesTag,
3494             meanBlobPtr,
3495             meanBlobLayout);
3496         m_topology->add(cldnn::reorder(preprocessPrimID,
3497             inputName,
3498             inputLayout,
3499             inputName + m_meanValuesTag));
3500         m_env.profilingIDs.push_back(preprocessPrimID);
3501         InitProfileInfo(preprocessPrimID, "Reorder");
3502     }
3503     break;
3504
3505     default: THROW_CLDNN_EXCEPTION("Invalid mean variant in input " + inputName);
3506         break;
3507     }
3508     m_env.primitiveIDs[inputName] = preprocessPrimID;
3509     m_env.primitiveIDs[preprocessPrimID] = preprocessPrimID;
3510 }
3511
3512 std::vector<cldnn::primitive_id> CLDNNGraph::GetPrevLayersPrimitives(const InferenceEngine::CNNLayerPtr layer) const {
3513     if (layer == nullptr) {
3514         return {};
3515     }
3516     std::vector<cldnn::primitive_id> inputPrimitives;
3517     for (auto inputData : layer->insData) {
3518         auto prevData = inputData.lock();
3519         if (prevData == nullptr) {
3520             THROW_CLDNN_EXCEPTION("Nonexistent input for layer: " << layer->name);
3521         }
3522         auto prevCreator = prevData->creatorLayer.lock();
3523         std::string prevName;
3524
3525         if (prevCreator) {
3526             prevName = prevCreator->type + ":";
3527             if (prevCreator->outData.size() > 1)
3528                 prevName += prevData->name;
3529             else
3530                 prevName += prevCreator->name;
3531         } else {
3532             prevName = prevData->name;
3533         }
3534         inputPrimitives.push_back(m_env.primitiveIDs.at(prevName));
3535     }
3536     return inputPrimitives;
3537 }
3538
3539 void CLDNNGraph::AddOutputPrimitive(std::string outputName, const InferenceEngine::DataPtr outputData, Precision outputPrecision) {
3540     // TODO: add precision check once there's an outputInfo object
3541     if (outputData->layout != InferenceEngine::NCHW &&
3542         outputData->layout != InferenceEngine::NHWC &&
3543         outputData->layout != InferenceEngine::CHW &&
3544         outputData->layout != InferenceEngine::NC) {
3545         THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(outputData->layout) << ") in output: " << outputName);
3546     }
3547
3548     auto outputCreator = outputData->getCreatorLayer().lock();
3549     std::string outLayerName = outputCreator->type + ":";
3550
3551     if (outputCreator->outData.size() > 1)
3552         outLayerName += outputName;
3553     else
3554         outLayerName += outputCreator->name;
3555
3556     auto outputReorderID = outputName + m_postProcessTag;
3557     Precision precision = outputPrecision == Precision::UNSPECIFIED ? outputData->getPrecision() : outputPrecision;
3558
3559     // Find correct output ID. Start with name stored in IR.
3560     std::string outputID = outLayerName;
3561     std::string finalID = m_env.primitiveIDs.at(outLayerName);
3562
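    // Follow the primitive ID remapping chain (layers optimized out earlier alias their
    // input's ID) until it reaches a fixed point; that final ID is the real producer.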
3563     while (outputID != finalID) {
3564         auto prim = m_env.primitiveIDs.find(finalID);
3565
3566         if (prim == m_env.primitiveIDs.end()) {
3567             THROW_IE_EXCEPTION << "Unknown output primitive id " << outputID;
3568         }
3569         outputID = finalID;
3570         finalID = prim->second;
3571     }
3572
3573     m_topology->add(cldnn::reorder(outputReorderID, outputID,
3574         FormatFromLayout(outputData->getLayout()),
3575         DataTypeFromPrecision(precision)));
3576     m_env.primitiveIDs[outputName] = outputReorderID;
3577     m_env.profilingIDs.push_back(outputReorderID);
3578     InitProfileInfo(outputReorderID, "Reorder");
3579     m_env.outputDims[outputName] = outputData->dims;
3580     m_env.prevPrimitiveIDs[outputReorderID] = {outputName};
3581 }
3582
void CLDNNGraph::AddSingleValuePrimitive(cldnn::primitive_id valPrimID, cldnn::data_types dataType, float value) {
    cldnn::layout primLayout(dataType, m_defaultFormat, { 1, 1, 1, 1 });
    auto primMem = cldnn::memory::allocate(*(m_env.engine), primLayout);
    switch (dataType) {
    case cldnn::data_types::f32:
    {
        auto tmpPointer = primMem.pointer<float>();  // implicitly maps the buffer - unmapped in the pointer's destructor
        tmpPointer[0] = value;
    }
        break;
    case cldnn::data_types::f16:
    {
        auto tmpPointer = primMem.pointer<uint16_t>();  // implicitly maps the buffer - unmapped in the pointer's destructor
        cldnn_status status = CLDNN_SUCCESS;
        tmpPointer[0] = cldnn_float_to_half(value, &status);
        if (status != CLDNN_SUCCESS) {
            THROW_CLDNN_EXCEPTION("Error converting value to fp16.");
        }
    }
        break;
    default:
        THROW_CLDNN_EXCEPTION("Unhandled data type (precision)");
    }

    m_topology->add(cldnn::data(valPrimID, primMem));
}

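// Maps an IE precision to the corresponding clDNN data type.
// Note that I16 has no clDNN counterpart here and is widened to f32.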
cldnn::data_types CLDNNGraph::DataTypeFromPrecision(InferenceEngine::Precision p) {
    switch (p) {
    case Precision::I16:
    case Precision::FP32:
        return cldnn::data_types::f32;
    case Precision::FP16:
        return cldnn::data_types::f16;
    case Precision::U8:
        return cldnn::data_types::u8;
    case Precision::I32:
        return cldnn::data_types::i32;
    default:
        THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << p.name() << " precision";
        break;
    }
}

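// Maps an IE memory layout to a clDNN format. All planar layouts collapse to
// bfyx; only NHWC gets the channel-interleaved byxf format.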
cldnn::format CLDNNGraph::FormatFromLayout(InferenceEngine::Layout l) {
    switch (l) {
    case InferenceEngine::Layout::NCHW:
    case InferenceEngine::Layout::NC:
    case InferenceEngine::Layout::CHW:
    case InferenceEngine::Layout::C:
        return cldnn::format::bfyx;
    case InferenceEngine::Layout::NHWC:
        return cldnn::format::byxf;
    default:
        THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << l << " layout";
        break;
    }
}

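// Case-insensitive lookup of the upsampling mode by its IR string value.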
cldnn::upsampling_sample_type CLDNNGraph::UpsamplingTypeFromString(const std::string& str) {
    static const caseless_map<std::string, cldnn::upsampling_sample_type> UpsamplingTypeNameToType = {
        { "Bilinear" , cldnn::upsampling_sample_type::bilinear },
        { "Nearest" , cldnn::upsampling_sample_type::nearest },
    };
    auto it = UpsamplingTypeNameToType.find(str);
    if (it != UpsamplingTypeNameToType.end())
        return it->second;
    else
        THROW_CLDNN_EXCEPTION("Unknown Upsampling type: " << str);
}

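// Translates an IE softmax axis into a clDNN normalization dimension, with a
// workaround for inputs produced by fully connected layers (see comment inside).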
cldnn::softmax::dimension_t CLDNNGraph::SoftmaxDimensionFromIEAxis(const InferenceEngine::SoftMaxLayer* softmaxLayer, bool isPrevFC) {
    // Workaround for the default softmax dimension in clDNN: if exactly one dimension
    // is larger than 1, or the input comes from a fully connected layer (whose clDNN
    // output is BX rather than BF), normalize across the whole fyx block.
    // todo: remove this once clDNN changes FC output to BF instead of BX
    auto dims = softmaxLayer->outData[0]->dims;
    unsigned non1Dims = 0;
    for (size_t i = 0; i < dims.size(); i++) {
        if (dims[i] > 1) {
            non1Dims++;
        }
    }
    if (non1Dims == 1 || isPrevFC) {
        return cldnn::softmax::normalize_fyx;
    }
    // end of workaround

    switch (softmaxLayer->axis) {
    case 1: return cldnn::softmax::normalize_f;
    case 2: return cldnn::softmax::normalize_y;
    case 3: return cldnn::softmax::normalize_x;
    default: THROW_CLDNN_EXCEPTION("Invalid softmax axis " << softmaxLayer->axis);
    }
    return cldnn::softmax::normalize_fyx;  // unreachable - the default case above throws
}

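// Maps the caffe PriorBox code_type string from the IR to the clDNN enum.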
cldnn::prior_box_code_type CLDNNGraph::PriorBoxCodeFromString(const std::string& str) {
    static const std::map<std::string, cldnn::prior_box_code_type> CodeNameToType = {
        { "caffe.PriorBoxParameter.CORNER" , cldnn::prior_box_code_type::corner },
        { "caffe.PriorBoxParameter.CENTER_SIZE" , cldnn::prior_box_code_type::center_size },
        { "caffe.PriorBoxParameter.CORNER_SIZE" , cldnn::prior_box_code_type::corner_size },
    };
    auto it = CodeNameToType.find(str);
    if (it != CodeNameToType.end()) {
        return it->second;
    } else {
        THROW_CLDNN_EXCEPTION("Unknown Prior-Box code type: " + str);
        return cldnn::prior_box_code_type::corner;  // unreachable - the throw above never returns
    }
}

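// Creates a constant data primitive for every blob attached to a generic
// (custom) layer; only 1D blobs are supported.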
void CLDNNGraph::CreateGenericLayerBlobPrimitives(const InferenceEngine::GenericLayer* layer) {
    IE_ASSERT(layer);
    for (auto& blob : layer->blobs) {
        if (blob.second->dims().size() != 1) {
            THROW_CLDNN_EXCEPTION("Unhandled blob dim in layer " + layer->name);
        }
        CreatePrimitiveFromBlob(
            layer->type + ":" + layer->name + "_" + blob.first + m_weightsTag,
            blob.second,
            cldnn::layout(
                DataTypeFromPrecision(blob.second->precision()),
                m_defaultFormat, cldnn::spatial(TensorValue(blob.second->dims()[0]))));
    }
}

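// Checks that the layer carries every blob listed in blobNames.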
void CLDNNGraph::ValidateGenericLayerBlobs(const InferenceEngine::GenericLayer* layer, const std::vector<std::string>& blobNames) {
    IE_ASSERT(layer);
    for (auto& name : blobNames) {
        if (layer->blobs.find(name) == layer->blobs.end()) {
            THROW_CLDNN_EXCEPTION("Missing blob " + name + " in layer " + layer->name);
        }
    }
}

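// Converts an IE SizeVector to a cldnn::tensor: the dims are reversed, then the
// last two components are swapped because the cldnn::tensor vector constructor
// expects bfxy rather than bfyx order.
// E.g. dims { 8, 4, 3, 2 } reverse to { 2, 3, 4, 8 } and yield the tensor { 2, 3, 8, 4 }.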
cldnn::tensor CLDNNGraph::CldnnTensorFromIEDims(const InferenceEngine::SizeVector& dims) {
    auto numDims = dims.size();
    std::vector<cldnn::tensor::value_type> outputTensor({ 1, 1, 1, 1 });
    for (size_t i = 0; i < numDims; i++) {
        outputTensor[i] = TensorValue(dims[numDims - i - 1]);
    }
    // swap x and y since the cldnn tensor constructor takes bfxy instead of bfyx
    std::swap(outputTensor[2], outputTensor[3]);

    return outputTensor;
}

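// Factory for the plugin-specific infer request; fails if no network has been
// compiled yet.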
InferRequestInternal::Ptr
CLDNNGraph::CreateInferRequestImpl(InputsDataMap networkInputs, OutputsDataMap networkOutputs) {
    if (m_env.network == nullptr) {
        THROW_IE_EXCEPTION << NETWORK_NOT_LOADED_str;
    }
    return std::make_shared<CLDNNInferRequest>(m_env, m_config.useProfiling, networkInputs, networkOutputs);
}

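// Registers a zeroed performance-counter entry for the given layer so that
// profiling results can later be reported per layer.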
void CLDNNGraph::InitProfileInfo(const std::string& layerName,
                                 const std::string& layerType,
                                 bool isCPU,
                                 InferenceEngine::InferenceEngineProfileInfo::LayerStatus status) {
    auto& perfRecord = m_env.perfMap[layerType + ":" + layerName];
    perfRecord.first = layerName;
    auto& perfEntry = perfRecord.second;
    perfEntry.layerType = layerType;
    perfEntry.status = status;
    perfEntry.cpu_uSec = perfEntry.realTime_uSec = 0;
    perfEntry.isCPU = isCPU;
}

}  // namespace CLDNNPlugin