[platform/upstream/dldt.git] / inference-engine / src / cldnn_engine / cldnn_graph.cpp
// Copyright (C) 2018 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

#include <list>
#include <set>
#include <unordered_set>
#include <sstream>
#include <CPP/cldnn_defs.h>
#include <CPP/data.hpp>
#include <CPP/input_layout.hpp>
#include <CPP/reorder.hpp>
#include <CPP/convolution.hpp>
#include <CPP/pooling.hpp>
#include <CPP/lrn.hpp>
#include <CPP/fully_connected.hpp>
#include <CPP/softmax.hpp>
#include <CPP/activation.hpp>
#include <CPP/concatenation.hpp>
#include <CPP/proposal.hpp>
#include <CPP/roi_pooling.hpp>
#include <CPP/scale.hpp>
#include <CPP/crop.hpp>
#include <CPP/deconvolution.hpp>
#include <CPP/prior_box.hpp>
#include <CPP/detection_output.hpp>
#include <CPP/normalize.hpp>
#include <CPP/reshape.hpp>
#include <CPP/batch_norm.hpp>
#include <CPP/permute.hpp>
#include <CPP/split.hpp>
#include <CPP/upsampling.hpp>
#include <CPP/network.hpp>
#include <CPP/profiling.hpp>
#include <CPP/custom_gpu_primitive.hpp>
#include <CPP/reorg_yolo.hpp>
#include <CPP/region_yolo.hpp>
#include <CPP/mutable_data.hpp>
#include <CPP/max_unpooling.hpp>
#include <CPP/arg_max_min.hpp>
#include <CPP/mvn.hpp>
#include <chrono>
#include <cmath>
#include <algorithm>
#include <iterator>  // std::istream_iterator, used when parsing KEY_CONFIG_FILE
#include "cldnn_graph.h"
#include "simple_math.h"
#include <description_buffer.hpp>
#include <cldnn/cldnn_config.hpp>
#include <graph_tools.hpp>
#include "cldnn_infer_request.h"
#include <cpp_interfaces/ie_executor_manager.hpp>
#include <caseless.hpp>
#include <fstream>
#include <utility>
#include <sys/types.h>
#include <sys/stat.h>

using namespace InferenceEngine;
using namespace InferenceEngine::details;

#ifndef NDEBUG
#include <iostream>
#include <iomanip>
#define THROW_CLDNN_EXCEPTION(desc)\
do { \
std::cout << desc << "\n---\nException detected at " << __FILE__ << ":" << \
__LINE__ << " (" << __FUNCTION__ << ")\n---\n" << std::endl; THROW_IE_EXCEPTION << desc; } while (0)
#else
#define THROW_CLDNN_EXCEPTION(desc) THROW_IE_EXCEPTION << desc
#endif  // NDEBUG
#define TensorValue(val) static_cast<cldnn::tensor::value_type>(val)

namespace CLDNNPlugin {

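// These tags are appended to IE layer names to build unique clDNN primitive ids for
// the auxiliary primitives (weights, biases, pre/post reorders, ...) created per layer.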
const cldnn::primitive_id CLDNNGraph::m_preProcessTag("_cldnn_input_preprocess");
const cldnn::primitive_id CLDNNGraph::m_weightsTag("_cldnn_weights");
const cldnn::primitive_id CLDNNGraph::m_biasesTag("_cldnn_biases");
const cldnn::primitive_id CLDNNGraph::m_meanValuesTag("_cldnn_mean_values");
const cldnn::primitive_id CLDNNGraph::m_postProcessTag("_cldnn_output_postprocess");
const cldnn::primitive_id CLDNNGraph::m_scalesTag("_cldnn_scales");
const cldnn::primitive_id CLDNNGraph::m_workaroundTag("_cldnn_workaround");
const cldnn::primitive_id CLDNNGraph::m_preCustomLayerTag("_cldnn_custom_preprocess");
const cldnn::primitive_id CLDNNGraph::m_postCustomLayerTag("_cldnn_custom_postprocess");

static void ValidateLayer(const InferenceEngine::CNNLayerPtr& layer, unsigned inputs) {  // todo: add more checks
    if (inputs && layer->insData.size() != inputs) {
        THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name);
    }
    if (layer->_fusedWith) {
        THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
    }
}

static void ValidateEltwiseLayer(const InferenceEngine::CNNLayerPtr& layer) {
    if (layer->insData.size() < 2) {
        THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name << ". Eltwise layer should take at least 2 inputs");
    }
    if (layer->_fusedWith) {
        THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
    }
}

#if defined(_WIN32)
#include <direct.h>  // _mkdir
#define mkdir(dir, mode) _mkdir(dir)
#endif

void CLDNNGraph::Config::LoadFromMap(const std::map<std::string, std::string>& configMap) {
    for (auto& kvp : configMap) {
        std::string key = kvp.first;
        std::string val = kvp.second;

        // TODO: refactor if-else to map?
        if (key.compare(PluginConfigParams::KEY_PERF_COUNT) == 0) {
            if (val.compare(PluginConfigParams::YES) == 0) {
                useProfiling = true;
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                useProfiling = false;
            } else {
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
            }
        } else if (key.compare(PluginConfigParams::KEY_DYN_BATCH_ENABLED) == 0) {
            if (val.compare(PluginConfigParams::YES) == 0) {
                enableDynamicBatch = true;
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                enableDynamicBatch = false;
            } else {
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
            }
        } else if (key.compare(PluginConfigParams::KEY_DUMP_KERNELS) == 0) {
            if (val.compare(PluginConfigParams::YES) == 0) {
                dumpCustomKernels = true;
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                dumpCustomKernels = false;
            } else {
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
            }
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY) == 0) {
            std::stringstream ss(val);
            uint32_t uVal(0);
            ss >> uVal;
            if (ss.fail()) {
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
            }
            switch (uVal) {
            case 0:
                queuePriority = cldnn::priority_mode_types::disabled;
                break;
            case 1:
                queuePriority = cldnn::priority_mode_types::low;
                break;
            case 2:
                queuePriority = cldnn::priority_mode_types::med;
                break;
            case 3:
                queuePriority = cldnn::priority_mode_types::high;
                break;
            default:
                THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported queue priority value: " << uVal;
                break;
            }

        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) == 0) {
            std::stringstream ss(val);
            uint32_t uVal(0);
            ss >> uVal;
            if (ss.fail()) {
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
            }
            switch (uVal) {
            case 0:
                queueThrottle = cldnn::throttle_mode_types::disabled;
                break;
            case 1:
                queueThrottle = cldnn::throttle_mode_types::low;
                break;
            case 2:
                queueThrottle = cldnn::throttle_mode_types::med;
                break;
            case 3:
                queueThrottle = cldnn::throttle_mode_types::high;
                break;
            default:
                THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported queue throttle value: " << uVal;
                break;
            }
        } else if (key.compare(PluginConfigParams::KEY_CONFIG_FILE) == 0) {
            std::stringstream ss(val);
            std::istream_iterator<std::string> begin(ss);
            std::istream_iterator<std::string> end;
            std::vector<std::string> configFiles(begin, end);
            for (auto& file : configFiles) {
                CLDNNCustomLayer::LoadFromFile(file, customLayers);
            }
        } else if (key.compare(PluginConfigParams::KEY_TUNING_MODE) == 0) {
            if (val.compare(PluginConfigParams::TUNING_DISABLED) == 0) {
                tuningConfig.mode = cldnn::tuning_mode::tuning_disabled;
            } else if (val.compare(PluginConfigParams::TUNING_CREATE) == 0) {
                tuningConfig.mode = cldnn::tuning_mode::tuning_tune_and_cache;
            } else if (val.compare(PluginConfigParams::TUNING_USE_EXISTING) == 0) {
                tuningConfig.mode = cldnn::tuning_mode::tuning_use_cache;
            } else {
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported tuning mode value by plugin: " << val;
            }
        } else if (key.compare(PluginConfigParams::KEY_TUNING_FILE) == 0) {
            tuningConfig.cache_file_path = val;
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_MEM_POOL) == 0) {
            if (val.compare(PluginConfigParams::YES) == 0) {
                memory_pool_on = true;
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                memory_pool_on = false;
            } else {
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported memory pool flag value: " << val;
            }
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_GRAPH_DUMPS_DIR) == 0) {
            if (!val.empty()) {
                graph_dumps_dir = val;
                mkdir(graph_dumps_dir.c_str(), 0755);
            }
        } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_SOURCES_DUMPS_DIR) == 0) {
            if (!val.empty()) {
                sources_dumps_dir = val;
                mkdir(sources_dumps_dir.c_str(), 0755);
            }
        } else if (key.compare(PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS) == 0) {
            if (val.compare(PluginConfigParams::YES) == 0) {
                exclusiveAsyncRequests = true;
            } else if (val.compare(PluginConfigParams::NO) == 0) {
                exclusiveAsyncRequests = false;
            } else {
                THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val;
            }
        } else {
            THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property key by plugin: " << key;
        }
    }
}
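
// A configuration map as consumed by LoadFromMap might look like this
// (illustrative values only; the keys are the ones handled above):
//   { { PluginConfigParams::KEY_PERF_COUNT,           PluginConfigParams::YES },
//     { CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY, "2" },   // med priority
//     { PluginConfigParams::KEY_TUNING_MODE,          PluginConfigParams::TUNING_DISABLED } }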

void CLDNNGraph::changeInputBatch(size_t batch) {
    m_curBatch = batch;
}

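// Dynamic batching requires every layer's output shape to scale uniformly with the
// batch dimension. The detection/shape-related layer types rejected below, and any
// user-supplied custom layer, break that assumption, so their presence disables it.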
bool CLDNNGraph::CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const {
    InputsDataMap inputs;
    network.getInputsInfo(inputs);

    CNNLayerSet inputLayers;
    std::unordered_set<CNNLayer *> allLayers;

    if (inputs.empty())
        return false;

    auto & secondLayers = inputs.begin()->second->getInputData()->getInputTo();
    if (secondLayers.empty())
        return false;

    bool check_result = true;
    details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
        auto type = LayerTypeFromStr(layer->type);
        if (SimplerNMS == type ||
            ROIPooling == type ||
            PriorBox == type ||
            DetectionOutput == type ||
            Reshape == type ||
            Permute == type ||
            Flatten == type ||
            Proposal == type ||
            PSROIPooling == type ) {
            check_result = false;
        }

        // check for custom layer
        auto customLayer = m_config.customLayers.find(layer->type);
        if (customLayer != m_config.customLayers.end()) {
            check_result = false;
        }
    }, false);

    return check_result;
}

CLDNNGraph::CLDNNGraph(InferenceEngine::ICNNNetwork& network, const Config& config, int max_batch) : m_config(config),
    m_defaultFormat(cldnn::format::bfyx),
    m_networkPrecision(cldnn::data_types::f32),
    m_curBatch(-1) {
    m_env.engine = std::make_shared<cldnn::engine>(cldnn::engine_configuration(
        (config.useProfiling || (config.tuningConfig.mode != cldnn::tuning_mode::tuning_disabled)),
        false,
        config.dumpCustomKernels,
        std::string(),
        std::string(),
        true,
        std::string(),
        config.sources_dumps_dir,
        config.queuePriority,
        config.queueThrottle,
        config.memory_pool_on));
#if 0
        m_env.debugOptions.PrintOptions();
#endif
    if (config.exclusiveAsyncRequests) {
        ExecutorManager *executorManager = ExecutorManager::getInstance();
        _taskExecutor = executorManager->getExecutor(TargetDeviceInfo::name(TargetDevice::eGPU));
    }

    if (max_batch > 1) {
        // check topology for applicability
        if (!CanProcessDynBatch(network)) {
            THROW_CLDNN_EXCEPTION("Such topology cannot be compiled for dynamic batch!");
        }

        // calculate number of networks necessary based on binary log
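        // (m_bv_sz ends up as floor(log2(max_batch)) + 1; e.g. max_batch == 5 gives
        // m_bv_sz == 3, so networks are later compiled for batch sizes 4, 2 and 1,
        // presumably combined at runtime to serve any requested batch <= max_batch)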
        unsigned int tmp = max_batch;
        unsigned int mask = 1 << 31;
        unsigned int ldigit = 31;

        while (!(tmp & mask)) {
            mask >>= 1;
            ldigit--;
        }

        m_env.m_bv_sz = ldigit + 1;
    } else {
        m_env.m_bv_sz = 0;
    }

    m_env.m_max_batch = max_batch;

    // Handle workarounds
    char networkName[128] = { 0 };
    network.getName(networkName, 127);
    m_env.debugOptions.EnableWA(networkName);
    m_env.debugOptions.AddTimedEvent("Loading Begin");

    if (max_batch > 1) {
        for (int b = m_env.m_bv_sz - 1; b >= 0; b--) {
            m_topology = std::make_shared<cldnn::topology>();
            m_env.network.reset();
            m_env.constBlobs.clear();
            m_env.inputLayouts.clear();
            m_env.outputDims.clear();
            m_env.primitiveIDs.clear();

            changeInputBatch(1 << b);
            Load(network);
            CompileNetwork();
            m_env.batchNetworks.insert(m_env.batchNetworks.begin(), m_env.network);
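            // networks are built for the largest batch first, but each one is inserted
            // at the front, so m_env.batchNetworks ends up ordered by ascending batch size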

            m_topology.reset();
            m_env.engine->release_pending_memory();
        }
    } else {
        m_topology = std::make_shared<cldnn::topology>();
        Load(network);
        CompileNetwork();
        m_topology.reset();
        m_env.engine->release_pending_memory();
    }

    m_env.debugOptions.AddTimedEvent("Loading", "Loading Begin");
    m_env.debugOptions.PrintTimedEvents();
    m_env.debugOptions.ClearTimedEvents();
}

std::vector<InferenceEngine::CNNLayerPtr> CLDNNGraph::GetNextLayers(const InferenceEngine::DataPtr data) {
    std::vector<InferenceEngine::CNNLayerPtr> nextLayers;
    if (data == nullptr) {
        return nextLayers;
    }
    for (auto nl : data->getInputTo()) {
        nextLayers.push_back(nl.second);
    }
    return nextLayers;
}

std::vector<InferenceEngine::CNNLayerPtr> CLDNNGraph::GetNextLayers(const InferenceEngine::CNNLayerPtr layer) {
    std::vector<InferenceEngine::CNNLayerPtr> nextLayers;
    if (layer == nullptr) {
        return nextLayers;
    }
    for (auto od : layer->outData) {
        auto nextLayersVec = GetNextLayers(od);
        for (auto nl : nextLayersVec) {
            nextLayers.push_back(nl);
        }
    }
    return nextLayers;
}

InferenceEngine::CNNLayerPtr CLDNNGraph::GetNextSingleLayer(const InferenceEngine::DataPtr data) {
    if (data == nullptr) {
        return nullptr;
    }
    auto nextLayers = GetNextLayers(data);
    IE_ASSERT(nextLayers.size() == 1);
    return nextLayers[0];
}

InferenceEngine::CNNLayerPtr CLDNNGraph::GetNextSingleLayer(const InferenceEngine::CNNLayerPtr layer) {
    if (layer == nullptr) {
        return nullptr;
    }
    auto nextLayers = GetNextLayers(layer);
    IE_ASSERT(nextLayers.size() == 1);
    return nextLayers[0];
}

void CLDNNGraph::InitFormat(InferenceEngine::ICNNNetwork &network) {
    m_defaultFormat    = FormatFromLayout(InferenceEngine::Layout::NCHW);
    m_networkPrecision = DataTypeFromPrecision(network.getPrecision());
}

void CLDNNGraph::CompileNetwork() {
    m_env.debugOptions.AddTimedEvent("Network Build Begin");
    cldnn::build_options options;
    if (!m_config.graph_dumps_dir.empty()) {
        options.set_option(cldnn::build_option::graph_dumps_dir(m_config.graph_dumps_dir));
    }
    options.set_option(cldnn::build_option::optimize_data(true));
    options.set_option(cldnn::build_option::tuning_config(m_config.tuningConfig));

    m_env.network.reset();
    m_env.network = std::make_shared<cldnn::network>(*m_env.engine, *m_topology, options);
    m_env.debugOptions.AddTimedEvent("Network Build", "Network Build Begin");

    // add input data from all constant blobs
    for (auto& cblob : m_env.constBlobs) {
        m_env.network->set_input_data(cblob.first, cblob.second);
    }
}

void CLDNNGraph::Load(InferenceEngine::ICNNNetwork &network) {
    InitFormat(network);
    auto _networkPrecision = network.getPrecision();

    // 1. create inputs
    InferenceEngine::InputsDataMap networkInputs;
    network.getInputsInfo(networkInputs);
    p_currentInputs = &networkInputs;

    InferenceEngine::OutputsDataMap networkOutputs;
    network.getOutputsInfo(networkOutputs);
    p_currentOutputs = &networkOutputs;

    if (networkInputs.size() == 0) {
        THROW_CLDNN_EXCEPTION("No inputs detected.");
    }

    std::list<InferenceEngine::CNNLayerPtr> layersToHandle;
    for (auto input : networkInputs) {
        IE_ASSERT(input.first.compare(input.second->name()) == 0);
        AddInputPrimitive(input.second);

        // collect next layers to process
        for (auto l : input.second->getInputData()->getInputTo()) {
            layersToHandle.push_back(l.second);
        }
    }

    auto allInputs = CNNNetGetAllInputLayers(network);
    for (auto input : allInputs) {
        if (LayerTypeFromStr(input->type) == ConstantBlob) {
            AddConstantBlobInput(input);

            // collect next layers to process
            for (auto nl : GetNextLayers(input)) {
                layersToHandle.push_back(nl);
            }
        }
    }

    // 2. traverse layers
    unsigned infLoopProtection = 0;
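    // Worklist traversal: a layer is lowered only once all of its input primitives
    // exist; otherwise it is pushed back onto the queue. infLoopProtection trips when
    // a full pass over the queue makes no progress (an unresolvable dependency).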
    while (!layersToHandle.empty()) {
        if (infLoopProtection++ >= layersToHandle.size()) {
            THROW_CLDNN_EXCEPTION("Infinite loop during network creation");
            break;
        }
        InferenceEngine::CNNLayerPtr currLayer = layersToHandle.front();
        layersToHandle.pop_front();
        auto layerName = currLayer->name;

        if (m_env.primitiveIDs.find(layerName) != m_env.primitiveIDs.end()) {
            infLoopProtection = 0;
            continue;  // this layer was already added (had multiple inputs)
        }

        bool missingInput = false;
        try {
            GetPrevLayersPrimitives(currLayer);
        } catch (const std::exception&) {
            missingInput = true;
        }

        if (missingInput) {  // some inputs aren't created yet
            layersToHandle.push_back(currLayer);  // push the current layer to the end of the line
            continue;  // move on to the next layer
        }

        infLoopProtection = 0;  // found a layer with all inputs already existing
        IE_ASSERT(_networkPrecision == currLayer->precision);
        CreateSingleLayerPrimitive(currLayer);  // currLayer will be advanced if layer was skipped or merged
        m_env.prevPrimitiveIDs[currLayer->name] = GetPrevLayersPrimitives(currLayer);

        for (auto nl : GetNextLayers(currLayer)) {
            layersToHandle.push_back(nl);
        }
    }

    // 3. Handle output reordering
    for (auto output : networkOutputs) {
        // always reorder and let clDNN remove unneeded reorders
        AddOutputPrimitive(output.first, output.second);
    }

    // 4. Reset the temporary input/output maps used during loading
    p_currentInputs = nullptr;
    p_currentOutputs = nullptr;
}

CLDNNGraph::LayerType CLDNNGraph::LayerTypeFromStr(const std::string &str) {
    static const caseless_map<std::string, CLDNNGraph::LayerType> LayerNameToType = {
        { "Convolution" , Convolution },
        { "ReLU" , ReLU },
        { "ReLU6" , ReLU6 },
        { "Sigmoid" , Sigmoid },
        { "Logistic" , Sigmoid },
        { "TanH" , TanH },
        { "ELU" , ELU },
        { "Activation" , Activation },
        { "Norm" , LRN },
        { "Pooling" , Pooling },
        { "FullyConnected" , FullyConnected },
        { "SoftMax" , SoftMax },
        { "Power" , Power },
        { "Split" , Split },
        { "Slice" , Split },
        { "Concat" , Concatenate },
        { "Eltwise" , Eltwise },
        { "SimplerNMS" , SimplerNMS },
        { "ROIPooling" , ROIPooling },
        { "Crop" , Crop },
        { "Deconvolution" , Deconvolution },
        { "PriorBox" , PriorBox },
        { "DetectionOutput" , DetectionOutput },
        { "Normalize" , Normalize },
        { "Reshape" , Reshape },
        { "Permute" , Permute },
        { "Flatten" , Flatten },
        { "BatchNormalization" , BatchNormalization },
        { "PReLU" , PReLU },
        { "ScaleShift" , ScaleShift },
        { "Proposal" , Proposal },
        { "PSROIPooling" , PSROIPooling },
        { "Clamp" , Clamp },
        { "Copy" , Copy },
        { "Upsampling" , Upsampling },
        { "Resample" , Resample },
        { "RegionYolo" , RegionYolo },
        { "ReorgYolo" , ReorgYolo },
        { "Const" , ConstantBlob },
        { "ArgMax" , ArgMax },
        { "MVN" , MVN },
        { "Unpooling" , Unpooling },
    };
    auto it = LayerNameToType.find(str);
    if (it != LayerNameToType.end())
        return it->second;
    else
        return NO_TYPE;
}
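// Note: LayerNameToType is case-insensitive (caseless_map) and intentionally maps
// several IR names onto one type ("Logistic" -> Sigmoid, "Slice" -> Split).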

cldnn::pooling_mode CLDNNGraph::PoolingModeFromIEPooling(InferenceEngine::PoolingLayer::PoolType pt, bool excludePadding) {
    switch (pt) {
        case InferenceEngine::PoolingLayer::PoolType::MAX:
            return cldnn::pooling_mode::max;
        case InferenceEngine::PoolingLayer::PoolType::AVG:
            return excludePadding ? cldnn::pooling_mode::average_no_padding : cldnn::pooling_mode::average;
        default: IE_ASSERT(0);  // unhandled pool mode
            THROW_CLDNN_EXCEPTION("Unsupported pooling type: " << pt);
            break;
    }

    return cldnn::pooling_mode::max;  // shouldn't get here
}

cldnn::eltwise_mode CLDNNGraph::EltwiseModeFromIEEltwise(InferenceEngine::EltwiseLayer::eOperation op) {
    switch (op) {
        case InferenceEngine::EltwiseLayer::Sum:
            return cldnn::eltwise_mode::sum;
        case InferenceEngine::EltwiseLayer::Prod:
            return cldnn::eltwise_mode::prod;
        case InferenceEngine::EltwiseLayer::Max:
            return cldnn::eltwise_mode::max;
        default: THROW_CLDNN_EXCEPTION("Unsupported eltwise operation: " << op);
            break;
    }

    return cldnn::eltwise_mode::max;  // shouldn't get here
}

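// IE axes follow NCHW order while clDNN uses bfyx, so N->b, C->f, H->y, W->x.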
cldnn::concatenation::concatenation_axis CLDNNGraph::ConcatAxisFromIEAxis(unsigned axis) {
    switch (axis) {
    case 0:
        THROW_CLDNN_EXCEPTION("Unsupported concatenation axis: " << axis);  // Currently unsupported (although existing in the API)
        return cldnn::concatenation::concatenation_axis::along_b;
    case 1:
        return cldnn::concatenation::concatenation_axis::along_f;
    case 2:
        return cldnn::concatenation::concatenation_axis::along_y;
    case 3:
        return cldnn::concatenation::concatenation_axis::along_x;
    default: THROW_CLDNN_EXCEPTION("Unsupported concatenation axis: " << axis);
        break;
    }

    return cldnn::concatenation::concatenation_axis::along_f;  // shouldn't get here
}

void CLDNNGraph::CreatePrimitiveFromBlob(cldnn::primitive_id primID,
                                         const InferenceEngine::Blob::Ptr pBlob,
                                         cldnn::layout blobLayout,
                                         size_t blobByteOffset,
                                         WeightRearrangeType rearrange) {
    auto mem = cldnn::memory::allocate(*(m_env.engine), blobLayout);
    auto tmpPointer = mem.pointer<char>();  // implicitly maps buffer - unmap in destructor
    auto buf = tmpPointer.data();
    auto bufSize = blobLayout.bytes_count();
// The condition below is not valid once we use groups - todo: think of some other size check here
//     if ((pBlob != nullptr) &&
//         (pBlob->size() * (broadcastFeatures ? blobLayout.size.feature[0] : 1)) != blobLayout.count()) {
//         THROW_CLDNN_EXCEPTION("Unexpected blob size");
//     }
    if (pBlob == nullptr) {
        THROW_CLDNN_EXCEPTION("Missing blob data: " << primID);
    } else if ((pBlob->layout() != InferenceEngine::OIHW) &&
               (pBlob->layout() != InferenceEngine::NCHW) &&
               (pBlob->layout() != InferenceEngine::CHW) &&
               (pBlob->layout() != InferenceEngine::C)) {
        // TODO: support more layouts
        THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(pBlob->layout()) << ") in blob: " << primID);
    } else if (rearrange == BroadcastFeatures) {
        size_t features = static_cast<size_t>(blobLayout.size.feature[0]);
        if (pBlob->size() != features) {
            THROW_CLDNN_EXCEPTION("Invalid blob dimensions to broadcast: " << primID);
        }
        auto data = static_cast<const char *>(pBlob->buffer());
        auto elementSize = cldnn::data_type_traits::size_of(blobLayout.data_type);
        size_t featureElements = blobLayout.count() / static_cast<size_t>(blobLayout.size.feature[0]);
        IE_ASSERT(blobLayout.format == cldnn::format::bfyx);
        for (size_t f = 0; f < features; f++) {
            for (size_t e = 0; e < featureElements; e++) {
                for (size_t b = 0; b < elementSize; b++) {
                    buf[(f*featureElements + e)*elementSize + b] = data[f*elementSize + b];
                }
            }
        }
    } else if (rearrange == FlipDeconvDims) {
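        // transpose the two leading (feature) dimensions of the weights blob; IE
        // stores deconvolution weights with the input/output feature dims swapped
        // relative to what clDNN expects here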
        auto data = static_cast<const char *>(pBlob->buffer());
        auto elementSize = cldnn::data_type_traits::size_of(blobLayout.data_type);

        size_t inputFeatureElements = static_cast<size_t>(blobLayout.size.feature[0]);
        size_t outputFeatureElements = static_cast<size_t>(blobLayout.size.batch[0]);

        size_t featureSize = elementSize * static_cast<size_t>(blobLayout.size.spatial[0] * blobLayout.size.spatial[1]);

        for (size_t i = 0; i < inputFeatureElements; i++) {
            for (size_t o = 0; o < outputFeatureElements; o++) {
                size_t outputShift = (o*inputFeatureElements + i)*featureSize;
                size_t inputShift = (i*outputFeatureElements + o)*featureSize;

                for (size_t b = 0; b < featureSize; b++) {
                    buf[outputShift + b] = data[inputShift + b];
                }
            }
        }
    } else {
        auto data = static_cast<const char *>(pBlob->buffer());
        for (size_t i = 0; i < bufSize; i++) {
            buf[i] = data[i + blobByteOffset];
        }
    }
    m_topology->add(cldnn::data(primID, mem));
}

void CLDNNGraph::CreateWeightAndBiasPrimitives(const InferenceEngine::CNNLayerPtr& layer,
                                                   std::vector<cldnn::primitive_id>& weightsPrimID,
                                                   std::vector<cldnn::primitive_id>& biasesPrimID) {
    cldnn::tensor::value_type inFeatures = 1;  // todo: workaround for xyf input, handle general case (xf, xyzf etc...)
    std::shared_ptr<Data> insData0 = layer->insData[0].lock();
    IE_ASSERT(insData0 != nullptr);
    if (insData0->dims.size() > 2) {
        inFeatures = TensorValue(insData0->dims[2]);
    }
    cldnn::tensor::value_type outFeatures(0);
    std::vector<cldnn::tensor::value_type> weightDimsVec;
    InferenceEngine::Blob::Ptr pWeightsBlob, pBiasBlob;
    unsigned groupSize = 1;
    WeightRearrangeType rearrange = NO_REARRANGE;

    switch (LayerTypeFromStr(layer->type)) {
    case Convolution: {
        auto convLayer = dynamic_cast<InferenceEngine::ConvolutionLayer *> (layer.get());
        groupSize = convLayer->_group;
        if ((inFeatures % groupSize) || (convLayer->_out_depth % groupSize)) {
            THROW_CLDNN_EXCEPTION("Invalid group size in layer " << convLayer->name);
        }
        weightDimsVec = {
            TensorValue(convLayer->_out_depth / groupSize),
            TensorValue(inFeatures / groupSize),
            TensorValue(convLayer->_kernel_x),
            TensorValue(convLayer->_kernel_y)
        };
        outFeatures = convLayer->_out_depth;
        pWeightsBlob = convLayer->_weights;
        pBiasBlob = convLayer->_biases;
    }
        break;
    case Deconvolution: {
        auto deconvLayer = dynamic_cast<InferenceEngine::DeconvolutionLayer *> (layer.get());
        groupSize = deconvLayer->_group;
        if ((inFeatures % groupSize) || (deconvLayer->_out_depth % groupSize)) {
            THROW_CLDNN_EXCEPTION("Invalid group size in layer " << deconvLayer->name);
        }
        weightDimsVec = {
            TensorValue(deconvLayer->_out_depth / groupSize),
            TensorValue(inFeatures / groupSize),
            TensorValue(deconvLayer->_kernel_x),
            TensorValue(deconvLayer->_kernel_y)
        };
        outFeatures = deconvLayer->_out_depth;
        pWeightsBlob = deconvLayer->_weights;
        pBiasBlob = deconvLayer->_biases;

        if ((groupSize < outFeatures) || (groupSize < inFeatures))
            rearrange = FlipDeconvDims;
    }
        break;
    default:
        IE_ASSERT(!"Wrong weightable layer type");  // shouldn't get here
        break;
    }

    // create weights primitive
    cldnn::layout weightsLayout = cldnn::layout(
        m_networkPrecision,
        m_defaultFormat,
        cldnn::tensor(weightDimsVec));
    size_t bytesPerGroup = weightsLayout.bytes_count();
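    // the IE weights blob stores all groups back to back, so group g starts at byte
    // offset g * bytesPerGroup within pWeightsBlob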

    for (unsigned g = 0; g < groupSize; g++) {
        cldnn::primitive_id weightID = layer->name + m_weightsTag + std::to_string(g);
        CreatePrimitiveFromBlob(
            weightID,
            pWeightsBlob,
            weightsLayout,
            g * bytesPerGroup,
            rearrange);
        weightsPrimID.push_back(weightID);
    }

    // create bias primitive
    if (pBiasBlob != nullptr) {
        cldnn::layout biasesLayout = cldnn::layout(
            m_networkPrecision,
            m_defaultFormat,
            cldnn::spatial(TensorValue(outFeatures / groupSize)));
        size_t bytesPerGroup = biasesLayout.bytes_count();
        for (unsigned g = 0; g < groupSize; g++) {
            cldnn::primitive_id biasID = layer->name + m_biasesTag + std::to_string(g);
            CreatePrimitiveFromBlob(
                biasID,
                pBiasBlob,
                biasesLayout,
                g * bytesPerGroup);
            biasesPrimID.push_back(biasID);
        }
    }
}

void CLDNNGraph::CreateScaleWeightsAndBiasesFromBN(
    const InferenceEngine::BatchNormalizationLayer* bnLayer,
    cldnn::primitive_id weightsPrimID,
    cldnn::primitive_id biasesPrimID) {

    if (bnLayer->_weights->dims() != bnLayer->_biases->dims()) {
        THROW_CLDNN_EXCEPTION("mean/variance dimensions mismatch in " << bnLayer->name);
    }
    if (bnLayer->_weights->precision() != bnLayer->_biases->precision()) {
        THROW_CLDNN_EXCEPTION("mean/variance precision mismatch in " << bnLayer->name);
    }

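    // BN folding: y = (x - mean) / sqrt(variance + eps) is rewritten as
    // y = scale * x + bias, with scale = 1 / sqrt(variance + eps) and
    // bias = -mean * scale, so batch norm can run as a single cldnn::scale primitive.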
    cldnn::tensor blobTensor(0);
    switch (bnLayer->outData[0]->dims.size()) {
    case 2:
        blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[0]));
        break;
    case 4:
        blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[2]));
        break;
    default:
        THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name);
    }
    cldnn::layout blobLayout(
        m_networkPrecision,
        m_defaultFormat,
        blobTensor);

    switch (bnLayer->_weights->precision()) {
    case Precision::FP16: {
        InferenceEngine::TBlob<uint16_t> weightsBlob(bnLayer->_weights->precision(), bnLayer->_weights->layout(), bnLayer->_weights->dims());
        weightsBlob.allocate();
        InferenceEngine::TBlob<uint16_t> biasesBlob(bnLayer->_biases->precision(), bnLayer->_biases->layout(), bnLayer->_biases->dims());
        biasesBlob.allocate();

        auto weightsData = weightsBlob.data();
        auto biasesData = biasesBlob.data();
        auto varianceData = static_cast<const uint16_t *>(bnLayer->_weights->buffer());
        auto meanData = static_cast<const uint16_t *>(bnLayer->_biases->buffer());

        cldnn_status status = CLDNN_SUCCESS;
        for (size_t i = 0; i < weightsBlob.size(); i++) {
            auto variance = cldnn_half_to_float(varianceData[i], &status);
            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
            auto mean = cldnn_half_to_float(meanData[i], &status);
            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);

            float scale = 1.0f / sqrt(variance + bnLayer->epsilon);
            weightsData[i] = cldnn_float_to_half(scale, &status);
            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
            biasesData[i] = cldnn_float_to_half((-mean) * scale, &status);
            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
        }
        CreatePrimitiveFromBlob(weightsPrimID, std::make_shared<InferenceEngine::TBlob<uint16_t>>(weightsBlob), blobLayout);
        CreatePrimitiveFromBlob(biasesPrimID, std::make_shared<InferenceEngine::TBlob<uint16_t>>(biasesBlob), blobLayout);
    }
        break;
    case Precision::FP32: {
        InferenceEngine::TBlob<float> weightsBlob(bnLayer->_weights->precision(), bnLayer->_weights->layout(), bnLayer->_weights->dims());
        weightsBlob.allocate();
        InferenceEngine::TBlob<float> biasesBlob(bnLayer->_biases->precision(), bnLayer->_biases->layout(), bnLayer->_biases->dims());
        biasesBlob.allocate();

        auto weightsData = weightsBlob.data();
        auto biasesData = biasesBlob.data();
        auto varianceData = static_cast<const float *>(bnLayer->_weights->buffer());
        auto meanData = static_cast<const float *>(bnLayer->_biases->buffer());

        for (size_t i = 0; i < weightsBlob.size(); i++) {
            auto variance = varianceData[i];
            auto mean = meanData[i];
            weightsData[i] = 1.0f / sqrt(variance + bnLayer->epsilon);
            biasesData[i] = (-mean) * weightsData[i];
        }
        CreatePrimitiveFromBlob(weightsPrimID, std::make_shared<InferenceEngine::TBlob<float>>(weightsBlob), blobLayout);
        CreatePrimitiveFromBlob(biasesPrimID, std::make_shared<InferenceEngine::TBlob<float>>(biasesBlob), blobLayout);
    }
        break;
    default:
        THROW_CLDNN_EXCEPTION("Unhandled mean/variance precision in " << bnLayer->name);
        break;
    }
}

void CLDNNGraph::CreateSingleLayerPrimitive(InferenceEngine::CNNLayerPtr &layer) {
    // Initialize a profiling entry
    InitProfileInfo(layer->name, layer->type, "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);

    // First check for custom layer
    auto customLayer = m_config.customLayers.find(layer->type);
    if (customLayer != m_config.customLayers.end()) {
        CreateCustomLayerPrimitive(layer, customLayer->second);
        return;
    }

    // Otherwise move on to built-in layer types
    switch (LayerTypeFromStr(layer->type)) {
        case Convolution: CreateConvolutionPrimitive(layer);
            break;
        case ReLU:
        case ReLU6:
        case Sigmoid:
        case TanH:
        case ELU:
        case Clamp:
        case Activation:
            CreateActivationPrimitive(layer, LayerTypeFromStr(layer->type));
            break;
        case LRN: CreateLRNPrimitive(layer);
            break;
        case Pooling: CreatePoolingPrimitive(layer);
            break;
        case Unpooling: CreateMaxUnpoolingPrimitive(layer);
            break;
        case FullyConnected: CreateFullyConnectedPrimitive(layer);
            break;
        case SoftMax: CreateSoftMaxPrimitive(layer);
            break;
        case Power: CreatePowerPrimitive(layer);
            break;
        case Split: CreateSplitPrimitive(layer);
            break;
        case Concatenate: CreateConcatenatePrimitive(layer);
            break;
        case Eltwise: CreateEltwisePrimitive(layer);
            break;
        case SimplerNMS: CreateSimplerNMSPrimitive(layer);
            break;
        case ROIPooling: CreateROIPoolingPrimitive(layer);
            break;
        case Crop: CreateCropPrimitive(layer);
            break;
        case Deconvolution: CreateDeconvolutionPrimitive(layer);
            break;
        case PriorBox: CreatePriorBoxPrimitive(layer);
            break;
        case DetectionOutput: CreateDetectionOutputPrimitive(layer);
            break;
        case Normalize: CreateNormalizePrimitive(layer);
            break;
        case Reshape: CreateReshapePrimitive(layer);
            break;
        case Permute: CreatePermutePrimitive(layer);
            break;
        case Flatten: CreateFlattenPrimitive(layer);
            break;
        case BatchNormalization: CreateBatchNormalizationPrimitive(layer);
            break;
        case PReLU: CreatePReLUPrimitive(layer);
            break;
        case ScaleShift: CreateScaleShiftPrimitive(layer);
            break;
        case Proposal: CreateProposalPrimitive(layer);
            break;
        case PSROIPooling: CreatePSROIPoolingPrimitive(layer);
            break;
        case Copy: CreateCopyPrimitive(layer);
            break;
        case Upsampling: CreateUpsamplingPrimitive(layer);
            break;
        case Resample: CreateResamplePrimitive(layer);
            break;
        case ArgMax: CreateArgMaxPrimitive(layer);
            break;
        case MVN: CreateMVNPrimitive(layer);
            break;
        case RegionYolo: CreateYOLO2RegionPrimitive(layer);
            break;
        case ReorgYolo: CreateYOLO2ReorgPrimitive(layer);
            break;
        default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type);
    }
}

void CLDNNGraph::CreateScaleShiftPrimitive(InferenceEngine::CNNLayerPtr &layer) {
    ValidateLayer(layer, 1);
    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto scaleShiftLayer = dynamic_cast<InferenceEngine::ScaleShiftLayer*> (layer.get());

    // create scales and biases
    cldnn::primitive_id scalePrimID = scaleShiftLayer->name + m_scalesTag;
    cldnn::primitive_id biasPrimID = scaleShiftLayer->name + m_biasesTag;

    const auto& dims = scaleShiftLayer->_weights->dims();
    cldnn::tensor weightTensor(1);
    switch (dims.size()) {
    case 1: weightTensor = cldnn::feature(TensorValue(dims[0]));  // value per feature (or 1 global value)
        break;
    case 4: weightTensor = cldnn::tensor(TensorValue(dims[0]), TensorValue(dims[1]), TensorValue(dims[3]), TensorValue(dims[2]));  // value per pixel
        break;
    default: THROW_CLDNN_EXCEPTION("Invalid weights dimensions in layer " << layer->name);
        break;
    }

    cldnn::layout blobLayout(m_networkPrecision, m_defaultFormat, weightTensor);
    CreatePrimitiveFromBlob(scalePrimID, scaleShiftLayer->_weights, blobLayout);
    if (scaleShiftLayer->_biases != nullptr) {
        if (scaleShiftLayer->_biases->dims() != dims) {
            THROW_CLDNN_EXCEPTION("Invalid bias blob dimensions in layer " << layer->name);
        }
        CreatePrimitiveFromBlob(biasPrimID, scaleShiftLayer->_biases, blobLayout);
    } else {
        biasPrimID = "";  // 0-bias
    }

    auto scaleShiftPrim = cldnn::scale(
        scaleShiftLayer->name,
        inputPrimitives[0],
        scalePrimID,
        biasPrimID);

    m_env.primitiveIDs[scaleShiftLayer->name] = scaleShiftLayer->name;
    m_topology->add(scaleShiftPrim);
    m_env.profilingIDs.insert(scaleShiftLayer->name);
}

void CLDNNGraph::CreateProposalPrimitive(InferenceEngine::CNNLayerPtr & layer) {
    ValidateLayer(layer, 3);
    IE_ASSERT(layer->insData[0].lock()->dims[3] == 1);  // only handling input batch size 1
    IE_ASSERT(layer->insData[1].lock()->dims[3] == 1);  // only handling input batch size 1
    auto proposalLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());

    float nms_thresh = proposalLayer->GetParamAsFloat("nms_thresh", 0.7f);
    int min_size = proposalLayer->GetParamAsInt("min_size", 16);
    int feature_stride = proposalLayer->GetParamAsInt("feat_stride", 16);
    int pre_nms_topn = proposalLayer->GetParamAsInt("pre_nms_topn", 6000);
    int post_nms_topn = proposalLayer->GetParamAsInt("post_nms_topn", 300);
    std::vector<float> ratio = proposalLayer->GetParamAsFloats("ratio");
    std::vector<float> scale = proposalLayer->GetParamAsFloats("scale");

    auto inputPrimitives = GetPrevLayersPrimitives(layer);

    auto proposalPrim = cldnn::proposal(
        proposalLayer->name,
        inputPrimitives[0],  // cls_score
        inputPrimitives[1],  // bbox_pred
        inputPrimitives[2],  // im_info
        0,                   // max_num_proposals is unused
        nms_thresh,
        min_size,
        feature_stride,
        pre_nms_topn,
        post_nms_topn,
        ratio,
        scale);

    m_env.primitiveIDs[proposalLayer->name] = proposalLayer->name;
    m_topology->add(proposalPrim);
    m_env.profilingIDs.insert(proposalLayer->name);
}

void CLDNNGraph::CreatePReLUPrimitive(InferenceEngine::CNNLayerPtr &layer) {
    ValidateLayer(layer, 1);
    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto preluLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());

    auto inDataPtr = preluLayer->insData[0].lock();
    if (!inDataPtr) {
        THROW_CLDNN_EXCEPTION("Data inserted into PReLU " << preluLayer->name << " is nullptr");
    }
    auto inputDims = inDataPtr->dims;
    if (inputDims.size() == 2) {
        // WA for FC output as BF instead of BX
        // todo: remove this once FC output is changed in clDNN
        cldnn::primitive_id reshapeID = preluLayer->name + m_workaroundTag;
        m_topology->add(cldnn::reshape(
            reshapeID,
            inputPrimitives[0],
            cldnn::tensor(TensorValue(inputDims[1]), TensorValue(inputDims[0]), 1, 1)));
        m_env.primitiveIDs[inputPrimitives[0]] = reshapeID;
        inputPrimitives[0] = reshapeID;
        m_env.primitiveIDs[reshapeID] = reshapeID;
        m_env.profilingIDs.insert(reshapeID);
    }

    static const std::string blobName("weights");
    ValidateGenericLayerBlobs(preluLayer, { blobName });

    bool channel_shared = preluLayer->GetParamsAsBool("channel_shared", false);

    auto slopeBlob = preluLayer->blobs.at(blobName);
    if (channel_shared) {
        if (slopeBlob->dims()[0] != 1) {
            THROW_CLDNN_EXCEPTION("PReLU slope blob with wrong dimensions in " << preluLayer->name);
        }
        float slope(0.0f);
        switch (slopeBlob->precision()) {
        case InferenceEngine::Precision::FP32:
            slope = *static_cast<const float *>(slopeBlob->buffer());
            break;
        case InferenceEngine::Precision::FP16:
        {
            cldnn_status status = CLDNN_SUCCESS;
            slope = cldnn_half_to_float(*static_cast<const uint16_t *>(slopeBlob->buffer()), &status);
            if (status != CLDNN_SUCCESS) {
                THROW_CLDNN_EXCEPTION("Error converting fp16 value in " << preluLayer->name);
            }
        }
            break;
        default: THROW_CLDNN_EXCEPTION("Invalid PReLU slope blob precision in " << preluLayer->name);
        }
        m_topology->add(cldnn::activation(preluLayer->name, inputPrimitives[0], activation_relu_negative_slope, { slope, 0.f }));
    } else {
        CreateGenericLayerBlobPrimitives(preluLayer);
        cldnn::primitive_id slopePrimID(preluLayer->name + "_" + blobName + m_weightsTag);
        m_topology->add(cldnn::activation(preluLayer->name, inputPrimitives[0], slopePrimID, activation_relu_negative_slope));
    }

    m_env.primitiveIDs[preluLayer->name] = preluLayer->name;
    m_env.profilingIDs.insert(preluLayer->name);
}

void CLDNNGraph::CreateBatchNormalizationPrimitive(InferenceEngine::CNNLayerPtr & layer) {
    ValidateLayer(layer, 1);
    auto inputPrimitives = GetPrevLayersPrimitives(layer);

    auto bnLayer = dynamic_cast<InferenceEngine::BatchNormalizationLayer *> (layer.get());
    cldnn::primitive_id weightID = bnLayer->name + "_" + m_scalesTag;
    cldnn::primitive_id biasID = bnLayer->name + "_" + m_biasesTag;

#define _SCALE_BN_OPT
#ifdef _SCALE_BN_OPT
    // Using scale as an optimization (1 mad instead of mad+rsq)
    // create new blobs for scale shift
    CreateScaleWeightsAndBiasesFromBN(bnLayer, weightID, biasID);
    auto scalePrim = cldnn::scale(bnLayer->name, inputPrimitives[0], weightID, biasID);

    m_env.primitiveIDs[bnLayer->name] = bnLayer->name;
    m_topology->add(scalePrim);
    m_env.profilingIDs.insert(bnLayer->name);
    return;
#endif  // _SCALE_BN_OPT
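    // NOTE: while _SCALE_BN_OPT is defined above, the batch_norm path below is dead
    // code, kept as the reference (non-folded) implementation.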

    cldnn::tensor blobTensor(0);
    switch (bnLayer->outData[0]->dims.size()) {
    case 2:
        blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[0]));
        break;
    case 4:
        blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[2]));
        break;
    default:
        THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name);
    }
    cldnn::layout blobLayout(
        m_networkPrecision,
        m_defaultFormat,
        blobTensor);

    // Create variance primitive
    cldnn::primitive_id varianceID = bnLayer->name + "_" + m_weightsTag;
    CreatePrimitiveFromBlob(varianceID, bnLayer->_weights, blobLayout);

    // Create mean primitive
    cldnn::primitive_id meanID = bnLayer->name + "_" + m_biasesTag;
    CreatePrimitiveFromBlob(meanID, bnLayer->_biases, blobLayout);

    auto bnPrim = cldnn::batch_norm(
        bnLayer->name,
        inputPrimitives[0],
        meanID,
        varianceID,
        bnLayer->epsilon);

    m_env.primitiveIDs[bnLayer->name] = bnLayer->name;
    m_topology->add(bnPrim);
    m_env.profilingIDs.insert(bnLayer->name);
}

void CLDNNGraph::CreateFlattenPrimitive(InferenceEngine::CNNLayerPtr &layer) {
    ValidateLayer(layer, 1);
    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto flattenLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());

    auto flattenPrim = cldnn::reshape(
        flattenLayer->name,
        inputPrimitives[0],
        CldnnTensorFromIEDims(flattenLayer->outData[0]->dims));

    m_env.primitiveIDs[flattenLayer->name] = flattenLayer->name;
    m_topology->add(flattenPrim);
    m_env.profilingIDs.insert(flattenLayer->name);
}

void CLDNNGraph::CreatePermutePrimitive(InferenceEngine::CNNLayerPtr &layer) {
    ValidateLayer(layer, 1);
    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto permuteLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
    std::vector<uint16_t> order;
    for (auto& a : permuteLayer->GetParamAsInts("order"))
        order.push_back(static_cast<uint16_t>(a));

    auto permutePrim = cldnn::permute(
        permuteLayer->name,
        inputPrimitives[0],
        order);

    m_env.primitiveIDs[permuteLayer->name] = permuteLayer->name;
    m_topology->add(permutePrim);
    m_env.profilingIDs.insert(permuteLayer->name);
}

void CLDNNGraph::CreateReshapePrimitive(InferenceEngine::CNNLayerPtr &layer) {
    ValidateLayer(layer, 1);
    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto reshapeLayer = dynamic_cast<InferenceEngine::ReshapeLayer*> (layer.get());
    IE_ASSERT(reshapeLayer->outData.size());

    auto reshapePrim = cldnn::reshape(
        reshapeLayer->name,
        inputPrimitives[0],
        CldnnTensorFromIEDims(reshapeLayer->outData[0]->dims));

    m_env.primitiveIDs[reshapeLayer->name] = reshapeLayer->name;
    m_topology->add(reshapePrim);
    m_env.profilingIDs.insert(reshapeLayer->name);
}

void CLDNNGraph::CreateNormalizePrimitive(InferenceEngine::CNNLayerPtr &layer) {
    ValidateLayer(layer, 1);
    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto normLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
    ValidateGenericLayerBlobs(normLayer, { "weights" });
    CreateGenericLayerBlobPrimitives(normLayer);

    // params
    bool across_spatial = normLayer->GetParamsAsBool("across_spatial", true);
    float eps = normLayer->GetParamAsFloat("eps", 0.0f);

    // WA for MO outputting %.6f
    if (eps == 0.0f) {
        eps = 1e-10f;
    }

    auto normPrim = cldnn::normalize(
        normLayer->name,
        inputPrimitives[0],
        normLayer->name + "_weights" + m_weightsTag,
        across_spatial,
        eps);

    m_env.primitiveIDs[normLayer->name] = normLayer->name;
    m_topology->add(normPrim);
    m_env.profilingIDs.insert(normLayer->name);
}

void CLDNNGraph::CreateDetectionOutputPrimitive(InferenceEngine::CNNLayerPtr &layer) {
    ValidateLayer(layer, 3);
    auto detectionLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());

    uint32_t num_classes = detectionLayer->GetParamAsUInt("num_classes", 1);
    bool share_location = detectionLayer->GetParamsAsBool("share_location", true);
    int background_label_id = detectionLayer->GetParamAsInt("background_label_id", 0);
    float nms_threshold = detectionLayer->GetParamAsFloat("nms_threshold", 0.3f);
    int top_k = detectionLayer->GetParamAsInt("top_k", -1);
    float confidence_threshold = detectionLayer->GetParamAsFloat("confidence_threshold", -FLT_MAX);
    float eta = detectionLayer->GetParamAsFloat("eta", 1.0f);
    int keep_top_k = detectionLayer->GetParamAsInt("keep_top_k", -1);
    bool variance_encoded_in_target = detectionLayer->GetParamsAsBool("variance_encoded_in_target", false);
    int input_width = detectionLayer->GetParamAsInt("input_width", -1);
    int input_height = detectionLayer->GetParamAsInt("input_height", -1);
    bool normalized = detectionLayer->GetParamsAsBool("normalized", true);
    std::string code_type = detectionLayer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER");
    bool clip = detectionLayer->GetParamsAsBool("clip", false);
    bool decrease_label_id = detectionLayer->GetParamsAsBool("decrease_label_id", false);
    cldnn::prior_box_code_type cldnnCodeType = PriorBoxCodeFromString(code_type);

    int32_t prior_info_size = normalized ? 4 : 5;
    int32_t prior_coordinates_offset = normalized ? 0 : 1;

    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto detectionPrim = cldnn::detection_output(
        detectionLayer->name,
        inputPrimitives[0],
        inputPrimitives[1],
        inputPrimitives[2],
        num_classes,
        keep_top_k,
        share_location,
        background_label_id,
        nms_threshold,
        top_k,
        eta,
        cldnnCodeType,
        variance_encoded_in_target,
        confidence_threshold,
        prior_info_size,
        prior_coordinates_offset,
        normalized,
        input_width,
        input_height,
        decrease_label_id,
        clip);

    m_env.primitiveIDs[detectionLayer->name] = detectionLayer->name;
    m_topology->add(detectionPrim);
    m_env.profilingIDs.insert(detectionLayer->name);
}
1282
1283 void CLDNNGraph::CreatePriorBoxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1284     ValidateLayer(layer, 2);
1285     auto priorBoxLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1286
1287     // params
1288     std::vector<float> min_size = priorBoxLayer->GetParamAsFloats("min_size");
1289     std::vector<float> max_size = priorBoxLayer->GetParamAsFloats("max_size", {});
1290     std::vector<float> aspect_ratio = priorBoxLayer->GetParamAsFloats("aspect_ratio", {});
1291     std::vector<float> variance = priorBoxLayer->GetParamAsFloats("variance");
1292     bool flip = priorBoxLayer->GetParamsAsBool("flip", true);
1293     bool clip = priorBoxLayer->GetParamsAsBool("clip", false);
1294     bool scale_all_sizes = priorBoxLayer->GetParamsAsBool("scale_all_sizes", true);
1295     float offset = priorBoxLayer->GetParamAsFloat("offset", 0.5f);
1296
1297     auto step_w = priorBoxLayer->GetParamAsFloat("step_w", 0.0f);
1298     auto step_h = priorBoxLayer->GetParamAsFloat("step_h", 0.0f);
1299     auto step   = priorBoxLayer->GetParamAsFloat("step", 0.0f);
1300
1301     float _step_w = 0.0f;
1302     float _step_h = 0.0f;
1303     if (HasParam(priorBoxLayer->params, "step_w") && step_w != 0.0f &&
1304         HasParam(priorBoxLayer->params, "step_h") && step_h != 0.0f) {
1305         _step_w = step_w;
1306         _step_h = step_h;
1307     } else if (HasParam(priorBoxLayer->params, "step") && step != 0.0f) {
1308         _step_w = step;
1309         _step_h = step;
1310     }
1311
1312     int img = priorBoxLayer->GetParamAsInt("img_size", 0);
1313     int img_w = priorBoxLayer->GetParamAsInt("img_w", 0);
1314     int img_h = priorBoxLayer->GetParamAsInt("img_h", 0);
1315     if ((img != 0) || (img_w != 0) || (img_h != 0)) {
1316         // unsupported mode
1317         THROW_CLDNN_EXCEPTION("Unsupported image sizes in prior box " + layer->name + " (use an image blob instead of dimensions)");
1318     }
1319
1320     IE_ASSERT(layer->insData[1].lock());
1321     auto img_dims = layer->insData[1].lock()->dims;
1322     cldnn::tensor img_size = cldnn::spatial(TensorValue(img_dims[0]), TensorValue(img_dims[1]));
1323     std::vector<cldnn::primitive_id> inputPrimitives = GetPrevLayersPrimitives(layer);
1324     // the second input isn't used by value; only its dimensions are taken from the layer input
1325
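         // note: img_w and img_h are guaranteed to be 0 here (enforced by the check above),
         // so both steps stay 0.0f and step derivation is left to the prior_box primitive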
1326     if (_step_w == 0.0f || _step_h == 0.0f) {
1327         _step_w = static_cast<float>(img_w) / static_cast<float>(img_dims[0]);
1328         _step_h = static_cast<float>(img_h) / static_cast<float>(img_dims[1]);
1329     }
1330
1331     auto priorBoxPrim = cldnn::prior_box(
1332         priorBoxLayer->name,
1333         inputPrimitives[0],
1334         img_size,
1335         min_size,
1336         max_size,
1337         aspect_ratio,
1338         flip,
1339         clip,
1340         variance,
1341         _step_w,
1342         _step_h,
1343         offset,
1344         scale_all_sizes);
1345
1346     m_env.primitiveIDs[priorBoxLayer->name] = priorBoxLayer->name;
1347     m_topology->add(priorBoxPrim);
1348     m_env.profilingIDs.insert(priorBoxLayer->name);
1349 }
1350
1351 void CLDNNGraph::CreateDeconvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1352     ValidateLayer(layer, 1);
1353     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1354     auto deconvLayer = dynamic_cast<InferenceEngine::DeconvolutionLayer *> (layer.get());
1355
1356     if (deconvLayer->_dilation_x != 1 || deconvLayer->_dilation_y != 1) {
1357         THROW_CLDNN_EXCEPTION("Unsupported dilation in deconvolution " << layer->name);
1358     }
1359
1360     std::vector<cldnn::primitive_id> weightPrimID;
1361     std::vector<cldnn::primitive_id> biasPrimID;
1362     CreateWeightAndBiasPrimitives(layer, weightPrimID, biasPrimID);
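         // clDNN primitives take an input offset (where the filter window starts relative
         // to the input origin) rather than a padding, hence the negated values below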
1363     cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
1364                                          cldnn::spatial(deconvLayer->_stride_x, deconvLayer->_stride_y));
1365     cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
1366                                          cldnn::spatial(-deconvLayer->_padding_x, -deconvLayer->_padding_y));
1367
1368     auto deconvPrim = cldnn::deconvolution(deconvLayer->name,
1369         inputPrimitives[0],
1370         weightPrimID,
1371         biasPrimID,
1372         stride,
1373         padding,
1374         false,
1375         0.0f,
1376         CldnnTensorFromIEDims(deconvLayer->outData[0]->dims));
1377     m_env.primitiveIDs[deconvLayer->name] = deconvLayer->name;
1378     m_topology->add(deconvPrim);
1379     m_env.profilingIDs.insert(deconvLayer->name);
1380 }
1381
1382 void CLDNNGraph::CreateCropPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1383     if (layer->insData.size() != 1 && layer->insData.size() != 2) {
1384         THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name);
1385     }
1386     if (layer->_fusedWith) {
1387         THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
1388     }
1389     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1390     auto cropLayer = dynamic_cast<InferenceEngine::CropLayer*> (layer.get());
1391     IE_ASSERT(cropLayer->axis.size() == cropLayer->offset.size());
1392     IE_ASSERT(cropLayer->outData[0] && cropLayer->outData[0]->dims.size() == 4);
1393
1394     std::vector<cldnn::tensor::value_type> offset{ 0, 0, 0, 0 };
1395     for (size_t i = 0; i < cropLayer->axis.size(); i++) {
1396         if (cropLayer->axis[i] < 0 || cropLayer->axis[i] > 3) {
1397             THROW_CLDNN_EXCEPTION("Invalid crop axis: " + std::to_string(cropLayer->axis[i]) + " in layer " + cropLayer->name);
1398         }
1399         offset[cropLayer->axis[i]] = cropLayer->offset[i];
1400     }
1401     auto outputDims = cropLayer->outData[0]->dims;
1402     cldnn::tensor refSize(
1403         TensorValue(outputDims[3]),
1404         TensorValue(outputDims[2]),
1405         TensorValue(outputDims[0]),
1406         TensorValue(outputDims[1]));
1407
1408     auto cropPrim = cldnn::crop(
1409         cropLayer->name,
1410         inputPrimitives[0],
1411         refSize,
1412         cldnn::tensor(offset));
1413     m_env.primitiveIDs[cropLayer->name] = cropLayer->name;
1414     m_topology->add(cropPrim);
1415     m_env.profilingIDs.insert(cropLayer->name);
1416 }
1417
1418 void CLDNNGraph::CreateROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1419     ValidateLayer(layer, 2);
1420     auto roiPoolingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1421
1422     // params
1423     int pooled_width = roiPoolingLayer->GetParamAsInt("pooled_w", 0);
1424     int pooled_height = roiPoolingLayer->GetParamAsInt("pooled_h", 0);
1425     float spatial_scale = roiPoolingLayer->GetParamAsFloat("spatial_scale", 1.0f);
1426
1427     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1428
1429     auto roiPoolingPrim = cldnn::roi_pooling(
1430         roiPoolingLayer->name,
1431         inputPrimitives[0],  // input data
1432         inputPrimitives[1],  // input rois
1433         cldnn::pooling_mode::max,
1434         pooled_width,
1435         pooled_height,
1436         spatial_scale);
1437     m_env.primitiveIDs[roiPoolingLayer->name] = roiPoolingLayer->name;
1438     m_topology->add(roiPoolingPrim);
1439     m_env.profilingIDs.insert(roiPoolingLayer->name);
1440 }
1441
1442 void CLDNNGraph::CreatePSROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1443     ValidateLayer(layer, 2);
1444     auto psROIPoolingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1445
1446     // params
1447     int group_size = psROIPoolingLayer->GetParamAsInt("group_size");
1448     // todo: assert outputdim*group_size*group_size == input features
1449     float spatial_scale = psROIPoolingLayer->GetParamAsFloat("spatial_scale");
1450     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1451
1452     auto psROIPoolingPrim = cldnn::roi_pooling(
1453         psROIPoolingLayer->name,
1454         inputPrimitives[0],  // input data
1455         inputPrimitives[1],  // input rois
1456         cldnn::pooling_mode::average,
1457         group_size,
1458         group_size,
1459         spatial_scale,
1460         group_size);
1461     m_env.primitiveIDs[psROIPoolingLayer->name] = psROIPoolingLayer->name;
1462     m_topology->add(psROIPoolingPrim);
1463     m_env.profilingIDs.insert(psROIPoolingLayer->name);
1464 }
1465
1466 void CLDNNGraph::CreateCustomLayerPrimitive(InferenceEngine::CNNLayerPtr & layer, CLDNNCustomLayerPtr customLayer) {
1467     ValidateLayer(layer, 0);
1468     // todo: handling fusing
1469     auto genericLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1470     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1471
1472     // Handle defines
1473     std::string layerDefines;
1474     for (const auto& def : customLayer->Defines()) {
1475         std::string singleDefine("#define " + def.name + " " + def.prefix);
1476         if (genericLayer->params.find(def.param) != genericLayer->params.end()) {
1477             singleDefine += genericLayer->params.at(def.param);
1478         } else {
1479             singleDefine += def.default_value;
1480         }
1481         singleDefine += def.postfix + "\n";
1482         layerDefines.append(singleDefine);
1483     }
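         // e.g. (illustrative) a define entry {name: "ALPHA", prefix: "(", postfix: ")"}
         // with the layer carrying param alpha=0.75 expands to: #define ALPHA (0.75)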
1484
1485     // pre-allocate one slot per input so reorders can be assigned by port index below
1486     std::vector<cldnn::primitive_id> reorderedInputs;
1487     reorderedInputs.resize(inputPrimitives.size());
1488
1489     // Handle Blobs
1490     std::map<std::string, size_t> blobIndex;
1491     for (auto& blob : genericLayer->blobs) {
1492         // create primitive from blob (always 1d)
1493         cldnn::primitive_id blobId = genericLayer->name + "_" + blob.first;
1494         if (blob.second->dims().size() != 1) {
1495             THROW_CLDNN_EXCEPTION("Invalid dimensions for blob " << blob.first << " in layer " << genericLayer->name);
1496         }
1497         CreatePrimitiveFromBlob(blobId, blob.second, cldnn::layout(
1498             m_networkPrecision,
1499             m_defaultFormat,
1500             cldnn::tensor(1, 1, TensorValue(blob.second->dims()[0]), 1)));
1501         // save index in blobIndex
1502         blobIndex[blob.first] = reorderedInputs.size();
1503         // add to reorderedInputs
1504         reorderedInputs.push_back(blobId);
1505     }
1506
1507     // Handle kernel parameters
1508     std::vector<cldnn_arg> kernelParameters;
1509     cldnn::format outputFormat(cldnn::format::any);
1510     for (const auto& param : customLayer->KernelParams()) {
1511         switch (param.type) {
1512         case CLDNNCustomLayer::ParamType::Input: {
1513             kernelParameters.resize(std::max(kernelParameters.size(), size_t(param.paramIndex + 1)));
1514             kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input;
1515             kernelParameters[param.paramIndex].index = static_cast<cldnn_arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
1516
1517             // Handle input reorder
1518             if (param.portIndex < inputPrimitives.size() && reorderedInputs[param.portIndex].empty()) {
1519                 // todo: add support for multiple reorders of the same input? (read as bfyx for one arg and yxfb for another)
1520                 if (param.format != cldnn::format::any) {
1521                     auto reorderPrimName = inputPrimitives[param.portIndex] + "_" + layer->name + m_preCustomLayerTag;
1522                     auto preprocessPrim = cldnn::reorder(
1523                         reorderPrimName,
1524                         inputPrimitives[param.portIndex],
1525                         param.format,
1526                         DataTypeFromPrecision(layer->precision));
1527                     m_topology->add(preprocessPrim);
1528                     m_env.profilingIDs.insert(reorderPrimName);
1529                     InitProfileInfo(reorderPrimName, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
1530                     reorderedInputs[param.portIndex] = (reorderPrimName);
1531                 } else {
1532                     reorderedInputs[param.portIndex] = inputPrimitives[param.portIndex];
1533                 }
1534             }
1535         }
1536             break;
1537         case CLDNNCustomLayer::ParamType::Output: {
1538             kernelParameters.resize(std::max(kernelParameters.size(), size_t(param.paramIndex + 1)));
1539             kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_output;
1540             kernelParameters[param.paramIndex].index =
1541                 static_cast<cldnn_arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
1542             outputFormat = param.format;
1543         }
1544             break;
1545         case CLDNNCustomLayer::ParamType::Data: {
1546             kernelParameters.resize(std::max(kernelParameters.size(), size_t(param.paramIndex + 1)));
1547             kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input;
1548             kernelParameters[param.paramIndex].index =
1549                 static_cast<cldnn_arg_index>((blobIndex.find(param.blobName) == blobIndex.end()) ? -1 : blobIndex.at(param.blobName));
1550         }
1551             break;
1552         default:
1553             THROW_CLDNN_EXCEPTION("Invalid custom layer param type: " << param.type << " in layer: " << genericLayer->name);
1554         }
1555     }
1556     const std::string layerTitle("\n// Layer " + layer->name + " using Custom Layer " + customLayer->Name() + "\n");
1557     const std::string defineTitle("// Custom Layer User Defines\n");
1558
1559     auto dims = genericLayer->outData[0]->dims;
1560     std::reverse(dims.begin(), dims.end());
1561
1562     size_t N = (dims.size() > 0) ? dims[0] : 1;
1563     size_t C = (dims.size() > 1) ? dims[1] : 1;
1564     size_t H = (dims.size() > 2) ? dims[2] : 1;
1565     size_t W = (dims.size() > 3) ? dims[3] : 1;
1566     cldnn::tensor outputTensor = cldnn::tensor(cldnn::batch(N), cldnn::feature(C), cldnn::spatial(W, H));
1567
1568     cldnn::layout outputLayout = cldnn::layout(DataTypeFromPrecision(genericLayer->precision), outputFormat, outputTensor);
1569
1570     // evaluate work sizes rules
1571     std::vector<size_t> gws, lws;
1572
1573     // assume output tensor is dimension source by default
1574     int batchDim = outputTensor.batch[0];
1575     int featureDim = outputTensor.feature[0];
1576     int yDim = outputTensor.spatial[1];
1577     int xDim = outputTensor.spatial[0];
1578     int iidx = customLayer->InputDimSourceIndex();
1579
1580     // if input index is greater than -1, take dimension from input
1581     if (iidx >= 0) {
1582         if (static_cast<size_t>(iidx) >= genericLayer->insData.size())
1583             THROW_CLDNN_EXCEPTION("Invalid input tensor for index: " << iidx);
1584         // get dimensions from one of the input tensors
1585         auto inDataPtr = genericLayer->insData[iidx].lock();
1586         if (!inDataPtr) {
1587             THROW_CLDNN_EXCEPTION("Data inserted into generic layer " << genericLayer->name << " is nullptr");
1588         }
1589         auto inputDims = inDataPtr->dims;
1590
1591         batchDim = featureDim = yDim = 0;
1592         xDim = inputDims[0];
1593
1594         if (inputDims.size() > 1)
1595             yDim = inputDims[1];
1596         if (inputDims.size() > 2)
1597             featureDim = inputDims[2];
1598         if (inputDims.size() > 3)
1599             batchDim = inputDims[3];
1600     }
1601     const std::map<char, int> vars = {
1602         { 'b', batchDim }  , { 'B', batchDim },
1603         { 'f', featureDim }, { 'F', featureDim },
1604         { 'y', yDim },       { 'Y', yDim },
1605         { 'x', xDim },       { 'X', xDim },
1606     };
1607     for (auto rule : customLayer->GlobalSizeRules()) {
1608         SimpleMathExpression expr;
1609         expr.SetVariables(vars);
1610         expr.SetExpression(rule);
1611         gws.push_back(expr.Evaluate());
1612     }
1613     for (auto rule : customLayer->LocalSizeRules()) {
1614         SimpleMathExpression expr;
1615         expr.SetVariables(vars);
1616         expr.SetExpression(rule);
1617         lws.push_back(expr.Evaluate());
1618     }
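         // a sketch with illustrative values, assuming SimpleMathExpression supports basic
         // arithmetic: with b=1, f=96, y=55, x=55, a global-size rule "X*Y" evaluates to
         // 3025 and "F" to 96, yielding gws = {3025, 96}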
1619
1620     auto customPrim = cldnn::custom_gpu_primitive(
1621         genericLayer->name,
1622         reorderedInputs,
1623         { layerTitle, defineTitle, layerDefines, customLayer->KernelSource() },
1624         customLayer->KernelEntry(),
1625         kernelParameters,
1626         customLayer->CompilerOptions(),
1627         outputLayout,
1628         gws,
1629         lws);
1630
1631     if (outputLayout.format != cldnn::format::any &&
1632         p_currentOutputs->find(genericLayer->name) == p_currentOutputs->end()) {
1633         // Handle output reorder
1634         auto reorderPrimName = genericLayer->name + m_postCustomLayerTag;
1635         m_topology->add(
1636             cldnn::reorder(
1637                 reorderPrimName,
1638                 genericLayer->name,
1639                 m_defaultFormat,
1640                 m_networkPrecision));
1641         m_env.primitiveIDs[genericLayer->name] = reorderPrimName;
1642         m_env.profilingIDs.insert(reorderPrimName);
1643         InitProfileInfo(reorderPrimName, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
1644     } else {
1645         m_env.primitiveIDs[genericLayer->name] = genericLayer->name;
1646     }
1647     m_topology->add(customPrim);
1648     m_env.profilingIDs.insert(genericLayer->name);
1649 }
1650
1651 void CLDNNGraph::CreateSimplerNMSPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1652     ValidateLayer(layer, 3);
1653     IE_ASSERT(layer->insData[0].lock()->dims[3] == 1);  // only handling input batch size 1
1654     IE_ASSERT(layer->insData[1].lock()->dims[3] == 1);  // only handling input batch size 1
1655     auto simpleNMSLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
1656
1657     int max_num_proposals = simpleNMSLayer->GetParamAsInt("max_num_proposals");
1658     float iou_threshold = simpleNMSLayer->GetParamAsFloat("iou_threshold", 0.7f);
1659     int min_bbox_size = simpleNMSLayer->GetParamAsInt("min_bbox_size", 16);
1660     int feature_stride = simpleNMSLayer->GetParamAsInt("feat_stride", 16);
1661     int pre_nms_topn = simpleNMSLayer->GetParamAsInt("pre_nms_topn");
1662     int post_nms_topn = simpleNMSLayer->GetParamAsInt("post_nms_topn");
1663     std::vector<float> scale = simpleNMSLayer->GetParamAsFloats("scale");
1664     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1665
1666     auto simpleNMSPrim = cldnn::proposal(
1667         simpleNMSLayer->name,
1668         inputPrimitives[0],  // cls_score
1669         inputPrimitives[1],  // bbox_pred
1670         inputPrimitives[2],  // im_info
1671         max_num_proposals,
1672         iou_threshold,
1673         min_bbox_size,
1674         feature_stride,
1675         pre_nms_topn,
1676         post_nms_topn,
1677         { 0.5f, 1.0f, 2.0f },  // ratios for the SimplerNMS variant
1678         scale);
1679
1680     m_env.primitiveIDs[simpleNMSLayer->name] = simpleNMSLayer->name;
1681     m_topology->add(simpleNMSPrim);
1682     m_env.profilingIDs.insert(simpleNMSLayer->name);
1683 }
1684
1685 void CLDNNGraph::CreateEltwisePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1686     ValidateEltwiseLayer(layer);
1687
1688     auto eltwiseLayer = dynamic_cast<InferenceEngine::EltwiseLayer *> (layer.get());
1689     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1690
1691     std::vector<float> coefficients = eltwiseLayer->coeff;
1692     if (eltwiseLayer->_operation != InferenceEngine::EltwiseLayer::Sum && !coefficients.empty()) {
1693         THROW_IE_EXCEPTION << "Only the sum operation supports operand coefficients";
1694     }
1695
1696     if (!coefficients.empty() && coefficients.size() != inputPrimitives.size()) {
1697         THROW_IE_EXCEPTION << "Number of provided coefficients does not match the number of operands";
1698     }
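         // e.g. (illustrative) operation="sum" with coeff="1.0,0.5" over inputs a and b
         // computes out = 1.0 * a + 0.5 * b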
1699
1700     auto eltwisePrim = cldnn::eltwise(
1701         eltwiseLayer->name,
1702         inputPrimitives,
1703         EltwiseModeFromIEEltwise(eltwiseLayer->_operation),
1704         coefficients);
1705     m_env.primitiveIDs[eltwiseLayer->name] = eltwiseLayer->name;
1706     m_topology->add(eltwisePrim);
1707     m_env.profilingIDs.insert(eltwiseLayer->name);
1708 }
1709
1710 void CLDNNGraph::CreateConcatenatePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1711     ValidateLayer(layer, 0);
1712     auto concatLayer = dynamic_cast<InferenceEngine::ConcatLayer *> (layer.get());
1713     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1714     auto concatPrim = cldnn::concatenation(
1715         concatLayer->name,
1716         inputPrimitives,
1717         ConcatAxisFromIEAxis(concatLayer->_axis));
1718     m_env.primitiveIDs[concatLayer->name] = concatLayer->name;
1719     m_topology->add(concatPrim);
1720     m_env.profilingIDs.insert(concatLayer->name);
1721 }
1722
1723 void CLDNNGraph::CreateSplitPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1724     ValidateLayer(layer, 1);
1725     auto splitLayer = dynamic_cast<InferenceEngine::SplitLayer *> (layer.get());
1726     if (IsValidSplitConvMerge(splitLayer)) {
1727         // AlexNet-style split->conv*2->merge
1728         CreateFusedSplitConvMergePrimitive(layer);
1729     } else {
1730 #ifdef _USE_SPLIT_PRIMITIVE
1731         auto inputPrimitives = GetPrevLayersPrimitives(layer);
1732         auto inputDims = splitLayer->insData[0].lock()->dims;
1733         InferenceEngine::SizeVector startOffset(inputDims.size());
1734         std::vector<std::pair<cldnn::primitive_id, cldnn::tensor>> outputOffsets;
1735         std::cout << "Splitting layer: " << layer->name << "\n\tSize:" << CldnnTensorFromIEDims(inputDims) << std::endl;
1736         for (auto& outLayer : splitLayer->outData) {
1737             if (outLayer->dims.size() != startOffset.size()) {
1738                 THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1739             }
1740             for (size_t i = 0; i < inputDims.size(); i++) {
1741                 if ((outLayer->dims[i] + startOffset[i]) > inputDims[i]) {
1742                     THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1743                 }
1744             }
1745             auto outTensor = CldnnTensorFromIEDims(outLayer->dims);
1746             auto cropPrim = cldnn::crop(outLayer->name, inputPrimitives[0], outTensor, CldnnTensorFromIEDims(startOffset));
1747             m_topology->add(cropPrim);
1748             m_env.primitiveIDs[outLayer->name] = outLayer->name;
1749             m_env.profilingIDs.insert(outLayer->name);
1750             outputOffsets.push_back({ outLayer->name, CldnnTensorFromIEDims(startOffset) });
1751             for (size_t i = 0; i < inputDims.size(); i++) {
1752                 if (outLayer->dims[i] != inputDims[i]) {
1753                     startOffset[i] += outLayer->dims[i];
1754                 }
1755             }
1756         }
1757
1758         auto splitPrim = cldnn::split(
1759             splitLayer->name,
1760             inputPrimitives[0],
1761             outputOffsets);
1762         m_topology->add(splitPrim);
1763
1764
1765         // mark the split itself as optimized out (the crop primitives do the actual work)
1766         InitProfileInfo(layer->name, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
1767
1768 #else  // _USE_SPLIT_PRIMITIVE
1769         // TODO: replace with clDNN split when it's implemented
1770         auto inputPrimitives = GetPrevLayersPrimitives(layer);
1771         auto inDataPtr = splitLayer->insData[0].lock();
1772         if (!inDataPtr) {
1773             THROW_CLDNN_EXCEPTION("Data inserts into split layer " << splitLayer->name << " is nullptr");
1774         }
1775         auto inputDims = inDataPtr->dims;
1776         InferenceEngine::SizeVector startOffset(inputDims.size());
1777
1778         auto TensorFromIEDims = [](const InferenceEngine::SizeVector& dims, int def) {
1779             switch (dims.size()) {
1780             case 1: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(def, def));
1781             case 2: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(dims[1], def));
1782             case 3: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[2], def));
1783             case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
1784             default: THROW_CLDNN_EXCEPTION("Invalid dimensions size(" << dims.size() << ") in split layer");
1785             }
1786         };
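             // example (illustrative): an IE NCHW output of {1, 3, 8, 16} arrives in
             // outLayer->dims as {16, 8, 3, 1}; reversing restores {1, 3, 8, 16}, which
             // TensorFromIEDims maps to batch=1, feature=3, spatial=(x=16, y=8)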
1787
1788         for (auto& outLayer : splitLayer->outData) {
1789             if (outLayer->dims.size() != startOffset.size()) {
1790                 THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1791             }
1792             for (size_t i = 0; i < inputDims.size(); i++) {
1793                 if ((outLayer->dims[i] + startOffset[i]) > inputDims[i]) {
1794                     THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->name);
1795                 }
1796             }
1797             SizeVector reverseDims = outLayer->dims;
1798             std::reverse(reverseDims.begin(), reverseDims.end());
1799             auto outTensor = TensorFromIEDims(reverseDims, 1);
1800
1801             SizeVector reverseOffset = startOffset;
1802             std::reverse(reverseOffset.begin(), reverseOffset.end());
1803             auto offsetTensor = TensorFromIEDims(reverseOffset, 0);
1804
1805             auto cropPrim = cldnn::crop(outLayer->name, inputPrimitives[0], outTensor, offsetTensor);
1806             m_env.primitiveIDs[outLayer->name] = outLayer->name;
1807             m_topology->add(cropPrim);
1808             m_env.profilingIDs.insert(outLayer->name);
1809             InitProfileInfo(outLayer->name, "Crop", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
1810
1811             for (size_t i = 0; i < inputDims.size(); i++) {
1812                 if (outLayer->dims[i] != inputDims[i]) {
1813                     startOffset[i] += outLayer->dims[i];
1814                 }
1815             }
1816         }
1817
1818         // mark the split itself as optimized out (the crop primitives do the actual work)
1819         InitProfileInfo(layer->name, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
1820 #endif  // _USE_SPLIT_PRIMITIVE
1821     }
1822 }
1823
1824 void CLDNNGraph::CreateFusedSplitConvMergePrimitive(InferenceEngine::CNNLayerPtr &layer) {
1825     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1826     // only handle the split->conv->merge topology for now
1827     auto splitLayer = dynamic_cast<InferenceEngine::SplitLayer *> (layer.get());
1828     IE_ASSERT(IsValidSplitConvMerge(splitLayer));
1829
1830     auto convLayer1 =
1831         dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[0]).get());
1832     auto convLayer2 =
1833         dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[1]).get());
1834     auto concatLayer =
1835         dynamic_cast<InferenceEngine::ConcatLayer *> (GetNextSingleLayer(
1836             GetNextSingleLayer(splitLayer->outData[0])).get());
1837
1838     if (convLayer1 == nullptr ||
1839         convLayer2 == nullptr ||
1840         concatLayer == nullptr) {
1841         THROW_CLDNN_EXCEPTION("Expected single layer does not exist");
1842     }
1843     // Mark these layers as optimized out
1844     InitProfileInfo(convLayer1->name, convLayer1->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
1845     InitProfileInfo(convLayer2->name, convLayer2->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
1846     InitProfileInfo(concatLayer->name, concatLayer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
1847
1848     // build the split conv primitive
1849     std::vector<cldnn::primitive_id> weightPrimID;
1850     std::vector<cldnn::primitive_id> biasPrimID;
1851     CreateWeightAndBiasPrimitives(GetNextSingleLayer(splitLayer->outData[0]), weightPrimID, biasPrimID);
1852     CreateWeightAndBiasPrimitives(GetNextSingleLayer(splitLayer->outData[1]), weightPrimID, biasPrimID);
1853
1854     auto concatLayerPtr = std::make_shared<InferenceEngine::CNNLayer>(*concatLayer);
1855
1856     cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
1857                                          cldnn::spatial(convLayer1->_stride_x, convLayer1->_stride_y));
1858     cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
1859                                           cldnn::spatial(-convLayer1->_padding_x, -convLayer1->_padding_y));
1860     cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
1861                                            cldnn::spatial(convLayer1->_dilation_x, convLayer1->_dilation_y));
1862
1863     auto splitPrim = cldnn::convolution(splitLayer->name,
1864                                         inputPrimitives[0],
1865                                         weightPrimID,
1866                                         biasPrimID,
1867                                         stride,
1868                                         padding,
1869                                         dilation,
1870                                         false,
1871                                         0.0f,
1872                                         CldnnTensorFromIEDims(concatLayer->outData[0]->dims));
1873
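         // point the caller's layer reference at the concat so processing continues
         // from the end of the fused block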
1874     layer = concatLayerPtr;
1875
1876     m_env.primitiveIDs[splitLayer->name]  = splitLayer->name;
1877     m_env.primitiveIDs[convLayer1->name]  = splitLayer->name;
1878     m_env.primitiveIDs[convLayer2->name]  = splitLayer->name;
1879     m_env.primitiveIDs[concatLayer->name] = splitLayer->name;  // pair the last merged layer (concat or relu) with
1880                                                                // this primitive name to be used as
1881                                                                // input prim for subsequent layers
1882     m_topology->add(splitPrim);
1883     m_env.profilingIDs.insert(splitLayer->name);
1884 }
1885
1886 void CLDNNGraph::CreatePowerPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1887     ValidateLayer(layer, 1);
1888     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1889     auto powerLayer = dynamic_cast<InferenceEngine::PowerLayer *> (layer.get());
1890     if (powerLayer->power != 1.0f && powerLayer->power != 0.5f) {
1891         THROW_CLDNN_EXCEPTION("Power Layer " << layer->name << "uses unsupported power value");
1892     }
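         // the IE Power layer computes out = (scale * x + offset) ^ power; only
         // power == 1.0 (plain scale/shift) and power == 0.5 (sqrt) are mapped below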
1893
1894     if ((powerLayer->scale == 1.0f) && (powerLayer->offset == 0.0f)) {
1895         if (powerLayer->power == 0.5f) {
1896             auto activationPrim = cldnn::activation(powerLayer->name, inputPrimitives[0], activation_sqrt);
1897             m_topology->add(activationPrim);
1898             m_env.profilingIDs.insert(powerLayer->name);
1899             m_env.primitiveIDs[powerLayer->name] = powerLayer->name;
1900         } else {
1901             // skip this layer
1902             m_env.primitiveIDs[powerLayer->name] = inputPrimitives[0];  // register the previous primID for this layer too
1903             InitProfileInfo(layer->name, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::NOT_RUN);  // Mark this layer as not run
1904         }
1905     } else {
1906         // create scale primitive
1907         auto scaleValuePrimName = powerLayer->name + m_scalesTag;
1908         AddSingleValuePrimitive(scaleValuePrimName,
1909             DataTypeFromPrecision(powerLayer->precision),
1910             powerLayer->scale);
1911
1912         cldnn::primitive_id biasValuePrimName = "";
1913         if (powerLayer->offset != 0.0f) {
1914             biasValuePrimName = powerLayer->name + m_biasesTag;
1915             AddSingleValuePrimitive(biasValuePrimName,
1916                 DataTypeFromPrecision(powerLayer->precision),
1917                 powerLayer->offset);
1918         }
1919         auto scalePrim = cldnn::scale(
1920             powerLayer->name,
1921             inputPrimitives[0],
1922             scaleValuePrimName,
1923             biasValuePrimName);
1924
1925         m_env.primitiveIDs[powerLayer->name] = powerLayer->name;
1926         m_topology->add(scalePrim);
1927         m_env.profilingIDs.insert(powerLayer->name);
1928
1929         if (powerLayer->power == 0.5f) {
1930             auto activationPrim = cldnn::activation(powerLayer->name+"_sqrt", powerLayer->name, activation_sqrt);
1931             m_topology->add(activationPrim);
1932             m_env.profilingIDs.insert(powerLayer->name+"_sqrt");
1933         }
1934     }
1935 }
1936
1937 void CLDNNGraph::CreateSoftMaxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1938     ValidateLayer(layer, 1);
1939     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1940     auto softmaxLayer = dynamic_cast<InferenceEngine::SoftMaxLayer *> (layer.get());
1941
1942     // additional workaround (WA) for clDNN FullyConnected producing BX output instead of BF
1944     auto prevData = layer->insData[0].lock();
1945
1946     if (prevData == nullptr) {
1947         THROW_CLDNN_EXCEPTION("SoftMax: nonexistent input for layer: " << layer->name);
1948     }
1949
1950     auto prevCreator = prevData->creatorLayer.lock();
1951     bool isPrevFC = false;
1952
1953     if (prevCreator && (LayerTypeFromStr(prevCreator->type) == FullyConnected))
1954         isPrevFC = true;
1955     // end of WA
1956
1957     auto softmaxPrim = cldnn::softmax(softmaxLayer->name, inputPrimitives[0], SoftmaxDimensionFromIEAxis(softmaxLayer, isPrevFC));
1958     m_env.primitiveIDs[softmaxLayer->name] = softmaxLayer->name;
1959     m_topology->add(softmaxPrim);
1960     m_env.profilingIDs.insert(softmaxLayer->name);
1961 }
1962
1963 void CLDNNGraph::CreateFullyConnectedPrimitive(InferenceEngine::CNNLayerPtr &layer) {
1964     ValidateLayer(layer, 1);
1965     auto inputPrimitives = GetPrevLayersPrimitives(layer);
1966     auto fcLayer = dynamic_cast<InferenceEngine::FullyConnectedLayer *> (layer.get());
1967
1968     // create bias primitive
1969     cldnn::primitive_id biasesPrimID = "";
1970     if (fcLayer->_biases != nullptr) {
1971         biasesPrimID = fcLayer->name + m_biasesTag;
1972         CreatePrimitiveFromBlob(biasesPrimID,
1973             fcLayer->_biases,
1974             cldnn::layout(m_networkPrecision, m_defaultFormat,
1975                 cldnn::spatial(TensorValue(fcLayer->_out_num))));
1976     }
1977
1978     // create weights primitive
1979     // gcc (at least 5.4) fails to deduce auto here, so the type is spelled out
1980     std::shared_ptr<Data> insData0 = fcLayer->insData[0].lock();
1981     IE_ASSERT(insData0 != nullptr);
1982     cldnn::primitive_id weightsPrimID = fcLayer->name + m_weightsTag;
1983     cldnn::tensor weightsDims;
1984     switch (insData0->dims.size()) {
1985     case 4:
1986         weightsDims = { TensorValue(fcLayer->outData[0]->dims[0]),
1987                         TensorValue(insData0->dims[2]),
1988                         TensorValue(insData0->dims[0]),
1989                         TensorValue(insData0->dims[1]) };
1990         break;
1991     case 2:
1992         weightsDims = { TensorValue(fcLayer->outData[0]->dims[0]), 1, TensorValue(insData0->dims[0]), 1 };
1993         break;
1994     default: THROW_CLDNN_EXCEPTION("Invalid data dimensions");
1995     }
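         // e.g. (illustrative) a 4D input with IE dims {6, 6, 256, 1} (W, H, C, N) and
         // _out_num = 4096 gives weightsDims = {4096, 256, 6, 6} (batch, feature, x, y)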
1996     CreatePrimitiveFromBlob(weightsPrimID,
1997                             fcLayer->_weights,
1998                             cldnn::layout(m_networkPrecision, m_defaultFormat, weightsDims));
1999
2000     auto fcPrim = cldnn::fully_connected(fcLayer->name,
2001                                          inputPrimitives[0],
2002                                          weightsPrimID,
2003                                          biasesPrimID,
2004                                          false,
2005                                          0.0f);
2006
2007     m_env.primitiveIDs[fcLayer->name] = fcLayer->name;
2008     m_topology->add(fcPrim);
2009     m_env.profilingIDs.insert(fcLayer->name);
2010 }
2011
2012 void CLDNNGraph::CreatePoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2013     ValidateLayer(layer, 1);
2014     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2015     auto poolLayer = dynamic_cast<InferenceEngine::PoolingLayer *> (layer.get());
2016
2017     if (poolLayer->outData.size() > 1) {
2018         // max pooling with argmax
2019         SizeVector argmaxDims;
2020
2021         std::string realOutputID, argmaxOutputID;
2022         int outputOrder = 0;
2023
2024         for (auto out : poolLayer->outData) {
2025             auto layersMap = out->getInputTo();
2026
2027             for (auto item : layersMap) {
2028                 bool isUnpooling = (LayerTypeFromStr(item.second->type) == Unpooling);
2029                 if (outputOrder == 1 && isUnpooling) {
2030                     argmaxDims = out->dims;
2031                     argmaxOutputID = out->name;
2032                 } else {
2033                     realOutputID = out->name;
2034                 }
2035                 outputOrder++;
2036             }
2037         }
2038
2039         // create mutable_data primitive for storing argmax data
2040         cldnn::tensor mutableTensor;
2041         switch (argmaxDims.size()) {
2042         case 4: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[3]), TensorValue(argmaxDims[2]),
2043             TensorValue(argmaxDims[0]), TensorValue(argmaxDims[1]));
2044             break;
2045         case 3: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[2]), TensorValue(argmaxDims[1]),
2046             1, TensorValue(argmaxDims[0]));
2047             break;
2048         case 2: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[1]), 1, TensorValue(argmaxDims[0]), 1);
2049             break;
2050         case 1:  // not implemented yet.
2051         default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions");
2052         }
2053
2054         cldnn::layout mutableLayout = cldnn::layout(
2055             cldnn::data_types::f32,
2056             m_defaultFormat,
2057             mutableTensor);
2058
2059         cldnn::primitive_id argmaxPrimID = layer->name + "_argmax_mutable";
2060
2061         auto mem = cldnn::memory::allocate(*(m_env.engine), mutableLayout);
2062         auto argmax_mutable_prim = cldnn::mutable_data(argmaxPrimID, mem);
2063         m_topology->add(argmax_mutable_prim);
2064         m_env.primitiveIDs[argmaxPrimID] = argmaxPrimID;
2065         m_env.primitiveIDs[argmaxOutputID] = argmaxPrimID;
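             // the argmax tensor is exposed as a mutable_data side output so that a
             // downstream max_unpooling primitive can look it up by primitive ID
             // (see CreateMaxUnpoolingPrimitive)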
2066
2067         // create pooling primitive itself
2068         auto poolPrim = cldnn::pooling(poolLayer->name,
2069             inputPrimitives[0],
2070             argmaxPrimID,
2071             cldnn::pooling_mode::max_with_argmax,
2072             cldnn::spatial(TensorValue(poolLayer->_kernel_x), TensorValue(poolLayer->_kernel_y)),  // size
2073             cldnn::spatial(TensorValue(poolLayer->_stride_x), TensorValue(poolLayer->_stride_y)),  // stride
2074             // input offset (padding): explicit tensor with zero batch/feature components
2075             { 0, 0, -TensorValue(poolLayer->_padding_x), -TensorValue(poolLayer->_padding_y) },
2076             CldnnTensorFromIEDims(poolLayer->outData[0]->dims));
2077         m_topology->add(poolPrim);
2078         m_env.primitiveIDs[realOutputID] = poolLayer->name;
2079     } else {
2080         // regular pooling
2081         auto poolPrim = cldnn::pooling(poolLayer->name,
2082             inputPrimitives[0],
2083             PoolingModeFromIEPooling(poolLayer->_type, poolLayer->_exclude_pad),
2084             cldnn::spatial(TensorValue(poolLayer->_kernel_x), TensorValue(poolLayer->_kernel_y)),  // size
2085             cldnn::spatial(TensorValue(poolLayer->_stride_x), TensorValue(poolLayer->_stride_y)),  // stride
2086             // input offset (padding): explicit tensor with zero batch/feature components
2087             { 0, 0, -TensorValue(poolLayer->_padding_x), -TensorValue(poolLayer->_padding_y) },
2088             CldnnTensorFromIEDims(poolLayer->outData[0]->dims));
2089         m_topology->add(poolPrim);
2090         m_env.primitiveIDs[poolLayer->name] = poolLayer->name;
2091     }
2092
2093     m_env.profilingIDs.insert(poolLayer->name);
2094 }
2095
2096 void CLDNNGraph::CreateLRNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2097     ValidateLayer(layer, 1);
2098     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2099     auto lrnLayer = dynamic_cast<InferenceEngine::NormLayer *> (layer.get());
2100     auto lrnPrim = cldnn::lrn(
2101         lrnLayer->name,
2102         inputPrimitives[0],
2103         lrnLayer->_size,
2104         static_cast<float>(lrnLayer->_k),
2105         lrnLayer->_alpha,
2106         lrnLayer->_beta,
2107         lrnLayer->_isAcrossMaps ? cldnn_lrn_norm_region_across_channel : cldnn_lrn_norm_region_within_channel);
2108
2109     m_env.primitiveIDs[lrnLayer->name] = lrnLayer->name;
2110     m_topology->add(lrnPrim);
2111     m_env.profilingIDs.insert(lrnLayer->name);
2112 }
2113
2114 void CLDNNGraph::CreateActivationPrimitive(InferenceEngine::CNNLayerPtr &layer, const LayerType type) {
2115     ValidateLayer(layer, 1);
2116     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2117     cldnn_activation_additional_params params{ 0.0f, 0.0f };
2118     cldnn_activation_func func = cldnn_activation_func_t::activation_none;
2119
2120     LayerType activationType;
2121     if (type == Activation) {
2122         std::string activation_type = layer->GetParamAsString("type");
2123         if (activation_type == "tanh") {
2124             activationType = TanH;
2125         } else if (activation_type == "sigmoid" || activation_type == "logistic")  {
2126             activationType = Sigmoid;
2127         } else if (activation_type == "elu")  {
2128             activationType = ELU;
2129         } else if (activation_type == "relu")  {
2130             activationType = ReLU;
2131         } else if (activation_type == "relu6")  {
2132             activationType = ReLU6;
2133         } else if (activation_type == "clamp")  {
2134             activationType = Clamp;
2135         } else {
2136             THROW_CLDNN_EXCEPTION("Unsupported activation type (" + activation_type +
2137                                   ") in layer " + layer->name);
2138         }
2139     } else {
2140         activationType = type;
2141     }
2142
2143     switch (activationType) {
2144     case TanH:
2145     {
2146         func = cldnn_activation_func_t::activation_hyperbolic_tan;
2147         break;
2148     }
2149     case ELU:
2150     {
2151         func = cldnn_activation_func_t::activation_elu;
2152         params.a = layer->GetParamAsFloat("alpha", 1.0f);
2153         break;
2154     }
2155     case Sigmoid:
2156     {
2157         func = cldnn_activation_func_t::activation_logistic;
2158         break;
2159     }
2160     case ReLU:
2161     {
2162         func = cldnn_activation_func_t::activation_relu_negative_slope;
2163         params.a = layer->GetParamAsFloat("negative_slope", 0.0f);
2164         break;
2165     }
2166     case ReLU6:
2167     {
2168         func = cldnn_activation_func_t::activation_clamp;
2169         params.b = layer->GetParamAsFloat("n", 6.0f);
2170         break;
2171     }
2172     case Clamp:
2173     {
2174         func = cldnn_activation_func_t::activation_clamp;
2175         params.a = layer->GetParamAsFloat("min");
2176         params.b = layer->GetParamAsFloat("max");
2177         break;
2178     }
2179     default:
2180         THROW_CLDNN_EXCEPTION("Unsupported activation type (" + layer->type +
2181                               ") in layer " + layer->name);
2182     }
2183
2184     auto activationPrimitive = cldnn::activation(layer->name, inputPrimitives[0], func, params);
2185     m_env.primitiveIDs[layer->name] = layer->name;
2186     m_topology->add(activationPrimitive);
2187     m_env.profilingIDs.insert(layer->name);
2188 }
2189
2190 void CLDNNGraph::CreateCopyPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2191     ValidateLayer(layer, 1);
2192     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2193     auto copyLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2194
2195     // Optimize out and just update references
2196     m_env.primitiveIDs[copyLayer->name] = inputPrimitives[0];
2197     InitProfileInfo(layer->name, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);  // Mark this layer as optimized out
2198 }
2199
2200 void CLDNNGraph::CreateUpsamplingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2201     // Assuming multi-input will be handled by prev concat/eltwise layers
2202     ValidateLayer(layer, 1);
2203     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2204     auto upsamplingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2205     uint32_t scale = upsamplingLayer->GetParamAsUInt("scale");
2206     uint32_t numFilter = upsamplingLayer->GetParamAsUInt("num_filter");
2207     std::string sampleType = upsamplingLayer->GetParamAsString("sample_type");
2208
2209     auto upsamplingPrim = cldnn::upsampling(
2210         upsamplingLayer->name,
2211         inputPrimitives[0],
2212         scale,
2213         numFilter,
2214         UpsamplingTypeFromString(sampleType));
2215
2216     m_env.primitiveIDs[upsamplingLayer->name] = upsamplingLayer->name;
2217     m_topology->add(upsamplingPrim);
2218     m_env.profilingIDs.insert(upsamplingLayer->name);
2219 }
2220
2221 void CLDNNGraph::CreateResamplePrimitive(InferenceEngine::CNNLayerPtr &layer) {
2222     ValidateLayer(layer, 1);
2223     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2224     auto resampleLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2225
2226     auto outDims = layer->outData[0]->dims;
2227     size_t inFeatures = 1;
2228     unsigned int scale = 1;
2229     std::shared_ptr<Data> insData0 = layer->insData[0].lock();
2230     IE_ASSERT(insData0 != nullptr);
2231     if (insData0->dims.size() > 2) {
2232         inFeatures = insData0->dims[2];
2233         scale = outDims[0]/insData0->dims[0];
2234         if (scale < 1) {
2235             THROW_CLDNN_EXCEPTION("Unsupported scale in layer " + layer->name);
2236         }
2237     }
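         // note: scale comes from integer division of the output by the input width,
         // so only integral upsampling factors are representable here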
2238     std::string sampleType = resampleLayer->GetParamAsString("type");
2239
2240     if (sampleType != "caffe.ResampleParameter.NEAREST") {
2241         THROW_CLDNN_EXCEPTION("Unsupported resampling type (" + sampleType + ") in layer " + layer->name);
2242     }
2243
2244     auto upsamplingPrim = cldnn::upsampling(
2245         resampleLayer->name,
2246         inputPrimitives[0],
2247         scale,
2248         inFeatures,
2249         cldnn::upsampling_sample_type::nearest);
2250
2251     m_env.primitiveIDs[resampleLayer->name] = resampleLayer->name;
2252     m_topology->add(upsamplingPrim);
2253     m_env.profilingIDs.insert(resampleLayer->name);
2254 }
2255
2256 void CLDNNGraph::CreateYOLO2RegionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2257     ValidateLayer(layer, 1);
2258     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2259     auto YOLOregionLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2260
2261     uint32_t coords = YOLOregionLayer->GetParamAsUInt("coords", 4);
2262     uint32_t classes = YOLOregionLayer->GetParamAsUInt("classes", 20);
2263     uint32_t num = YOLOregionLayer->GetParamAsUInt("num", 1);
2264     bool do_softmax = YOLOregionLayer->GetParamsAsBool("do_softmax", true);
2265
2266     uint32_t mask_size = 0;
2267     if (HasParam(YOLOregionLayer->params, "mask")) {
2268         const auto mask = YOLOregionLayer->GetParamAsInts("mask");
2269         mask_size = static_cast<uint32_t>(mask.size());
2270     }
2271
2272     auto regionPrim = cldnn::region_yolo(
2273         YOLOregionLayer->name,
2274         inputPrimitives[0],
2275         coords,
2276         classes,
2277         num,
2278         mask_size,
2279         do_softmax);
2280
2281     m_env.primitiveIDs[YOLOregionLayer->name] = YOLOregionLayer->name;
2282     m_topology->add(regionPrim);
2283     m_env.profilingIDs.insert(YOLOregionLayer->name);
2284 }
2285
2286 void CLDNNGraph::CreateYOLO2ReorgPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2287     ValidateLayer(layer, 1);
2288     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2289     auto YOLOreorgLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2290     uint32_t stride = YOLOreorgLayer->GetParamAsUInt("stride");
2291
2292     auto reorgPrim = cldnn::reorg_yolo(
2293         YOLOreorgLayer->name,
2294         inputPrimitives[0],
2295         stride);
2296
2297     m_env.primitiveIDs[YOLOreorgLayer->name] = YOLOreorgLayer->name;
2298     m_topology->add(reorgPrim);
2299     m_env.profilingIDs.insert(YOLOreorgLayer->name);
2300 }
2301
2302 void CLDNNGraph::CreateArgMaxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2303     ValidateLayer(layer, 1);
2304     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2305     auto ArgMaxLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2306     const cldnn::arg_max_min::out_type otype = cldnn::arg_max_min::out_type::max;
2307
2308     if (HasParam(ArgMaxLayer->params, "out_max_val")) {
2309         int32_t out_max_val_flag = ArgMaxLayer->GetParamAsInt("out_max_val");
2310         if (out_max_val_flag != 0) {
2311             THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str << "ArgMax: out_max_val param is not supported for layer: " << layer->name;
2312         }
2313     }
2314
2315     uint32_t top_k = ArgMaxLayer->GetParamAsUInt("top_k", 1);
2316
2317     cldnn::arg_max_min::axis_name chosen_axis = cldnn::arg_max_min::axis_name::xyf;
2318
2319     if (HasParam(ArgMaxLayer->params, "axis")) {
2320         int32_t axis_param = ArgMaxLayer->GetParamAsInt("axis", 1);
2321
2322         int32_t axis = axis_param;
2323         if (-4 <= axis && axis <= -1)
2324             axis += 4;
2325
2326         switch (axis) {
2327         case 0: chosen_axis = cldnn::arg_max_min::axis_name::batch; break;
2328         case 1: chosen_axis = cldnn::arg_max_min::axis_name::feature; break;
2329         case 2: chosen_axis = cldnn::arg_max_min::axis_name::y; break;
2330         case 3: chosen_axis = cldnn::arg_max_min::axis_name::x; break;
2331         }
2332     }
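         // e.g. (illustrative) an IR axis of -1 is shifted to 3 above, selecting the x dimension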
2333
2334     auto argmaxPrim = cldnn::arg_max_min(
2335         ArgMaxLayer->name,
2336         inputPrimitives[0],
2337         otype,
2338         top_k,
2339         chosen_axis);
2340
2341     m_env.primitiveIDs[ArgMaxLayer->name] = ArgMaxLayer->name;
2342     m_topology->add(argmaxPrim);
2343     m_env.profilingIDs.insert(ArgMaxLayer->name);
2344 }
2345
2346 void CLDNNGraph::CreateMaxUnpoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2347     ValidateLayer(layer, 2);
2348
2349     auto UnpoolingLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2350
2351     cldnn::primitive_id real_input, argmax_mutable;
2352
2353     // locate ArgMax primitive
2354     int inputOrder = 0;
2355     for (auto inputData : layer->insData) {
2356         auto prevData = inputData.lock();
2357
2358         if (prevData == nullptr) {
2359             THROW_CLDNN_EXCEPTION("MaxUnpooling: nonexistent input for layer: " << layer->name);
2360         }
2361
2362         auto prevCreator = prevData->creatorLayer.lock();
2363
2364         if (prevCreator &&
2365             (LayerTypeFromStr(prevCreator->type) == Pooling) &&
2366             prevCreator->outData.size() > 1 &&
2367             inputOrder == 1) {
2368             argmax_mutable = m_env.primitiveIDs.at(prevCreator->name + "_argmax_mutable");
2369         } else {
2370             real_input = m_env.primitiveIDs.at(prevData->name);
2371         }
2372         inputOrder++;
2373     }
2374
2375     uint32_t stride = UnpoolingLayer->GetParamAsUInt("stride");
2376     uint32_t kernel_size = UnpoolingLayer->GetParamAsUInt("kernel_size");
2377
2378     auto unpoolingPrim = cldnn::max_unpooling(
2379         UnpoolingLayer->name,
2380         real_input,
2381         argmax_mutable,
2382         cldnn::spatial(kernel_size, kernel_size),  // size
2383         cldnn::spatial(stride, stride) );          // stride
2384
2385     m_env.primitiveIDs[UnpoolingLayer->name] = UnpoolingLayer->name;
2386     m_topology->add(unpoolingPrim);
2387     m_env.profilingIDs.insert(UnpoolingLayer->name);
2388 }
2389
2390 void CLDNNGraph::CreateMVNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
2391     ValidateLayer(layer, 1);
2392     auto inputPrimitives = GetPrevLayersPrimitives(layer);
2393     auto MvnLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
2394
2395     bool across_channels = MvnLayer->GetParamsAsBool("across_channels", false);
2396     bool normalize_variance = MvnLayer->GetParamsAsBool("normalize_variance", true);
2397     float eps = MvnLayer->GetParamAsFloat("eps", 1e-10f);
2398
2399     auto mvnPrim = cldnn::mvn(
2400         MvnLayer->name,
2401         inputPrimitives[0],
2402         across_channels,
2403         normalize_variance,
2404         eps);
2405
2406     m_env.primitiveIDs[MvnLayer->name] = MvnLayer->name;
2407     m_topology->add(mvnPrim);
2408     m_env.profilingIDs.insert(MvnLayer->name);
2409 }
2410
2411
2412 void CLDNNGraph::AddConstantBlobInput(InferenceEngine::CNNLayerPtr &layer) {
2413     auto constBlob = layer->blobs.begin()->second;
2414     auto constDims = layer->outData[0]->dims;
2415
2416     cldnn::tensor constTensor;
2417     switch (constDims.size()) {
2418     case 4: constTensor = cldnn::tensor(TensorValue(constDims[3]), TensorValue(constDims[2]),
2419             TensorValue(constDims[0]), TensorValue(constDims[1]));
2420             break;
2421     case 3: constTensor = cldnn::tensor(TensorValue(constDims[2]), TensorValue(constDims[1]),
2422             1, TensorValue(constDims[0]));
2423             break;
2424     case 2: constTensor = cldnn::tensor(TensorValue(constDims[1]), 1, TensorValue(constDims[0]), 1);
2425             break;
2426     case 1:  // not implemented yet.
2427     default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions");
2428     }
2429
2430     cldnn::layout constLayout = cldnn::layout(
2431         DataTypeFromPrecision(layer->blobs.begin()->second->precision()),
2432         m_defaultFormat,
2433         constTensor);
2434
2436     cldnn::primitive_id constPrimID = layer->name;
2437
2438     auto mem = cldnn::memory::allocate(*(m_env.engine), constLayout);
2439     auto tmpPointer = mem.pointer<char>();  // implicitly maps buffer - unmap in destructor
2440     auto buf = tmpPointer.data();
2441
2442     // fill cldnn::memory from blob
2443     auto bufSize = constLayout.bytes_count();
2444     auto data = static_cast<const char *>(constBlob->buffer());
2445     std::copy_n(data, bufSize, buf);
2448
2449     // add new input to topology
2450     // and put it in const blob map
2451     // (to set input memory after network compilation)
2452     m_topology->add(cldnn::input_layout(constPrimID, constLayout));
2453     m_env.primitiveIDs[layer->name] = constPrimID;
2454     m_env.constBlobs.insert({ layer->name, mem });
2455 }
2456
void CLDNNGraph::CreateConvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
    ValidateLayer(layer, 1);
    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto convLayer = dynamic_cast<InferenceEngine::ConvolutionLayer *> (layer.get());

    std::vector<cldnn::primitive_id> weightPrimID;
    std::vector<cldnn::primitive_id> biasPrimID;
    CreateWeightAndBiasPrimitives(layer, weightPrimID, biasPrimID);

    cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
                                         cldnn::spatial(convLayer->_stride_x, convLayer->_stride_y));
    cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
                                          cldnn::spatial(-convLayer->_padding_x, -convLayer->_padding_y));
    cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
                                           cldnn::spatial(convLayer->_dilation_x, convLayer->_dilation_y));

    auto convPrim = cldnn::convolution(convLayer->name,
                                       inputPrimitives[0],
                                       weightPrimID,
                                       biasPrimID,
                                       stride,
                                       padding,
                                       dilation,
                                       false,
                                       0.0f,
                                       CldnnTensorFromIEDims(convLayer->outData[0]->dims));

    m_env.primitiveIDs[convLayer->name] = convLayer->name;
    m_topology->add(convPrim);
    m_env.profilingIDs.insert(convLayer->name);
}

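// Checks whether a Split layer heads a Split->(Conv,Conv)->Concat pattern that the
// plugin can handle as a single fused convolution: both branches must be unfused
// convolutions with matching precision, padding, strides and dilation, converging
// on one Concat over the feature axis, and neither branch may be a network output.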
bool CLDNNGraph::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const {
    if (splitLayer->outData.size() != 2) return false;  // split into 2
    auto convLayer1 =
        dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[0]).get());
    auto convLayer2 =
        dynamic_cast<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[1]).get());
    if (!convLayer1 || !convLayer2  // outputs aren't convolutions
        || convLayer1->precision != convLayer2->precision                       // wrong precision
        || convLayer1->_fusedWith || convLayer2->_fusedWith                     // convolutions are fused
        || convLayer1->outData.size() != 1 || convLayer2->outData.size() != 1   // more than 1 output for convolutions
        || convLayer1->_padding_x != convLayer2->_padding_x                     // different padding
        || convLayer1->_padding_y != convLayer2->_padding_y                     // different padding
        || convLayer1->_stride_x != convLayer2->_stride_x                       // different strides
        || convLayer1->_stride_y != convLayer2->_stride_y                       // different strides
        || convLayer1->_dilation_x != convLayer2->_dilation_x                   // different dilation
        || convLayer1->_dilation_y != convLayer2->_dilation_y                   // different dilation
        || (GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0]))      // no merge after convolutions
            != GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[1])))
        || (p_currentOutputs->find(convLayer1->name) != p_currentOutputs->end())
        || (p_currentOutputs->find(convLayer2->name) != p_currentOutputs->end())) {
        return false;
    }
    auto concatLayer =
        dynamic_cast<InferenceEngine::ConcatLayer *> (
                GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0])).get());
    if (!concatLayer ||                         // not a merge layer
        concatLayer->_axis != 1 ||              // merge on unsupported axis
        concatLayer->outData.size() != 1) {     // too many outputs
        return false;
    }
    if (m_config.customLayers.find(convLayer1->type) != m_config.customLayers.end() ||
        m_config.customLayers.find(concatLayer->type) != m_config.customLayers.end()) {
        return false;  // convolution or concat were overwritten by a custom layer
    }

    return true;
}

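// Adds an input_layout primitive for a network input, then appends a reorder that
// performs the configured preprocessing: mean value subtraction (MEAN_VALUE) or
// per-pixel mean image subtraction (MEAN_IMAGE), combined with conversion to the
// internal default format and network precision.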
void CLDNNGraph::AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo) {
    // first create and add the input layout
    auto inputDims = inputInfo->getDims();
    InferenceEngine::Layout l = inputInfo->getTensorDesc().getLayout();

    cldnn::tensor dataTensor;
    switch (inputDims.size()) {
        case 4:
        {
            cldnn::tensor::value_type batch = (m_env.m_max_batch <= 1) ? TensorValue(inputDims[3]) : TensorValue(m_curBatch);

            if (InferenceEngine::Layout::NCHW == l) {
                dataTensor = cldnn::tensor(batch, TensorValue(inputDims[2]),
                    TensorValue(inputDims[0]), TensorValue(inputDims[1]));
            } else if (InferenceEngine::Layout::NHWC == l) {
                dataTensor = cldnn::tensor(batch,
                    TensorValue(inputDims[2]), TensorValue(inputDims[0]),
                    TensorValue(inputDims[1]));
            } else {
                THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in input " + inputInfo->name());
            }
            break;
        }
        case 2:
            if (InferenceEngine::NC == l)
                dataTensor = cldnn::tensor(TensorValue(inputDims[1]), 1, TensorValue(inputDims[0]), 1);
            else
                THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in input " + inputInfo->name());
            break;
        case 3:  // not implemented yet.
        case 1:  // not implemented yet.
        default: THROW_CLDNN_EXCEPTION("Invalid data dimensions");
    }

    cldnn::layout inputLayout(DataTypeFromPrecision(inputInfo->getInputPrecision()),
        FormatFromLayout(l),
        dataTensor);
    auto inputName = inputInfo->name();
    m_topology->add(cldnn::input_layout(inputName, inputLayout));

    // save the input dims
    m_env.inputLayouts.insert({ inputName, inputLayout });

    // create preprocess primitive for this input
    auto preProcess = inputInfo->getPreProcess();

    size_t meanChannels = preProcess.getNumberOfChannels();
    auto internalInputLayout = m_env.inputLayouts.at(inputName);
    internalInputLayout.format = m_defaultFormat;
    internalInputLayout.size = internalInputLayout.size.transform(m_defaultFormat, 1);
    internalInputLayout.data_type = m_networkPrecision;
    auto preprocessPrimID = inputName + m_preProcessTag;

    if ((meanChannels > 0) &&
        (meanChannels != internalInputLayout.size.feature[0])) {
        THROW_CLDNN_EXCEPTION("Mismatched mean values channels in input " + inputName);
    }

    switch (preProcess.getMeanVariant()) {
    case NONE:
    case MEAN_VALUE: {
        std::vector<float> meanValues;
        if (meanChannels > 0) {
            for (size_t c = 0; c < meanChannels; c++) {
                if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10)
                    THROW_CLDNN_EXCEPTION("stdScale is not supported yet in input " + inputName);
                meanValues.push_back(preProcess[c]->meanValue);
            }
        }
        m_topology->add(cldnn::reorder(preprocessPrimID, inputName, internalInputLayout, meanValues));
        m_env.profilingIDs.insert(preprocessPrimID);
        InitProfileInfo(preprocessPrimID, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
    }
    break;

    case MEAN_IMAGE: {
        IE_ASSERT(meanChannels);
        // first merge all mean values to a single blob
        // todo: make sure the mean blob precision is the same as the input precision
        auto meanDims = inputInfo->getDims();
        // overwrite batches with 1
        switch (meanDims.size()) {
        case 4: meanDims[3] = 1;
            break;
        default:
            THROW_CLDNN_EXCEPTION("Missing batch dimensions in input image");
        }
        InferenceEngine::TBlob<float> meanBlob(Precision(Precision::FP32), TensorDesc::getLayoutByDims(meanDims), meanDims);
        meanBlob.allocate();
        auto meanBlobData = meanBlob.data();
        for (size_t c = 0; c < meanChannels; c++) {
            if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10)
                THROW_CLDNN_EXCEPTION("stdScale is not supported yet in input " + inputName);
            auto channelMeanBlob = std::dynamic_pointer_cast<TBlob<float>>(preProcess[c]->meanData);
            auto channelSize = channelMeanBlob->size();
            auto channelBlobData = channelMeanBlob->data();
            for (size_t i = 0; i < channelSize; i++) {
                meanBlobData[(c * channelSize) + i] = channelBlobData[i];
            }
        }
        // then create a data primitive for the mean values
        auto meanBlobPtr = std::make_shared<InferenceEngine::TBlob<float>>(meanBlob);

        // mean values will use external format (sub in the input format before convert to new format)
        cldnn::tensor meanBlobTensor(internalInputLayout.size);
        meanBlobTensor.batch[0] = 1;  // mean values have no batches
        cldnn::layout meanBlobLayout(cldnn::data_types::f32, m_defaultFormat, meanBlobTensor);
        CreatePrimitiveFromBlob(
            inputName + m_meanValuesTag,
            meanBlobPtr,
            meanBlobLayout);
        m_topology->add(cldnn::reorder(preprocessPrimID,
            inputName,
            internalInputLayout,
            inputName + m_meanValuesTag));
        m_env.profilingIDs.insert(preprocessPrimID);
        InitProfileInfo(preprocessPrimID, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
    }
    break;

    default: THROW_CLDNN_EXCEPTION("Invalid mean variant in input " + inputName);
        break;
    }
    m_env.primitiveIDs[inputName] = preprocessPrimID;
    m_env.primitiveIDs[preprocessPrimID] = preprocessPrimID;
}

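// Resolves the clDNN primitive IDs feeding the given layer. For single-output
// producers the creator layer's name is used; for multi-output producers the ID
// is looked up by the specific output data name instead.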
std::vector<cldnn::primitive_id> CLDNNGraph::GetPrevLayersPrimitives(const InferenceEngine::CNNLayerPtr layer) const {
    if (layer == nullptr) {
        return {};
    }
    std::vector<cldnn::primitive_id> inputPrimitives;
    for (auto inputData : layer->insData) {
        auto prevData = inputData.lock();
        if (prevData == nullptr) {
            THROW_CLDNN_EXCEPTION("Nonexistent input for layer: " << layer->name);
        }
        auto prevCreator = prevData->creatorLayer.lock();
        auto prevName = prevCreator ? prevCreator->name : prevData->name;
        if (prevCreator && prevCreator->outData.size() > 1) {
            inputPrimitives.push_back(m_env.primitiveIDs.at(prevData->name));
        } else {
            inputPrimitives.push_back(m_env.primitiveIDs.at(prevName));
        }
    }
    return inputPrimitives;
}

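// Adds the output-side reorder for a network output, converting the producing
// primitive's result to the requested layout and precision. The primitiveIDs map
// may chain through several renames, so it is followed until it reaches a fixpoint.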
void CLDNNGraph::AddOutputPrimitive(std::string outputName, const InferenceEngine::DataPtr outputData, Precision outputPrecision) {
    // TODO: add precision check once there's an outputInfo object
    if (outputData->layout != InferenceEngine::NCHW &&
        outputData->layout != InferenceEngine::NHWC &&
        outputData->layout != InferenceEngine::CHW &&
        outputData->layout != InferenceEngine::NC) {
        THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(outputData->layout) << ") in output: " << outputName);
    }
    auto outputReorderID = outputName + m_postProcessTag;
    Precision precision = outputPrecision == Precision::UNSPECIFIED ? outputData->getPrecision() : outputPrecision;

    // Find the correct output ID. Start with the name stored in the IR.
    std::string outputID = outputName;
    std::string finalID = m_env.primitiveIDs.at(outputName);

    while (outputID != finalID) {
        auto prim = m_env.primitiveIDs.find(finalID);

        if (prim == m_env.primitiveIDs.end()) {
            THROW_IE_EXCEPTION << "Unknown output primitive id " << finalID;
        }
        outputID = finalID;
        finalID = prim->second;
    }

    m_topology->add(cldnn::reorder(outputReorderID, outputID,
        FormatFromLayout(outputData->getLayout()),
        DataTypeFromPrecision(precision)));
    m_env.primitiveIDs[outputName] = outputReorderID;
    m_env.profilingIDs.insert(outputReorderID);
    InitProfileInfo(outputReorderID, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
    m_env.outputDims[outputName] = outputData->dims;
    m_env.prevPrimitiveIDs[outputReorderID] = {outputName};
}

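// Creates a 1x1x1x1 data primitive holding a single scalar value, converting it
// to fp16 when the target data type requires it.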
void CLDNNGraph::AddSingleValuePrimitive(cldnn::primitive_id valPrimID, cldnn::data_types dataType, float value) {
    cldnn::layout primLayout(dataType, m_defaultFormat, { 1, 1, 1, 1 });
    auto primMem = cldnn::memory::allocate(*(m_env.engine), primLayout);
    switch (dataType) {
    case cldnn::data_types::f32:
    {
        auto tmpPointer = primMem.pointer<float>();  // implicitly maps buffer - unmap in destructor
        tmpPointer[0] = value;
    }
        break;
    case cldnn::data_types::f16:
    {
        auto tmpPointer = primMem.pointer<uint16_t>();  // implicitly maps buffer - unmap in destructor
        cldnn_status status = CLDNN_SUCCESS;
        tmpPointer[0] = cldnn_float_to_half(value, &status);
        if (status != CLDNN_SUCCESS) {
            THROW_CLDNN_EXCEPTION("Error converting value to fp16.");
        }
    }
        break;
    default:
        THROW_CLDNN_EXCEPTION("Unhandled data type (precision)");
    }

    m_topology->add(cldnn::data(valPrimID, primMem));
}

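// Maps an IE precision to a clDNN data type. I16 has no direct clDNN counterpart
// and is promoted to f32.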
cldnn::data_types CLDNNGraph::DataTypeFromPrecision(InferenceEngine::Precision p) {
    switch (p) {
    case Precision::I16:
    case Precision::FP32:
        return cldnn::data_types::f32;
    case Precision::FP16:
        return cldnn::data_types::f16;
    case Precision::U8:
        return cldnn::data_types::u8;
    default:
        THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << p.name() << " precision";
        break;
    }
}

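// Maps an IE layout to a clDNN format; everything except NHWC maps to bfyx.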
cldnn::format CLDNNGraph::FormatFromLayout(InferenceEngine::Layout l) {
    switch (l) {
    case InferenceEngine::Layout::NCHW:
    case InferenceEngine::Layout::NC:
    case InferenceEngine::Layout::CHW:
        return cldnn::format::bfyx;
    case InferenceEngine::Layout::NHWC:
        return cldnn::format::byxf;
    default:
        THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << l << " layout";
        break;
    }
}

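// Parses the (case-insensitive) upsampling sample type attribute from the IR.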
cldnn::upsampling_sample_type CLDNNGraph::UpsamplingTypeFromString(const std::string& str) {
    static const caseless_map<std::string, cldnn::upsampling_sample_type> UpsamplingTypeNameToType = {
        { "Bilinear" , cldnn::upsampling_sample_type::bilinear },
        { "Nearest" , cldnn::upsampling_sample_type::nearest },
    };
    auto it = UpsamplingTypeNameToType.find(str);
    if (it != UpsamplingTypeNameToType.end())
        return it->second;
    else
        THROW_CLDNN_EXCEPTION("Unknown Upsampling type: " << str);
}

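// Translates an IE softmax axis into a clDNN normalization dimension, with a
// workaround that falls back to normalize_fyx when only one non-trivial dimension
// exists or the preceding layer is fully connected.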
cldnn::softmax::dimension_t CLDNNGraph::SoftmaxDimensionFromIEAxis(const InferenceEngine::SoftMaxLayer* softmaxLayer, bool isPrevFC) {
    // WA for default softmax dimension in cldnn for fyx
    // todo: remove this once clDNN changes FC output to BF instead of BX
    auto dims = softmaxLayer->outData[0]->dims;
    unsigned non1Dims = 0;
    for (size_t i = 0; i < dims.size(); i++) {
        if (dims[i] > 1) {
            non1Dims++;
        }
    }
    if (non1Dims == 1 || isPrevFC) {
        return cldnn::softmax::normalize_fyx;
    }
    // end of WA

    switch (softmaxLayer->axis) {
    case 1: return cldnn::softmax::normalize_f;
    case 2: return cldnn::softmax::normalize_y;
    case 3: return cldnn::softmax::normalize_x;
    default: THROW_CLDNN_EXCEPTION("Invalid softmax axis " << softmaxLayer->axis);
    }
    return cldnn::softmax::normalize_fyx;
}

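// Parses the caffe.PriorBoxParameter code type string from the IR.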
cldnn::prior_box_code_type CLDNNGraph::PriorBoxCodeFromString(const std::string& str) {
    static const std::map<std::string, cldnn::prior_box_code_type> CodeNameToType = {
        { "caffe.PriorBoxParameter.CORNER" , cldnn::prior_box_code_type::corner },
        { "caffe.PriorBoxParameter.CENTER_SIZE" , cldnn::prior_box_code_type::center_size },
        { "caffe.PriorBoxParameter.CORNER_SIZE" , cldnn::prior_box_code_type::corner_size },
    };
    auto it = CodeNameToType.find(str);
    if (it != CodeNameToType.end()) {
        return it->second;
    } else {
        THROW_CLDNN_EXCEPTION("Unknown Prior-Box code type: " + str);
        return cldnn::prior_box_code_type::corner;
    }
}

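// Creates a data primitive for every 1D blob attached to a generic (custom) layer,
// named <layer name>_<blob name> plus the weights tag.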
void CLDNNGraph::CreateGenericLayerBlobPrimitives(const InferenceEngine::GenericLayer* layer) {
    IE_ASSERT(layer);
    for (auto& blob : layer->blobs) {
        if (blob.second->dims().size() != 1) {
            THROW_CLDNN_EXCEPTION("Unhandled blob dim in layer " + layer->name);
        }
        CreatePrimitiveFromBlob(
            layer->name + "_" + blob.first + m_weightsTag,
            blob.second,
            cldnn::layout(
                DataTypeFromPrecision(blob.second->precision()),
                m_defaultFormat, cldnn::spatial(TensorValue(blob.second->dims()[0]))));
    }
}

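// Verifies that the layer carries every blob the custom layer configuration expects.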
void CLDNNGraph::ValidateGenericLayerBlobs(const InferenceEngine::GenericLayer* layer, const std::vector<std::string>& blobNames) {
    IE_ASSERT(layer);
    for (auto& name : blobNames) {
        if (layer->blobs.find(name) == layer->blobs.end()) {
            THROW_CLDNN_EXCEPTION("Missing blob " + name + " in layer " + layer->name);
        }
    }
}

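// Converts reversed IE dims (innermost dimension first) into a 4D cldnn::tensor
// in bfxy order, padding missing dimensions with 1.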
cldnn::tensor CLDNNGraph::CldnnTensorFromIEDims(const InferenceEngine::SizeVector& dims) {
    auto numDims = dims.size();
    std::vector<cldnn::tensor::value_type> outputTensor({ 1, 1, 1, 1 });
    for (size_t i = 0; i < numDims; i++) {
        outputTensor[i] = TensorValue(dims[numDims - i - 1]);
    }
    // swap x,y since the cldnn tensor constructor takes bfxy instead of bfyx
    std::swap(outputTensor[2], outputTensor[3]);

    return outputTensor;
}

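// Factory for CLDNNInferRequest objects; fails if no network has been loaded yet.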
InferRequestInternal::Ptr
CLDNNGraph::CreateInferRequestImpl(InputsDataMap networkInputs, OutputsDataMap networkOutputs) {
    if (m_env.network == nullptr) {
        THROW_IE_EXCEPTION << NETWORK_NOT_LOADED_str;
    }
    return std::make_shared<CLDNNInferRequest>(m_env, m_config.useProfiling, networkInputs, networkOutputs);
}

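// Seeds the performance-counters entry for a primitive with its layer type,
// executor type and initial status so later profiling queries see consistent records.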
void CLDNNGraph::InitProfileInfo(const std::string& layerName,
                                 const std::string& layerType,
                                 const std::string& execType,
                                 InferenceEngine::InferenceEngineProfileInfo::LayerStatus status) {
    m_env.perfMap[layerName].status = status;
    m_env.perfMap[layerName].cpu_uSec = m_env.perfMap[layerName].realTime_uSec = 0;
    layerType.copy(m_env.perfMap[layerName].layer_type, layerType.length());
    execType.copy(m_env.perfMap[layerName].exec_type, execType.length());
}

}  // namespace CLDNNPlugin