inference-engine/src/gna_plugin/gna_plugin_passes.cpp
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "gna_plugin_policy.hpp"
#include <vector>
#include <string>
#include <memory>
#include <utility>
#include <algorithm>
#include <list>
#include <unordered_set>

#include <quantization/quantized_layer_params.hpp>
#include "gna_plugin.hpp"
#include "gna_layer_info.hpp"


using namespace InferenceEngine;
using namespace InferenceEngine::details;
using namespace GNAPluginNS;

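// Illustrative sketch (hypothetical layer names, not taken from a real model):
// insertDiagonalLayer() covers the "2-2" eltwise-sum case described in the comments below by
// inserting a synthetic identity ScaleShift ("diagonal") on the first operand, so that operand
// reaches the sum through an affine path and the byte-width requirements are satisfied:
//     FC_A (16-bit out) --\                      FC_A -> SyntheticScaleShift_0 (weights = 1.0) --\
//                          Eltwise(Sum)   ==>                                                     Eltwise(Sum)
//     FC_B (16-bit out) --/                      FC_B ---------------------------------------- --/
// The same insertion is used for activations whose producer does not already have a 32-bit output.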
void GNAPlugin::insertDiagonalLayer(std::vector<CNNLayerPtr> & layers) {
    int numOfDiagLayers = 0;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
    for (auto & l : layers) {
        if (l->insData.empty()) continue;
        auto prevLayer = CNNNetPrevLayer(l);
        if (LayerInfo(l).isActivation()) {
            if (LayerInfo(prevLayer).has32BOutput())
                continue;
        } else {
            auto eltwise = dynamic_cast<InferenceEngine::EltwiseLayer *>(l.get());
            if (!eltwise) {
                continue;
            }
            // in case of eltwise sum one of the inputs has to be 4 bytes wide and the other 2 bytes
            // in case of eltwise mul both inputs have to be 2 bytes wide
            // for eltwise sum with 4-4 inputs we handle that by inserting an identity activation
            // for eltwise sum with 4-2 inputs - OK as is
            // for eltwise sum with 2-2 inputs we need to insert a diagonal layer -- handled here
            // for eltwise mul with 2-2 inputs - OK as is
            // for eltwise mul with 2-4 inputs we need to insert an identity to move the 4-byte input into weights
            // for eltwise mul with 4-4 inputs we need to insert 2 identities to move both 4-byte inputs into weights

            if (eltwise->_operation != EltwiseLayer::Sum)
                continue;

            auto prevLayer1 = CNNNetPrevLayer(l, 1);
            if (!LayerInfo(prevLayer).has16BOutput() || !LayerInfo(prevLayer1).has16BOutput())
                continue;
        }

#ifdef PLOT
        std::cout << "Inserted Diagonal Layer between: " << prevLayer->name << " and " << l->name << "\n" << std::flush;
#endif
        // actual insertion
        auto diagName = std::string("SyntheticScaleShift_") + std::to_string(numOfDiagLayers++);
        auto diagLayer = std::make_shared<ScaleShiftLayer>(LayerParams({diagName, "ScaleShift", Precision::FP32}));

        // TODO: diagonal size
        std::vector<float> arrayOf1(l->outData[0]->dims[0], 1.f);
        diagLayer->_weights = make_shared_blob<float>(l->outData[0]->precision, Layout::C, arrayOf1);
        auto newDims = l->outData[0]->dims;
        auto dataPtr = std::make_shared<Data>(diagName,
                                              newDims,
                                              l->outData[0]->precision,
                                              l->outData[0]->layout);

        auto diagonalWithQuant = quantized ?
                            InferenceEngine::injectData<QuantizedLayerParams>(diagLayer) :
                                                                                    diagLayer;

        dataPtr->creatorLayer = diagonalWithQuant;
        diagonalWithQuant->outData.push_back(dataPtr);
        CNNNetworkInsertLayer(prevLayer, l, diagonalWithQuant);
    }
}

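// Note: swapping ReLU and max pooling is value-preserving because ReLU is monotonic:
//     max(relu(x1), ..., relu(xn)) == relu(max(x1, ..., xn))
// so conv -> relu -> maxpool and conv -> maxpool -> relu compute the same result; the pass below
// produces the latter ordering.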
void GNAPlugin::reorderMaxPool(std::vector<InferenceEngine::CNNLayerPtr> & layers) {
    // detecting the following pattern:
    // conv->relu->maxpooling
    // and changing it to conv->maxpooling->relu
    for (auto & l : layers) {
        auto pool = LayerInfo(l);
        if (!pool.isMaxPooling()) continue;

        // checking prev layer type
        auto activation = LayerInfo(CNNNetPrevLayer(l));
        if (!activation.isActivation()) continue;

        // if activation came from convolution
        auto convolution = LayerInfo(CNNNetPrevLayer(static_cast<InferenceEngine::CNNLayer*>(activation)));
        if (!convolution.isConvolution()) continue;

        gnalog() << "MaxPooling: " << pool << ", reordered with activation: " << activation << "\n";

        CNNNetSwapLayers(activation, pool);
    }
}

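// Illustrative example (hypothetical layers): for a graph
//     Affine (32-bit out) -> Concat
// getCandidatesForIdentityInsertion(Concat) would return { Affine }, and insertIdentityLayer()
// further below would then place an identity activation after the affine so that the concat
// consumes a 16-bit tensor.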
std::vector<CNNLayerPtr> GNAPlugin::getCandidatesForIdentityInsertion(const CNNLayerPtr l) {
    std::vector<CNNLayerPtr> prevLayers;

    // skipping memory inputs and true input layers
    if (l->insData.empty()) return {};

    auto eltwise = dynamic_cast<InferenceEngine::EltwiseLayer *>(l.get());
    auto concat = dynamic_cast<InferenceEngine::ConcatLayer *>(l.get());

    // eltwise
    if (eltwise != nullptr) {
        // an eltwise layer has 2 inputs, so whether an identity should be inserted depends on the situation

        // for sum with 4-4 inputs we handle that by inserting an identity activation - case (1)
        // for sum with 4-2 inputs - OK as is
        // for sum with 2-2 inputs we need to insert a diagonal layer

        // for mul with 2-2 inputs - OK as is
        // for mul with 2-4 inputs we need to insert an identity activation to make the input 2 bytes wide
        // for mul with 4-4 inputs we need to insert 2 identity activations to make both inputs 2 bytes wide
        auto prev0 = CNNNetPrevLayer(l, 0);
        auto prev1 = CNNNetPrevLayer(l, 1);
        switch (eltwise->_operation) {
            case EltwiseLayer::Sum:
                if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
                    return prevLayers;
                }
                // TODO: whether there is a possibility to select which layer to quantize
                prevLayers.push_back(prev0);
                break;
            case EltwiseLayer::Prod:
                if (LayerInfo(prev0).has16BOutput() && LayerInfo(prev1).has16BOutput()) {
                    return prevLayers;
                }

                if (LayerInfo(prev0).has32BOutput()) {
                    prevLayers.push_back(prev0);
                }

                if (LayerInfo(prev1).has32BOutput()) {
                    prevLayers.push_back(prev1);
                }

                break;
            default :
                THROW_GNA_EXCEPTION << "Eltwise Layer of type: " << eltwise->_operation << " not supported";
        }
    } else if (concat != nullptr) {
        for (int i = 0; CNNNetHasPrevLayer(l.get(), i); ++i) {
            auto prev = CNNNetPrevLayer(l, i);
            if (LayerInfo(prev).has32BOutput()) {
                prevLayers.push_back(prev);
            }
        }
    } else {  // not eltwise or concat
        // other layers have a single input - the situation is simpler
        // e.g. activation or pooling - no need to insert an identity activation.
        if (LayerInfo(l).has32BInput())
            return prevLayers;

        auto prevLayer = CNNNetPrevLayer(l);
        if (!LayerInfo(prevLayer).has32BOutput())
            return prevLayers;

        prevLayers.push_back(prevLayer);
    }
    return prevLayers;
}

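// The pattern matched below is the standard decomposition of a parametric ReLU:
//     prelu(x) = relu(x) - slope * relu(-x)
// expressed in the graph as eltwise_sum(relu(x), negate(scale(relu(negate(x))))),
// where "negate" is a Power layer with power == 1, offset == 0, scale == -1 and
// "scale" is a Power layer with power == 1, offset == 0 and an arbitrary scale factor.
// The whole group is collapsed into a single ReLU with negative_slope set to that scale factor.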
void GNAPlugin::substitutePRelu(std::vector<InferenceEngine::CNNLayerPtr> &layers) {
    auto getScale = [](CNNLayer* layer) {
        auto powerCandidate = LayerInfo(layer);
        if (!powerCandidate.isPower()) return 0.0f;
        auto power = powerCandidate.as<PowerLayer*>();

        return power->power == 1 && power->offset == 0.0f ? power->scale : 0.0f;
    };

    auto isScale = [getScale](CNNLayer* layer) {
        return getScale(layer) != 0.0f;
    };

    auto isNegate = [getScale](CNNLayer* layer) {
        return getScale(layer) == -1.0f;
    };

    auto getNext = [](CNNLayer* layer) {
        CNNLayer* next = nullptr;
        if (layer == nullptr) return next;
        if (layer->outData.size() != 1) return next;
        return layer->outData[0]->inputTo.begin()->second.get();
    };

    // TODO: unit tests for bad cases
    for (auto & l : layers) {
        // assume l is the starting layer, followed by eltwise_sum(relu, negate->relu->scale->negate)
        if (l->outData.size() != 1) continue;
        auto &outputLayers = l->outData[0]->inputTo;
        if (outputLayers.size() != 2) continue;

        // one of the following layers needs to be a generic relu
        auto first = LayerInfo(outputLayers.begin()->second);
        auto second = LayerInfo((++outputLayers.begin())->second);

        auto relu1 = outputLayers.begin()->second;
        auto neg1 = (++outputLayers.begin())->second;
        if (second.isRelu()) {
            std::swap(first, second);
            std::swap(relu1, neg1);
        }
        if (!first.isRelu()) continue;
        // now we have relu as the first layer, let's check the second
        // negate
        if (!isNegate(neg1.get())) continue;

        // relu
        auto relu2 = getNext(second);
        if (!LayerInfo(relu2).isRelu()) continue;

        // scale
        auto scale = getNext(relu2);
        if (!isScale(scale)) continue;

        // negate2
        auto negate = getNext(scale);
        if (!isNegate(negate)) continue;

        // sum
        auto sum = getNext(negate);
        if (!LayerInfo(sum).isEltwiseSum()) continue;
        if (sum->insData.size() != 2) continue;

        auto s1 = sum->insData[0].lock()->creatorLayer.lock().get();
        auto s2 = sum->insData[1].lock()->creatorLayer.lock().get();

        if (s1 != static_cast<InferenceEngine::CNNLayer *>(first) &&
            s2 != static_cast<InferenceEngine::CNNLayer *>(first)) {
            continue;
        }

        // hurray, we found a parametric relu group
        gnalog() << "PRelu with negative slope of " << -LayerInfo(scale).as<PowerLayer*>()->scale << " found" << std::endl;

        // removing all layer references except the relu layer
        outputLayers.clear();
        outputLayers[relu1->name] = relu1;
        // pointing relu to the output of eltwise_sum
        relu1->outData = sum->outData;
        // changing the creator layer
        relu1->outData[0]->creatorLayer = relu1;
        // pointing the consumer of the sum output back to relu, if any
        if (!relu1->outData[0]->inputTo.empty()) {
            auto summOutputLayer = relu1->outData[0]->inputTo.begin()->second;
            summOutputLayer->insData.clear();
            summOutputLayer->insData.push_back(relu1->outData[0]);
        }

        // changing the negative slope
        first.as<ReLULayer*>()->negative_slope = LayerInfo(scale).as<PowerLayer*>()->scale;
    }
}

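// Context (informal): the 0,3,2,1 permute handled below swaps dims 1 and 3 of a 4D tensor and is
// typically placed between a convolution output and a fully connected layer. Since the GNA affine
// primitives work on flat row vectors, the same effect can be achieved by reordering the affine
// weights instead, so the permute layer itself is removed and the affected affine layers are
// recorded in affineWithPermutedWeights for later weight handling.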
void GNAPlugin::reversePermutations(std::vector<CNNLayerPtr> &layers) {
    std::function<CNNLayerPtr(CNNLayerPtr, std::function<bool(CNNLayerPtr)>)> prevLayerSkipCertain
        = [&prevLayerSkipCertain](CNNLayerPtr layer, std::function<bool(CNNLayerPtr)> shouldSkip) -> CNNLayerPtr {
        if (!CNNNetHasPrevLayer(layer.get())) {
            return nullptr;
        }
        auto prev = CNNNetPrevLayer(layer);

        // return the first previous layer that should not be skipped
        if (!shouldSkip(prev)) return prev;

        return prevLayerSkipCertain(prev, shouldSkip);
    };

    auto prevLayerSkipReshape = [&prevLayerSkipCertain](CNNLayerPtr layer) -> CNNLayerPtr {
        return prevLayerSkipCertain(layer, [] (CNNLayerPtr l2) {
            return LayerInfo(l2).isReshape();
        });
    };


    std::function<CNNLayerPtr(CNNLayerPtr)> nextLayerSkipReshape = [&nextLayerSkipReshape](CNNLayerPtr layer) -> CNNLayerPtr {
        if (layer->outData.empty()) {
            return nullptr;
        }
        if (layer->outData.front()->inputTo.size() != 1) {
            return nullptr;
        }
        auto next = layer->outData.front()->inputTo.begin()->second;

        if (LayerInfo(next).isReshape()) return nextLayerSkipReshape(next);

        return next;
    };

    auto prevConv = [&prevLayerSkipCertain](CNNLayerPtr layer) -> CNNLayerPtr {
        return prevLayerSkipCertain(layer, [] (CNNLayerPtr l2) {
            return
                LayerInfo(l2).isReshape() ||
                LayerInfo(l2).isPooling() ||
                LayerInfo(l2).isActivation();
        });
    };

    std::unordered_set<std::string> affineWithPermutedWeights;
    std::list<CNNLayerPtr> permutationsToRemove;

    for (auto & l : layers) {
        if (!LayerInfo(l).isPermute()) {
            continue;
        }

        auto layerOrder = l->GetParamAsInts("order");

        if (layerOrder != std::vector<int>({0, 3, 2, 1})) {
            THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << ", order was: " << l->GetParamAsString("order") <<
                               ", but the only supported order is 0,3,2,1";
        }

        // search for its input convolution
        auto prev = prevConv(l);

        // pooling is not used in speech models without convolution
        if (!prev) {
            THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << ", no valid input to that layer";
        }

        // we can remove that permutation if it is the input of a ScaleShift or FC layer
        auto next = nextLayerSkipReshape(l);
        if (!next || !LayerInfo(next).isFullyConnected()) {
            THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << ", no valid output of that layer";
        }

        permutationsToRemove.push_back(l);

        // removing that permutation layer and saving information about the affine
        affineWithPermutedWeights.insert(next->name);
    }

    for (auto && toRemove : permutationsToRemove) {
        CNNNetworkRemoveLayer(toRemove);
    }

    // search for conv->affine sequences
    for (auto & l : layers) {
        if (!LayerInfo(l).isFullyConnected() || 0 != affineWithPermutedWeights.count(l->name)) {
            continue;
        }
        // found an affine layer that is not involved in permutation removal
        // searching whether it has a direct input from convolution
        auto prevConvLayer = prevConv(l);
        if (!prevConvLayer) continue;

        auto directPrev = CNNNetPrevLayer(l);

        // TODO : make new permute
        CNNNetworkInsertLayer(l, directPrev, CNNLayerPtr(nullptr));
    }
}

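// Illustrative example (hypothetical layers): if getCandidatesForIdentityInsertion() reports that
// Affine_1 (32-bit out) feeds an Eltwise(Sum) whose other input is also 32 bits wide, the loop
// below creates a layer named "identity_0" and rewires the graph to
//     Affine_1 -> identity_0 -> Eltwise(Sum)
// If every consumer of Affine_1 needs the identity, it is inserted once for all outputs
// (notAll stays false); otherwise only the edge towards the current consumer is rewired.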
void GNAPlugin::insertIdentityLayer(std::vector<CNNLayerPtr> &layers) {
    int numOfIdentityLayers = 0;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
    for (auto & l : layers) {
        for (auto && prev : getCandidatesForIdentityInsertion(l)) {
            // actual insertion
            auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers++);

            gnalog() << "Inserted "<< activationName << " between: " << prev->name << " and " << l->name << "\n" << std::flush;

            CNNLayerPtr activationLayer =
                std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
            auto inputData = l->insData[0].lock();
            auto newDims = inputData->dims;
            std::reverse(begin(newDims), end(newDims));

            auto dataPtr = std::make_shared<Data>("FullyConnected",
                                                  TensorDesc(inputData->precision,
                                                             newDims,
                                                             inputData->layout));
            auto activationLayerWithQuant = quantized ?
                                    InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) :
                                                                                            activationLayer;
            dataPtr->creatorLayer = activationLayerWithQuant;
            activationLayerWithQuant->outData.push_back(dataPtr);
            // whether to insert 1 identity for this consumer or for all outputs
            // TODO: possible grouping here, need to implement a special grouped inserter
            bool notAll = false;
            for (auto && nextData  : prev->outData) {
                for (auto && nextLayer : nextData->inputTo) {
                    if (nextLayer.second.get() == l.get())
                        continue;
                    if (getCandidatesForIdentityInsertion(nextLayer.second).empty()) {
                        notAll = true;
                    }
                }
            }

            CNNNetworkInsertLayer(prev, notAll ? l : CNNLayerPtr(nullptr), activationLayerWithQuant);
        }
    }
}

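// Informal note: a Copy layer materializes its input into a separate output buffer. It is
// inserted below for Concat -> Memory and Crop -> Concat connections, where the producer and the
// consumer would otherwise have to alias the same memory region.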
void GNAPlugin::insertCopyLayer(std::vector<InferenceEngine::CNNLayerPtr> & layers) {
    int numCopyLayers = 0;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
    for (auto & l : layers) {
        if (l->insData.empty()) continue;
        auto prevLayer = CNNNetPrevLayer(l);
        if ((LayerInfo(l).isMemory() && LayerInfo(prevLayer).isConcat()) ||
            (LayerInfo(l).isConcat() && LayerInfo(prevLayer).isCrop())) {
            if (LayerInfo(prevLayer).isCrop()) {
                auto cropLayer = dynamic_cast<InferenceEngine::CropLayer *> (prevLayer.get());
                size_t cropOffset = cropLayer->offset.back() * cropLayer->precision.size();
                if (ALIGN(cropOffset, 8) != cropOffset) {
                    // The crop will be replaced by an affine layer.
                    // Copy layer insertion is not required
                    continue;
                }
            }
            std::string copyName = std::string("copy_") + std::to_string(numCopyLayers++);
            gnalog() << "Inserted "<< copyName << " between: " << l->name << " and " << prevLayer->name << "\n" << std::flush;

            CNNLayerPtr copyLayer =
            std::make_shared<GenericLayer>(LayerParams({copyName, "Copy", Precision::FP32}));

            auto inputData = l->insData[0].lock();
            auto newDims = inputData->dims;

            std::reverse(begin(newDims), end(newDims));

            auto dataPtr = std::make_shared<Data>(copyName,
                                                  TensorDesc(inputData->precision,
                                                             newDims,
                                                             inputData->layout));

            auto copyWithQuant = quantized ?
                                    InferenceEngine::injectData<QuantizedLayerParams>(copyLayer) :
                                                                                            copyLayer;
            dataPtr->creatorLayer = copyWithQuant;
            copyWithQuant->outData.push_back(dataPtr);
            CNNNetworkInsertLayer(prevLayer, l, copyWithQuant);
        }
    }
}

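// Worked example for the offset arithmetic below (hypothetical split, assuming ALIGN/ALIGN64
// round up to the next multiple of 8/64): a Split produces two int16 outputs of 10 and 20
// elements. The second output starts at currentOffset = 10 * 2 = 20 bytes, which is not 64-byte
// aligned, so a filter is inserted with
//     aligned64_offset = max(0, ALIGN64(20) - 64) = 0
//     newOutputSize    = (20 + ALIGN(20, 8) * 2 - 0) / 2 = 34
// i.e. the filter reads 34 input elements starting from the aligned offset 0, and its weight
// matrix selects input elements 10..29, reproducing the original 20-element output.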
void GNAPlugin::insertAligningFilterLayer(std::vector<InferenceEngine::CNNLayerPtr> & layers) {
    // currently the split layer only supports 2 bytes per element in int16 and int8 mode.
    // In fp32 mode this is not necessary but useful for testing
    const int bytesPerSplitElement = 2;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());

    int numOfFilterLayers = 0;
    for (auto &l : layers) {
        auto info = LayerInfo(l);
        if (!info.isSplit() && !info.isSlice()) {
            continue;
        }

        size_t currentOffset = 0;
        int splitOutIndex = 0;
        for (auto &&splitOutput  : l->outData) {
            auto outputSize = product(++begin(splitOutput->getDims()), end(splitOutput->getDims()));

            if (currentOffset != ALIGN64(currentOffset)) {
                // this split output does not begin on a 64-byte aligned boundary - it needs to be corrected with an aligning filter layer
#ifdef PLOT
                // getting the list of layers attached to the current split output
                gnalog() << "Inserted Affine Filter Layer between: " << l->name << " and : \n";
                for (auto &&followingLayers : splitOutput->getInputTo()) {
                    gnalog() << "    " << followingLayers.second->name << "\n";
                }
                gnalog() << std::flush;
#endif
                // insert the filter
                auto filterName = std::string("AlignFilter_") + std::to_string(numOfFilterLayers++);
                auto filterLayer =
                    std::make_shared<WeightableLayer>(LayerParams({filterName, "AffineFilter", Precision::FP32}));


                auto inputData = splitOutput;
                auto newDims = splitOutput->dims;

                size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(currentOffset) - 64));
                size_t newOutputSize = (currentOffset + ALIGN(outputSize, 8) * bytesPerSplitElement - aligned64_offset)
                    / bytesPerSplitElement;

                // encodes the offset from the beginning of the split layer input
                filterLayer->params["offset"] = std::to_string(aligned64_offset);

                auto &num_rows_out = splitOutput->dims[0];

                std::vector<float> filterWeights(newOutputSize * num_rows_out, 0.f);

                auto offset = (currentOffset - aligned64_offset) / bytesPerSplitElement;

                for (int i = 0; i != outputSize; i++) {
                    filterWeights[offset] = 1.0f;
                    offset += newOutputSize + 1;
                }

                filterLayer->_weights = make_shared_blob<float>(inputData->precision, Layout::C, filterWeights);

                std::reverse(begin(newDims), end(newDims));

                auto outData = std::make_shared<Data>(filterName,
                                                      TensorDesc(splitOutput->precision,
                                                                 newDims,
                                                                 inputData->layout));

                auto filterWithQuant = quantized ?
                                       InferenceEngine::injectData<QuantizedLayerParams>(filterLayer) :
                                       filterLayer;
                outData->creatorLayer = filterWithQuant;
                filterWithQuant->outData.push_back(outData);
                CNNNetworkInsertLayer(l, nullptr, filterWithQuant, splitOutIndex);
            }


            // advance the offset to find the next output that starts at an unaligned location
            currentOffset += outputSize * bytesPerSplitElement;
            splitOutIndex++;
        }
    }
}

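// Illustrative example (hypothetical shapes): a ScaleShift with 8 weights applied to an input
// that has 24 elements per batch (nElements = 24) cannot broadcast directly, so under the
// WEIGHTS_TILING policy the weight (and bias) blobs are repeated 24 / 8 = 3 times to length 24,
// and both the input and output Data are reshaped to a 2D {batchSize, 24} NC tensor. Tiling only
// works when the weights length evenly divides nElements (TileTo % weightsElements == 0).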
void GNAPlugin::substituteScaleShiftBroadCast(std::vector<InferenceEngine::CNNLayerPtr> &layers) {
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
    for (auto & l : layers) {
        LayerInfo layerInfo(l);

        if (!layerInfo.isScaleShift()) {
            continue;
        }

        auto scaleShift = layerInfo.as<ScaleShiftLayer*>();

        auto insData = scaleShift->insData.front().lock();
        if (!insData) {
            THROW_GNA_EXCEPTION << "Cannot get input data for layer: " << l->name;
        }

        if (insData->getDims().size() <= 2) {
            // NC or C - cannot do broadcast
            continue;
        }
        auto batchSize = insData->getDims()[0];
        auto nElements = details::product(insData->getDims()) / batchSize;
        auto weightsElements = scaleShift->_weights->size();
        auto weightsBytes = scaleShift->_weights->byteSize();

        if (nElements == weightsElements) {
            continue;
        }

        // only 3d scaleshift is supported, where the number of channels is arbitrary
        auto lastD = insData->getDims()[insData->getDims().size() - 1];
        if (lastD != weightsElements) {
            THROW_GNA_EXCEPTION << "Unsupported layer: " << l->name
                                << " should have last dim(" << lastD << ") equal to weights(" << weightsElements << ") length";
        }
        if (insData->getDims().size() == 2) {
            THROW_GNA_EXCEPTION << "For layer: " << l->name
                                << " weights size(" << weightsElements << ") invalid: should match input size of(" << lastD << ")";
        }

        gnalog() << "Substitution of ScaleShift broadcast for layer: " << l->name << "\n";
        // approach 1 - weights tiling
        if (policy.ScaleShiftPolicy == Policy::WEIGHTS_TILING) {
            auto tileBlob = [](Blob::Ptr &blob, size_t TileTo) {
                auto weightsElements = blob->size();
                auto weightsBytes = blob->byteSize();
                if (weightsElements == 0) {
                    THROW_IE_EXCEPTION << "Blob size is 0";
                }
                if (TileTo % weightsElements) {
                    return false;
                }

                auto tiledBlob = make_plain_blob(blob->getTensorDesc().getPrecision(), {TileTo});
                tiledBlob->allocate();


                for (int i = 0; i != TileTo / weightsElements; i++) {
                    ie_memcpy(tiledBlob->buffer().as<uint8_t*>() + i * weightsBytes, weightsBytes, blob->cbuffer(), weightsBytes);
                }
                blob = tiledBlob;
                return true;
            };

            if (!tileBlob(scaleShift->_weights, nElements)) {
                THROW_GNA_EXCEPTION << "Cannot tile weights for layer: " << l->name << ", because the weights size is not a divisor of the dims product";
            }
            if (scaleShift->_biases) {
                if (!tileBlob(scaleShift->_biases, nElements)) {
                    THROW_GNA_EXCEPTION << "Cannot tile biases for layer: " << l->name << ", because the biases size is not a divisor of the dims product";
                }
            }

            // currently the Data type does not provide a reshape method on its tensor desc
            scaleShift->outData.front()->reshape({batchSize, nElements}, Layout::NC);
            insData->reshape({batchSize, nElements}, Layout::NC);
        } else {
            THROW_GNA_EXCEPTION << "Not implemented: substitution of scaleshift broadcast with policy "
                                << policy.ScaleShiftPolicy << " using layers tiling, layer: " << l->name;
        }
    }
}