// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "gna_plugin_policy.hpp"

#include <vector>
#include <string>
#include <memory>
#include <list>
#include <functional>
#include <algorithm>
#include <unordered_set>

#include <quantization/quantized_layer_params.hpp>
#include "gna_plugin.hpp"
#include "gna_layer_info.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::details;
using namespace GNAPluginNS;
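
// Inserts a synthetic "diagonal" ScaleShift layer (identity weights) before activations whose
// input does not come from a 32-bit-output layer, and before eltwise sums with two 16-bit inputs;
// this gives the GNA a weightable primitive to attach the operation to (rationale inferred from
// the byte-width notes inside).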
void GNAPlugin::insertDiagonalLayer(std::vector<CNNLayerPtr> & layers) {
    int numOfDiagLayers = 0;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
    for (auto & l : layers) {
        if (l->insData.empty()) continue;
        auto prevLayer = CNNNetPrevLayer(l);
        if (LayerInfo(l).isActivation()) {
            // an activation that already follows a 32-bit output needs no diagonal layer
            if (LayerInfo(prevLayer).has32BOutput()) {
                continue;
            }
        } else {
            auto eltwise = dynamic_cast<InferenceEngine::EltwiseLayer *>(l.get());
            if (!eltwise) {
                continue;
            }

            // in case of eltwise sum one of the inputs has to be 4 bytes and the other 2
            // in case of eltwise mul both inputs have to be 2 bytes
            // for e. sum with 4-4 inputs we will handle that by inserting an identity activation
            // for e. sum with 4-2 inputs - OK
            // for e. sum with 2-2 inputs we need to insert a diagonal - handled here
            // for e. mul with 2-2 inputs - OK
            // for e. mul with 2-4 inputs we need to insert an identity to put the 4 bytes input into weights
            // for e. mul with 4-4 inputs we need to insert 2 identities to put both 4 bytes inputs into weights

            if (eltwise->_operation != EltwiseLayer::Sum)
                continue;

            // only the 2-2 sum case gets a diagonal layer
            auto prevLayer1 = CNNNetPrevLayer(l, 1);
            if (!LayerInfo(prevLayer).has16BOutput() || !LayerInfo(prevLayer1).has16BOutput())
                continue;
        }

        gnalog() << "Inserted Diagonal Layer between: " << prevLayer->name << " and " << l->name << "\n" << std::flush;

        // actual insertion of a synthetic ScaleShift with identity weights
        auto diagName = std::string("SyntheticScaleShift_") + std::to_string(numOfDiagLayers++);
        auto diagLayer = std::make_shared<ScaleShiftLayer>(LayerParams({diagName, "ScaleShift", Precision::FP32}));

        // TODO: diagonal size
        std::vector<float> arrayOf1(l->outData[0]->dims[0], 1.f);
        diagLayer->_weights = make_shared_blob<float>(l->outData[0]->precision, Layout::C, arrayOf1);
        auto newDims = l->outData[0]->dims;
        auto dataPtr = std::make_shared<Data>(diagName,
                                              newDims,
                                              l->outData[0]->precision,
                                              l->outData[0]->layout);

        auto diagonalWithQuant = quantized ?
                                 InferenceEngine::injectData<QuantizedLayerParams>(diagLayer) :
                                 diagLayer;

        dataPtr->creatorLayer = diagonalWithQuant;
        diagonalWithQuant->outData.push_back(dataPtr);
        CNNNetworkInsertLayer(prevLayer, l, diagonalWithQuant);
    }
}

void GNAPlugin::reorderMaxPool(std::vector<InferenceEngine::CNNLayerPtr> & layers) {
    // detecting the following pattern
    // conv -> relu -> maxpooling
    // and changing it to conv -> maxpooling -> relu
    for (auto & l : layers) {
        auto pool = LayerInfo(l);
        if (!pool.isMaxPooling()) continue;

        // checking prev layer type
        auto activation = LayerInfo(CNNNetPrevLayer(l));
        if (!activation.isActivation()) continue;

        // if activation came from convolution
        auto convolution = LayerInfo(CNNNetPrevLayer(static_cast<InferenceEngine::CNNLayer*>(activation)));
        if (!convolution.isConvolution()) continue;

        gnalog() << "MaxPooling: " << pool << ", reordered with activation: " << activation << "\n";

        CNNNetSwapLayers(activation, pool);
    }
}
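
// Returns the list of previous layers after which an identity activation needs to be inserted,
// based on the 16-bit/32-bit output widths of the producing layers (see notes below).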
std::vector<CNNLayerPtr> GNAPlugin::getCandidatesForIdentityInsertion(const CNNLayerPtr l) {
    std::vector<CNNLayerPtr> prevLayers;

    // skipping memory inputs and true input layers
    if (l->insData.empty()) return {};

    auto eltwise = dynamic_cast<InferenceEngine::EltwiseLayer *>(l.get());
    auto concat = dynamic_cast<InferenceEngine::ConcatLayer *>(l.get());

    if (eltwise != nullptr) {
        // eltwise layer has 2 inputs, so whether an identity should be inserted depends on the situation

        // for sum with 4-4 inputs we will handle that by inserting an identity activation - case (1)
        // for sum with 4-2 inputs - OK
        // for sum with 2-2 inputs we need to insert a diagonal

        // for mul with 2-2 inputs - OK
        // for mul with 2-4 inputs we need to insert an identity activation to make a 2 bytes input
        // for mul with 4-4 inputs we need to insert 2 identity activations to put 2 bytes into both input and weights

        auto prev0 = CNNNetPrevLayer(l, 0);
        auto prev1 = CNNNetPrevLayer(l, 1);
        switch (eltwise->_operation) {
            case EltwiseLayer::Sum:
                if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
                    return prevLayers;
                }
                // TODO: whether there is a possibility to select which layer to quantize
                prevLayers.push_back(prev0);
                break;
            case EltwiseLayer::Prod:
                if (LayerInfo(prev0).has16BOutput() && LayerInfo(prev1).has16BOutput()) {
                    return prevLayers;
                }

                if (LayerInfo(prev0).has32BOutput()) {
                    prevLayers.push_back(prev0);
                }

                if (LayerInfo(prev1).has32BOutput()) {
                    prevLayers.push_back(prev1);
                }

                break;
            default:
                THROW_GNA_EXCEPTION << "Eltwise Layer of type: " << eltwise->_operation << " not supported";
        }
    } else if (concat != nullptr) {
        // every 4-byte producer feeding a concat needs an identity after it
        for (int i = 0; CNNNetHasPrevLayer(l.get(), i); ++i) {
            auto prev = CNNNetPrevLayer(l, i);
            if (LayerInfo(prev).has32BOutput()) {
                prevLayers.push_back(prev);
            }
        }
    } else {  // not eltwise or concat
        // other layers have 1 input - the situation is easier
        // ex. activation or pooling - no need to insert an identity activation.
        if (LayerInfo(l).has32BInput())
            return prevLayers;

        auto prevLayer = CNNNetPrevLayer(l);
        if (!LayerInfo(prevLayer).has32BOutput())
            return prevLayers;

        prevLayers.push_back(prevLayer);
    }
    return prevLayers;
}

void GNAPlugin::substitutePRelu(std::vector<InferenceEngine::CNNLayerPtr> &layers) {
    // returns the scale of a Power layer that is a pure scaling (power == 1, offset == 0), otherwise 0
    auto getScale = [](CNNLayer* layer) {
        auto powerCandidate = LayerInfo(layer);
        if (!powerCandidate.isPower()) return 0.0f;
        auto power = powerCandidate.as<PowerLayer*>();

        return power->power == 1 && power->offset == 0.0f ? power->scale : 0.0f;
    };

    auto isScale = [getScale](CNNLayer* layer) {
        return getScale(layer) != 0.0f;
    };

    auto isNegate = [getScale](CNNLayer* layer) {
        return getScale(layer) == -1.0f;
    };

    // returns the single consumer of the layer's single output, or nullptr
    auto getNext = [](CNNLayer* layer) {
        CNNLayer* next = nullptr;
        if (layer == nullptr) return next;
        if (layer->outData.size() != 1) return next;
        return layer->outData[0]->inputTo.begin()->second.get();
    };

    // TODO: unit tests for bad cases
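    // matched subgraph - a PRelu decomposed into primitive layers
    // (diagram added for clarity; names follow the variables below):
    //
    //              l
    //            /   \
    //        relu1    neg1  (power: scale = -1)
    //          |        |
    //          |      relu2
    //          |        |
    //          |      scale (power: scale = a)
    //          |        |
    //          |      negate (power: scale = -1)
    //            \    /
    //          eltwise sum
    //
    // the sum computes relu(x) - a*relu(-x), which equals PRelu(x) with negative slope a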
    for (auto & l : layers) {
        // assume l is a starting layer that is followed by eltwise_sum(relu, negate/relu/scale/negate)
        if (l->outData.size() != 1) continue;
        auto &outputLayers = l->outData[0]->inputTo;
        if (outputLayers.size() != 2) continue;

        // one of the following layers needs to be a generic relu
        auto first = LayerInfo(outputLayers.begin()->second);
        auto second = LayerInfo((++outputLayers.begin())->second);

        auto relu1 = outputLayers.begin()->second;
        auto neg1 = (++outputLayers.begin())->second;
        if (second.isRelu()) {
            std::swap(first, second);
            std::swap(relu1, neg1);
        }
        if (!first.isRelu()) continue;
        // now we have relu as the first layer, let's check the second

        // negate
        if (!isNegate(neg1.get())) continue;

        // relu
        auto relu2 = getNext(second);
        if (!LayerInfo(relu2).isRelu()) continue;

        // scale
        auto scale = getNext(relu2);
        if (!isScale(scale)) continue;

        // negate
        auto negate = getNext(scale);
        if (!isNegate(negate)) continue;

        // sum
        auto sum = getNext(negate);
        if (!LayerInfo(sum).isEltwiseSum()) continue;
        if (sum->insData.size() != 2) continue;

        auto s1 = sum->insData[0].lock()->creatorLayer.lock().get();
        auto s2 = sum->insData[1].lock()->creatorLayer.lock().get();

        // the relu branch has to be one of the sum inputs
        if (s1 != static_cast<InferenceEngine::CNNLayer *>(first) &&
            s2 != static_cast<InferenceEngine::CNNLayer *>(first)) {
            continue;
        }

        // found a parametric relu group - fold it into a single ReLU with a negative slope
        gnalog() << "PRelu with negative slope of " << -LayerInfo(scale).as<PowerLayer*>()->scale << " found" << std::endl;

        // removing all layer references except the relu layer
        outputLayers.clear();
        outputLayers[relu1->name] = relu1;
        // pointing relu to the output of eltwise_sum
        relu1->outData = sum->outData;
        // changing creator layer
        relu1->outData[0]->creatorLayer = relu1;
        // pointing back to relu if any
        if (!relu1->outData[0]->inputTo.empty()) {
            auto sumOutputLayer = relu1->outData[0]->inputTo.begin()->second;
            sumOutputLayer->insData.clear();
            sumOutputLayer->insData.push_back(relu1->outData[0]);
        }
        // changing negative slope
        first.as<ReLULayer*>()->negative_slope = LayerInfo(scale).as<PowerLayer*>()->scale;
    }
}
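
// Removes Permute(0,3,2,1) layers sitting between a convolution and a fully connected layer,
// recording the affected affine layers in affineWithPermutedWeights - their weights are assumed
// to already account for the permuted layout.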
void GNAPlugin::reversePermutations(std::vector<CNNLayerPtr> &layers) {
    // walks up the graph from the given layer, skipping layers for which shouldSkip is true,
    // and returns the first layer that should not be skipped, or nullptr if an input is reached
    std::function<CNNLayerPtr(CNNLayerPtr, std::function<bool(CNNLayerPtr)>)> prevLayerSkipCertain
        = [&prevLayerSkipCertain](CNNLayerPtr layer, std::function<bool(CNNLayerPtr)> shouldSkip) -> CNNLayerPtr {
        if (CNNNetHasPrevLayer(layer.get())) {
            auto prev = CNNNetPrevLayer(layer);

            // keep walking up while the previous layer should be skipped
            if (shouldSkip(prev)) return prevLayerSkipCertain(prev, shouldSkip);

            return prev;
        }
        return nullptr;
    };

    auto prevLayerSkipReshape = [&prevLayerSkipCertain](CNNLayerPtr layer) -> CNNLayerPtr {
        return prevLayerSkipCertain(layer, [] (CNNLayerPtr l2) {
            return LayerInfo(l2).isReshape();
        });
    };

    // walks down the graph, skipping reshape layers, and returns the first non-reshape consumer
    std::function<CNNLayerPtr(CNNLayerPtr)> nextLayerSkipReshape = [&nextLayerSkipReshape](CNNLayerPtr layer) -> CNNLayerPtr {
        if (layer->outData.empty()) {
            return nullptr;
        }
        if (layer->outData.front()->inputTo.size() != 1) {
            return nullptr;
        }
        auto next = layer->outData.front()->inputTo.begin()->second;

        if (LayerInfo(next).isReshape()) return nextLayerSkipReshape(next);

        return next;
    };

    auto prevConv = [&prevLayerSkipCertain](CNNLayerPtr layer) -> CNNLayerPtr {
        return prevLayerSkipCertain(layer, [] (CNNLayerPtr l2) {
            return
                LayerInfo(l2).isReshape() ||
                LayerInfo(l2).isPooling() ||
                LayerInfo(l2).isActivation();
        });
    };

    std::unordered_set<std::string> affineWithPermutedWeights;
    std::list<CNNLayerPtr> permutationsToRemove;

    for (auto & l : layers) {
        if (!LayerInfo(l).isPermute()) {
            continue;
        }

        auto layerOrder = l->GetParamAsInts("order");

        if (layerOrder != std::vector<int>({0, 3, 2, 1})) {
            THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << ", order: was " << l->GetParamAsString("order") <<
                                ", but supported order is 0,3,2,1";
        }

        // search for its input convolution
        auto prev = prevConv(l);

        // pooling is not used in speech models without convolution
        if (!prev || !LayerInfo(prev).isConvolution()) {
            THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << " no valid input to that layer";
        }

        // we can remove that permutation if it is an input to a ScaleShift or FC layer
        auto next = nextLayerSkipReshape(l);
        if (!next || !LayerInfo(next).isFullyConnected()) {
            THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << " no valid output of that layer";
        }

        permutationsToRemove.push_back(l);

        // removing that permutation layer and saving information about the affine
        affineWithPermutedWeights.insert(next->name);
    }

    for (auto && toRemove : permutationsToRemove) {
        CNNNetworkRemoveLayer(toRemove);
    }

    // search for conv->affine sequences
    for (auto & l : layers) {
        if (!LayerInfo(l).isFullyConnected() || 0 != affineWithPermutedWeights.count(l->name)) {
            continue;
        }
        // found an affine layer that is not involved in permutation removal
        // searching whether it has a direct input from convolution
        auto prevConvLayer = prevConv(l);
        if (!prevConvLayer || !LayerInfo(prevConvLayer).isConvolution()) continue;

        auto directPrev = CNNNetPrevLayer(l);

        // TODO: make a new permute here
        CNNNetworkInsertLayer(l, directPrev, CNNLayerPtr(nullptr));
    }
}
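
// Inserts an "identity" activation after each candidate returned by
// getCandidatesForIdentityInsertion, so that a 32-bit producer output is converted into the
// 16-bit input expected by the consumer.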
void GNAPlugin::insertIdentityLayer(std::vector<CNNLayerPtr> &layers) {
    int numOfIdentityLayers = 0;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
    for (auto & l : layers) {
        for (auto && prev : getCandidatesForIdentityInsertion(l)) {
            // actual insertion
            auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers++);

            gnalog() << "Inserted " << activationName << " between: " << prev->name << " and " << l->name << "\n" << std::flush;

            CNNLayerPtr activationLayer =
                std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
            auto inputData = l->insData[0].lock();
            auto newDims = inputData->dims;
            std::reverse(begin(newDims), end(newDims));

            auto dataPtr = std::make_shared<Data>(activationName,
                                                  TensorDesc(inputData->precision,
                                                             newDims,
                                                             inputData->layout));
            auto activationLayerWithQuant = quantized ?
                                            InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) :
                                            activationLayer;
            dataPtr->creatorLayer = activationLayerWithQuant;
            activationLayerWithQuant->outData.push_back(dataPtr);
            // whether 1 identity or all outputs; TODO: possible grouping here, need to implement a special grouped inserter
            bool notAll = false;
            for (auto && nextData : prev->outData) {
                for (auto && nextLayer : nextData->inputTo) {
                    if (nextLayer.second.get() == l.get()) continue;
                    if (getCandidatesForIdentityInsertion(nextLayer.second).empty()) {
                        notAll = true;
                    }
                }
            }

            CNNNetworkInsertLayer(prev, notAll ? l : CNNLayerPtr(nullptr), activationLayerWithQuant);
        }
    }
}
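
// Inserts a Copy layer for concat->memory and crop->concat connections; the assumption here is
// that both ends would otherwise alias the same in-place buffer, which the GNA cannot handle
// without an explicit copy.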
void GNAPlugin::insertCopyLayer(std::vector<InferenceEngine::CNNLayerPtr> & layers) {
    int numCopyLayers = 0;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
    for (auto & l : layers) {
        if (l->insData.empty()) continue;
        auto prevLayer = CNNNetPrevLayer(l);
        if ((LayerInfo(l).isMemory() && LayerInfo(prevLayer).isConcat()) ||
            (LayerInfo(l).isConcat() && LayerInfo(prevLayer).isCrop())) {
            if (LayerInfo(prevLayer).isCrop()) {
                auto cropLayer = dynamic_cast<InferenceEngine::CropLayer *> (prevLayer.get());
                size_t cropOffset = cropLayer->offset.back() * cropLayer->precision.size();
                if (ALIGN(cropOffset, 8) != cropOffset) {
                    // the crop will be replaced by an affine layer, so copy layer insertion is not required
                    continue;
                }
            }
            std::string copyName = std::string("copy_") + std::to_string(numCopyLayers++);
            gnalog() << "Inserted " << copyName << " between: " << l->name << " and " << prevLayer->name << "\n" << std::flush;

            CNNLayerPtr copyLayer =
                std::make_shared<GenericLayer>(LayerParams({copyName, "Copy", Precision::FP32}));

            auto inputData = l->insData[0].lock();
            auto newDims = inputData->dims;

            std::reverse(begin(newDims), end(newDims));

            auto dataPtr = std::make_shared<Data>(copyName,
                                                  TensorDesc(inputData->precision,
                                                             newDims,
                                                             inputData->layout));

            auto copyWithQuant = quantized ?
                                 InferenceEngine::injectData<QuantizedLayerParams>(copyLayer) :
                                 copyLayer;
            dataPtr->creatorLayer = copyWithQuant;
            copyWithQuant->outData.push_back(dataPtr);
            CNNNetworkInsertLayer(prevLayer, l, copyWithQuant);
        }
    }
}
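
// Split/slice outputs are read by the GNA from 64-byte aligned addresses; an output starting at
// an unaligned offset gets a synthetic "AffineFilter" that reads from the closest aligned
// address below it and extracts the wanted elements with a 0/1 weight matrix.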
void GNAPlugin::insertAligningFilterLayer(std::vector<InferenceEngine::CNNLayerPtr> & layers) {
    // currently the split layer only supports 2 bytes in int16 and int8 mode.
    // In fp32 mode this is not necessary but is useful for testing
    const int bytesPerSplitElement = 2;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());

    int numOfFilterLayers = 0;
    for (auto &l : layers) {
        auto info = LayerInfo(l);
        if (!info.isSplit() && !info.isSlice()) {
            continue;
        }

        size_t currentOffset = 0;
        int splitOutIndex = 0;
        for (auto &&splitOutput : l->outData) {
            auto outputSize = product(++begin(splitOutput->getDims()), end(splitOutput->getDims()));

            if (currentOffset != ALIGN64(currentOffset)) {
                // this split output does not begin at a 64-byte aligned boundary - correct it with an aligning filter layer

                // getting the list of layers attached to the current split output
                gnalog() << "Inserted Affine Filter Layer between: " << l->name << " and : \n";
                for (auto &&followingLayers : splitOutput->getInputTo()) {
                    gnalog() << "    " << followingLayers.second->name << "\n";
                }
                gnalog() << std::flush;

                auto filterName = std::string("AlignFilter_") + std::to_string(numOfFilterLayers++);
                auto filterLayer =
                    std::make_shared<WeightableLayer>(LayerParams({filterName, "AffineFilter", Precision::FP32}));

                auto inputData = splitOutput;
                auto newDims = splitOutput->dims;

                // closest 64-byte aligned offset below currentOffset
                size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(currentOffset) - 64));
                size_t newOutputSize = (currentOffset + ALIGN(outputSize, 8) * bytesPerSplitElement - aligned64_offset)
                    / bytesPerSplitElement;

                // encodes the offset from the beginning of the split layer input
                filterLayer->params["offset"] = std::to_string(aligned64_offset);

                auto &num_rows_out = splitOutput->dims[0];

                std::vector<float> filterWeights(newOutputSize * num_rows_out, 0.f);

                auto offset = (currentOffset - aligned64_offset) / bytesPerSplitElement;
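                // the weight matrix has a single 1 per output row on a shifted diagonal, so row i
                // of the filter selects input element (offset + i) of the aligned read window.
                // e.g. (hypothetical numbers) currentOffset = 100 bytes: aligned64_offset = 64,
                // offset = (100 - 64) / 2 = 18, so the filter outputs elements 18, 19, ... of the window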
                for (size_t i = 0; i != outputSize; i++) {
                    filterWeights[offset] = 1.0f;
                    offset += newOutputSize + 1;
                }

                filterLayer->_weights = make_shared_blob<float>(inputData->precision, Layout::C, filterWeights);

                std::reverse(begin(newDims), end(newDims));

                auto outData = std::make_shared<Data>(filterName,
                                                      TensorDesc(splitOutput->precision,
                                                                 newDims,
                                                                 splitOutput->layout));

                auto filterWithQuant = quantized ?
                                       InferenceEngine::injectData<QuantizedLayerParams>(filterLayer) :
                                       filterLayer;
                outData->creatorLayer = filterWithQuant;
                filterWithQuant->outData.push_back(outData);
                CNNNetworkInsertLayer(l, nullptr, filterWithQuant, splitOutIndex);
            }

            // advance to the next output to search for data starting at an unaligned location
            currentOffset += outputSize * bytesPerSplitElement;
            splitOutIndex++;
        }
    }
}
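
// ScaleShift in IE broadcasts per-channel weights over the remaining dims; the GNA has no native
// broadcast, so under the WEIGHTS_TILING policy the weights are materialized by repetition and
// the data is reshaped to 2D [N, H*W*C].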
void GNAPlugin::substituteScaleShiftBroadCast(std::vector<InferenceEngine::CNNLayerPtr> &layers) {
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
    for (auto & l : layers) {
        LayerInfo layerInfo(l);

        if (!layerInfo.isScaleShift()) {
            continue;
        }

        auto scaleShift = layerInfo.as<ScaleShiftLayer*>();

        auto insData = scaleShift->insData.front().lock();
        if (!insData) {
            THROW_GNA_EXCEPTION << "Cannot get input data for layer: " << l->name;
        }

        if (insData->getDims().size() <= 2) {
            // NC or C - cannot do broadcast
            continue;
        }
        auto batchSize = insData->getDims()[0];
        auto nElements = details::product(insData->getDims()) / batchSize;
        auto weightsElements = scaleShift->_weights->size();
        auto weightsBytes = scaleShift->_weights->byteSize();

        if (nElements == weightsElements) {
            // no broadcast needed
            continue;
        }

        // only 3d scaleshift is supported, where the number of channels is arbitrary
        auto lastD = insData->getDims()[insData->getDims().size() - 1];
        if (lastD != weightsElements) {
            THROW_GNA_EXCEPTION << "Unsupported layer: " << l->name
                                << " should have last dim(" << lastD << ") equal to weights(" << weightsElements << ") length";
        }
        if (insData->getDims().size() == 2) {
            THROW_GNA_EXCEPTION << "For layer: " << l->name
                                << " weights size(" << weightsElements << ") invalid: should match input size of(" << lastD << ")";
        }

        gnalog() << "Substitution ScaleShift broadcast for layer: " << l->name << "\n";
        // approach 1 - weights tiling
        if (policy.ScaleShiftPolicy == Policy::WEIGHTS_TILING) {
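            // e.g. an input of dims [N, H, W, C] with per-channel weights of length C (the last
            // dim) gets the weights repeated H*W times, turning the broadcast into a plain
            // elementwise scale over the flattened [N, H*W*C] tensor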
            auto tileBlob = [](Blob::Ptr &blob, size_t TileTo) {
                auto weightsElements = blob->size();
                auto weightsBytes = blob->byteSize();
                if (weightsElements == 0) {
                    THROW_IE_EXCEPTION << "Blob size is 0";
                }
                if (TileTo % weightsElements) {
                    return false;
                }

                auto tiledBlob = make_plain_blob(blob->getTensorDesc().getPrecision(), {TileTo});
                tiledBlob->allocate();

                for (size_t i = 0; i != TileTo / weightsElements; i++) {
                    ie_memcpy(tiledBlob->buffer().as<uint8_t*>() + i * weightsBytes, weightsBytes, blob->cbuffer(), weightsBytes);
                }
                blob = tiledBlob;
                return true;
            };

            if (!tileBlob(scaleShift->_weights, nElements)) {
                THROW_GNA_EXCEPTION << "Cannot tile weights for layer: " << l->name << ", because the weights size is not a divisor of the dims product";
            }
            if (scaleShift->_biases) {
                if (!tileBlob(scaleShift->_biases, nElements)) {
                    THROW_GNA_EXCEPTION << "Cannot tile biases for layer: " << l->name << ", because the biases size is not a divisor of the dims product";
                }
            }

            // currently the data type does not provide a reshape method in its tensor desc
            scaleShift->outData.front()->reshape({batchSize, nElements}, Layout::NC);
            insData->reshape({batchSize, nElements}, Layout::NC);
        } else {
            THROW_GNA_EXCEPTION << "Not implemented substitution of scaleshift broadcast policy of "
                                << policy.ScaleShiftPolicy << " using layers tiling, layer: " << l->name;
        }
    }
}