// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "gna_plugin_policy.hpp"

#include <vector>
#include <string>
#include <memory>
#include <list>
#include <functional>
#include <algorithm>
#include <unordered_set>

#include <quantization/quantized_layer_params.hpp>
#include "gna_plugin.hpp"
#include "gna_layer_info.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::details;
using namespace GNAPluginNS;
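
// Inserts a synthetic "diagonal" ScaleShift layer (identity weights) before activations whose
// input does not come from a 32-bit-output layer, and before eltwise sums with two 16-bit inputs;
// this gives the GNA a weightable primitive to attach the operation to (rationale inferred from
// the byte-width notes inside).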
void GNAPlugin::insertDiagonalLayer(std::vector<CNNLayerPtr> & layers) {
    int numOfDiagLayers = 0;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
    for (auto & l : layers) {
        if (l->insData.empty()) continue;
        auto prevLayer = CNNNetPrevLayer(l);
        if (LayerInfo(l).isActivation()) {
            // an activation that already follows a 32-bit output needs no diagonal layer
            if (LayerInfo(prevLayer).has32BOutput()) {
                continue;
            }
        } else {
            auto eltwise = dynamic_cast<InferenceEngine::EltwiseLayer *>(l.get());
            if (!eltwise) {
                continue;
            }

            // in case of eltwise sum one of the inputs has to be 4 bytes and the other 2
            // in case of eltwise mul both inputs have to be 2 bytes
            // for e. sum with 4-4 inputs we will handle that by inserting an identity activation
            // for e. sum with 4-2 inputs - OK
            // for e. sum with 2-2 inputs we need to insert a diagonal - handled here
            // for e. mul with 2-2 inputs - OK
            // for e. mul with 2-4 inputs we need to insert an identity to put the 4 bytes input into weights
            // for e. mul with 4-4 inputs we need to insert 2 identities to put both 4 bytes inputs into weights

            if (eltwise->_operation != EltwiseLayer::Sum)
                continue;

            // only the 2-2 sum case gets a diagonal layer
            auto prevLayer1 = CNNNetPrevLayer(l, 1);
            if (!LayerInfo(prevLayer).has16BOutput() || !LayerInfo(prevLayer1).has16BOutput())
                continue;
        }

        gnalog() << "Inserted Diagonal Layer between: " << prevLayer->name << " and " << l->name << "\n" << std::flush;

        // actual insertion of a synthetic ScaleShift with identity weights
        auto diagName = std::string("SyntheticScaleShift_") + std::to_string(numOfDiagLayers++);
        auto diagLayer = std::make_shared<ScaleShiftLayer>(LayerParams({diagName, "ScaleShift", Precision::FP32}));

        // TODO: diagonal size
        std::vector<float> arrayOf1(l->outData[0]->dims[0], 1.f);
        diagLayer->_weights = make_shared_blob<float>(l->outData[0]->precision, Layout::C, arrayOf1);
        auto newDims = l->outData[0]->dims;
        auto dataPtr = std::make_shared<Data>(diagName,
                                              newDims,
                                              l->outData[0]->precision,
                                              l->outData[0]->layout);

        auto diagonalWithQuant = quantized ?
                                 InferenceEngine::injectData<QuantizedLayerParams>(diagLayer) :
                                 diagLayer;

        dataPtr->creatorLayer = diagonalWithQuant;
        diagonalWithQuant->outData.push_back(dataPtr);
        CNNNetworkInsertLayer(prevLayer, l, diagonalWithQuant);
    }
}

void GNAPlugin::reorderMaxPool(std::vector<InferenceEngine::CNNLayerPtr> & layers) {
    // detecting the following pattern
    // conv -> relu -> maxpooling
    // and changing it to conv -> maxpooling -> relu
    for (auto & l : layers) {
        auto pool = LayerInfo(l);
        if (!pool.isMaxPooling()) continue;

        // checking prev layer type
        auto activation = LayerInfo(CNNNetPrevLayer(l));
        if (!activation.isActivation()) continue;

        // if activation came from convolution
        auto convolution = LayerInfo(CNNNetPrevLayer(static_cast<InferenceEngine::CNNLayer*>(activation)));
        if (!convolution.isConvolution()) continue;

        gnalog() << "MaxPooling: " << pool << ", reordered with activation: " << activation << "\n";

        CNNNetSwapLayers(activation, pool);
    }
}
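
// Returns the list of previous layers after which an identity activation needs to be inserted,
// based on the 16-bit/32-bit output widths of the producing layers (see notes below).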
std::vector<CNNLayerPtr> GNAPlugin::getCandidatesForIdentityInsertion(const CNNLayerPtr l) {
    std::vector<CNNLayerPtr> prevLayers;

    // skipping memory inputs and true input layers
    if (l->insData.empty()) return {};

    auto eltwise = dynamic_cast<InferenceEngine::EltwiseLayer *>(l.get());
    auto concat = dynamic_cast<InferenceEngine::ConcatLayer *>(l.get());

    if (eltwise != nullptr) {
        // eltwise layer has 2 inputs, so whether an identity should be inserted depends on the situation

        // for sum with 4-4 inputs we will handle that by inserting an identity activation - case (1)
        // for sum with 4-2 inputs - OK
        // for sum with 2-2 inputs we need to insert a diagonal

        // for mul with 2-2 inputs - OK
        // for mul with 2-4 inputs we need to insert an identity activation to make a 2 bytes input
        // for mul with 4-4 inputs we need to insert 2 identity activations to put 2 bytes into both input and weights

        auto prev0 = CNNNetPrevLayer(l, 0);
        auto prev1 = CNNNetPrevLayer(l, 1);
        switch (eltwise->_operation) {
            case EltwiseLayer::Sum:
                if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
                    return prevLayers;
                }
                // TODO: whether there is a possibility to select which layer to quantize
                prevLayers.push_back(prev0);
                break;
            case EltwiseLayer::Prod:
                if (LayerInfo(prev0).has16BOutput() && LayerInfo(prev1).has16BOutput()) {
                    return prevLayers;
                }

                if (LayerInfo(prev0).has32BOutput()) {
                    prevLayers.push_back(prev0);
                }

                if (LayerInfo(prev1).has32BOutput()) {
                    prevLayers.push_back(prev1);
                }

                break;
            default:
                THROW_GNA_EXCEPTION << "Eltwise Layer of type: " << eltwise->_operation << " not supported";
        }
    } else if (concat != nullptr) {
        // every 4-byte producer feeding a concat needs an identity after it
        for (int i = 0; CNNNetHasPrevLayer(l.get(), i); ++i) {
            auto prev = CNNNetPrevLayer(l, i);
            if (LayerInfo(prev).has32BOutput()) {
                prevLayers.push_back(prev);
            }
        }
    } else {  // not eltwise or concat
        // other layers have 1 input - the situation is easier
        // ex. activation or pooling - no need to insert an identity activation.
        if (LayerInfo(l).has32BInput())
            return prevLayers;

        auto prevLayer = CNNNetPrevLayer(l);
        if (!LayerInfo(prevLayer).has32BOutput())
            return prevLayers;

        prevLayers.push_back(prevLayer);
    }
    return prevLayers;
}

void GNAPlugin::substitutePRelu(std::vector<InferenceEngine::CNNLayerPtr> &layers) {
    // returns the scale of a Power layer that is a pure scaling (power == 1, offset == 0), otherwise 0
    auto getScale = [](CNNLayer* layer) {
        auto powerCandidate = LayerInfo(layer);
        if (!powerCandidate.isPower()) return 0.0f;
        auto power = powerCandidate.as<PowerLayer*>();

        return power->power == 1 && power->offset == 0.0f ? power->scale : 0.0f;
    };

    auto isScale = [getScale](CNNLayer* layer) {
        return getScale(layer) != 0.0f;
    };

    auto isNegate = [getScale](CNNLayer* layer) {
        return getScale(layer) == -1.0f;
    };

    // returns the single consumer of the layer's single output, or nullptr
    auto getNext = [](CNNLayer* layer) {
        CNNLayer* next = nullptr;
        if (layer == nullptr) return next;
        if (layer->outData.size() != 1) return next;
        return layer->outData[0]->inputTo.begin()->second.get();
    };

    // TODO: unit tests for bad cases
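    // matched subgraph - a PRelu decomposed into primitive layers
    // (diagram added for clarity; names follow the variables below):
    //
    //              l
    //            /   \
    //        relu1    neg1  (power: scale = -1)
    //          |        |
    //          |      relu2
    //          |        |
    //          |      scale (power: scale = a)
    //          |        |
    //          |      negate (power: scale = -1)
    //            \    /
    //          eltwise sum
    //
    // the sum computes relu(x) - a*relu(-x), which equals PRelu(x) with negative slope a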
    for (auto & l : layers) {
        // assume l is a starting layer that is followed by eltwise_sum(relu, negate/relu/scale/negate)
        if (l->outData.size() != 1) continue;
        auto &outputLayers = l->outData[0]->inputTo;
        if (outputLayers.size() != 2) continue;

        // one of the following layers needs to be a generic relu
        auto first = LayerInfo(outputLayers.begin()->second);
        auto second = LayerInfo((++outputLayers.begin())->second);

        auto relu1 = outputLayers.begin()->second;
        auto neg1 = (++outputLayers.begin())->second;
        if (second.isRelu()) {
            std::swap(first, second);
            std::swap(relu1, neg1);
        }
        if (!first.isRelu()) continue;
        // now we have relu as the first layer, let's check the second

        // negate
        if (!isNegate(neg1.get())) continue;

        // relu
        auto relu2 = getNext(second);
        if (!LayerInfo(relu2).isRelu()) continue;

        // scale
        auto scale = getNext(relu2);
        if (!isScale(scale)) continue;

        // negate
        auto negate = getNext(scale);
        if (!isNegate(negate)) continue;

        // sum
        auto sum = getNext(negate);
        if (!LayerInfo(sum).isEltwiseSum()) continue;
        if (sum->insData.size() != 2) continue;

        auto s1 = sum->insData[0].lock()->creatorLayer.lock().get();
        auto s2 = sum->insData[1].lock()->creatorLayer.lock().get();

        // the relu branch has to be one of the sum inputs
        if (s1 != static_cast<InferenceEngine::CNNLayer *>(first) &&
            s2 != static_cast<InferenceEngine::CNNLayer *>(first)) {
            continue;
        }

        // found a parametric relu group - fold it into a single ReLU with a negative slope
        gnalog() << "PRelu with negative slope of " << -LayerInfo(scale).as<PowerLayer*>()->scale << " found" << std::endl;

        // removing all layer references except the relu layer
        outputLayers.clear();
        outputLayers[relu1->name] = relu1;
        // pointing relu to the output of eltwise_sum
        relu1->outData = sum->outData;
        // changing creator layer
        relu1->outData[0]->creatorLayer = relu1;
        // pointing back to relu if any
        if (!relu1->outData[0]->inputTo.empty()) {
            auto sumOutputLayer = relu1->outData[0]->inputTo.begin()->second;
            sumOutputLayer->insData.clear();
            sumOutputLayer->insData.push_back(relu1->outData[0]);
        }
        // changing negative slope
        first.as<ReLULayer*>()->negative_slope = LayerInfo(scale).as<PowerLayer*>()->scale;
    }
}
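
// Removes Permute(0,3,2,1) layers sitting between a convolution and a fully connected layer,
// recording the affected affine layers in affineWithPermutedWeights - their weights are assumed
// to already account for the permuted layout.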
void GNAPlugin::reversePermutations(std::vector<CNNLayerPtr> &layers) {
    // walks up the graph from the given layer, skipping layers for which shouldSkip is true,
    // and returns the first layer that should not be skipped, or nullptr if an input is reached
    std::function<CNNLayerPtr(CNNLayerPtr, std::function<bool(CNNLayerPtr)>)> prevLayerSkipCertain
        = [&prevLayerSkipCertain](CNNLayerPtr layer, std::function<bool(CNNLayerPtr)> shouldSkip) -> CNNLayerPtr {
        if (CNNNetHasPrevLayer(layer.get())) {
            auto prev = CNNNetPrevLayer(layer);

            // keep walking up while the previous layer should be skipped
            if (shouldSkip(prev)) return prevLayerSkipCertain(prev, shouldSkip);

            return prev;
        }
        return nullptr;
    };

    auto prevLayerSkipReshape = [&prevLayerSkipCertain](CNNLayerPtr layer) -> CNNLayerPtr {
        return prevLayerSkipCertain(layer, [] (CNNLayerPtr l2) {
            return LayerInfo(l2).isReshape();
        });
    };

    // walks down the graph, skipping reshape layers, and returns the first non-reshape consumer
    std::function<CNNLayerPtr(CNNLayerPtr)> nextLayerSkipReshape = [&nextLayerSkipReshape](CNNLayerPtr layer) -> CNNLayerPtr {
        if (layer->outData.empty()) {
            return nullptr;
        }
        if (layer->outData.front()->inputTo.size() != 1) {
            return nullptr;
        }
        auto next = layer->outData.front()->inputTo.begin()->second;

        if (LayerInfo(next).isReshape()) return nextLayerSkipReshape(next);

        return next;
    };

    auto prevConv = [&prevLayerSkipCertain](CNNLayerPtr layer) -> CNNLayerPtr {
        return prevLayerSkipCertain(layer, [] (CNNLayerPtr l2) {
            return
                LayerInfo(l2).isReshape() ||
                LayerInfo(l2).isPooling() ||
                LayerInfo(l2).isActivation();
        });
    };

    std::unordered_set<std::string> affineWithPermutedWeights;
    std::list<CNNLayerPtr> permutationsToRemove;

    for (auto & l : layers) {
        if (!LayerInfo(l).isPermute()) {
            continue;
        }

        auto layerOrder = l->GetParamAsInts("order");

        if (layerOrder != std::vector<int>({0, 3, 2, 1})) {
            THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << ", order: was " << l->GetParamAsString("order") <<
                                ", but supported order is 0,3,2,1";
        }

        // search for its input convolution
        auto prev = prevConv(l);

        // pooling is not used in speech models without convolution
        if (!prev || !LayerInfo(prev).isConvolution()) {
            THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << " no valid input to that layer";
        }

        // we can remove that permutation if it is an input to a ScaleShift or FC layer
        auto next = nextLayerSkipReshape(l);
        if (!next || !LayerInfo(next).isFullyConnected()) {
            THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << " no valid output of that layer";
        }

        permutationsToRemove.push_back(l);

        // removing that permutation layer and saving information about the affine
        affineWithPermutedWeights.insert(next->name);
    }

    for (auto && toRemove : permutationsToRemove) {
        CNNNetworkRemoveLayer(toRemove);
    }

    // search for conv->affine sequences
    for (auto & l : layers) {
        if (!LayerInfo(l).isFullyConnected() || 0 != affineWithPermutedWeights.count(l->name)) {
            continue;
        }
        // found an affine layer that is not involved in permutation removal
        // searching whether it has a direct input from convolution
        auto prevConvLayer = prevConv(l);
        if (!prevConvLayer || !LayerInfo(prevConvLayer).isConvolution()) continue;

        auto directPrev = CNNNetPrevLayer(l);

        // TODO: make a new permute here
        CNNNetworkInsertLayer(l, directPrev, CNNLayerPtr(nullptr));
    }
}
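
// Inserts an "identity" activation after each candidate returned by
// getCandidatesForIdentityInsertion, so that a 32-bit producer output is converted into the
// 16-bit input expected by the consumer.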
void GNAPlugin::insertIdentityLayer(std::vector<CNNLayerPtr> &layers) {
    int numOfIdentityLayers = 0;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
    for (auto & l : layers) {
        for (auto && prev : getCandidatesForIdentityInsertion(l)) {
            // actual insertion
            auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers++);

            gnalog() << "Inserted " << activationName << " between: " << prev->name << " and " << l->name << "\n" << std::flush;

            CNNLayerPtr activationLayer =
                std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
            auto inputData = l->insData[0].lock();
            auto newDims = inputData->dims;
            std::reverse(begin(newDims), end(newDims));

            auto dataPtr = std::make_shared<Data>(activationName,
                                                  TensorDesc(inputData->precision,
                                                             newDims,
                                                             inputData->layout));
            auto activationLayerWithQuant = quantized ?
                                            InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) :
                                            activationLayer;
            dataPtr->creatorLayer = activationLayerWithQuant;
            activationLayerWithQuant->outData.push_back(dataPtr);
            // whether 1 identity or all outputs; TODO: possible grouping here, need to implement a special grouped inserter
            bool notAll = false;
            for (auto && nextData : prev->outData) {
                for (auto && nextLayer : nextData->inputTo) {
                    if (nextLayer.second.get() == l.get()) continue;
                    if (getCandidatesForIdentityInsertion(nextLayer.second).empty()) {
                        notAll = true;
                    }
                }
            }

            CNNNetworkInsertLayer(prev, notAll ? l : CNNLayerPtr(nullptr), activationLayerWithQuant);
        }
    }
}
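
// Inserts a Copy layer for concat->memory and crop->concat connections; the assumption here is
// that both ends would otherwise alias the same in-place buffer, which the GNA cannot handle
// without an explicit copy.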
void GNAPlugin::insertCopyLayer(std::vector<InferenceEngine::CNNLayerPtr> & layers) {
    int numCopyLayers = 0;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
    for (auto & l : layers) {
        if (l->insData.empty()) continue;
        auto prevLayer = CNNNetPrevLayer(l);
        if ((LayerInfo(l).isMemory() && LayerInfo(prevLayer).isConcat()) ||
            (LayerInfo(l).isConcat() && LayerInfo(prevLayer).isCrop())) {
            if (LayerInfo(prevLayer).isCrop()) {
                auto cropLayer = dynamic_cast<InferenceEngine::CropLayer *> (prevLayer.get());
                size_t cropOffset = cropLayer->offset.back() * cropLayer->precision.size();
                if (ALIGN(cropOffset, 8) != cropOffset) {
                    // the crop will be replaced by an affine layer, so copy layer insertion is not required
                    continue;
                }
            }
            std::string copyName = std::string("copy_") + std::to_string(numCopyLayers++);
            gnalog() << "Inserted " << copyName << " between: " << l->name << " and " << prevLayer->name << "\n" << std::flush;

            CNNLayerPtr copyLayer =
                std::make_shared<GenericLayer>(LayerParams({copyName, "Copy", Precision::FP32}));

            auto inputData = l->insData[0].lock();
            auto newDims = inputData->dims;

            std::reverse(begin(newDims), end(newDims));

            auto dataPtr = std::make_shared<Data>(copyName,
                                                  TensorDesc(inputData->precision,
                                                             newDims,
                                                             inputData->layout));

            auto copyWithQuant = quantized ?
                                 InferenceEngine::injectData<QuantizedLayerParams>(copyLayer) :
                                 copyLayer;
            dataPtr->creatorLayer = copyWithQuant;
            copyWithQuant->outData.push_back(dataPtr);
            CNNNetworkInsertLayer(prevLayer, l, copyWithQuant);
        }
    }
}
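
// Split/slice outputs are read by the GNA from 64-byte aligned addresses; an output starting at
// an unaligned offset gets a synthetic "AffineFilter" that reads from the closest aligned
// address below it and extracts the wanted elements with a 0/1 weight matrix.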
void GNAPlugin::insertAligningFilterLayer(std::vector<InferenceEngine::CNNLayerPtr> & layers) {
    // currently the split layer only supports 2 bytes in int16 and int8 mode.
    // In fp32 mode this is not necessary but is useful for testing
    const int bytesPerSplitElement = 2;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());

    int numOfFilterLayers = 0;
    for (auto &l : layers) {
        auto info = LayerInfo(l);
        if (!info.isSplit() && !info.isSlice()) {
            continue;
        }

        size_t currentOffset = 0;
        int splitOutIndex = 0;
        for (auto &&splitOutput : l->outData) {
            auto outputSize = product(++begin(splitOutput->getDims()), end(splitOutput->getDims()));

            if (currentOffset != ALIGN64(currentOffset)) {
                // this split output does not begin at a 64-byte aligned boundary - correct it with an aligning filter layer

                // getting the list of layers attached to the current split output
                gnalog() << "Inserted Affine Filter Layer between: " << l->name << " and : \n";
                for (auto &&followingLayers : splitOutput->getInputTo()) {
                    gnalog() << "    " << followingLayers.second->name << "\n";
                }
                gnalog() << std::flush;

                auto filterName = std::string("AlignFilter_") + std::to_string(numOfFilterLayers++);
                auto filterLayer =
                    std::make_shared<WeightableLayer>(LayerParams({filterName, "AffineFilter", Precision::FP32}));

                auto inputData = splitOutput;
                auto newDims = splitOutput->dims;

                // closest 64-byte aligned offset below currentOffset
                size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(currentOffset) - 64));
                size_t newOutputSize = (currentOffset + ALIGN(outputSize, 8) * bytesPerSplitElement - aligned64_offset)
                    / bytesPerSplitElement;

                // encodes the offset from the beginning of the split layer input
                filterLayer->params["offset"] = std::to_string(aligned64_offset);

                auto &num_rows_out = splitOutput->dims[0];

                std::vector<float> filterWeights(newOutputSize * num_rows_out, 0.f);

                auto offset = (currentOffset - aligned64_offset) / bytesPerSplitElement;
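                // the weight matrix has a single 1 per output row on a shifted diagonal, so row i
                // of the filter selects input element (offset + i) of the aligned read window.
                // e.g. (hypothetical numbers) currentOffset = 100 bytes: aligned64_offset = 64,
                // offset = (100 - 64) / 2 = 18, so the filter outputs elements 18, 19, ... of the window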
                for (size_t i = 0; i != outputSize; i++) {
                    filterWeights[offset] = 1.0f;
                    offset += newOutputSize + 1;
                }

                filterLayer->_weights = make_shared_blob<float>(inputData->precision, Layout::C, filterWeights);

                std::reverse(begin(newDims), end(newDims));

                auto outData = std::make_shared<Data>(filterName,
                                                      TensorDesc(splitOutput->precision,
                                                                 newDims,
                                                                 splitOutput->layout));

                auto filterWithQuant = quantized ?
                                       InferenceEngine::injectData<QuantizedLayerParams>(filterLayer) :
                                       filterLayer;
                outData->creatorLayer = filterWithQuant;
                filterWithQuant->outData.push_back(outData);
                CNNNetworkInsertLayer(l, nullptr, filterWithQuant, splitOutIndex);
            }

            // advance to the next output to search for data starting at an unaligned location
            currentOffset += outputSize * bytesPerSplitElement;
            splitOutIndex++;
        }
    }
}
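
// ScaleShift in IE broadcasts per-channel weights over the remaining dims; the GNA has no native
// broadcast, so under the WEIGHTS_TILING policy the weights are materialized by repetition and
// the data is reshaped to 2D [N, H*W*C].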
void GNAPlugin::substituteScaleShiftBroadCast(std::vector<InferenceEngine::CNNLayerPtr> &layers) {
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
    for (auto & l : layers) {
        LayerInfo layerInfo(l);

        if (!layerInfo.isScaleShift()) {
            continue;
        }

        auto scaleShift = layerInfo.as<ScaleShiftLayer*>();

        auto insData = scaleShift->insData.front().lock();
        if (!insData) {
            THROW_GNA_EXCEPTION << "Cannot get input data for layer: " << l->name;
        }

        if (insData->getDims().size() <= 2) {
            // NC or C - cannot do broadcast
            continue;
        }
        auto batchSize = insData->getDims()[0];
        auto nElements = details::product(insData->getDims()) / batchSize;
        auto weightsElements = scaleShift->_weights->size();
        auto weightsBytes = scaleShift->_weights->byteSize();

        if (nElements == weightsElements) {
            // no broadcast needed
            continue;
        }

        // only 3d scaleshift is supported, where the number of channels is arbitrary
        auto lastD = insData->getDims()[insData->getDims().size() - 1];
        if (lastD != weightsElements) {
            THROW_GNA_EXCEPTION << "Unsupported layer: " << l->name
                                << " should have last dim(" << lastD << ") equal to weights(" << weightsElements << ") length";
        }
        if (insData->getDims().size() == 2) {
            THROW_GNA_EXCEPTION << "For layer: " << l->name
                                << " weights size(" << weightsElements << ") invalid: should match input size of(" << lastD << ")";
        }

        gnalog() << "Substitution ScaleShift broadcast for layer: " << l->name << "\n";
        // approach 1 - weights tiling
        if (policy.ScaleShiftPolicy == Policy::WEIGHTS_TILING) {
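            // e.g. an input of dims [N, H, W, C] with per-channel weights of length C (the last
            // dim) gets the weights repeated H*W times, turning the broadcast into a plain
            // elementwise scale over the flattened [N, H*W*C] tensor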
            auto tileBlob = [](Blob::Ptr &blob, size_t TileTo) {
                auto weightsElements = blob->size();
                auto weightsBytes = blob->byteSize();
                if (weightsElements == 0) {
                    THROW_IE_EXCEPTION << "Blob size is 0";
                }
                if (TileTo % weightsElements) {
                    return false;
                }

                auto tiledBlob = make_plain_blob(blob->getTensorDesc().getPrecision(), {TileTo});
                tiledBlob->allocate();

                for (size_t i = 0; i != TileTo / weightsElements; i++) {
                    ie_memcpy(tiledBlob->buffer().as<uint8_t*>() + i * weightsBytes, weightsBytes, blob->cbuffer(), weightsBytes);
                }
                blob = tiledBlob;
                return true;
            };

            if (!tileBlob(scaleShift->_weights, nElements)) {
                THROW_GNA_EXCEPTION << "Cannot tile weights for layer: " << l->name << ", because the weights size is not a divisor of the dims product";
            }
            if (scaleShift->_biases) {
                if (!tileBlob(scaleShift->_biases, nElements)) {
                    THROW_GNA_EXCEPTION << "Cannot tile biases for layer: " << l->name << ", because the biases size is not a divisor of the dims product";
                }
            }

            // currently the data type does not provide a reshape method in its tensor desc
            scaleShift->outData.front()->reshape({batchSize, nElements}, Layout::NC);
            insData->reshape({batchSize, nElements}, Layout::NC);
        } else {
            THROW_GNA_EXCEPTION << "Not implemented substitution of scaleshift broadcast policy of "
                                << policy.ScaleShiftPolicy << " using layers tiling, layer: " << l->name;
        }
    }
}