From: Kamil Magierski Date: Tue, 6 Oct 2020 07:59:03 +0000 (+0200) Subject: [GNA] Fix LSTM Cell channel C being 0 on output (#1174) X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8abdc326768738ed118a5c547de8eef6d080756e;p=platform%2Fupstream%2Fdldt.git [GNA] Fix LSTM Cell channel C being 0 on output (#1174) * [GNA] get output before activation test [GNA] SubstituteScaleShiftBroadCastPass fix for cases when there are multiple scaleshifts as an output from the layer [GNA] Generalize Fix where LSTMCell output was zero due to being fused into activation [GNA] Fix LSTMCell being zero on channel C if being output layer * linux build fix --- diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index 65bdde3..c36b47e 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -383,6 +383,7 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) { passes->registerPass(); } passes->registerPass(); + passes->registerPass(); passes->registerPass(); passes->registerPass(); passes->registerPass(); diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index 867e5f3..a0ec835 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -1158,6 +1158,7 @@ void EltwiseSplitOverChannelsPass::run() { } void SubstituteScaleShiftBroadCastPass::run() { + std::map reshaped_data; for (auto & l : *pLayers) { LayerInfo layerInfo(l); @@ -1172,12 +1173,20 @@ void SubstituteScaleShiftBroadCastPass::run() { THROW_GNA_EXCEPTION << "Cannot get inputs data for layer: " << l->name; } - if (insData->getDims().size() <= 2) { + bool was_reshaped = reshaped_data.count(insData->getName()) != 0; + InferenceEngine::SizeVector dataDims; + if (was_reshaped) { + dataDims = reshaped_data[insData->getName()]; + } else 
{ + dataDims = insData->getDims(); + } + + if (dataDims.size() <= 2) { // NC or C cannot do broadcast continue; } - auto batchSize = insData->getDims()[0]; - auto nElements = product(begin(insData->getDims()), end(insData->getDims())) / batchSize; + auto batchSize = dataDims[0]; + auto nElements = product(begin(dataDims), end(dataDims)) / batchSize; auto weightsElements = scaleShift->_weights->size(); auto weightsBytes = scaleShift->_weights->byteSize(); @@ -1186,12 +1195,12 @@ void SubstituteScaleShiftBroadCastPass::run() { } // only 3d scaleshift supported where number of c is arbitrary - auto lastD = insData->getDims()[insData->getDims().size() - 1]; + auto lastD = dataDims[dataDims.size() - 1]; if (lastD != weightsElements) { THROW_GNA_EXCEPTION << "Unsupported layer: " << l->name << " should have last dim(" << lastD << ") equal to weights(" << weightsElements << ") length"; } - if (insData->getDims().size() == 2) { + if (dataDims.size() == 2) { THROW_GNA_EXCEPTION << "For layer: " << l->name << " weights size(" << weightsElements<< ") invalid: should match input size of(" << lastD << ")"; } @@ -1212,7 +1221,10 @@ void SubstituteScaleShiftBroadCastPass::run() { // currently data type no providing reshape method of tensor desc scaleShift->outData.front()->reshape({batchSize, nElements}, Layout::NC); - insData->reshape({batchSize, nElements}, Layout::NC); + if (!was_reshaped) { + reshaped_data[insData->getName()] = insData->getDims(); + insData->reshape({batchSize, nElements}, Layout::NC); + } } else { THROW_GNA_EXCEPTION << "Not implemented substitution of scaleshift broadcast policy of " << getPassManager()->getPolicy().ScaleShiftPolicy << "using layers tiling, layer: " << l->name; @@ -1307,6 +1319,46 @@ void InsertIdentityToLSTMCellPass::run() { } } +void BreakFusingOfOutputLayersPass::run() { +#if GNA_LIB_VER == 1 + return; +#endif + OutputsDataMap outputsMap; + this->getPassManager()->getNetwork()->getOutputsInfo(outputsMap); + for (auto layer : *pLayers) { 
+ for (int output_idx = 0; output_idx < layer->outData.size(); output_idx++) { + auto& output = layer->outData[output_idx]; + auto& input_to = getInputTo(output); + + auto output_name = output->getName(); + auto is_network_output = outputsMap.find(output_name) != outputsMap.end(); + // In cases that this layer is network output you cannot use identity as sole output on + // it since it will possibly be fused and layer outputs will be unavailable + if (is_network_output) { + if (input_to.size() != 1) continue; + if (!LayerInfo(input_to.begin()->second).isActivation()) continue; + + CNNLayerPtr additional_output = + std::make_shared(LayerParams({output_name + "_side_identity", "identity", InferenceEngine::Precision::FP32})); + + auto quantized = InferenceEngine::getInjectedData(layer); + auto additional_output_quant = quantized ? InferenceEngine::injectData(additional_output) : additional_output; + + additional_output_quant->insData.resize(1); + additional_output_quant->outData.resize(1); + + auto out_data = DataPtr(new Data(output_name + "_side_identity_data", output->getTensorDesc())); + getCreatorLayer(out_data) = additional_output_quant; + + additional_output_quant->outData[0] = out_data; + + input_to[additional_output_quant->name] = additional_output_quant; + additional_output_quant->insData[0] = output; + } + } + } +} + void UnrollLSTMCellPass::run() { InferenceEngine::NetPass::UnrollRNN_if(*getPassManager()->getNetwork(), [] (const RNNCellBase& rnn) -> bool { if (rnn.clip != 0.0f) diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.hpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.hpp index 9faa4d2..7e2957a 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.hpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.hpp @@ -145,6 +145,12 @@ DECL_PASS(InsertConcatAligningFilter); DECL_PASS(ReorderConcatInputs); /** +* @brief in cases that network output layer is connected to only one layer which 
is activation additional identity is inserted +* so the operation is not fused with the activation allowing to get the results from said layer +*/ +DECL_PASS(BreakFusingOfOutputLayers); + +/** * @brief insert identity at the output of LSTMCell which fixes cases where data is not propagated correctly through network * and LSTMCell returns all zeroes */ diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp new file mode 100644 index 0000000..b6fa972 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp @@ -0,0 +1,32 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +#include +#include "common_test_utils/test_constants.hpp" + +namespace SubgraphTestsDefinitions { +namespace { + std::vector input_sizes = { + 80, + 32, + 64, + 100 + }; + + std::vector midLayerTypes { + midOutputType::Mul, + midOutputType::Sub, + midOutputType::Sum + }; + + std::map additional_config = {}; +} // namespace + +INSTANTIATE_TEST_CASE_P(OutputBeforeActivation, OutputBeforeActivation, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::ValuesIn(input_sizes), + ::testing::ValuesIn(midLayerTypes), + ::testing::Values(additional_config)), + OutputBeforeActivation::getTestCaseName); +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp new file mode 100644 index 0000000..9799b92 --- /dev/null +++ 
b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp @@ -0,0 +1,37 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +#include +#include "common_test_utils/test_constants.hpp" + +namespace SubgraphTestsDefinitions { +namespace { +std::vector input_sizes = { + 80, + 32, + 64, + 100 +}; + +std::vector midLayerTypes { + midOutputType::Mul, + midOutputType::Sub, + midOutputType::Sum +}; + +std::map additional_config = { + {"GNA_COMPACT_MODE", "NO"}, + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + {"GNA_SCALE_FACTOR_0", "1638.4"}, + {"GNA_SCALE_FACTOR_1", "1638.4"}, +}; +} // namespace + +INSTANTIATE_TEST_CASE_P(OutputBeforeActivation, OutputBeforeActivation, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::ValuesIn(input_sizes), + ::testing::ValuesIn(midLayerTypes), + ::testing::Values(additional_config)), + OutputBeforeActivation::getTestCaseName); +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp new file mode 100644 index 0000000..d6be085 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp @@ -0,0 +1,32 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +#include +#include "common_test_utils/test_constants.hpp" + +namespace SubgraphTestsDefinitions { +namespace { + std::vector input_sizes = { + 80, + 32, + 64, + 100 + }; + + std::vector midLayerTypes { + midOutputType::Mul, + midOutputType::Sub, + midOutputType::Sum + }; + + std::map additional_config = {}; +} // namespace + +INSTANTIATE_TEST_CASE_P(OutputBeforeActivation, 
OutputBeforeActivation, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::ValuesIn(input_sizes), + ::testing::ValuesIn(midLayerTypes), + ::testing::Values(additional_config)), + OutputBeforeActivation::getTestCaseName); +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/get_output_before_activation.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/get_output_before_activation.hpp new file mode 100644 index 0000000..ed5c5d4 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/get_output_before_activation.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +#pragma once + +#include "common_test_utils/test_common.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include + +namespace SubgraphTestsDefinitions { +enum class midOutputType { + Sum, + Sub, + Mul, +}; + +typedef std::tuple< + std::string, // Target device name + InferenceEngine::Precision, // Network precision + size_t, // Input size + midOutputType, // Type of layer that will be an output + std::map // Configuration +> outputBeforeActivationParams; + +std::ostream& operator<< (std::ostream& os, const midOutputType& oType); + +class OutputBeforeActivation : public LayerTestsUtils::LayerTestsCommon, + public testing::WithParamInterface { +protected: + void SetUp() override; +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override; +}; +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/get_output_before_activation.cpp b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/get_output_before_activation.cpp new file mode 
100644 index 0000000..3f7207a --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/get_output_before_activation.cpp @@ -0,0 +1,96 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "ngraph_functions/pass/convert_prc.hpp" + +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" +#include "subgraph_tests/get_output_before_activation.hpp" + +namespace SubgraphTestsDefinitions { +std::ostream& operator<<(std::ostream& os, const midOutputType& oType) { + switch (oType) { + case midOutputType::Sub: + return (os << "Sub"); + case midOutputType::Sum: + return (os << "Sum"); + case midOutputType::Mul: + return (os << "Mul"); + default: + return (os << "Unknown"); + } +} + +std::string OutputBeforeActivation::getTestCaseName(const testing::TestParamInfo& obj) { + std::string targetDevice; + InferenceEngine::Precision netPrecision; + size_t inputSize; + midOutputType outputType; + std::map config; + std::tie(targetDevice, netPrecision, inputSize, outputType, config) = obj.param; + std::ostringstream result; + + result << "netPrecision=" << netPrecision.name() << "_"; + result << "IS=" << inputSize << "_"; + result << "OutputType=" << outputType << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); +} + +void OutputBeforeActivation::SetUp() { + InferenceEngine::Precision netPrecision; + std::map config; + size_t inputSize; + midOutputType outputType; + std::tie(targetDevice, netPrecision, inputSize, outputType, config) = this->GetParam(); + configuration.insert(config.begin(), config.end()); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + 
std::vector input_dims { 1, inputSize }; + + auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims, input_dims}); + auto input0 = input_parameter[0]; + auto input1 = input_parameter[1]; + + ngraph::OutputVector outputs; + std::shared_ptr midLayer; + switch (outputType) { + case SubgraphTestsDefinitions::midOutputType::Sum: { + midLayer = ngraph::builder::makeEltwise(input0, input1, ngraph::helpers::EltwiseTypes::ADD); + break; + } + case SubgraphTestsDefinitions::midOutputType::Sub: { + midLayer = ngraph::builder::makeEltwise(input0, input1, ngraph::helpers::EltwiseTypes::SUBTRACT); + break; + } + case SubgraphTestsDefinitions::midOutputType::Mul: { + midLayer = ngraph::builder::makeEltwise(input0, input1, ngraph::helpers::EltwiseTypes::MULTIPLY); + break; + } + default: + GTEST_FAIL() << "Unknown midOutputType"; + } + + auto act = ngraph::builder::makeActivation(midLayer, ngPrc, ngraph::helpers::ActivationTypes::Tanh); + outputs.insert(outputs.end(), {midLayer, act}); + function = std::make_shared(outputs, input_parameter, "output_before_activation"); +} + +InferenceEngine::Blob::Ptr OutputBeforeActivation::GenerateInput(const InferenceEngine::InputInfo &info) const { + return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), 2, -1, 100); +} + +TEST_P(OutputBeforeActivation, CompareWithRefs) { + Run(); +}; +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests_deprecated/functional/gna/shared_tests_instance/lstm/lstm_cell_test.cpp b/inference-engine/tests_deprecated/functional/gna/shared_tests_instance/lstm/lstm_cell_test.cpp index 6a530ca..f91eab6 100644 --- a/inference-engine/tests_deprecated/functional/gna/shared_tests_instance/lstm/lstm_cell_test.cpp +++ b/inference-engine/tests_deprecated/functional/gna/shared_tests_instance/lstm/lstm_cell_test.cpp @@ -16,7 +16,6 @@ TEST_P(LSTMCellTestBase, GNA_sw_fp32_single_lstm_test) { } TEST_P(LSTMCellTestBase, GNA_I16_single_lstm_test) { - DISABLE_TEST_ON_GNA2 runSingleLSTMTest( 
{ {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, {"GNA_COMPACT_MODE", "NO"}, @@ -28,7 +27,6 @@ TEST_P(LSTMCellTestBase, GNA_I16_single_lstm_test) { } TEST_P(LSTMCellTestBase, GNA_I8_single_lstm_test) { - DISABLE_TEST_ON_GNA2 runSingleLSTMTest({ {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, {"GNA_COMPACT_MODE", "NO"},