From 65b00c1dfb82b14988b2df8ad314a95ae433b91c Mon Sep 17 00:00:00 2001
From: Edward Shogulin
Date: Sat, 30 May 2020 15:01:28 +0300
Subject: [PATCH] [LPT] FullyConnected transformation fix

---
 .../network_helper.hpp                             |   6 +-
 .../low_precision_transformations/transformer.hpp  |  29 ++-
 .../low_precision_transformations/src/eltwise.cpp  |   2 +-
 .../src/fake_quantize.cpp                          |   2 +-
 .../src/fully_connected.cpp                        | 172 ++++++++++++++-------
 .../src/network_helper.cpp                         |  32 +++-
 .../src/quantization_details.cpp                   |   4 +-
 .../src/transformer.cpp                            | 105 ++++++++-----
 .../src/weightable_layer_transformation.cpp        |  40 +++--
 .../fully_connected_transformation.cpp             |  40 +++++
 .../gemm_transformation.cpp                        |  41 +++++
 .../fully_connected_transformation.cpp             |  37 +++++
 .../gemm_transformation.cpp                        |  37 +++++
 .../fully_connected_transformation.hpp             |  27 ++++
 .../gemm_transformation.hpp                        |  27 ++++
 .../fully_connected_transformation.cpp             | 105 +++++++++++++
 .../gemm_transformation.cpp                        | 100 ++++++++++++
 .../layer_transformation.cpp                       |   2 +-
 18 files changed, 679 insertions(+), 129 deletions(-)
 create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp
 create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp
 create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp
 create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp
 create mode 100644 inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fully_connected_transformation.hpp
 create mode 100644 inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/gemm_transformation.hpp
 create mode 100644 inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fully_connected_transformation.cpp
 create mode 100644 inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/gemm_transformation.cpp

diff --git a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp
index 14469d1..069d930 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp
@@ -60,6 +60,8 @@ public:
     // return true if at least one child uses layer on weights
     static bool onWeights(const CNNLayer& layer);
 
+    static bool onConstWeightsPath(const CNNLayer& quantize);
+
     static size_t getIndex(const CNNLayer& layer);
 
     static std::vector<CNNLayerPtr> transformFakeQuantizeToConst(
@@ -193,6 +195,8 @@ public:
         const bool roundValues,
         const Precision precision = Precision::UNSPECIFIED);
 
+    static bool isQuantizedConstWeights(const CNNLayer& quantize);
+
     static int getConstParentBranchID(const CNNLayer& layer);
 
     static Precision getPrecisionParent(const CNNLayer& layer);
@@ -228,7 +232,7 @@ private:
 
         checkConstWithBlobs(blobLayer);
 
-        return blobLayer->blobs.begin()->second;;
+        return blobLayer->blobs.begin()->second;
     }
 
     static void quantizeBlob(const CNNLayer& quantize, Blob::Ptr& targetBlob, bool roundValues);
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/transformer.hpp b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/transformer.hpp
index 381b546..9bc44e3 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/transformer.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/transformer.hpp
@@ -31,41 +31,50 @@ public:
     void setWeightsToConst(const bool weightsToConst);
     void setQuantizedTensorAlignmentOnActivations(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnActivations);
     void setQuantizedTensorAlignmentOnWeights(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnWeights);
-    LowPrecisionTransformations& remove(const std::string& layerName);
-    LowPrecisionTransformations& removeBranchSpecificTransformations(const std::string& layerName);
-    LowPrecisionTransformations& removeTransformations(const std::string& layerName);
-    LowPrecisionTransformations& removeCleanupTransformations(const std::string& layerName);
+    LowPrecisionTransformations& remove(const std::string& layerType);
+    LowPrecisionTransformations& removeBranchSpecificTransformations(const std::string& layerType);
+    LowPrecisionTransformations& removeTransformations(const std::string& layerType);
+    LowPrecisionTransformations& removeCleanupTransformations(const std::string& layerType);
 
     template <class T>
     LowPrecisionTransformations& addBranchSpecific(const LayerTransformation::Params& params, const std::string& layerType) {
-        const auto it = branchSpecificTransformations.find(layerType);
+        std::string type = layerType;
+        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+        const auto it = branchSpecificTransformations.find(type);
         if (it != branchSpecificTransformations.end()) {
            branchSpecificTransformations.erase(it);
        }
 
-        branchSpecificTransformations.emplace(layerType, std::make_shared<T>(params));
+        branchSpecificTransformations.emplace(type, std::make_shared<T>(params));
        return *this;
    }
 
     template <class T>
     LowPrecisionTransformations& add(const LayerTransformation::Params& params, const std::string& layerType) {
-        const auto it = transformations.find(layerType);
+        std::string type = layerType;
+        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+        const auto it = transformations.find(type);
         if (it != transformations.end()) {
            transformations.erase(it);
        }
 
-        transformations.emplace(layerType, std::make_shared<T>(params));
+        transformations.emplace(type, std::make_shared<T>(params));
        return *this;
    }
 
     template <class T>
     LowPrecisionTransformations& addCleanup(const LayerTransformation::Params& params, const std::string& layerType) {
-        const auto it = cleanupTransformations.find(layerType);
+        std::string type = layerType;
+        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+        const auto it = cleanupTransformations.find(type);
         if (it != cleanupTransformations.end()) {
            cleanupTransformations.erase(it);
        }
 
-        cleanupTransformations.emplace(layerType, std::make_shared<T>(params));
+        cleanupTransformations.emplace(type, std::make_shared<T>(params));
        return *this;
    }
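Every registration method above now normalizes its key the same way before touching the map. A minimal standalone sketch of the shared pattern (the helper name toLowerCase is illustrative; the patch inlines the two lines instead of factoring them out):

    #include <algorithm>
    #include <ctype.h>   // ::tolower, the global-namespace overload used by the patch
    #include <string>

    // Normalize a layer type for use as a map key so that "GEMM", "Gemm" and
    // "gemm" all address the same registered transformation.
    std::string toLowerCase(const std::string& layerType) {
        std::string type = layerType;
        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
        return type;
    }

Lowercasing at insertion time only stays consistent if every reader performs the same normalization, which is what the matching transformer.cpp changes further down do.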
diff --git a/inference-engine/src/low_precision_transformations/src/eltwise.cpp b/inference-engine/src/low_precision_transformations/src/eltwise.cpp
index 146f4f7..d969fd5 100644
--- a/inference-engine/src/low_precision_transformations/src/eltwise.cpp
+++ b/inference-engine/src/low_precision_transformations/src/eltwise.cpp
@@ -272,7 +272,7 @@ int EltwiseTransformation::getNotEmpty(const CNNLayer& eltwise) {
         return 1;
     }
 
-    const std::vector<std::string> targetTypes = { "Convolution", "GEMM", "FullyConnected" };
+    const std::vector<std::string> targetTypes = { "Convolution", "Gemm", "FullyConnected" };
     const bool allBranchesAreEqual =
         std::all_of(parents.begin(), parents.end(), [&](const CNNLayerPtr& layer) { return isBranchWithTargetType(*layer, targetTypes); }) ||
         std::all_of(parents.begin(), parents.end(), [&](const CNNLayerPtr& layer) { return !isBranchWithTargetType(*layer, targetTypes); });
diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
index 52607fb..91c89e2 100644
--- a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
@@ -38,7 +38,7 @@ void FakeQuantizeTransformation::transform(TransformationContext& context, CNNLa
     // CNNNetworkHelper::invertFakeQuantize(layer);
 
     // FakeQuantize on weights are used without dequantization ScaleShifts
-    const bool onWeights = CNNNetworkHelper::onWeights(layer);
+    const bool onWeights = CNNNetworkHelper::onConstWeightsPath(layer) && CNNNetworkHelper::onWeights(layer);
     if (onWeights) {
         return;
     }
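The tightened predicate above separates two placements of a FakeQuantize that feeds a weights input. A short sketch of the intent, assuming the CNNNetworkHelper API extended later in this patch (isWeightsQuantization is a hypothetical wrapper, not a function from the patch):

    // A FakeQuantize is skipped by FakeQuantizeTransformation only when it
    // really quantizes constant weights:
    //   Const -> FakeQuantize -> FullyConnected (weights input)  -> skipped
    //   ReLU  -> FakeQuantize -> Gemm (second input)             -> transformed as usual
    bool isWeightsQuantization(const InferenceEngine::CNNLayer& fakeQuantize) {
        return InferenceEngine::details::CNNNetworkHelper::onConstWeightsPath(fakeQuantize) &&
               InferenceEngine::details::CNNNetworkHelper::onWeights(fakeQuantize);
    }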
diff --git a/inference-engine/src/low_precision_transformations/src/fully_connected.cpp b/inference-engine/src/low_precision_transformations/src/fully_connected.cpp
index b015db3..aa5c72b 100644
--- a/inference-engine/src/low_precision_transformations/src/fully_connected.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fully_connected.cpp
@@ -45,7 +45,8 @@ bool FullyConnectedTransformation::canBeTransformed(const TransformationContext&
     }
 
     const std::vector<size_t> inTensorDims = inputData->getDims();
-    if ((inTensorDims.size() != 2) && (inTensorDims.size() != 3)) {
+    if ((inTensorDims.size() != 2ul) && (inTensorDims.size() != 3ul) &&
+        ((fullyConnected.type == "FullyConnected") || ((fullyConnected.type == "Gemm") && (inTensorDims.size() != 4ul)))) {
         return false;
     }
 
@@ -95,7 +96,7 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
     }
 
     if (!CaselessEq<std::string>()(fullyConnected.type, "FullyConnected") &&
-        !CaselessEq<std::string>()(fullyConnected.type, "GEMM")) {
+        !CaselessEq<std::string>()(fullyConnected.type, "Gemm")) {
         THROW_IE_EXCEPTION << "layer '" << fullyConnected.name << "' is not correct";
     }
 
@@ -110,9 +111,9 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
     }
 
     const CNNLayerPtr parentOnWeights = CNNNetworkHelper::getParent(fullyConnected, 1);
-    const QuantizationDetails originalQuantizationDetails = parentOnWeights != nullptr ?
-        QuantizationDetails::getDetails(*parentOnWeights) :
-        QuantizationDetails();
+    if ((fullyConnected.type == "Gemm") && (parentOnWeights->type != "ScaleShift")) {
+        return;
+    }
 
     if (fullyConnected.outData.size() != 1) {
         THROW_IE_EXCEPTION << "layer outputs '" << fullyConnected.outData.size() << "' is not correct";
@@ -125,8 +126,21 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
 
     std::vector<float> originalWeightsDequantizationScales;
     std::vector<float> originalWeightsDequantizationShifts;
 
+    const bool weightsOnConstPath = CNNNetworkHelper::isQuantizedConstWeights(fullyConnected);
+    if (!weightsOnConstPath) {
+        if (std::any_of(
+            originalDataDequantizationShifts.begin(),
+            originalDataDequantizationShifts.end(),
+            [](const float value) { return value != 0.f; })) {
+            return;
+        }
+    }
+
     if (parentOnWeights != nullptr) {
         if (parentOnWeights->type == "FakeQuantize") {
+            if (!weightsOnConstPath) {
+                THROW_IE_LPT_EXCEPTION(*parentOnWeights) << "unexpected layer type";
+            }
             fillDequantizationsForWeightsPath(
                 fullyConnected,
                 supportAsymmetricQuantization,
@@ -136,6 +150,22 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
         } else if (parentOnWeights->type == "Const") {
             originalWeightsDequantizationScales.push_back(1.0);
             originalWeightsDequantizationShifts.push_back(0.0);
+        } else if (parentOnWeights->type == "ScaleShift") {
+            if (weightsOnConstPath) {
+                THROW_IE_LPT_EXCEPTION(*parentOnWeights) << "unexpected layer type";
+            }
+
+            fillFromDequantizationLayer(
+                *parentOnWeights,
+                originalWeightsDequantizationScales,
+                originalWeightsDequantizationShifts);
+
+            if (std::any_of(
+                originalWeightsDequantizationShifts.begin(),
+                originalWeightsDequantizationShifts.end(),
+                [](const float value) { return value != 0.f; })) {
+                return;
+            }
         } else {
             THROW_IE_EXCEPTION << "Unexpected dequantization layer type " << parentOnWeights->type;
         }
@@ -172,11 +202,18 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
     Precision weightsLowPrecision;
     if (parentOnWeights->type == "FakeQuantize") {
         weightsOriginalPrecision = parentOnWeights->outData[0]->getTensorDesc().getPrecision();
+        const bool weightsOnConstPath = CNNNetworkHelper::isQuantizedConstWeights(fullyConnected);
+        if (!weightsOnConstPath) {
+            THROW_IE_LPT_EXCEPTION(fullyConnected) << "unexpected layer type " << parentOnWeights->type << " on weights";
+        }
         weightsLowPrecision = getDataPrecision(
             *parentOnWeights,
             QuantizationDetails::getDetails(*parentOnWeights),
-            true,
+            weightsOnConstPath,
             supportAsymmetricQuantization).precision;
+    } else if (parentOnWeights->type == "ScaleShift") {
+        weightsOriginalPrecision = parentOnWeights->outData[0]->getTensorDesc().getPrecision();
+        weightsLowPrecision = CNNNetworkHelper::getPrecisionParent(*parentOnWeights);
     } else {
         THROW_IE_EXCEPTION << "unexpected layer type on weights " << parentOnWeights->type;
     }
@@ -213,7 +250,11 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
         updateLayerBiases(context, fullyConnected, fullyConnected.type == "GEMM", dequantizationScales, dequantizationShifts, biasesShifts);
     }
 
-    if ((parentOnWeights != nullptr) && (parentOnWeights->type == "FakeQuantize")) {
+    if ((weightsOnConstPath) && (parentOnWeights != nullptr) && (parentOnWeights->type == "FakeQuantize")) {
+        const QuantizationDetails originalQuantizationDetails = parentOnWeights != nullptr ?
+            QuantizationDetails::getDetails(*parentOnWeights) :
+            QuantizationDetails();
+
         const DataPrecision dataPrecision = getDataPrecision(
             *parentOnWeights,
             originalQuantizationDetails,
@@ -271,13 +312,16 @@ void FullyConnectedTransformation::calculateDequantizationForSymmetric(
     if (inputData == nullptr) {
         THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
     }
+    if (inputData->getDims().size() < 2) {
+        THROW_IE_EXCEPTION << "Unexpected input layout " << inputData->getLayout();
+    }
 
     const DataPtr outputData = fullyConnected.outData[0];
     if (outputData == nullptr) {
         THROW_IE_LPT_EXCEPTION(fullyConnected) << "output data is absent";
     }
 
-    const size_t outputChannelsCount = fullyConnected.outData[0]->getDims()[1];
+    const size_t outputChannelsCount = outputData->getDims()[1];
     dequantizationScales.resize(outputChannelsCount);
     dequantizationShifts.resize(outputChannelsCount);
     biasesShifts.resize(outputChannelsCount);
@@ -290,15 +334,10 @@ void FullyConnectedTransformation::calculateDequantizationForSymmetric(
     const auto prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "weights"));
     const auto prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
 
-    const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
-    const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
-    const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
-    const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(biasesBlob);
-
     const bool dequantizationValuesAreBroadcasted = getDequantizationValuesAreBroadcasted(fullyConnected);
     for (size_t i = 0; i < outputChannelsCount; ++i) {
         dequantizationScales[i] =
-            (dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[i]) *
+            prevDequantizationScaleBuffer.get()[0] *
             (originalWeightsDequantizationScales.size() == 0 ? 1.0 : (originalWeightsDequantizationScales.size() == 1 ?
                 originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[i]));
     }
@@ -308,25 +347,37 @@ void FullyConnectedTransformation::calculateDequantizationForSymmetric(
     const DataPtr insData = fullyConnected.insData[0].lock();
     if (insData == nullptr) {
         THROW_IE_LPT_EXCEPTION(fullyConnected) << "insert data ia absent";
     }
-    const size_t inputChannelsCount = insData->getDims().size() == 3ul ? insData->getDims()[2] : insData->getDims()[1];
-    for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
-        float sum = 0.0;
-        const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
-            1.0 :
-            ((originalWeightsDequantizationScales.size() == 1) ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
-
-        for (size_t inputChannel = 0; inputChannel < inputChannelsCount; ++inputChannel) {
-            const float w = weightsBuffer.get()[channel * inputChannelsCount + inputChannel];
-            const float shift = dequantizationValuesAreBroadcasted ? prevDequantizationShiftBuffer.get()[0] : prevDequantizationShiftBuffer.get()[inputChannel];
-            sum += w * shift * weightsDequantizationScale;
-        }
-        dequantizationShifts[channel] = biasesBuffer == nullptr ?
-            sum :
-            (sum + biasesBuffer.get()[channel] -
-                (dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[channel]) *
-                biasesBuffer.get()[channel] * weightsDequantizationScale);
-        biasesShifts[channel] = sum;
+    if (CNNNetworkHelper::isQuantizedConstWeights(fullyConnected)) {
+        const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
+        const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
+        const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
+        const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(biasesBlob);
+
+        const size_t inputChannelsCount = insData->getDims().size() == 3ul ? insData->getDims()[2] : insData->getDims()[1];
+        for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
+            float sum = 0.0;
+            const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
+                1.0 :
+                ((originalWeightsDequantizationScales.size() == 1) ?
+                    originalWeightsDequantizationScales[0] :
+                    originalWeightsDequantizationScales[channel]);
+
+            for (size_t inputChannel = 0; inputChannel < inputChannelsCount; ++inputChannel) {
+                const float w = weightsBuffer.get()[channel * inputChannelsCount + inputChannel];
+                const float shift = dequantizationValuesAreBroadcasted ?
+                    prevDequantizationShiftBuffer.get()[0] :
+                    prevDequantizationShiftBuffer.get()[inputChannel];
+                sum += w * shift * weightsDequantizationScale;
+            }
+
+            dequantizationShifts[channel] = biasesBuffer == nullptr ?
+                sum :
+                (sum + biasesBuffer.get()[channel] -
+                    prevDequantizationScaleBuffer.get()[0] *
+                    biasesBuffer.get()[channel] * weightsDequantizationScale);
+            biasesShifts[channel] = sum;
+        }
     }
 }
@@ -340,16 +391,17 @@ void FullyConnectedTransformation::calculateDequantizationForAsymmetric(
     if (inputData == nullptr) {
         THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
     }
-    // const Layout inputLayout = inputData->getLayout();
-    // if (inputLayout != Layout::NC) {
-    //     THROW_IE_EXCEPTION << "Unexpected input layout " << inputLayout;
-    // }
+    if (inputData->getDims().size() < 2) {
+        THROW_IE_EXCEPTION << "Unexpected input layout " << inputData->getLayout();
+    }
     const size_t inputChannelsCount = inputData->getDims()[1];
 
     const DataPtr outputData = fullyConnected.outData[0];
     if (outputData == nullptr) {
         THROW_IE_LPT_EXCEPTION(fullyConnected) << "output data is absent";
     }
+    const size_t outputChannelsCount = outputData->getDims()[1];
+
 
     CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(fullyConnected);
     if (scaleShift->type != "ScaleShift") {
@@ -358,43 +410,45 @@ void FullyConnectedTransformation::calculateDequantizationForAsymmetric(
     }
 
     const bool dequantizationValuesAreBroadcasted = getDequantizationValuesAreBroadcasted(fullyConnected);
 
-    const size_t outputChannelsCount = outputData->getDims()[1];
     dequantizationScales.resize(outputChannelsCount);
     dequantizationShifts.resize(outputChannelsCount);
 
     const std::shared_ptr<float> prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "weights"));
     for (size_t i = 0; i < outputChannelsCount; ++i) {
         dequantizationScales[i] =
-            (dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[i]) *
+            prevDequantizationScaleBuffer.get()[0] *
             (originalWeightsDequantizationScales.size() == 0 ? 1.0 : (originalWeightsDequantizationScales.size() == 1 ?
                 originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[i]));
     }
 
-    const auto weightsBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues));
-    const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
-    const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBiases(fullyConnected));
+    if (CNNNetworkHelper::isQuantizedConstWeights(fullyConnected)) {
+        const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
+        const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
+        const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
+        const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBiases(fullyConnected));
 
-    const std::shared_ptr<float> prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
+        const std::shared_ptr<float> prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
 
-    for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
-        float sum1 = 0.0;
-        float sum2 = 0.0;
-        const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
-            1.0 :
-            ((originalWeightsDequantizationScales.size() == 1) ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
+        for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
+            float sum1 = 0.0;
+            float sum2 = 0.0;
+            const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
+                1.0 :
+                ((originalWeightsDequantizationScales.size() == 1) ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
 
-        for (size_t w = 0; w < inputChannelsCount; ++w) {
-            const float kernel = weightsBuffer.get()[channel * inputChannelsCount + w];
-            const float shift = dequantizationValuesAreBroadcasted ? prevDequantizationShiftBuffer.get()[0] : prevDequantizationShiftBuffer.get()[channel];
-            sum1 += kernel * shift * weightsDequantizationScale;
-            sum2 += kernel * dataZeroPoints[w] * weightsDequantizationScale;
-        }
+            for (size_t w = 0; w < inputChannelsCount; ++w) {
+                const float kernel = weightsBuffer.get()[channel * inputChannelsCount + w];
+                const float shift = dequantizationValuesAreBroadcasted ? prevDequantizationShiftBuffer.get()[0] : prevDequantizationShiftBuffer.get()[w];
+                sum1 += kernel * shift * weightsDequantizationScale;
+                sum2 += kernel * dataZeroPoints[w] * weightsDequantizationScale;
+            }
 
-        dequantizationShifts[channel] = biasesBuffer == nullptr ?
-            sum1 :
-            (sum1 + biasesBuffer.get()[channel] -
-                (dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[channel]) *
-                biasesBuffer.get()[channel] * weightsDequantizationScale);
+            dequantizationShifts[channel] = biasesBuffer == nullptr ?
+                sum1 :
+                (sum1 + biasesBuffer.get()[channel] -
+                    prevDequantizationScaleBuffer.get()[0] *
+                    biasesBuffer.get()[channel] * weightsDequantizationScale);
+        }
     }
 }
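In both loops above the per-channel dequantization values reduce to a product and a weighted sum. A self-contained numeric sketch of the symmetric case (the names and the single broadcast activation scale and shift are illustrative simplifications of the buffers used above):

    #include <cstddef>
    #include <vector>

    // scales[c] = activationScale * weightsScale
    // shifts[c] = sum_i W[c][i] * activationShift * weightsScale
    // (a bias correction term is added when the layer has biases)
    void calculateSymmetricDequantization(
        const std::vector<float>& weights,  // [out x in], row-major
        std::size_t outputChannels,
        std::size_t inputChannels,
        float activationScale,
        float activationShift,
        float weightsScale,
        std::vector<float>& scales,
        std::vector<float>& shifts) {
        scales.assign(outputChannels, activationScale * weightsScale);
        shifts.assign(outputChannels, 0.f);
        for (std::size_t c = 0; c < outputChannels; ++c) {
            for (std::size_t i = 0; i < inputChannels; ++i) {
                shifts[c] += weights[c * inputChannels + i] * activationShift * weightsScale;
            }
        }
    }

The asymmetric variant additionally accumulates a kernel-times-zero-point term per input channel (sum2 above), which is why both variants can only fold the shifts when the quantized weights are available as a constant blob.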
diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp
index 2950244..d8c0538 100644
--- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp
+++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp
@@ -382,7 +382,7 @@ int CNNNetworkHelper::onWeightsInDepth(const CNNLayer& layer) {
     for (const CNNLayerPtr& child : children) {
         if ((CaselessEq<std::string>()(child->type, "Convolution") ||
             CaselessEq<std::string>()(child->type, "FullyConnected") ||
-            CaselessEq<std::string>()(child->type, "GEMM")) &&
+            CaselessEq<std::string>()(child->type, "Gemm")) &&
             (child->insData.size() >= 2lu)) {
             const std::vector<CNNLayerPtr> parents = getParentsRecursivelyExceptTypes(*child, {}, 1);
             for (const CNNLayerPtr& parent : parents) {
@@ -406,6 +406,15 @@ bool CNNNetworkHelper::onWeights(const CNNLayer& layer) {
     return result == 1;
 }
 
+bool CNNNetworkHelper::onConstWeightsPath(const CNNLayer& quantize) {
+    CNNLayerPtr parent = CNNNetworkHelper::getParent(quantize, 0);
+    if (parent == nullptr) {
+        THROW_IE_LPT_EXCEPTION(quantize) << "parent layer is nullable";
+    }
+
+    return parent->type == "Const";
+}
+
 size_t CNNNetworkHelper::getIndex(const CNNLayer& layer) {
     const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(layer);
     if (children.size() != 1) {
@@ -1598,6 +1607,27 @@ Blob::Ptr CNNNetworkHelper::quantizeWeights(const CNNLayer& quantize, const bool
     return targetBlob;
 }
 
+bool CNNNetworkHelper::isQuantizedConstWeights(const CNNLayer& layer) {
+    CNNLayerPtr quantize = CNNNetworkHelper::getParent(layer, 1);
+    if (quantize == nullptr) {
+        return false;
+    }
+
+    if (quantize->type == "Const") {
+        return true;
+    }
+
+    if (quantize->type != "FakeQuantize") {
+        return false;
+    }
+
+    if (quantize->insData.size() != 5ul) {
+        THROW_IE_LPT_EXCEPTION(*quantize) << "unexpected inputs size";
+    }
+
+    return onConstWeightsPath(*quantize);
+}
+
 int CNNNetworkHelper::getConstParentBranchID(const CNNLayer& layer) {
     int constBranchID = -1;
     for (int i = 0; i < layer.insData.size(); i++) {
diff --git a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
index 987f2bc..0d5503e 100644
--- a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
+++ b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
@@ -200,7 +200,9 @@ QuantizationDetails QuantizationDetails::getDetails(const CNNLayer& quantize) {
     size_t outputIntervalsCount;
     getOutputIntervals(quantize, outputLowValues, outputHighValues, outputIntervalsCount);
 
-    const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(quantize, CNNNetworkHelper::onWeights(quantize));
+    const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(
+        quantize,
+        CNNNetworkHelper::onWeights(quantize) && CNNNetworkHelper::onConstWeightsPath(quantize));
     if (!outputLayoutIsSupported(quantize)) {
         THROW_IE_LPT_EXCEPTION(quantize) << "Expected output channels count " << outputIntervalsCount << " but found " << outputChannelsCount;
     }
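The two helpers added to network_helper.cpp encode one rule: the second input of a weightable layer counts as quantized constant weights only when it is a Const, or a FakeQuantize fed by a Const. A condensed model over a toy graph structure (the struct below is illustrative; the real code walks CNNLayer::insData via getParent):

    #include <memory>
    #include <string>
    #include <vector>

    struct Layer {
        std::string type;
        std::vector<std::shared_ptr<Layer>> inputs;  // inputs[1] is the weights input
    };

    bool onConstWeightsPath(const Layer& quantize) {
        return !quantize.inputs.empty() && (quantize.inputs[0]->type == "Const");
    }

    bool isQuantizedConstWeights(const Layer& layer) {
        if (layer.inputs.size() < 2) {
            return false;                                 // no weights input at all
        }
        const Layer& onWeights = *layer.inputs[1];
        if (onWeights.type == "Const") {
            return true;                                  // raw constant weights
        }
        if (onWeights.type != "FakeQuantize") {
            return false;                                 // e.g. ScaleShift: runtime weights
        }
        return onConstWeightsPath(onWeights);             // FakeQuantize over a Const
    }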
diff --git a/inference-engine/src/low_precision_transformations/src/transformer.cpp b/inference-engine/src/low_precision_transformations/src/transformer.cpp
index c47328d..a4603ae 100644
--- a/inference-engine/src/low_precision_transformations/src/transformer.cpp
+++ b/inference-engine/src/low_precision_transformations/src/transformer.cpp
@@ -103,40 +103,55 @@ void LowPrecisionTransformations::setQuantizedTensorAlignmentOnWeights(
     }
 }
 
-LowPrecisionTransformations& LowPrecisionTransformations::remove(const std::string& layerName) {
-    removeBranchSpecificTransformations(layerName);
-    removeTransformations(layerName);
-    removeCleanupTransformations(layerName);
+LowPrecisionTransformations& LowPrecisionTransformations::remove(const std::string& layerType) {
+    std::string type = layerType;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    removeBranchSpecificTransformations(type);
+    removeTransformations(type);
+    removeCleanupTransformations(type);
     return *this;
 }
 
-LowPrecisionTransformations& LowPrecisionTransformations::removeBranchSpecificTransformations(const std::string& layerName) {
-    branchSpecificTransformations.erase(layerName);
+LowPrecisionTransformations& LowPrecisionTransformations::removeBranchSpecificTransformations(const std::string& layerType) {
+    std::string type = layerType;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    branchSpecificTransformations.erase(type);
     return *this;
 }
 
-LowPrecisionTransformations& LowPrecisionTransformations::removeTransformations(const std::string& layerName) {
-    transformations.erase(layerName);
+LowPrecisionTransformations& LowPrecisionTransformations::removeTransformations(const std::string& layerType) {
+    std::string type = layerType;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    transformations.erase(type);
     return *this;
 }
 
-LowPrecisionTransformations& LowPrecisionTransformations::removeCleanupTransformations(const std::string& layerName) {
-    cleanupTransformations.erase(layerName);
+LowPrecisionTransformations& LowPrecisionTransformations::removeCleanupTransformations(const std::string& layerType) {
+    std::string type = layerType;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    cleanupTransformations.erase(type);
     return *this;
 }
 
 LayerTransformationPtr LowPrecisionTransformations::find(const std::string& layerType) const {
-    auto it = branchSpecificTransformations.find(layerType);
+    std::string type = layerType;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    auto it = branchSpecificTransformations.find(type);
     if (it != branchSpecificTransformations.end()) {
         return it->second;
     }
 
-    it = transformations.find(layerType);
+    it = transformations.find(type);
     if (it != transformations.end()) {
         return it->second;
     }
 
-    it = cleanupTransformations.find(layerType);
+    it = cleanupTransformations.find(type);
     if (it != cleanupTransformations.end()) {
         return it->second;
     }
@@ -175,28 +190,28 @@ void LowPrecisionTransformations::setLayerTransformationsManager(
 
 LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const LayerTransformation::Params& params) {
     return LowPrecisionTransformations(
         std::map<std::string, LayerTransformationPtr>({
-            { "Concat", LayerTransformationPtr(new ConcatMultiChannelsTransformation(params))}
+            { "concat", LayerTransformationPtr(new ConcatMultiChannelsTransformation(params))}
         }),
         std::map<std::string, LayerTransformationPtr>({
-            { "Convolution", LayerTransformationPtr(new ConvolutionTransformation(params)) },
-            { "Pooling", LayerTransformationPtr(new PoolingTransformation(params)) },
-            { "FakeQuantize", LayerTransformationPtr(new FakeQuantizeTransformation(params)) },
-            { "Reshape", LayerTransformationPtr(new ReshapeTransformation(params)) },
-            { "FullyConnected", LayerTransformationPtr(new FullyConnectedTransformation(params)) },
-            { "GEMM", LayerTransformationPtr(new FullyConnectedTransformation(params)) },
-            { "Permute", LayerTransformationPtr(new PermuteTransformation(params)) },
-            { "Squeeze", LayerTransformationPtr(new SqueezeTransformation(params)) },
-            { "ReLU", LayerTransformationPtr(new ActivationTransformation(params)) },
-            { "MVN", LayerTransformationPtr(new MvnTransformation(params)) },
-            { "Eltwise", LayerTransformationPtr(new EltwiseTransformation(params)) },
-            { "Resample", LayerTransformationPtr(new ResampleTransformation(params)) },
-            { "Power", LayerTransformationPtr(new PowerTransformation(params)) },
-            { "DepthToSpace", LayerTransformationPtr(new DepthToSpaceTransformation(params)) },
-            { "Normalize", LayerTransformationPtr(new NormalizeTransformation(params)) }
+            { "convolution", LayerTransformationPtr(new ConvolutionTransformation(params)) },
+            { "pooling", LayerTransformationPtr(new PoolingTransformation(params)) },
+            { "fakequantize", LayerTransformationPtr(new FakeQuantizeTransformation(params)) },
+            { "reshape", LayerTransformationPtr(new ReshapeTransformation(params)) },
+            { "fullyconnected", LayerTransformationPtr(new FullyConnectedTransformation(params)) },
+            { "gemm", LayerTransformationPtr(new FullyConnectedTransformation(params)) },
+            { "permute", LayerTransformationPtr(new PermuteTransformation(params)) },
+            { "squeeze", LayerTransformationPtr(new SqueezeTransformation(params)) },
+            { "relu", LayerTransformationPtr(new ActivationTransformation(params)) },
+            { "mvn", LayerTransformationPtr(new MvnTransformation(params)) },
+            { "eltwise", LayerTransformationPtr(new EltwiseTransformation(params)) },
+            { "resample", LayerTransformationPtr(new ResampleTransformation(params)) },
+            { "power", LayerTransformationPtr(new PowerTransformation(params)) },
+            { "depthtospace", LayerTransformationPtr(new DepthToSpaceTransformation(params)) },
+            { "normalize", LayerTransformationPtr(new NormalizeTransformation(params)) }
         }),
         std::map<std::string, LayerTransformationPtr>({
-            { "FakeQuantize", LayerTransformationPtr(new FuseFakeQuantizeAndScaleShiftTransformation(params)) },
-            { "ScaleShift", LayerTransformationPtr(new ScaleShiftToConvolutionTransformation(params)) },
+            { "fakequantize", LayerTransformationPtr(new FuseFakeQuantizeAndScaleShiftTransformation(params)) },
+            { "scaleshift", LayerTransformationPtr(new ScaleShiftToConvolutionTransformation(params)) },
         }));
 }
 
@@ -314,7 +329,9 @@ void LowPrecisionTransformer::transform(ICNNNetwork& network) {
             continue;
         }
 
-        const auto it = transformations.branchSpecificTransformations.find(layer->type);
+        std::string type = layer->type;
+        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+        const auto it = transformations.branchSpecificTransformations.find(type);
         if (it == transformations.branchSpecificTransformations.end()) {
             continue;
         }
@@ -345,7 +362,10 @@ void LowPrecisionTransformer::transform(ICNNNetwork& network) {
         }
 
         bool transformed;
-        const auto it = transformations.transformations.find(layer->type);
+
+        std::string type = layer->type;
+        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+        const auto it = transformations.transformations.find(type);
         if (it != transformations.transformations.end()) {
             it->second->transform(context, *layer);
             transformed = true;
@@ -383,7 +403,9 @@ void LowPrecisionTransformer::transform(ICNNNetwork& network) {
             continue;
         }
 
-        const auto it = transformations.cleanupTransformations.find(layer->type);
+        std::string type = layer->type;
+        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+        const auto it = transformations.cleanupTransformations.find(type);
         if (it != transformations.cleanupTransformations.end()) {
             it->second->transform(context, *layer);
         }
@@ -402,7 +424,10 @@ void LowPrecisionTransformer::transform(ICNNNetwork& network) {
 }
 
 std::vector<Precision> LowPrecisionTransformer::getPrecisionsOnActivations(const std::string& layerType) const noexcept {
-    const LayerTransformationPtr transformation = transformations.find(layerType);
+    std::string type = layerType;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    const LayerTransformationPtr transformation = transformations.find(type);
     if (transformation == nullptr) {
         return std::vector<Precision>();
     }
@@ -410,7 +435,10 @@ std::vector<Precision> LowPrecisionTransformer::getPrecisionsOnActivations(const
 }
 
 bool LowPrecisionTransformer::isQuantized(const CNNLayer& layer) const noexcept {
-    const LayerTransformationPtr transformation = transformations.find(layer.type);
+    std::string type = layer.type;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    const LayerTransformationPtr transformation = transformations.find(type);
     if (transformation == nullptr) {
         return false;
     }
@@ -418,7 +446,10 @@ bool LowPrecisionTransformer::isQuantized(const CNNLayer& layer) const noexcept
 }
 
 bool LowPrecisionTransformer::isPrecisionPreserved(const CNNLayer& layer) const noexcept {
-    const LayerTransformationPtr transformation = transformations.find(layer.type);
+    std::string type = layer.type;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    const LayerTransformationPtr transformation = transformations.find(type);
     if (transformation == nullptr) {
         return false;
     }
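Since registration and lookup now both lowercase the key, the public API accepts layer types in any casing. A hypothetical usage sketch, assuming the LPT headers above:

    using namespace InferenceEngine::details;

    LayerTransformation::Params params;
    LowPrecisionTransformations transformations = LowPrecisionTransformer::getAllTransformations(params);

    // Before this patch only the exact registered spelling matched; now all of
    // these address the same normalized entries:
    transformations.remove("GEMM");  // erases the "gemm" entry
    const LayerTransformationPtr fullyConnected = transformations.find("FullyConnected");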
diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
index ce8a3f3..aa0fcfe 100644
--- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
@@ -96,26 +96,29 @@ bool WeightableLayerTransformation::isQuantized(const CNNLayer& layer) const noe
         return false;
     }
 
-    const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(layer, roundQuantizedValues);
-    if ((weightsBlob == nullptr) || (!CNNNetworkHelper::isBlobPrecisionSupported(weightsBlob->getTensorDesc().getPrecision()))) {
-        return false;
-    }
+    if (CNNNetworkHelper::isQuantizedConstWeights(layer)) {
+        const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(layer, roundQuantizedValues);
+        if ((weightsBlob == nullptr) || (!CNNNetworkHelper::isBlobPrecisionSupported(weightsBlob->getTensorDesc().getPrecision()))) {
+            return false;
+        }
 
-    const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(layer);
-    if ((biasesBlob != nullptr) && (!CNNNetworkHelper::isBlobPrecisionSupported(biasesBlob->getTensorDesc().getPrecision()))) {
-        return false;
-    }
-    const CNNLayerPtr parentOnWeights = CNNNetworkHelper::getParent(layer, 1);
-    if (parentOnWeights == nullptr) {
-        return false;
-    }
+        const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(layer);
+        if ((biasesBlob != nullptr) && (!CNNNetworkHelper::isBlobPrecisionSupported(biasesBlob->getTensorDesc().getPrecision()))) {
+            return false;
+        }
 
-    if (parentOnWeights->type != "FakeQuantize") {
-        const Precision precision = parentOnWeights->outData[0]->getPrecision();
-        if ((precision != Precision::I8) && (precision != Precision::U8)) {
+        const CNNLayerPtr parentOnWeights = CNNNetworkHelper::getParent(layer, 1);
+        if (parentOnWeights == nullptr) {
             return false;
         }
+
+        if (parentOnWeights->type != "FakeQuantize") {
+            const Precision precision = parentOnWeights->outData[0]->getPrecision();
+            if ((precision != Precision::I8) && (precision != Precision::U8)) {
+                return false;
+            }
+        }
     }
 
     return true;
@@ -289,7 +292,8 @@ void WeightableLayerTransformation::updateToSupportAsymmetricQuantization(
         weightsPrecisionsInfo.low);
     if (!std::all_of(weightsConvertedInBlob.get(), weightsConvertedInBlob.get() + weightsShifts.size(), [](float value) { return value == 0.0; })) {
         const CNNLayerPtr parentOnWeights = CNNNetworkHelper::getParent(layer, 1ul);
-        createAsymmetric(context, *parentOnWeights, layer, weightsPrecisionsInfo, weightsShifts, true);
+        const bool onWeights = CNNNetworkHelper::isQuantizedConstWeights(layer);
+        createAsymmetric(context, *parentOnWeights, layer, weightsPrecisionsInfo, weightsShifts, onWeights);
     }
 }
@@ -359,7 +363,9 @@ DataPrecision WeightableLayerTransformation::fillDequantizationsForWeightsPath(
     const bool supportAsymmetricQuantization,
     std::vector<float>& dequantizationScales,
     std::vector<float>& dequantizationShifts) const {
-    if ((weightableLayer.type != "Convolution") && (weightableLayer.type != "FullyConnected") && (weightableLayer.type != "GEMM")) {
+    if (!CaselessEq<std::string>()(weightableLayer.type, "Convolution") &&
+        !CaselessEq<std::string>()(weightableLayer.type, "FullyConnected") &&
+        !CaselessEq<std::string>()(weightableLayer.type, "Gemm")) {
         THROW_IE_EXCEPTION << "layer '" << weightableLayer.name << "' has unexpected type '" << weightableLayer.type << "'";
     }
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp
new file mode 100644
index 0000000..2544cd6
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp
@@ -0,0 +1,40 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "low_precision_transformations/fully_connected_transformation.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace InferenceEngine::details;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32,
+    InferenceEngine::Precision::FP16
+};
+
+const std::vector<InferenceEngine::SizeVector> dimensions = {
+    InferenceEngine::SizeVector({ 1, 16 })
+};
+
+const std::vector<LayerTransformation::Params> transformationParamValues = {
+    LayerTestsUtils::LayerTransformationParamsFactory::createParams(),
+    LayerTestsUtils::LayerTransformationParamsFactory::createParamsI8I8(),
+    LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8()
+};
+
+INSTANTIATE_TEST_CASE_P(LPT, FullyConnectedTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::ValuesIn(dimensions),
+        ::testing::Values(CommonTestUtils::DEVICE_CPU),
+        ::testing::ValuesIn(transformationParamValues)),
+    FullyConnectedTransformation::getTestCaseName);
+}  // namespace
+
+
+
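For reference, ::testing::Combine instantiates the cartesian product of its value lists, so the CPU suite above yields 2 precisions x 1 shape x 1 device x 3 parameter sets = 6 tests, each named through getTestCaseName. A sketch of how the name prefix is formed (mirroring the stream expression in the shared test source further down):

    #include <cstddef>
    #include <sstream>
    #include <vector>

    int main() {
        const std::vector<size_t> shape({ 1, 16 });  // InferenceEngine::SizeVector is std::vector<size_t>
        std::ostringstream result;
        result << shape.size() << "D_" << "FP32" << "_" << "CPU";
        // result.str() == "2D_FP32_CPU"; toString(params) is appended after one
        // more underscore to disambiguate the three parameter sets.
        return 0;
    }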
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp
new file mode 100644
index 0000000..add9688
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp
@@ -0,0 +1,41 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "low_precision_transformations/gemm_transformation.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace InferenceEngine::details;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32,
+    InferenceEngine::Precision::FP16
+};
+
+const std::vector<InferenceEngine::SizeVector> dimensions = {
+    InferenceEngine::SizeVector({ 1, 3, 16, 16 })
+};
+
+const std::vector<LayerTransformation::Params> transformationParamValues = {
+    LayerTestsUtils::LayerTransformationParamsFactory::createParams().setSupportAsymmetricQuantization(true),
+    LayerTestsUtils::LayerTransformationParamsFactory::createParams().setSupportAsymmetricQuantization(false),
+    LayerTestsUtils::LayerTransformationParamsFactory::createParamsI8I8(),
+    LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8()
+};
+
+INSTANTIATE_TEST_CASE_P(LPT, GemmTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::ValuesIn(dimensions),
+        ::testing::Values(CommonTestUtils::DEVICE_CPU),
+        ::testing::ValuesIn(transformationParamValues)),
+    GemmTransformation::getTestCaseName);
+}  // namespace
+
+
+
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp
new file mode 100644
index 0000000..99f0d80
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp
@@ -0,0 +1,37 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "low_precision_transformations/fully_connected_transformation.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace InferenceEngine::details;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32
+};
+
+const std::vector<InferenceEngine::SizeVector> dimensions = {
+    InferenceEngine::SizeVector({ 1, 16 })
+};
+
+const std::vector<LayerTransformation::Params> transformationParamValues = {
+    LayerTestsUtils::LayerTransformationParamsFactory::createParams()
+};
+
+INSTANTIATE_TEST_CASE_P(LPT, FullyConnectedTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::ValuesIn(dimensions),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU),
+        ::testing::ValuesIn(transformationParamValues)),
+    FullyConnectedTransformation::getTestCaseName);
+}  // namespace
+
+
+
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp
new file mode 100644
index 0000000..afe6987
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp
@@ -0,0 +1,37 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "low_precision_transformations/gemm_transformation.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace InferenceEngine::details;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32
+};
+
+const std::vector<InferenceEngine::SizeVector> dimensions = {
+    InferenceEngine::SizeVector({ 1, 3, 16, 16 })
+};
+
+const std::vector<LayerTransformation::Params> transformationParamValues = {
+    LayerTestsUtils::LayerTransformationParamsFactory::createParams()
+};
+
+INSTANTIATE_TEST_CASE_P(LPT, GemmTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::ValuesIn(dimensions),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU),
+        ::testing::ValuesIn(transformationParamValues)),
+    GemmTransformation::getTestCaseName);
+}  // namespace
+
+
+
diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fully_connected_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fully_connected_transformation.hpp
new file mode 100644
index 0000000..f0da013
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fully_connected_transformation.hpp
@@ -0,0 +1,27 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string>
+#include <memory>
+
+#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp"
+
+namespace LayerTestsDefinitions {
+
+class FullyConnectedTransformation :
+    public testing::WithParamInterface<LayerTransformationParams>,
+    public LayerTestsUtils::LayerTransformation {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<LayerTransformationParams> obj);
+
+protected:
+    void SetUp() override;
+
+private:
+    void validate();
+};
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/gemm_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/gemm_transformation.hpp
new file mode 100644
index 0000000..37a032c
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/gemm_transformation.hpp
@@ -0,0 +1,27 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string>
+#include <memory>
+
+#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp"
+
+namespace LayerTestsDefinitions {
+
+class GemmTransformation :
+    public testing::WithParamInterface<LayerTransformationParams>,
+    public LayerTestsUtils::LayerTransformation {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<LayerTransformationParams> obj);
+
+protected:
+    void SetUp() override;
+
+private:
+    void validate();
+};
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fully_connected_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fully_connected_transformation.cpp
new file mode 100644
index 0000000..37e0323
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fully_connected_transformation.cpp
@@ -0,0 +1,105 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision_transformations/fully_connected_transformation.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+
+#include <ie_core.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "ngraph_functions/pass/convert_prc.hpp"
+#include "low_precision_transformations/network_helper.hpp"
+#include "ngraph_functions/builders.hpp"
+
+namespace LayerTestsDefinitions {
+
+std::string FullyConnectedTransformation::getTestCaseName(testing::TestParamInfo<LayerTransformationParams> obj) {
+    InferenceEngine::Precision netPrecision;
+    InferenceEngine::SizeVector inputShapes;
+    std::string targetDevice;
+    InferenceEngine::details::LayerTransformation::Params params;
+    std::tie(netPrecision, inputShapes, targetDevice, params) = obj.param;
+
+    std::ostringstream result;
+    result << inputShapes.size() << "D_" << netPrecision.name() << "_" << targetDevice << "_" << toString(params);
+    return result.str();
+}
+
+void FullyConnectedTransformation::SetUp() {
+    InferenceEngine::SizeVector inputShape;
+    InferenceEngine::Precision netPrecision;
+    InferenceEngine::details::LayerTransformation::Params params;
+    std::tie(netPrecision, inputShape, targetDevice, params) = this->GetParam();
+    auto ngPrecision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+    const auto paramNode = std::make_shared<ngraph::opset1::Parameter>(ngPrecision, ngraph::Shape(inputShape));
+    const auto fakeQuantizeOnActivations = ngraph::builder::makeFakeQuantize(
+        paramNode, ngPrecision, 256ul, { 1ul },
+        { 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f });
+    fakeQuantizeOnActivations->set_friendly_name("fakeQuantizeOnActivations");
+
+    auto weightsConst = std::make_shared<ngraph::opset1::Constant>(ngPrecision, ngraph::Shape{ inputShape[1], inputShape[1] }, std::vector<float>({ 1.f }));
+    const auto fakeQuantizeOnWeights = ngraph::builder::makeFakeQuantize(
+        weightsConst, ngPrecision, 256ul, { 1ul, 1ul },
+        { -128.f / 8.f }, { 127.f / 8.f }, { -128.f / 8.f }, { 127.f / 8.f });
+    fakeQuantizeOnWeights->set_friendly_name("fakeQuantizeOnWeights");
+
+    const std::shared_ptr<ngraph::opset1::MatMul> fullyConnected = std::make_shared<ngraph::opset1::MatMul>(
+        fakeQuantizeOnActivations->output(0),
+        fakeQuantizeOnWeights->output(0),
+        false,
+        false);
+    fullyConnected->set_friendly_name("fullyConnected");
+
+    ngraph::ResultVector results {std::make_shared<ngraph::opset1::Result>(fullyConnected)};
+    function = std::make_shared<ngraph::Function>(results, ngraph::ParameterVector { paramNode }, "FullyConnectedTransformation");
+
+    // TODO: move to another place
+    validate();
+}
+
+void FullyConnectedTransformation::validate() {
+    InferenceEngine::SizeVector inputShape;
+    InferenceEngine::Precision netPrecision;
+    InferenceEngine::details::LayerTransformation::Params params;
+    std::tie(netPrecision, inputShape, targetDevice, params) = this->GetParam();
+
+    const InferenceEngine::CNNNetwork network = transform(params);
+
+    IE_SUPPRESS_DEPRECATED_START
+
+    const InferenceEngine::CNNLayerPtr fullyConnected = InferenceEngine::details::CNNNetworkHelper::getLayer(network, "fullyConnected_original");
+    EXPECT_NE(nullptr, fullyConnected) << "fullyConnected_original was not found, transformation was not handled";
+    EXPECT_EQ("FullyConnected", fullyConnected->type);
+
+    InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo();
+    EXPECT_EQ(1, outputs.size());
+
+    for (const auto it : outputs) {
+        const InferenceEngine::CNNLayerPtr outputLayer = it.second->getCreatorLayer().lock();
+        EXPECT_TRUE(outputLayer != nullptr);
+        EXPECT_EQ("ScaleShift", outputLayer->type);
+
+        checkParentPrecision(outputLayer, false);
+    }
+
+    IE_SUPPRESS_DEPRECATED_END
+}
+
+TEST_P(FullyConnectedTransformation, CompareWithRefImpl) {
+    Run();
+
+    if (targetDevice == std::string{CommonTestUtils::DEVICE_GPU}) {
+        PluginCache::get().reset();
+    }
+};
+
+}  // namespace LayerTestsDefinitions
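The FakeQuantize intervals in SetUp above are chosen so the tensors land on integer-friendly grids. A small sketch of the arithmetic, assuming the usual FakeQuantize semantics with levels = 256:

    // Activations: [0, 255/4] over 256 levels
    //   step = (255/4 - 0) / (256 - 1) = 0.25    -> behaves like U8 with scale 0.25
    // Weights: [-128/8, 127/8] over 256 levels
    //   step = (127/8 + 128/8) / (256 - 1) = 0.125 -> behaves like I8 with scale 0.125
    const float activationScale = (255.f / 4.f - 0.f) / 255.f;       // 0.25f
    const float weightsScale = (127.f / 8.f + 128.f / 8.f) / 255.f;  // 0.125f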
diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/gemm_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/gemm_transformation.cpp
new file mode 100644
index 0000000..71fbc61
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/gemm_transformation.cpp
@@ -0,0 +1,100 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision_transformations/gemm_transformation.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+
+#include <ie_core.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "ngraph_functions/pass/convert_prc.hpp"
+#include "ngraph_functions/builders.hpp"
+
+namespace LayerTestsDefinitions {
+
+std::string GemmTransformation::getTestCaseName(testing::TestParamInfo<LayerTransformationParams> obj) {
+    InferenceEngine::Precision netPrecision;
+    InferenceEngine::SizeVector inputShapes;
+    std::string targetDevice;
+    InferenceEngine::details::LayerTransformation::Params params;
+    std::tie(netPrecision, inputShapes, targetDevice, params) = obj.param;
+
+    std::ostringstream result;
+    result << inputShapes.size() << "D_" << netPrecision.name() << "_" << targetDevice << "_" << toString(params);
+    return result.str();
+}
+
+void GemmTransformation::SetUp() {
+    InferenceEngine::SizeVector inputShape;
+    InferenceEngine::Precision netPrecision;
+    InferenceEngine::details::LayerTransformation::Params params;
+    std::tie(netPrecision, inputShape, targetDevice, params) = this->GetParam();
+    auto ngPrecision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+    const auto paramNode1 = std::make_shared<ngraph::opset1::Parameter>(ngPrecision, ngraph::Shape(inputShape));
+    const auto fakeQuantizeOnActivations = ngraph::builder::makeFakeQuantize(
+        paramNode1, ngPrecision, 256ul, { 1ul },
+        { 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f });
+    fakeQuantizeOnActivations->set_friendly_name("fakeQuantizeOnActivations");
+
+    const auto paramNode2 = std::make_shared<ngraph::opset1::Parameter>(ngPrecision, ngraph::Shape(inputShape));
+    const auto fakeQuantizeOnWeights = ngraph::builder::makeFakeQuantize(
+        paramNode2, ngPrecision, 256ul, { 1ul },
+        { -128.f / 8.f }, { 127.f / 8.f }, { -128.f / 8.f }, { 127.f / 8.f });
+    fakeQuantizeOnWeights->set_friendly_name("fakeQuantizeOnWeights");
+
+    const auto matMul = std::make_shared<ngraph::opset1::MatMul>(
+        fakeQuantizeOnActivations->output(0),
+        fakeQuantizeOnWeights->output(0),
+        false,
+        false);
+    matMul->set_friendly_name("matMul");
+
+    ngraph::ResultVector results {std::make_shared<ngraph::opset1::Result>(matMul)};
+    function = std::make_shared<ngraph::Function>(results, ngraph::ParameterVector { paramNode1, paramNode2 }, "GemmTransformation");
+
+    // TODO: move to another place
+    validate();
+}
+
+void GemmTransformation::validate() {
+    InferenceEngine::SizeVector inputShape;
+    InferenceEngine::Precision netPrecision;
+    InferenceEngine::details::LayerTransformation::Params params;
+    std::tie(netPrecision, inputShape, targetDevice, params) = this->GetParam();
+
+    const InferenceEngine::CNNNetwork network = transform(params);
+
+    IE_SUPPRESS_DEPRECATED_START
+
+    InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo();
+    EXPECT_EQ(1, outputs.size());
+
+    for (const auto it : outputs) {
+        const InferenceEngine::CNNLayerPtr outputLayer = it.second->getCreatorLayer().lock();
+        EXPECT_TRUE(outputLayer != nullptr);
+        EXPECT_EQ("ScaleShift", outputLayer->type);
+
+        checkParentPrecision(outputLayer, false);
+    }
+
+    IE_SUPPRESS_DEPRECATED_END
+}
+
+TEST_P(GemmTransformation, CompareWithRefImpl) {
+    Run();
+
+    if (targetDevice == std::string{CommonTestUtils::DEVICE_GPU}) {
+        PluginCache::get().reset();
+    }
+};
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp b/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp
index 37722b1..2e43412 100644
--- a/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp
+++ b/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp
@@ -81,7 +81,7 @@ InferenceEngine::details::LowPrecisionTransformer LayerTransformation::getLowPre
 
 void LayerTransformation::checkParentPrecision(const InferenceEngine::CNNLayerPtr& layer, const bool lowPrecision) {
     IE_SUPPRESS_DEPRECATED_START
-    EXPECT_EQ(1ul, layer->insData.size()) << "insert data count is no expected: " << layer->insData.size();
+    EXPECT_EQ(1ul, layer->insData.size()) << "insert data count " << layer->insData.size() << " is not correct for layer " << layer->name;
     const InferenceEngine::DataPtr insData = layer->insData[0].lock();
     EXPECT_TRUE(insData != nullptr) << "insert data is nullable";
     const InferenceEngine::Precision precision = insData->getTensorDesc().getPrecision();
-- 
2.7.4