From 65b00c1dfb82b14988b2df8ad314a95ae433b91c Mon Sep 17 00:00:00 2001
From: Edward Shogulin
Date: Sat, 30 May 2020 15:01:28 +0300
Subject: [PATCH] [LPT] FullyConnected transformation fix

---
 .../network_helper.hpp                             |   6 +-
 .../low_precision_transformations/transformer.hpp  |  29 ++-
 .../low_precision_transformations/src/eltwise.cpp  |   2 +-
 .../src/fake_quantize.cpp                          |   2 +-
 .../src/fully_connected.cpp                        | 172 ++++++++++++++-------
 .../src/network_helper.cpp                         |  32 +++-
 .../src/quantization_details.cpp                   |   4 +-
 .../src/transformer.cpp                            | 105 ++++++++-----
 .../src/weightable_layer_transformation.cpp        |  40 +++--
 .../fully_connected_transformation.cpp             |  40 +++++
 .../gemm_transformation.cpp                        |  41 +++++
 .../fully_connected_transformation.cpp             |  37 +++++
 .../gemm_transformation.cpp                        |  37 +++++
 .../fully_connected_transformation.hpp             |  27 ++++
 .../gemm_transformation.hpp                        |  27 ++++
 .../fully_connected_transformation.cpp             | 105 +++++++++++++
 .../gemm_transformation.cpp                        | 100 ++++++++++++
 .../layer_transformation.cpp                       |   2 +-
 18 files changed, 679 insertions(+), 129 deletions(-)
 create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp
 create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp
 create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp
 create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp
 create mode 100644 inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fully_connected_transformation.hpp
 create mode 100644 inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/gemm_transformation.hpp
 create mode 100644 inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fully_connected_transformation.cpp
 create mode 100644 inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/gemm_transformation.cpp

diff --git a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp
index 14469d1..069d930 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp
@@ -60,6 +60,8 @@ public:
     // return true if at least one child uses layer on weights
     static bool onWeights(const CNNLayer& layer);
 
+    static bool onConstWeightsPath(const CNNLayer& quantize);
+
     static size_t getIndex(const CNNLayer& layer);
 
     static std::vector<CNNLayerPtr> transformFakeQuantizeToConst(
@@ -193,6 +195,8 @@ public:
         const bool roundValues,
         const Precision precision = Precision::UNSPECIFIED);
 
+    static bool isQuantizedConstWeights(const CNNLayer& quantize);
+
     static int getConstParentBranchID(const CNNLayer& layer);
 
     static Precision getPrecisionParent(const CNNLayer& layer);
@@ -228,7 +232,7 @@ private:
 
         checkConstWithBlobs(blobLayer);
 
-        return blobLayer->blobs.begin()->second;;
+        return blobLayer->blobs.begin()->second;
     }
 
     static void quantizeBlob(const CNNLayer& quantize, Blob::Ptr& targetBlob, bool roundValues);
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/transformer.hpp b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/transformer.hpp
index 381b546..9bc44e3 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/transformer.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/transformer.hpp
@@ -31,41 +31,50 @@ public:
     void setWeightsToConst(const bool weightsToConst);
     void setQuantizedTensorAlignmentOnActivations(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnActivations);
     void setQuantizedTensorAlignmentOnWeights(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnWeights);
-    LowPrecisionTransformations& remove(const std::string& layerName);
-    LowPrecisionTransformations& removeBranchSpecificTransformations(const std::string& layerName);
-    LowPrecisionTransformations& removeTransformations(const std::string& layerName);
-    LowPrecisionTransformations& removeCleanupTransformations(const std::string& layerName);
+    LowPrecisionTransformations& remove(const std::string& layerType);
+    LowPrecisionTransformations& removeBranchSpecificTransformations(const std::string& layerType);
+    LowPrecisionTransformations& removeTransformations(const std::string& layerType);
+    LowPrecisionTransformations& removeCleanupTransformations(const std::string& layerType);
 
     template <class T>
     LowPrecisionTransformations& addBranchSpecific(const LayerTransformation::Params& params, const std::string& layerType) {
-        const auto it = branchSpecificTransformations.find(layerType);
+        std::string type = layerType;
+        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+        const auto it = branchSpecificTransformations.find(type);
         if (it != branchSpecificTransformations.end()) {
            branchSpecificTransformations.erase(it);
        }
 
-        branchSpecificTransformations.emplace(layerType, std::make_shared<T>(params));
+        branchSpecificTransformations.emplace(type, std::make_shared<T>(params));
        return *this;
    }
 
     template <class T>
     LowPrecisionTransformations& add(const LayerTransformation::Params& params, const std::string& layerType) {
-        const auto it = transformations.find(layerType);
+        std::string type = layerType;
+        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+        const auto it = transformations.find(type);
         if (it != transformations.end()) {
            transformations.erase(it);
        }
 
-        transformations.emplace(layerType, std::make_shared<T>(params));
+        transformations.emplace(type, std::make_shared<T>(params));
        return *this;
    }
 
     template <class T>
     LowPrecisionTransformations& addCleanup(const LayerTransformation::Params& params, const std::string& layerType) {
-        const auto it = cleanupTransformations.find(layerType);
+        std::string type = layerType;
+        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+        const auto it = cleanupTransformations.find(type);
         if (it != cleanupTransformations.end()) {
            cleanupTransformations.erase(it);
        }
 
-        cleanupTransformations.emplace(layerType, std::make_shared<T>(params));
+        cleanupTransformations.emplace(type, std::make_shared<T>(params));
        return *this;
    }
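Every registration method above now normalizes its key the same way before touching the map. A minimal standalone sketch of the shared pattern (the helper name toLowerCase is illustrative; the patch inlines the two lines instead of factoring them out):

    #include <algorithm>
    #include <ctype.h>   // ::tolower, the global-namespace overload used by the patch
    #include <string>

    // Normalize a layer type for use as a map key so that "GEMM", "Gemm" and
    // "gemm" all address the same registered transformation.
    std::string toLowerCase(const std::string& layerType) {
        std::string type = layerType;
        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
        return type;
    }

Lowercasing at insertion time only stays consistent if every reader performs the same normalization, which is what the matching transformer.cpp changes further down do.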
diff --git a/inference-engine/src/low_precision_transformations/src/eltwise.cpp b/inference-engine/src/low_precision_transformations/src/eltwise.cpp
index 146f4f7..d969fd5 100644
--- a/inference-engine/src/low_precision_transformations/src/eltwise.cpp
+++ b/inference-engine/src/low_precision_transformations/src/eltwise.cpp
@@ -272,7 +272,7 @@ int EltwiseTransformation::getNotEmpty(const CNNLayer& eltwise) {
         return 1;
     }
 
-    const std::vector<std::string> targetTypes = { "Convolution", "GEMM", "FullyConnected" };
+    const std::vector<std::string> targetTypes = { "Convolution", "Gemm", "FullyConnected" };
     const bool allBranchesAreEqual =
         std::all_of(parents.begin(), parents.end(), [&](const CNNLayerPtr& layer) { return isBranchWithTargetType(*layer, targetTypes); }) ||
         std::all_of(parents.begin(), parents.end(), [&](const CNNLayerPtr& layer) { return !isBranchWithTargetType(*layer, targetTypes); });
diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
index 52607fb..91c89e2 100644
--- a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
@@ -38,7 +38,7 @@ void FakeQuantizeTransformation::transform(TransformationContext& context, CNNLa
     // CNNNetworkHelper::invertFakeQuantize(layer);
 
     // FakeQuantize on weights are used without dequantization ScaleShifts
-    const bool onWeights = CNNNetworkHelper::onWeights(layer);
+    const bool onWeights = CNNNetworkHelper::onConstWeightsPath(layer) && CNNNetworkHelper::onWeights(layer);
     if (onWeights) {
         return;
     }
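The tightened predicate above separates two placements of a FakeQuantize that feeds a weights input. A short sketch of the intent, assuming the CNNNetworkHelper API extended later in this patch (isWeightsQuantization is a hypothetical wrapper, not a function from the patch):

    // A FakeQuantize is skipped by FakeQuantizeTransformation only when it
    // really quantizes constant weights:
    //   Const -> FakeQuantize -> FullyConnected (weights input)  -> skipped
    //   ReLU  -> FakeQuantize -> Gemm (second input)             -> transformed as usual
    bool isWeightsQuantization(const InferenceEngine::CNNLayer& fakeQuantize) {
        return InferenceEngine::details::CNNNetworkHelper::onConstWeightsPath(fakeQuantize) &&
               InferenceEngine::details::CNNNetworkHelper::onWeights(fakeQuantize);
    }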
diff --git a/inference-engine/src/low_precision_transformations/src/fully_connected.cpp b/inference-engine/src/low_precision_transformations/src/fully_connected.cpp
index b015db3..aa5c72b 100644
--- a/inference-engine/src/low_precision_transformations/src/fully_connected.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fully_connected.cpp
@@ -45,7 +45,8 @@ bool FullyConnectedTransformation::canBeTransformed(const TransformationContext&
     }
 
     const std::vector<size_t> inTensorDims = inputData->getDims();
-    if ((inTensorDims.size() != 2) && (inTensorDims.size() != 3)) {
+    if ((inTensorDims.size() != 2ul) && (inTensorDims.size() != 3ul) &&
+        ((fullyConnected.type == "FullyConnected") || ((fullyConnected.type == "Gemm") && (inTensorDims.size() != 4ul)))) {
         return false;
     }
 
@@ -95,7 +96,7 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
     }
 
     if (!CaselessEq<std::string>()(fullyConnected.type, "FullyConnected") &&
-        !CaselessEq<std::string>()(fullyConnected.type, "GEMM")) {
+        !CaselessEq<std::string>()(fullyConnected.type, "Gemm")) {
         THROW_IE_EXCEPTION << "layer '" << fullyConnected.name << "' is not correct";
     }
 
@@ -110,9 +111,9 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
     }
 
     const CNNLayerPtr parentOnWeights = CNNNetworkHelper::getParent(fullyConnected, 1);
-    const QuantizationDetails originalQuantizationDetails = parentOnWeights != nullptr ?
-        QuantizationDetails::getDetails(*parentOnWeights) :
-        QuantizationDetails();
+    if ((fullyConnected.type == "Gemm") && (parentOnWeights->type != "ScaleShift")) {
+        return;
+    }
 
     if (fullyConnected.outData.size() != 1) {
         THROW_IE_EXCEPTION << "layer outputs '" << fullyConnected.outData.size() << "' is not correct";
@@ -125,8 +126,21 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
 
     std::vector<float> originalWeightsDequantizationScales;
     std::vector<float> originalWeightsDequantizationShifts;
 
+    const bool weightsOnConstPath = CNNNetworkHelper::isQuantizedConstWeights(fullyConnected);
+    if (!weightsOnConstPath) {
+        if (std::any_of(
+            originalDataDequantizationShifts.begin(),
+            originalDataDequantizationShifts.end(),
+            [](const float value) { return value != 0.f; })) {
+            return;
+        }
+    }
+
     if (parentOnWeights != nullptr) {
         if (parentOnWeights->type == "FakeQuantize") {
+            if (!weightsOnConstPath) {
+                THROW_IE_LPT_EXCEPTION(*parentOnWeights) << "unexpected layer type";
+            }
             fillDequantizationsForWeightsPath(
                 fullyConnected,
                 supportAsymmetricQuantization,
@@ -136,6 +150,22 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
         } else if (parentOnWeights->type == "Const") {
             originalWeightsDequantizationScales.push_back(1.0);
             originalWeightsDequantizationShifts.push_back(0.0);
+        } else if (parentOnWeights->type == "ScaleShift") {
+            if (weightsOnConstPath) {
+                THROW_IE_LPT_EXCEPTION(*parentOnWeights) << "unexpected layer type";
+            }
+
+            fillFromDequantizationLayer(
+                *parentOnWeights,
+                originalWeightsDequantizationScales,
+                originalWeightsDequantizationShifts);
+
+            if (std::any_of(
+                originalWeightsDequantizationShifts.begin(),
+                originalWeightsDequantizationShifts.end(),
+                [](const float value) { return value != 0.f; })) {
+                return;
+            }
         } else {
             THROW_IE_EXCEPTION << "Unexpected dequantization layer type " << parentOnWeights->type;
         }
@@ -172,11 +202,18 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
     Precision weightsLowPrecision;
     if (parentOnWeights->type == "FakeQuantize") {
         weightsOriginalPrecision = parentOnWeights->outData[0]->getTensorDesc().getPrecision();
+        const bool weightsOnConstPath = CNNNetworkHelper::isQuantizedConstWeights(fullyConnected);
+        if (!weightsOnConstPath) {
+            THROW_IE_LPT_EXCEPTION(fullyConnected) << "unexpected layer type " << parentOnWeights->type << " on weights";
+        }
         weightsLowPrecision = getDataPrecision(
             *parentOnWeights,
             QuantizationDetails::getDetails(*parentOnWeights),
-            true,
+            weightsOnConstPath,
             supportAsymmetricQuantization).precision;
+    } else if (parentOnWeights->type == "ScaleShift") {
+        weightsOriginalPrecision = parentOnWeights->outData[0]->getTensorDesc().getPrecision();
+        weightsLowPrecision = CNNNetworkHelper::getPrecisionParent(*parentOnWeights);
     } else {
         THROW_IE_EXCEPTION << "unexpected layer type on weights " << parentOnWeights->type;
     }
@@ -213,7 +250,11 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
         updateLayerBiases(context, fullyConnected, fullyConnected.type == "GEMM", dequantizationScales, dequantizationShifts, biasesShifts);
     }
 
-    if ((parentOnWeights != nullptr) && (parentOnWeights->type == "FakeQuantize")) {
+    if ((weightsOnConstPath) && (parentOnWeights != nullptr) && (parentOnWeights->type == "FakeQuantize")) {
+        const QuantizationDetails originalQuantizationDetails = parentOnWeights != nullptr ?
+            QuantizationDetails::getDetails(*parentOnWeights) :
+            QuantizationDetails();
+
         const DataPrecision dataPrecision = getDataPrecision(
             *parentOnWeights,
             originalQuantizationDetails,
@@ -271,13 +312,16 @@ void FullyConnectedTransformation::calculateDequantizationForSymmetric(
     if (inputData == nullptr) {
         THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
     }
+    if (inputData->getDims().size() < 2) {
+        THROW_IE_EXCEPTION << "Unexpected input layout " << inputData->getLayout();
+    }
 
     const DataPtr outputData = fullyConnected.outData[0];
     if (outputData == nullptr) {
         THROW_IE_LPT_EXCEPTION(fullyConnected) << "output data is absent";
     }
 
-    const size_t outputChannelsCount = fullyConnected.outData[0]->getDims()[1];
+    const size_t outputChannelsCount = outputData->getDims()[1];
     dequantizationScales.resize(outputChannelsCount);
     dequantizationShifts.resize(outputChannelsCount);
     biasesShifts.resize(outputChannelsCount);
@@ -290,15 +334,10 @@ void FullyConnectedTransformation::calculateDequantizationForSymmetric(
     const auto prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "weights"));
     const auto prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
 
-    const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
-    const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
-    const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
-    const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(biasesBlob);
-
     const bool dequantizationValuesAreBroadcasted = getDequantizationValuesAreBroadcasted(fullyConnected);
     for (size_t i = 0; i < outputChannelsCount; ++i) {
         dequantizationScales[i] =
-            (dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[i]) *
+            prevDequantizationScaleBuffer.get()[0] *
             (originalWeightsDequantizationScales.size() == 0 ? 1.0 : (originalWeightsDequantizationScales.size() == 1 ?
                 originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[i]));
     }
@@ -308,25 +347,37 @@ void FullyConnectedTransformation::calculateDequantizationForSymmetric(
     const DataPtr insData = fullyConnected.insData[0].lock();
     if (insData == nullptr) {
         THROW_IE_LPT_EXCEPTION(fullyConnected) << "insert data ia absent";
     }
-    const size_t inputChannelsCount = insData->getDims().size() == 3ul ? insData->getDims()[2] : insData->getDims()[1];
-    for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
-        float sum = 0.0;
-        const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
-            1.0 :
-            ((originalWeightsDequantizationScales.size() == 1) ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
-
-        for (size_t inputChannel = 0; inputChannel < inputChannelsCount; ++inputChannel) {
-            const float w = weightsBuffer.get()[channel * inputChannelsCount + inputChannel];
-            const float shift = dequantizationValuesAreBroadcasted ? prevDequantizationShiftBuffer.get()[0] : prevDequantizationShiftBuffer.get()[inputChannel];
-            sum += w * shift * weightsDequantizationScale;
-        }
-        dequantizationShifts[channel] = biasesBuffer == nullptr ?
-            sum :
-            (sum + biasesBuffer.get()[channel] -
-                (dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[channel]) *
-                biasesBuffer.get()[channel] * weightsDequantizationScale);
-        biasesShifts[channel] = sum;
+    if (CNNNetworkHelper::isQuantizedConstWeights(fullyConnected)) {
+        const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
+        const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
+        const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
+        const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(biasesBlob);
+
+        const size_t inputChannelsCount = insData->getDims().size() == 3ul ? insData->getDims()[2] : insData->getDims()[1];
+        for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
+            float sum = 0.0;
+            const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
+                1.0 :
+                ((originalWeightsDequantizationScales.size() == 1) ?
+                    originalWeightsDequantizationScales[0] :
+                    originalWeightsDequantizationScales[channel]);
+
+            for (size_t inputChannel = 0; inputChannel < inputChannelsCount; ++inputChannel) {
+                const float w = weightsBuffer.get()[channel * inputChannelsCount + inputChannel];
+                const float shift = dequantizationValuesAreBroadcasted ?
+                    prevDequantizationShiftBuffer.get()[0] :
+                    prevDequantizationShiftBuffer.get()[inputChannel];
+                sum += w * shift * weightsDequantizationScale;
+            }
+
+            dequantizationShifts[channel] = biasesBuffer == nullptr ?
+                sum :
+                (sum + biasesBuffer.get()[channel] -
+                    prevDequantizationScaleBuffer.get()[0] *
+                    biasesBuffer.get()[channel] * weightsDequantizationScale);
+            biasesShifts[channel] = sum;
+        }
     }
 }
@@ -340,16 +391,17 @@ void FullyConnectedTransformation::calculateDequantizationForAsymmetric(
     if (inputData == nullptr) {
         THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
     }
-    // const Layout inputLayout = inputData->getLayout();
-    // if (inputLayout != Layout::NC) {
-    //     THROW_IE_EXCEPTION << "Unexpected input layout " << inputLayout;
-    // }
+    if (inputData->getDims().size() < 2) {
+        THROW_IE_EXCEPTION << "Unexpected input layout " << inputData->getLayout();
+    }
     const size_t inputChannelsCount = inputData->getDims()[1];
 
     const DataPtr outputData = fullyConnected.outData[0];
     if (outputData == nullptr) {
         THROW_IE_LPT_EXCEPTION(fullyConnected) << "output data is absent";
     }
+    const size_t outputChannelsCount = outputData->getDims()[1];
+
 
     CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(fullyConnected);
     if (scaleShift->type != "ScaleShift") {
@@ -358,43 +410,45 @@ void FullyConnectedTransformation::calculateDequantizationForAsymmetric(
     }
 
     const bool dequantizationValuesAreBroadcasted = getDequantizationValuesAreBroadcasted(fullyConnected);
 
-    const size_t outputChannelsCount = outputData->getDims()[1];
     dequantizationScales.resize(outputChannelsCount);
     dequantizationShifts.resize(outputChannelsCount);
 
     const std::shared_ptr<float> prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "weights"));
     for (size_t i = 0; i < outputChannelsCount; ++i) {
         dequantizationScales[i] =
-            (dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[i]) *
+            prevDequantizationScaleBuffer.get()[0] *
             (originalWeightsDequantizationScales.size() == 0 ? 1.0 : (originalWeightsDequantizationScales.size() == 1 ?
                 originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[i]));
     }
 
-    const auto weightsBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues));
-    const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
-    const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBiases(fullyConnected));
+    if (CNNNetworkHelper::isQuantizedConstWeights(fullyConnected)) {
+        const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
+        const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
+        const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
+        const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBiases(fullyConnected));
 
-    const std::shared_ptr<float> prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
+        const std::shared_ptr<float> prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
 
-    for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
-        float sum1 = 0.0;
-        float sum2 = 0.0;
-        const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
-            1.0 :
-            ((originalWeightsDequantizationScales.size() == 1) ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
+        for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
+            float sum1 = 0.0;
+            float sum2 = 0.0;
+            const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
+                1.0 :
+                ((originalWeightsDequantizationScales.size() == 1) ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
 
-        for (size_t w = 0; w < inputChannelsCount; ++w) {
-            const float kernel = weightsBuffer.get()[channel * inputChannelsCount + w];
-            const float shift = dequantizationValuesAreBroadcasted ? prevDequantizationShiftBuffer.get()[0] : prevDequantizationShiftBuffer.get()[channel];
-            sum1 += kernel * shift * weightsDequantizationScale;
-            sum2 += kernel * dataZeroPoints[w] * weightsDequantizationScale;
-        }
+            for (size_t w = 0; w < inputChannelsCount; ++w) {
+                const float kernel = weightsBuffer.get()[channel * inputChannelsCount + w];
+                const float shift = dequantizationValuesAreBroadcasted ? prevDequantizationShiftBuffer.get()[0] : prevDequantizationShiftBuffer.get()[w];
+                sum1 += kernel * shift * weightsDequantizationScale;
+                sum2 += kernel * dataZeroPoints[w] * weightsDequantizationScale;
+            }
 
-        dequantizationShifts[channel] = biasesBuffer == nullptr ?
-            sum1 :
-            (sum1 + biasesBuffer.get()[channel] -
-                (dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[channel]) *
-                biasesBuffer.get()[channel] * weightsDequantizationScale);
+            dequantizationShifts[channel] = biasesBuffer == nullptr ?
+                sum1 :
+                (sum1 + biasesBuffer.get()[channel] -
+                    prevDequantizationScaleBuffer.get()[0] *
+                    biasesBuffer.get()[channel] * weightsDequantizationScale);
+        }
     }
 }
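In both loops above the per-channel dequantization values reduce to a product and a weighted sum. A self-contained numeric sketch of the symmetric case (the names and the single broadcast activation scale and shift are illustrative simplifications of the buffers used above):

    #include <cstddef>
    #include <vector>

    // scales[c] = activationScale * weightsScale
    // shifts[c] = sum_i W[c][i] * activationShift * weightsScale
    // (a bias correction term is added when the layer has biases)
    void calculateSymmetricDequantization(
        const std::vector<float>& weights,  // [out x in], row-major
        std::size_t outputChannels,
        std::size_t inputChannels,
        float activationScale,
        float activationShift,
        float weightsScale,
        std::vector<float>& scales,
        std::vector<float>& shifts) {
        scales.assign(outputChannels, activationScale * weightsScale);
        shifts.assign(outputChannels, 0.f);
        for (std::size_t c = 0; c < outputChannels; ++c) {
            for (std::size_t i = 0; i < inputChannels; ++i) {
                shifts[c] += weights[c * inputChannels + i] * activationShift * weightsScale;
            }
        }
    }

The asymmetric variant additionally accumulates a kernel-times-zero-point term per input channel (sum2 above), which is why both variants can only fold the shifts when the quantized weights are available as a constant blob.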
diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp
index 2950244..d8c0538 100644
--- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp
+++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp
@@ -382,7 +382,7 @@ int CNNNetworkHelper::onWeightsInDepth(const CNNLayer& layer) {
     for (const CNNLayerPtr& child : children) {
         if ((CaselessEq<std::string>()(child->type, "Convolution") ||
             CaselessEq<std::string>()(child->type, "FullyConnected") ||
-            CaselessEq<std::string>()(child->type, "GEMM")) &&
+            CaselessEq<std::string>()(child->type, "Gemm")) &&
             (child->insData.size() >= 2lu)) {
             const std::vector<CNNLayerPtr> parents = getParentsRecursivelyExceptTypes(*child, {}, 1);
             for (const CNNLayerPtr& parent : parents) {
@@ -406,6 +406,15 @@ bool CNNNetworkHelper::onWeights(const CNNLayer& layer) {
     return result == 1;
 }
 
+bool CNNNetworkHelper::onConstWeightsPath(const CNNLayer& quantize) {
+    CNNLayerPtr parent = CNNNetworkHelper::getParent(quantize, 0);
+    if (parent == nullptr) {
+        THROW_IE_LPT_EXCEPTION(quantize) << "parent layer is nullable";
+    }
+
+    return parent->type == "Const";
+}
+
 size_t CNNNetworkHelper::getIndex(const CNNLayer& layer) {
     const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(layer);
     if (children.size() != 1) {
@@ -1598,6 +1607,27 @@ Blob::Ptr CNNNetworkHelper::quantizeWeights(const CNNLayer& quantize, const bool
     return targetBlob;
 }
 
+bool CNNNetworkHelper::isQuantizedConstWeights(const CNNLayer& layer) {
+    CNNLayerPtr quantize = CNNNetworkHelper::getParent(layer, 1);
+    if (quantize == nullptr) {
+        return false;
+    }
+
+    if (quantize->type == "Const") {
+        return true;
+    }
+
+    if (quantize->type != "FakeQuantize") {
+        return false;
+    }
+
+    if (quantize->insData.size() != 5ul) {
+        THROW_IE_LPT_EXCEPTION(*quantize) << "unexpected inputs size";
+    }
+
+    return onConstWeightsPath(*quantize);
+}
+
 int CNNNetworkHelper::getConstParentBranchID(const CNNLayer& layer) {
     int constBranchID = -1;
     for (int i = 0; i < layer.insData.size(); i++) {
diff --git a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
index 987f2bc..0d5503e 100644
--- a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
+++ b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
@@ -200,7 +200,9 @@ QuantizationDetails QuantizationDetails::getDetails(const CNNLayer& quantize) {
     size_t outputIntervalsCount;
     getOutputIntervals(quantize, outputLowValues, outputHighValues, outputIntervalsCount);
 
-    const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(quantize, CNNNetworkHelper::onWeights(quantize));
+    const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(
+        quantize,
+        CNNNetworkHelper::onWeights(quantize) && CNNNetworkHelper::onConstWeightsPath(quantize));
     if (!outputLayoutIsSupported(quantize)) {
         THROW_IE_LPT_EXCEPTION(quantize) << "Expected output channels count " << outputIntervalsCount << " but found " << outputChannelsCount;
     }
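The two helpers added to network_helper.cpp encode one rule: the second input of a weightable layer counts as quantized constant weights only when it is a Const, or a FakeQuantize fed by a Const. A condensed model over a toy graph structure (the struct below is illustrative; the real code walks CNNLayer::insData via getParent):

    #include <memory>
    #include <string>
    #include <vector>

    struct Layer {
        std::string type;
        std::vector<std::shared_ptr<Layer>> inputs;  // inputs[1] is the weights input
    };

    bool onConstWeightsPath(const Layer& quantize) {
        return !quantize.inputs.empty() && (quantize.inputs[0]->type == "Const");
    }

    bool isQuantizedConstWeights(const Layer& layer) {
        if (layer.inputs.size() < 2) {
            return false;                                 // no weights input at all
        }
        const Layer& onWeights = *layer.inputs[1];
        if (onWeights.type == "Const") {
            return true;                                  // raw constant weights
        }
        if (onWeights.type != "FakeQuantize") {
            return false;                                 // e.g. ScaleShift: runtime weights
        }
        return onConstWeightsPath(onWeights);             // FakeQuantize over a Const
    }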
diff --git a/inference-engine/src/low_precision_transformations/src/transformer.cpp b/inference-engine/src/low_precision_transformations/src/transformer.cpp
index c47328d..a4603ae 100644
--- a/inference-engine/src/low_precision_transformations/src/transformer.cpp
+++ b/inference-engine/src/low_precision_transformations/src/transformer.cpp
@@ -103,40 +103,55 @@ void LowPrecisionTransformations::setQuantizedTensorAlignmentOnWeights(
     }
 }
 
-LowPrecisionTransformations& LowPrecisionTransformations::remove(const std::string& layerName) {
-    removeBranchSpecificTransformations(layerName);
-    removeTransformations(layerName);
-    removeCleanupTransformations(layerName);
+LowPrecisionTransformations& LowPrecisionTransformations::remove(const std::string& layerType) {
+    std::string type = layerType;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    removeBranchSpecificTransformations(type);
+    removeTransformations(type);
+    removeCleanupTransformations(type);
     return *this;
 }
 
-LowPrecisionTransformations& LowPrecisionTransformations::removeBranchSpecificTransformations(const std::string& layerName) {
-    branchSpecificTransformations.erase(layerName);
+LowPrecisionTransformations& LowPrecisionTransformations::removeBranchSpecificTransformations(const std::string& layerType) {
+    std::string type = layerType;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    branchSpecificTransformations.erase(type);
     return *this;
 }
 
-LowPrecisionTransformations& LowPrecisionTransformations::removeTransformations(const std::string& layerName) {
-    transformations.erase(layerName);
+LowPrecisionTransformations& LowPrecisionTransformations::removeTransformations(const std::string& layerType) {
+    std::string type = layerType;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    transformations.erase(type);
     return *this;
 }
 
-LowPrecisionTransformations& LowPrecisionTransformations::removeCleanupTransformations(const std::string& layerName) {
-    cleanupTransformations.erase(layerName);
+LowPrecisionTransformations& LowPrecisionTransformations::removeCleanupTransformations(const std::string& layerType) {
+    std::string type = layerType;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    cleanupTransformations.erase(type);
     return *this;
 }
 
 LayerTransformationPtr LowPrecisionTransformations::find(const std::string& layerType) const {
-    auto it = branchSpecificTransformations.find(layerType);
+    std::string type = layerType;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    auto it = branchSpecificTransformations.find(type);
     if (it != branchSpecificTransformations.end()) {
         return it->second;
     }
 
-    it = transformations.find(layerType);
+    it = transformations.find(type);
     if (it != transformations.end()) {
         return it->second;
     }
 
-    it = cleanupTransformations.find(layerType);
+    it = cleanupTransformations.find(type);
     if (it != cleanupTransformations.end()) {
         return it->second;
     }
@@ -175,28 +190,28 @@ void LowPrecisionTransformations::setLayerTransformationsManager(
 
 LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const LayerTransformation::Params& params) {
     return LowPrecisionTransformations(
         std::map<std::string, LayerTransformationPtr>({
-            { "Concat", LayerTransformationPtr(new ConcatMultiChannelsTransformation(params))}
+            { "concat", LayerTransformationPtr(new ConcatMultiChannelsTransformation(params))}
         }),
         std::map<std::string, LayerTransformationPtr>({
-            { "Convolution", LayerTransformationPtr(new ConvolutionTransformation(params)) },
-            { "Pooling", LayerTransformationPtr(new PoolingTransformation(params)) },
-            { "FakeQuantize", LayerTransformationPtr(new FakeQuantizeTransformation(params)) },
-            { "Reshape", LayerTransformationPtr(new ReshapeTransformation(params)) },
-            { "FullyConnected", LayerTransformationPtr(new FullyConnectedTransformation(params)) },
-            { "GEMM", LayerTransformationPtr(new FullyConnectedTransformation(params)) },
-            { "Permute", LayerTransformationPtr(new PermuteTransformation(params)) },
-            { "Squeeze", LayerTransformationPtr(new SqueezeTransformation(params)) },
-            { "ReLU", LayerTransformationPtr(new ActivationTransformation(params)) },
-            { "MVN", LayerTransformationPtr(new MvnTransformation(params)) },
-            { "Eltwise", LayerTransformationPtr(new EltwiseTransformation(params)) },
-            { "Resample", LayerTransformationPtr(new ResampleTransformation(params)) },
-            { "Power", LayerTransformationPtr(new PowerTransformation(params)) },
-            { "DepthToSpace", LayerTransformationPtr(new DepthToSpaceTransformation(params)) },
-            { "Normalize", LayerTransformationPtr(new NormalizeTransformation(params)) }
+            { "convolution", LayerTransformationPtr(new ConvolutionTransformation(params)) },
+            { "pooling", LayerTransformationPtr(new PoolingTransformation(params)) },
+            { "fakequantize", LayerTransformationPtr(new FakeQuantizeTransformation(params)) },
+            { "reshape", LayerTransformationPtr(new ReshapeTransformation(params)) },
+            { "fullyconnected", LayerTransformationPtr(new FullyConnectedTransformation(params)) },
+            { "gemm", LayerTransformationPtr(new FullyConnectedTransformation(params)) },
+            { "permute", LayerTransformationPtr(new PermuteTransformation(params)) },
+            { "squeeze", LayerTransformationPtr(new SqueezeTransformation(params)) },
+            { "relu", LayerTransformationPtr(new ActivationTransformation(params)) },
+            { "mvn", LayerTransformationPtr(new MvnTransformation(params)) },
+            { "eltwise", LayerTransformationPtr(new EltwiseTransformation(params)) },
+            { "resample", LayerTransformationPtr(new ResampleTransformation(params)) },
+            { "power", LayerTransformationPtr(new PowerTransformation(params)) },
+            { "depthtospace", LayerTransformationPtr(new DepthToSpaceTransformation(params)) },
+            { "normalize", LayerTransformationPtr(new NormalizeTransformation(params)) }
         }),
         std::map<std::string, LayerTransformationPtr>({
-            { "FakeQuantize", LayerTransformationPtr(new FuseFakeQuantizeAndScaleShiftTransformation(params)) },
-            { "ScaleShift", LayerTransformationPtr(new ScaleShiftToConvolutionTransformation(params)) },
+            { "fakequantize", LayerTransformationPtr(new FuseFakeQuantizeAndScaleShiftTransformation(params)) },
+            { "scaleshift", LayerTransformationPtr(new ScaleShiftToConvolutionTransformation(params)) },
         }));
 }
 
@@ -314,7 +329,9 @@ void LowPrecisionTransformer::transform(ICNNNetwork& network) {
             continue;
         }
 
-        const auto it = transformations.branchSpecificTransformations.find(layer->type);
+        std::string type = layer->type;
+        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+        const auto it = transformations.branchSpecificTransformations.find(type);
         if (it == transformations.branchSpecificTransformations.end()) {
             continue;
         }
@@ -345,7 +362,10 @@ void LowPrecisionTransformer::transform(ICNNNetwork& network) {
         }
 
         bool transformed;
-        const auto it = transformations.transformations.find(layer->type);
+
+        std::string type = layer->type;
+        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+        const auto it = transformations.transformations.find(type);
         if (it != transformations.transformations.end()) {
             it->second->transform(context, *layer);
             transformed = true;
@@ -383,7 +403,9 @@ void LowPrecisionTransformer::transform(ICNNNetwork& network) {
             continue;
         }
 
-        const auto it = transformations.cleanupTransformations.find(layer->type);
+        std::string type = layer->type;
+        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+        const auto it = transformations.cleanupTransformations.find(type);
         if (it != transformations.cleanupTransformations.end()) {
             it->second->transform(context, *layer);
         }
@@ -402,7 +424,10 @@ void LowPrecisionTransformer::transform(ICNNNetwork& network) {
 }
 
 std::vector<Precision> LowPrecisionTransformer::getPrecisionsOnActivations(const std::string& layerType) const noexcept {
-    const LayerTransformationPtr transformation = transformations.find(layerType);
+    std::string type = layerType;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    const LayerTransformationPtr transformation = transformations.find(type);
     if (transformation == nullptr) {
         return std::vector<Precision>();
     }
@@ -410,7 +435,10 @@ std::vector<Precision> LowPrecisionTransformer::getPrecisionsOnActivations(const
 }
 
 bool LowPrecisionTransformer::isQuantized(const CNNLayer& layer) const noexcept {
-    const LayerTransformationPtr transformation = transformations.find(layer.type);
+    std::string type = layer.type;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    const LayerTransformationPtr transformation = transformations.find(type);
     if (transformation == nullptr) {
         return false;
     }
@@ -418,7 +446,10 @@ bool LowPrecisionTransformer::isQuantized(const CNNLayer& layer) const noexcept
 }
 
 bool LowPrecisionTransformer::isPrecisionPreserved(const CNNLayer& layer) const noexcept {
-    const LayerTransformationPtr transformation = transformations.find(layer.type);
+    std::string type = layer.type;
+    std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+
+    const LayerTransformationPtr transformation = transformations.find(type);
     if (transformation == nullptr) {
         return false;
     }
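Since registration and lookup now both lowercase the key, the public API accepts layer types in any casing. A hypothetical usage sketch, assuming the LPT headers above:

    using namespace InferenceEngine::details;

    LayerTransformation::Params params;
    LowPrecisionTransformations transformations = LowPrecisionTransformer::getAllTransformations(params);

    // Before this patch only the exact registered spelling matched; now all of
    // these address the same normalized entries:
    transformations.remove("GEMM");  // erases the "gemm" entry
    const LayerTransformationPtr fullyConnected = transformations.find("FullyConnected");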
diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
index ce8a3f3..aa0fcfe 100644
--- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
@@ -96,26 +96,29 @@ bool WeightableLayerTransformation::isQuantized(const CNNLayer& layer) const noe
         return false;
     }
 
-    const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(layer, roundQuantizedValues);
-    if ((weightsBlob == nullptr) || (!CNNNetworkHelper::isBlobPrecisionSupported(weightsBlob->getTensorDesc().getPrecision()))) {
-        return false;
-    }
+    if (CNNNetworkHelper::isQuantizedConstWeights(layer)) {
+        const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(layer, roundQuantizedValues);
+        if ((weightsBlob == nullptr) || (!CNNNetworkHelper::isBlobPrecisionSupported(weightsBlob->getTensorDesc().getPrecision()))) {
+            return false;
+        }
 
-    const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(layer);
-    if ((biasesBlob != nullptr) && (!CNNNetworkHelper::isBlobPrecisionSupported(biasesBlob->getTensorDesc().getPrecision()))) {
-        return false;
-    }
-    const CNNLayerPtr parentOnWeights = CNNNetworkHelper::getParent(layer, 1);
-    if (parentOnWeights == nullptr) {
-        return false;
-    }
+        const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(layer);
+        if ((biasesBlob != nullptr) && (!CNNNetworkHelper::isBlobPrecisionSupported(biasesBlob->getTensorDesc().getPrecision()))) {
+            return false;
+        }
 
-    if (parentOnWeights->type != "FakeQuantize") {
-        const Precision precision = parentOnWeights->outData[0]->getPrecision();
-        if ((precision != Precision::I8) && (precision != Precision::U8)) {
+        const CNNLayerPtr parentOnWeights = CNNNetworkHelper::getParent(layer, 1);
+        if (parentOnWeights == nullptr) {
             return false;
         }
+
+        if (parentOnWeights->type != "FakeQuantize") {
+            const Precision precision = parentOnWeights->outData[0]->getPrecision();
+            if ((precision != Precision::I8) && (precision != Precision::U8)) {
+                return false;
+            }
+        }
     }
 
     return true;
@@ -289,7 +292,8 @@ void WeightableLayerTransformation::updateToSupportAsymmetricQuantization(
         weightsPrecisionsInfo.low);
     if (!std::all_of(weightsConvertedInBlob.get(), weightsConvertedInBlob.get() + weightsShifts.size(), [](float value) { return value == 0.0; })) {
         const CNNLayerPtr parentOnWeights = CNNNetworkHelper::getParent(layer, 1ul);
-        createAsymmetric(context, *parentOnWeights, layer, weightsPrecisionsInfo, weightsShifts, true);
+        const bool onWeights = CNNNetworkHelper::isQuantizedConstWeights(layer);
+        createAsymmetric(context, *parentOnWeights, layer, weightsPrecisionsInfo, weightsShifts, onWeights);
     }
 }
@@ -359,7 +363,9 @@ DataPrecision WeightableLayerTransformation::fillDequantizationsForWeightsPath(
     const bool supportAsymmetricQuantization,
     std::vector<float>& dequantizationScales,
     std::vector<float>& dequantizationShifts) const {
-    if ((weightableLayer.type != "Convolution") && (weightableLayer.type != "FullyConnected") && (weightableLayer.type != "GEMM")) {
+    if (!CaselessEq<std::string>()(weightableLayer.type, "Convolution") &&
+        !CaselessEq<std::string>()(weightableLayer.type, "FullyConnected") &&
+        !CaselessEq<std::string>()(weightableLayer.type, "Gemm")) {
         THROW_IE_EXCEPTION << "layer '" << weightableLayer.name << "' has unexpected type '" << weightableLayer.type << "'";
     }
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp
new file mode 100644
index 0000000..2544cd6
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp
@@ -0,0 +1,40 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "low_precision_transformations/fully_connected_transformation.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace InferenceEngine::details;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32,
+    InferenceEngine::Precision::FP16
+};
+
+const std::vector<InferenceEngine::SizeVector> dimensions = {
+    InferenceEngine::SizeVector({ 1, 16 })
+};
+
+const std::vector<LayerTransformation::Params> transformationParamValues = {
+    LayerTestsUtils::LayerTransformationParamsFactory::createParams(),
+    LayerTestsUtils::LayerTransformationParamsFactory::createParamsI8I8(),
+    LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8()
+};
+
+INSTANTIATE_TEST_CASE_P(LPT, FullyConnectedTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::ValuesIn(dimensions),
+        ::testing::Values(CommonTestUtils::DEVICE_CPU),
+        ::testing::ValuesIn(transformationParamValues)),
+    FullyConnectedTransformation::getTestCaseName);
+}  // namespace
+
+
+
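For reference, ::testing::Combine instantiates the cartesian product of its value lists, so the CPU suite above yields 2 precisions x 1 shape x 1 device x 3 parameter sets = 6 tests, each named through getTestCaseName. A sketch of how the name prefix is formed (mirroring the stream expression in the shared test source further down):

    #include <cstddef>
    #include <sstream>
    #include <vector>

    int main() {
        const std::vector<size_t> shape({ 1, 16 });  // InferenceEngine::SizeVector is std::vector<size_t>
        std::ostringstream result;
        result << shape.size() << "D_" << "FP32" << "_" << "CPU";
        // result.str() == "2D_FP32_CPU"; toString(params) is appended after one
        // more underscore to disambiguate the three parameter sets.
        return 0;
    }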
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp
new file mode 100644
index 0000000..add9688
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp
@@ -0,0 +1,41 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "low_precision_transformations/gemm_transformation.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace InferenceEngine::details;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32,
+    InferenceEngine::Precision::FP16
+};
+
+const std::vector<InferenceEngine::SizeVector> dimensions = {
+    InferenceEngine::SizeVector({ 1, 3, 16, 16 })
+};
+
+const std::vector<LayerTransformation::Params> transformationParamValues = {
+    LayerTestsUtils::LayerTransformationParamsFactory::createParams().setSupportAsymmetricQuantization(true),
+    LayerTestsUtils::LayerTransformationParamsFactory::createParams().setSupportAsymmetricQuantization(false),
+    LayerTestsUtils::LayerTransformationParamsFactory::createParamsI8I8(),
+    LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8()
+};
+
+INSTANTIATE_TEST_CASE_P(LPT, GemmTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::ValuesIn(dimensions),
+        ::testing::Values(CommonTestUtils::DEVICE_CPU),
+        ::testing::ValuesIn(transformationParamValues)),
+    GemmTransformation::getTestCaseName);
+}  // namespace
+
+
+
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp
new file mode 100644
index 0000000..99f0d80
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp
@@ -0,0 +1,37 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "low_precision_transformations/fully_connected_transformation.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace InferenceEngine::details;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32
+};
+
+const std::vector<InferenceEngine::SizeVector> dimensions = {
+    InferenceEngine::SizeVector({ 1, 16 })
+};
+
+const std::vector<LayerTransformation::Params> transformationParamValues = {
+    LayerTestsUtils::LayerTransformationParamsFactory::createParams()
+};
+
+INSTANTIATE_TEST_CASE_P(LPT, FullyConnectedTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::ValuesIn(dimensions),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU),
+        ::testing::ValuesIn(transformationParamValues)),
+    FullyConnectedTransformation::getTestCaseName);
+}  // namespace
+
+
+
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp
new file mode 100644
index 0000000..afe6987
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp
@@ -0,0 +1,37 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "low_precision_transformations/gemm_transformation.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace InferenceEngine::details;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32
+};
+
+const std::vector<InferenceEngine::SizeVector> dimensions = {
+    InferenceEngine::SizeVector({ 1, 3, 16, 16 })
+};
+
+const std::vector<LayerTransformation::Params> transformationParamValues = {
+    LayerTestsUtils::LayerTransformationParamsFactory::createParams()
+};
+
+INSTANTIATE_TEST_CASE_P(LPT, GemmTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::ValuesIn(dimensions),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU),
+        ::testing::ValuesIn(transformationParamValues)),
+    GemmTransformation::getTestCaseName);
+}  // namespace
+
+
+
diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fully_connected_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fully_connected_transformation.hpp
new file mode 100644
index 0000000..f0da013
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fully_connected_transformation.hpp
@@ -0,0 +1,27 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string>
+#include <memory>
+
+#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp"
+
+namespace LayerTestsDefinitions {
+
+class FullyConnectedTransformation :
+    public testing::WithParamInterface<LayerTransformationParams>,
+    public LayerTestsUtils::LayerTransformation {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<LayerTransformationParams> obj);
+
+protected:
+    void SetUp() override;
+
+private:
+    void validate();
+};
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/gemm_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/gemm_transformation.hpp
new file mode 100644
index 0000000..37a032c
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/gemm_transformation.hpp
@@ -0,0 +1,27 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string>
+#include <memory>
+
+#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp"
+
+namespace LayerTestsDefinitions {
+
+class GemmTransformation :
+    public testing::WithParamInterface<LayerTransformationParams>,
+    public LayerTestsUtils::LayerTransformation {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<LayerTransformationParams> obj);
+
+protected:
+    void SetUp() override;
+
+private:
+    void validate();
+};
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fully_connected_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fully_connected_transformation.cpp
new file mode 100644
index 0000000..37e0323
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fully_connected_transformation.cpp
@@ -0,0 +1,105 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision_transformations/fully_connected_transformation.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+
+#include <ie_core.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "ngraph_functions/pass/convert_prc.hpp"
+#include "low_precision_transformations/network_helper.hpp"
+#include "ngraph_functions/builders.hpp"
+
+namespace LayerTestsDefinitions {
+
+std::string FullyConnectedTransformation::getTestCaseName(testing::TestParamInfo<LayerTransformationParams> obj) {
+    InferenceEngine::Precision netPrecision;
+    InferenceEngine::SizeVector inputShapes;
+    std::string targetDevice;
+    InferenceEngine::details::LayerTransformation::Params params;
+    std::tie(netPrecision, inputShapes, targetDevice, params) = obj.param;
+
+    std::ostringstream result;
+    result << inputShapes.size() << "D_" << netPrecision.name() << "_" << targetDevice << "_" << toString(params);
+    return result.str();
+}
+
+void FullyConnectedTransformation::SetUp() {
+    InferenceEngine::SizeVector inputShape;
+    InferenceEngine::Precision netPrecision;
+    InferenceEngine::details::LayerTransformation::Params params;
+    std::tie(netPrecision, inputShape, targetDevice, params) = this->GetParam();
+    auto ngPrecision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+    const auto paramNode = std::make_shared<ngraph::opset1::Parameter>(ngPrecision, ngraph::Shape(inputShape));
+    const auto fakeQuantizeOnActivations = ngraph::builder::makeFakeQuantize(
+        paramNode, ngPrecision, 256ul, { 1ul },
+        { 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f });
+    fakeQuantizeOnActivations->set_friendly_name("fakeQuantizeOnActivations");
+
+    auto weightsConst = std::make_shared<ngraph::opset1::Constant>(ngPrecision, ngraph::Shape{ inputShape[1], inputShape[1] }, std::vector<float>({ 1.f }));
+    const auto fakeQuantizeOnWeights = ngraph::builder::makeFakeQuantize(
+        weightsConst, ngPrecision, 256ul, { 1ul, 1ul },
+        { -128.f / 8.f }, { 127.f / 8.f }, { -128.f / 8.f }, { 127.f / 8.f });
+    fakeQuantizeOnWeights->set_friendly_name("fakeQuantizeOnWeights");
+
+    const std::shared_ptr<ngraph::opset1::MatMul> fullyConnected = std::make_shared<ngraph::opset1::MatMul>(
+        fakeQuantizeOnActivations->output(0),
+        fakeQuantizeOnWeights->output(0),
+        false,
+        false);
+    fullyConnected->set_friendly_name("fullyConnected");
+
+    ngraph::ResultVector results {std::make_shared<ngraph::opset1::Result>(fullyConnected)};
+    function = std::make_shared<ngraph::Function>(results, ngraph::ParameterVector { paramNode }, "FullyConnectedTransformation");
+
+    // TODO: move to another place
+    validate();
+}
+
+void FullyConnectedTransformation::validate() {
+    InferenceEngine::SizeVector inputShape;
+    InferenceEngine::Precision netPrecision;
+    InferenceEngine::details::LayerTransformation::Params params;
+    std::tie(netPrecision, inputShape, targetDevice, params) = this->GetParam();
+
+    const InferenceEngine::CNNNetwork network = transform(params);
+
+    IE_SUPPRESS_DEPRECATED_START
+
+    const InferenceEngine::CNNLayerPtr fullyConnected = InferenceEngine::details::CNNNetworkHelper::getLayer(network, "fullyConnected_original");
+    EXPECT_NE(nullptr, fullyConnected) << "fullyConnected_original was not found, transformation was not handled";
+    EXPECT_EQ("FullyConnected", fullyConnected->type);
+
+    InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo();
+    EXPECT_EQ(1, outputs.size());
+
+    for (const auto it : outputs) {
+        const InferenceEngine::CNNLayerPtr outputLayer = it.second->getCreatorLayer().lock();
+        EXPECT_TRUE(outputLayer != nullptr);
+        EXPECT_EQ("ScaleShift", outputLayer->type);
+
+        checkParentPrecision(outputLayer, false);
+    }
+
+    IE_SUPPRESS_DEPRECATED_END
+}
+
+TEST_P(FullyConnectedTransformation, CompareWithRefImpl) {
+    Run();
+
+    if (targetDevice == std::string{CommonTestUtils::DEVICE_GPU}) {
+        PluginCache::get().reset();
+    }
+};
+
+}  // namespace LayerTestsDefinitions
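The FakeQuantize intervals in SetUp above are chosen so the tensors land on integer-friendly grids. A small sketch of the arithmetic, assuming the usual FakeQuantize semantics with levels = 256:

    // Activations: [0, 255/4] over 256 levels
    //   step = (255/4 - 0) / (256 - 1) = 0.25    -> behaves like U8 with scale 0.25
    // Weights: [-128/8, 127/8] over 256 levels
    //   step = (127/8 + 128/8) / (256 - 1) = 0.125 -> behaves like I8 with scale 0.125
    const float activationScale = (255.f / 4.f - 0.f) / 255.f;       // 0.25f
    const float weightsScale = (127.f / 8.f + 128.f / 8.f) / 255.f;  // 0.125f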
diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/gemm_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/gemm_transformation.cpp
new file mode 100644
index 0000000..71fbc61
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/gemm_transformation.cpp
@@ -0,0 +1,100 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision_transformations/gemm_transformation.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+
+#include <ie_core.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "ngraph_functions/pass/convert_prc.hpp"
+#include "ngraph_functions/builders.hpp"
+
+namespace LayerTestsDefinitions {
+
+std::string GemmTransformation::getTestCaseName(testing::TestParamInfo<LayerTransformationParams> obj) {
+    InferenceEngine::Precision netPrecision;
+    InferenceEngine::SizeVector inputShapes;
+    std::string targetDevice;
+    InferenceEngine::details::LayerTransformation::Params params;
+    std::tie(netPrecision, inputShapes, targetDevice, params) = obj.param;
+
+    std::ostringstream result;
+    result << inputShapes.size() << "D_" << netPrecision.name() << "_" << targetDevice << "_" << toString(params);
+    return result.str();
+}
+
+void GemmTransformation::SetUp() {
+    InferenceEngine::SizeVector inputShape;
+    InferenceEngine::Precision netPrecision;
+    InferenceEngine::details::LayerTransformation::Params params;
+    std::tie(netPrecision, inputShape, targetDevice, params) = this->GetParam();
+    auto ngPrecision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+    const auto paramNode1 = std::make_shared<ngraph::opset1::Parameter>(ngPrecision, ngraph::Shape(inputShape));
+    const auto fakeQuantizeOnActivations = ngraph::builder::makeFakeQuantize(
+        paramNode1, ngPrecision, 256ul, { 1ul },
+        { 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f });
+    fakeQuantizeOnActivations->set_friendly_name("fakeQuantizeOnActivations");
+
+    const auto paramNode2 = std::make_shared<ngraph::opset1::Parameter>(ngPrecision, ngraph::Shape(inputShape));
+    const auto fakeQuantizeOnWeights = ngraph::builder::makeFakeQuantize(
+        paramNode2, ngPrecision, 256ul, { 1ul },
+        { -128.f / 8.f }, { 127.f / 8.f }, { -128.f / 8.f }, { 127.f / 8.f });
+    fakeQuantizeOnWeights->set_friendly_name("fakeQuantizeOnWeights");
+
+    const auto matMul = std::make_shared<ngraph::opset1::MatMul>(
+        fakeQuantizeOnActivations->output(0),
+        fakeQuantizeOnWeights->output(0),
+        false,
+        false);
+    matMul->set_friendly_name("matMul");
+
+    ngraph::ResultVector results {std::make_shared<ngraph::opset1::Result>(matMul)};
+    function = std::make_shared<ngraph::Function>(results, ngraph::ParameterVector { paramNode1, paramNode2 }, "GemmTransformation");
+
+    // TODO: move to another place
+    validate();
+}
+
+void GemmTransformation::validate() {
+    InferenceEngine::SizeVector inputShape;
+    InferenceEngine::Precision netPrecision;
+    InferenceEngine::details::LayerTransformation::Params params;
+    std::tie(netPrecision, inputShape, targetDevice, params) = this->GetParam();
+
+    const InferenceEngine::CNNNetwork network = transform(params);
+
+    IE_SUPPRESS_DEPRECATED_START
+
+    InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo();
+    EXPECT_EQ(1, outputs.size());
+
+    for (const auto it : outputs) {
+        const InferenceEngine::CNNLayerPtr outputLayer = it.second->getCreatorLayer().lock();
+        EXPECT_TRUE(outputLayer != nullptr);
+        EXPECT_EQ("ScaleShift", outputLayer->type);
+
+        checkParentPrecision(outputLayer, false);
+    }
+
+    IE_SUPPRESS_DEPRECATED_END
+}
+
+TEST_P(GemmTransformation, CompareWithRefImpl) {
+    Run();
+
+    if (targetDevice == std::string{CommonTestUtils::DEVICE_GPU}) {
+        PluginCache::get().reset();
+    }
+};
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp b/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp
index 37722b1..2e43412 100644
--- a/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp
+++ b/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp
@@ -81,7 +81,7 @@ InferenceEngine::details::LowPrecisionTransformer LayerTransformation::getLowPre
 
 void LayerTransformation::checkParentPrecision(const InferenceEngine::CNNLayerPtr& layer, const bool lowPrecision) {
     IE_SUPPRESS_DEPRECATED_START
-    EXPECT_EQ(1ul, layer->insData.size()) << "insert data count is no expected: " << layer->insData.size();
+    EXPECT_EQ(1ul, layer->insData.size()) << "insert data count " << layer->insData.size() << " is not correct for layer " << layer->name;
     const InferenceEngine::DataPtr insData = layer->insData[0].lock();
     EXPECT_TRUE(insData != nullptr) << "insert data is nullable";
     const InferenceEngine::Precision precision = insData->getTensorDesc().getPrecision();
-- 
2.7.4