add_subdirectory(inference_engine)
-add_subdirectory(low_precision_transformations_legacy)
-
add_subdirectory(low_precision_transformations)
# add a custom target to build all Inference Engine Core libraries
DEPENDS inference_engine_transformations inference_engine_legacy
inference_engine inference_engine_preproc
inference_engine_ir_v7_reader inference_engine_ir_reader
- inference_engine_lp_transformations_legacy
inference_engine_lp_transformations)
if(NGRAPH_ONNX_IMPORT_ENABLE)
target_link_libraries(${TARGET_NAME} PRIVATE clDNN_lib pugixml
inference_engine inference_engine_legacy
- inference_engine_transformations)
-
-if(USE_CNNNETWORK_LPT)
- target_link_libraries(${TARGET_NAME} PRIVATE inference_engine_lp_transformations_legacy)
-else()
- target_link_libraries(${TARGET_NAME} PRIVATE inference_engine_lp_transformations)
-endif()
+ inference_engine_transformations
+ inference_engine_lp_transformations)
set(CLDNN_TOP_FOLDER "${IE_MAIN_SOURCE_DIR}/thirdparty/clDNN")
target_include_directories(${TARGET_NAME} PRIVATE
#include "cldnn_executable_network.h"
#include "cldnn_custom_layer.h"
-#ifndef USE_CNNNETWORK_LPT
-# include <low_precision/transformer.hpp>
-# include <low_precision/mat_mul.hpp>
-#endif
+#include <low_precision/transformer.hpp>
+#include <low_precision/mat_mul.hpp>
#ifdef __linux__
# include <dlfcn.h>
// Disable shape inference (WA for generic operations)
::ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
-#ifndef USE_CNNNETWORK_LPT
bool enableInt8;
-#endif
-
{
// Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
ngraph::pass::Manager manager;
manager.set_callback(transformations_callback);
manager.run_passes(nGraphFunc);
-#ifndef USE_CNNNETWORK_LPT
enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc);
if (enableInt8) {
const auto fp16_callback = [&baselineIsFP16](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
conversion_manager.set_callback(fp16_callback);
conversion_manager.run_passes(nGraphFunc);
}
-#endif
}
-#ifndef USE_CNNNETWORK_LPT
using namespace ngraph::pass::low_precision;
if (enableInt8) {
auto params = LayerTransformation::Params(
transformer.transform(nGraphFunc);
}
-#endif
{
ngraph::pass::Manager manager = ngraph::pass::Manager();
#include <sys/stat.h>
#include <exec_graph_info.hpp>
-#ifdef USE_CNNNETWORK_LPT
-#include "low_precision_transformations/transformer.hpp"
-#include "low_precision_transformations/fully_connected.hpp"
-#include "low_precision_transformations/gemm.hpp"
-#endif
-
#include <iostream>
#include <iomanip>
#include "cldnn_common_utils.h"
}
}
-#ifdef USE_CNNNETWORK_LPT
- bool allFQareSupported = true;
- if (config.enableInt8) {
- auto it = details::CNNNetworkIterator(&network);
- auto end = details::CNNNetworkIterator();
- while (it != end) {
- auto& layer = *it;
- if (layer->precision == Precision::FP16) {
- baselineIsFP16 = true;
- }
-
- if (CaselessEq<std::string>()(layer->type, "FakeQuantize")) {
- fqFound = true;
- auto levels = layer->GetParamAsUInt("levels");
- if (levels != 255 && levels != 256) {
- allFQareSupported = false;
- }
- }
- it++;
- }
- }
-
- if (config.enableInt8) {
- auto params = LayerTransformation::Params(true, // updatePrecisions
- true, // quantizeOutputs
- true, // weightsToConst
- LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations
- LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
- true, // roundQuantizedValues
- true, // updateBiases
- true, // supportAsymmetricQuantization
- {Precision::U8, Precision::I8}, // Precision on activations
- {Precision::I8}); // Precision on weights
-
- auto transforms = LowPrecisionTransformer::getAllTransformations(params)
- .add<FullyConnectedTransformation>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false), "FullyConnected")
- .add<GemmTransformation>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false), "GEMM");
-
- // [WA part1] Convert quantized FP16 model to FP32 to avoid possible overflow and mixed precision errors
- if (fqFound && allFQareSupported) {
- NetPass::ConvertPrecision(network, Precision::FP16, Precision::FP32);
- }
-
- LowPrecisionTransformer transformer(transforms);
- transformer.transform(network);
- }
-#endif
-
// [WA part2] Try to find non-quantized layers and convert them back to FP16
if (config.enableInt8) {
if (fqFound && baselineIsFP16 && config.enable_fp16_for_quantized_models) {
+++ /dev/null
-# Copyright (C) 2018-2019 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-#
-
-set (TARGET_NAME "inference_engine_lp_transformations_legacy")
-
-set(PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
-
-file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp)
-file(GLOB_RECURSE PUBLIC_HEADERS ${PUBLIC_HEADERS_DIR}/low_precision_transformations/*.hpp)
-
-# Create named folders for the sources within the .vcproj
-# Empty name lists them directly under the .vcproj
-
-source_group("src" FILES ${LIBRARY_SRC})
-source_group("include" FILES ${PUBLIC_HEADERS})
-
-# Create shared library
-
-add_library(${TARGET_NAME} SHARED
- ${LIBRARY_SRC}
- ${PUBLIC_HEADERS})
-
-ie_faster_build(${TARGET_NAME}
- UNITY
- PCH PRIVATE "src/precomp.hpp"
-)
-
-ie_add_vs_version_file(NAME ${TARGET_NAME}
- FILEDESCRIPTION "Inference Engine LP legacy transformations library")
-
-target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_INFERENCE_ENGINE_API
- PUBLIC USE_CNNNETWORK_LPT)
-
-target_link_libraries(${TARGET_NAME} PUBLIC inference_engine_legacy
- PRIVATE openvino::itt)
-
-target_include_directories(${TARGET_NAME} PUBLIC ${PUBLIC_HEADERS_DIR}
- $<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>)
-
-add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
-
-# LTO
-
-set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
-
-# developer package
-
-ie_developer_export_targets(${TARGET_NAME})
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <map>
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <vector>
-#include <algorithm>
-
-#include <ie_icnn_network.hpp>
-#include <cpp/ie_cnn_network.h>
-
-#include "low_precision_transformations/layer_transformation.hpp"
-#include "low_precision_transformations/transformation_context.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class INFERENCE_ENGINE_API_CLASS(ActivationTransformation) : public LayerTransformation {
-public:
- ActivationTransformation(const Params& params) : LayerTransformation(params) {}
- ~ActivationTransformation() override {};
- void transform(TransformationContext& context, CNNLayer& layer) const override;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <vector>
-#include <details/ie_exception.hpp>
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class DequantizationDetails {
-public:
- DequantizationDetails(
- const std::vector<float>& scales,
- const std::vector<float>& shifts,
- const size_t channelsCount) :
- scales(scales), shifts(shifts), channelsCount(checkChannelsCount(channelsCount)) {}
-
- DequantizationDetails(
- const std::vector<float>& scales,
- const std::vector<float>& shifts) :
- scales(scales), shifts(shifts), channelsCount(checkChannelsCount(shifts.size())) {}
-
- size_t checkChannelsCount(const size_t channelsCount) {
- if ((scales.size() != shifts.size()) || (shifts.size() != channelsCount)) {
- THROW_IE_EXCEPTION << "channels count is not correct";
- }
- return channelsCount;
- }
-
- bool isPerTensor() const {
- return isPerTensor(scales, shifts);
- }
-
- static bool isPerTensor(const std::vector<float>& scales, const std::vector<float>& shifts) {
- if ((scales.size() == 0) || (shifts.size() == 0)) {
- THROW_IE_EXCEPTION << "scale or shift values count is not correct";
- }
- return
- std::all_of(scales.begin(), scales.end(), [&](const float value) { return value == scales[0]; }) &&
- std::all_of(shifts.begin(), shifts.end(), [&](const float value) { return value == shifts[0]; });
- }
-
- const std::vector<float> scales;
- const std::vector<float> shifts;
- const size_t channelsCount;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <vector>
-#include <cstddef>
-
-namespace InferenceEngine {
-namespace details {
-
-class DequantizationInfo {
-public:
- DequantizationInfo(
- const size_t levels,
- const std::vector<float>& outputLowValues,
- const std::vector<float>& outputHighValues);
-
- size_t outputChannels() const;
-
- const size_t levels;
- const std::vector<float> outputLowValues;
- const std::vector<float> outputHighValues;
-};
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include "details/ie_exception.hpp"
-#include <string>
-#include <legacy/ie_layers.h>
-
-/**
-* @def THROW_IE_LPT_EXCEPTION
-* @brief A macro used to throw the exception with a notable description for low precision transformations
-*/
-#define THROW_IE_LPT_EXCEPTION(layer) throw InferenceEngine::details::InferenceEngineLptException(__FILE__, __LINE__, layer)
-
-namespace InferenceEngine {
-namespace details {
-
-class INFERENCE_ENGINE_API_CLASS(InferenceEngineLptException) : public InferenceEngineException {
-public:
- InferenceEngineLptException(const std::string& filename, const int line, const CNNLayer& layer) : InferenceEngineException(filename, line) {
- *this << "Exception during low precision transformation for " << layer.type << " layer '" << layer.name << "'. ";
- }
-};
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <string>
-#include <vector>
-#include <algorithm>
-#include <ie_common.h>
-
-#include "low_precision_transformations/network_helper.hpp"
-#include "low_precision_transformations/layer_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class INFERENCE_ENGINE_API_CLASS(ConcatTransformation) : public LayerTransformation {
-public:
- ConcatTransformation(const Params& params) : LayerTransformation(params) {}
- ~ConcatTransformation() override {};
- void transform(TransformationContext& context, CNNLayer& layer) const override;
-
-protected:
- void addDequantizationLayers(
- TransformationContext& context,
- Subgraph& subgraph,
- std::function<void(
- const CNNLayer& layer,
- const std::string& originalLayerName,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts)> getLayerDequantizationCallback) const;
-
-private:
- size_t getMinQuantizationLevels(
- const DataPrecision& dataPrecision,
- const float maxOutputInterval,
- const std::vector<QuantizationDetails>& quantizationLayersDetails,
- const float outputLowValue,
- const float outputHighValue) const;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <string>
-#include <vector>
-#include <algorithm>
-#include <ie_common.h>
-#include "low_precision_transformations/concat.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class INFERENCE_ENGINE_API_CLASS(ConcatMultiChannelsTransformation) : public ConcatTransformation {
-public:
- ConcatMultiChannelsTransformation(const Params& params) : ConcatTransformation(params) {}
- ~ConcatMultiChannelsTransformation() override {};
- void transform(TransformationContext& context, CNNLayer& layer) const override;
-
-private:
- static void fillDequantization(
- const CNNLayer& layer,
- const std::unordered_map<std::string, std::vector<float>>& dequantizationScalesLayers,
- const std::unordered_map<std::string, std::vector<float>>& dequantizationShiftsLayers,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts);
-
- static void fillQuantization(const CNNLayer& layer, std::vector<CNNLayerPtr>& fakeQuantizes);
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <ie_common.h>
-#include <algorithm>
-#include "low_precision_transformations/layer_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class INFERENCE_ENGINE_API_CLASS(ConstTransformation) : public LayerTransformation {
-private:
-public:
- ConstTransformation(const Params& params) : LayerTransformation(params) {}
- ~ConstTransformation() override {};
- void transform(TransformationContext& context, CNNLayer& layer) const override;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <vector>
-#include "low_precision_transformations/weightable_layer_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class INFERENCE_ENGINE_API_CLASS(ConvolutionTransformation) : public WeightableLayerTransformation {
-public:
- ConvolutionTransformation(const Params& params) : WeightableLayerTransformation(params) {}
- ~ConvolutionTransformation() override {};
- void transform(TransformationContext& context, CNNLayer& layer) const override;
-
-private:
- void calculateDequantizationForAsymmetric(
- const CNNLayer& convolution,
- const std::vector<float>& originalDataDequantizationScales,
- const std::vector<float>& originalDataDequantizationShifts,
- const std::vector<float>& dataZeroPoints,
- const std::vector<float>& originalWeightsDequantizationScales,
- const std::vector<float>& originalWeightsDequantizationShifts,
- const std::vector<float>& weightsZeroPoints,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts) const;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-
-#include <legacy/ie_layers.h>
-#include "low_precision_transformations/transformation_context.hpp"
-#include "low_precision_transformations/transparent_base_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class INFERENCE_ENGINE_API_CLASS(DepthToSpaceTransformation) : public TransparentBaseTransformation {
-public:
- DepthToSpaceTransformation(const Params& params) : TransparentBaseTransformation(params) {}
- ~DepthToSpaceTransformation() override {}
- void transform(TransformationContext& context, CNNLayer& layer) const override;
- bool isPrecisionPreserved(const CNNLayer& layer) const noexcept override;
- bool canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const override;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-
-#include "low_precision_transformations/layer_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class INFERENCE_ENGINE_API_CLASS(EltwiseTransformation) : public LayerTransformation {
-public:
- EltwiseTransformation(const Params& params) : LayerTransformation(params) {}
- ~EltwiseTransformation() override {}
- bool canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const override;
- void transform(TransformationContext& context, CNNLayer& layer) const override;
-
- bool isBroadcastByChannels(const CNNLayer& layer) const;
-
- static bool isSupported(const TensorDesc& tensorDesc1, const TensorDesc& tensorDesc2) noexcept;
- static bool isBroadcasted(const TensorDesc& tensorDesc) noexcept;
-
-private:
- static int getNotEmpty(const CNNLayer& eltwise);
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <vector>
-#include <ie_common.h>
-#include <algorithm>
-#include "low_precision_transformations/layer_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class INFERENCE_ENGINE_API_CLASS(FakeQuantizeTransformation) : public LayerTransformation {
-public:
- FakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {}
- ~FakeQuantizeTransformation() override {};
- void transform(TransformationContext& context, CNNLayer& layer) const override;
- void setWeightsToConst(const bool weightsToConst);
- bool isPrecisionPreserved(const CNNLayer& layer) const noexcept override;
-
-protected:
- void fuseScaleShift(TransformationContext& context, CNNLayerPtr fakeQuantizeLayer, CNNLayerPtr scaleShift) const;
-
- static Blob::Ptr reshapeWeightsIntervalConst(
- CNNLayer& constLayer,
- const std::vector<size_t>& dims,
- const Layout layout);
-
- static void reshapeFakeQuantize(
- CNNLayer& fakeQuantizeLayer,
- const std::vector<size_t>& dims,
- const Layout layout);
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <vector>
-#include <ie_common.h>
-#include <algorithm>
-#include "low_precision_transformations/weightable_layer_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class INFERENCE_ENGINE_API_CLASS(FullyConnectedTransformation) : public WeightableLayerTransformation {
-public:
- FullyConnectedTransformation(const Params& params) : WeightableLayerTransformation(params) {}
- ~FullyConnectedTransformation() override {};
- bool canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const override;
- void transform(TransformationContext& context, CNNLayer& layer) const override;
-
-private:
- void calculateDequantizationForSymmetric(
- const CNNLayer& fullyConnected,
- const std::vector<float>& originalWeightsDequantizationScales,
- const std::vector<float>& originalWeightsDequantizationShifts,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts,
- std::vector<float>& biasesShifts) const;
-
- void calculateDequantizationForAsymmetric(
- const CNNLayer& fullyConnected,
- const std::vector<float>& dataZeroPoints,
- const std::vector<float>& originalWeightsDequantizationScales,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts) const;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <vector>
-#include <ie_common.h>
-#include <algorithm>
-#include "low_precision_transformations/fake_quantize.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class INFERENCE_ENGINE_API_CLASS(FuseFakeQuantizeAndScaleShiftTransformation) : public FakeQuantizeTransformation {
-public:
- FuseFakeQuantizeAndScaleShiftTransformation(const Params& params) : FakeQuantizeTransformation(params) {}
- ~FuseFakeQuantizeAndScaleShiftTransformation() override {};
-
- void transform(TransformationContext& context, CNNLayer& layer) const override;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <vector>
-#include <ie_common.h>
-#include <algorithm>
-#include "low_precision_transformations/layer_transformation.hpp"
-#include "low_precision_transformations/fully_connected.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class INFERENCE_ENGINE_API_CLASS(GemmTransformation) : public FullyConnectedTransformation {
-public:
- GemmTransformation(const LayerTransformation::Params& params) : FullyConnectedTransformation(params) {}
- ~GemmTransformation() override {};
- bool canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const override;
- void transform(TransformationContext& context, CNNLayer& layer) const override;
-
- bool isQuantized(const CNNLayer& layer) const noexcept override;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include <legacy/ie_layers.h>
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-/**
- * @brief low precision transformation component interface.
- */
-class INFERENCE_ENGINE_API_CLASS(ILayerTransformationsManager) {
-public:
- virtual bool isQuantized(const CNNLayer& layer) const noexcept = 0;
- virtual bool isPrecisionPreserved(const CNNLayer& layer) const noexcept = 0;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "ie_precision.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-/**
- * @brief low precision transformation component interface.
- */
-class INFERENCE_ENGINE_API_CLASS(IParamsManager) {
-public:
- virtual std::vector<Precision> getPrecisionsOnActivations(const std::string& layerName) const noexcept = 0;
-};
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <limits>
-#include <list>
-#include <memory>
-#include <vector>
-
-#include <details/ie_exception.hpp>
-
-#include "iparams_manager.hpp"
-#include "ilayer_transformations_manager.hpp"
-#include "transformation_context.hpp"
-#include "quantization_details.hpp"
-
-/*****************************************************
- * Debug capability
- *  - LPT_ORIGINAL_MODEL_PATH : Specify with existing folder name
- *    to serialize original model into it (XML & BIN extensions are added)
- *  - LPT_TRANSFORMED_MODEL_PATH : Specify with existing folder name
- *    to serialize transformed model into it (XML & BIN extensions are added)
- * - LPT_PRINT_DEQUANTIZATION_INFO : Define it to enable
- * dequantization layers printing
- *
- *****************************************************/
-// #define LPT_ORIGINAL_MODEL_PATH "C:\\Projects\\temp\\original"
-// #define LPT_TRANSFORMED_MODEL_PATH "C:\\Projects\\temp\\transformed"
-// #define LPT_PRINT_DEQUANTIZATION_INFO
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class INFERENCE_ENGINE_API_CLASS(DataPrecision) {
-public:
- DataPrecision() : precision(Precision::UNSPECIFIED), min(0.f), max(0.f), hasZeroPoint(false) {}
-
- DataPrecision(const Precision precision, const float min, const float max, const bool hasZeroPoint) :
- precision(precision),
- min(min),
- max(max),
- hasZeroPoint(hasZeroPoint) {}
-
- static float getMinValue(const Precision precision, const size_t levels) {
- switch (precision) {
- case Precision::I8: {
- if (levels == 255) {
- return static_cast<float>(std::numeric_limits<signed char>::lowest()) + 1.f;
- } else if (levels == 256) {
- return static_cast<float>(std::numeric_limits<signed char>::lowest());
- } else {
- THROW_IE_EXCEPTION << "unexpected levels " << levels << " for precision " << precision;
- }
- }
- case Precision::U8: {
- return static_cast<float>(std::numeric_limits<unsigned char>::lowest());
- }
- case Precision::FP16: {
- return -1.0e15f;
- }
- case Precision::FP32: {
- return std::numeric_limits<float>::lowest();
- }
- default: {
- THROW_IE_EXCEPTION << "unexpected precision " << precision;
- }
- }
- }
-
- static float getMaxValue(const Precision precision) {
- switch (precision) {
- case Precision::I8: {
- return static_cast<float>(std::numeric_limits<signed char>::max());
- }
- case Precision::U8: {
- return static_cast<float>(std::numeric_limits<unsigned char>::max());
- }
- case Precision::FP16: {
- return 1.0e15f;
- }
- case Precision::FP32: {
- return std::numeric_limits<float>::max();
- }
- default: {
- THROW_IE_EXCEPTION << "unexpected precision " << precision;
- }
- }
- }
-
- static bool hasNegativeValues(const std::vector<float>& values) {
- for (const float value : values) {
- if (value < 0.0) {
- return true;
- }
- }
- return false;
- }
-
- Precision precision;
- float min;
- float max;
- bool hasZeroPoint;
-
- static Precision getPrecision(const std::vector<float>& outputLowValues, const std::vector<float>& outputHighValues) {
- return (hasNegativeValues(outputLowValues) || hasNegativeValues(outputHighValues)) ? Precision::I8 : Precision::U8;
- }
-
- static Precision getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) {
- return signedInterval ? Precision::I8 : Precision::U8;
- }
-
- static float getMin(const size_t quantizationLevels, const bool signedInterval) {
- if (quantizationLevels == 255) {
- return signedInterval ? -127.0 : 0.0;
- } else if (quantizationLevels == 256) {
- return signedInterval ? -128.0 : 0.0;
- } else {
- // THROW_IE_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
- // FIXME: not completed
- return signedInterval ? -128.0 : 0.0;
- }
- }
-
- static float getMax(const size_t quantizationLevels, const bool signedInterval) {
- if ((quantizationLevels == 255) || (quantizationLevels == 256)) {
- return signedInterval ? 127.0 : 255.0;
- } else {
- // THROW_IE_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
- // FIXME: not completed
- // return quantizationLevels - 1.0;
- return signedInterval ? 127.0 : 255.0;
- }
- }
-};
-
-// Equality for DataPrecision: two values are equal when their precision and
-// their [min, max] range match. hasZeroPoint is intentionally left out of the
-// comparison, as in the original implementation.
-// Fix: min/max were previously compared against themselves (value1 vs value1),
-// which made those two terms always true for non-NaN values.
-inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) {
-    return
-        (value1.precision == value2.precision) &&
-        (value1.min == value2.min) &&
-        (value1.max == value2.max);
-}
-
-inline bool operator!=(const DataPrecision& value1, const DataPrecision& value2) {
- return !(value1 == value2);
-}
-
-inline std::ostream &operator << (std::ostream &os, const DataPrecision& value) {
- os << value.precision << ", min: " << value.min << ", max: " << value.max;
- return os;
-}
-
-class INFERENCE_ENGINE_API_CLASS(LayerTransformation) {
-public:
- enum QuantizedTensorAlignment {
- None,
- UpdateIntervals,
- UpdateLevel,
- // UpdateIntervals & UpdateLevel & ...
- Mixed
- };
-
- class Params {
- public:
-        // Builds the parameter set shared by the layer transformations.
-        // Every argument has a default, so Params() yields the default configuration.
-        // Throws (via THROW_IE_EXCEPTION) when either precision list is empty.
-        Params(
-            const bool updatePrecisions = true,
-            const bool quantizeOutputs = false,
-            const bool weightsToConst = true,
-            const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations = QuantizedTensorAlignment::UpdateLevel,
-            const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights = QuantizedTensorAlignment::None,
-            const bool roundQuantizedValues = true,
-            const bool updateBiases = true,
-            bool supportAsymmetricQuantization = true,
-            std::vector<Precision> precisionsOnActivations = { Precision::U8, Precision::I8 },
-            std::vector<Precision> precisionsOnWeights = { Precision::I8 }) :
-            updatePrecisions(updatePrecisions),
-            quantizeOutputs(quantizeOutputs),
-            weightsToConst(weightsToConst),
-            quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
-            quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
-            roundQuantizedValues(roundQuantizedValues),
-            updateBiases(updateBiases),
-            supportAsymmetricQuantization(supportAsymmetricQuantization),
-            precisionsOnActivations(precisionsOnActivations),
-            precisionsOnWeights(precisionsOnWeights) {
-            // Inside the body the unqualified names refer to the constructor
-            // arguments (they shadow the members); the members hold copies of them.
-            if (precisionsOnActivations.empty()) {
-                THROW_IE_EXCEPTION << "precisions on activations are not specified";
-            }
-
-            if (precisionsOnWeights.empty()) {
-                THROW_IE_EXCEPTION << "precisions on weights are not specified";
-            }
-        }
-
- Params& setUpdatePrecisions(const bool updatePrecisions) {
- this->updatePrecisions = updatePrecisions;
- return *this;
- }
-
- Params& setQuantizeOutputs(const bool quantizeOutputs) {
- this->quantizeOutputs = quantizeOutputs;
- return *this;
- }
-
- Params& setWeightsToConst(const bool weightsToConst) {
- this->weightsToConst = weightsToConst;
- return *this;
- }
-
- Params& setQuantizedTensorAlignmentOnActivations(const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations) {
- this->quantizedTensorAlignmentOnActivations = quantizedTensorAlignmentOnActivations;
- return *this;
- }
-
- Params& setQuantizedTensorAlignmentOnWeights(const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights) {
- this->quantizedTensorAlignmentOnWeights = quantizedTensorAlignmentOnWeights;
- return *this;
- }
-
- Params& setRoundQuantizedValues(const bool roundQuantizedValues) {
- this->roundQuantizedValues = roundQuantizedValues;
- return *this;
- }
-
- Params& setUpdateBiases(const bool updateBiases) {
- this->updateBiases = updateBiases;
- return *this;
- }
-
- Params& setSupportAsymmetricQuantization(const bool supportAsymmetricQuantization) {
- this->supportAsymmetricQuantization = supportAsymmetricQuantization;
- return *this;
- }
-
- Params& setPrecisionsOnActivations(const std::vector<Precision>& precisionsOnActivations) {
- this->precisionsOnActivations = precisionsOnActivations;
- return *this;
- }
-
- Params& setPrecisionsOnWeights(const std::vector<Precision>& precisionsOnWeights) {
- this->precisionsOnWeights = precisionsOnWeights;
- return *this;
- }
-
- bool updatePrecisions;
- bool quantizeOutputs;
- bool weightsToConst;
- QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
- QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
- bool roundQuantizedValues;
- bool updateBiases;
- bool supportAsymmetricQuantization;
- std::vector<Precision> precisionsOnActivations;
- std::vector<Precision> precisionsOnWeights;
- };
-
- class PrecisionDetails {
- public:
- PrecisionDetails(const Precision& precision, const bool hasNegativeOutput, const bool hasZeroPoint) :
- precision(precision),
- hasNegativeOutput(hasNegativeOutput),
- hasZeroPoint(hasZeroPoint) {}
-
- const Precision precision;
- const bool hasNegativeOutput;
- const bool hasZeroPoint;
- };
-
- LayerTransformation(const Params& params);
- virtual ~LayerTransformation() = default;
- virtual void transform(TransformationContext& context, CNNLayer& layer) const = 0;
-
- void setParamsManager(IParamsManager* paramsManager) noexcept;
- void setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept;
-
- void setUpdatePrecisions(const bool updatePrecisions);
- void setQuantizeOutputs(const bool quantizeOutputs);
- void setWeightsToConst(const bool weightsToConst);
- void setQuantizedTensorAlignmentOnActivations(const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations);
- void setQuantizedTensorAlignmentOnWeights(const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights);
-
- void setQuantizationIntervalAsymmetryThreshold(const float value);
- void setZeroThreshold(const float value);
- void setDequantizationShiftToZeroRatioTreshold(const float value);
- void setMinQuantizationLevels(const size_t levels);
-
- const std::vector<Precision>& getPrecisionsOnActivations() const;
- const std::vector<Precision>& getPrecisionsOnWeights() const;
-
- virtual bool canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const;
-
- static Precision getPrecisionBeforeParentDequantizationScaleShift(const CNNLayer& layer);
- static Precision getPrecisionParent(const CNNLayer& layer);
- PrecisionDetails getPrecisionDetails(const QuantizationDetails& quantizationDetails) const;
-
- virtual bool isQuantized(const CNNLayer& layer) const noexcept;
- virtual bool isPrecisionPreserved(const CNNLayer& layer) const noexcept;
-
- DataPrecision getDataPrecision(
- const CNNLayer& layer,
- const QuantizationDetails& quantizationDetails,
- const bool onWeights,
- const bool supportAsymmetricQuantization) const;
-
- void fillAvailablePrecisions(const CNNLayer& layer, std::vector<Precision>& availablePrecisions) const;
-
- void fillFromDequantizationLayer(
- const CNNLayer& dequantizationLayer,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts) const;
-
-protected:
-#ifdef LPT_PRINT_DEQUANTIZATION_INFO
- static void printDequantizationInfo(const CNNLayer& layer);
- static void printDequantizationInfo(const DataPrecision& dataPrecision);
- static void printDequantizationValues(
- const std::vector<float>& dequantizationScales,
- const std::vector<float>& dequantizationShifts);
-#endif
- void addDequantizationLayer(
- TransformationContext& context,
- const CNNLayer& layer,
- const std::vector<float>& dequantizationScales,
- const std::vector<float>& dequantizationShifts) const;
-
- void fillFromQuantizationDetails(
- const QuantizationDetails& quantizationDetails,
- const DataPrecision& dataPrecision,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts) const;
-
- void checkAndUpdateDequantizationShiftWithZero(
- const QuantizationDetails& quantizationDetails,
- std::vector<float>& dequantizationShifts) const;
-
- bool updatePrecisions;
- bool quantizeOutputs;
- bool weightsToConst;
- QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
- QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
- bool roundQuantizedValues;
- bool updateBiases;
- bool supportAsymmetricQuantization;
- std::vector<Precision> precisionsOnActivations;
- std::vector<Precision> precisionsOnWeights;
-
- // absolute value, used to determine quantization interval asymmetry
- float quantizationIntervalAsymmetryThreshold;
- // absolute value, used to determine zero
- float zeroThreshold;
- // relative value, used to replace quantization shift to zero
- float dequantizationShiftToZeroRatioTreshold;
- size_t minQuantizationLevels;
-
- static const char lastLayerPostfix[];
- IParamsManager* paramsManager;
- ILayerTransformationsManager* layerTransformationsManager;
-};
-
-// Writes the symbolic name of a QuantizedTensorAlignment value to the stream.
-// Values without a known name are written as their integer representation.
-inline std::ostream &operator << (std::ostream &os, const LayerTransformation::QuantizedTensorAlignment& value) {
-    using Alignment = LayerTransformation::QuantizedTensorAlignment;
-
-    // Resolve the textual name first; nullptr means "no name for this value".
-    const char* text = nullptr;
-    switch (value) {
-        case Alignment::None:
-            text = "None";
-            break;
-        case Alignment::UpdateIntervals:
-            text = "UpdateIntervals";
-            break;
-        case Alignment::UpdateLevel:
-            text = "UpdateLevel";
-            break;
-        case Alignment::Mixed:
-            text = "Mixed";
-            break;
-        default:
-            break;
-    }
-
-    if (text != nullptr) {
-        os << text;
-    } else {
-        os << static_cast<int>(value);
-    }
-    return os;
-}
-
-// Shared-ownership handle to a LayerTransformation.
-using LayerTransformationPtr = std::shared_ptr<LayerTransformation>;
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <vector>
-#include "layer_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-// LayerTransformation specialisation that overrides transform() and isPrecisionPreserved().
-// Both overrides are only declared here; their definitions are not part of this header.
-class INFERENCE_ENGINE_API_CLASS(MvnTransformation) : public LayerTransformation {
-public:
-    // Forwards the parameters unchanged to the LayerTransformation base.
-    MvnTransformation(const Params& params) : LayerTransformation(params) {}
-    ~MvnTransformation() override = default;
-
-    void transform(TransformationContext& context, CNNLayer& layer) const override;
-    bool isPrecisionPreserved(const CNNLayer& layer) const noexcept override;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <cmath>
-#include <memory>
-#include <string>
-#include <vector>
-#include <unordered_set>
-
-#include <legacy/ie_layers.h>
-#include <legacy/cnn_network_impl.hpp>
-
-#include "low_precision_transformations/common/dequantization_details.hpp"
-#include "low_precision_transformations/transformation_context.hpp"
-#include "low_precision_transformations/quantization_details.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-// Collection of layers gathered around a concat layer: two layer lists plus a
-// name-keyed map of raw layer pointers. The fill* members are only declared here.
-class INFERENCE_ENGINE_API_CLASS(Subgraph) {
-public:
- // Public entry point for filling the subgraph starting at `concat`.
- // NOTE(review): handledLayers is presumably the set of already visited layer names,
- // and the bool result presumably signals success — confirm against the implementation.
- bool fillSubgraphForConcat(const CNNLayerPtr& concat, std::unordered_set<std::string>& handledLayers);
- bool empty() const;
-
- std::vector<CNNLayerPtr> quantizationLayers;
- std::vector<CNNLayerPtr> concatLayers;
- // Raw, non-owning pointers keyed by string (presumably the layer name — TODO confirm).
- std::unordered_map<std::string, CNNLayer*> layers;
-
-private:
- // Internal helpers taking the same handledLayers set as fillSubgraphForConcat.
- bool fillSubgraphForQuantization(const CNNLayerPtr& fakeQuantize, std::unordered_set<std::string>& handledLayers);
- bool fillSubgraphForIntermediate(const CNNLayerPtr& intermediate, std::unordered_set<std::string>& handledLayers);
- bool fill(const CNNLayerPtr& concat, std::unordered_set<std::string>& handledLayers);
-};
-
-/**
- * @brief CNNNetworkHelper class encapsulates manipulations with CNN Network.
- */
-class INFERENCE_ENGINE_API_CLASS(CNNNetworkHelper) {
-public:
- static Subgraph getSubgraph(const CNNLayer& concat);
-
- static CNNLayerPtr getLayer(const ICNNNetwork& network, const std::string& layerName);
-
- static Blob::Ptr makeNewBlobPtr(const TensorDesc& desc);
-
- static void updateBlobs(const CNNLayer& quantizeLayer, int constLayerIndex, float value);
-
- static void updateBlobs(const CNNLayer& quantizeLayer, int constLayerIndex, const std::vector<float>& values);
-
- static void updateBlobs(TransformationContext& context, const CNNLayer& quantizeLayer, int constLayerIndex, float value);
-
- static void updateBlobs(TransformationContext& context, const CNNLayer& quantizeLayer, int constLayerIndex, const std::vector<float>& values);
-
- static void updateBlobs(CNNLayer& layer, const std::string& blobName, const std::vector<float>& values);
-
- static CNNLayerPtr copyConstant(
- TransformationContext& context,
- const CNNLayer& quantizeLayer,
- const CNNLayerPtr& blobLayer,
- const size_t constLayerIndex);
-
- // return true if at least one child uses layer on weights
- static bool onWeights(const CNNLayer& layer);
-
- static bool onConstWeightsPath(const CNNLayer& quantize);
-
- static size_t getIndex(const CNNLayer& layer);
-
- static std::vector<CNNLayerPtr> transformFakeQuantizeToConst(
- TransformationContext& context,
- const CNNLayerPtr fakeQuantize,
- const Blob::Ptr weights,
- const std::string& constLayerName);
-
- static void setOutDataPrecision(const CNNLayer& layer, const Precision& precision);
-
- static void setOutDataPrecision(const std::vector<CNNLayerPtr>& layers, const Precision& precision);
-
- static void setOutDataPrecision(
- const CNNLayer& beginLayer,
- const size_t branchWithEndBeforeLayer,
- const CNNLayer& endBeforeLayer,
- const Precision& precision);
-
- static bool IsChild(
- const std::vector<CNNLayerPtr>& children,
- const std::unordered_set<std::string>& layerTypes,
- const std::unordered_set<std::string>& ignoreLayerTypes = {});
-
- static size_t getOutputChannelsCount(const CNNLayer& layer, bool isOnWeights = false);
-
- static std::vector<CNNLayerPtr> getLayers(const CNNLayer& parent, const CNNLayer& child);
-
- static Blob::Ptr getBlob(CNNLayerPtr layer, const std::string& blobName);
-
- static Blob::Ptr getBlob(const CNNLayer* layer, const std::string& blobName);
-
- static std::shared_ptr<float> getFloatData(const CNNLayerPtr& layer, const std::string& blobName);
-
- static std::shared_ptr<float> getFloatData(const Blob::Ptr& srcBlob);
-
- static bool isBlobPrecisionSupported(const Precision precision);
-
- static void fillBlobByFP32(Blob::Ptr& dstBlob, float value);
-
- static void fillBlobByFP32(Blob::Ptr& dstBlob, const float* srcData);
-
- static void fillBlobByFP32(const CNNLayerPtr& layer, const std::string& blobName, const float* srcData);
-
- static std::shared_ptr<float> convertFloatData(const float* srcData, const size_t dataSize, const Precision precision);
-
- static CNNLayerPtr getParent(
- const CNNLayer& layer,
- const size_t index = 0,
- const std::string& ignoreLayerType = "");
-
- static std::vector<CNNLayerPtr> getParents(
- const CNNLayer& layer,
- const std::string& exceptionLayerName = "");
-
- static std::vector<CNNLayerPtr> getParentsRecursivelyExceptTypes(
- const CNNLayer& layer,
- const std::unordered_set<std::string>& exceptionLayerTypes = {},
- const int portIndex = -1);
-
- static bool isLayoutSupported(const CNNLayer& layer);
-
- static size_t getInputChannelsCount(const CNNLayer& layer);
-
- static size_t getParamOutput(const CNNLayer& layer);
-
- static size_t getKernelSize(const CNNLayer& layer);
-
- static void renameLayer(ICNNNetwork& net, const std::string& currentName, const std::string& newName);
-
- static CNNLayerPtr addLayer(
- TransformationContext& context,
- const CNNLayerPtr parent,
- const CNNLayerPtr child,
- const CNNLayerPtr newLayer);
-
- static void replaceLayer(TransformationContext& context, const CNNLayerPtr source, const CNNLayerPtr target);
-
- // Add ScaleShift between parent and child layers. Affected edges (output and input ports) are not specified.
- // As a result, ScaleShift will be added for all edges between parent and children.
- static std::vector<CNNLayerPtr> addScaleShiftBetween(
- TransformationContext& context,
- const CNNLayerPtr parent,
- const CNNLayerPtr child,
- const DequantizationDetails& dequantizationDetails,
- const std::string& name = "");
-
- static CNNLayerPtr addConstBetween(
- ICNNNetwork& net,
- const CNNLayerPtr layer1,
- const CNNLayerPtr layer2,
- const Blob::Ptr customBlob,
- const std::string& name);
-
- static void addLayerToCNNNetworkAfterData(
- DataPtr parentOutData,
- CNNLayer::Ptr layer,
- const std::string& nextLayerName,
- ICNNNetwork& net,
- const int childInsDataIndex = -1);
-
- IE_SUPPRESS_DEPRECATED_START
- static void fillInScaleShift(ScaleShiftLayer* layer, const size_t channels, const float* scales, const float* shifts);
- IE_SUPPRESS_DEPRECATED_END
-
- static std::vector<CNNLayerPtr> getChildren(const CNNLayer& layer, const std::string& exceptionLayerName = "");
-
- static std::vector<CNNLayerPtr> getChildrenRecursivelyExceptTypes(
- const CNNLayer& layer,
- const std::unordered_set<std::string>& exceptionLayerTypes = {});
-
- static void checkConstWithBlobs(const CNNLayerPtr layer);
-
- static void checkQuantizeOnWeights(const CNNLayerPtr layer);
-
- static void updateInput(details::CNNNetworkImpl* network, CNNLayerPtr& layer, DataPtr outData);
-
- static size_t disconnectLayers(
- CNNNetworkImpl* network,
- const CNNLayerPtr& parentLayer,
- const CNNLayerPtr& childLayer);
-
- static size_t getInputIndex(const CNNLayerPtr& childLayer, const CNNLayerPtr& parentLayer);
-
- static void removeLayer(ICNNNetwork& network, const CNNLayerPtr& layer);
-
- static bool isWeightsSupported(const CNNLayer& layer) noexcept;
-
- static Blob::Ptr getWeights(const CNNLayer& layer, const bool roundQuantizedValues);
-
- static Blob::Ptr getBiases(const CNNLayer& layer);
-
- static Blob::Ptr quantizeWeights(
- const CNNLayer& quantize,
- const bool roundValues,
- const Precision precision = Precision::UNSPECIFIED);
-
- static bool isQuantizedConstWeights(const CNNLayer& quantize);
-
- static int getConstParentBranchID(const CNNLayer& layer);
-
- static Precision getPrecisionParent(const CNNLayer& layer);
-
- static Precision getPrecisionParent(const CNNLayer& layer, const size_t parentIndex);
-
- static DataPtr getOutData(const CNNLayer& parentLayer, const CNNLayer& childLayer);
-
-private:
- // 1 - on weights
- // 0 - weightable layer was not found
- // -1 - on activations
- static int onWeightsInDepth(const CNNLayer& layer);
-
- static Precision getPrecisionParent(const CNNLayer& layer, const size_t parentIndex, const bool useParentIndex);
-
-    // Returns the first blob of the layer that produces input 0 of `quantize`.
-    // Throws when the layer has no inputs, when the input data has expired, or when
-    // the producing layer is gone; the producer is then passed to checkConstWithBlobs().
-    static Blob::Ptr getQuantizeLayerBlob(const CNNLayer& quantize) {
-        if (quantize.insData.empty()) {
-            THROW_IE_EXCEPTION << "unexpected parents count for " << quantize.type << " layer " << quantize.name;
-        }
-
-        // insData holds weak references, so the data object may no longer exist.
-        const DataPtr parentData = quantize.insData.front().lock();
-        if (!parentData) {
-            THROW_IE_EXCEPTION << "parent data is absent for " << quantize.type << " layer " << quantize.name;
-        }
-
-        IE_SUPPRESS_DEPRECATED_START
-        const CNNLayerPtr producer = getCreatorLayer(parentData).lock();
-        if (!producer) {
-            THROW_IE_EXCEPTION << "parent layer is absent for " << quantize.type << " layer " << quantize.name;
-        }
-        IE_SUPPRESS_DEPRECATED_END
-
-        checkConstWithBlobs(producer);
-
-        return producer->blobs.begin()->second;
-    }
-
- static void quantizeBlob(const CNNLayer& quantize, Blob::Ptr& targetBlob, bool roundValues);
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <legacy/ie_layers.h>
-#include "low_precision_transformations/layer_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-// LayerTransformation specialisation that overrides transform(), canBeTransformed()
-// and isPrecisionPreserved(). The overrides are only declared in this header.
-class INFERENCE_ENGINE_API_CLASS(NormalizeTransformation) : public LayerTransformation {
-public:
- // Forwards the parameters unchanged to the LayerTransformation base.
- NormalizeTransformation(const Params& params) : LayerTransformation(params) {}
- void transform(TransformationContext& context, CNNLayer& layer) const override;
- bool canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const override;
- bool isPrecisionPreserved(const CNNLayer& layer) const noexcept override;
-};
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <legacy/ie_layers.h>
-#include "low_precision_transformations/transformation_context.hpp"
-#include "low_precision_transformations/layer_transformation.hpp"
-#include "low_precision_transformations/transparent_base_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-// TransparentBaseTransformation specialisation that supplies its own transform().
-// The override is only declared here; its definition is not part of this header.
-class INFERENCE_ENGINE_API_CLASS(PermuteTransformation) : public TransparentBaseTransformation {
-public:
-    // Forwards the parameters unchanged to the TransparentBaseTransformation base.
-    PermuteTransformation(const Params& params) : TransparentBaseTransformation(params) {}
-    ~PermuteTransformation() override = default;
-
-    void transform(TransformationContext& context, CNNLayer& layer) const override;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <legacy/ie_layers.h>
-#include "low_precision_transformations/transformation_context.hpp"
-#include "low_precision_transformations/layer_transformation.hpp"
-#include "low_precision_transformations/transparent_base_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-// TransparentBaseTransformation specialisation that overrides transform() and
-// isPrecisionPreserved(). Both overrides are only declared in this header.
-class INFERENCE_ENGINE_API_CLASS(PoolingTransformation) : public TransparentBaseTransformation {
-public:
-    // Forwards the parameters unchanged to the TransparentBaseTransformation base.
-    PoolingTransformation(const Params& params) : TransparentBaseTransformation(params) {}
-    ~PoolingTransformation() override = default;
-
-    void transform(TransformationContext& context, CNNLayer& layer) const override;
-    bool isPrecisionPreserved(const CNNLayer& layer) const noexcept override;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <legacy/ie_layers.h>
-#include "low_precision_transformations/transformation_context.hpp"
-#include "low_precision_transformations/layer_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-// LayerTransformation specialisation that overrides transform() and canBeTransformed().
-// Both overrides are only declared here; their definitions are not part of this header.
-class INFERENCE_ENGINE_API_CLASS(PowerTransformation) : public LayerTransformation {
-public:
-    // Forwards the parameters unchanged to the LayerTransformation base.
-    PowerTransformation(const Params& params) : LayerTransformation(params) {}
-    ~PowerTransformation() override = default;
-
-    void transform(TransformationContext& context, CNNLayer& layer) const override;
-    bool canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const override;
-};
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <string>
-#include <vector>
-
-#include <legacy/ie_layers.h>
-#include <cpp/ie_cnn_network.h>
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-/**
-* @brief Quantization layer details and basic operations on them.
-*/
-class INFERENCE_ENGINE_API_CLASS(QuantizationDetails) {
-public:
- QuantizationDetails();
- QuantizationDetails(const QuantizationDetails& quantizationDetails);
- // Constructs the object from explicit values; parameter names match the const
- // data members declared below.
- QuantizationDetails(
- const size_t levels,
- const std::vector<float>& inputLowValues,
- const std::vector<float>& inputHighValues,
- const std::vector<float>& outputLowValues,
- const std::vector<float>& outputHighValues,
- const size_t inputIntervalsCount,
- const size_t outputIntervalsCount,
- const size_t outputChannelsCount);
-
- static bool outputLayoutIsSupported(const CNNLayer& quantize);
-
- // Fills the three output arguments from the given quantize layer.
- static void getInputIntervals(
- const CNNLayer& quantize,
- std::vector<float>& inputLowValues,
- std::vector<float>& inputHighValues,
- size_t& inputIntervalsCount);
-
- // Fills the three output arguments from the given quantize layer.
- static void getOutputIntervals(
- const CNNLayer& quantize,
- std::vector<float>& outputLowValues,
- std::vector<float>& outputHighValues,
- size_t& outputIntervalsCount);
-
- // Factory: builds a QuantizationDetails object from a quantize layer.
- static QuantizationDetails getDetails(const CNNLayer& quantize);
- bool hasNegativeOutput() const;
- float maxOutput(const size_t channel) const;
- float maxInput(const size_t channel) const;
-
- float maxOutputHigh() const;
- float minOutputLow() const;
-
- // Per-channel accessors for the interval vectors.
- // NOTE(review): handling of an out-of-range channel index is not visible here — confirm.
- float getInputLowValue(const size_t channel) const;
- float getInputHighValue(const size_t channel) const;
- float getOutputLowValue(const size_t channel) const;
- float getOutputHighValue(const size_t channel) const;
-
- static bool isSupportedLevel(const size_t level);
-
- // All data members are const: they can be set only by a constructor.
- const size_t levels;
- const std::vector<float> inputLowValues;
- const std::vector<float> inputHighValues;
- const std::vector<float> outputLowValues;
- const std::vector<float> outputHighValues;
- const size_t inputIntervalsCount;
- const size_t outputIntervalsCount;
- const size_t outputChannelsCount;
-
-private:
- // Private assignment operator that copies nothing and just returns *this;
- // together with the const members this makes instances non-assignable from outside.
- QuantizationDetails &operator=(const QuantizationDetails & /*target*/) { return *this; }
- static void validate(const CNNLayerPtr& constantLayer);
- static std::vector<float> getBlobValue(const CNNLayerPtr& constantLayer);
-};
-
-// Writes a one-line summary of the quantization details to the stream:
-//   levels: <levels>, input 1/<count>: [<low> : <high>], output 1/<count>: [<low> : <high>]
-// Only the values of channel 0 are printed for the input and output intervals.
-// The previous version emitted "], " followed by ", output", which produced a
-// doubled separator ("], , output"); the sections are now separated by a single ", ".
-inline std::ostream &operator << (std::ostream &os, const QuantizationDetails& value) {
-    os << "levels: " << value.levels
-       << ", input 1/" << value.inputIntervalsCount
-       << ": [" << value.getInputLowValue(0) << " : " << value.getInputHighValue(0) << "]"
-       << ", output 1/" << value.outputIntervalsCount
-       << ": [" << value.getOutputLowValue(0) << " : " << value.getOutputHighValue(0) << "]";
-    return os;
-}
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <vector>
-#include "low_precision_transformations/layer_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-// LayerTransformation specialisation that supplies its own transform().
-// The override is only declared here; its definition is not part of this header.
-class INFERENCE_ENGINE_API_CLASS(ResampleTransformation) : public LayerTransformation {
-public:
-    // Forwards the parameters unchanged to the LayerTransformation base.
-    ResampleTransformation(const Params& params) : LayerTransformation(params) {}
-    ~ResampleTransformation() override = default;
-
-    void transform(TransformationContext& context, CNNLayer& layer) const override;
-};
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <legacy/ie_layers.h>
-#include "low_precision_transformations/transformation_context.hpp"
-#include "low_precision_transformations/layer_transformation.hpp"
-#include "low_precision_transformations/transparent_base_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-// TransparentBaseTransformation specialisation that overrides transform() and
-// isPrecisionPreserved(). The private section declares two can*/transform* helper
-// pairs ("Original" and "ConstPropagated") and a quantize() helper; none are defined here.
-class INFERENCE_ENGINE_API_CLASS(ReshapeTransformation) : public TransparentBaseTransformation {
-public:
-    // Forwards the parameters unchanged to the TransparentBaseTransformation base.
-    ReshapeTransformation(const Params& params) : TransparentBaseTransformation(params) {}
-    ~ReshapeTransformation() override = default;
-
-    void transform(TransformationContext& context, CNNLayer& layer) const override;
-    bool isPrecisionPreserved(const CNNLayer& layer) const noexcept override;
-
-private:
-    // "Original" pair: a predicate and the matching transformation step.
-    bool canTransformOriginal(const CNNLayer& layer) const;
-    void transformOriginal(TransformationContext& context, CNNLayer& layer) const;
-
-    // "ConstPropagated" pair: a predicate and the matching transformation step.
-    bool canTransformConstPropagated(const CNNLayer& layer) const;
-    void transformConstPropagated(TransformationContext& context, CNNLayer& layer) const;
-
-    void quantize(TransformationContext& context, CNNLayer& layer) const;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <string>
-#include <unordered_set>
-#include <vector>
-#include "low_precision_transformations/weightable_layer_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-// WeightableLayerTransformation specialisation that carries two extra settings:
-// a group size and a set of strings named ignoreWithParents, each with a
-// setter/getter pair. All non-trivial members are only declared in this header.
-class INFERENCE_ENGINE_API_CLASS(ScaleShiftToConvolutionTransformation) : public WeightableLayerTransformation {
-public:
-    ScaleShiftToConvolutionTransformation(const Params& params);
-    ~ScaleShiftToConvolutionTransformation() override = default;
-
-    void transform(TransformationContext& context, CNNLayer& layer) const override;
-
-    // Accessors for the groupSize member.
-    void setGroupSize(const size_t groupSize);
-    size_t getGroupSize() const;
-
-    // Accessors for the ignoreWithParents member; the getter returns a copy.
-    void setIgnoreWithParents(const std::unordered_set<std::string>& ignoreWithParents);
-    std::unordered_set<std::string> getIgnoreWithParents() const;
-
-    bool isPrecisionPreserved(const CNNLayer& layer) const noexcept override;
-    bool isQuantized(const CNNLayer& layer) const noexcept override;
-
-private:
-    // Helper returning a layer pointer for the given layer and group value.
-    CNNLayerPtr transformToConvolution(TransformationContext& context, const CNNLayer& layer, const size_t group) const;
-
-    size_t groupSize;
-    std::unordered_set<std::string> ignoreWithParents;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <legacy/ie_layers.h>
-#include "low_precision_transformations/transformation_context.hpp"
-#include "low_precision_transformations/layer_transformation.hpp"
-#include "low_precision_transformations/transparent_base_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-// TransparentBaseTransformation specialisation that supplies its own transform().
-// The override is only declared here; its definition is not part of this header.
-class INFERENCE_ENGINE_API_CLASS(SqueezeTransformation) : public TransparentBaseTransformation {
-public:
-    // Forwards the parameters unchanged to the TransparentBaseTransformation base.
-    SqueezeTransformation(const Params& params) : TransparentBaseTransformation(params) {}
-    ~SqueezeTransformation() override = default;
-
-    void transform(TransformationContext& context, CNNLayer& layer) const override;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <string>
-#include <vector>
-#include <unordered_map>
-#include <unordered_set>
-
-#include <ie_icnn_network.hpp>
-#include <cpp/ie_cnn_network.h>
-#include "low_precision_transformations/quantization_details.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-// State shared while transforming one network: a reference to the network, two
-// public sets of layer names, a private layer list and a private two-level map
-// (layer name -> data name -> Precision) of original precisions.
-class TransformationContext {
-public:
-    explicit TransformationContext(ICNNNetwork& network);
-
-    void removeLayer(const CNNLayer& layer);
-
-    ICNNNetwork& network;
-    std::unordered_set<std::string> quantizedFakeQuantizeNames;
-    std::unordered_set<std::string> dequantizationLayersNames;
-
-    // Read-only access to the private layer list.
-    const std::vector<CNNLayerPtr>& getLayers() {
-        return this->layers;
-    }
-
-    // Looks up the precision recorded for `layer_name`.
-    //  - unknown layer                      -> Precision::UNSPECIFIED
-    //  - empty data_name, non-empty entries -> the first entry of the layer's map
-    //  - data_name not recorded             -> Precision::UNSPECIFIED
-    //  - otherwise                          -> the precision stored for data_name
-    inline Precision getOriginalLayerPrecision(const std::string& layer_name, const std::string& data_name = "") {
-        const auto layerEntry = _original_precisions_map.find(layer_name);
-        if (layerEntry == _original_precisions_map.end()) {
-            return Precision::UNSPECIFIED;
-        }
-
-        const auto& precisionsByData = layerEntry->second;
-        if (data_name.empty() && !precisionsByData.empty()) {
-            return precisionsByData.begin()->second;
-        }
-
-        const auto dataEntry = precisionsByData.find(data_name);
-        if (dataEntry == precisionsByData.end()) {
-            return Precision::UNSPECIFIED;
-        }
-        return dataEntry->second;
-    }
-
-private:
-    std::vector<CNNLayerPtr> layers;
-    std::unordered_map<std::string, std::unordered_map<std::string, Precision>> _original_precisions_map;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "layer_transformation.hpp"
-#include "iparams_manager.hpp"
-#include "ilayer_transformations_manager.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-// Registry of layer transformations, kept in three public maps keyed by the
-// lower-cased layer type: branch-specific, regular and cleanup transformations.
-class INFERENCE_ENGINE_API_CLASS(LowPrecisionTransformations) {
-public:
-    LowPrecisionTransformations(
-        const std::map<std::string, LayerTransformationPtr>& branchSpecificTransformations,
-        const std::map<std::string, LayerTransformationPtr>& transformations,
-        const std::map<std::string, LayerTransformationPtr>& cleanupTransformations);
-
-    void setUpdatePrecisions(const bool updatePrecisions);
-    void setQuantizeOutputs(const bool quantizeOutputs);
-    void setWeightsToConst(const bool weightsToConst);
-    void setQuantizedTensorAlignmentOnActivations(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnActivations);
-    void setQuantizedTensorAlignmentOnWeights(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnWeights);
-    LowPrecisionTransformations& remove(const std::string& layerType);
-    LowPrecisionTransformations& removeBranchSpecificTransformations(const std::string& layerType);
-    LowPrecisionTransformations& removeTransformations(const std::string& layerType);
-    LowPrecisionTransformations& removeCleanupTransformations(const std::string& layerType);
-
-    // Registers a transformation of type T, constructed from `params`, in the
-    // branch-specific map under the lower-cased `layerType`. An existing entry
-    // for the same key is replaced. Returns *this for chaining.
-    template <class T>
-    LowPrecisionTransformations& addBranchSpecific(const LayerTransformation::Params& params, const std::string& layerType) {
-        branchSpecificTransformations[toLowerCase(layerType)] = std::make_shared<T>(params);
-        return *this;
-    }
-
-    // Same as addBranchSpecific(), but for the regular transformations map.
-    template <class T>
-    LowPrecisionTransformations& add(const LayerTransformation::Params& params, const std::string& layerType) {
-        transformations[toLowerCase(layerType)] = std::make_shared<T>(params);
-        return *this;
-    }
-
-    // Same as addBranchSpecific(), but for the cleanup transformations map.
-    template <class T>
-    LowPrecisionTransformations& addCleanup(const LayerTransformation::Params& params, const std::string& layerType) {
-        cleanupTransformations[toLowerCase(layerType)] = std::make_shared<T>(params);
-        return *this;
-    }
-
-    LayerTransformationPtr find(const std::string& layerType) const;
-
-    void setParamsManager(IParamsManager* paramsManager) noexcept;
-    void setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept;
-
-    std::map<std::string, LayerTransformationPtr> branchSpecificTransformations;
-    std::map<std::string, LayerTransformationPtr> transformations;
-    std::map<std::string, LayerTransformationPtr> cleanupTransformations;
-
-private:
-    // Returns a lower-cased copy of `value`.
-    // Each character is converted to unsigned char before it reaches ::tolower:
-    // passing a negative plain char (any non-ASCII byte on platforms where char
-    // is signed) to tolower is undefined behaviour.
-    static std::string toLowerCase(std::string value) {
-        std::transform(value.begin(), value.end(), value.begin(), [](const unsigned char symbol) {
-            return static_cast<char>(::tolower(symbol));
-        });
-        return value;
-    }
-
-    static void setParamsManager(IParamsManager* paramsManager, std::map<std::string, LayerTransformationPtr>& transformations) noexcept;
-    static void setLayerTransformationsManager(
-        ILayerTransformationsManager* layerTransformationsManager,
-        std::map<std::string, LayerTransformationPtr>& transformations) noexcept;
-};
-
-/**
- * @brief low precision transformation component.
- */
-// NOTE(review): only IParamsManager is inherited publicly. ILayerTransformationsManager
-// has no access specifier and is therefore a private base of this class — confirm
-// that this is intended.
-class INFERENCE_ENGINE_API_CLASS(LowPrecisionTransformer) : public IParamsManager, ILayerTransformationsManager {
-public:
- // Factory for a LowPrecisionTransformations set built from `params`
- // (default-constructed Params when the argument is omitted).
- static LowPrecisionTransformations getAllTransformations(const LayerTransformation::Params& params = LayerTransformation::Params());
-
- LowPrecisionTransformer();
- LowPrecisionTransformer(const LowPrecisionTransformations& transformations);
- // Operates on the given network in place (non-const reference).
- void transform(ICNNNetwork& network);
- void rename(ICNNNetwork& network) const;
-
- // IParamsManager interface implementation
- std::vector<Precision> getPrecisionsOnActivations(const std::string& layerName) const noexcept override;
-
- // ILayerTransformationsManager interface implementation
- bool isQuantized(const CNNLayer& layer) const noexcept override;
- bool isPrecisionPreserved(const CNNLayer& layer) const noexcept override;
-
-private:
- static void renameLayersByType(const std::vector<CNNLayerPtr>& layers, const std::string& type);
- // The transformation set this transformer works with.
- LowPrecisionTransformations transformations;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <map>
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <vector>
-#include <algorithm>
-
-#include <ie_icnn_network.hpp>
-#include <cpp/ie_cnn_network.h>
-
-#include "low_precision_transformations/layer_transformation.hpp"
-#include "low_precision_transformations/transformation_context.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-// LayerTransformation specialisation that serves as a base class for other
-// transformations in this library. Its transform() override is only declared here.
-class INFERENCE_ENGINE_API_CLASS(TransparentBaseTransformation) : public LayerTransformation {
-public:
-    // Forwards the parameters unchanged to the LayerTransformation base.
-    TransparentBaseTransformation(const Params& params) : LayerTransformation(params) {}
-    ~TransparentBaseTransformation() override = default;
-
-    void transform(TransformationContext& context, CNNLayer& layer) const override;
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <memory>
-#include <vector>
-
-#include "low_precision_transformations/transformation_context.hpp"
-#include "low_precision_transformations/layer_transformation.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-// Immutable pair of precisions: the `original` precision and the `low` one.
-// Both members are const, so instances are not assignable after construction.
-class PrecisionsInfo {
-public:
-    const Precision original;
-    const Precision low;
-
-    PrecisionsInfo(const Precision original, const Precision low)
-        : original(original),
-          low(low) {
-    }
-};
-
-IE_SUPPRESS_DEPRECATED_START
-
-// Base class for low precision transformations of layers that carry weights.
-// NOTE(review): only declarations are visible here; the per-method notes below are
-// derived from names and signatures and must be confirmed against the implementation.
-class INFERENCE_ENGINE_API_CLASS(WeightableLayerTransformation) : public LayerTransformation{
-public:
- // Forwards `params` to the LayerTransformation base.
- WeightableLayerTransformation(const Params& params) : LayerTransformation(params) {}
- // Overrides of the LayerTransformation queries for weightable layers.
- bool canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const override;
- bool isPrecisionPreserved(const CNNLayer& layer) const noexcept override;
- bool isQuantized(const CNNLayer& layer) const noexcept override;
-
-protected:
- // Presumably rewrites the biases of `convolution` using the given dequantization values;
- // the three vectors are non-const references, so they may be modified - TODO confirm.
- void updateLayerBiases(
- TransformationContext& context,
- const CNNLayer& convolution,
- const bool biasesDimsAsOutput,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts,
- std::vector<float>& biasesShifts) const;
-
- // Same signature as updateLayerBiases; presumably the FullyConnected flavour - TODO confirm.
- void updateLayerBiasesFcSpecific(
- TransformationContext& context,
- const CNNLayer& convolution,
- const bool biasesDimsAsOutput,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts,
- std::vector<float>& biasesShifts) const;
-
- // Presumably updates the weights produced through `fakeQuantize` with the given
- // output interval bounds - TODO confirm.
- void updateWeights(
- TransformationContext& context,
- const CNNLayerPtr fakeQuantize,
- std::vector<float>& outputLowValues,
- std::vector<float>& outputHighValues) const;
-
- // Takes separate precision info and shifts for the data path and the weights path.
- // NOTE(review): looks like it adapts `layer` for asymmetric (zero point) quantization - confirm.
- void updateToSupportAsymmetricQuantization(
- TransformationContext& context,
- const CNNLayer& layer,
- const PrecisionsInfo& dataPrecisionsInfo,
- std::vector<float>& dataShifts,
- const PrecisionsInfo& weightsPrecisionsInfo,
- std::vector<float>& weightsShifts) const;
-
- // Operates on a `parent`/`child` layer pair with quantization shifts; `onWeights`
- // selects the weights path. Exact graph changes are not visible here - TODO confirm.
- void createAsymmetric(
- TransformationContext& context,
- const CNNLayer& parent,
- const CNNLayer& child,
- const PrecisionsInfo& precisionsInfo,
- const std::vector<float>& quantizationShifts,
- const bool onWeights) const;
-
- // Fills `dequantizationScales` / `dequantizationShifts` (output parameters) for the
- // weights path of `weightableLayer` and returns a DataPrecision.
- DataPrecision fillDequantizationsForWeightsPath(
- TransformationContext& context,
- const CNNLayer& weightableLayer,
- const bool supportAsymmetricQuantization,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts) const;
-
- // Presumably reports whether `layer` is a depthwise convolution - TODO confirm.
- static bool isDepthwise(const CNNLayer& layer);
-
- // Combines the original data and weights dequantization values into the resulting
- // `dequantizationScales` / `dequantizationShifts` (the only non-const vector arguments).
- void calculateDequantizationForSymmetric(
- const CNNLayer& weightableLayer,
- const std::vector<float>& originalDataDequantizationScales,
- const std::vector<float>& originalDataDequantizationShifts,
- const std::vector<float>& originalWeightsDequantizationScales,
- const std::vector<float>& originalWeightsDequantizationShifts,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts) const;
-
- // NOTE(review): semantics of the "dequantization dim" check are defined in the
- // implementation file - confirm there.
- static bool getDequantizationDimIsSupported(const CNNLayer& weightableLayer);
-};
-
-typedef std::shared_ptr<WeightableLayerTransformation> WeightableLayerTransformationPtr;
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/activation.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-#include <algorithm>
-#include <caseless.hpp>
-#include <memory>
-#include <string>
-#include <vector>
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-// Moves the dequantization ScaleShift that precedes a ReLU to after the activation.
-//
-// The parent of `layer` must be a single-input "ScaleShift"; otherwise nothing is changed.
-//  * If all shifts are zero and all scales are non-negative, the ReLU is kept as is.
-//  * Otherwise (only for negative_slope == 0 and per-tensor scale/shift) the ReLU is
-//    replaced with a Clamp whose bound is -shift / scale.
-// In both cases the parent ScaleShift is removed and a dequantization layer with the same
-// scales/shifts is added after the activation.
-//
-// Throws if `layer` is not a ReLU.
-void ActivationTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
-    if (!CaselessEq<std::string>()(layer.type, "ReLU")) {
-        THROW_IE_EXCEPTION << "layer type '" << layer.name << "' is not correct";
-    }
-
-    const CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(layer, 0);
-    if ((scaleShift == nullptr) || (scaleShift->type != "ScaleShift")) {
-        return;
-    }
-
-    // TODO: temporary limitation
-    if (scaleShift->insData.size() != 1) {
-        return;
-    }
-
-    const Blob::Ptr weightsBlob = CNNNetworkHelper::getBlob(scaleShift, "weights");
-    auto weights = CNNNetworkHelper::getFloatData(weightsBlob);
-    const std::vector<float> scales(weights.get(), weights.get() + weightsBlob->size());
-
-    const Blob::Ptr biasesBlob = CNNNetworkHelper::getBlob(scaleShift, "biases");
-    auto biases = CNNNetworkHelper::getFloatData(biasesBlob);
-    const std::vector<float> shifts(biases.get(), biases.get() + biasesBlob->size());
-
-    const bool allShiftsAreZero = std::all_of(shifts.begin(), shifts.end(), [](float value) {
-        return value == 0.0;
-    });
-    const bool allScalesAreNonNegative = std::all_of(scales.begin(), scales.end(), [](float value) {
-        return value >= 0.0;
-    });
-
-    CNNLayerPtr activationLayer;
-    if (allShiftsAreZero && allScalesAreNonNegative) {
-        activationLayer = std::make_shared<CNNLayer>(layer);
-    } else {
-        const float negativeSlope = layer.GetParamAsFloat("negative_slope", 0.0);
-        if (negativeSlope != 0.0) {
-            return;
-        }
-
-        // `begin() + 1` below is only valid for non-empty vectors, and element 0 is read
-        // later to compute the clamp bound.
-        if (shifts.empty() || scales.empty()) {
-            return;
-        }
-
-        // only per-tensor dequantization is supported: all values have to be equal
-        if (!(std::equal(shifts.begin() + 1, shifts.end(), shifts.begin())) ||
-            !(std::equal(scales.begin() + 1, scales.end(), scales.begin()))) {
-            return;
-        }
-
-        // the clamp bound is -shift / scale: a zero scale gives no finite bound
-        if (scales[0] == 0.f) {
-            return;
-        }
-
-        const Precision precision = getPrecisionBeforeParentDequantizationScaleShift(layer);
-
-        std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(*scaleShift);
-        if (parents.size() != 1) {
-            return;
-        }
-
-        LayerParams layerParams {layer.name + "_Clamp", "Clamp", precision};
-        // keep the concrete type to avoid a dynamic_cast whose result was never checked
-        const std::shared_ptr<ClampLayer> clampLayer = std::make_shared<ClampLayer>(layerParams);
-        activationLayer = clampLayer;
-
-        if (allScalesAreNonNegative) {
-            clampLayer->min_value = -shifts[0] / scales[0];
-            clampLayer->max_value = DataPrecision::getMaxValue(precision);
-        } else {
-            // TODO: workaround: only U8 on activations
-            clampLayer->min_value = DataPrecision::getMinValue(precision, 256);
-            clampLayer->max_value = -shifts[0] / scales[0];
-        }
-        clampLayer->params["min"] = CNNLayer::ie_serialize_float(clampLayer->min_value);
-        clampLayer->params["max"] = CNNLayer::ie_serialize_float(clampLayer->max_value);
-
-        std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(layer);
-        if (children.size() != 1) {
-            return;
-        }
-
-        for (const CNNLayerPtr& child : children) {
-            CNNNetworkHelper::addLayer(context, std::make_shared<CNNLayer>(layer), child, activationLayer);
-        }
-
-        CNNNetworkHelper::removeLayer(context.network, std::make_shared<CNNLayer>(layer));
-        context.removeLayer(layer);
-    }
-
-    if (updatePrecisions) {
-        CNNNetworkHelper::setOutDataPrecision(layer, getPrecisionBeforeParentDequantizationScaleShift(layer));
-    }
-
-    CNNNetworkHelper::removeLayer(context.network, scaleShift);
-    context.removeLayer(*scaleShift);
-
-    addDequantizationLayer(context, *activationLayer, scales, shifts);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/concat.hpp"
-
-#include <algorithm>
-#include <blob_factory.hpp>
-#include <cmath>
-#include <caseless.hpp>
-#include <limits>
-#include <map>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include <ie_common.h>
-#include <legacy/cnn_network_impl.hpp>
-#include <legacy/ie_util_internal.hpp>
-
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/quantization_details.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-// Aligns the FakeQuantize layers feeding a Concat (axis 1) to one common per-tensor
-// quantization interval and adds dequantization layers after the concat subgraph.
-//
-// Nothing is changed when: the layer cannot be transformed, the axis is not 1, the
-// subgraph is empty, one of its quantization layers was already handled, no precision
-// can be selected, quantization is not per-tensor, or the common interval is [0, 0].
-// Throws if `concat` is not a Concat, has fewer than two inputs, or if the quantization
-// layers with supported levels use different level counts.
-//
-// Quantization layers with unsupported levels are skipped; each kept layer is stored
-// together with its details so that both are always addressed by the same index.
-void ConcatTransformation::transform(TransformationContext& context, CNNLayer& concat) const {
-    if (!canBeTransformed(context, concat)) {
-        return;
-    }
-
-    if (!CaselessEq<std::string>()(concat.type, "Concat")) {
-        THROW_IE_EXCEPTION << "layer type '" << concat.name << "' is not correct";
-    }
-
-    if (concat.GetParamAsUInt("axis", 1) != 1) {
-        return;
-    }
-
-    if ((concat.insData.size() < 2)) {
-        THROW_IE_EXCEPTION << "layer inputs '" << concat.insData.size() << "' is not correct";
-    }
-
-    Subgraph subgraph = CNNNetworkHelper::getSubgraph(concat);
-    if (subgraph.empty()) {
-        return;
-    }
-
-    // element 0 is read unconditionally below
-    if (subgraph.quantizationLayers.empty()) {
-        return;
-    }
-
-    for (const CNNLayerPtr& quantizationLayer : subgraph.quantizationLayers) {
-        if (context.quantizedFakeQuantizeNames.find(quantizationLayer->name) != context.quantizedFakeQuantizeNames.end()) {
-            return;
-        }
-    }
-
-    DataPrecision dataPrecision = getDataPrecision(
-        *subgraph.quantizationLayers[0],
-        QuantizationDetails::getDetails(*subgraph.quantizationLayers[0]), false, false);
-    if (dataPrecision.precision == Precision::UNSPECIFIED) {
-        return;
-    }
-
-    // TODO: FQ output I8 but Convolution U8 before <- we should handle that avoid asymmetric quantization
-
-    // `alignedLayers[i]` always corresponds to `quantizationLayersDetails[i]`
-    std::vector<CNNLayerPtr> alignedLayers;
-    std::vector<QuantizationDetails> quantizationLayersDetails;
-    size_t quantizationLevels = 0lu;
-    for (const CNNLayerPtr& quantizationLayer : subgraph.quantizationLayers) {
-        const QuantizationDetails& quantizationDetails = QuantizationDetails::getDetails(*quantizationLayer);
-        if (!QuantizationDetails::isSupportedLevel(quantizationDetails.levels)) continue;
-        if (quantizationLevels == 0lu) {
-            quantizationLevels = quantizationDetails.levels;
-        } else if (quantizationLevels != quantizationDetails.levels) {
-            THROW_IE_EXCEPTION << "different quantization levels " << quantizationLevels << " are not supported";
-        }
-
-        alignedLayers.push_back(quantizationLayer);
-        quantizationLayersDetails.push_back(quantizationDetails);
-
-        const DataPrecision dataPrecision2 = getDataPrecision(*quantizationLayer, quantizationDetails, false, false);
-        if (dataPrecision2.precision == Precision::UNSPECIFIED) {
-            return;
-        }
-
-        if (dataPrecision.precision != dataPrecision2.precision) {
-            // quantization levels are the same, the difference can only be in sign;
-            // the wider interval is preferable: use signed if at least one interval is signed
-            dataPrecision = dataPrecision.precision.isSigned() ? dataPrecision : dataPrecision2;
-        }
-    }
-
-    if (dataPrecision.precision == Precision::UNSPECIFIED) {
-        return;
-    }
-
-    // only per-tensor scale is supported
-    if (quantizationLayersDetails.empty() || (quantizationLayersDetails[0].inputHighValues.size() != 1ul)) {
-        return;
-    }
-
-    // common output interval: union of the intervals of all kept quantization layers
-    float outputLowValue = quantizationLayersDetails[0].outputLowValues[0];
-    float outputHighValue = quantizationLayersDetails[0].outputHighValues[0];
-    for (const QuantizationDetails& quantizationDetails : quantizationLayersDetails) {
-        if (outputLowValue > quantizationDetails.outputLowValues[0]) {
-            outputLowValue = quantizationDetails.outputLowValues[0];
-        }
-        if (outputHighValue < quantizationDetails.outputHighValues[0]) {
-            outputHighValue = quantizationDetails.outputHighValues[0];
-        }
-    }
-
-    if ((outputLowValue == 0.f) && (outputHighValue == 0.f)) {
-        return;
-    }
-
-    const float maxOutputInterval = outputHighValue - outputLowValue;
-    if (quantizedTensorAlignmentOnActivations == QuantizedTensorAlignment::UpdateLevel) {
-        const size_t minLevels = getMinQuantizationLevels(
-            dataPrecision,
-            maxOutputInterval,
-            quantizationLayersDetails,
-            outputLowValue,
-            outputHighValue);
-        if (minLevels < this->minQuantizationLevels) {
-            return;
-        }
-    }
-
-    const float dequantizationScale = maxOutputInterval / (dataPrecision.max - dataPrecision.min);
-    const float min = maxOutputInterval / ((dataPrecision.max - dataPrecision.min) / dataPrecision.min);
-    const float dequantizationShift = outputLowValue - min;
-
-    const float quantizationScale = 1.f / dequantizationScale;
-    const float quantizationShift = - dequantizationShift * quantizationScale;
-
-    for (size_t index = 0lu; index < quantizationLayersDetails.size(); index++) {
-        CNNLayer& fakeQuantizeLayer = *alignedLayers[index];
-        const QuantizationDetails& quantizationDetails = quantizationLayersDetails[index];
-
-        switch (quantizedTensorAlignmentOnActivations) {
-            case QuantizedTensorAlignment::None: {
-                const float updatedOutputLowValue = quantizationDetails.outputLowValues[0] * quantizationScale + quantizationShift;
-                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
-
-                const float updatedOutputHighValue = quantizationDetails.outputHighValues[0] * quantizationScale + quantizationShift;
-                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
-
-                break;
-            }
-            case QuantizedTensorAlignment::UpdateIntervals: {
-                const float inputLowValue = quantizationDetails.outputLowValues[0] != 0.0
-                    ? (quantizationDetails.inputLowValues[0] *
-                        (outputLowValue / quantizationDetails.outputLowValues[0]))
-                    : outputLowValue;
-                const float inputHighValue = quantizationDetails.outputHighValues[0] != 0.0
-                    ? (quantizationDetails.inputHighValues[0] *
-                        (outputHighValue / quantizationDetails.outputHighValues[0]))
-                    : outputHighValue;
-
-                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 1, inputLowValue);
-                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 2, inputHighValue);
-                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 3, dataPrecision.min);
-                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 4, dataPrecision.max);
-                break;
-            }
-            case QuantizedTensorAlignment::UpdateLevel: {
-                const float updatedOutputLowValue = quantizationDetails.outputLowValues[0] * quantizationScale + quantizationShift;
-                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
-
-                const float updatedOutputHighValue = quantizationDetails.outputHighValues[0] * quantizationScale + quantizationShift;
-                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
-
-                const int levels = static_cast<int>(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0);
-                fakeQuantizeLayer.params["levels"] = std::to_string(levels);
-                QuantizeLayer* layer = dynamic_cast<QuantizeLayer*>(&fakeQuantizeLayer);
-                if (layer == nullptr) {
-                    THROW_IE_EXCEPTION << "incorrect type for layer " << fakeQuantizeLayer.name;
-                }
-                layer->levels = levels;
-
-                break;
-            }
-            default: {
-                THROW_IE_EXCEPTION << "unexpected value " << quantizedTensorAlignmentOnActivations;
-            }
-        }
-    }
-
-    if (updatePrecisions) {
-        for (const auto& it : subgraph.layers) {
-            const CNNLayer* layer = it.second;
-            CNNNetworkHelper::setOutDataPrecision(*layer, dataPrecision.precision);
-        }
-    }
-
-    // every layer of the subgraph gets the same per-tensor scale and shift,
-    // replicated for each of its output channels
-    auto dequantizationValuesCallback = [&](
-        const CNNLayer& layer,
-        const std::string& originalLayerName,
-        std::vector<float>& layerDequantizationScales,
-        std::vector<float>& layerDequantizationShifts
-        ) {
-        const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(layer);
-
-        layerDequantizationScales.resize(outputChannelsCount);
-        std::fill(layerDequantizationScales.begin(), layerDequantizationScales.end(), dequantizationScale);
-
-        layerDequantizationShifts.resize(outputChannelsCount);
-        std::fill(layerDequantizationShifts.begin(), layerDequantizationShifts.end(), dequantizationShift);
-    };
-
-    addDequantizationLayers(context, subgraph, dequantizationValuesCallback);
-
-    // remember the handled quantization layers so that other Concat layers sharing
-    // them do not transform them a second time
-    for (const CNNLayerPtr& quantizationLayer : subgraph.quantizationLayers) {
-        context.quantizedFakeQuantizeNames.insert(quantizationLayer->name);
-    }
-}
-
-// Adds dequantization ScaleShift layers on every edge that leaves `subgraph` and after
-// every subgraph layer that is a network output.
-//
-// `getLayerDequantizationCallback(layer, originalLayerName, scales, shifts)` is asked for
-// the values of a layer at most once, and only when that layer actually needs a
-// dequantization layer. Names of all created layers are recorded in
-// `context.dequantizationLayersNames`.
-void ConcatTransformation::addDequantizationLayers(
- TransformationContext& context,
- Subgraph& subgraph,
- std::function<void(
- const CNNLayer& layer,
- const std::string& originalLayerName,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts)> getLayerDequantizationCallback) const {
- OutputsDataMap outputs;
- context.network.getOutputsInfo(outputs);
-
- // Work on a copy: `subgraph.layers` is extended inside the loop, and those additions
- // must not be visited.
- std::unordered_map<std::string, CNNLayer*> notHandledSubgraphLayers = subgraph.layers;
- while (notHandledSubgraphLayers.size() != 0ul) {
- const auto layerIt = notHandledSubgraphLayers.begin();
- CNNLayer* layer = layerIt->second;
- notHandledSubgraphLayers.erase(layerIt);
-
- // Filled lazily by the callback; empty means "not requested yet".
- std::vector<float> layerDequantizationScales;
- std::vector<float> layerDequantizationShifts;
-
- const std::vector<CNNLayerPtr>& children = CNNNetworkHelper::getChildren(*layer);
- for (const CNNLayerPtr& child : children) {
- // A child that is not part of the subgraph marks an outgoing edge.
- if (subgraph.layers.find(child->name) == subgraph.layers.end()) {
- if (layerDequantizationScales.size() == 0ul) {
- getLayerDequantizationCallback(*layer, layer->name, layerDequantizationScales, layerDequantizationShifts);
- }
-
- const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
- context,
- std::make_shared<CNNLayer>(*layer),
- child,
- DequantizationDetails(layerDequantizationScales, layerDequantizationShifts, layerDequantizationScales.size()));
-
- for (const CNNLayerPtr& dequantizationLayer : dequantizationLayers) {
- context.dequantizationLayersNames.insert(dequantizationLayer->name);
- }
- }
- }
-
- // The layer is a network output: rename it with `lastLayerPostfix` and pass the
- // original name on to addScaleShiftBetween (with no child layer).
- const auto it = outputs.find(layer->name);
- if (it != outputs.end()) {
- const std::string originalName = layer->name;
- const std::string newName = layer->name + LayerTransformation::lastLayerPostfix;
- CNNNetworkHelper::renameLayer(context.network, originalName, newName);
-
- layer->name = newName;
- // The entry under the original name is left in place; the new name is added.
- subgraph.layers[layer->name] = layer;
-
- if (layerDequantizationScales.size() == 0ul) {
- getLayerDequantizationCallback(*layer, originalName, layerDequantizationScales, layerDequantizationShifts);
- }
-
- const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
- context,
- std::make_shared<CNNLayer>(*layer),
- nullptr,
- DequantizationDetails(layerDequantizationScales, layerDequantizationShifts, layerDequantizationScales.size()),
- originalName);
-
- for (const CNNLayerPtr& dequantizationLayer : dequantizationLayers) {
- context.dequantizationLayersNames.insert(dequantizationLayer->name);
- // NOTE(review): a raw pointer is stored; the layer has to be kept alive elsewhere
- // (presumably by the network) - confirm.
- subgraph.layers[dequantizationLayer->name] = dequantizationLayer.get();
- }
- }
- }
-}
-
-// Returns the smallest number of quantization levels any layer in
-// `quantizationLayersDetails` would keep after its output interval is rescaled to the
-// common interval [outputLowValue, outputHighValue] in `dataPrecision`.
-//
-// Returns std::numeric_limits<std::size_t>::max() for an empty `quantizationLayersDetails`.
-// `maxOutputInterval` is not used by the calculation.
-// The caller has to guarantee that `outputLowValue` and `outputHighValue` are not both zero.
-size_t ConcatTransformation::getMinQuantizationLevels(
-    const DataPrecision& dataPrecision,
-    const float maxOutputInterval,
-    const std::vector<QuantizationDetails>& quantizationLayersDetails,
-    const float outputLowValue,
-    const float outputHighValue) const {
-    size_t minLevels = std::numeric_limits<std::size_t>::max();
-    // iterate by reference: QuantizationDetails owns several vectors and is costly to copy
-    for (const QuantizationDetails& quantizationDetails : quantizationLayersDetails) {
-        // if there is negative part then calculation is based on `outputLowValue` if not then on `outputHighValue` only
-        const float updatedOutputLowValue = outputLowValue != 0.f ?
-            (quantizationDetails.outputLowValues[0] / outputLowValue) * dataPrecision.min :
-            (quantizationDetails.outputLowValues[0] / outputHighValue) * dataPrecision.max;
-
-        // if there is positive part then calculation is based on `outputHighValue` if not then on `outputLowValue` only
-        const float updatedOutputHighValue = outputHighValue != 0.f ?
-            (quantizationDetails.outputHighValues[0] / outputHighValue) * dataPrecision.max :
-            (quantizationDetails.outputHighValues[0] / outputLowValue) * dataPrecision.min;
-
-        // the value is always >= 1, so an unsigned type avoids a signed/unsigned comparison
-        const size_t levels = static_cast<size_t>(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0);
-        if (minLevels > levels) {
-            minLevels = levels;
-        }
-    }
-    return minLevels;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/concat_multi_channels.hpp"
-
-#include <ie_common.h>
-
-#include <algorithm>
-#include <blob_factory.hpp>
-#include <caseless.hpp>
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-
-#include <legacy/cnn_network_impl.hpp>
-#include <legacy/ie_util_internal.hpp>
-
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-#include "low_precision_transformations/quantization_details.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-// Returns false as soon as one of `concatLayers` has a "Convolution" among the layers
-// reported by getChildrenRecursivelyExceptTypes for the types "Pooling" and "Resample";
-// returns true otherwise (including for an empty list).
-bool isMultiChannel(const std::vector<CNNLayerPtr>& concatLayers) {
-    return std::none_of(
-        concatLayers.begin(),
-        concatLayers.end(),
-        [](const CNNLayerPtr& concatLayer) {
-            const std::vector<CNNLayerPtr> descendants =
-                CNNNetworkHelper::getChildrenRecursivelyExceptTypes(*concatLayer, {"Pooling", "Resample"});
-            return CNNNetworkHelper::IsChild(descendants, {"Convolution"});
-        });
-}
-
-// Concat transformation that keeps an individual (per FakeQuantize, per channel)
-// dequantization instead of aligning all inputs to one interval.
-//
-// Falls back to ConcatTransformation::transform when isMultiChannel() reports false for
-// the concat layers of the subgraph. Nothing is changed when the layer cannot be
-// transformed, the axis is not 1, the subgraph is empty, one of its quantization layers
-// was already handled, or no precision can be selected.
-// Throws if `concat` is not a Concat or has fewer than two inputs.
-void ConcatMultiChannelsTransformation::transform(TransformationContext& context, CNNLayer& concat) const {
- if (!canBeTransformed(context, concat)) {
- return;
- }
-
- if (!CaselessEq<std::string>()(concat.type, "Concat")) {
- THROW_IE_EXCEPTION << "layer type '" << concat.name << "' is not correct";
- }
-
- if ((concat.insData.size() < 2)) {
- THROW_IE_EXCEPTION << "layer inputs '" << concat.insData.size() << "' is not correct";
- }
-
- if (concat.GetParamAsUInt("axis", 1) != 1) {
- return;
- }
-
- Subgraph subgraph = CNNNetworkHelper::getSubgraph(concat);
- if (subgraph.empty()) {
- return;
- }
-
- // Skip subgraphs whose quantization layers were already processed via another Concat.
- for (const CNNLayerPtr& quantizationLayer : subgraph.quantizationLayers) {
- if (context.quantizedFakeQuantizeNames.find(quantizationLayer->name) != context.quantizedFakeQuantizeNames.end()) {
- return;
- }
- }
-
- if (!isMultiChannel(subgraph.concatLayers)) {
- ConcatTransformation::transform(context, concat);
- return;
- }
-
- // TODO: update later
- // TODO: check if precisions are different and return
- // The precision is selected from the first quantization layer only.
- const DataPrecision dataPrecision = getDataPrecision(
- *subgraph.quantizationLayers[0],
- QuantizationDetails::getDetails(*subgraph.quantizationLayers[0]),
- false,
- false);
- if (dataPrecision.precision == Precision::UNSPECIFIED) {
- return;
- }
-
- // Per channel dequantization values, keyed by FakeQuantize layer name.
- std::unordered_map<std::string, std::vector<float>> dequantizationScalesLayers;
- std::unordered_map<std::string, std::vector<float>> dequantizationShiftsLayers;
-
- for (const CNNLayerPtr& fakeQuantizeLayer : subgraph.quantizationLayers) {
- if (fakeQuantizeLayer->type != "FakeQuantize") {
- continue;
- }
-
- const QuantizationDetails& quantizationDetails = QuantizationDetails::getDetails(*fakeQuantizeLayer);
- const size_t channelsCount = CNNNetworkHelper::getOutputChannelsCount(*fakeQuantizeLayer);
- std::vector<float> dequantizationScales(channelsCount);
- std::vector<float> dequantizationShifts(channelsCount);
- for (size_t i = 0ul; i < channelsCount; ++i) {
- // scale = (outputHigh - outputLow) / (max - min); 1.0 for unsupported levels
- dequantizationScales[i] = QuantizationDetails::isSupportedLevel(quantizationDetails.levels) ?
- (quantizationDetails.getOutputHighValue(i) - quantizationDetails.getOutputLowValue(i)) / (dataPrecision.max - dataPrecision.min) :
- 1.0;
-
- // shift = outputHigh - (outputHigh - outputLow) * max / (max - min); 0 for unsupported levels
- dequantizationShifts[i] = QuantizationDetails::isSupportedLevel(quantizationDetails.levels) ?
- (quantizationDetails.getOutputHighValue(i) - (quantizationDetails.getOutputHighValue(i) - quantizationDetails.getOutputLowValue(i)) *
- (dataPrecision.max / (dataPrecision.max - dataPrecision.min))) :
- 0.f;
- }
- checkAndUpdateDequantizationShiftWithZero(quantizationDetails, dequantizationShifts);
-
- dequantizationScalesLayers[fakeQuantizeLayer->name] = dequantizationScales;
- dequantizationShiftsLayers[fakeQuantizeLayer->name] = dequantizationShifts;
-
- // NOTE(review): blobs 3 and 4 are presumably the FakeQuantize output low/high
- // constants - confirm against CNNNetworkHelper::updateBlobs.
- CNNNetworkHelper::updateBlobs(context, *fakeQuantizeLayer, 3, dataPrecision.min);
- CNNNetworkHelper::updateBlobs(context, *fakeQuantizeLayer, 4, dataPrecision.max);
- }
-
- if (updatePrecisions) {
- for (const auto it : subgraph.layers) {
- const CNNLayer* layer = it.second;
- CNNNetworkHelper::setOutDataPrecision(*layer, dataPrecision.precision);
- }
- }
-
- // Callback for addDequantizationLayers: collects the values of all FakeQuantize
- // layers that feed `layer`.
- auto dequantizationValuesCallback = [&](
- const CNNLayer& layer,
- const std::string originalLayerName,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts
- ) {
- // The layer was renamed by the caller: move its map entries to the new name.
- if (layer.name != originalLayerName) {
- const auto update = [](
- const std::string& originalLayerName,
- const std::string& newLayerName,
- std::unordered_map<std::string, std::vector<float>>& dequantizationLayers) {
- auto it = dequantizationLayers.find(originalLayerName);
- if (it != dequantizationLayers.end()) {
- dequantizationLayers.emplace(newLayerName, it->second);
- dequantizationLayers.erase(it);
- }
- };
- update(originalLayerName, layer.name, dequantizationScalesLayers);
- update(originalLayerName, layer.name, dequantizationShiftsLayers);
- }
-
- fillDequantization(
- layer,
- dequantizationScalesLayers, dequantizationShiftsLayers,
- dequantizationScales, dequantizationShifts);
- };
-
- addDequantizationLayers(context, subgraph, dequantizationValuesCallback);
-
- // Mark the quantization layers as handled.
- for (const CNNLayerPtr& quantizationLayer : subgraph.quantizationLayers) {
- context.quantizedFakeQuantizeNames.insert(quantizationLayer->name);
- }
-}
-
-// Appends to `dequantizationScales` / `dequantizationShifts` the stored values of every
-// FakeQuantize layer that feeds `layer` (or of `layer` itself when it is a FakeQuantize).
-// The values are looked up by layer name; a missing entry in either map is an error.
-void ConcatMultiChannelsTransformation::fillDequantization(
-    const CNNLayer& layer,
-    const std::unordered_map<std::string, std::vector<float>>& dequantizationScalesLayers,
-    const std::unordered_map<std::string, std::vector<float>>& dequantizationShiftsLayers,
-    std::vector<float>& dequantizationScales,
-    std::vector<float>& dequantizationShifts) {
-    std::vector<CNNLayerPtr> sourceFakeQuantizes;
-    if (layer.type != "FakeQuantize") {
-        fillQuantization(layer, sourceFakeQuantizes);
-    } else {
-        sourceFakeQuantizes.push_back(std::make_shared<CNNLayer>(layer));
-    }
-
-    for (const CNNLayerPtr& source : sourceFakeQuantizes) {
-        // scales are appended before the shifts of the same layer are looked up
-        const auto foundScales = dequantizationScalesLayers.find(source->name);
-        if (foundScales == dequantizationScalesLayers.end()) {
-            THROW_IE_LPT_EXCEPTION(*source) << "dequantization scale values are not found";
-        }
-        const std::vector<float>& sourceScales = foundScales->second;
-        dequantizationScales.insert(dequantizationScales.end(), sourceScales.begin(), sourceScales.end());
-
-        const auto foundShifts = dequantizationShiftsLayers.find(source->name);
-        if (foundShifts == dequantizationShiftsLayers.end()) {
-            THROW_IE_LPT_EXCEPTION(*source) << "dequantization shift values are not found";
-        }
-        const std::vector<float>& sourceShifts = foundShifts->second;
-        dequantizationShifts.insert(dequantizationShifts.end(), sourceShifts.begin(), sourceShifts.end());
-    }
-}
-
-// Walks up from `layer` through its parents and appends to `fakeQuantizes` the first
-// "FakeQuantize" layer met on every path, in depth-first order.
-void ConcatMultiChannelsTransformation::fillQuantization(const CNNLayer& layer, std::vector<CNNLayerPtr>& fakeQuantizes) {
-    const std::vector<CNNLayerPtr> layerParents = CNNNetworkHelper::getParents(layer);
-    for (const CNNLayerPtr& ancestor : layerParents) {
-        if (ancestor->type != "FakeQuantize") {
-            // not a quantization layer yet: keep climbing
-            fillQuantization(*ancestor, fakeQuantizes);
-            continue;
-        }
-        fakeQuantizes.push_back(ancestor);
-    }
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/const.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-#include <ie_common.h>
-
-#include <algorithm>
-#include <blob_factory.hpp>
-#include <cmath>
-#include <caseless.hpp>
-#include <limits>
-#include <map>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include <legacy/cnn_network_impl.hpp>
-#include <legacy/ie_util_internal.hpp>
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-// Folds a FakeQuantize that quantizes a constant: when `layer` is a Const whose children
-// include a FakeQuantize and `layer` is the data input (input 0) of that FakeQuantize,
-// the quantized values are computed and the FakeQuantize is turned into a constant.
-//
-// Throws if `layer` is not a Const, if it feeds a FakeQuantize but does not have exactly
-// one child, or if the FakeQuantize has no layer on input 0.
-void ConstTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
-    if (!canBeTransformed(context, layer)) {
-        return;
-    }
-
-    const bool isConst = CaselessEq<std::string>()(layer.type, "Const");
-    if (!isConst) {
-        THROW_IE_EXCEPTION << "layer type '" << layer.name << "' is not correct";
-    }
-
-    // a constant has no inputs and exactly one output
-    const bool hasConstShape = (layer.insData.size() == 0) && (layer.outData.size() == 1);
-    if (!hasConstShape) {
-        return;
-    }
-
-    const std::vector<CNNLayerPtr> consumers = CNNNetworkHelper::getChildren(layer);
-    if (!CNNNetworkHelper::IsChild(consumers, {"FakeQuantize"})) {
-        return;
-    }
-    if (consumers.size() != 1) {
-        THROW_IE_EXCEPTION << "unexpected children count " << consumers.size();
-    }
-
-    const auto fakeQuantizeLayer = consumers[0];
-    const CNNLayerPtr dataParent = CNNNetworkHelper::getParent(*fakeQuantizeLayer, 0);
-    if (dataParent == nullptr) {
-        THROW_IE_EXCEPTION << "input data layer for FakeQuantize " << fakeQuantizeLayer->name << " is nullable";
-    }
-
-    // the constant has to be the quantized data itself, not one of the interval inputs
-    if (dataParent->name != layer.name) {
-        return;
-    }
-
-    const Blob::Ptr quantizedWeights = CNNNetworkHelper::quantizeWeights(*fakeQuantizeLayer, roundQuantizedValues);
-    CNNNetworkHelper::transformFakeQuantizeToConst(context, fakeQuantizeLayer, quantizedWeights, layer.name);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/convolution.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-#include <algorithm>
-#include <caseless.hpp>
-#include <memory>
-#include <string>
-#include <vector>
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-// Computes the per output channel dequantization of a convolution for the asymmetric
-// quantization case.
-//
-//  * dequantizationScales[c] = dataScale * weightsScale(c)
-//  * dequantizationShifts[c] = bias[c] * (1 - dataScale * weightsScale(c)), or 0 without biases
-//
-// Both output vectors are resized to the output channel count of `convolution`.
-// `originalDataDequantizationShifts`, `dataZeroPoints`, `originalWeightsDequantizationShifts`
-// and `weightsZeroPoints` are not read by this function.
-//
-// Throws if the data scales are not per output channel and differ from each other, or if
-// the biases blob is neither FP32 nor FP16.
-// NOTE(review): `originalDataDequantizationScales[0]` is read without an emptiness check;
-// callers presumably always pass a non-empty vector - confirm.
-void ConvolutionTransformation::calculateDequantizationForAsymmetric(
- const CNNLayer& convolution,
- const std::vector<float>& originalDataDequantizationScales,
- const std::vector<float>& originalDataDequantizationShifts,
- const std::vector<float>& dataZeroPoints,
- const std::vector<float>& originalWeightsDequantizationScales,
- const std::vector<float>& originalWeightsDequantizationShifts,
- const std::vector<float>& weightsZeroPoints,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts) const {
- const size_t outputChannelCount = CNNNetworkHelper::getOutputChannelsCount(convolution);
- // When the data scales cannot be mapped one-to-one to output channels they are used
- // as a single per-tensor value, so all of them have to be equal.
- if (originalDataDequantizationScales.size() != outputChannelCount) {
- for (size_t i = 1ul; i < originalDataDequantizationScales.size(); ++i) {
- if (originalDataDequantizationScales[i - 1] != originalDataDequantizationScales[i])
- THROW_IE_EXCEPTION << "original dequantization scales on activations have different values";
- }
- }
-
- dequantizationScales.resize(outputChannelCount);
- for (size_t i = 0lu; i < dequantizationScales.size(); ++i) {
- // Weights scale: 1.0 when absent, the single value when per-tensor, else per channel.
- const float originalWeightsDequantizationScale = (originalWeightsDequantizationScales.size() == 0) ?
- 1.0 : (originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[i]);
- const float originalDataDequantizationScale = (originalDataDequantizationScales.size() != dequantizationScales.size()) ?
- originalDataDequantizationScales[0] : originalDataDequantizationScales[i];
- dequantizationScales[i] = originalDataDequantizationScale * originalWeightsDequantizationScale;
- }
-
- dequantizationShifts.resize(outputChannelCount);
-
- const Blob::Ptr convolutionBiasesBlob = CNNNetworkHelper::getBiases(convolution);
- if ((convolutionBiasesBlob != nullptr) &&
- convolutionBiasesBlob->getTensorDesc().getPrecision() != Precision::FP32 &&
- convolutionBiasesBlob->getTensorDesc().getPrecision() != Precision::FP16) {
- THROW_IE_EXCEPTION << "Unexpected convolution biases precision "
- << convolutionBiasesBlob->getTensorDesc().getPrecision();
- }
- const auto convolutionBiasesBuffer = convolutionBiasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(convolutionBiasesBlob);
-
- for (size_t outputChannel = 0lu; outputChannel < outputChannelCount; ++outputChannel) {
- const float originalWeightsDequantizationScale =
- originalWeightsDequantizationScales.size() == 0lu
- ? 1.0
- : (originalWeightsDequantizationScales.size() == 1
- ? originalWeightsDequantizationScales[0]
- : originalWeightsDequantizationScales[outputChannel]);
-
- // Note: the index selection differs from the scales loop above (index bound check
- // instead of size equality); the uniformity check at the top makes both agree.
- const float originalDataDequantizationScale = (outputChannel < originalDataDequantizationScales.size()) ?
- originalDataDequantizationScales[outputChannel] :
- originalDataDequantizationScales[0];
-
- dequantizationShifts[outputChannel] =
- convolutionBiasesBuffer == nullptr
- ? 0.0
- : convolutionBiasesBuffer.get()[outputChannel] *
- (1.0f - originalDataDequantizationScale * originalWeightsDequantizationScale);
- }
-}
-
-void ConvolutionTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
- if (!WeightableLayerTransformation::canBeTransformed(context, layer)) {
- return;
- }
-
- if (!CaselessEq<std::string>()(layer.type, "Convolution")) {
- THROW_IE_EXCEPTION << "Layer '" << layer.name << "' has invalid type '" << layer.type << "'. Convolution is expected.";
- }
-
- const CNNLayerPtr scaleShiftOnData = CNNNetworkHelper::getParent(layer, 0);
- const CNNLayerPtr parentOnWeights = CNNNetworkHelper::getParent(layer, 1);
- if (parentOnWeights->type != "FakeQuantize") {
- return;
- }
-
- std::vector<float> originalDataDequantizationScales;
- std::vector<float> originalDataDequantizationShifts;
- fillFromDequantizationLayer(*scaleShiftOnData, originalDataDequantizationScales, originalDataDequantizationShifts);
-
- const bool isDepthwiseConvolution = isDepthwise(layer);
- if (!isDepthwiseConvolution) {
- for (size_t i = 0lu; i < (originalDataDequantizationScales.size() - 1); ++i) {
- if (originalDataDequantizationScales[i] != originalDataDequantizationScales[i + 1]) {
- return;
- }
- }
- }
-
- std::vector<float> originalWeightsDequantizationScales;
- std::vector<float> originalWeightsDequantizationShifts;
- const CNNLayerPtr parentOnData = CNNNetworkHelper::getParent(layer, 0ul);
-
- const DataPrecision dataPrecisionOnWeights = fillDequantizationsForWeightsPath(
- context,
- layer,
- supportAsymmetricQuantization,
- originalWeightsDequantizationScales,
- originalWeightsDequantizationShifts);
-
-#ifdef LPT_PRINT_DEQUANTIZATION_INFO
- printDequantizationValues(originalWeightsDequantizationScales, originalWeightsDequantizationShifts);
-#endif
-
- std::vector<float> dequantizationScales;
- std::vector<float> dequantizationShifts;
- if (supportAsymmetricQuantization) {
- std::vector<float> dataZeroPoints(originalDataDequantizationShifts.size());
- for (size_t i = 0ul; i < originalDataDequantizationShifts.size(); ++i) {
- dataZeroPoints[i] = originalDataDequantizationShifts[i] / originalDataDequantizationScales[i];
- }
-
- std::vector<float> weightsZeroPoints(originalWeightsDequantizationShifts.size());
- for (size_t i = 0ul; i < originalWeightsDequantizationShifts.size(); ++i) {
- weightsZeroPoints[i] = originalWeightsDequantizationShifts[i] / originalWeightsDequantizationScales[i];
- }
-
- calculateDequantizationForAsymmetric(
- layer,
- originalDataDequantizationScales,
- originalDataDequantizationShifts,
- dataZeroPoints,
- originalWeightsDequantizationScales,
- originalWeightsDequantizationShifts,
- weightsZeroPoints,
- dequantizationScales,
- dequantizationShifts);
-
- const Precision weightsOriginalPrecision = parentOnWeights->outData[0]->getTensorDesc().getPrecision();
- const PrecisionsInfo dataPrecisionsInfo(
- scaleShiftOnData->outData[0]->getTensorDesc().getPrecision(),
- CNNNetworkHelper::getPrecisionParent(*scaleShiftOnData));
-
- std::vector<float> dataShifts(originalDataDequantizationShifts.size());
- for (size_t i = 0; i < dataShifts.size(); ++i) {
- dataShifts[i] = -originalDataDequantizationShifts[i] / originalDataDequantizationScales[i];
- }
-
- std::vector<float> weightsShifts(originalWeightsDequantizationShifts.size());
- for (size_t i = 0; i < weightsShifts.size(); ++i) {
- weightsShifts[i] = -originalWeightsDequantizationShifts[i] / originalWeightsDequantizationScales[i];
- }
-
- updateToSupportAsymmetricQuantization(
- context,
- layer,
- dataPrecisionsInfo,
- dataShifts,
- PrecisionsInfo(weightsOriginalPrecision, dataPrecisionOnWeights.precision),
- weightsShifts);
- } else {
- if (std::any_of(
- originalWeightsDequantizationShifts.begin(),
- originalWeightsDequantizationShifts.end(),
- [](const float value) { return value != 0.f; })) {
- return;
- }
-
- calculateDequantizationForSymmetric(
- layer,
- originalDataDequantizationScales,
- originalDataDequantizationShifts,
- originalWeightsDequantizationScales,
- originalWeightsDequantizationShifts,
- dequantizationScales,
- dequantizationShifts);
- }
-
- if (this->updateBiases) {
- std::vector<float> biasesShifts(dequantizationShifts.size(), 0.f);
- updateLayerBiases(context, layer, false, dequantizationScales, dequantizationShifts, biasesShifts);
- }
-
- CNNNetworkHelper::removeLayer(context.network, scaleShiftOnData);
- context.removeLayer(*scaleShiftOnData);
-
- if (parentOnWeights->type == "ScaleShift") {
- CNNNetworkHelper::removeLayer(context.network, parentOnWeights);
- context.removeLayer(*parentOnWeights);
- } else if (parentOnWeights->type == "FakeQuantize") {
- if (weightsToConst) {
- const Blob::Ptr weights = updatePrecisions ?
- CNNNetworkHelper::quantizeWeights(*parentOnWeights, roundQuantizedValues, dataPrecisionOnWeights.precision) :
- CNNNetworkHelper::quantizeWeights(*parentOnWeights, roundQuantizedValues);
-
- const std::vector<CNNLayerPtr> constLayers = CNNNetworkHelper::transformFakeQuantizeToConst(
- context,
- parentOnWeights,
- weights,
- CNNNetworkHelper::getParent(*parentOnWeights, 0)->name);
-
- if (updatePrecisions) {
- for (const CNNLayerPtr constLayer : constLayers) {
- CNNNetworkHelper::setOutDataPrecision(*constLayer, dataPrecisionOnWeights.precision);
- }
- }
- }
- } else {
- THROW_IE_EXCEPTION << "unexpected parent layer type on weights: " << parentOnWeights->type;
- }
-
- addDequantizationLayer(context, layer, dequantizationScales, dequantizationShifts);
-}
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/depth_to_space.hpp"
-
-#include <algorithm>
-#include <caseless.hpp>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-void DepthToSpaceTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
- if (!canBeTransformed(context, layer)) {
- return;
- }
-
- if ((layer.insData.size() == 0) || layer.insData.size() > 2) {
- THROW_IE_EXCEPTION << "layer inputs '" << layer.insData.size() << "' is not correct";
- }
-
- if (!CaselessEq<std::string>()(layer.type, "DepthToSpace")) {
- THROW_IE_EXCEPTION << "layer '" << layer.name << "' is not correct";
- }
-
- TransparentBaseTransformation::transform(context, layer);
-}
-
-bool DepthToSpaceTransformation::isPrecisionPreserved(const CNNLayer& layer) const noexcept {
- return true;
-}
-
-bool DepthToSpaceTransformation::canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const {
- if (!TransparentBaseTransformation::canBeTransformed(context, layer)) {
- return false;
- }
-
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(layer);
- if (parents.size() != 1) {
- return false;
- }
-
- if (parents[0]->type != "ScaleShift") {
- return false;
- }
-
- const std::vector<size_t> inputDims = parents[0]->outData[0]->getDims();
- if (inputDims.size() < 3) {
- return false;
- }
-
- const size_t inputChannels = CNNNetworkHelper::getInputChannelsCount(layer);
- const size_t outputChannels = CNNNetworkHelper::getOutputChannelsCount(layer);
- if (inputChannels != outputChannels) {
- std::vector<float> scales;
- std::vector<float> shifts;
- fillFromDequantizationLayer(*parents[0], scales, shifts);
-
- if (!DequantizationDetails::isPerTensor(scales, shifts)) {
- return false;
- }
- }
-
- return true;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/common/dequantization_info.hpp"
-
-#include <ie_common.h>
-
-#include <blob_factory.hpp>
-#include <cmath>
-#include <limits>
-#include <map>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include <legacy/cnn_network_impl.hpp>
-#include <legacy/ie_util_internal.hpp>
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-DequantizationInfo::DequantizationInfo(const size_t levels, const std::vector<float>& outputLowValues,
- const std::vector<float>& outputHighValues)
- : levels(levels), outputLowValues(outputLowValues), outputHighValues(outputHighValues) {
- if (outputLowValues.size() != outputHighValues.size()) {
- THROW_IE_EXCEPTION << "values size is not correct";
- }
-}
-
-size_t DequantizationInfo::outputChannels() const {
- return outputHighValues.size();
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/eltwise.hpp"
-
-#include <ie_common.h>
-
-#include <algorithm>
-#include <caseless.hpp>
-#include <memory>
-#include <string>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include <legacy/ie_util_internal.hpp>
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-bool EltwiseTransformation::isSupported(const TensorDesc& tensorDesc1, const TensorDesc& tensorDesc2) noexcept {
- if (tensorDesc1.getPrecision() != tensorDesc2.getPrecision()) {
- return false;
- }
-
- const std::vector<size_t> dims1 = tensorDesc1.getDims();
- const size_t channelsCount1 = dims1.size() == 1ul ? dims1[0] : dims1[1];
- const std::vector<size_t> dims2 = tensorDesc2.getDims();
- const size_t channelsCount2 = dims2.size() == 1ul ? dims2[0] : dims2[1];
- if ((channelsCount1 != channelsCount2) && (channelsCount1 != 1ul) && (channelsCount2 != 1ul)) {
- return false;
- }
-
- if (((dims1.size() == 2ul) && (channelsCount1 == 1ul)) ||
- ((dims2.size() == 2ul) && (channelsCount2 == 1ul))) {
- return true;
- }
-
- if ((dims1 == dims2) && (tensorDesc1.getLayout() != tensorDesc2.getLayout())) {
- return false;
- }
-
- if (dims1 == dims2) {
- return true;
- }
-
- if ((dims1.size() > 1ul) && (dims2.size() > 1ul)) {
- if (dims1[1] != dims2[1]) {
- return false;
- }
-
- const size_t dimensionsSize = std::min(dims1.size(), dims2.size());
- for (size_t dimension = 2ul; dimension < dimensionsSize; ++dimension) {
- if ((dims1[dimension] != dims2[dimension]) && (dims1[dimension] != 1ul) && (dims2[dimension] != 1ul)) {
- return false;
- }
- }
- }
-
- return true;
-}
-
-bool EltwiseTransformation::isBroadcasted(const TensorDesc& tensorDesc) noexcept {
- const std::vector<size_t> dims = tensorDesc.getDims();
- const size_t channelIndex = dims.size() == 1 ? 0ul : (dims.size() == 2ul ? 1ul : 2ul);
- for (size_t dimension = channelIndex; dimension < dims.size(); ++dimension) {
- if (dims[dimension] != 1ul) {
- return false;
- }
- }
-
- return true;
-}
-
-
-bool EltwiseTransformation::canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const {
- if ((!LayerTransformation::canBeTransformed(context, layer)) || isBroadcastByChannels(layer)) {
- return false;
- }
-
- if (!CaselessEq<std::string>()(layer.type, "Eltwise")) {
- THROW_IE_EXCEPTION << "layer type '" << layer.name << "' is not correct";
- }
-
- const DataPtr insData0 = layer.insData[0].lock();
- if (insData0 == nullptr) {
- THROW_IE_LPT_EXCEPTION(layer) << "input data 0 is absent";
- }
-
- const TensorDesc& tensorDesc0 = insData0->getTensorDesc();
- for (size_t i = 1ul; i < layer.insData.size(); ++i) {
- const DataPtr insData = layer.insData[i].lock();
- if (insData == nullptr) {
- THROW_IE_LPT_EXCEPTION(layer) << "input data " << i << " is absent";
- }
- if (!isSupported(tensorDesc0, insData->getTensorDesc())) {
- return false;
- }
- }
-
- const EltwiseLayer* eltwiseLayer = dynamic_cast<const EltwiseLayer*>(&layer);
- if (eltwiseLayer == nullptr) {
- THROW_IE_EXCEPTION << "unexpected layer type for layer " << layer.name;
- }
-
- if ((eltwiseLayer->_operation != EltwiseLayer::eOperation::Sum) && (eltwiseLayer->_operation != EltwiseLayer::eOperation::Prod)) {
- return false;
- }
-
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(layer);
- if ((parents.size() != 2) || (parents[0]->type != "ScaleShift") || (parents[1]->type != "ScaleShift")) {
- return false;
- }
-
- return true;
-}
-
-bool EltwiseTransformation::isBroadcastByChannels(const CNNLayer& layer) const {
- const int fullPathIndex = getNotEmpty(layer);
- if (fullPathIndex == -1) {
- return false;
- }
- const DataPtr fullPathInsData = layer.insData[fullPathIndex].lock();
- if (fullPathInsData == nullptr) {
- THROW_IE_EXCEPTION << "parent ins data is absent";
- }
- const std::vector<size_t> fullDims = fullPathInsData->getTensorDesc().getDims();
- const size_t fullChannelsCount = fullDims.size() == 1ul ? fullDims[0] : fullDims[1];
-
- const size_t emptyPathIndex = fullPathIndex == 0ul ? 1lu : 0lu;
- const DataPtr emptyPathInsData = layer.insData[emptyPathIndex].lock();
- if (emptyPathInsData == nullptr) {
- THROW_IE_EXCEPTION << "parent ins data is absent";
- }
- const std::vector<size_t> emptyDims = emptyPathInsData->getTensorDesc().getDims();
- const size_t emptyChannelsCount = emptyDims.size() == 1ul ? emptyDims[0] : emptyDims[1];
-
- return (fullChannelsCount != emptyChannelsCount) && (fullChannelsCount == 1ul);
-}
-
-void EltwiseTransformation::transform(TransformationContext& context, CNNLayer& eltwise) const {
- if (!canBeTransformed(context, eltwise)) {
- return;
- }
-
- const int fullPathIndex = getNotEmpty(eltwise);
- if (fullPathIndex == -1) {
- return;
- }
-
- const EltwiseLayer* eltwiseLayer = dynamic_cast<const EltwiseLayer*>(&eltwise);
- if (eltwiseLayer == nullptr) {
- THROW_IE_EXCEPTION << "unexpected layer type for layer " << eltwise.name;
- }
-
- const size_t emptyPathIndex = fullPathIndex == 0 ? 1lu : 0lu;
- std::vector<float> emptyPathDequantizationScales;
- std::vector<float> emptyPathDequantizationShifts;
- const DataPtr emptyPathData = eltwise.insData[emptyPathIndex].lock();
- if (emptyPathData == nullptr) {
- THROW_IE_LPT_EXCEPTION(eltwise) << "data for empty path is absent";
- }
- const CNNLayerPtr emptyPathDequantizationLayer = getCreatorLayer(emptyPathData).lock();
- {
- fillFromDequantizationLayer(*emptyPathDequantizationLayer, emptyPathDequantizationScales, emptyPathDequantizationShifts);
-
- if ((eltwiseLayer->_operation == EltwiseLayer::eOperation::Prod) && std::any_of(
- emptyPathDequantizationShifts.begin(),
- emptyPathDequantizationShifts.end(),
- [](const float value) { return value != 0.f; })) {
- return;
- }
- }
-
- {
- const DataPtr fullPathData = eltwise.insData[fullPathIndex].lock();
- if (fullPathData == nullptr) {
- THROW_IE_LPT_EXCEPTION(eltwise) << "data for full path is absent";
- }
- const CNNLayerPtr fullPathDequantizationLayer = getCreatorLayer(fullPathData).lock();
- std::vector<float> fullPathDequantizationScales;
- std::vector<float> fullPathDequantizationShifts;
- fillFromDequantizationLayer(*fullPathDequantizationLayer, fullPathDequantizationScales, fullPathDequantizationShifts);
-
- if ((emptyPathDequantizationScales.size() != fullPathDequantizationScales.size()) ||
- (emptyPathDequantizationShifts.size() != fullPathDequantizationShifts.size())) {
- return;
- }
-
- if (eltwiseLayer->_operation == EltwiseLayer::eOperation::Sum) {
- for (size_t i = 0ul; i < emptyPathDequantizationScales.size(); ++i) {
- fullPathDequantizationScales[i] = fullPathDequantizationScales[i] / emptyPathDequantizationScales[i];
- fullPathDequantizationShifts[i] = (fullPathDequantizationShifts[i] + emptyPathDequantizationShifts[i]) / emptyPathDequantizationScales[i];
- }
-
- CNNNetworkHelper::updateBlobs(*fullPathDequantizationLayer, "weights", fullPathDequantizationScales);
- CNNNetworkHelper::updateBlobs(*fullPathDequantizationLayer, "biases", fullPathDequantizationShifts);
- } else if (eltwiseLayer->_operation == EltwiseLayer::eOperation::Prod) {
- for (size_t i = 0ul; i < emptyPathDequantizationScales.size(); ++i) {
- fullPathDequantizationScales[i] = fullPathDequantizationScales[i] * emptyPathDequantizationScales[i];
- fullPathDequantizationShifts[i] = fullPathDequantizationShifts[i] * emptyPathDequantizationScales[i];
- }
-
- CNNNetworkHelper::updateBlobs(*fullPathDequantizationLayer, "weights", fullPathDequantizationScales);
- CNNNetworkHelper::updateBlobs(*fullPathDequantizationLayer, "biases", fullPathDequantizationShifts);
- } else {
- THROW_IE_EXCEPTION << "unexpected operation '" << eltwiseLayer->_operation << "'";
- }
- }
-
- context.quantizedFakeQuantizeNames.erase(emptyPathDequantizationLayer->name);
- CNNNetworkHelper::removeLayer(context.network, emptyPathDequantizationLayer);
-
- if (eltwiseLayer->_operation == EltwiseLayer::eOperation::Sum) {
- std::vector<float> eltwiseDequantizationScales(emptyPathDequantizationScales.size());
- for (size_t i = 0lu; i < eltwiseDequantizationScales.size(); ++i) {
- eltwiseDequantizationScales[i] = emptyPathDequantizationScales[i];
- }
-
- const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(eltwise);
-
- if ((eltwiseDequantizationScales.size() == 1ul) && (eltwiseDequantizationScales.size() != outputChannelsCount)) {
- eltwiseDequantizationScales.resize(outputChannelsCount);
- std::fill(eltwiseDequantizationScales.begin(), eltwiseDequantizationScales.end(), eltwiseDequantizationScales[0]);
- }
-
- const std::vector<float> eltwiseDequantizationShifts(emptyPathDequantizationShifts.size());
- addDequantizationLayer(context, eltwise, eltwiseDequantizationScales, eltwiseDequantizationShifts);
- } else if (eltwiseLayer->_operation != EltwiseLayer::eOperation::Prod) {
- THROW_IE_EXCEPTION << "unexpected operation '" << eltwiseLayer->_operation << "'";
- }
-}
-
-bool isBranchWithTargetType(const CNNLayer& fakeQuantize, const std::string& type) {
- if (!CaselessEq<std::string>()(fakeQuantize.type, "FakeQuantize")) {
- return false;
- }
-
- if ((fakeQuantize.outData.size() == 1) && (getInputTo(fakeQuantize.outData[0]).size() == 1)) {
- const CNNLayerPtr parentOnActivation = CNNNetworkHelper::getParent(fakeQuantize, 0);
- if ((parentOnActivation != nullptr) && CaselessEq<std::string>()(parentOnActivation->type, type) &&
- (parentOnActivation->outData.size() == 1) && (getInputTo(parentOnActivation->outData[0]).size() == 1)) {
- return true;
- }
- }
-
- return false;
-}
-
-bool isBranchWithTargetType(const CNNLayer& fakeQuantize, const std::vector<std::string> types) {
- if (!CaselessEq<std::string>()(fakeQuantize.type, "FakeQuantize")) {
- return false;
- }
-
- return std::any_of(types.begin(), types.end(), [&](const std::string& type) { return isBranchWithTargetType(fakeQuantize, type); });
-}
-
-int EltwiseTransformation::getNotEmpty(const CNNLayer& eltwise) {
- // TODO: Pooling specific operations are supported only
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParentsRecursivelyExceptTypes(eltwise, {"Pooling", "ScaleShift"});
- if (parents.size() != 2lu) {
- return -1;
- }
-
- if ((CaselessEq<std::string>()(parents[0]->type, "FakeQuantize")) && (!CaselessEq<std::string>()(parents[1]->type, "FakeQuantize"))) {
- return 0;
- }
-
- if ((CaselessEq<std::string>()(parents[1]->type, "FakeQuantize")) && (!CaselessEq<std::string>()(parents[0]->type, "FakeQuantize"))) {
- return 1;
- }
-
- const std::vector<std::string> targetTypes = { "Convolution", "Gemm", "FullyConnected" };
- const bool allBranchesAreEqual =
- std::all_of(parents.begin(), parents.end(), [&](const CNNLayerPtr& layer) { return isBranchWithTargetType(*layer, targetTypes); }) ||
- std::all_of(parents.begin(), parents.end(), [&](const CNNLayerPtr& layer) { return !isBranchWithTargetType(*layer, targetTypes); });
-
- for (size_t index = 0ul; index < parents.size(); ++index) {
- const CNNLayerPtr& parent = parents[index];
- if ((allBranchesAreEqual && isBroadcasted(parent->outData[0]->getTensorDesc())) ||
- ((!allBranchesAreEqual) && isBranchWithTargetType(*parent, targetTypes))) {
- return index;
- }
- }
-
- int fullPathIndex = 0;
- int constBranchID = CNNNetworkHelper::getConstParentBranchID(eltwise);
- if (constBranchID == -1) {
- for (size_t i = 0ul; i < parents.size(); ++i) {
- if (parents[i]->outData.size() != 1) {
- continue;
- }
-
- if (getInputTo(parents[i]->outData[0]).size() == 1) {
- return i;
- }
- }
- } else {
- fullPathIndex = constBranchID == 0 ? 1 : 0;
- }
-
- return fullPathIndex;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/fake_quantize.hpp"
-
-#include <algorithm>
-#include <blob_factory.hpp>
-#include <cmath>
-#include <caseless.hpp>
-#include <limits>
-#include <map>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include <ie_common.h>
-#include <precision_utils.h>
-#include <legacy/cnn_network_impl.hpp>
-#include <legacy/ie_util_internal.hpp>
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-void FakeQuantizeTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
- if (!CaselessEq<std::string>()(layer.type, "FakeQuantize")) {
- THROW_IE_EXCEPTION << "Layer '" << layer.name << "' has invalid type. FakeQuantize is expected.";
- }
-
- if (layer.insData.size() != 5lu) {
- THROW_IE_EXCEPTION << "Layer '" << layer.insData.size() << "' has invalid inputs number. 5 is expected.";
- }
-
- // FakeQuantize on weights are used without dequantization ScaleShifts
- const bool onWeights = CNNNetworkHelper::onConstWeightsPath(layer) && CNNNetworkHelper::onWeights(layer);
- if (onWeights) {
- return;
- }
-
- if (!QuantizationDetails::outputLayoutIsSupported(layer)) {
- return;
- }
-
- CNNLayerPtr fakeQuantizeLayer = std::make_shared<CNNLayer>(layer);
- CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(layer, 0);
- auto scaleShiftChildren = CNNNetworkHelper::getChildren(*scaleShift);
- if ((scaleShift != nullptr) && (scaleShift->type == "ScaleShift") && scaleShiftChildren.size() == 1) {
- fuseScaleShift(context, fakeQuantizeLayer, scaleShift);
- }
-
- if (context.quantizedFakeQuantizeNames.find(layer.name) != context.quantizedFakeQuantizeNames.end()) {
- return;
- }
-
- if (!QuantizationDetails::isSupportedLevel(layer.GetParamAsUInt("levels"))) return;
-
- const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(layer);
- const DataPrecision dataPrecision = getDataPrecision(layer, quantizationDetails, onWeights, supportAsymmetricQuantization);
- if (dataPrecision.precision == Precision::UNSPECIFIED) {
- return;
- }
-
- std::vector<float> dequantizationScales;
- std::vector<float> dequantizationShifts;
- fillFromQuantizationDetails(
- quantizationDetails,
- dataPrecision,
- dequantizationScales,
- dequantizationShifts);
-
-#ifdef LPT_PRINT_DEQUANTIZATION_INFO
- printDequantizationValues(dequantizationScales, dequantizationShifts);
-#endif
-
- CNNNetworkHelper::updateBlobs(context, layer, 3, dataPrecision.min);
- CNNNetworkHelper::updateBlobs(context, layer, 4, dataPrecision.max);
-
- if (updatePrecisions) {
- CNNNetworkHelper::setOutDataPrecision(layer, dataPrecision.precision);
- }
-
- addDequantizationLayer(context, layer, dequantizationScales, dequantizationShifts);
-
- context.quantizedFakeQuantizeNames.insert(layer.name);
-}
-
-bool FakeQuantizeTransformation::isPrecisionPreserved(const CNNLayer& layer) const noexcept {
- return false;
-}
-
-void FakeQuantizeTransformation::fuseScaleShift(TransformationContext& context, CNNLayerPtr fakeQuantizeLayer,
- CNNLayerPtr scaleShift) const {
- // TODO: add check if previous blobs precision is enough to store current values
- const Blob::Ptr scalesBlob = CNNNetworkHelper::getBlob(scaleShift, "weights");
- std::shared_ptr<float> scalesBufferPtr = CNNNetworkHelper::getFloatData(scalesBlob);
-
- const Blob::Ptr shiftsBlob = CNNNetworkHelper::getBlob(scaleShift, "biases");
- std::shared_ptr<float> shiftsBufferPtr = CNNNetworkHelper::getFloatData(shiftsBlob);
-
- if (scalesBlob->size() != shiftsBlob->size())
- THROW_IE_EXCEPTION << "Scales and shifts values count are different for " << scaleShift->name;
-
- const float* shiftsBuffer = shiftsBufferPtr.get();
- const float* scalesBuffer = scalesBufferPtr.get();
- // Don't fuse when there is a negative scale, because it leads to invalid results of FQ
- for (size_t i = 0lu; i < scalesBlob->size(); ++i) {
- if (scalesBuffer[i] <= 0.0f) return;
- }
-
- CNNLayerPtr inputLow = CNNNetworkHelper::getParent(*fakeQuantizeLayer, 1);
- CNNLayerPtr inputHigh = CNNNetworkHelper::getParent(*fakeQuantizeLayer, 2);
-
- Layout layout;
- size_t channelIndex;
- const DataPtr insData = scaleShift->insData[0].lock();
- if (insData == nullptr) {
- THROW_IE_LPT_EXCEPTION(*scaleShift) << "input data is absent";
- }
- const size_t inputDims = insData->getDims().size();
- switch (inputDims) {
- case 5: {
- layout = Layout::NCDHW;
- channelIndex = 1ul;
- break;
- }
- case 4: {
- layout = Layout::NCHW;
- channelIndex = 1ul;
- break;
- }
- case 3: {
- layout = Layout::BLOCKED;
- channelIndex = 1ul;
- break;
- }
- case 2: {
- layout = Layout::NC;
- channelIndex = 1ul;
- break;
- }
- case 1: {
- layout = Layout::C;
- channelIndex = 0ul;
- break;
- }
- default: THROW_IE_EXCEPTION << "FakeQuantizeTransform: unexpected dimensions count " << inputDims << " in ScaleShift optimization";
- }
- std::vector<size_t> dims(inputDims, 1lu);
- dims[channelIndex] = scalesBlob->size();
-
- const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(*fakeQuantizeLayer);
-
- Blob::Ptr targetInputLowBufferPtr = reshapeWeightsIntervalConst(*inputLow, dims, layout);
- auto targetInputLowBuffer = CNNNetworkHelper::getFloatData(targetInputLowBufferPtr);
- Blob::Ptr targetInputHighBufferPtr = reshapeWeightsIntervalConst(*inputHigh, dims, layout);
- auto targetInputHighBuffer = CNNNetworkHelper::getFloatData(targetInputHighBufferPtr);
-
- for (size_t i = 0lu; i < scalesBlob->size(); ++i) {
- auto q_lo = quantizationDetails.getInputLowValue(i);
- auto q_hi = quantizationDetails.getInputHighValue(i);
- auto sc = scalesBlob->size() == 1 ? scalesBuffer[0] : scalesBuffer[i];
- auto sh = shiftsBlob->size() == 1 ? shiftsBuffer[0] : shiftsBuffer[i];
- targetInputLowBuffer.get()[i] = (q_lo - sh) / sc;
- targetInputHighBuffer.get()[i] = (q_hi - sh) / sc;
- }
-
- CNNNetworkHelper::fillBlobByFP32(targetInputLowBufferPtr, targetInputLowBuffer.get());
- CNNNetworkHelper::fillBlobByFP32(targetInputHighBufferPtr, targetInputHighBuffer.get());
-
- reshapeFakeQuantize(*fakeQuantizeLayer, dims, layout);
-
- CNNNetworkHelper::removeLayer(context.network, scaleShift);
- context.removeLayer(*scaleShift);
-}
-
-Blob::Ptr FakeQuantizeTransformation::reshapeWeightsIntervalConst(CNNLayer& constLayer, const std::vector<size_t>& dims,
- const Layout layout) {
- if (constLayer.blobs.size() != 1lu) {
- THROW_IE_EXCEPTION << "Unexpected blobs count " << constLayer.blobs.size() << " for layer " << constLayer.name;
- }
- if (constLayer.outData.size() != 1lu)
- THROW_IE_EXCEPTION << "Unexpected outputs for layer " << constLayer.name;
-
- auto it = constLayer.blobs.find("custom");
- if (it == constLayer.blobs.end()) THROW_IE_EXCEPTION << "blob 'custom' was not found for layer " << constLayer.name;
-
- const Precision& srcPrecision = it->second->getTensorDesc().getPrecision();
-
- Blob::Ptr targetBlob = CNNNetworkHelper::makeNewBlobPtr({srcPrecision, dims, layout});
- targetBlob->allocate();
- constLayer.blobs["custom"] = targetBlob;
-
- constLayer.outData[0]->reshape(dims, layout);
-
- return targetBlob;
-}
-
-void FakeQuantizeTransformation::reshapeFakeQuantize(
- CNNLayer& fakeQuantizeLayer,
- const std::vector<size_t>& dims,
- const Layout layout) {
- DataPtr inputLowData = fakeQuantizeLayer.insData[1].lock();
- if (inputLowData == nullptr) {
- THROW_IE_EXCEPTION << "input low interval data is absent";
- }
- inputLowData->reshape(dims, layout);
-
- DataPtr inputHighData = fakeQuantizeLayer.insData[2].lock();
- if (inputHighData == nullptr) {
- THROW_IE_EXCEPTION << "input hight interval data is absent";
- }
- inputHighData->reshape(dims, layout);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/fully_connected.hpp"
-
-#include <algorithm>
-#include <blob_factory.hpp>
-#include <cmath>
-#include <caseless.hpp>
-#include <limits>
-#include <map>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include <ie_common.h>
-#include <legacy/cnn_network_impl.hpp>
-#include <legacy/ie_util_internal.hpp>
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-bool FullyConnectedTransformation::canBeTransformed(const TransformationContext& context, const CNNLayer& fullyConnected) const {
- if (!WeightableLayerTransformation::canBeTransformed(context, fullyConnected)) {
- return false;
- }
-
- const DataPtr inputData = fullyConnected.insData[0].lock();
- if (inputData == nullptr) {
- return false;
- }
-
- const std::vector<size_t> inTensorDims = inputData->getDims();
- if ((inTensorDims.size() != 2ul) && (inTensorDims.size() != 3ul)) {
- return false;
- }
-
- const DataPtr outputData = fullyConnected.outData[0];
- if (outputData == nullptr) {
- return false;
- }
-
- const std::vector<size_t> outTensorDims = outputData->getTensorDesc().getDims();
- if (inTensorDims.size() != outTensorDims.size()) {
- return false;
- }
-
- if (inTensorDims[0] != outTensorDims[0]) {
- return false;
- }
-
- CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(fullyConnected);
- if (scaleShift->type != "ScaleShift") {
- return false;
- }
-
- std::vector<float> dequantizationScales;
- std::vector<float> dequantizationShifts;
- fillFromDequantizationLayer(*scaleShift, dequantizationScales, dequantizationShifts);
-
- const bool dequantizationDimIsSupported = !getDequantizationDimIsSupported(fullyConnected);
- if ((!dequantizationDimIsSupported) &&
- (!DequantizationDetails::isPerTensor(dequantizationScales, dequantizationShifts) ||
- // if asymmetric quantization is not supported then no shifts for dequantizationDimIsSupported = false case:
- // in this case we can not dequantize with shifts
- (!supportAsymmetricQuantization && (dequantizationShifts[0] != 0.f)))) {
- return false;
- }
-
- if ((dequantizationScales.size() != inTensorDims[1]) || (dequantizationShifts.size() != inTensorDims[1])) {
- return false;
- }
-
- return true;
-}
-
-void FullyConnectedTransformation::transform(TransformationContext& context, CNNLayer& fullyConnected) const {
- if (!canBeTransformed(context, fullyConnected)) {
- return;
- }
-
- if ((!CaselessEq<std::string>()(fullyConnected.type, "FullyConnected")) && (!CaselessEq<std::string>()(fullyConnected.type, "Gemm"))) {
- THROW_IE_EXCEPTION << "layer '" << fullyConnected.name << "' is not correct";
- }
-
- if ((fullyConnected.insData.size() != 1) && (fullyConnected.insData.size() != 2) &&
- (fullyConnected.insData.size() != 3)) {
- THROW_IE_EXCEPTION << "layer inputs '" << fullyConnected.insData.size() << "' is not correct";
- }
-
- const CNNLayerPtr scaleShiftOnData = CNNNetworkHelper::getParent(fullyConnected, 0);
- if (scaleShiftOnData->type != "ScaleShift") {
- return;
- }
-
- const CNNLayerPtr parentOnWeights = CNNNetworkHelper::getParent(fullyConnected, 1);
- if (fullyConnected.outData.size() != 1) {
- THROW_IE_EXCEPTION << "layer outputs '" << fullyConnected.outData.size() << "' is not correct";
- }
-
- std::vector<float> originalDataDequantizationScales;
- std::vector<float> originalDataDequantizationShifts;
- fillFromDequantizationLayer(*scaleShiftOnData, originalDataDequantizationScales, originalDataDequantizationShifts);
-
- std::vector<float> originalWeightsDequantizationScales;
- std::vector<float> originalWeightsDequantizationShifts;
-
- if (parentOnWeights != nullptr) {
- if (parentOnWeights->type == "FakeQuantize") {
- const std::vector<size_t> dims = fullyConnected.outData[0]->getDims();
- if (dims.size() > 2ul) {
- const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(*parentOnWeights);
- const DataPrecision dataPrecision = getDataPrecision(*parentOnWeights, quantizationDetails, true, supportAsymmetricQuantization);
- if (dataPrecision.precision == Precision::UNSPECIFIED) {
- return;
- }
-
- fillFromQuantizationDetails(
- quantizationDetails,
- dataPrecision,
- originalWeightsDequantizationScales,
- originalWeightsDequantizationShifts);
-
- if ((dims[1ul] != originalWeightsDequantizationScales.size()) &&
- (std::any_of(
- originalWeightsDequantizationScales.begin(),
- originalWeightsDequantizationScales.end(),
- [&](const float value) { return value != originalWeightsDequantizationScales[0]; }))) {
- return;
- }
- }
-
- fillDequantizationsForWeightsPath(
- context,
- fullyConnected,
- supportAsymmetricQuantization,
- originalWeightsDequantizationScales,
- originalWeightsDequantizationShifts);
-
- } else if (parentOnWeights->type == "Const") {
- originalWeightsDequantizationScales.push_back(1.0);
- originalWeightsDequantizationShifts.push_back(0.0);
- } else {
- THROW_IE_EXCEPTION << "Unexpected dequantization layer type " << parentOnWeights->type;
- }
- }
-
- std::vector<float> dequantizationScales;
- std::vector<float> dequantizationShifts;
- std::vector<float> biasesShifts;
-
- if (supportAsymmetricQuantization) {
- std::vector<float> dataShifts(originalDataDequantizationShifts.size());
- for (size_t i = 0; i < dataShifts.size(); ++i) {
- dataShifts[i] = -originalDataDequantizationShifts[i] / originalDataDequantizationScales[i];
- }
- std::vector<float> weightsShifts(originalWeightsDequantizationShifts.size());
- for (size_t i = 0; i < weightsShifts.size(); ++i) {
- weightsShifts[i] = -originalWeightsDequantizationShifts[i] / originalWeightsDequantizationScales[i];
- }
-
- std::vector<float> dataZeroPoints(originalDataDequantizationShifts.size());
- for (size_t i = 0ul; i < originalDataDequantizationShifts.size(); ++i) {
- dataZeroPoints[i] = originalDataDequantizationShifts[i] / originalDataDequantizationScales[i];
- }
-
- calculateDequantizationForAsymmetric(
- fullyConnected,
- dataZeroPoints,
- originalWeightsDequantizationScales,
- dequantizationScales,
- dequantizationShifts);
-
- biasesShifts.resize(dequantizationShifts.size());
-
- Precision weightsOriginalPrecision;
- Precision weightsLowPrecision;
- if (parentOnWeights->type == "FakeQuantize") {
- weightsOriginalPrecision = parentOnWeights->outData[0]->getTensorDesc().getPrecision();
- const bool weightsOnConstPath = CNNNetworkHelper::isQuantizedConstWeights(fullyConnected);
- if (!weightsOnConstPath) {
- THROW_IE_LPT_EXCEPTION(fullyConnected) << "unexpected layer type " << parentOnWeights->type << " on weights";
- }
- weightsLowPrecision = getDataPrecision(
- *parentOnWeights,
- QuantizationDetails::getDetails(*parentOnWeights),
- weightsOnConstPath,
- supportAsymmetricQuantization).precision;
- } else if (parentOnWeights->type == "ScaleShift") {
- weightsOriginalPrecision = parentOnWeights->outData[0]->getTensorDesc().getPrecision();
- weightsLowPrecision = CNNNetworkHelper::getPrecisionParent(*parentOnWeights);
- } else {
- THROW_IE_EXCEPTION << "unexpected layer type on weights " << parentOnWeights->type;
- }
-
- const PrecisionsInfo dataPrecisionsInfo(
- scaleShiftOnData->outData[0]->getTensorDesc().getPrecision(),
- CNNNetworkHelper::getPrecisionParent(*scaleShiftOnData));
-
- updateToSupportAsymmetricQuantization(
- context,
- fullyConnected,
- dataPrecisionsInfo,
- dataShifts,
- PrecisionsInfo(weightsOriginalPrecision, weightsLowPrecision),
- weightsShifts);
- } else {
- if (std::any_of(
- originalWeightsDequantizationShifts.begin(),
- originalWeightsDequantizationShifts.end(),
- [](const float value) { return value != 0.f; })) {
- return;
- }
-
- calculateDequantizationForSymmetric(
- fullyConnected,
- originalWeightsDequantizationScales,
- originalWeightsDequantizationShifts,
- dequantizationScales,
- dequantizationShifts,
- biasesShifts);
- }
-
- if (this->updateBiases) {
- updateLayerBiases(context, fullyConnected, false, dequantizationScales, dequantizationShifts, biasesShifts);
- }
-
- if (parentOnWeights != nullptr) {
- const QuantizationDetails originalQuantizationDetails = parentOnWeights != nullptr ?
- QuantizationDetails::getDetails(*parentOnWeights) :
- QuantizationDetails();
-
- const DataPrecision dataPrecision = getDataPrecision(
- *parentOnWeights,
- originalQuantizationDetails,
- true,
- supportAsymmetricQuantization);
-
- // disabled, looks like not necessary more - use asymmetric quantization instead
- // std::vector<float> outputLowValues(originalQuantizationDetails.outputIntervalsCount, dataPrecision.min);
- // std::vector<float> outputHighValues(originalQuantizationDetails.outputIntervalsCount, dataPrecision.max);
- // updateWeights(parentOnWeights, outputLowValues, outputHighValues);
-
- if (weightsToConst) {
- const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(*parentOnWeights);
- const DataPrecision dataPrecision = getDataPrecision(
- *parentOnWeights,
- quantizationDetails,
- true,
- supportAsymmetricQuantization);
-
- const Blob::Ptr weights =
- updatePrecisions
- ? CNNNetworkHelper::quantizeWeights(*parentOnWeights, roundQuantizedValues, dataPrecision.precision)
- : CNNNetworkHelper::quantizeWeights(*parentOnWeights, roundQuantizedValues);
-
- const std::vector<CNNLayerPtr> constLayers = CNNNetworkHelper::transformFakeQuantizeToConst(
- context, parentOnWeights, weights, CNNNetworkHelper::getParent(*parentOnWeights, 0)->name);
-
- if (updatePrecisions) {
- for (const CNNLayerPtr constLayer : constLayers) {
- CNNNetworkHelper::setOutDataPrecision(*constLayer, dataPrecision.precision);
- }
- }
- }
- }
-
- CNNNetworkHelper::removeLayer(context.network, scaleShiftOnData);
- context.removeLayer(*scaleShiftOnData);
-
- addDequantizationLayer(context, fullyConnected, dequantizationScales, dequantizationShifts);
-}
-
-void FullyConnectedTransformation::calculateDequantizationForSymmetric(
- const CNNLayer& fullyConnected,
- const std::vector<float>& originalWeightsDequantizationScales,
- const std::vector<float>& originalWeightsDequantizationShifts,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts,
- std::vector<float>& biasesShifts) const {
- for (size_t i = 0; i < originalWeightsDequantizationShifts.size(); ++i) {
- if (originalWeightsDequantizationShifts[i] != 0.0) {
- THROW_IE_EXCEPTION << "shift values on weights for '" << fullyConnected.type << "' layer '" << fullyConnected.name << "' are not supported";
- }
- }
-
- const DataPtr inputData = fullyConnected.insData[0].lock();
- if (inputData == nullptr) {
- THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
- }
- if (inputData->getDims().size() < 2) {
- THROW_IE_EXCEPTION << "Unexpected input layout " << inputData->getLayout();
- }
-
- const DataPtr outputData = fullyConnected.outData[0];
- if (outputData == nullptr) {
- THROW_IE_LPT_EXCEPTION(fullyConnected) << "output data is absent";
- }
-
- const size_t outputChannelsCount = outputData->getDims()[1];
- dequantizationScales.resize(outputChannelsCount);
- dequantizationShifts.resize(outputChannelsCount);
- biasesShifts.resize(outputChannelsCount);
-
- CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(fullyConnected);
- if (scaleShift->type != "ScaleShift") {
- THROW_IE_EXCEPTION << "Unexpected layer type to calculate quantization values " << scaleShift->type;
- }
-
- const auto prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "weights"));
- const auto prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
-
- const bool dequantizationValuesAreBroadcasted = !getDequantizationDimIsSupported(fullyConnected);
- for (size_t i = 0; i < outputChannelsCount; ++i) {
- dequantizationScales[i] =
- prevDequantizationScaleBuffer.get()[0] *
- (originalWeightsDequantizationScales.size() == 0 ?
- 1.0 :
- (originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[i]));
- }
-
- const DataPtr insData = fullyConnected.insData[0].lock();
- if (insData == nullptr) {
- THROW_IE_LPT_EXCEPTION(fullyConnected) << "insert data ia absent";
- }
-
- if (CNNNetworkHelper::isQuantizedConstWeights(fullyConnected)) {
- const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
- const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
- const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
- const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(biasesBlob);
-
- const size_t inputChannelsCount = insData->getDims().size() == 3ul ? insData->getDims()[2] : insData->getDims()[1];
- for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
- float sum = 0.0;
- const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
- 1.0 :
- ((originalWeightsDequantizationScales.size() == 1) ?
- originalWeightsDequantizationScales[0] :
- originalWeightsDequantizationScales[channel]);
-
- for (size_t inputChannel = 0; inputChannel < inputChannelsCount; ++inputChannel) {
- const float w = weightsBuffer.get()[channel * inputChannelsCount + inputChannel];
- const float shift = dequantizationValuesAreBroadcasted ?
- prevDequantizationShiftBuffer.get()[0] :
- prevDequantizationShiftBuffer.get()[inputChannel];
- sum += w * shift * weightsDequantizationScale;
- }
-
- dequantizationShifts[channel] = biasesBuffer == nullptr ?
- sum :
- (sum + biasesBuffer.get()[channel] -
- prevDequantizationScaleBuffer.get()[0] *
- biasesBuffer.get()[channel] * weightsDequantizationScale);
- biasesShifts[channel] = sum;
- }
- }
-}
-
-void FullyConnectedTransformation::calculateDequantizationForAsymmetric(
- const CNNLayer& fullyConnected,
- const std::vector<float>& dataZeroPoints,
- const std::vector<float>& originalWeightsDequantizationScales,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts) const {
- const DataPtr inputData = fullyConnected.insData[0].lock();
- if (inputData == nullptr) {
- THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
- }
- if (inputData->getDims().size() < 2) {
- THROW_IE_EXCEPTION << "Unexpected input layout " << inputData->getLayout();
- }
-
- const DataPtr outputData = fullyConnected.outData[0];
- if (outputData == nullptr) {
- THROW_IE_LPT_EXCEPTION(fullyConnected) << "output data is absent";
- }
-
- const size_t inputChannelsCount = inputData->getDims()[1];
- const size_t outputChannelsCount = outputData->getDims()[1];
- if ((originalWeightsDequantizationScales.size() != outputChannelsCount) &&
- std::any_of(
- originalWeightsDequantizationScales.begin(),
- originalWeightsDequantizationScales.end(),
- [&](const float value) { return value != originalWeightsDequantizationScales[0]; })) {
- THROW_IE_LPT_EXCEPTION(fullyConnected) << "can not insert dequantization layer for " <<
- outputChannelsCount << " output channels and " <<
- originalWeightsDequantizationScales.size() << " weigths dequantization scales";
- }
-
- CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(fullyConnected);
- if (scaleShift->type != "ScaleShift") {
- THROW_IE_EXCEPTION << "Unexpected layer type to calculate quantization values " << scaleShift->type;
- }
-
- const bool dequantizationValuesAreBroadcasted = !getDequantizationDimIsSupported(fullyConnected);
-
- dequantizationScales.resize(outputChannelsCount);
- dequantizationShifts.resize(outputChannelsCount);
-
- const std::shared_ptr<float> prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "weights"));
- for (size_t i = 0; i < outputChannelsCount; ++i) {
- dequantizationScales[i] =
- prevDequantizationScaleBuffer.get()[0] *
- (originalWeightsDequantizationScales.size() == 0 ?
- 1.0 :
- originalWeightsDequantizationScales[((originalWeightsDequantizationScales.size() == 1) || dequantizationValuesAreBroadcasted) ? 0 : i]);
- }
-
- if (CNNNetworkHelper::isQuantizedConstWeights(fullyConnected) && (!dequantizationValuesAreBroadcasted)) {
- const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
- const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
- const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
- const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBiases(fullyConnected));
-
- const std::shared_ptr<float> prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
-
- for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
- float sum1 = 0.0;
- float sum2 = 0.0;
- const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
- 1.0 :
- ((originalWeightsDequantizationScales.size() == 1) ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
-
- for (size_t w = 0; w < inputChannelsCount; ++w) {
- const float kernel = weightsBuffer.get()[channel * inputChannelsCount + w];
- const float shift = prevDequantizationShiftBuffer.get()[w];
- sum1 += kernel * shift * weightsDequantizationScale;
- sum2 += kernel * dataZeroPoints[w] * weightsDequantizationScale;
- }
-
- dequantizationShifts[channel] = biasesBuffer == nullptr ?
- sum1 :
- (sum1 + biasesBuffer.get()[channel] -
- prevDequantizationScaleBuffer.get()[0] *
- biasesBuffer.get()[channel] * weightsDequantizationScale);
- }
- }
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/fuse_fake_quantize_and_scale_shift.hpp"
-
-#include <algorithm>
-#include <string>
-#include <vector>
-
-#include <caseless.hpp>
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-void FuseFakeQuantizeAndScaleShiftTransformation::transform(
- TransformationContext& context,
- CNNLayer& fakeQuantizeLayer) const {
- if (!CaselessEq<std::string>()(fakeQuantizeLayer.type, "FakeQuantize"))
- return;
-
- // Fuse if only all children are ScaleShift
- auto dScaleShiftsVector = CNNNetworkHelper::getChildren(fakeQuantizeLayer);
- for (const auto& child : dScaleShiftsVector) {
- if (!CaselessEq<std::string>()(child->type, "ScaleShift"))
- return;
-
- const DataPtr insData = child->insData[0].lock();
- if (insData == nullptr) {
- return;
- }
-
- if (insData->getDims().size() > 5) {
- return;
- }
- }
-
- auto dScaleShift = dScaleShiftsVector[0];
-
- const Blob::Ptr scalesBlob = CNNNetworkHelper::getBlob(dScaleShift, "weights");
- auto scalesBufferPtr = CNNNetworkHelper::getFloatData(scalesBlob);
-
- const Blob::Ptr shiftsBlob = CNNNetworkHelper::getBlob(dScaleShift, "biases");
- auto shiftsBufferPtr = CNNNetworkHelper::getFloatData(shiftsBlob);
-
- if (scalesBlob->size() != shiftsBlob->size())
- THROW_IE_EXCEPTION << "Scales and shifts values count are different for layer '" << dScaleShift->name << "'";
-
- const float* shiftsBuffer = shiftsBufferPtr.get();
- const float* scalesBuffer = scalesBufferPtr.get();
- // Don't fuse when there is a negative scale, because it leads to invalid results of FQ
- for (size_t i = 0lu; i < scalesBlob->size(); ++i) {
- if (scalesBuffer[i] <= 0.0f)
- return;
- }
-
- OutputsDataMap outputs;
- context.network.getOutputsInfo(outputs);
- const bool dScaleShiftIsLastLayer = outputs.find(dScaleShift->name) != outputs.end();
- if (dScaleShiftIsLastLayer && (dScaleShiftsVector.size() > 1ul)) {
- // not possible to fuse ScaleShifts if at least one is output
- return;
- }
-
- // All ScaleShifts must be equal
- for (size_t i = 1lu; i < dScaleShiftsVector.size(); i++) {
- auto ssLayer = dScaleShiftsVector[i];
- if (outputs.find(ssLayer->name) != outputs.end()) {
- // not possible to fuse ScaleShifts if at least one is output
- return;
- }
-
- const Blob::Ptr scBlob = CNNNetworkHelper::getBlob(ssLayer, "weights");
- auto scBufferPtr = CNNNetworkHelper::getFloatData(scBlob);
-
- const Blob::Ptr shBlob = CNNNetworkHelper::getBlob(ssLayer, "biases");
- auto shBufferPtr = CNNNetworkHelper::getFloatData(shBlob);
-
- for (size_t j = 0lu; j < scalesBlob->size(); j++) {
- if (scalesBuffer[j] != scBufferPtr.get()[j] ||
- shiftsBuffer[j] != shBufferPtr.get()[j])
- return;
- }
- }
-
- CNNLayerPtr outputLow = CNNNetworkHelper::getParent(fakeQuantizeLayer, 3);
- CNNLayerPtr outputHigh = CNNNetworkHelper::getParent(fakeQuantizeLayer, 4);
-
- const DataPtr insData = dScaleShift->insData[0].lock();
- if (insData == nullptr) {
- THROW_IE_LPT_EXCEPTION(*dScaleShift) << "insert data is absent";
- }
-
- const size_t inputDims = insData->getDims().size();
- Layout layout;
- size_t channelIndex;
- switch (inputDims) {
- case 5: {
- layout = Layout::NCDHW;
- channelIndex = 1ul;
- break;
- }
- case 4: {
- layout = Layout::NCHW;
- channelIndex = 1ul;
- break;
- }
- case 3: {
- layout = Layout::BLOCKED;
- channelIndex = 1ul;
- break;
- }
- case 2: {
- layout = Layout::NC;
- channelIndex = 1ul;
- break;
- }
- case 1: {
- layout = Layout::C;
- channelIndex = 0ul;
- break;
- }
- default: {
- THROW_IE_EXCEPTION << "FuseFakeQuantizeAndScaleShiftTransformation: unexpected dimensions count " <<
- inputDims << " in ScaleShift optimization";
- }
- }
- std::vector<size_t> dims(inputDims, 1lu);
- dims[channelIndex] = scalesBlob->size();
-
- const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fakeQuantizeLayer);
-
- Blob::Ptr targetOutputLowBufferPtr = reshapeWeightsIntervalConst(*outputLow, dims, layout);
- auto targetOutputLowBuffer = CNNNetworkHelper::getFloatData(targetOutputLowBufferPtr);
- Blob::Ptr targetOutputHighBufferPtr = reshapeWeightsIntervalConst(*outputHigh, dims, layout);
- auto targetOutputHighBuffer = CNNNetworkHelper::getFloatData(targetOutputHighBufferPtr);
-
- for (size_t i = 0lu; i < scalesBlob->size(); ++i) {
- auto q_lo = quantizationDetails.getOutputLowValue(i);
- auto q_ho = quantizationDetails.getOutputHighValue(i);
- auto sc = scalesBlob->size() == 1lu ? scalesBuffer[0] : scalesBuffer[i];
- auto sh = shiftsBlob->size() == 1lu ? shiftsBuffer[0] : shiftsBuffer[i];
- targetOutputLowBuffer.get()[i] = q_lo * sc + sh;
- targetOutputHighBuffer.get()[i] = q_ho * sc + sh;
- }
-
- CNNNetworkHelper::fillBlobByFP32(targetOutputLowBufferPtr, targetOutputLowBuffer.get());
- CNNNetworkHelper::fillBlobByFP32(targetOutputHighBufferPtr, targetOutputHighBuffer.get());
-
- for (auto& ss : dScaleShiftsVector) {
- CNNNetworkHelper::removeLayer(context.network, ss);
- context.removeLayer(*ss);
- }
- if (updatePrecisions) {
- auto ssPrecision = dScaleShiftsVector[0]->outData[0]->getPrecision();
- fakeQuantizeLayer.outData[0]->setPrecision(ssPrecision);
- }
-
-
- if (dScaleShiftIsLastLayer) {
- CNNNetworkHelper::renameLayer(context.network, fakeQuantizeLayer.name, dScaleShift->name);
- }
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/gemm.hpp"
-
-#include <algorithm>
-#include <blob_factory.hpp>
-#include <cmath>
-#include <caseless.hpp>
-#include <limits>
-#include <map>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include <ie_common.h>
-#include <legacy/cnn_network_impl.hpp>
-#include <legacy/ie_util_internal.hpp>
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-bool GemmTransformation::canBeTransformed(const TransformationContext& context, const CNNLayer& gemm) const {
- if (!LayerTransformation::canBeTransformed(context, gemm)) {
- return false;
- }
-
- if ((gemm.insData.size() != 2) || (gemm.outData.size() != 1)) {
- THROW_IE_EXCEPTION << "layer outputs '" << gemm.outData.size() << "' is not correct";
- }
-
- const DataPtr inputData = gemm.insData[0].lock();
- if (inputData == nullptr) {
- return false;
- }
-
- const size_t inputChannelsCount = CNNNetworkHelper::getInputChannelsCount(gemm);
- std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(gemm);
-
- const auto checkDequantizationLayer = [&](const CNNLayer& gemm, const size_t index) -> bool {
- if (parents.size() <= index) {
- return false;
- }
- const CNNLayerPtr scaleShift = parents[index];
- if (scaleShift->type != "ScaleShift") {
- return false;
- }
-
- std::vector<float> scales;
- std::vector<float> shifts;
- fillFromDequantizationLayer(*scaleShift, scales, shifts);
-
- if (scales.size() != inputChannelsCount) {
- return false;
- }
- if (std::any_of(scales.begin(), scales.end(), [&](const float value) { return value != scales[0]; })) {
- return false;
- }
-
- if (shifts.size() != inputChannelsCount) {
- return false;
- }
- if (std::any_of(shifts.begin(), shifts.end(), [&](const float value) { return value != 0.f; })) {
- return false;
- }
-
- return true;
- };
-
- if ((CNNNetworkHelper::getParents(gemm).size() != 2ul) ||
- (!checkDequantizationLayer(gemm, 0ul))) {
- return false;
- }
-
- if (parents[1]->type == "FakeQuantize") {
- if (!QuantizationDetails::isSupportedLevel(parents[1]->GetParamAsUInt("levels"))) {
- return false;
- }
-
- const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(*parents[1]);
- const DataPrecision dataPrecision = getDataPrecision(*parents[1], quantizationDetails, false, false);
- if (dataPrecision.precision == Precision::UNSPECIFIED) {
- return false;
- }
- }
-
- if (((parents[1]->type != "FakeQuantize") && (!checkDequantizationLayer(gemm, 1ul))) ||
- ((parents[1]->type == "FakeQuantize") && (!CNNNetworkHelper::onConstWeightsPath(*parents[1]) || !CNNNetworkHelper::onWeights(*parents[1])))) {
- return false;
- }
-
- return true;
-}
-
-void GemmTransformation::transform(TransformationContext& context, CNNLayer& gemm) const {
- if (!canBeTransformed(context, gemm)) {
- return;
- }
-
- if (!CaselessEq<std::string>()(gemm.type, "Gemm")) {
- THROW_IE_EXCEPTION << "layer '" << gemm.name << "' is not correct";
- }
-
- std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(gemm);
- if (parents[1]->type == "FakeQuantize") {
- FullyConnectedTransformation::transform(context, gemm);
- return;
- }
-
- std::vector<float> originalDataDequantizationScales1;
- std::vector<float> originalDataDequantizationShifts1;
- fillFromDequantizationLayer(*parents[0], originalDataDequantizationScales1, originalDataDequantizationShifts1);
- std::vector<float> originalDataDequantizationScales2;
- std::vector<float> originalDataDequantizationShifts2;
- fillFromDequantizationLayer(*parents[1], originalDataDequantizationScales2, originalDataDequantizationShifts2);
-
- const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(gemm);
- std::vector<float> dequantizationScales(outputChannelsCount, originalDataDequantizationScales1[0] * originalDataDequantizationScales2[0]);
- std::vector<float> dequantizationShifts(outputChannelsCount, 0.f);
-
- CNNNetworkHelper::removeLayer(context.network, parents[0]);
- context.removeLayer(*parents[0]);
-
- if (parents[1]->type != "FakeQuantize") {
- CNNNetworkHelper::removeLayer(context.network, parents[1]);
- context.removeLayer(*parents[1]);
- }
-
- addDequantizationLayer(context, gemm, dequantizationScales, dequantizationShifts);
-}
-
-bool GemmTransformation::isQuantized(const CNNLayer& layer) const noexcept {
- // weightable layer version overriding
- return true;
-}
+++ /dev/null
-//*****************************************************************************
-// Copyright 2017-2020 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//*****************************************************************************
-
-/**
- * @brief Defines openvino domains for tracing
- * @file itt.hpp
- */
-
-#pragma once
-
-#include <openvino/itt.hpp>
-
-namespace InferenceEngine {
-namespace itt {
-namespace domains {
- OV_ITT_DOMAIN(LPT);
-}
-}
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/layer_transformation.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-#include <ie_common.h>
-
-#include <algorithm>
-#include <blob_factory.hpp>
-#include <cmath>
-#include <limits>
-#include <map>
-#include <memory>
-#include <string>
-#include <utility>
-#include <unordered_set>
-#include <vector>
-
-#include <legacy/cnn_network_impl.hpp>
-#include <legacy/ie_util_internal.hpp>
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-const char LayerTransformation::lastLayerPostfix[] = "_original";
-
-LayerTransformation::LayerTransformation(const Params& params) :
- updatePrecisions(params.updatePrecisions),
- quantizeOutputs(params.quantizeOutputs),
- weightsToConst(params.weightsToConst),
- quantizedTensorAlignmentOnActivations(params.quantizedTensorAlignmentOnActivations),
- quantizedTensorAlignmentOnWeights(params.quantizedTensorAlignmentOnWeights),
- roundQuantizedValues(params.roundQuantizedValues),
- updateBiases(params.updateBiases),
- supportAsymmetricQuantization(params.supportAsymmetricQuantization),
- precisionsOnActivations(params.precisionsOnActivations),
- precisionsOnWeights(params.precisionsOnWeights),
- layerTransformationsManager(nullptr),
- paramsManager(nullptr),
- quantizationIntervalAsymmetryThreshold(0.002f),
- zeroThreshold(1.e-6f),
- dequantizationShiftToZeroRatioTreshold(4.e-4f),
- minQuantizationLevels(2ul) {}
-
-void LayerTransformation::setParamsManager(IParamsManager* paramsManager) noexcept {
- this->paramsManager = paramsManager;
-}
-
-void LayerTransformation::setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept {
- this->layerTransformationsManager = layerTransformationsManager;
-}
-
-void LayerTransformation::setUpdatePrecisions(const bool updatePrecisions) {
- this->updatePrecisions = updatePrecisions;
-}
-
-void LayerTransformation::setQuantizeOutputs(const bool quantizeOutputs) {
- this->quantizeOutputs = quantizeOutputs;
-}
-
-void LayerTransformation::setWeightsToConst(const bool weightsToConst) {
- this->weightsToConst = weightsToConst;
-}
-
-void LayerTransformation::setQuantizedTensorAlignmentOnActivations(
- const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations) {
- this->quantizedTensorAlignmentOnActivations = quantizedTensorAlignmentOnActivations;
-}
-
-void LayerTransformation::setQuantizedTensorAlignmentOnWeights(
- const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights) {
- this->quantizedTensorAlignmentOnWeights = quantizedTensorAlignmentOnWeights;
-}
-
-const std::vector<Precision>& LayerTransformation::getPrecisionsOnActivations() const {
- return precisionsOnActivations;
-}
-
-const std::vector<Precision>& LayerTransformation::getPrecisionsOnWeights() const {
- return precisionsOnWeights;
-}
-
-bool LayerTransformation::canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const {
- if (!CNNNetworkHelper::isLayoutSupported(layer)) {
- return false;
- }
-
- if (!isQuantized(layer)) {
- return false;
- }
-
- if (!quantizeOutputs) {
- OutputsDataMap outputs;
- context.network.getOutputsInfo(outputs);
- if (outputs.find(layer.name) != outputs.end()) {
- return false;
- }
- }
-
- return true;
-}
-
-Precision LayerTransformation::getPrecisionBeforeParentDequantizationScaleShift(const CNNLayer& layer) {
- const CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(layer, 0);
- if (scaleShift == nullptr) {
- THROW_IE_EXCEPTION << "dequantization ScaleShift layer is absent";
- }
-
- if (scaleShift->type != "ScaleShift") {
- THROW_IE_EXCEPTION << "not expected dequantization layer type " << scaleShift->type;
- }
-
- if (scaleShift->insData.size() < 1) {
- THROW_IE_EXCEPTION << "is not expected ScaleShift '" << scaleShift->name << "' insert data size "
- << scaleShift->insData.size();
- }
-
- const DataWeakPtr insDataWeak = scaleShift->insData[0];
- const DataPtr insData = insDataWeak.lock();
- if (insData == nullptr) {
- THROW_IE_EXCEPTION << "input data is absent";
- }
-
- return insData->getPrecision();
-}
-
-#ifdef LPT_PRINT_DEQUANTIZATION_INFO
-std::stringstream toStream(const std::vector<float>& dequantizationValues) {
- std::stringstream ss;
- const size_t scalesCount = dequantizationValues.size() > 9ul ? 9ul : dequantizationValues.size();
- ss << "{";
- for (size_t i = 0ul; i < scalesCount; ++i) {
- ss << dequantizationValues[i] << (i < (scalesCount - 1) ? "," : "");
- }
- ss << "}";
- return ss;
-}
-
-void LayerTransformation::printDequantizationInfo(const CNNLayer& layer) {
- const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(layer);
- std::cout <<
- layer.type << (CNNNetworkHelper::onWeights(layer) ? " on weights " : " on activations ") <<
- layer.name << ":" << std::endl <<
- " details : " << quantizationDetails << std::endl;
-}
-
-void LayerTransformation::printDequantizationInfo(const DataPrecision& dataPrecision) {
- std::cout << " precision: " << dataPrecision << std::endl;
-}
-
-void LayerTransformation::printDequantizationValues(
- const std::vector<float>& dequantizationScales,
- const std::vector<float>& dequantizationShifts) {
- std::cout <<
- " scales : " << toStream(dequantizationScales).str() << std::endl <<
- " shifts : " << toStream(dequantizationShifts).str() << std::endl;
-}
-#endif
-
-void LayerTransformation::fillFromQuantizationDetails(
- const QuantizationDetails& quantizationDetails,
- const DataPrecision& dataPrecision,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts) const {
- // TODO: refactor: make optional
- const float minQuantizationScale = 1e-32f;
- const float maxQuantizationScale = 1e32f;
-
- bool denormalOutputValuesWasUpdated = false;
- dequantizationScales.resize(quantizationDetails.outputChannelsCount);
- dequantizationShifts.resize(quantizationDetails.outputChannelsCount);
-
- for (size_t channel = 0lu; channel < quantizationDetails.outputChannelsCount; ++channel) {
- float dequantizationScale = 0.f;
- float dequantizationShift = 0.f;
- if (dataPrecision.precision.isSigned()) {
- // I8
- dequantizationScale =
- (quantizationDetails.getOutputHighValue(channel) - quantizationDetails.getOutputLowValue(channel)) /
- (dataPrecision.max - dataPrecision.min);
- const float quantValue =
- (quantizationDetails.getOutputHighValue(channel) - quantizationDetails.getOutputLowValue(channel)) /
- (dataPrecision.max - dataPrecision.min);
-
- const float actualLowPartQuantValue = std::fabs(quantizationDetails.getOutputLowValue(channel) / dataPrecision.min);
- const float actualHighPartQuantValue = std::fabs(quantizationDetails.getOutputHighValue(channel) / dataPrecision.max);
-
- if (dataPrecision.hasZeroPoint) {
- if (actualLowPartQuantValue < actualHighPartQuantValue) {
- dequantizationShift = quantizationDetails.getOutputLowValue(channel) - dataPrecision.min * quantValue;
- } else {
- dequantizationShift = quantizationDetails.getOutputHighValue(channel) - dataPrecision.max * quantValue;
- }
- }
- } else {
- // U8
- dequantizationScale =
- (quantizationDetails.getOutputHighValue(channel) - quantizationDetails.getOutputLowValue(channel)) /
- (dataPrecision.max - dataPrecision.min);
- if (dataPrecision.hasZeroPoint) {
- dequantizationShift = quantizationDetails.getOutputLowValue(channel);
- }
- }
-
- if (fabs(dequantizationScale) < minQuantizationScale) {
- dequantizationScales[channel] = minQuantizationScale;
- denormalOutputValuesWasUpdated = true;
- } else if (fabs(dequantizationScale) > maxQuantizationScale) {
- dequantizationScales[channel] = dequantizationScale > 0.f ? maxQuantizationScale : -maxQuantizationScale;
- denormalOutputValuesWasUpdated = true;
- } else {
- dequantizationScales[channel] = dequantizationScale;
- }
-
- dequantizationShifts[channel] = dequantizationShift;
- }
-}
-
-void LayerTransformation::checkAndUpdateDequantizationShiftWithZero(
- const QuantizationDetails& quantizationDetails,
- std::vector<float>& dequantizationShifts) const {
- auto compare = [](float value1, float value2) { return (std::fabs(value1) < std::fabs(value2)); };
-
- const auto maxShiftIt = std::max_element(dequantizationShifts.begin(), dequantizationShifts.end(), compare);
- if (maxShiftIt == dequantizationShifts.end()) {
- THROW_IE_EXCEPTION << "unexpected dequantization shifts max value";
- }
-
- const auto maxOutputLowIt = std::max_element(quantizationDetails.outputLowValues.begin(), quantizationDetails.outputLowValues.end(), compare);
- if (maxOutputLowIt == quantizationDetails.outputLowValues.end()) {
- THROW_IE_EXCEPTION << "unexpected dequantization output low value";
- }
-
- const auto maxOutputHighIt = std::max_element(quantizationDetails.outputHighValues.begin(), quantizationDetails.outputHighValues.end(), compare);
- if (maxOutputHighIt == quantizationDetails.outputHighValues.end()) {
- THROW_IE_EXCEPTION << "unexpected dequantization output high value";
- }
-
- const float maxOutputIt = std::max(std::fabs(*maxOutputLowIt), std::fabs(*maxOutputHighIt));
- const float relative = std::fabs(*maxShiftIt) / std::fabs(maxOutputIt);
- if (relative < dequantizationShiftToZeroRatioTreshold) {
- std::fill(dequantizationShifts.begin(), dequantizationShifts.end(), 0.f);
- }
-}
-
-void LayerTransformation::addDequantizationLayer(
- TransformationContext& context,
- const CNNLayer& layer,
- const std::vector<float>& dequantizationScales,
- const std::vector<float>& dequantizationShifts) const {
- const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(layer);
-
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(layer);
- for (const CNNLayerPtr& child : children) {
- const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
- context,
- std::make_shared<CNNLayer>(layer),
- child,
- DequantizationDetails(dequantizationScales, dequantizationShifts, outputChannelsCount));
-
- for (const auto& dequantizationLayer : dequantizationLayers) {
- context.dequantizationLayersNames.insert(dequantizationLayer->name);
- }
- }
-
- OutputsDataMap outputs;
- context.network.getOutputsInfo(outputs);
- const auto it = outputs.find(layer.name);
- if (it != outputs.end()) {
- const std::string dequantizationLayerName = layer.name;
- CNNNetworkHelper::renameLayer(context.network, layer.name, layer.name + LayerTransformation::lastLayerPostfix);
-
- const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
- context,
- std::make_shared<CNNLayer>(layer),
- nullptr,
- DequantizationDetails(dequantizationScales, dequantizationShifts, outputChannelsCount),
- dequantizationLayerName);
-
- for (const auto& dequantizationLayer : dequantizationLayers) {
- context.dequantizationLayersNames.insert(dequantizationLayer->name);
- }
- }
-}
-
-void LayerTransformation::fillFromDequantizationLayer(
- const CNNLayer& dequantizationLayer,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts) const {
- if (dequantizationLayer.type != "ScaleShift") {
- THROW_IE_EXCEPTION << "unexpected dequantization layer type " << dequantizationLayer.type;
- }
-
- CNNLayerPtr dequantizationLayerPtr = std::make_shared<CNNLayer>(dequantizationLayer);
- Blob::Ptr weightsBlob = CNNNetworkHelper::getBlob(dequantizationLayerPtr, "weights");
- const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
-
- Blob::Ptr shiftsBlob = CNNNetworkHelper::getBlob(dequantizationLayerPtr, "biases");
- const auto shiftsBuffer = CNNNetworkHelper::getFloatData(shiftsBlob);
-
- const size_t inputCannelsCount = CNNNetworkHelper::getInputChannelsCount(dequantizationLayer);
- dequantizationScales.resize(inputCannelsCount);
- dequantizationShifts.resize(inputCannelsCount);
- for (size_t channel = 0; channel < inputCannelsCount; ++channel) {
- dequantizationScales[channel] = (weightsBlob->size() == 1ul) ? weightsBuffer.get()[0] : weightsBuffer.get()[channel];
- dequantizationShifts[channel] = (shiftsBlob->size() == 1ul) ? shiftsBuffer.get()[0] : shiftsBuffer.get()[channel];
- }
-}
-
-void LayerTransformation::setQuantizationIntervalAsymmetryThreshold(const float value) {
- this->quantizationIntervalAsymmetryThreshold = value;
-}
-
-void LayerTransformation::setZeroThreshold(const float value) {
- this->zeroThreshold = value;
-}
-
-void LayerTransformation::setDequantizationShiftToZeroRatioTreshold(const float value) {
- this->dequantizationShiftToZeroRatioTreshold = value;
-}
-
-void LayerTransformation::setMinQuantizationLevels(const size_t levels) {
- this->minQuantizationLevels = levels;
-}
-
-Precision LayerTransformation::getPrecisionParent(const CNNLayer& layer) {
- const CNNLayerPtr parent = CNNNetworkHelper::getParent(layer, 0);
- if (parent == nullptr) {
- THROW_IE_EXCEPTION << "parent layer is absent";
- }
-
- for (const DataPtr outData : parent->outData) {
- const auto inputTo = getInputTo(outData);
- for (auto it = inputTo.begin(); it != inputTo.end(); ++it) {
- if (it->second->name == layer.name) {
- return outData->getPrecision();
- }
- }
- }
-
- THROW_IE_EXCEPTION << "out data from '" << parent->name << "' to '" << layer.name << "' was not found";
-}
-
-LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(const QuantizationDetails& quantizationDetails) const {
- const float asymmetricIntervalSideRatio256 = -128.f / 127.f;
- bool hasNegative = false;
- bool signedPrecision = true;
- bool unsignedPrecision = true;
-
- bool hasZeroPoint = false;
- for (size_t i = 0; i < quantizationDetails.outputLowValues.size(); ++i) {
- const bool signedInterval = std::signbit(quantizationDetails.outputLowValues[i]) != std::signbit(quantizationDetails.outputHighValues[i]);
- const bool boundaryValuesAreNotZero =
- (std::fabs(quantizationDetails.outputLowValues[i]) >= zeroThreshold) &&
- (std::fabs(quantizationDetails.outputHighValues[i]) >= zeroThreshold);
- if (signedInterval && boundaryValuesAreNotZero) {
- // signed
- unsignedPrecision = false;
- hasNegative = true;
-
- const float expectedRatio = quantizationDetails.levels == 256 ? asymmetricIntervalSideRatio256 : -1.f;
- const float actualRatio = quantizationDetails.outputLowValues[i] / quantizationDetails.outputHighValues[i];
- const float actual = std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio));
- if (actual > quantizationIntervalAsymmetryThreshold) {
- hasZeroPoint = true;
- }
-
-#ifdef LPT_PRINT_DEQUANTIZATION_INFO
- if (hasZeroPoint) {
- std::cout << " actual: " << actual << ", threshold: " << quantizationIntervalAsymmetryThreshold << std::endl;
- std::cout << " hasZeroPoint: " << (hasZeroPoint ? "True" : "False") << std::endl;
- }
-#endif
- } else {
- // unsigned
- signedPrecision = false;
- if (boundaryValuesAreNotZero) {
- hasZeroPoint = boundaryValuesAreNotZero;
- }
-
-#ifdef LPT_PRINT_DEQUANTIZATION_INFO
- if (hasZeroPoint) {
- const float actual = quantizationDetails.outputLowValues[i] > 0.f ?
- quantizationDetails.outputLowValues[i] :
- quantizationDetails.outputHighValues[i];
- std::cout << " actual: " << actual << ", threshold: 0.0" << std::endl;
- std::cout << " hasZeroPoint: " << (hasZeroPoint ? "True" : "False") << std::endl;
- }
-#endif
- }
- }
-
- if (!hasZeroPoint) {
- if (signedPrecision && (!unsignedPrecision)) {
- return LayerTransformation::PrecisionDetails(Precision::I8, hasNegative, hasZeroPoint);
- }
-
- if ((!signedPrecision) && unsignedPrecision) {
- return LayerTransformation::PrecisionDetails(Precision::U8, hasNegative, hasZeroPoint);
- }
- }
-
- return LayerTransformation::PrecisionDetails(Precision::UNSPECIFIED, hasNegative, hasZeroPoint);
-}
-
-bool LayerTransformation::isQuantized(const CNNLayer& layer) const noexcept {
- return true;
-}
-
-bool LayerTransformation::isPrecisionPreserved(const CNNLayer& layer) const noexcept {
- return true;
-}
-
-DataPrecision LayerTransformation::getDataPrecision(
- const CNNLayer& layer,
- const QuantizationDetails& quantizationDetails,
- const bool onWeights,
- const bool supportAsymmetricQuantization) const {
-#ifdef LPT_PRINT_DEQUANTIZATION_INFO
- printDequantizationInfo(layer);
-#endif
- std::vector<Precision> precisions = onWeights ? precisionsOnWeights : precisionsOnActivations;
- PrecisionDetails precisionDetailsAtOutputIntervals = getPrecisionDetails(quantizationDetails);
- {
- if (precisionDetailsAtOutputIntervals.precision != Precision::UNSPECIFIED) {
- if (!onWeights) {
- fillAvailablePrecisions(layer, precisions);
- }
-
- // if supportedPrecisions is empty then use the first available, not supported layer will be in original precision
- if (!precisions.empty()) {
- const auto foundIt = std::find(precisions.begin(), precisions.end(), precisionDetailsAtOutputIntervals.precision);
- const Precision resultPrecision = foundIt != precisions.end() ?
- precisionDetailsAtOutputIntervals.precision :
- *precisions.begin();
-
- const DataPrecision dataPrecision(
- resultPrecision,
- DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
- DataPrecision::getMaxValue(resultPrecision),
- foundIt != precisions.end() ? precisionDetailsAtOutputIntervals.hasZeroPoint : true);
-
-#ifdef LPT_PRINT_DEQUANTIZATION_INFO
- printDequantizationInfo(dataPrecision);
-#endif
- return dataPrecision;
- }
- }
- }
-
- const DataPrecision dataPrecision = precisions.empty() ?
- DataPrecision(Precision::UNSPECIFIED, 0.f, 0.f, false) :
- DataPrecision(
- *precisions.begin(),
- DataPrecision::getMinValue(*precisions.begin(), quantizationDetails.levels),
- DataPrecision::getMaxValue(*precisions.begin()),
- true);
-#ifdef LPT_PRINT_DEQUANTIZATION_INFO
- printDequantizationInfo(dataPrecision);
-#endif
- return dataPrecision;
-}
-
-void LayerTransformation::fillAvailablePrecisions(const CNNLayer& layer, std::vector<Precision>& availablePrecisions) const {
- if (availablePrecisions.empty()) {
- return;
- }
-
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(layer);
- for (CNNLayerPtr child : children) {
- if (child->type == "FakeQuantize") {
- // FakeQuantize layer updates precision
- continue;
- }
-
- if (!layerTransformationsManager->isQuantized(*child)) {
- // low precision chain is interrupted here: next layer supported precisions are ignored
- continue;
- }
-
- const std::vector<Precision> childPrecisionsOnActivations = paramsManager->getPrecisionsOnActivations(child->type);
- if (childPrecisionsOnActivations.size() == 0ul) {
- continue;
- }
-
- for (size_t index = 0ul; index < availablePrecisions.size();) {
- const Precision availablePrecision = availablePrecisions[index];
- if (!std::any_of(
- childPrecisionsOnActivations.begin(),
- childPrecisionsOnActivations.end(),
- [&](const Precision precision) { return availablePrecision == precision; })) {
- availablePrecisions.erase(availablePrecisions.begin() + index);
- } else {
- ++index;
- }
- }
-
- if (!layerTransformationsManager->isPrecisionPreserved(*child)) {
- continue;
- }
-
- fillAvailablePrecisions(*child, availablePrecisions);
- if (availablePrecisions.empty()) {
- return;
- }
- }
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <algorithm>
-#include <caseless.hpp>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "low_precision_transformations/mvn.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-void MvnTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
- if (!LayerTransformation::canBeTransformed(context, layer)) {
- return;
- }
-
- if (!CaselessEq<std::string>()(layer.type, "MVN")) {
- THROW_IE_EXCEPTION << "Layer '" << layer.name << "' has invalid type '" << layer.type << "'. Convolution is expected.";
- }
-
- const CNNLayerPtr scaleShiftOnData = CNNNetworkHelper::getParent(layer, 0);
- if (scaleShiftOnData->type != "ScaleShift") {
- return;
- }
-
- std::vector<float> originalDataDequantizationScales;
- std::vector<float> originalDataDequantizationShifts;
- fillFromDequantizationLayer(*scaleShiftOnData, originalDataDequantizationScales, originalDataDequantizationShifts);
- if (std::any_of(originalDataDequantizationShifts.begin(), originalDataDequantizationShifts.end(), [](const float value) { return value != 0.f; })) {
- return;
- }
-
- const size_t acrossChannels = layer.GetParamAsUInt("across_channels", 0ul);
- if ((acrossChannels == 1ul) &&
- std::any_of(
- originalDataDequantizationScales.begin(),
- originalDataDequantizationScales.end(),
- [&](const float value) { return value != originalDataDequantizationScales[0]; })) {
- return;
- }
-
- const size_t normalizeVariance = layer.GetParamAsUInt("normalize_variance", 0ul);
-
- std::vector<float> dequantizationScales(originalDataDequantizationScales.size());
- std::vector<float> dequantizationShifts(originalDataDequantizationShifts.size(), 0.f);
-
- for (size_t channel = 0ul; channel < dequantizationScales.size(); ++channel) {
- dequantizationScales[channel] = normalizeVariance == 0ul ?
- originalDataDequantizationScales[channel] :
- std::signbit(originalDataDequantizationScales[channel]) ? -1.f : 1.f;
- }
-
- CNNNetworkHelper::removeLayer(context.network, scaleShiftOnData);
- context.removeLayer(*scaleShiftOnData);
-
- addDequantizationLayer(context, layer, dequantizationScales, dequantizationShifts);
-}
-
-bool MvnTransformation::isPrecisionPreserved(const CNNLayer& layer) const noexcept {
- return false;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/network_helper.hpp"
-
-#include <algorithm>
-#include <blob_factory.hpp>
-#include <cmath>
-#include <caseless.hpp>
-#include <limits>
-#include <map>
-#include <memory>
-#include <string>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include <legacy/details/ie_cnn_network_tools.h>
-#include <ie_common.h>
-#include <precision_utils.h>
-#include <legacy/cnn_network_impl.hpp>
-#include <legacy/ie_util_internal.hpp>
-#include "ie_parallel.hpp"
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-static const std::unordered_set<std::string> intermediateLayers{
- "Pooling",
- "Resample"
-};
-
-bool Subgraph::fillSubgraphForQuantization(const CNNLayerPtr& fakeQuantize, std::unordered_set<std::string>& handledLayers) {
- if (fakeQuantize->type != "FakeQuantize") {
- THROW_IE_EXCEPTION << "unexpected layer type " << fakeQuantize->type;
- }
-
- if (!QuantizationDetails::outputLayoutIsSupported(*fakeQuantize)) {
- return false;
- }
-
- quantizationLayers.push_back(fakeQuantize);
- handledLayers.insert(fakeQuantize->name);
- layers.emplace(fakeQuantize->name, fakeQuantize.get());
-
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(*fakeQuantize);
- for (const CNNLayerPtr& child : children) {
- if (handledLayers.find(child->name) != handledLayers.end()) {
- continue;
- }
-
- if (child->type == "Concat") {
- if (!fillSubgraphForConcat(child, handledLayers)) {
- return false;
- }
- } else if (child->type == "FakeQuantize") {
- //
- } else if (intermediateLayers.find(child->type) != intermediateLayers.end()) {
- if (!fillSubgraphForIntermediate(child, handledLayers)) {
- return false;
- }
- }
- }
-
- return true;
-}
-
-bool Subgraph::fill(const CNNLayerPtr& layer, std::unordered_set<std::string>& handledLayers) {
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(*layer);
- for (const CNNLayerPtr& parent : parents) {
- if (handledLayers.find(parent->name) != handledLayers.end()) {
- continue;
- }
-
- if (parent->type == "Concat") {
- if (!fillSubgraphForConcat(parent, handledLayers)) {
- return false;
- }
- } else if (parent->type == "FakeQuantize") {
- if (!fillSubgraphForQuantization(parent, handledLayers)) {
- return false;
- }
- } else if (intermediateLayers.find(parent->type) != intermediateLayers.end()) {
- if (!fillSubgraphForIntermediate(parent, handledLayers)) {
- return false;
- }
- } else {
- return false;
- }
- }
-
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(*layer);
- for (const CNNLayerPtr& child : children) {
- if (handledLayers.find(child->name) != handledLayers.end()) {
- continue;
- }
-
- if (child->type == "Concat") {
- if (!fillSubgraphForConcat(child, handledLayers)) {
- return false;
- }
- } else if (child->type == "FakeQuantize") {
- //
- } else if (intermediateLayers.find(child->type) != intermediateLayers.end()) {
- if (!fillSubgraphForIntermediate(child, handledLayers)) {
- return false;
- }
- }
- }
-
- return true;
-}
-
-bool Subgraph::fillSubgraphForIntermediate(const CNNLayerPtr& intermediate, std::unordered_set<std::string>& handledLayers) {
- if (intermediateLayers.find(intermediate->type) == intermediateLayers.end()) {
- THROW_IE_EXCEPTION << "unexpected layer type " << intermediate->type;
- }
-
- handledLayers.insert(intermediate->name);
- layers.emplace(intermediate->name, intermediate.get());
-
- return fill(intermediate, handledLayers);
-}
-
-bool Subgraph::empty() const {
- return quantizationLayers.empty();
-}
-
-bool Subgraph::fillSubgraphForConcat(const CNNLayerPtr& concat, std::unordered_set<std::string>& handledLayers) {
- if (concat->type != "Concat") {
- THROW_IE_EXCEPTION << "unexpected layer type " << concat->type;
- }
-
- concatLayers.push_back(concat);
- handledLayers.insert(concat->name);
- layers.emplace(concat->name, concat.get());
-
- return fill(concat, handledLayers);
-}
-
-Subgraph CNNNetworkHelper::getSubgraph(const CNNLayer& concat) {
- if (concat.type != "Concat") {
- THROW_IE_EXCEPTION << "unexpected layer type " << concat.type;
- }
-
- Subgraph subgraph;
- std::unordered_set<std::string> handledLayers;
- if (!subgraph.fillSubgraphForConcat(std::make_shared<CNNLayer>(concat), handledLayers)) {
- return Subgraph();
- }
-
- return subgraph;
-}
-
-CNNLayerPtr CNNNetworkHelper::getLayer(const ICNNNetwork& network, const std::string& layerName) {
- std::vector<CNNLayerPtr> layers = InferenceEngine::details::CNNNetSortTopologically(network);
- for (CNNLayerPtr layer : layers) {
- if (layer->name == layerName) {
- return layer;
- }
- }
-
- return nullptr;
-}
-
-Blob::Ptr CNNNetworkHelper::makeNewBlobPtr(const TensorDesc& desc) {
- Blob::Ptr newBlob;
- if (desc.getPrecision() == Precision::FP32)
- newBlob = make_shared_blob<PrecisionTrait<Precision::FP32>::value_type>(desc);
- else if (desc.getPrecision() == Precision::FP16)
- newBlob = make_shared_blob<PrecisionTrait<Precision::FP16>::value_type>(desc);
- else if (desc.getPrecision() == Precision::I8)
- newBlob = make_shared_blob<PrecisionTrait<Precision::I8>::value_type>(desc);
- else if (desc.getPrecision() == Precision::U8)
- newBlob = make_shared_blob<PrecisionTrait<Precision::U8>::value_type>(desc);
- else if (desc.getPrecision() == Precision::I32)
- newBlob = make_shared_blob<PrecisionTrait<Precision::I32>::value_type>(desc);
- else
- THROW_IE_EXCEPTION << "Unsupported transformation precision: " << desc.getPrecision();
-
- return newBlob;
-}
-
-void CNNNetworkHelper::updateBlobs(const CNNLayer& quantizeLayer, int constLayerIndex,
- const std::vector<float>& values) {
- CNNLayerPtr blobLayer = CNNNetworkHelper::getParent(quantizeLayer, constLayerIndex);
- if (blobLayer == nullptr) {
- THROW_IE_EXCEPTION << "layer is absent";
- }
-
- const auto existingBlobIt = blobLayer->blobs.find("custom");
- if (existingBlobIt == blobLayer->blobs.end()) {
- THROW_IE_EXCEPTION << "custom blob was not found ";
- }
-
- TensorDesc newBlobTensorDesc;
-
- const TensorDesc existingBlobTensorDesc = existingBlobIt->second->getTensorDesc();
- if ((existingBlobIt->second->size() != values.size()) && (values.size() != 1)) {
- if (existingBlobTensorDesc.getLayout() == Layout::SCALAR) {
- //
- } else if (existingBlobTensorDesc.getLayout() == Layout::C) {
- if (existingBlobTensorDesc.getDims().size() != 1) {
- THROW_IE_EXCEPTION << "temporary dimensions size " << existingBlobTensorDesc.getDims().size()
- << " for layout " << existingBlobTensorDesc.getLayout() << " is not supported";
- }
- if (existingBlobTensorDesc.getDims()[0] != 1) {
- THROW_IE_EXCEPTION << "temporary is not supported";
- }
- } else if (existingBlobTensorDesc.getLayout() == Layout::NCHW) {
- if (existingBlobTensorDesc.getDims().size() != 4) {
- THROW_IE_EXCEPTION << "temporary dimensions size " << existingBlobTensorDesc.getDims().size()
- << " for layout " << existingBlobTensorDesc.getLayout() << " is not supported";
- }
- // OIHW
- if (existingBlobTensorDesc.getDims()[0] != 1) {
- THROW_IE_EXCEPTION << "temporary is not supported";
- }
- }
-
- const std::vector<size_t> dims = {values.size()};
- const Layout layout = Layout::C;
- newBlobTensorDesc = TensorDesc(existingBlobTensorDesc.getPrecision(), dims, layout);
- for (DataPtr data : blobLayer->outData) {
- data->reshape(dims, layout);
- }
- } else {
- newBlobTensorDesc = existingBlobTensorDesc;
- }
-
- Blob::Ptr newBlob = makeNewBlobPtr(newBlobTensorDesc);
- newBlob->allocate();
- blobLayer->blobs[existingBlobIt->first] = newBlob;
-
- if (values.size() == 1)
- fillBlobByFP32(newBlob, values[0]);
- else
- fillBlobByFP32(newBlob, values.data());
-}
-
-void CNNNetworkHelper::updateBlobs(
- TransformationContext& context,
- const CNNLayer& quantizeLayer,
- int constLayerIndex,
- const std::vector<float>& values) {
- CNNLayerPtr blobLayer = CNNNetworkHelper::getParent(quantizeLayer, constLayerIndex);
- if (blobLayer == nullptr) {
- THROW_IE_EXCEPTION << "layer is absent";
- }
-
- const auto existingBlobIt = blobLayer->blobs.find("custom");
- if (existingBlobIt == blobLayer->blobs.end()) {
- THROW_IE_EXCEPTION << "custom blob was not found ";
- }
-
- blobLayer = copyConstant(context, quantizeLayer, blobLayer, constLayerIndex);
- updateBlobs(quantizeLayer, constLayerIndex, values);
-}
-
-void CNNNetworkHelper::updateBlobs(CNNLayer& layer, const std::string& blobName, const std::vector<float>& values) {
- const auto existingBlobIt = layer.blobs.find(blobName);
- if (existingBlobIt == layer.blobs.end()) {
- THROW_IE_EXCEPTION << "custom blob was not found ";
- }
-
- TensorDesc newBlobTensorDesc;
-
- const TensorDesc existingBlobTensorDesc = existingBlobIt->second->getTensorDesc();
- if ((existingBlobIt->second->size() != values.size()) && (values.size() != 1)) {
- if (existingBlobTensorDesc.getLayout() == Layout::SCALAR) {
- //
- } else if (existingBlobTensorDesc.getLayout() == Layout::C) {
- if (existingBlobTensorDesc.getDims().size() != 1) {
- THROW_IE_EXCEPTION << "temporary dimensions size " << existingBlobTensorDesc.getDims().size()
- << " for layout " << existingBlobTensorDesc.getLayout() << " is not supported";
- }
- if (existingBlobTensorDesc.getDims()[0] != 1) {
- THROW_IE_EXCEPTION << "temporary is not supported";
- }
- } else if (existingBlobTensorDesc.getLayout() == Layout::NCHW) {
- if (existingBlobTensorDesc.getDims().size() != 4) {
- THROW_IE_EXCEPTION << "temporary dimensions size " << existingBlobTensorDesc.getDims().size()
- << " for layout " << existingBlobTensorDesc.getLayout() << " is not supported";
- }
- // OIHW
- if (existingBlobTensorDesc.getDims()[0] != 1) {
- THROW_IE_EXCEPTION << "temporary is not supported";
- }
- }
-
- const std::vector<size_t> dims = {values.size()};
- const Layout layout = Layout::C;
- newBlobTensorDesc = TensorDesc(existingBlobTensorDesc.getPrecision(), dims, layout);
- for (DataPtr data : layer.outData) {
- data->reshape(dims, layout);
- }
- } else {
- newBlobTensorDesc = existingBlobTensorDesc;
- }
-
- Blob::Ptr newBlob = makeNewBlobPtr(newBlobTensorDesc);
- newBlob->allocate();
- layer.blobs[existingBlobIt->first] = newBlob;
-
- if ((blobName == "weights") || (blobName == "biases")) {
- WeightableLayer* weightableLayer = dynamic_cast<WeightableLayer*>(&layer);
- if (weightableLayer == nullptr) {
- THROW_IE_EXCEPTION << "layer '" << layer.name << "' with blob name '" << blobName << "' is not weightable";
- }
- if (blobName == "weights") {
- weightableLayer->_weights = newBlob;
- } else if (blobName == "biases") {
- weightableLayer->_biases = newBlob;
- } else {
- THROW_IE_EXCEPTION << "unexpected blob name '" << blobName << "' for layer " << layer.name;
- }
- }
-
- if (values.size() == 1)
- fillBlobByFP32(newBlob, values[0]);
- else
- fillBlobByFP32(newBlob, values.data());
-}
-
-void CNNNetworkHelper::updateBlobs(const CNNLayer& quantizeLayer, int constLayerIndex, float value) {
- auto inData = quantizeLayer.insData[constLayerIndex].lock();
- if (inData == nullptr) {
- THROW_IE_EXCEPTION << "data is absent";
- }
-
- CNNLayerPtr blobLayer = getCreatorLayer(inData).lock();
- if (blobLayer == nullptr) {
- THROW_IE_EXCEPTION << "layer is absent";
- }
-
- if (blobLayer->blobs.size() != 1) {
- THROW_IE_EXCEPTION << "unexpected blobs size";
- }
-
- const auto existingBlobIt = blobLayer->blobs.begin();
- const auto& existingBlobTensorDesc = existingBlobIt->second->getTensorDesc();
- Blob::Ptr newBlob = makeNewBlobPtr(existingBlobTensorDesc);
-
- newBlob->allocate();
- fillBlobByFP32(newBlob, value);
- blobLayer->blobs[existingBlobIt->first] = newBlob;
-}
-
-void CNNNetworkHelper::updateBlobs(TransformationContext& context, const CNNLayer& quantizeLayer, int constLayerIndex, float value) {
- auto inData = quantizeLayer.insData[constLayerIndex].lock();
- if (inData == nullptr) {
- THROW_IE_EXCEPTION << "data is absent";
- }
-
- CNNLayerPtr blobLayer = getCreatorLayer(inData).lock();
- if (blobLayer == nullptr) {
- THROW_IE_EXCEPTION << "layer is absent";
- }
-
- if (blobLayer->blobs.size() != 1) {
- THROW_IE_EXCEPTION << "unexpected blobs size";
- }
-
- blobLayer = copyConstant(context, quantizeLayer, blobLayer, constLayerIndex);
- updateBlobs(quantizeLayer, constLayerIndex, value);
-}
-
-CNNLayerPtr CNNNetworkHelper::copyConstant(
- TransformationContext& context,
- const CNNLayer& quantizeLayer,
- const CNNLayerPtr& blobLayer,
- const size_t constLayerIndex) {
- size_t repeatsCount = 0ul;
- for (size_t i = 0; i < quantizeLayer.insData.size(); ++i) {
- auto parentInData = quantizeLayer.insData[i].lock();
- if (parentInData == nullptr) {
- continue;
- }
- const auto quantizeLayerParent = getCreatorLayer(parentInData).lock();
- if (quantizeLayerParent == nullptr) {
- continue;
- }
- if (quantizeLayerParent->name == blobLayer->name) {
- repeatsCount++;
- }
- }
-
- if (repeatsCount < 2ul) {
- return blobLayer;
- }
-
- details::CNNNetworkImpl* networkImpl = dynamic_cast<details::CNNNetworkImpl*>(&context.network);
- if (networkImpl == nullptr) {
- THROW_IE_EXCEPTION << "Unexpected network type";
- }
-
- const DataPtr outData = blobLayer->outData[0];
- const std::map<std::string, CNNLayerPtr>& inputTo = getInputTo(outData);
- const auto quantizeLayerIt = inputTo.find(quantizeLayer.name);
- if (quantizeLayerIt == inputTo.end()) {
- THROW_IE_EXCEPTION << "Layer was not found";
- }
-
- const auto blobIt = blobLayer->blobs.find("custom");
- if (blobIt == blobLayer->blobs.end()) {
- THROW_IE_EXCEPTION << "Blob was not found";
- }
-
- const Blob::Ptr blob = blobIt->second;
- Blob::Ptr newBlob = makeNewBlobPtr(blob->getTensorDesc());
- newBlob->allocate();
-
- const std::shared_ptr<float> blobValues = CNNNetworkHelper::getFloatData(blob);
- fillBlobByFP32(newBlob, blobValues.get());
-
- auto newBlobValues = CNNNetworkHelper::getFloatData(newBlob);
-
- const std::string layerName = blobLayer->name + "/new" + std::to_string(repeatsCount);
- CNNLayerPtr newBlobLayer = CNNLayerPtr(new CNNLayer({ layerName, "Const", blob->getTensorDesc().getPrecision() }));
- newBlobLayer->blobs.emplace("custom", newBlob);
-
- const TensorDesc& tensorDesc = blobLayer->outData[0]->getTensorDesc();
- DataPtr newEdgeAfterLayer(new Data(newBlobLayer->name, tensorDesc));
- newEdgeAfterLayer->setName(newBlobLayer->name);
- newEdgeAfterLayer->setPrecision(blob->getTensorDesc().getPrecision());
- quantizeLayerIt->second->insData[constLayerIndex] = newEdgeAfterLayer;
- getInputTo(newEdgeAfterLayer)[quantizeLayer.name] = quantizeLayerIt->second;
-
- getCreatorLayer(newEdgeAfterLayer) = newBlobLayer;
- newBlobLayer->outData.push_back(newEdgeAfterLayer);
-
- CNNNetworkImpl* netImpl = dynamic_cast<CNNNetworkImpl*>(&context.network);
- netImpl->addData(newBlobLayer->name.c_str(), newEdgeAfterLayer);
- netImpl->addLayer(newBlobLayer);
-
- return newBlobLayer;
-}
-
-int CNNNetworkHelper::onWeightsInDepth(const CNNLayer& layer) {
- const std::vector<CNNLayerPtr> children = getChildren(layer);
- for (const CNNLayerPtr& child : children) {
- if ((CaselessEq<std::string>()(child->type, "Convolution") ||
- CaselessEq<std::string>()(child->type, "FullyConnected") ||
- CaselessEq<std::string>()(child->type, "Gemm")) &&
- (child->insData.size() >= 2lu)) {
- const std::vector<CNNLayerPtr> parents = getParentsRecursivelyExceptTypes(*child, {}, 1);
- for (const CNNLayerPtr& parent : parents) {
- if (parent->name == layer.name) {
- return 1;
- }
- }
- return -1;
- }
-
- const int result = onWeightsInDepth(*child);
- if (result != 0) {
- return result;
- }
- }
- return 0;
-}
-
-bool CNNNetworkHelper::onWeights(const CNNLayer& layer) {
- const int result = onWeightsInDepth(layer);
- return result == 1;
-}
-
-bool CNNNetworkHelper::onConstWeightsPath(const CNNLayer& quantize) {
- CNNLayerPtr parent = CNNNetworkHelper::getParent(quantize, 0);
- if (parent == nullptr) {
- THROW_IE_LPT_EXCEPTION(quantize) << "parent layer is nullable";
- }
-
- return parent->type == "Const";
-}
-
-size_t CNNNetworkHelper::getIndex(const CNNLayer& layer) {
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(layer);
- if (children.size() != 1) {
- THROW_IE_EXCEPTION << "not supported";
- }
-
- for (size_t i = 0; i < children[0]->insData.size(); ++i) {
- const DataPtr insData = children[0]->insData[i].lock();
- if (insData == nullptr) {
- continue;
- }
- const CNNLayerPtr parent = getCreatorLayer(insData).lock();
- if ((parent != nullptr) && (parent->name == layer.name)) {
- return i;
- }
- }
-
- THROW_IE_EXCEPTION << "not found";
-}
-
-// Replaces a FakeQuantize layer, together with the Const layers feeding it, by Const
-// layer(s) holding `weights`.
-//
-// context        - transformation context; layers are removed from context.network and
-//                  reported through context.removeLayer().
-// fakeQuantize   - layer to replace. Every input must be produced by a "Const" layer that
-//                  has no inputs, and the layer must have exactly one output.
-// weights        - blob forwarded to addConstBetween() for every created Const layer.
-// constLayerName - name forwarded to addConstBetween() for every created Const layer.
-//
-// Returns one Const layer per consumer of the FakeQuantize output.
-// Throws when the layer, one of its inputs, its output or one of its consumers is
-// missing or has an unexpected shape/type.
-std::vector<CNNLayerPtr> CNNNetworkHelper::transformFakeQuantizeToConst(TransformationContext& context,
-                                                                        const CNNLayerPtr fakeQuantize,
-                                                                        const Blob::Ptr weights,
-                                                                        const std::string& constLayerName) {
-    if (fakeQuantize == nullptr) {
-        THROW_IE_EXCEPTION << "FakeQuantize layer is nullable";
-    }
-
-    // Validate inputs and output first so that a malformed layer is rejected before
-    // anything is removed from the network.
-    std::set<CNNLayerPtr> constLayersToRemove;
-    for (const DataWeakPtr& insDataWeak : fakeQuantize->insData) {
-        const DataPtr insData = insDataWeak.lock();
-        if (insData == nullptr) {
-            THROW_IE_EXCEPTION << "input data for FakeQuantize '" << fakeQuantize->name << "' is nullable";
-        }
-        const CNNLayerPtr parent = getCreatorLayer(insData).lock();
-        if (parent == nullptr) {
-            THROW_IE_EXCEPTION << "input layer for FakeQuantize '" << fakeQuantize->name << "' is nullable";
-        }
-        if (!CaselessEq<std::string>()(parent->type, "Const") || (parent->insData.size() != 0lu)) {
-            THROW_IE_EXCEPTION << "unexpected FakeQuantize input layer type " << parent->type << " for layer '"
-                               << fakeQuantize->name << "'";
-        }
-
-        constLayersToRemove.insert(parent);
-    }
-
-    if (fakeQuantize->outData.size() != 1lu) {
-        THROW_IE_EXCEPTION << "FakeQuantize " << fakeQuantize->name << " has several outputs";
-    }
-
-    const DataPtr outData = fakeQuantize->outData[0];
-    if (outData == nullptr) {
-        THROW_IE_EXCEPTION << "FakeQuantize output data is nullable";
-    }
-
-    for (const CNNLayerPtr& parent : constLayersToRemove) {
-        CNNNetworkHelper::removeLayer(context.network, parent);
-        context.removeLayer(*parent);
-    }
-
-    // Work on a copy of the consumer map: addConstBetween() rewires the edges of outData
-    // (the consumer is erased from its input-to map) while we iterate.
-    const auto inputTo = getInputTo(outData);
-    std::vector<CNNLayerPtr> constLayers;
-    constLayers.reserve(inputTo.size());
-    for (const auto& consumer : inputTo) {
-        const CNNLayerPtr& child = consumer.second;
-        if (child == nullptr) {
-            THROW_IE_EXCEPTION << "child layer for FakeQuantize " << fakeQuantize->name << " is nullable";
-        }
-
-        constLayers.push_back(
-            CNNNetworkHelper::addConstBetween(context.network, fakeQuantize, child, weights, constLayerName));
-    }
-
-    CNNNetworkHelper::removeLayer(context.network, fakeQuantize);
-    context.removeLayer(*fakeQuantize);
-
-    return constLayers;
-}
-
-// Applies `precision` to every output tensor of `layer`.
-void CNNNetworkHelper::setOutDataPrecision(const CNNLayer& layer, const Precision& precision) {
-    const size_t outputsCount = layer.outData.size();
-    for (size_t outputIndex = 0; outputIndex < outputsCount; ++outputIndex) {
-        layer.outData[outputIndex]->setPrecision(precision);
-    }
-}
-
-// Applies `precision` to every output tensor of every layer in `layers`
-// by delegating to the single-layer overload.
-void CNNNetworkHelper::setOutDataPrecision(const std::vector<CNNLayerPtr>& layers, const Precision& precision) {
-    for (auto current = layers.begin(); current != layers.end(); ++current) {
-        const CNNLayer& target = **current;
-        setOutDataPrecision(target, precision);
-    }
-}
-
-// Walks from `beginLayer` towards `endBeforeLayer` and sets `precision` on the outputs of
-// every visited layer; `endBeforeLayer` itself is not touched.
-// At `beginLayer` the walk follows child number `branchWithEndBeforeLayer`; every later
-// layer must have exactly one child. Layers are matched by name.
-// Throws when the branch index is out of range or a later layer does not have one child.
-void CNNNetworkHelper::setOutDataPrecision(const CNNLayer& beginLayer, const size_t branchWithEndBeforeLayer,
-                                           const CNNLayer& endBeforeLayer, const Precision& precision) {
-    // The walk starts from a copy of the begin layer, exactly as a shared pointer is needed below.
-    CNNLayerPtr current = std::make_shared<CNNLayer>(beginLayer);
-    for (;;) {
-        if (current->name == endBeforeLayer.name) {
-            return;
-        }
-
-        CNNNetworkHelper::setOutDataPrecision(*current, precision);
-        const std::vector<CNNLayerPtr> successors = CNNNetworkHelper::getChildren(*current);
-
-        const bool atBeginLayer = (current->name == beginLayer.name);
-        if (atBeginLayer) {
-            if (branchWithEndBeforeLayer >= successors.size()) {
-                THROW_IE_EXCEPTION << "branch with end before layer is out of children count " << successors.size();
-            }
-            current = successors[branchWithEndBeforeLayer];
-            continue;
-        }
-
-        if (successors.size() != 1) {
-            THROW_IE_EXCEPTION << "not supported";
-        }
-        current = successors[0];
-    }
-}
-
-// Reports whether any layer in `children` has a type listed in `layerTypes`.
-// Layers whose type is in `ignoreLayerTypes` are looked through: such a layer counts as a
-// match when it does not have exactly one output, otherwise its own children are searched
-// recursively. Layers of any other type end the search on their branch.
-bool CNNNetworkHelper::IsChild(const std::vector<CNNLayerPtr>& children,
-                               const std::unordered_set<std::string>& layerTypes,
-                               const std::unordered_set<std::string>& ignoreLayerTypes) {
-    for (const CNNLayerPtr& candidate : children) {
-        if (layerTypes.count(candidate->type) != 0) {
-            return true;
-        }
-        if (ignoreLayerTypes.count(candidate->type) == 0) {
-            continue;
-        }
-        if ((candidate->outData.size() != 1) ||
-            IsChild(CNNNetworkHelper::getChildren(*candidate), layerTypes, ignoreLayerTypes)) {
-            return true;
-        }
-    }
-    return false;
-}
-
-// Returns the channel count taken from the first output tensor of `layer`:
-// dimension 0 when `isOnWeights` is set or the tensor is one-dimensional, dimension 1 otherwise.
-// Throws when the layer has no outputs or the first output has no dimensions.
-size_t CNNNetworkHelper::getOutputChannelsCount(const CNNLayer& layer, bool isOnWeights) {
-    if (layer.outData.empty()) {
-        THROW_IE_EXCEPTION << "Layer " << layer.name << " doesn't have output tensors";
-    }
-
-    const auto& dims = layer.outData[0]->getDims();
-    if (dims.empty()) {
-        if (isOnWeights) {
-            THROW_IE_EXCEPTION << "Invalid dimensions count (0) in output of " << layer.name << " layer on weights";
-        }
-        THROW_IE_EXCEPTION << "Invalid dimensions count (0) in output of " << layer.name << " layer on activations";
-    }
-
-    const size_t channelAxis = (isOnWeights || (dims.size() == 1ul)) ? 0ul : 1ul;
-    return dims[channelAxis];
-}
-
-// Collects the layers located strictly between `parent` and `child`, walking upwards from
-// `child` through single-parent layers. The result is ordered from the layer nearest to
-// `child` to the layer nearest to `parent`. Layers are matched by name.
-// Throws "not found" when the walk reaches a layer without parents and "not supported"
-// when an intermediate layer has more than one parent.
-std::vector<CNNLayerPtr> CNNNetworkHelper::getLayers(const CNNLayer& parent, const CNNLayer& child) {
-    std::vector<CNNLayerPtr> chain;
-    for (CNNLayerPtr cursor = std::make_shared<CNNLayer>(child); cursor != nullptr;) {
-        const std::vector<CNNLayerPtr> producers = CNNNetworkHelper::getParents(*cursor);
-        for (size_t i = 0; i < producers.size(); ++i) {
-            if (producers[i]->name == parent.name) {
-                return chain;
-            }
-        }
-
-        if (producers.empty()) {
-            THROW_IE_EXCEPTION << "not found";
-        }
-        if (producers.size() != 1ul) {
-            THROW_IE_EXCEPTION << "not supported";
-        }
-
-        chain.push_back(producers[0]);
-        cursor = producers[0];
-    }
-    return chain;
-}
-
-// Looks up a blob of `layer`.
-// With a non-empty `blobName` the blob stored under that name is returned.
-// With an empty `blobName` the layer must own exactly one blob, which is returned.
-// Throws when `layer` is null, the named blob is missing, or (for an empty name) the
-// layer has no blob or more than one.
-Blob::Ptr CNNNetworkHelper::getBlob(const CNNLayer* layer, const std::string& blobName) {
-    if (!layer) {
-        THROW_IE_EXCEPTION << "layer is nullable";
-    }
-
-    const auto& blobs = layer->blobs;
-    if (!blobName.empty()) {
-        const auto found = blobs.find(blobName);
-        if (found == blobs.end()) {
-            THROW_IE_LPT_EXCEPTION(*layer) << " does not have blob " << blobName;
-        }
-        return found->second;
-    }
-
-    if (blobs.empty()) {
-        THROW_IE_LPT_EXCEPTION(*layer) << "does not have any blob";
-    }
-    if (blobs.size() != 1) {
-        THROW_IE_LPT_EXCEPTION(*layer) << "there are several blobs";
-    }
-    return blobs.begin()->second;
-}
-
-// Shared-pointer convenience overload: forwards to the raw-pointer getBlob().
-Blob::Ptr CNNNetworkHelper::getBlob(CNNLayerPtr layer, const std::string& blobName) {
-    const CNNLayer* const rawLayer = layer.get();
-    return getBlob(rawLayer, blobName);
-}
-
-// Returns a newly allocated float array (srcBlob->size() elements) holding the contents
-// of `srcBlob` converted to float. FP16 data goes through PrecisionUtils::f16tof32Arrays,
-// every other supported precision is converted element by element.
-// Throws when the blob is null or its precision is not accepted by isBlobPrecisionSupported().
-std::shared_ptr<float> CNNNetworkHelper::getFloatData(const Blob::Ptr& srcBlob) {
-    if (!srcBlob) {
-        THROW_IE_EXCEPTION << "Invalid blob";
-    }
-
-    const auto& precision = srcBlob->getTensorDesc().getPrecision();
-    if (!isBlobPrecisionSupported(precision)) {
-        THROW_IE_EXCEPTION << "precision '" << precision << "' is not supported";
-    }
-
-    const size_t count = srcBlob->size();
-    std::shared_ptr<float> converted(new float[count], std::default_delete<float[]>());
-    float* const out = converted.get();
-
-    if (precision == Precision::FP16) {
-        const short* halfData = srcBlob->buffer().as<short*>();
-        PrecisionUtils::f16tof32Arrays(out, halfData, count, 1.f, 0.f);
-    } else if (precision == Precision::FP32) {
-        const float* in = srcBlob->buffer().as<float*>();
-        std::copy(in, in + count, out);
-    } else if (precision == Precision::I8) {
-        const auto* in = srcBlob->buffer().as<PrecisionTrait<Precision::I8>::value_type*>();
-        std::copy(in, in + count, out);
-    } else if (precision == Precision::U8) {
-        const auto* in = srcBlob->buffer().as<PrecisionTrait<Precision::U8>::value_type*>();
-        std::copy(in, in + count, out);
-    } else if (precision == Precision::I32) {
-        const auto* in = srcBlob->buffer().as<PrecisionTrait<Precision::I32>::value_type*>();
-        std::copy(in, in + count, out);
-    } else if (precision == Precision::U32) {
-        const auto* in = srcBlob->buffer().as<PrecisionTrait<Precision::U32>::value_type*>();
-        std::copy(in, in + count, out);
-    } else if (precision == Precision::I64) {
-        const auto* in = srcBlob->buffer().as<PrecisionTrait<Precision::I64>::value_type*>();
-        std::copy(in, in + count, out);
-    } else if (precision == Precision::U64) {
-        const auto* in = srcBlob->buffer().as<PrecisionTrait<Precision::U64>::value_type*>();
-        std::copy(in, in + count, out);
-    } else {
-        THROW_IE_EXCEPTION << "Unsupported transformation precision: " << precision;
-    }
-
-    return converted;
-}
-
-// Tells whether `precision` is one of the blob precisions this helper accepts:
-// FP32, FP16, I8, U8, I32, U32, I64 or U64.
-bool CNNNetworkHelper::isBlobPrecisionSupported(const Precision precision) {
-    if ((precision == Precision::FP32) || (precision == Precision::FP16)) {
-        return true;
-    }
-    if ((precision == Precision::I8) || (precision == Precision::U8)) {
-        return true;
-    }
-    if ((precision == Precision::I32) || (precision == Precision::U32)) {
-        return true;
-    }
-    if ((precision == Precision::I64) || (precision == Precision::U64)) {
-        return true;
-    }
-    return false;
-}
-
-// Fetches the blob `blobName` of `layer` through getBlob() and returns its contents
-// converted to float. Throws when the stored blob pointer is null.
-std::shared_ptr<float> CNNNetworkHelper::getFloatData(const CNNLayerPtr& layer, const std::string& blobName) {
-    const Blob::Ptr source = getBlob(layer, blobName);
-    if (!source) {
-        THROW_IE_EXCEPTION << "Could not find blob '" << blobName << "' for layer " << layer->name;
-    }
-
-    return getFloatData(source);
-}
-
-// Fills `dstBlob` with dstBlob->size() values read from `srcData`, converting them to the
-// blob precision: FP32 is copied, FP16 goes through PrecisionUtils::f32tof16Arrays, and
-// I8/U8/I32 values are rounded with std::roundf and then cast.
-// Throws when the blob is null or has any other precision.
-void CNNNetworkHelper::fillBlobByFP32(Blob::Ptr& dstBlob, const float* srcData) {
-    if (!dstBlob) {
-        THROW_IE_EXCEPTION << "Invalid blob";
-    }
-
-    const auto& precision = dstBlob->getTensorDesc().getPrecision();
-    const size_t count = dstBlob->size();
-
-    if (precision == Precision::FP32) {
-        std::copy(srcData, srcData + count, dstBlob->buffer().as<float*>());
-        return;
-    }
-    if (precision == Precision::FP16) {
-        short* halfData = dstBlob->buffer().as<short*>();
-        PrecisionUtils::f32tof16Arrays(halfData, srcData, count, 1.f, 0.f);
-        return;
-    }
-    if (precision == Precision::I8) {
-        using Element = PrecisionTrait<Precision::I8>::value_type;
-        Element* out = dstBlob->buffer().as<Element*>();
-        for (size_t pos = 0ul; pos != count; ++pos) {
-            out[pos] = static_cast<Element>(std::roundf(srcData[pos]));
-        }
-        return;
-    }
-    if (precision == Precision::U8) {
-        using Element = PrecisionTrait<Precision::U8>::value_type;
-        Element* out = dstBlob->buffer().as<Element*>();
-        for (size_t pos = 0ul; pos != count; ++pos) {
-            out[pos] = static_cast<Element>(std::roundf(srcData[pos]));
-        }
-        return;
-    }
-    if (precision == Precision::I32) {
-        using Element = PrecisionTrait<Precision::I32>::value_type;
-        Element* out = dstBlob->buffer().as<Element*>();
-        for (size_t pos = 0ul; pos != count; ++pos) {
-            out[pos] = static_cast<Element>(std::roundf(srcData[pos]));
-        }
-        return;
-    }
-
-    THROW_IE_EXCEPTION << "Unsupported transformation precision: " << precision;
-}
-
-// Returns a new float array of `dataSize` elements in which every value of `srcData` has
-// been passed through `precision` and converted back to float, i.e. the values a tensor
-// of that precision would actually hold.
-//   FP32        - plain copy.
-//   FP16        - float -> half -> float round trip.
-//   I8/U8/I32   - std::roundf, cast to the integer type, cast back to float.
-// Throws for any other precision.
-std::shared_ptr<float> CNNNetworkHelper::convertFloatData(const float* srcData, const size_t dataSize,
-                                                          const Precision precision) {
-    std::shared_ptr<float> dstData(new float[dataSize], std::default_delete<float[]>());
-
-    if (precision == Precision::FP32) {
-        std::copy(srcData, srcData + dataSize, dstData.get());
-    } else if (precision == Precision::FP16) {
-        for (size_t i = 0ul; i < dataSize; ++i) {
-            // The inner call must narrow to half (f32tof16); decoding the float itself as
-            // half bits would yield unrelated values.
-            dstData.get()[i] = PrecisionUtils::f16tof32(PrecisionUtils::f32tof16(srcData[i]));
-        }
-    } else if (precision == Precision::I8) {
-        for (size_t i = 0ul; i < dataSize; ++i) {
-            dstData.get()[i] =
-                static_cast<float>(static_cast<PrecisionTrait<Precision::I8>::value_type>(std::roundf(srcData[i])));
-        }
-    } else if (precision == Precision::U8) {
-        for (size_t i = 0ul; i < dataSize; ++i) {
-            dstData.get()[i] =
-                static_cast<float>(static_cast<PrecisionTrait<Precision::U8>::value_type>(std::roundf(srcData[i])));
-        }
-    } else if (precision == Precision::I32) {
-        for (size_t i = 0ul; i < dataSize; ++i) {
-            dstData.get()[i] =
-                static_cast<float>(static_cast<PrecisionTrait<Precision::I32>::value_type>(std::roundf(srcData[i])));
-        }
-    } else {
-        THROW_IE_EXCEPTION << "Unsupported transformation precision: " << precision;
-    }
-
-    return dstData;
-}
-
-// Resolves the blob `blobName` of `layer` through getBlob() and fills it from `srcData`
-// using the blob-based overload.
-void CNNNetworkHelper::fillBlobByFP32(const CNNLayerPtr& layer, const std::string& blobName, const float* srcData) {
-    Blob::Ptr target = getBlob(layer, blobName);
-    fillBlobByFP32(target, srcData);
-}
-
-// Sets every element of `dstBlob` to `value` converted to the blob precision
-// (FP32, FP16, I8, U8 or I32).
-// Throws when the blob is null (same guard as the array-based overload) or has any
-// other precision.
-// NOTE(review): integer precisions use a plain static_cast here, whereas the array-based
-// overload rounds with std::roundf first - confirm whether truncation is intended.
-void CNNNetworkHelper::fillBlobByFP32(Blob::Ptr& dstBlob, float value) {
-    if (dstBlob == nullptr) THROW_IE_EXCEPTION << "Invalid blob";
-
-    const auto& precision = dstBlob->getTensorDesc().getPrecision();
-    const size_t dataSize = dstBlob->size();
-
-    if (precision == Precision::FP32) {
-        float* dstData = dstBlob->buffer().as<float*>();
-        std::fill(dstData, dstData + dataSize, value);
-    } else if (precision == Precision::FP16) {
-        short* dstData = dstBlob->buffer().as<short*>();
-        const short s_value = PrecisionUtils::f32tof16(value);
-        std::fill(dstData, dstData + dataSize, s_value);
-    } else if (precision == Precision::I8) {
-        auto* dstData = dstBlob->buffer().as<PrecisionTrait<Precision::I8>::value_type*>();
-        std::fill(dstData, dstData + dataSize, static_cast<PrecisionTrait<Precision::I8>::value_type>(value));
-    } else if (precision == Precision::U8) {
-        auto* dstData = dstBlob->buffer().as<PrecisionTrait<Precision::U8>::value_type*>();
-        std::fill(dstData, dstData + dataSize, static_cast<PrecisionTrait<Precision::U8>::value_type>(value));
-    } else if (precision == Precision::I32) {
-        auto* dstData = dstBlob->buffer().as<PrecisionTrait<Precision::I32>::value_type*>();
-        std::fill(dstData, dstData + dataSize, static_cast<PrecisionTrait<Precision::I32>::value_type>(value));
-    } else {
-        THROW_IE_EXCEPTION << "Unsupported transformation precision: " << precision;
-    }
-}
-
-// Returns the layer producing input number `index` of `layer`, looking through any chain
-// of layers whose type equals `ignoreLayerType`.
-// Returns nullptr when `index` is out of range or when an ignored layer has no inputs.
-// Throws when an input tensor or its creator is gone, or when an ignored layer has more
-// than one input.
-CNNLayerPtr CNNNetworkHelper::getParent(const CNNLayer& layer, const size_t index, const std::string& ignoreLayerType) {
-    if (index >= layer.insData.size()) {
-        return nullptr;
-    }
-
-    DataPtr edge = layer.insData[index].lock();
-    if (!edge) {
-        THROW_IE_EXCEPTION << "input data is absent";
-    }
-
-    for (;;) {
-        const CNNLayerPtr producer = getCreatorLayer(edge).lock();
-        if (producer == nullptr) {
-            THROW_IE_EXCEPTION << "input is absent";
-        }
-
-        if (producer->type != ignoreLayerType) {
-            return producer;
-        }
-
-        // The producer is of the ignored type: step over it to its own single input.
-        const size_t inputsCount = producer->insData.size();
-        if (inputsCount == 0) {
-            return nullptr;
-        }
-        if (inputsCount != 1) {
-            THROW_IE_EXCEPTION << "too much branches";
-        }
-
-        edge = producer->insData[0].lock();
-        if (!edge) {
-            THROW_IE_EXCEPTION << "input data is absent";
-        }
-    }
-}
-
-// Returns the producers of all inputs of `layer`, in input order. When
-// `exceptionLayerName` is non-empty, producers with that name are left out.
-// Throws when an input tensor or its creator layer is gone.
-std::vector<CNNLayerPtr> CNNNetworkHelper::getParents(const CNNLayer& layer, const std::string& exceptionLayerName) {
-    const bool keepAll = exceptionLayerName.empty();
-
-    std::vector<CNNLayerPtr> producers;
-    for (size_t port = 0; port < layer.insData.size(); ++port) {
-        const DataPtr input = layer.insData[port].lock();
-        if (!input) {
-            THROW_IE_EXCEPTION << "input data is absent";
-        }
-
-        const CNNLayerPtr producer = getCreatorLayer(input).lock();
-        if (!producer) {
-            THROW_IE_EXCEPTION << "input layer is absent";
-        }
-
-        if (keepAll || (producer->name != exceptionLayerName)) {
-            producers.push_back(producer);
-        }
-    }
-    return producers;
-}
-
-// Collects the producers of the inputs of `layer`, looking through producers whose type
-// is listed in `exceptionLayerTypes` (their own parents are collected instead, over all
-// of their ports).
-//
-// portIndex - -1 to consider every input port of `layer`, otherwise the single port to
-//             consider. The port number is the position in layer.insData, so expired
-//             inputs still occupy their port.
-//
-// Inputs whose tensor or creator layer has expired are skipped.
-std::vector<CNNLayerPtr> CNNNetworkHelper::getParentsRecursivelyExceptTypes(
-    const CNNLayer& layer, const std::unordered_set<std::string>& exceptionLayerTypes, const int portIndex) {
-    std::vector<CNNLayerPtr> parents;
-    // Index-based iteration keeps `port` equal to the real input position even when an
-    // input is skipped; a separately maintained counter would drift after a skip.
-    for (size_t port = 0ul; port < layer.insData.size(); ++port) {
-        if ((portIndex != -1) && (static_cast<size_t>(portIndex) != port)) {
-            continue;
-        }
-
-        const DataPtr insData = layer.insData[port].lock();
-        if (insData == nullptr) {
-            continue;
-        }
-
-        const CNNLayerPtr parent = getCreatorLayer(insData).lock();
-        if (parent == nullptr) {
-            continue;
-        }
-
-        if (exceptionLayerTypes.find(parent->type) != exceptionLayerTypes.end()) {
-            const std::vector<CNNLayerPtr> tmpParents = CNNNetworkHelper::getParentsRecursivelyExceptTypes(*parent, exceptionLayerTypes);
-            parents.insert(parents.end(), tmpParents.begin(), tmpParents.end());
-        } else {
-            parents.push_back(parent);
-        }
-    }
-    return parents;
-}
-
-// Tells whether every output tensor of `layer` has a supported layout:
-// NC, NCHW and NCDHW always qualify, CHW qualifies only with exactly three dimensions.
-bool CNNNetworkHelper::isLayoutSupported(const CNNLayer& layer) {
-    for (const auto& output : layer.outData) {
-        const auto layout = output->getLayout();
-
-        const bool alwaysSupported =
-            (layout == Layout::NC) || (layout == Layout::NCHW) || (layout == Layout::NCDHW);
-        if (alwaysSupported) {
-            continue;
-        }
-
-        const bool threeDimensionalChw = (layout == Layout::CHW) && (output->getDims().size() == 3lu);
-        if (!threeDimensionalChw) {
-            return false;
-        }
-    }
-
-    return true;
-}
-// Returns dimension 1 of the first input tensor of `layer`.
-// Throws when the layer's output layouts are not supported (see isLayoutSupported), the
-// layer has no inputs, the first input has expired, or the first input has fewer than
-// two dimensions.
-size_t CNNNetworkHelper::getInputChannelsCount(const CNNLayer& layer) {
-    if (!isLayoutSupported(layer)) {
-        THROW_IE_LPT_EXCEPTION(layer) << "Not supported layout";
-    }
-
-    if (layer.insData.size() == 0) {
-        THROW_IE_EXCEPTION << "There are no input layers";
-    }
-
-    const DataPtr insertData = layer.insData[0].lock();
-    if (insertData == nullptr) {
-        THROW_IE_EXCEPTION << "insert data is absent";
-    }
-
-    // isLayoutSupported() inspects the outputs only, so the rank of the input still has
-    // to be checked before dimension 1 is read.
-    const auto& dims = insertData->getDims();
-    if (dims.size() < 2ul) {
-        THROW_IE_LPT_EXCEPTION(layer) << "Unexpected input dimensions count " << dims.size();
-    }
-
-    // For CHW: actually MO assumes NCH layout for 3D blobs, so we get channels count from dimension 1
-    return dims[1];
-}
-
-// Reads the unsigned integer parameter "output" of `layer`; throws when it is not present.
-size_t CNNNetworkHelper::getParamOutput(const CNNLayer& layer) {
-    const bool hasOutputParam = layer.CheckParamPresence("output");
-    if (hasOutputParam) {
-        return layer.GetParamAsUInt("output");
-    }
-    THROW_IE_EXCEPTION << "convolution parameter 'output' is absent";
-}
-
-// Returns the product of the values of the "kernel" parameter of `layer`, which must
-// hold two or three values. Throws when the parameter is absent or has another length.
-size_t CNNNetworkHelper::getKernelSize(const CNNLayer& layer) {
-    if (!layer.CheckParamPresence("kernel")) {
-        THROW_IE_EXCEPTION << "convolution parameter 'kernel' is absent";
-    }
-
-    const auto kernel = layer.GetParamAsUInts("kernel");
-    switch (kernel.size()) {
-        case 2:
-            return kernel[0] * kernel[1];
-        case 3:
-            return kernel[0] * kernel[1] * kernel[2];
-        default:
-            THROW_IE_EXCEPTION << "kernel dimensions are not correct";
-    }
-}
-
-// Renames layer `currentName` to `newName` by delegating to CNNNetworkImpl::renameLayer.
-// Throws when `net` is not a CNNNetworkImpl.
-void CNNNetworkHelper::renameLayer(ICNNNetwork& net, const std::string& currentName, const std::string& newName) {
-    if (CNNNetworkImpl* impl = dynamic_cast<CNNNetworkImpl*>(&net)) {
-        impl->renameLayer(currentName, newName);
-        return;
-    }
-
-    THROW_IE_EXCEPTION << "unexpected network type";
-}
-
-// Inserts `newLayer` into context.network after `parent` (on the edge leading to `child`
-// when a child is given) and sets the precision of its outputs.
-// The precision is the original precision recorded in the context for the parent output;
-// when that is UNSPECIFIED it falls back to child->precision, or FP32 without a child.
-// Without a parent the precision is FP32 and no parent edge is used.
-// Throws when `child` is not a consumer of any output of `parent`, or when `parent` has
-// no outputs. Returns `newLayer`.
-CNNLayerPtr CNNNetworkHelper::addLayer(
-    TransformationContext& context,
-    const CNNLayerPtr parent,
-    const CNNLayerPtr child,
-    const CNNLayerPtr newLayer) {
-    const bool hasChild = (child != nullptr);
-
-    DataPtr edge;
-    Precision precision;
-    if (parent == nullptr) {
-        // TODO: FIXME
-        precision = Precision::FP32;
-        edge = nullptr;
-    } else {
-        // Locate the parent output consumed by the child (output 0 when there is no child).
-        int outputIndex = 0;
-        if (hasChild) {
-            while (outputIndex < parent->outData.size()) {
-                const auto& consumers = getInputTo(parent->outData[outputIndex]);
-                if (consumers.find(child->name) != consumers.end()) {
-                    break;
-                }
-                outputIndex++;
-            }
-        }
-        if (outputIndex == parent->outData.size()) {
-            if (hasChild) {
-                THROW_IE_EXCEPTION << "Can't find layer " << child->name << " among layer " << parent->name << " outputs";
-            }
-            THROW_IE_EXCEPTION << "Layer '" << parent->name << "' has invalid output";
-        }
-
-        edge = parent->outData[outputIndex];
-        precision = context.getOriginalLayerPrecision(parent->name, edge->getName());
-        if (precision == Precision::UNSPECIFIED) {
-            if (hasChild) {
-                precision = child->precision;
-            } else {
-                precision = Precision::FP32;
-            }
-        }
-    }
-
-    addLayerToCNNNetworkAfterData(edge, newLayer, hasChild ? child->name : "", context.network);
-
-    CNNNetworkHelper::setOutDataPrecision(*newLayer, precision);
-    return newLayer;
-}
-
-// Substitutes `target` for `source` in context.network.
-//
-// `source` is removed from the network; `target` is registered as a consumer of the
-// parents' outputs, receives freshly created output tensors with the names and
-// descriptors of the source outputs, takes over the consumers of those outputs, and is
-// finally added to the network.
-//
-// Throws when either layer is null, when context.network is not a CNNNetworkImpl, or
-// when a child of `source` has an expired input tensor or input creator.
-//
-// NOTE(review): every output of every parent is wired to `target` (under the key
-// source->name), not only the outputs `source` consumed, and child inputs are re-pointed
-// by creator name rather than by the concrete output they were attached to - confirm
-// this is intended for multi-output parents/sources.
-void CNNNetworkHelper::replaceLayer(TransformationContext& context, const CNNLayerPtr source, const CNNLayerPtr target) {
-    if ((source == nullptr) || (target == nullptr)) {
-        THROW_IE_EXCEPTION << "source or target layer is nullable";
-    }
-
-    CNNNetworkImpl* networkImpl = dynamic_cast<CNNNetworkImpl*>(&context.network);
-    if (networkImpl == nullptr) {
-        THROW_IE_EXCEPTION << "unexpected network type";
-    }
-    networkImpl->removeLayer(source->name);
-
-    const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(*source);
-    for (const CNNLayerPtr& parent : parents) {
-        for (const DataPtr& parentOutData : parent->outData) {
-            getInputTo(parentOutData)[source->name] = target;
-            target->insData.push_back(parentOutData);
-        }
-    }
-
-    const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(*source);
-
-    target->outData.resize(source->outData.size());
-    for (size_t outDataIndex = 0ul; outDataIndex < source->outData.size(); ++outDataIndex) {
-        const DataPtr outData = source->outData[outDataIndex];
-        networkImpl->removeData(outData->getName());
-
-        DataPtr newOutData(new Data(outData->getName(), outData->getTensorDesc()));
-        getCreatorLayer(newOutData) = target;
-        target->outData[outDataIndex] = newOutData;
-        networkImpl->addData(newOutData->getName().c_str(), newOutData);
-
-        // Snapshot of the consumers: the original map is cleared at the end of the iteration.
-        const std::map<std::string, CNNLayerPtr> consumers = getInputTo(outData);
-        for (const auto& consumer : consumers) {
-            getInputTo(newOutData).emplace(consumer.first, consumer.second);
-        }
-
-        // Re-point the children inputs that were created by `source`. This only happens
-        // when the replaced output had consumers; one pass is enough because a second
-        // pass would assign the same tensor again.
-        if (!consumers.empty()) {
-            for (const CNNLayerPtr& child : children) {
-                for (size_t insDataIndex = 0ul; insDataIndex < child->insData.size(); ++insDataIndex) {
-                    const DataPtr insData = child->insData[insDataIndex].lock();
-                    if (insData == nullptr) {
-                        THROW_IE_LPT_EXCEPTION(*child) << "insert data " << insDataIndex << " is absent";
-                    }
-
-                    const CNNLayerPtr creator = getCreatorLayer(insData).lock();
-                    if (creator == nullptr) {
-                        THROW_IE_LPT_EXCEPTION(*child) << "parent layer for insert data " << insDataIndex << " is absent";
-                    }
-                    if (creator->name == source->name) {
-                        child->insData[insDataIndex] = newOutData;
-                    }
-                }
-            }
-        }
-        getInputTo(outData).clear();
-    }
-
-    networkImpl->addLayer(target);
-}
-
-// Applies the dequantization scales/shifts on the edge(s) between `parent` and `child`.
-//
-// When `child` is a "ScaleShift" layer with a single parent, the values are folded into
-// its "weights" (and "biases", if present) blobs and { child } is returned.
-// Otherwise a new ScaleShift layer is inserted for every parent-output / child-input
-// pair connecting the two layers (or after output 0 of `parent` when `child` is null)
-// and the created layers are returned.
-//
-// name - name for the created layers; when empty a name is derived from the parent name,
-//        the child input index (only if several inputs are connected) and the child name.
-//
-// Throws when `parent` is null, the layers are not connected, the parent has no output
-// to attach to, blob sizes do not fit the dequantization details, or the channel count
-// of the parent output does not match dequantizationDetails.channelsCount.
-std::vector<CNNLayerPtr> CNNNetworkHelper::addScaleShiftBetween(TransformationContext& context, const CNNLayerPtr parent,
-                                                                const CNNLayerPtr child,
-                                                                const DequantizationDetails& dequantizationDetails,
-                                                                const std::string& name) {
-    if (parent == nullptr)
-        THROW_IE_EXCEPTION << "Parent layer is nullable";
-
-    if (child && (child->type == "ScaleShift") && (CNNNetworkHelper::getParents(*child).size() == 1)) {
-        auto scalesIt = child->blobs.find("weights");
-        if (scalesIt == child->blobs.end()) {
-            THROW_IE_EXCEPTION << "weights for layer " << child->name << " was not found";
-        }
-        // `scales` is a float copy of the current weights; it stays valid after updateBlobs().
-        const std::shared_ptr<float> scales = CNNNetworkHelper::getFloatData(scalesIt->second);
-        const size_t scalesCount = scalesIt->second->size();
-        if (dequantizationDetails.scales.size() < scalesCount) {
-            THROW_IE_EXCEPTION << "dequantization scales count " << dequantizationDetails.scales.size()
-                               << " is less than weights count " << scalesCount << " for layer " << child->name;
-        }
-        std::vector<float> updatedScales(scalesCount);
-        for (size_t i = 0ul; i < updatedScales.size(); ++i) {
-            updatedScales[i] = scales.get()[i] * dequantizationDetails.scales[i];
-        }
-        CNNNetworkHelper::updateBlobs(*child, "weights", updatedScales);
-
-        auto shiftsIt = child->blobs.find("biases");
-        if (shiftsIt != child->blobs.end()) {
-            const std::shared_ptr<float> shifts = CNNNetworkHelper::getFloatData(shiftsIt->second);
-            const size_t shiftsCount = shiftsIt->second->size();
-            if ((dequantizationDetails.shifts.size() < shiftsCount) || (scalesCount < shiftsCount)) {
-                THROW_IE_EXCEPTION << "biases count " << shiftsCount << " does not fit weights count " << scalesCount
-                                   << " or dequantization shifts count " << dequantizationDetails.shifts.size()
-                                   << " for layer " << child->name;
-            }
-            std::vector<float> updatedShifts(shiftsCount);
-            for (size_t i = 0ul; i < updatedShifts.size(); ++i) {
-                updatedShifts[i] = scales.get()[i] * dequantizationDetails.shifts[i] + shifts.get()[i];
-            }
-            CNNNetworkHelper::updateBlobs(*child, "biases", updatedShifts);
-        }
-
-        return { child };
-    }
-
-    // Searching the connection between the layers
-
-    // specify parent/child edges here and manipulate with them below
-    std::vector<int> parentOutDataIndexes;
-    std::vector<int> childInsDataIndexes;
-    if (child != nullptr) {
-        for (size_t l1_out_i = 0; l1_out_i < parent->outData.size(); l1_out_i++) {
-            auto& inputTo = getInputTo(parent->outData[l1_out_i]);
-            if (inputTo.find(child->name) != inputTo.end()) {
-                parentOutDataIndexes.push_back(static_cast<int>(l1_out_i));
-            }
-        }
-
-        for (size_t i = 0; i < child->insData.size(); ++i) {
-            // An expired input or creator cannot be an edge from `parent`: skip it.
-            const DataPtr insData = child->insData[i].lock();
-            if (insData == nullptr) {
-                continue;
-            }
-            const CNNLayerPtr creatorLayer = getCreatorLayer(insData).lock();
-            if ((creatorLayer != nullptr) && (creatorLayer->name == parent->name)) {
-                childInsDataIndexes.push_back(static_cast<int>(i));
-            }
-        }
-    } else {
-        if (parent->outData.empty()) {
-            THROW_IE_EXCEPTION << "Layer '" << parent->name << "' has invalid output";
-        }
-        parentOutDataIndexes.push_back(0);
-        childInsDataIndexes.push_back(0);
-    }
-
-    if (childInsDataIndexes.empty()) {
-        if (child != nullptr)
-            THROW_IE_EXCEPTION << "Can't find layer " << child->name << " among layer " << parent->name << " outputs";
-        else
-            THROW_IE_EXCEPTION << "Layer '" << parent->name << "' has invalid output";
-    }
-
-    std::vector<CNNLayerPtr> ssCnnLayers;
-    ssCnnLayers.reserve(childInsDataIndexes.size());
-    for (int l1_out_i : parentOutDataIndexes) {
-        DataPtr outData = parent->outData[l1_out_i];
-
-        for (size_t i = 0; i < childInsDataIndexes.size(); ++i) {
-            const int childInsDataIndex = childInsDataIndexes[i];
-
-            // Derived name: <parent>_ScaleShift[<input index>][_<child>]
-            std::string layerName = name;
-            if (layerName.empty()) {
-                layerName = parent->name + "_ScaleShift";
-                if (childInsDataIndexes.size() != 1) {
-                    layerName += std::to_string(childInsDataIndex);
-                }
-                if (child != nullptr) {
-                    layerName += "_" + child->name;
-                }
-            }
-
-            Precision ssPrecision = context.getOriginalLayerPrecision(parent->name, outData->getName());
-            if (ssPrecision == Precision::UNSPECIFIED) {
-                if (child != nullptr)
-                    ssPrecision = child->precision;
-                else
-                    ssPrecision = Precision::FP32;
-            }
-
-            LayerParams ssCnnLayerParams{ layerName, "ScaleShift", ssPrecision };
-            CNNLayerPtr ssCnnLayer(new ScaleShiftLayer(ssCnnLayerParams));
-
-            const std::vector<size_t> dims = outData->getDims();
-
-            if ((dims.size() != 2ul) || ((dims.size() == 2ul) && (dims[0] != dequantizationDetails.channelsCount))) {
-                if ((dims.size() > 1) && (dims[1] != dequantizationDetails.channelsCount)) {
-                    THROW_IE_EXCEPTION << "unexpected parent channels count " << dims[1];
-                }
-            }
-            addLayerToCNNNetworkAfterData(outData, ssCnnLayer, child != nullptr ? child->name : "", context.network, childInsDataIndex);
-
-            {
-                ScaleShiftLayer* scshLayer = dynamic_cast<ScaleShiftLayer*>(ssCnnLayer.get());
-                if (scshLayer == nullptr) {
-                    THROW_IE_EXCEPTION << "Layer " << ssCnnLayer->name << " is not instance of ScaleShiftLayer class";
-                }
-                fillInScaleShift(
-                    scshLayer,
-                    dequantizationDetails.channelsCount,
-                    dequantizationDetails.scales.data(),
-                    dequantizationDetails.shifts.data());
-            }
-
-            CNNNetworkHelper::setOutDataPrecision(*ssCnnLayer, ssPrecision);
-            ssCnnLayers.push_back(ssCnnLayer);
-        }
-    }
-
-    return ssCnnLayers;
-}
-
-// Inserts a "Const" layer holding `customBlob` (stored under the blob key "custom") after
-// the output of `layer1` that feeds `layer2`, or after output 0 of `layer1` when
-// `layer2` is null. The new layer and its output take the precision of the blob.
-//
-// name - name of the new layer; "<layer1 name>_Const" when empty.
-//
-// Throws when `layer1` or `customBlob` is null, when `layer2` is not a consumer of any
-// output of `layer1`, or when `layer1` has no outputs. Returns the created layer.
-CNNLayerPtr CNNNetworkHelper::addConstBetween(ICNNNetwork& net, const CNNLayerPtr layer1, const CNNLayerPtr layer2,
-                                              const Blob::Ptr customBlob, const std::string& name) {
-    if (layer1 == nullptr)
-        THROW_IE_EXCEPTION << "First layer is nullable";
-    if (customBlob == nullptr)
-        THROW_IE_EXCEPTION << "Invalid blob";
-
-    // Searching the connection between the layers
-    size_t l1_out_i = 0;
-    if (layer2 != nullptr) {
-        for (; l1_out_i < layer1->outData.size(); l1_out_i++) {
-            const auto& inputTo = getInputTo(layer1->outData[l1_out_i]);
-            if (inputTo.find(layer2->name) != inputTo.end()) {
-                break;
-            }
-        }
-    }
-
-    if (l1_out_i == layer1->outData.size()) {
-        if (layer2 != nullptr)
-            THROW_IE_EXCEPTION << "Can't find layer " << layer2->name << " among layer " << layer1->name << " outputs";
-        else
-            THROW_IE_EXCEPTION << "Layer " << layer1->name << " has invalid outputs";
-    }
-
-    DataPtr outData = layer1->outData[l1_out_i];
-
-    const Precision blobPrecision = customBlob->getTensorDesc().getPrecision();
-    std::string layerName = name.empty() ? layer1->name + "_Const" : name;
-    CNNLayerPtr layer(new CNNLayer({layerName, "Const", blobPrecision}));
-
-    addLayerToCNNNetworkAfterData(outData, layer, layer2 != nullptr ? layer2->name : "", net);
-    layer->blobs.emplace("custom", customBlob);
-    layer->outData[0]->setPrecision(blobPrecision);
-    return layer;
-}
-
-// Registers `layer` in `net` and splices it into the graph after `parentOutData`.
-//
-// parentOutData     - tensor the new layer consumes; may be null, in which case the new
-//                     layer gets no input and `nextLayerName` must name an existing layer.
-// layer             - layer to insert; a new output tensor named after the layer is
-//                     created for it (descriptor copied from `parentOutData`, or from
-//                     input 0 of the next layer when there is no parent tensor).
-// nextLayerName     - consumer that is re-attached to the new layer's output; when empty
-//                     the new layer replaces the creator of `parentOutData` as a network
-//                     output.
-// childInsDataIndex - -1 to re-point every input of the next layer that equals
-//                     `parentOutData`, otherwise the single input index to re-point.
-//
-// Throws "Invalid argument" when `layer` is null, when the next layer is not a consumer
-// of `parentOutData` (and no explicit input index is given), or when neither a parent
-// tensor nor a next layer is supplied. Also throws when `net` is not a CNNNetworkImpl,
-// the next layer cannot be resolved, or `childInsDataIndex` is out of range.
-void CNNNetworkHelper::addLayerToCNNNetworkAfterData(
-    DataPtr parentOutData,
-    CNNLayer::Ptr layer,
-    const std::string& nextLayerName,
-    ICNNNetwork& net,
-    const int childInsDataIndex) {
-    CNNNetworkImpl* netImpl = dynamic_cast<CNNNetworkImpl*>(&net);
-    if (netImpl == nullptr) {
-        THROW_IE_EXCEPTION << "unexpected network type";
-    }
-
-    CNNLayerPtr nextLayer;
-    if (!nextLayerName.empty()) {
-        netImpl->getLayerByName(nextLayerName.c_str(), nextLayer, nullptr);
-    }
-
-    const bool edgeIsAcceptable = nextLayerName.empty() || (parentOutData == nullptr) || (childInsDataIndex != -1) ||
-        (getInputTo(parentOutData).find(nextLayerName) != getInputTo(parentOutData).end());
-    if (!layer || !edgeIsAcceptable) {
-        THROW_IE_EXCEPTION << "Invalid argument";
-    }
-    if ((parentOutData == nullptr) && nextLayerName.empty()) {
-        // Nothing to attach the new layer to.
-        THROW_IE_EXCEPTION << "Invalid argument";
-    }
-    if (!nextLayerName.empty() && (nextLayer == nullptr)) {
-        THROW_IE_EXCEPTION << "layer '" << nextLayerName << "' was not found";
-    }
-
-    auto getTensorDesc = [](CNNLayerPtr& nextLayer) {
-        if (nextLayer->insData.empty()) {
-            THROW_IE_LPT_EXCEPTION(*nextLayer) << "insert data is absent";
-        }
-        const DataPtr insData = nextLayer->insData[0].lock();
-        if (insData == nullptr) {
-            THROW_IE_LPT_EXCEPTION(*nextLayer) << "insert data is absent";
-        }
-        return insData->getTensorDesc();
-    };
-
-    const TensorDesc& parentTensorDesc = parentOutData != nullptr ? parentOutData->getTensorDesc() : getTensorDesc(nextLayer);
-    DataPtr newEdgeAfterLayer(new Data(layer->name, parentTensorDesc));
-    newEdgeAfterLayer->setName(layer->name);
-    getCreatorLayer(newEdgeAfterLayer) = layer;
-    getInputTo(newEdgeAfterLayer).clear();
-
-    netImpl->addData(layer->name.c_str(), newEdgeAfterLayer);
-    IE_SUPPRESS_DEPRECATED_START
-    netImpl->addLayer(layer);
-    IE_SUPPRESS_DEPRECATED_END
-
-    if (parentOutData != nullptr) {
-        getInputTo(parentOutData)[layer->name] = layer;
-        layer->insData.push_back(parentOutData);
-    }
-    layer->outData.push_back(newEdgeAfterLayer);
-
-    if (!nextLayerName.empty()) {
-        getInputTo(newEdgeAfterLayer)[nextLayerName] = nextLayer;
-
-        if (parentOutData != nullptr) {
-            getInputTo(parentOutData).erase(nextLayerName);
-
-            if (childInsDataIndex == -1) {
-                for (size_t i = 0; i < nextLayer->insData.size(); i++) {
-                    if (nextLayer->insData[i].lock() == parentOutData) {
-                        nextLayer->insData[i] = newEdgeAfterLayer;
-                    }
-                }
-            } else {
-                // Negative values other than -1 wrap to a huge size_t and are rejected too.
-                if (static_cast<size_t>(childInsDataIndex) >= nextLayer->insData.size()) {
-                    THROW_IE_LPT_EXCEPTION(*nextLayer) << "insert data index " << childInsDataIndex << " is out of range";
-                }
-                nextLayer->insData[childInsDataIndex] = newEdgeAfterLayer;
-            }
-        } else {
-            // TODO: why new?
-            nextLayer->insData.push_back(newEdgeAfterLayer);
-        }
-    } else {
-        // No consumer: the new layer takes over the network-output role of the parent.
-        CNNLayerPtr parent = getCreatorLayer(parentOutData).lock();
-        if (parent == nullptr) {
-            THROW_IE_EXCEPTION << "parent data is absent";
-        }
-        netImpl->removeOutput(parent->name);
-        netImpl->addData(parent->name.c_str(), parentOutData);
-
-        netImpl->addData(layer->name.c_str(), newEdgeAfterLayer);
-        netImpl->addOutput(layer->name);
-    }
-}
-
-// Creates and fills the "weights" and "biases" blobs of a ScaleShift layer.
-// Both blobs have layout C, `channels` elements and the precision of the layer; they are
-// filled from `scales` and `shifts` respectively and stored both in the dedicated
-// _weights/_biases members and in the layer's blob map.
-// Throws when `layer` is null.
-void CNNNetworkHelper::fillInScaleShift(ScaleShiftLayer* layer, const size_t channels, const float* scales,
-                                        const float* shifts) {
-    if (!layer) {
-        THROW_IE_EXCEPTION << "ScaleShiftLayer is nullable";
-    }
-
-    auto& weightsBlob = layer->_weights;
-    weightsBlob = makeNewBlobPtr({layer->precision, {channels}, Layout::C});
-    weightsBlob->allocate();
-    fillBlobByFP32(weightsBlob, scales);
-    layer->blobs["weights"] = weightsBlob;
-
-    auto& biasesBlob = layer->_biases;
-    biasesBlob = makeNewBlobPtr({layer->precision, {channels}, Layout::C});
-    biasesBlob->allocate();
-    fillBlobByFP32(biasesBlob, shifts);
-    layer->blobs["biases"] = biasesBlob;
-}
-
-// Returns the consumers of all outputs of `layer`, output by output, in the iteration
-// order of each output's consumer map. When `exceptionLayerName` is non-empty, consumers
-// with that name are left out.
-std::vector<CNNLayerPtr> CNNNetworkHelper::getChildren(const CNNLayer& layer, const std::string& exceptionLayerName) {
-    const bool keepAll = exceptionLayerName.empty();
-
-    std::vector<CNNLayerPtr> consumers;
-    for (size_t outputIndex = 0; outputIndex < layer.outData.size(); ++outputIndex) {
-        const std::map<std::string, CNNLayerPtr>& edges = getInputTo(layer.outData[outputIndex]);
-        for (const auto& edge : edges) {
-            const CNNLayerPtr consumer = edge.second;
-            if (keepAll || (consumer->name != exceptionLayerName)) {
-                consumers.push_back(consumer);
-            }
-        }
-    }
-    return consumers;
-}
-
-// Collects the consumers of all outputs of `layer`, looking through consumers whose type
-// is listed in `exceptionLayerTypes`: such a consumer is not reported itself, its own
-// children are collected recursively in its place.
-std::vector<CNNLayerPtr> CNNNetworkHelper::getChildrenRecursivelyExceptTypes(
-    const CNNLayer& layer, const std::unordered_set<std::string>& exceptionLayerTypes) {
-    std::vector<CNNLayerPtr> collected;
-    for (size_t outputIndex = 0; outputIndex < layer.outData.size(); ++outputIndex) {
-        const std::map<std::string, CNNLayerPtr>& edges = getInputTo(layer.outData[outputIndex]);
-        for (const auto& edge : edges) {
-            const CNNLayerPtr consumer = edge.second;
-            if (exceptionLayerTypes.count(consumer->type) == 0) {
-                collected.push_back(consumer);
-            } else {
-                const std::vector<CNNLayerPtr> nested =
-                    getChildrenRecursivelyExceptTypes(*consumer, exceptionLayerTypes);
-                collected.insert(collected.end(), nested.begin(), nested.end());
-            }
-        }
-    }
-    return collected;
-}
-
-// Verifies that `layer` is a "Const" layer with exactly one blob, no inputs and exactly
-// one output; throws with a description of the first violated expectation otherwise.
-void CNNNetworkHelper::checkConstWithBlobs(const CNNLayerPtr layer) {
-    if (layer->type != "Const") {
-        // Report the offending type (not only the name) so the message matches its wording.
-        THROW_IE_EXCEPTION << "Unexpected layer type '" << layer->type << "' for layer '" << layer->name << "'";
-    }
-    if (layer->blobs.size() != 1) {
-        THROW_IE_EXCEPTION << "Unexpected blobs count " << layer->blobs.size() << " for layer '" << layer->name << "'";
-    }
-    if (layer->insData.size() != 0) {
-        THROW_IE_EXCEPTION << "Unexpected inputs count " << layer->insData.size() << " for layer '" << layer->name
-                           << "'";
-    }
-    if (layer->outData.size() != 1) {
-        THROW_IE_EXCEPTION << "Unexpected outputs count " << layer->outData.size() << " for layer '" << layer->name
-                           << "'";
-    }
-}
-
-// Verifies that `layer` is a "FakeQuantize" layer with no blobs, exactly five inputs and
-// exactly one output; throws with a description of the first violated expectation otherwise.
-void CNNNetworkHelper::checkQuantizeOnWeights(const CNNLayerPtr layer) {
-    if (layer->type != "FakeQuantize") {
-        // Report the offending type (not only the name) so the message matches its wording.
-        THROW_IE_EXCEPTION << "Unexpected layer type '" << layer->type << "' for layer '" << layer->name << "'";
-    }
-    if (layer->blobs.size() != 0) {
-        THROW_IE_EXCEPTION << "Unexpected blobs count " << layer->blobs.size() << " for layer '" << layer->name << "'";
-    }
-    if (layer->insData.size() != 5) {
-        THROW_IE_EXCEPTION << "Unexpected inputs count " << layer->insData.size() << " for layer '" << layer->name
-                           << "'";
-    }
-    if (layer->outData.size() != 1) {
-        THROW_IE_EXCEPTION << "Unexpected outputs count " << layer->outData.size() << " for layer '" << layer->name
-                           << "'";
-    }
-}
-
-// For a layer of type "Input" (compared case-insensitively), points the network input
-// registered under the layer's name at `outData`. Layers of any other type are ignored.
-// Nothing is changed when the network returns no input info for that name or when the
-// returned info carries a different name.
-void CNNNetworkHelper::updateInput(CNNNetworkImpl* network, CNNLayerPtr& layer, DataPtr outData) {
-    if (!CaselessEq<std::string>()(layer->type, "Input")) {
-        return;
-    }
-
-    InputInfo::Ptr inputInfo = network->getInput(layer->name);
-    // Guard against a missing entry before dereferencing the returned pointer.
-    if ((inputInfo != nullptr) && (inputInfo->name() == layer->name)) {
-        inputInfo->setInputData(outData);
-    }
-}
-
-size_t CNNNetworkHelper::disconnectLayers(CNNNetworkImpl* network, const CNNLayerPtr& parentLayer,
- const CNNLayerPtr& childLayer) {
- bool wasFound = false;
- for (auto dataIt = parentLayer->outData.begin(); dataIt != parentLayer->outData.end(); ++dataIt) {
- auto data = *dataIt;
-
- auto inputIt = getInputTo(data).begin();
- while (inputIt != getInputTo(data).end()) {
- auto currentChildLayer = inputIt->second;
- if (currentChildLayer == nullptr) {
- THROW_IE_EXCEPTION << "Output layer for '" << parentLayer->name << "'is absent";
- }
-
- if (currentChildLayer->name == childLayer->name) {
- inputIt = getInputTo(data).erase(inputIt);
- wasFound = true;
- continue;
- }
-
- ++inputIt;
- }
- }
- if (!wasFound) {
- THROW_IE_EXCEPTION << "Output layer '" << childLayer->name << "' was not found for '" << parentLayer->name
- << "'";
- }
-
- wasFound = false;
- auto it = childLayer->insData.begin();
- while (it != childLayer->insData.end()) {
- auto data = it->lock();
- if (data == nullptr) {
- THROW_IE_EXCEPTION << "Input layer data for '" << childLayer->name << "'is absent";
- }
- auto currentParentLayer = getCreatorLayer(data).lock();
- if (currentParentLayer == nullptr) {
- THROW_IE_EXCEPTION << "Input layer for '" << childLayer->name << "'is absent";
- }
-
- if (currentParentLayer->name == parentLayer->name) {
- it = childLayer->insData.erase(it);
- wasFound = true;
- continue;
- }
-
- ++it;
- }
- if (!wasFound) {
- THROW_IE_EXCEPTION << "Input layer '" << parentLayer->name << "' was not found for '" << childLayer->name
- << "'";
- }
- return 0;
-}
-
-size_t CNNNetworkHelper::getInputIndex(const CNNLayerPtr& childLayer, const CNNLayerPtr& parentLayer) {
- for (size_t index = 0; index < childLayer->insData.size(); ++index) {
- DataPtr currentParenData = childLayer->insData[index].lock();
- if (currentParenData == nullptr) {
- THROW_IE_EXCEPTION << "parent layer data is absent";
- }
- CNNLayerPtr currentParrentLayer = getCreatorLayer(currentParenData).lock();
- if (currentParrentLayer == nullptr) {
- THROW_IE_EXCEPTION << "parent layer is absent";
- }
- if (currentParrentLayer->name == parentLayer->name) {
- return index;
- }
- }
-
- THROW_IE_EXCEPTION << "parent layer was not found";
-}
-
-void CNNNetworkHelper::removeLayer(ICNNNetwork& network, const CNNLayerPtr& layer) {
- details::CNNNetworkImpl* networkImpl = dynamic_cast<details::CNNNetworkImpl*>(&network);
- if (networkImpl == nullptr) {
- THROW_IE_EXCEPTION << "Unexpected network type";
- }
-
- if (layer->outData.size() > 1) {
- THROW_IE_EXCEPTION << "Layer '" << layer->name << "' has too many outputs " << layer->outData.size();
- }
-
- if (layer->insData.size() > 1) {
- do {
- DataPtr data = layer->insData[0].lock();
- if (data == nullptr) {
- THROW_IE_EXCEPTION << "Layer's inserted data is nullptr";
- }
- CNNLayerPtr parentLayer = getCreatorLayer(data).lock();
- if (parentLayer == nullptr) {
- THROW_IE_EXCEPTION << "Layer's parent layer is nullptr";
- }
- CNNNetworkHelper::removeLayer(network, parentLayer);
- } while (!layer->insData.empty());
- }
-
- DataPtr childData;
- std::vector<CNNLayerPtr> children;
- std::vector<size_t> childrenIndexes;
- if (layer->outData.size() > 0) {
- childData = layer->outData[0];
- auto inputTo = getInputTo(childData);
- if (inputTo.size() == 0) {
- std::vector<CNNLayerPtr> parents = getParents(*layer);
- if (parents.size() != 1) {
- THROW_IE_EXCEPTION << "not possible remove output layer with several parents";
- }
- networkImpl->addOutput(parents[0]->name);
- CNNNetworkImpl* networkImpl = dynamic_cast<CNNNetworkImpl*>(&network);
- networkImpl->removeOutput(layer->name);
- } else {
- for (auto it = inputTo.begin(); it != inputTo.end(); ++it) {
- children.push_back(it->second);
- childrenIndexes.push_back(getInputIndex(it->second, layer));
- disconnectLayers(networkImpl, layer, it->second);
- }
- }
- }
-
- if (layer->insData.size() > 1) {
- // TODO: implement
- THROW_IE_EXCEPTION << "not implemented";
- }
-
- DataPtr parentData;
- CNNLayerPtr parentLayer;
- if (layer->insData.size() > 0) {
- // remove connections with parent layers
- parentData = layer->insData[0].lock();
- if (parentData == nullptr) {
- THROW_IE_EXCEPTION << "Input data is absent";
- }
- parentLayer = getCreatorLayer(parentData).lock();
- if (parentLayer == nullptr) {
- THROW_IE_EXCEPTION << "Input layer for '" << layer->name << "' is absent";
- }
-
- const size_t ouputLayerOutDataIndex = disconnectLayers(networkImpl, parentLayer, layer);
- if (ouputLayerOutDataIndex >= parentLayer->outData.size()) {
- THROW_IE_EXCEPTION << "Index " << ouputLayerOutDataIndex << " out of range output ports count "
- << parentLayer->outData.size() << " for layer " << parentLayer->name;
- }
-
- for (size_t index = 0; index < children.size(); ++index) {
- CNNLayerPtr childLayer = children[index];
- const size_t childInputIndex = childrenIndexes[index];
-
- DataPtr outData = parentLayer->outData[ouputLayerOutDataIndex];
- getInputTo(outData).emplace(childLayer->name, childLayer);
- childLayer->insData.insert(childLayer->insData.begin() + childInputIndex, outData);
-
- updateInput(networkImpl, parentLayer, outData);
- }
- }
-
- networkImpl->removeData(layer->name);
- networkImpl->removeLayer(layer->name);
-}
-
-bool CNNNetworkHelper::isWeightsSupported(const CNNLayer& layer) noexcept {
- if (layer.insData.size() > 1) {
- CNNLayerPtr weightsLayer = CNNNetworkHelper::getParent(layer, 1);
- if (weightsLayer == nullptr)
- return false;
- if ((weightsLayer->type == "Const") || (weightsLayer->type == "FakeQuantize")) {
- return true;
- }
-
- if (weightsLayer->type == "ScaleShift") {
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(*weightsLayer);
- if (parents.size() != 1ul) {
- return false;
- }
-
- return (parents[0]->type == "FakeQuantize") || (parents[0]->type == "Const");
- }
-
- return false;
- } else {
- return layer.blobs.find("weights") != layer.blobs.end();
- }
-}
-
-Blob::Ptr CNNNetworkHelper::getWeights(
- const CNNLayer& layer,
- const bool roundQuantizedValues) {
- if (layer.insData.size() > 1) {
- CNNLayerPtr weightsLayer = CNNNetworkHelper::getParent(layer, 1);
- if (weightsLayer == nullptr) {
- THROW_IE_EXCEPTION << "Convolution weights const layer are absent";
- }
-
- if (weightsLayer->type == "Const") {
- CNNNetworkHelper::checkConstWithBlobs(weightsLayer);
- return weightsLayer->blobs.find("custom")->second;
- } else if (weightsLayer->type == "FakeQuantize") {
- return CNNNetworkHelper::quantizeWeights(*weightsLayer, roundQuantizedValues, Precision::UNSPECIFIED);
- } else if (weightsLayer->type == "ScaleShift") {
- const CNNLayerPtr parent = CNNNetworkHelper::getParent(*weightsLayer);
- if (parent == nullptr)
- THROW_IE_EXCEPTION << "Layer '" << weightsLayer->name << "' does not have parent";
- if (parent->type == "FakeQuantize") {
- return CNNNetworkHelper::quantizeWeights(*parent, roundQuantizedValues, Precision::UNSPECIFIED);
- } else if (parent->type == "Const") {
- CNNNetworkHelper::checkConstWithBlobs(parent);
- return CNNNetworkHelper::getBlob(parent, "custom");
- } else {
- THROW_IE_EXCEPTION << "Unexpected weights layer " << parent->type << " " << parent->name << " for " << layer.type << " " << layer.name;
- }
- } else {
- THROW_IE_EXCEPTION << "Unexpected weights layer type " << weightsLayer->type;
- }
- } else {
- if (layer.blobs.find("weights") == layer.blobs.end()) {
- THROW_IE_EXCEPTION << "Convolution weights are absent";
- }
- return layer.blobs.find("weights")->second;
- }
-}
-
-Blob::Ptr CNNNetworkHelper::getBiases(const CNNLayer& layer) {
- if (layer.insData.size() > 1U) {
- if (layer.insData.size() > 2U) {
- CNNLayerPtr biasesLayer = CNNNetworkHelper::getParent(layer, 2U);
- if (biasesLayer == nullptr) {
- return nullptr;
- }
-
- CNNNetworkHelper::checkConstWithBlobs(biasesLayer);
- return biasesLayer->blobs.find("custom")->second;
- } else {
- return nullptr;
- }
- } else {
- const auto it = layer.blobs.find("biases");
- return (it != layer.blobs.end()) ? it->second : nullptr;
- }
-}
-
-Blob::Ptr CNNNetworkHelper::quantizeWeights(const CNNLayer& quantize, const bool roundValues, const Precision precision) {
- if (quantize.insData.size() != 5lu) {
- THROW_IE_EXCEPTION << "Unexpected inputs count: " << quantize.insData.size();
- }
- for (int i = 0; i < quantize.insData.size(); i++)
- if (quantize.insData[i].lock() == nullptr)
- THROW_IE_EXCEPTION << "Invalid input data for layer '" << quantize.name << "' with index " << i;
-
- const Blob::Ptr sourceBlob = getQuantizeLayerBlob(quantize);
- if (sourceBlob == nullptr) {
- THROW_IE_EXCEPTION << "weights blob is empty for " << quantize.type << " layer " << quantize.name;
- }
-
- const auto& sourceBlobTD = sourceBlob->getTensorDesc();
- const Precision blobPrecision = sourceBlobTD.getPrecision();
-
- auto targetBlobPrecision = precision == Precision::UNSPECIFIED ? blobPrecision : precision;
- if (targetBlobPrecision != Precision::FP32 && targetBlobPrecision != Precision::FP16 &&
- targetBlobPrecision != Precision::I8 && targetBlobPrecision != Precision::U8)
- THROW_IE_EXCEPTION << "Unexpected precision: " << precision;
-
- Blob::Ptr targetBlob = make_blob_with_precision(TensorDesc(targetBlobPrecision, sourceBlobTD.getDims(), sourceBlobTD.getLayout()));
- targetBlob->allocate();
-
- quantizeBlob(quantize, targetBlob, roundValues);
-
- return targetBlob;
-}
-
-bool CNNNetworkHelper::isQuantizedConstWeights(const CNNLayer& layer) {
- CNNLayerPtr quantize = CNNNetworkHelper::getParent(layer, 1);
- if (quantize == nullptr) {
- return false;
- }
-
- if (quantize->type == "Const") {
- return true;
- }
-
- if (quantize->type != "FakeQuantize") {
- return false;
- }
-
- if (quantize->insData.size() != 5ul) {
- THROW_IE_LPT_EXCEPTION(*quantize) << "unexpected inputs size";
- }
-
- return onConstWeightsPath(*quantize);
-}
-
-int CNNNetworkHelper::getConstParentBranchID(const CNNLayer& layer) {
- int constBranchID = -1;
- for (int i = 0; i < layer.insData.size(); i++) {
- bool allConst = true;
-
- const DataPtr insData = layer.insData[i].lock();
- if (insData == nullptr) {
- THROW_IE_LPT_EXCEPTION(layer) << "invalid input data with index " << i;
- }
-
- const CNNLayerPtr parent = getCreatorLayer(insData).lock();
- if (parent == nullptr) {
- THROW_IE_LPT_EXCEPTION(layer) << "parent layer is absent";
- }
-
- if (!CaselessEq<std::string>()(parent->type, "FakeQuantize")) continue;
- for (const auto& p : parent->insData) {
- const DataPtr parentConstInsData = p.lock();
- if (parentConstInsData == nullptr) {
- THROW_IE_LPT_EXCEPTION(*parent) << "input data is absent";
- }
- const CNNLayerPtr parentConst = getCreatorLayer(parentConstInsData).lock();
- if (parentConst == nullptr) {
- THROW_IE_LPT_EXCEPTION(*parent) << "input layer is absent";
- }
- if (!CaselessEq<std::string>()(parentConst->type, "Const")) {
- allConst = false;
- break;
- }
- }
- if (allConst) {
- constBranchID = i;
- break;
- }
- }
-
- return constBranchID;
-}
-
-Precision CNNNetworkHelper::getPrecisionParent(const CNNLayer& layer) {
- return getPrecisionParent(layer, 0ul, false);
-}
-
-Precision CNNNetworkHelper::getPrecisionParent(const CNNLayer& layer, const size_t parentIndex) {
- return getPrecisionParent(layer, parentIndex, true);
-}
-
-Precision CNNNetworkHelper::getPrecisionParent(const CNNLayer& layer, const size_t parentIndex, const bool useParentIndex) {
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(layer);
- if (parents.empty()) {
- THROW_IE_EXCEPTION << "parents for layer " << layer.type << " '" << layer.name << "' are absent";
- }
-
- if (useParentIndex) {
- DataPtr parentOutData = getOutData(*parents[parentIndex], layer);
- if (parentOutData == nullptr) {
- THROW_IE_EXCEPTION <<
- "parent layer " << parents[parentIndex]->type << " '" << parents[parentIndex]->name <<
- "' output data was not found for child " << layer.type << " '" << layer.name << "'";
- }
- return parentOutData->getTensorDesc().getPrecision();
- }
-
- Precision parentOutDataPrecision = Precision::UNSPECIFIED;
- for (CNNLayerPtr parent : parents) {
- DataPtr parentOutData = getOutData(*parent, layer);
- if (parentOutData == nullptr) {
- THROW_IE_EXCEPTION <<
- "parent layer " << parent->type << " '" << parent->name <<
- "' output data was not found for child " << layer.type << " '" << layer.name << "'";
- }
-
- if (parentOutDataPrecision == Precision::UNSPECIFIED) {
- parentOutDataPrecision = parentOutData->getTensorDesc().getPrecision();
- } else if (parentOutDataPrecision != parentOutData->getTensorDesc().getPrecision()) {
- THROW_IE_EXCEPTION <<
- "Parent layer " << parent->type << " '" << parent->name <<
- "' output port has unexpected precision " << parentOutData->getTensorDesc().getPrecision();
- }
- }
-
- return parentOutDataPrecision;
-}
-
-DataPtr CNNNetworkHelper::getOutData(const CNNLayer& parentLayer, const CNNLayer& childLayer) {
- DataPtr parentOutData;
- for (DataPtr outData : parentLayer.outData) {
- const std::map<std::string, CNNLayerPtr> inputTo = getInputTo(outData);
- for (auto childIt : inputTo) {
- if (childIt.second->name == childLayer.name) {
- parentOutData = outData;
- break;
- }
- }
-
- if (parentOutData != nullptr) {
- break;
- }
- }
- return parentOutData;
-}
-
-void CNNNetworkHelper::quantizeBlob(const CNNLayer& quantize, Blob::Ptr& targetBlob, bool roundValues) {
- const Blob::Ptr sourceBlob = getQuantizeLayerBlob(quantize);
- if (sourceBlob == nullptr) {
- THROW_IE_EXCEPTION << "quantized blob is empty for " << quantize.type << " layer " << quantize.name;
- }
-
- auto srcData = getFloatData(sourceBlob);
- const std::vector<size_t>& outDims = quantize.outData[0]->getDims();
- if (outDims.empty() || outDims.size() > 5lu) {
- THROW_IE_EXCEPTION << "Unexpected dimensions count " << outDims.size() << " for layer '" << quantize.name << "'";
- }
-
- // OIDHW
- const size_t OC = outDims[0];
- const size_t IC = outDims.size() > 1lu ? outDims[1] : 1;
- const size_t D = outDims.size() > 4lu ? outDims[outDims.size() - 3] : 1;
- const size_t H = outDims.size() > 2lu ? outDims.size() == 3lu ? outDims[2] : outDims[outDims.size() - 2] : 1;
- const size_t W = outDims.size() > 3lu ? outDims[outDims.size() - 1] : 1;
-
- // Const layer blob shape (sourceBlob->getTensorDesc().getDims()) can be different from output port shape
- // CVS-27850: [IE COMMON] Align Const layer blob shape with output port shape
- if (sourceBlob->size() != OC * IC * D * H * W) {
- THROW_IE_EXCEPTION << "Unexpected weights size for layer '" << quantize.name << "'";
- }
-
- const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(quantize);
-
- const bool isInputLowBroadcasted = quantizationDetails.inputLowValues.size() != OC;
- if ((quantizationDetails.inputLowValues.size() != 1) && (quantizationDetails.inputLowValues.size() != OC)) {
- THROW_IE_EXCEPTION << "Unexpected input low values count " << quantizationDetails.inputLowValues.size() <<
- " for " << OC << " channels, layer '" << quantize.name << "'";
- }
-
- const bool isInputHighBroadcasted = quantizationDetails.inputHighValues.size() != OC;
- if ((quantizationDetails.inputHighValues.size() != 1) && (quantizationDetails.inputHighValues.size() != OC)) {
- THROW_IE_EXCEPTION << "Unexpected input high values count " << quantizationDetails.inputHighValues.size() <<
- " for " << OC << " channels, layer '" << quantize.name << "'";
- }
-
- const bool isOutputLowBroadcasted = quantizationDetails.outputLowValues.size() != OC;
- if ((quantizationDetails.outputLowValues.size() != 1) && (quantizationDetails.outputLowValues.size() != OC)) {
- THROW_IE_EXCEPTION << "Unexpected output low values count " << quantizationDetails.outputLowValues.size() <<
- " for " << OC << " channels, layer '" << quantize.name << "'";
- }
-
- const bool isOutputHighBroadcasted = quantizationDetails.outputHighValues.size() != OC;
- if ((quantizationDetails.outputHighValues.size() != 1) && (quantizationDetails.outputHighValues.size() != OC)) {
- THROW_IE_EXCEPTION << "Unexpected output high values count " << quantizationDetails.outputHighValues.size() <<
- " for " << OC << " channels, layer '" << quantize.name << "'";
- }
-
- auto levels_1 = static_cast<float>(quantize.GetParamAsUInt("levels")) - 1.f;
-
- const size_t DHW = D * H * W;
- const size_t IDHW = IC * DHW;
-
- std::vector<float> dstBuffer(targetBlob->size());
-
- auto srcPtr = srcData.get();
- auto dstPtr = &dstBuffer[0];
-
- parallel_for4d(OC, IC, D, H, [&](size_t oc, size_t ic, size_t d, size_t h) {
- const float inputLow = quantizationDetails.inputLowValues[isInputLowBroadcasted ? 0 : oc];
- const float inputHigh = quantizationDetails.inputHighValues[isInputHighBroadcasted ? 0 : oc];
- const float outputLow = quantizationDetails.outputLowValues[isOutputLowBroadcasted ? 0 : oc];
- const float outputHigh = quantizationDetails.outputHighValues[isOutputHighBroadcasted ? 0 : oc];
-
- for (size_t w = 0; w < W; w++) {
- const size_t idx = oc * IDHW + ic * DHW + d * H * W + h * W + w;
-
- if (srcPtr[idx] <= inputLow) {
- dstPtr[idx] = roundValues ? std::roundf(outputLow) : outputLow;
- } else if (srcPtr[idx] > inputHigh) {
- dstPtr[idx] = roundValues ? std::roundf(outputHigh) : outputHigh;
- } else {
- const float value = std::roundf((srcPtr[idx] - inputLow) / (inputHigh - inputLow) * levels_1) /
- levels_1 * (outputHigh - outputLow) + outputLow;
- dstPtr[idx] = roundValues ? std::roundf(value) : value;
- }
- }
- });
-
- fillBlobByFP32(targetBlob, dstPtr);
-}
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/normalize.hpp"
-
-#include <algorithm>
-#include <string>
-#include <memory>
-#include <vector>
-
-#include <caseless.hpp>
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-bool NormalizeTransformation::canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const {
- if (!LayerTransformation::canBeTransformed(context, layer)) {
- return false;
- }
-
- if (layer.insData.size() != 1) {
- THROW_IE_LPT_EXCEPTION(layer) << "layer inputs '" << layer.insData.size() << "' is not correct";
- }
-
- if (!CaselessEq<std::string>()(layer.type, "Normalize")) {
- THROW_IE_LPT_EXCEPTION(layer) << "layer '" << layer.name << "' is not correct";
- }
-
- const CNNLayerPtr parent = CNNNetworkHelper::getParent(layer, 0);
- return (parent->type == "ScaleShift");
-}
-
-void NormalizeTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
- if (!canBeTransformed(context, layer)) {
- return;
- }
-
- const CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(layer, 0);
- std::vector<float> originalDequantizationScales;
- std::vector<float> originalDequantizationShifts;
- fillFromDequantizationLayer(*scaleShift, originalDequantizationScales, originalDequantizationShifts);
-
- const bool acrossSpatial = layer.GetParamAsBool("across_spatial");
- if (std::any_of(originalDequantizationShifts.begin(), originalDequantizationShifts.end(), [](const float value) { return value != 0.f; })) {
- return;
- }
-
- if (acrossSpatial &&
- std::any_of(
- originalDequantizationScales.begin(),
- originalDequantizationScales.end(),
- [&](const float value) { return value != originalDequantizationScales[0]; })) {
- return;
- }
-
- std::vector<float> dequantizationScales(originalDequantizationScales.size());
- std::vector<float> dequantizationShifts(originalDequantizationShifts.size(), 0.f);
- for (size_t channel = 0ul; channel < dequantizationScales.size(); ++channel) {
- dequantizationScales[channel] = std::signbit(originalDequantizationScales[channel]) ? -1.f : 1.f;
- }
-
- CNNNetworkHelper::removeLayer(context.network, scaleShift);
- context.removeLayer(*scaleShift);
-
- addDequantizationLayer(context, layer, dequantizationScales, dequantizationShifts);
-}
-
-bool NormalizeTransformation::isPrecisionPreserved(const CNNLayer& layer) const noexcept {
- return false;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/permute.hpp"
-
-#include <algorithm>
-#include <caseless.hpp>
-#include <string>
-#include <vector>
-
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-void PermuteTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
- if (!canBeTransformed(context, layer)) {
- return;
- }
-
- if (layer.insData.size() != 1) {
- THROW_IE_EXCEPTION << "layer inputs '" << layer.insData.size() << "' is not correct";
- }
-
- if (!CaselessEq<std::string>()(layer.type, "Permute")) {
- THROW_IE_EXCEPTION << "layer '" << layer.name << "' is not correct";
- }
-
- if (!layer.CheckParamPresence("order")) {
- THROW_IE_EXCEPTION << "Permute parameter 'order' is absent";
- }
-
- const CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(layer);
- if ((scaleShift == nullptr) || (scaleShift->type != "ScaleShift")) {
- return;
- }
-
- std::vector<float> dequantizationScales;
- std::vector<float> dequantizationShifts;
- fillFromDequantizationLayer(*scaleShift, dequantizationScales, dequantizationShifts);
-
- if (!DequantizationDetails::isPerTensor(dequantizationScales, dequantizationShifts)) {
- std::vector<unsigned int> orders = layer.GetParamAsUInts("order");
- if ((orders.size() < 2) || (orders[0] != 0U) || (orders[1] != 1U)) {
- return;
- }
- }
-
- TransparentBaseTransformation::transform(context, layer);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/pooling.hpp"
-
-#include <algorithm>
-#include <caseless.hpp>
-#include <string>
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-void PoolingTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
- if (!canBeTransformed(context, layer)) {
- return;
- }
-
- if (layer.insData.size() != 1) {
- THROW_IE_EXCEPTION << "layer inputs '" << layer.insData.size() << "' is not correct";
- }
-
- if (!CaselessEq<std::string>()(layer.type, "Pooling")) {
- THROW_IE_EXCEPTION << "layer '" << layer.name << "' is not correct";
- }
-
- TransparentBaseTransformation::transform(context, layer);
-}
-
-bool PoolingTransformation::isPrecisionPreserved(const CNNLayer& layer) const noexcept {
- const std::string poolMethod = layer.GetParamAsString("pool-method", "");
- return poolMethod == "max";
-}
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/power.hpp"
-
-#include <algorithm>
-#include <caseless.hpp>
-#include <string>
-#include <memory>
-#include <vector>
-
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-bool PowerTransformation::canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const {
- if (!LayerTransformation::canBeTransformed(context, layer)) {
- return false;
- }
-
- if (layer.insData.size() != 1) {
- THROW_IE_LPT_EXCEPTION(layer) << "layer inputs '" << layer.insData.size() << "' is not correct";
- }
-
- if (!CaselessEq<std::string>()(layer.type, "Power")) {
- THROW_IE_LPT_EXCEPTION(layer) << "layer '" << layer.name << "' is not correct";
- }
-
- const PowerLayer* powerLayer = dynamic_cast<const PowerLayer*>(&layer);
- if (powerLayer == nullptr) {
- THROW_IE_LPT_EXCEPTION(layer) << "unexpected Power layer type";
- }
- if (powerLayer->power != 1.f) {
- return false;
- }
-
- const CNNLayerPtr parent = CNNNetworkHelper::getParent(layer, 0);
- return !(parent->type != "ScaleShift");
-}
-
-void PowerTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
- if (!canBeTransformed(context, layer)) {
- return;
- }
-
- const PowerLayer* powerLayer = dynamic_cast<const PowerLayer*>(&layer);
- if (powerLayer == nullptr) {
- THROW_IE_LPT_EXCEPTION(layer) << "unexpected Power layer type";
- }
-
- auto scale_and_shift_blob = [] (Blob::Ptr &&blob, float scale, float shift) {
- auto float_data = CNNNetworkHelper::getFloatData(blob);
- auto float_data_ptr = float_data.get();
- auto float_data_size = blob->size();
-
- for (size_t i = 0ul; i < float_data_size; i++) {
- float_data_ptr[i] = float_data_ptr[i] * scale + shift;
- }
-
- CNNNetworkHelper::fillBlobByFP32(blob, float_data_ptr);
- };
-
- const CNNLayerPtr parent = CNNNetworkHelper::getParent(layer, 0);
-
- scale_and_shift_blob(CNNNetworkHelper::getBlob(parent, "weights"), powerLayer->scale, 0.0f);
- scale_and_shift_blob(CNNNetworkHelper::getBlob(parent, "biases") , powerLayer->scale, powerLayer->offset);
-
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(layer);
- CNNNetworkHelper::removeLayer(context.network, std::make_shared<CNNLayer>(layer));
- context.removeLayer(layer);
- if (children.empty()) {
- const std::string originalName = layer.name;
- CNNNetworkHelper::renameLayer(context.network, parent->name, layer.name);
- }
-}
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <functional>
-#include <initializer_list>
-#include <iterator>
-#include <map>
-#include <memory>
-#include <numeric>
-#include <ostream>
-#include <set>
-#include <sstream>
-#include <string>
-#include <tuple>
-#include <type_traits>
-#include <typeinfo>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include <cassert>
-#include <cctype>
-#include <cmath>
-#include <cstdlib>
-#include <cstring>
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/quantization_details.hpp"
-#include <math.h>
-
-#include <algorithm>
-#include <blob_factory.hpp>
-#include <cmath>
-#include <limits>
-#include <map>
-#include <memory>
-#include <string>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include <ie_common.h>
-#include <legacy/cnn_network_impl.hpp>
-#include <legacy/ie_util_internal.hpp>
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-class ConstTensorDesc {
-public:
- static void validate(const Layout layout, const std::vector<size_t>& dims) {
- switch (layout) {
- case Layout::SCALAR: {
- if (dims.size() != 0) {
- THROW_IE_EXCEPTION << "unexpected dimensions size " << dims.size() << " for layout " << layout;
- }
- break;
- }
- case Layout::C: {
- if (dims.size() != 1) {
- THROW_IE_EXCEPTION << "unexpected dimensions size " << dims.size() << " for layout " << layout;
- }
- break;
- }
- case Layout::NCHW: {
- if (dims.size() != 4) {
- THROW_IE_EXCEPTION << "unexpected dimensions size " << dims.size() << " for layout " << layout;
- }
- break;
- }
- default: {
- THROW_IE_EXCEPTION << "unexpected layout " << layout;
- }
- }
- }
-
- static size_t getChannelsCount(const Layout layout, const std::vector<size_t>& dims) {
- switch (layout) {
- case Layout::SCALAR: {
- return 1;
- }
- case Layout::C: {
- return dims[0];
- }
- case Layout::NCHW: {
- return dims[1];
- }
- default: {
- THROW_IE_EXCEPTION << "unexpected layout " << layout;
- }
- }
- }
-};
-
-QuantizationDetails::QuantizationDetails()
- : levels(),
- inputLowValues({}),
- inputHighValues({}),
- outputLowValues({}),
- outputHighValues({}),
- inputIntervalsCount(0),
- outputIntervalsCount(0),
- outputChannelsCount(0) {}
-
-QuantizationDetails::QuantizationDetails(const QuantizationDetails& quantizationDetails)
- : levels(quantizationDetails.levels),
- inputLowValues(quantizationDetails.inputLowValues),
- inputHighValues(quantizationDetails.inputHighValues),
- outputLowValues(quantizationDetails.outputLowValues),
- outputHighValues(quantizationDetails.outputHighValues),
- inputIntervalsCount(quantizationDetails.inputIntervalsCount),
- outputIntervalsCount(quantizationDetails.outputIntervalsCount),
- outputChannelsCount(quantizationDetails.outputChannelsCount) {}
-
-QuantizationDetails::QuantizationDetails(const size_t levels, const std::vector<float>& inputLowValues,
- const std::vector<float>& inputHighValues,
- const std::vector<float>& outputLowValues,
- const std::vector<float>& outputHighValues, const size_t inputIntervalsCount,
- const size_t outputIntervalsCount, const size_t outputChannelsCount)
- : levels(levels),
- inputLowValues(inputLowValues),
- inputHighValues(inputHighValues),
- outputLowValues(outputLowValues),
- outputHighValues(outputHighValues),
- inputIntervalsCount(inputIntervalsCount),
- outputIntervalsCount(outputIntervalsCount),
- outputChannelsCount(outputChannelsCount) {}
-
-bool QuantizationDetails::outputLayoutIsSupported(const CNNLayer& quantize) {
- std::vector<float> outputLowValues;
- std::vector<float> outputHighValues;
- size_t outputIntervalsCount;
- getOutputIntervals(quantize, outputLowValues, outputHighValues, outputIntervalsCount);
-
- const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(
- quantize,
- CNNNetworkHelper::onWeights(quantize) && CNNNetworkHelper::onConstWeightsPath(quantize));
- if ((outputIntervalsCount != 1ul) && (outputIntervalsCount != outputChannelsCount)) {
- return false;
- }
-
- return true;
-}
-
-void QuantizationDetails::getInputIntervals(
- const CNNLayer& quantize,
- std::vector<float>& inputLowValues,
- std::vector<float>& inputHighValues,
- size_t& inputIntervalsCount) {
- if (quantize.insData.size() != 5) {
- THROW_IE_LPT_EXCEPTION(quantize) << "Unexpected inputs size " << quantize.insData.size();
- }
-
- const DataPtr inputLowData = quantize.insData[1].lock();
- if (inputLowData == nullptr) {
- THROW_IE_LPT_EXCEPTION(quantize) << "input low data is absent";
- }
- const CNNLayerPtr inputLowLayer = getCreatorLayer(inputLowData).lock();
- validate(inputLowLayer);
- const std::vector<float> inputLowBlobValues = getBlobValue(inputLowLayer);
- inputLowValues.insert(inputLowValues.end(), inputLowBlobValues.begin(), inputLowBlobValues.end());
-
- const DataPtr inputHighData = quantize.insData[2].lock();
- if (inputHighData == nullptr) {
- THROW_IE_LPT_EXCEPTION(quantize) << "input high data is absent";
- }
- const CNNLayerPtr inputHighLayer = getCreatorLayer(inputHighData).lock();
- validate(inputHighLayer);
- const std::vector<float> inputHighBlobValues = getBlobValue(inputHighLayer);
- inputHighValues.insert(inputHighValues.end(), inputHighBlobValues.begin(), inputHighBlobValues.end());
-
- if (inputLowValues.size() != inputHighValues.size()) {
- THROW_IE_LPT_EXCEPTION(quantize) << "Quantize input values sizes are not equal for layer " << quantize.name;
- }
-
- inputIntervalsCount = inputLowValues.size();
-}
-
-void QuantizationDetails::getOutputIntervals(
- const CNNLayer& quantize,
- std::vector<float>& outputLowValues,
- std::vector<float>& outputHighValues,
- size_t& outputIntervalsCount) {
- if (quantize.insData.size() != 5) {
- THROW_IE_LPT_EXCEPTION(quantize) << "unexpected inputs size " << quantize.insData.size();
- }
-
- const DataPtr outputLowData = quantize.insData[3].lock();
- if (outputLowData == nullptr) {
- THROW_IE_LPT_EXCEPTION(quantize) << "output low data is absent";
- }
- const CNNLayerPtr outputLowLayer = getCreatorLayer(outputLowData).lock();
- validate(outputLowLayer);
- const std::vector<float>& outputLowBlobValues = getBlobValue(outputLowLayer);
- outputLowValues.insert(outputLowValues.end(), outputLowBlobValues.begin(), outputLowBlobValues.end());
-
- const DataPtr outputHighData = quantize.insData[4].lock();
- if (outputHighData == nullptr) {
- THROW_IE_LPT_EXCEPTION(quantize) << "output high data is absent";
- }
- const CNNLayerPtr outputHighLayer = getCreatorLayer(outputHighData).lock();
- validate(outputHighLayer);
- const std::vector<float> outputHighBlobValues = getBlobValue(outputHighLayer);
- outputHighValues.insert(outputHighValues.end(), outputHighBlobValues.begin(), outputHighBlobValues.end());
-
- if (outputLowValues.size() != outputHighValues.size()) {
- THROW_IE_LPT_EXCEPTION(quantize) << "Quantize output values sizes are not equal for layer " << quantize.name;
- }
-
- outputIntervalsCount = outputLowValues.size();
-}
-
-QuantizationDetails QuantizationDetails::getDetails(const CNNLayer& quantize) {
- std::vector<float> inputLowValues;
- std::vector<float> inputHighValues;
- size_t inputIntervalsCount;
- getInputIntervals(quantize, inputLowValues, inputHighValues, inputIntervalsCount);
-
- std::vector<float> outputLowValues;
- std::vector<float> outputHighValues;
- size_t outputIntervalsCount;
- getOutputIntervals(quantize, outputLowValues, outputHighValues, outputIntervalsCount);
-
- const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(
- quantize,
- CNNNetworkHelper::onWeights(quantize) && CNNNetworkHelper::onConstWeightsPath(quantize));
- if (!outputLayoutIsSupported(quantize)) {
- THROW_IE_LPT_EXCEPTION(quantize) << "Expected output channels count " << outputIntervalsCount << " but found " << outputChannelsCount;
- }
-
- if (!quantize.CheckParamPresence("levels")) {
- THROW_IE_LPT_EXCEPTION(quantize) << "Parameter 'levels' is absent";
- }
-
- return QuantizationDetails(
- quantize.GetParamAsInt("levels"),
- inputLowValues,
- inputHighValues,
- outputLowValues,
- outputHighValues,
- inputIntervalsCount,
- outputIntervalsCount,
- outputChannelsCount);
-}
-
-bool QuantizationDetails::hasNegativeOutput() const {
- for (const float value : outputLowValues) {
- if (value < 0.f) {
- return true;
- }
- }
-
- for (const float value : outputHighValues) {
- if (value < 0.f) {
- return true;
- }
- }
-
- return false;
-}
-
-float QuantizationDetails::maxOutput(const size_t channel) const {
- const auto value = fmax(fabs(outputLowValues[outputLowValues.size() == 1 ? 0 : channel]),
- fabs(outputHighValues[outputHighValues.size() == 1 ? 0 : channel]));
- return value;
-}
-
-float QuantizationDetails::maxInput(const size_t channel) const {
- const auto value = fmax(fabs(outputLowValues[inputLowValues.size() == 1 ? 0 : channel]),
- fabs(outputHighValues[inputHighValues.size() == 1 ? 0 : channel]));
- return value;
-}
-
-float QuantizationDetails::maxOutputHigh() const {
- float output = getOutputHighValue(0);
- for (size_t channel = 1; channel < outputIntervalsCount; ++channel) {
- if (output < getOutputHighValue(channel)) {
- output = getOutputHighValue(channel);
- }
- }
- return output;
-}
-
-float QuantizationDetails::minOutputLow() const {
- float output = getOutputLowValue(0);
- for (size_t channel = 1; channel < outputIntervalsCount; ++channel) {
- if (output > getOutputLowValue(channel)) {
- output = getOutputLowValue(channel);
- }
- }
- return output;
-}
-
-float QuantizationDetails::getInputLowValue(const size_t channel) const {
- if ((inputIntervalsCount != 1) && (channel >= inputIntervalsCount)) {
- THROW_IE_EXCEPTION << "channel " << channel << " is out of bound, input channels count " << inputIntervalsCount;
- }
- const float value = inputLowValues.size() == 1 ? inputLowValues[0] : inputLowValues[channel];
- return value;
-}
-
-float QuantizationDetails::getInputHighValue(const size_t channel) const {
- if ((inputIntervalsCount != 1) && (channel >= inputIntervalsCount)) {
- THROW_IE_EXCEPTION << "channel " << channel << " is out of bound, input channels count " << inputIntervalsCount;
- }
- const float value = inputHighValues.size() == 1 ? inputHighValues[0] : inputHighValues[channel];
- return value;
-}
-
-float QuantizationDetails::getOutputLowValue(const size_t channel) const {
- if ((outputIntervalsCount != 1) && (channel >= outputIntervalsCount)) {
- THROW_IE_EXCEPTION << "channel " << channel << " is out of bound, output channels count "
- << outputIntervalsCount;
- }
- const float value = outputLowValues.size() == 1 ? outputLowValues[0] : outputLowValues[channel];
- return value;
-}
-
-float QuantizationDetails::getOutputHighValue(const size_t channel) const {
- if ((outputIntervalsCount != 1) && (channel >= outputIntervalsCount)) {
- THROW_IE_EXCEPTION << "channel " << channel << " is out of bound, output channels count "
- << outputIntervalsCount;
- }
- const float value = outputHighValues.size() == 1 ? outputHighValues[0] : outputHighValues[channel];
- return value;
-}
-
-void QuantizationDetails::validate(const CNNLayerPtr& constantLayer) {
- if (constantLayer == nullptr) {
- THROW_IE_EXCEPTION << "Quantize layer input is absent";
- }
-
- if (constantLayer->blobs.size() == 0) {
- THROW_IE_EXCEPTION << "Quantize layer input '" << constantLayer->name << "' doesn't have blobs";
- }
-
- if (constantLayer->blobs.size() > 1) {
- THROW_IE_EXCEPTION << "Quantize layer input '" << constantLayer->name << "' has too much blobs";
- }
-
- const auto blob = constantLayer->blobs.begin()->second;
- // const auto byteSize = blob->byteSize();
- // if ((blob->getTensorDesc().getDims().size() != 0) &&
- // (blob->getTensorDesc().getDims().size() != 1) &&
- // (blob->getTensorDesc().getDims().size() != 4)) {
- // THROW_IE_EXCEPTION << "Quantize layer input '" << constantLayer->name << "' blob dimensions are not correct";
- // }
-
- const auto tensorDesc = blob->getTensorDesc();
- // if ((tensorDesc.getLayout() != Layout::SCALAR) &&
- // (tensorDesc.getLayout() != Layout::C) &&
- // ((tensorDesc.getLayout() != Layout::NCHW))) {
- // THROW_IE_EXCEPTION << "Quantize layer input '" << constantLayer->name << "' layout not correct";
- // }
-
- // const auto dims = tensorDesc.getDims();
- // if ((dims.size() != 0) && (dims.size() != 1) && (dims.size() != 4)) {
- // THROW_IE_EXCEPTION << "Quantize layer input '" << constantLayer->name << "' blob dimensions size " <<
- // dims.size() << " not correct";
- // }
-
- // ConstTensorDesc::validate(tensorDesc.getLayout(), tensorDesc.getDims());
-}
-
-std::vector<float> QuantizationDetails::getBlobValue(const CNNLayerPtr& constantLayer) {
- if (constantLayer->blobs.empty()) {
- THROW_IE_LPT_EXCEPTION(*constantLayer) << "blobs are empty";
- }
- const auto blob = constantLayer->blobs.begin()->second;
- auto buffer = CNNNetworkHelper::getFloatData(blob);
- return std::vector<float>(buffer.get(), buffer.get() + blob->size());
-}
-
-bool QuantizationDetails::isSupportedLevel(const size_t level) {
- static const std::unordered_set<size_t> supported_levels = { 15ul, 16ul, 255ul, 256ul };
- return supported_levels.find(level) != supported_levels.end();
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/resample.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-#include <algorithm>
-#include <memory>
-#include <string>
-#include <vector>
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-void ResampleTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
- if (!LayerTransformation::canBeTransformed(context, layer)) {
- return;
- }
-
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(layer);
- if (parents.size() != 1ul) {
- THROW_IE_EXCEPTION << "unexpected input layers count " << parents.size();
- }
-
- if (parents[0]->type != "ScaleShift") {
- return;
- }
-
- const std::string type = layer.GetParamAsString("type", "");
- if (type != "caffe.ResampleParameter.NEAREST") {
- return;
- }
-
- const Precision precision = getPrecisionBeforeParentDequantizationScaleShift(layer);
-
- std::vector<float> dequantizationScales;
- std::vector<float> dequantizationShifts;
- fillFromDequantizationLayer(*parents[0], dequantizationScales, dequantizationShifts);
-
- // transparent base transformation
- CNNNetworkHelper::removeLayer(context.network, parents[0]);
- context.removeLayer(*parents[0]);
-
- if (updatePrecisions) {
- CNNNetworkHelper::setOutDataPrecision(layer, precision);
- }
-
- addDequantizationLayer(context, layer, dequantizationScales, dequantizationShifts);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/reshape.hpp"
-
-#include <algorithm>
-#include <caseless.hpp>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-size_t getChannelVolume(const SizeVector& dims) {
- size_t volume = 1ul;
- for (size_t i = 2; i < dims.size(); ++i) {
- volume = volume * dims[i];
- }
-
- return volume;
-}
-
-void ReshapeTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
- if (!canBeTransformed(context, layer)) {
- return;
- }
-
- if ((layer.insData.size() == 0) || layer.insData.size() > 2) {
- THROW_IE_EXCEPTION << "layer inputs '" << layer.insData.size() << "' is not correct";
- }
-
- if (!CaselessEq<std::string>()(layer.type, "Reshape")) {
- THROW_IE_EXCEPTION << "layer '" << layer.name << "' is not correct";
- }
-
- if (layer.insData.size() > 1) {
- transformOriginal(context, layer);
- } else {
- transformConstPropagated(context, layer);
- }
-}
-
-bool ReshapeTransformation::canTransformOriginal(const CNNLayer& layer) const {
- const CNNLayerPtr constLayer = CNNNetworkHelper::getParent(layer, 1);
- if (constLayer == nullptr) {
- THROW_IE_EXCEPTION << "Layer '" << layer.name << "' does not have parent at 1 position";
- }
- if (constLayer->type != "Const") {
- return false;
- }
-
- const Blob::Ptr paramsBlob = CNNNetworkHelper::getBlob(constLayer, "custom");
- const Precision precision = paramsBlob->getTensorDesc().getPrecision();
- if (!CNNNetworkHelper::isBlobPrecisionSupported(precision)) {
- THROW_IE_EXCEPTION << "layer " << constLayer->type << " '" << constLayer->name << "' unexpected precision " << precision;
- }
-
- if (paramsBlob->size() < 2) {
- return false;
- }
-
- const DataPtr inputData = layer.insData[0].lock();
- if (inputData == nullptr) {
- THROW_IE_EXCEPTION << "input data is absent";
- }
-
- const std::vector<size_t> inputDims = inputData->getTensorDesc().getDims();
- if (inputDims.size() < 2) {
- return false;
- }
-
- std::shared_ptr<float> paramsBufferData = CNNNetworkHelper::getFloatData(paramsBlob);
- float* params = paramsBufferData.get();
- if (((params[0] != -1) && (params[0] != 0) && (inputDims[0] != params[0])) ||
- ((params[1] != -1) && (params[1] != 0) && (inputDims[1] != params[1]))) {
- return false;
- }
-
- return true;
-}
-
-void ReshapeTransformation::transformOriginal(TransformationContext& context, CNNLayer& layer) const {
- if (!canTransformOriginal(layer)) {
- return;
- }
-
- const CNNLayerPtr constLayer = CNNNetworkHelper::getParent(layer, 1);
- const Blob::Ptr paramsBlob = CNNNetworkHelper::getBlob(constLayer, "custom");
- const signed int* paramsBuffer = paramsBlob->buffer().as<const signed int*>();
- if (paramsBuffer[1] == -1) {
- quantize(context, layer);
- return;
- }
-
- TransparentBaseTransformation::transform(context, layer);
-}
-
-bool ReshapeTransformation::canTransformConstPropagated(const CNNLayer& layer) const {
- if (layer.insData.size() != 1) {
- THROW_IE_EXCEPTION << "unexpected input count " << layer.insData.size();
- }
- const DataPtr input = layer.insData[0].lock();
- if (input == nullptr) {
- THROW_IE_EXCEPTION << "input is absent";
- }
- const std::vector<size_t> inputDims = input->getDims();
- if (inputDims.size() < 2) {
- return false;
- }
-
- if (layer.outData.size() != 1) {
- THROW_IE_EXCEPTION << "unexpected output count " << layer.outData.size();
- }
- const std::vector<size_t> outputDims = layer.outData[0]->getDims();
- if (outputDims.size() < 2) {
- return false;
- }
-
- const CNNLayerPtr dequantizationLayer = CNNNetworkHelper::getParent(layer, 0ul);
- if ((dequantizationLayer->outData[0]->getTensorDesc().getLayout() != Layout::NCHW) || (layer.outData[0]->getTensorDesc().getLayout() != Layout::NC)) {
- for (size_t i = 0; i < 2; ++i) {
- if (inputDims[i] != outputDims[i]) {
- return false;
- }
- }
- }
-
- return true;
-}
-
-void ReshapeTransformation::transformConstPropagated(TransformationContext& context, CNNLayer& layer) const {
- if (!canTransformConstPropagated(layer)) {
- return;
- }
-
- const CNNLayerPtr dequantizationLayer = CNNNetworkHelper::getParent(layer, 0ul);
- if ((dequantizationLayer->outData[0]->getTensorDesc().getLayout() == Layout::NCHW) && (layer.outData[0]->getTensorDesc().getLayout() == Layout::NC)) {
- quantize(context, layer);
- return;
- }
-
- TransparentBaseTransformation::transform(context, layer);
-}
-
-void ReshapeTransformation::quantize(TransformationContext& context, CNNLayer& layer) const {
- const CNNLayerPtr dequantizationLayer = CNNNetworkHelper::getParent(layer, 0ul);
- if ((dequantizationLayer == nullptr) || (dequantizationLayer->type != "ScaleShift")) {
- return;
- }
-
- const size_t inputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(*dequantizationLayer);
- const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(layer);
- const DataPtr insData = layer.insData[0].lock();
- if (insData == nullptr) {
- THROW_IE_LPT_EXCEPTION(layer) << "input data is absent";
- }
- const size_t channelVolume = getChannelVolume(insData->getTensorDesc().getDims());
- const DataPtr dequantizationDataPtr = dequantizationLayer->insData[0].lock();
- if (dequantizationDataPtr == nullptr) {
- THROW_IE_LPT_EXCEPTION(*dequantizationLayer) << "input data is absent";
- }
- if (insData->getTensorDesc().getDims()[0] != dequantizationDataPtr->getTensorDesc().getDims()[0] ||
- inputChannelsCount * channelVolume != outputChannelsCount)
- return;
-
- std::vector<float> originalDataDequantizationScales;
- std::vector<float> originalDataDequantizationShifts;
- fillFromDequantizationLayer(*dequantizationLayer, originalDataDequantizationScales, originalDataDequantizationShifts);
-
- std::vector<float> dequantizationScales(outputChannelsCount);
- std::vector<float> dequantizationShifts(outputChannelsCount);
-
- for (size_t inputChannel = 0ul; inputChannel < inputChannelsCount; inputChannel++) {
- for (size_t i = 0ul; i < channelVolume; i++) {
- dequantizationScales[inputChannel * channelVolume + i] = originalDataDequantizationScales[inputChannel];
- dequantizationShifts[inputChannel * channelVolume + i] = originalDataDequantizationShifts[inputChannel];
- }
- }
-
- if (updatePrecisions) {
- const Precision lowPrecision = getPrecisionBeforeParentDequantizationScaleShift(layer);
- CNNNetworkHelper::setOutDataPrecision(layer, lowPrecision);
- }
-
- CNNNetworkHelper::removeLayer(context.network, dequantizationLayer);
- context.removeLayer(*dequantizationLayer);
-
- addDequantizationLayer(context, layer, dequantizationScales, dequantizationShifts);
-}
-
-bool ReshapeTransformation::isPrecisionPreserved(const CNNLayer& layer) const noexcept {
- return (layer.insData.size() > 1) ? canTransformOriginal(layer) : canTransformConstPropagated(layer);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/scaleshift_to_convolution.hpp"
-
-#include <algorithm>
-#include <memory>
-#include <string>
-#include <unordered_set>
-#include <vector>
-
-#include <caseless.hpp>
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-static const char * defaultIgnoreWithParents[] = {
- "Convolution",
- "FakeQuantize"
-};
-
-ScaleShiftToConvolutionTransformation::ScaleShiftToConvolutionTransformation(const Params& params) :
- WeightableLayerTransformation(params),
- groupSize(1ul),
- ignoreWithParents(defaultIgnoreWithParents, defaultIgnoreWithParents +
- sizeof(defaultIgnoreWithParents) / sizeof(defaultIgnoreWithParents[0])) {
-}
-
-void ScaleShiftToConvolutionTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
- if (!CaselessEq<std::string>()(layer.type, "ScaleShift")) {
- THROW_IE_EXCEPTION << "Layer '" << layer.name << "' has invalid type '" << layer.type << "'. Convolution is expected.";
- }
-
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(layer);
- if (parents.size() != 1)
- return;
-
- const DataPtr outData = CNNNetworkHelper::getOutData(*parents[0], layer);
- if (outData == nullptr) {
- THROW_IE_EXCEPTION << "layer " << layer.type << " '" << layer.name << "' is child for " << parents[0]->type << " '" << parents[0]->name << "'";
- }
-
- const Precision parentPrecision = outData->getTensorDesc().getPrecision();
- if (std::all_of(
- precisionsOnActivations.begin(),
- precisionsOnActivations.end(),
- [&](const Precision precision) { return precision != parentPrecision; })) {
- return;
- }
-
- if (getInputTo(outData).size() == 1ul && parents[0]->type != "Concat") {
- return;
- }
-
- if (getInputTo(layer.outData[0]).size() == 0ul) {
- return;
- }
-
- if (updatePrecisions) {
- const Precision parentPrecision = CNNNetworkHelper::getPrecisionParent(layer);
- if ((parentPrecision != Precision::I8) && (parentPrecision != Precision::U8)) {
- return;
- }
- }
-
- if (std::any_of(parents.begin(), parents.end(), [](CNNLayerPtr parent) { return CaselessEq<std::string>()(parent->type, "Input"); })) {
- return;
- }
-
- const size_t channelsCount = CNNNetworkHelper::getOutputChannelsCount(layer);
- if (channelsCount != CNNNetworkHelper::getInputChannelsCount(layer)) {
- return;
- }
-
- if (channelsCount % groupSize != 0) {
- return;
- }
-
- const DataPtr insData = layer.insData[0].lock();
- if (insData == nullptr) {
- THROW_IE_LPT_EXCEPTION(layer) << "input data is absent";
- }
- if (insData->getDims().size() != 4) {
- return;
- }
-
- CNNLayerPtr convolutionLayerPtr = transformToConvolution(
- context,
- layer,
- channelsCount / groupSize);
-
- if (updatePrecisions) {
- std::vector<float> originalDataDequantizationScales(channelsCount, 1.f);
- std::vector<float> originalDataDequantizationShifts(channelsCount, 0.f);
- std::vector<float> originalWeightsDequantizationScales(channelsCount);
- const Blob::Ptr weightsOriginalShiftsBlob = CNNNetworkHelper::getBlob(std::make_shared<CNNLayer>(layer), "weights");
- const float* weightsOriginalShiftsBuffer = weightsOriginalShiftsBlob->buffer().as<float*>();
- for (size_t i = 0ul; i < originalWeightsDequantizationScales.size(); ++i) {
- originalWeightsDequantizationScales[i] = weightsOriginalShiftsBuffer[i];
- }
- std::vector<float> originalWeightsDequantizationShifts(channelsCount, 0.f);
- std::vector<float> dequantizationScales;
- std::vector<float> dequantizationShifts;
- calculateDequantizationForSymmetric(
- *convolutionLayerPtr,
- originalDataDequantizationScales,
- originalDataDequantizationShifts,
- originalWeightsDequantizationScales,
- originalWeightsDequantizationShifts,
- dequantizationScales,
- dequantizationShifts);
-
- if (this->updateBiases) {
- std::vector<float> biasesShifts(dequantizationShifts.size(), 0.f);
- updateLayerBiases(context, *convolutionLayerPtr, false, dequantizationScales, dequantizationShifts, biasesShifts);
- }
-
- addDequantizationLayer(context, *convolutionLayerPtr, dequantizationScales, dequantizationShifts);
- }
-}
-
-void ScaleShiftToConvolutionTransformation::setGroupSize(const size_t groupSize) {
- this->groupSize = groupSize;
-}
-
-size_t ScaleShiftToConvolutionTransformation::getGroupSize() const {
- return groupSize;
-}
-
-void ScaleShiftToConvolutionTransformation::setIgnoreWithParents(const std::unordered_set<std::string>& ignoreWithParents) {
- this->ignoreWithParents = ignoreWithParents;
-}
-
-std::unordered_set<std::string> ScaleShiftToConvolutionTransformation::getIgnoreWithParents() const {
- return ignoreWithParents;
-}
-
-bool ScaleShiftToConvolutionTransformation::isPrecisionPreserved(const CNNLayer& layer) const noexcept {
- return false;
-}
-
-bool ScaleShiftToConvolutionTransformation::isQuantized(const CNNLayer& layer) const noexcept {
- return true;
-}
-
-CNNLayerPtr ScaleShiftToConvolutionTransformation::transformToConvolution(
- TransformationContext& context,
- const CNNLayer& layer,
- const size_t group) const {
- const Precision originalPrecision = layer.outData[0]->getTensorDesc().getPrecision();
- const LayerParams convolutionLayerParams{ layer.name, "Convolution", originalPrecision };
- CNNLayerPtr convolutionLayerPtr = std::make_shared<ConvolutionLayer>(convolutionLayerParams);
- ConvolutionLayer* convolutionLayer = dynamic_cast<ConvolutionLayer*>(convolutionLayerPtr.get());
- convolutionLayer->_kernel.insert(X_AXIS, 1);
- convolutionLayer->_kernel.insert(Y_AXIS, 1);
- convolutionLayer->params["kernel"] = "1,1";
- convolutionLayer->_stride.insert(X_AXIS, 1);
- convolutionLayer->_stride.insert(Y_AXIS, 1);
- convolutionLayer->_padding.insert(X_AXIS, 0);
- convolutionLayer->_padding.insert(Y_AXIS, 0);
- convolutionLayer->_pads_end.insert(X_AXIS, 0);
- convolutionLayer->_pads_end.insert(Y_AXIS, 0);
- convolutionLayer->_dilation.insert(X_AXIS, 1);
- convolutionLayer->_dilation.insert(Y_AXIS, 1);
- const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(layer);
- convolutionLayer->_out_depth = outputChannelsCount;
- convolutionLayer->_group = group;
- convolutionLayer->params["group"] = std::to_string(group);
-
- CNNLayerPtr layerPtr = std::make_shared<CNNLayer>(layer);
- CNNNetworkHelper::replaceLayer(context, layerPtr, convolutionLayerPtr);
-
- {
- const Precision weightsPrecision = updatePrecisions ? precisionsOnWeights[0] : CNNNetworkHelper::getPrecisionParent(layer);
- const Precision biasesPrecision = originalPrecision;
-
- LayerParams weightsLayerParams{ layer.name + "Weights", "Const", weightsPrecision };
- CNNLayerPtr weightsConstLayer = std::make_shared<CNNLayer>(weightsLayerParams);
- CNNNetworkHelper::addLayer(context, nullptr, convolutionLayerPtr, weightsConstLayer);
-
- {
- const size_t inputChannelsCount = CNNNetworkHelper::getInputChannelsCount(layer);
- const size_t weightsSize = outputChannelsCount * inputChannelsCount / group;
- std::shared_ptr<float> weightsBufferPtr(new float[weightsSize], std::default_delete<float[]>());
- float* weightsBuffer = weightsBufferPtr.get();
-
- const Blob::Ptr weightsOriginalShiftsBlob = CNNNetworkHelper::getBlob(std::make_shared<CNNLayer>(layer), "weights");
- const float* weightsOriginalShiftsBlobBuffer = weightsOriginalShiftsBlob->buffer().as<float*>();
- const size_t kernelsCount = inputChannelsCount / group;
- if (group == 1ul) {
- for (size_t outputChannel = 0ul; outputChannel < outputChannelsCount; ++outputChannel) {
- for (size_t kernel = 0ul; kernel < kernelsCount; ++kernel) {
- const float value = (outputChannel == kernel) ? (updatePrecisions ? 1.f : weightsOriginalShiftsBlobBuffer[outputChannel]) : 0.f;
- weightsBuffer[kernelsCount * outputChannel + kernel] = value;
- }
- }
- } else {
- const float channelsInGroup = outputChannelsCount / group;
- for (size_t outputChannel = 0ul; outputChannel < outputChannelsCount; ++outputChannel) {
- const size_t groupIndex = outputChannel / channelsInGroup;
- for (size_t kernel = 0ul; kernel < kernelsCount; ++kernel) {
- const size_t outputChannelIndexInGroup = outputChannel - groupIndex * channelsInGroup;
- const float value = (outputChannelIndexInGroup == kernel) ?
- (updatePrecisions ? 1.f : weightsOriginalShiftsBlobBuffer[outputChannel]) : 0.f;
- weightsBuffer[kernelsCount * outputChannel + kernel] = value;
- }
- }
- }
-
- Blob::Ptr weights = CNNNetworkHelper::makeNewBlobPtr(TensorDesc(weightsPrecision, { weightsSize }, Layout::C));
- weights->allocate();
- CNNNetworkHelper::fillBlobByFP32(weights, weightsBuffer);
- weightsConstLayer->blobs["custom"] = weights;
- weightsConstLayer->outData[0]->reshape({ outputChannelsCount, inputChannelsCount / group, 1, 1 }, Layout::NCHW);
- weightsConstLayer->outData[0]->setPrecision(weightsPrecision);
- // TODO: workaround
- weightsConstLayer->precision = weightsPrecision;
- }
-
- LayerParams biasesLayerParams{ layer.name + "Biases", "Const", biasesPrecision };
- CNNLayerPtr biasesConstLayer = std::make_shared<CNNLayer>(biasesLayerParams);
- CNNNetworkHelper::addLayer(context, nullptr, convolutionLayerPtr, biasesConstLayer);
-
- Blob::Ptr biasesOriginalShiftsBlob = CNNNetworkHelper::getBlob(std::make_shared<CNNLayer>(layer), "biases");
- biasesConstLayer->blobs["custom"] = biasesOriginalShiftsBlob;
- biasesConstLayer->outData[0]->reshape({ biasesOriginalShiftsBlob->size() }, Layout::C);
- }
-
- return convolutionLayerPtr;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/squeeze.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-#include <algorithm>
-#include <caseless.hpp>
-#include <string>
-#include <vector>
-
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-void SqueezeTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
- if (!canBeTransformed(context, layer)) {
- return;
- }
-
- if ((layer.insData.size() == 0) || (layer.insData.size() > 2)) {
- THROW_IE_EXCEPTION << "layer inputs '" << layer.insData.size() << "' is not correct";
- }
-
- if (!CaselessEq<std::string>()(layer.type, "Squeeze")) {
- THROW_IE_EXCEPTION << "layer '" << layer.name << "' is not correct";
- }
-
- if (layer.insData.size() > 1) {
- CNNLayerPtr constLayer = CNNNetworkHelper::getParent(layer, 1);
- if ((constLayer != nullptr) && (constLayer->type != "Const")) {
- return;
- }
-
- const Blob::Ptr paramsBlob = CNNNetworkHelper::getBlob(constLayer, "custom");
- const Precision precision = paramsBlob->getTensorDesc().getPrecision();
- if (precision != Precision::I32) {
- return;
- }
-
- DataPtr inputData = layer.insData[0].lock();
- if (inputData == nullptr) {
- THROW_IE_EXCEPTION << "input data is absent";
- }
-
- const std::vector<size_t> inputDims = inputData->getTensorDesc().getDims();
- if (inputDims.size() < paramsBlob->size()) {
- return;
- }
-
- const signed int* paramsBuffer = paramsBlob->buffer().as<const signed int*>();
- for (size_t index = 0; index < paramsBlob->size(); ++index) {
- if ((paramsBuffer[index] == 0) || (paramsBuffer[index] == 1)) {
- return;
- }
- }
- } else {
- if (layer.outData.size() != 1) {
- THROW_IE_EXCEPTION << "unexpected output count " << layer.outData.size();
- }
- const std::vector<size_t> outputDims = layer.outData[0]->getDims();
-
- auto it = std::find(outputDims.begin(), outputDims.end(), 1lu);
- if (it != outputDims.end()) {
- return;
- }
-
- if (layer.insData.size() != 1) {
- THROW_IE_EXCEPTION << "unexpected input count " << layer.insData.size();
- }
- const DataPtr input = layer.insData[0].lock();
- if (input == nullptr) {
- THROW_IE_EXCEPTION << "input is absent";
- }
- const std::vector<size_t> inputDims = input->getDims();
- for (size_t i = 0; (i < 2) && (i < outputDims.size()); ++i) {
- if (inputDims[i] != outputDims[i]) {
- return;
- }
- }
- }
-
- TransparentBaseTransformation::transform(context, layer);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/transformation_context.hpp"
-#include <legacy/details/ie_cnn_network_iterator.hpp>
-#include <legacy/details/ie_cnn_network_tools.h>
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-TransformationContext::TransformationContext(ICNNNetwork& network)
- : network(network), layers(CNNNetSortTopologically(network)) {
- auto it = details::CNNNetworkIterator(&network);
- auto end = details::CNNNetworkIterator();
- while (it != end) {
- _original_precisions_map[(*it)->name] = {};
- for (auto data : (*it)->outData) _original_precisions_map[(*it)->name][data->getName()] = data->getPrecision();
- it++;
- }
-}
-
-void TransformationContext::removeLayer(const CNNLayer& layer) {
- for (size_t i = 0lu; i < layers.size(); ++i) {
- if ((layers[i] != nullptr) && (layers[i]->name == layer.name)) {
- layers[i] = nullptr;
- break;
- }
- }
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/transformer.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-#include "itt.hpp"
-
-#include <ie_common.h>
-
-#include <algorithm>
-#include <blob_factory.hpp>
-#include <cmath>
-#include <caseless.hpp>
-#include <limits>
-#include <map>
-#include <memory>
-#include <string>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include <legacy/cnn_network_impl.hpp>
-#include <legacy/ie_util_internal.hpp>
-
-#include "low_precision_transformations/activation.hpp"
-#include "low_precision_transformations/concat_multi_channels.hpp"
-#include "low_precision_transformations/const.hpp"
-#include "low_precision_transformations/convolution.hpp"
-#include "low_precision_transformations/depth_to_space.hpp"
-#include "low_precision_transformations/fake_quantize.hpp"
-#include "low_precision_transformations/fully_connected.hpp"
-#include "low_precision_transformations/fuse_fake_quantize_and_scale_shift.hpp"
-#include "low_precision_transformations/gemm.hpp"
-#include "low_precision_transformations/mvn.hpp"
-#include "low_precision_transformations/permute.hpp"
-#include "low_precision_transformations/pooling.hpp"
-#include "low_precision_transformations/resample.hpp"
-#include "low_precision_transformations/power.hpp"
-#include "low_precision_transformations/reshape.hpp"
-#include "low_precision_transformations/scaleshift_to_convolution.hpp"
-#include "low_precision_transformations/squeeze.hpp"
-#include "low_precision_transformations/eltwise.hpp"
-#include "low_precision_transformations/normalize.hpp"
-
-// uncomment to display precision info during low precision transformations
-// #define DISPLAY_PECISION
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-LowPrecisionTransformations::LowPrecisionTransformations(
- const std::map<std::string, LayerTransformationPtr>& branchSpecificTransformations,
- const std::map<std::string, LayerTransformationPtr>& transformations,
- const std::map<std::string, LayerTransformationPtr>& cleanupTransformations) :
- branchSpecificTransformations(branchSpecificTransformations),
- transformations(transformations),
- cleanupTransformations(cleanupTransformations) {}
-
-void LowPrecisionTransformations::setUpdatePrecisions(const bool updatePrecisions) {
- for (auto it = branchSpecificTransformations.begin(); it != branchSpecificTransformations.end(); ++it) {
- it->second->setUpdatePrecisions(updatePrecisions);
- }
- for (auto it = transformations.begin(); it != transformations.end(); ++it) {
- it->second->setUpdatePrecisions(updatePrecisions);
- }
-}
-
-void LowPrecisionTransformations::setQuantizeOutputs(const bool quantizeOutputs) {
- for (auto it = branchSpecificTransformations.begin(); it != branchSpecificTransformations.end(); ++it) {
- it->second->setQuantizeOutputs(quantizeOutputs);
- }
- for (auto it = transformations.begin(); it != transformations.end(); ++it) {
- it->second->setQuantizeOutputs(quantizeOutputs);
- }
-}
-
-// Forwards the weightsToConst flag to every branch-specific and regular transformation.
-// Cleanup transformations are not visited.
-void LowPrecisionTransformations::setWeightsToConst(const bool weightsToConst) {
-    for (const auto& entry : branchSpecificTransformations) {
-        entry.second->setWeightsToConst(weightsToConst);
-    }
-    for (const auto& entry : transformations) {
-        entry.second->setWeightsToConst(weightsToConst);
-    }
-}
-
-// Forwards the activation tensor alignment mode to every branch-specific and regular
-// transformation. Cleanup transformations are not visited.
-void LowPrecisionTransformations::setQuantizedTensorAlignmentOnActivations(
-    const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnActivations) {
-    for (const auto& entry : branchSpecificTransformations) {
-        entry.second->setQuantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations);
-    }
-    for (const auto& entry : transformations) {
-        entry.second->setQuantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations);
-    }
-}
-
-// Forwards the weights tensor alignment mode to every branch-specific and regular
-// transformation. Cleanup transformations are not visited.
-void LowPrecisionTransformations::setQuantizedTensorAlignmentOnWeights(
-    const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnWeights) {
-    for (const auto& entry : branchSpecificTransformations) {
-        entry.second->setQuantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights);
-    }
-    for (const auto& entry : transformations) {
-        entry.second->setQuantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights);
-    }
-}
-
-// Removes the transformation registered for layerType (matched case-insensitively)
-// from all three registries and returns *this so calls can be chained.
-LowPrecisionTransformations& LowPrecisionTransformations::remove(const std::string& layerType) {
-    std::string loweredType(layerType);
-    std::transform(loweredType.begin(), loweredType.end(), loweredType.begin(), ::tolower);
-
-    return removeBranchSpecificTransformations(loweredType)
-        .removeTransformations(loweredType)
-        .removeCleanupTransformations(loweredType);
-}
-
-// Erases the branch-specific transformation registered under the lower-cased layerType.
-// Erasing a key that is not present is a no-op. Returns *this for chaining.
-LowPrecisionTransformations& LowPrecisionTransformations::removeBranchSpecificTransformations(const std::string& layerType) {
-    std::string key(layerType);
-    std::transform(key.begin(), key.end(), key.begin(), ::tolower);
-    branchSpecificTransformations.erase(key);
-
-    return *this;
-}
-
-// Erases the regular transformation registered under the lower-cased layerType.
-// Erasing a key that is not present is a no-op. Returns *this for chaining.
-LowPrecisionTransformations& LowPrecisionTransformations::removeTransformations(const std::string& layerType) {
-    std::string key(layerType);
-    std::transform(key.begin(), key.end(), key.begin(), ::tolower);
-    transformations.erase(key);
-
-    return *this;
-}
-
-// Erases the cleanup transformation registered under the lower-cased layerType.
-// Erasing a key that is not present is a no-op. Returns *this for chaining.
-LowPrecisionTransformations& LowPrecisionTransformations::removeCleanupTransformations(const std::string& layerType) {
-    std::string key(layerType);
-    std::transform(key.begin(), key.end(), key.begin(), ::tolower);
-    cleanupTransformations.erase(key);
-
-    return *this;
-}
-
-// Looks up the transformation for layerType (matched case-insensitively).
-// Registries are probed in priority order: branch-specific, regular, cleanup.
-// Returns nullptr when none of them has an entry for the type.
-LayerTransformationPtr LowPrecisionTransformations::find(const std::string& layerType) const {
-    std::string key(layerType);
-    std::transform(key.begin(), key.end(), key.begin(), ::tolower);
-
-    const std::map<std::string, LayerTransformationPtr>* const registries[] = {
-        &branchSpecificTransformations,
-        &transformations,
-        &cleanupTransformations
-    };
-
-    for (const auto* registry : registries) {
-        const auto found = registry->find(key);
-        if (found != registry->end()) {
-            return found->second;
-        }
-    }
-
-    return nullptr;
-}
-
-// Installs paramsManager on every transformation in all three registries.
-void LowPrecisionTransformations::setParamsManager(IParamsManager* paramsManager) noexcept {
-    std::map<std::string, LayerTransformationPtr>* const registries[] = {
-        &branchSpecificTransformations,
-        &transformations,
-        &cleanupTransformations
-    };
-
-    for (auto* registry : registries) {
-        setParamsManager(paramsManager, *registry);
-    }
-}
-
-// Installs layerTransformationsManager on every transformation in all three registries.
-void LowPrecisionTransformations::setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept {
-    std::map<std::string, LayerTransformationPtr>* const registries[] = {
-        &branchSpecificTransformations,
-        &transformations,
-        &cleanupTransformations
-    };
-
-    for (auto* registry : registries) {
-        setLayerTransformationsManager(layerTransformationsManager, *registry);
-    }
-}
-
-// Installs paramsManager on every transformation stored in the given registry.
-// Entries are visited by const reference: the function is noexcept, and iterating
-// by value would copy each (string, shared_ptr) pair, which may allocate and throw.
-void LowPrecisionTransformations::setParamsManager(
-    IParamsManager* paramsManager,
-    std::map<std::string, LayerTransformationPtr>& transformations) noexcept {
-    for (const auto& entry : transformations) {
-        entry.second->setParamsManager(paramsManager);
-    }
-}
-
-// Installs layerTransformationsManager on every transformation stored in the given registry.
-// Entries are visited by const reference: the function is noexcept, and iterating
-// by value would copy each (string, shared_ptr) pair, which may allocate and throw.
-void LowPrecisionTransformations::setLayerTransformationsManager(
-    ILayerTransformationsManager* layerTransformationsManager,
-    std::map<std::string, LayerTransformationPtr>& transformations) noexcept {
-    for (const auto& entry : transformations) {
-        entry.second->setLayerTransformationsManager(layerTransformationsManager);
-    }
-}
-
-// Builds the default transformation set, all constructed with the same params.
-// Keys are lower-case layer type names, matching the lower-cased lookups used elsewhere.
-LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const LayerTransformation::Params& params) {
-    typedef std::map<std::string, LayerTransformationPtr> Registry;
-
-    // Branch-specific transformations.
-    const Registry branchSpecific = {
-        { "concat", LayerTransformationPtr(new ConcatMultiChannelsTransformation(params)) }
-    };
-
-    // Regular per-layer transformations.
-    const Registry regular = {
-        { "convolution", LayerTransformationPtr(new ConvolutionTransformation(params)) },
-        { "pooling", LayerTransformationPtr(new PoolingTransformation(params)) },
-        { "fakequantize", LayerTransformationPtr(new FakeQuantizeTransformation(params)) },
-        { "reshape", LayerTransformationPtr(new ReshapeTransformation(params)) },
-        { "fullyconnected", LayerTransformationPtr(new FullyConnectedTransformation(params)) },
-        { "gemm", LayerTransformationPtr(new GemmTransformation(params)) },
-        { "permute", LayerTransformationPtr(new PermuteTransformation(params)) },
-        { "squeeze", LayerTransformationPtr(new SqueezeTransformation(params)) },
-        { "relu", LayerTransformationPtr(new ActivationTransformation(params)) },
-        { "mvn", LayerTransformationPtr(new MvnTransformation(params)) },
-        { "eltwise", LayerTransformationPtr(new EltwiseTransformation(params)) },
-        { "resample", LayerTransformationPtr(new ResampleTransformation(params)) },
-        { "power", LayerTransformationPtr(new PowerTransformation(params)) },
-        { "depthtospace", LayerTransformationPtr(new DepthToSpaceTransformation(params)) },
-        { "normalize", LayerTransformationPtr(new NormalizeTransformation(params)) }
-    };
-
-    // Cleanup transformations.
-    const Registry cleanup = {
-        { "fakequantize", LayerTransformationPtr(new FuseFakeQuantizeAndScaleShiftTransformation(params)) },
-        { "scaleshift", LayerTransformationPtr(new ScaleShiftToConvolutionTransformation(params)) }
-    };
-
-    return LowPrecisionTransformations(branchSpecific, regular, cleanup);
-}
-
-// Default constructor: uses the set returned by getAllTransformations().
-LowPrecisionTransformer::LowPrecisionTransformer()
-    : transformations(LowPrecisionTransformer::getAllTransformations()) {
-}
-
-// Uses a caller-supplied transformation set (copied).
-LowPrecisionTransformer::LowPrecisionTransformer(const LowPrecisionTransformations& transformations)
-    : transformations(transformations) {
-}
-
-// Renames every layer whose type equals `type` (exact, case-sensitive match) to
-// "<type><N>", where N counts matching layers from 1 in the order they appear in `layers`.
-void LowPrecisionTransformer::renameLayersByType(const std::vector<CNNLayerPtr>& layers, const std::string& type) {
-    size_t nextNumber = 1;
-    for (const CNNLayerPtr& candidate : layers) {
-        if (candidate->type == type) {
-            candidate->name = candidate->type + std::to_string(nextNumber);
-            ++nextNumber;
-        }
-    }
-}
-
-// Assigns new names to all layers of `network`, derived from layer types and their
-// position in the layer list. Three passes are made over the layers:
-//   1. layers of the "standalone" types are numbered per type;
-//   2. FakeQuantize layers are named after their Convolution consumer, or numbered;
-//   3. all remaining layers are named after their consumer (some Const layers) or numbered.
-void LowPrecisionTransformer::rename(ICNNNetwork& network) const {
-    TransformationContext context(network);
-
-    // Pass 1: each standalone type gets its own counter (see renameLayersByType),
-    // so the iteration order of the unordered set does not affect the resulting names.
-    const std::unordered_set<std::string> standaloneLayerTypes = {"Convolution", "Concat", "Eltwise",
-                                                                  "Reshape", "Pooling", "Clamp"};
-    for (const std::string& standaloneLayerType : standaloneLayerTypes) {
-        renameLayersByType(context.getLayers(), standaloneLayerType);
-    }
-
-    // Pass 2: FakeQuantize layers.
-    size_t fakeQuantizeNumber = 1;
-    for (size_t i = 0lu; i < context.getLayers().size(); ++i) {
-        const CNNLayerPtr layer = context.getLayers()[i];
-        if (layer->type != "FakeQuantize") {
-            continue;
-        }
-
-        const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(*layer);
-        if ((children.size() == 1) && (children[0]->type == "Convolution")) {
-            // A FakeQuantize feeding exactly one Convolution is named after that Convolution
-            // (already renamed in pass 1); index 0 is labelled "data", any other index "weights".
-            // NOTE(review): getIndex presumably returns the consumer input port - confirm.
-            const std::string postfix = CNNNetworkHelper::getIndex(*layer) == 0 ? "data" : "weights";
-            layer->name = children[0]->name + "_FakeQuantize_" + postfix;
-        } else {
-            layer->name = layer->type + std::to_string(fakeQuantizeNumber);
-            ++fakeQuantizeNumber;
-        }
-    }
-
-    // Pass 3: everything not handled by the first two passes.
-    size_t otherNumber = 1;
-    for (size_t i = 0; i < context.getLayers().size(); ++i) {
-        std::string name;
-        const CNNLayerPtr layer = context.getLayers()[i];
-        if ((standaloneLayerTypes.find(layer->type) != standaloneLayerTypes.end()) || (layer->type == "FakeQuantize")) {
-            continue;
-        }
-
-        if (layer->type == "Const") {
-            // A Const with a single Convolution or FakeQuantize consumer is named after that consumer.
-            const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(*layer);
-            if (children.size() == 1) {
-                if (children[0]->type == "Convolution") {
-                    const std::string postfix = CNNNetworkHelper::getIndex(*layer) == 1 ? "weights" : "biases";
-                    name = children[0]->name + "_Const_" + postfix;
-                } else if (children[0]->type == "FakeQuantize") {
-                    name = children[0]->name + "_Const_" + std::to_string(CNNNetworkHelper::getIndex(*layer));
-                }
-            }
-        }
-
-        // Fallback: "<type><N>" with one counter shared by all remaining layer types.
-        if (name.empty()) {
-            name = layer->type + std::to_string(otherNumber);
-            ++otherNumber;
-        }
-
-        layer->name = name;
-    }
-}
-
-// Runs the low precision pipeline on `network`.
-//
-// The network is left untouched when it has no FakeQuantize layer, or when no FakeQuantize
-// layer has a supported number of levels. Otherwise the stages run in this order over
-// the layers held by a TransformationContext:
-//   - branch-specific transformations,
-//   - step #1: the FakeQuantize transformation,
-//   - step #2: regular layer transformations,
-//   - step #3: cleanup transformations.
-//
-// Throws if no "FakeQuantize" transformation is registered, or if the optional model
-// serialization (LPT_ORIGINAL_MODEL_PATH / LPT_TRANSFORMED_MODEL_PATH) reports an error.
-void LowPrecisionTransformer::transform(ICNNNetwork& network) {
-    OV_ITT_SCOPED_TASK(itt::domains::LPT, "LowPrecisionTransformer::transform");
-
-#ifdef LPT_ORIGINAL_MODEL_PATH
-    ResponseDesc originalModelResponse;
-    network.serialize(
-        std::string(LPT_ORIGINAL_MODEL_PATH) + ".xml",
-        std::string(LPT_ORIGINAL_MODEL_PATH) + ".bin",
-        &originalModelResponse);
-    if (originalModelResponse.msg[0] != '\0') {
-        THROW_IE_EXCEPTION << "LowPrecisionTransformer::transform: " << LPT_ORIGINAL_MODEL_PATH << ": " << originalModelResponse.msg;
-    }
-#endif
-    // Scan for a FakeQuantize layer with a supported "levels" value; stop at the first one.
-    auto it = details::CNNNetworkIterator(&network);
-    auto end = details::CNNNetworkIterator();
-    bool fqFound = false;
-    bool allFQareUnsupported = true;
-    while (it != end) {
-        if (CaselessEq<std::string>()((*it)->type, "FakeQuantize")) {
-            fqFound = true;
-            if (QuantizationDetails::isSupportedLevel((*it)->GetParamAsUInt("levels"))) {
-                allFQareUnsupported = false;
-                break;
-            }
-        }
-        it++;
-    }
-    // If network does not have FakeQuantize layers
-    // or all found FQ layers are binary - do nothing and return
-    if (!fqFound || allFQareUnsupported) return;
-
-    transformations.setParamsManager(this);
-    transformations.setLayerTransformationsManager(this);
-
-    TransformationContext context(network);
-
-    // The layer list is re-read through context.getLayers() on every iteration of the
-    // loops below; null entries are skipped.
-
-    // TODO: branch specific transformations execution
-    for (size_t i = 0lu; i < context.getLayers().size(); ++i) {
-        const CNNLayerPtr layer = context.getLayers()[i];
-        if (layer == nullptr) {
-            continue;
-        }
-
-        std::string type = layer->type;
-        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
-        const auto found = transformations.branchSpecificTransformations.find(type);
-        if (found == transformations.branchSpecificTransformations.end()) {
-            continue;
-        }
-        found->second->transform(context, *layer);
-    }
-
-    // Step #1: FakeQuantize layer transformation execution
-    LayerTransformationPtr fqTransformation = transformations.find("FakeQuantize");
-    if (fqTransformation == nullptr) {
-        THROW_IE_EXCEPTION << "FakeQuantize transformation was not found";
-    }
-    for (size_t i = 0lu; i < context.getLayers().size(); ++i) {
-        const CNNLayerPtr layer = context.getLayers()[i];
-        if (layer == nullptr) {
-            continue;
-        }
-
-        if (CaselessEq<std::string>()(layer->type, "FakeQuantize")) {
-            fqTransformation->transform(context, *layer);
-        }
-    }
-
-    // Step #2: layer transformations execution
-    for (size_t i = 0; i < context.getLayers().size(); ++i) {
-        const CNNLayerPtr layer = context.getLayers()[i];
-        if (layer == nullptr) {
-            continue;
-        }
-
-        // Must start as false: it is read by the diagnostic output below even when
-        // no transformation is registered for this layer type.
-        bool transformed = false;
-
-        std::string type = layer->type;
-        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
-        const auto found = transformations.transformations.find(type);
-        if (found != transformations.transformations.end()) {
-            found->second->transform(context, *layer);
-            transformed = true;
-        }
-
-#ifdef DISPLAY_PECISION
-        CNNLayerPtr transformedLayer = CNNNetworkHelper::getLayer(context.network, layer->name);
-        if (transformedLayer == nullptr) {
-            if (layer->type == "FakeQuantize") {
-                std::cout << "Layer " << layer->name << ": " << QuantizationDetails::getDetails(*layer) << std::endl;
-            }
-
-            std::cout << "Layer was " << (transformed ? "transformed: " : "skipped: ") << layer->type << ", "
-                      << layer->name << ": [REMOVED]" << std::endl;
-        } else {
-            if (transformedLayer->type == "FakeQuantize") {
-                std::cout << "Layer " << transformedLayer->name << ": "
-                          << QuantizationDetails::getDetails(*transformedLayer) << std::endl;
-            }
-
-            std::cout << "Layer was " << (transformed ? "transformed: " : "skipped: ") << transformedLayer->type << ", "
-                      << transformedLayer->name << ", output layer precision: "
-                      << ((transformedLayer->outData.size() != 0) ? transformedLayer->outData[0]->getPrecision()
-                                                                  : Precision::UNSPECIFIED)
-                      << std::endl;
-        }
-
-#endif
-    }
-
-    // Step #3: cleanup transformations execution
-    for (size_t i = 0; i < context.getLayers().size(); ++i) {
-        const CNNLayerPtr layer = context.getLayers()[i];
-        if (layer == nullptr) {
-            continue;
-        }
-
-        std::string type = layer->type;
-        std::transform(type.begin(), type.end(), type.begin(), ::tolower);
-        const auto found = transformations.cleanupTransformations.find(type);
-        if (found != transformations.cleanupTransformations.end()) {
-            found->second->transform(context, *layer);
-        }
-    }
-
-#ifdef LPT_TRANSFORMED_MODEL_PATH
-    ResponseDesc transformedModelResponse;
-    network.serialize(
-        std::string(LPT_TRANSFORMED_MODEL_PATH) + ".xml",
-        std::string(LPT_TRANSFORMED_MODEL_PATH) + ".bin",
-        &transformedModelResponse);
-    if (transformedModelResponse.msg[0] != '\0') {
-        THROW_IE_EXCEPTION << "LowPrecisionTransformer::transform: " << LPT_TRANSFORMED_MODEL_PATH << ": " << transformedModelResponse.msg;
-    }
-#endif
-}
-
-// Returns the activation precisions reported by the transformation registered for
-// layerType (matched case-insensitively), or an empty vector when none is registered.
-std::vector<Precision> LowPrecisionTransformer::getPrecisionsOnActivations(const std::string& layerType) const noexcept {
-    std::string key(layerType);
-    std::transform(key.begin(), key.end(), key.begin(), ::tolower);
-
-    const LayerTransformationPtr handler = transformations.find(key);
-    if (handler != nullptr) {
-        return handler->getPrecisionsOnActivations();
-    }
-    return std::vector<Precision>();
-}
-
-// Asks the transformation registered for the layer's type (matched case-insensitively)
-// whether the layer is quantized; returns false when no transformation is registered.
-bool LowPrecisionTransformer::isQuantized(const CNNLayer& layer) const noexcept {
-    std::string key(layer.type);
-    std::transform(key.begin(), key.end(), key.begin(), ::tolower);
-
-    const LayerTransformationPtr handler = transformations.find(key);
-    if (handler != nullptr) {
-        return handler->isQuantized(layer);
-    }
-    return false;
-}
-
-// Asks the transformation registered for the layer's type (matched case-insensitively)
-// whether it preserves precision; returns false when no transformation is registered.
-bool LowPrecisionTransformer::isPrecisionPreserved(const CNNLayer& layer) const noexcept {
-    std::string key(layer.type);
-    std::transform(key.begin(), key.end(), key.begin(), ::tolower);
-
-    const LayerTransformationPtr handler = transformations.find(key);
-    if (handler != nullptr) {
-        return handler->isPrecisionPreserved(layer);
-    }
-    return false;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/transparent_base_transformation.hpp"
-
-#include <algorithm>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-// Handles a layer whose first parent is either a Concat or a ScaleShift.
-//  - Concat parent: only the output precision is updated (when updatePrecisions is set).
-//  - ScaleShift parent: its scales/shifts are read, the ScaleShift is removed, and a
-//    dequantization layer with those values is added via addDequantizationLayer.
-// Any other parent type (or no parent) leaves the layer untouched.
-void TransparentBaseTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
-    const CNNLayerPtr parent = CNNNetworkHelper::getParent(layer, 0);
-    if (parent == nullptr) {
-        return;
-    }
-
-    if (parent->type == "Concat") {
-        if (updatePrecisions) {
-            // TODO: looks like as workaround for Concat -> Pooling -> Concat: refactor later
-            CNNNetworkHelper::setOutDataPrecision(layer, CNNNetworkHelper::getPrecisionParent(layer, 0ul));
-        }
-        return;
-    }
-
-    if (parent->type != "ScaleShift") {
-        return;
-    }
-
-    if (updatePrecisions) {
-        CNNNetworkHelper::setOutDataPrecision(layer, getPrecisionBeforeParentDequantizationScaleShift(layer));
-    }
-
-    std::vector<float> scales;
-    std::vector<float> shifts;
-    fillFromDequantizationLayer(*parent, scales, shifts);
-
-    // When the channel count changes across the layer, only per-tensor dequantization can
-    // be carried over: the single scale/shift is replicated for every output channel.
-    const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(layer);
-    if (outputChannelsCount != CNNNetworkHelper::getInputChannelsCount(layer)) {
-        if (!DequantizationDetails::isPerTensor(scales, shifts)) {
-            THROW_IE_LPT_EXCEPTION(layer) << "input and output channels count values are different for per channel quantization";
-        }
-        const float tensorScale = scales[0];
-        const float tensorShift = shifts[0];
-        scales = std::vector<float>(outputChannelsCount, tensorScale);
-        shifts = std::vector<float>(outputChannelsCount, tensorShift);
-    }
-
-    CNNNetworkHelper::removeLayer(context.network, parent);
-    context.removeLayer(*parent);
-
-    addDequantizationLayer(context, layer, scales, shifts);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformations/weightable_layer_transformation.hpp"
-
-#include <algorithm>
-#include <caseless.hpp>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "low_precision_transformations/common/ie_lpt_exception.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-// Copies `values` into a newly allocated float array owned by the returned shared_ptr
-// (released with delete[]). The batchSize argument is not used.
-std::shared_ptr<float> broadcastActivations(const size_t batchSize, const std::vector<float>& values) {
-    std::shared_ptr<float> buffer(new float[values.size()], std::default_delete<float[]>());
-    std::copy(values.cbegin(), values.cend(), buffer.get());
-    return buffer;
-}
-
-// Copies `shiftsPerOuputChannel` into a newly allocated float array owned by the returned
-// shared_ptr (released with delete[]). The filtersCount argument is not used.
-std::shared_ptr<float> broadcastWeights(const size_t filtersCount, const std::vector<float>& shiftsPerOuputChannel) {
-    std::shared_ptr<float> buffer(new float[shiftsPerOuputChannel.size()], std::default_delete<float[]>());
-    std::copy(shiftsPerOuputChannel.cbegin(), shiftsPerOuputChannel.cend(), buffer.get());
-    return buffer;
-}
-
-// Creates a blob matching the tensor descriptor of the layer's first output, fills it
-// from `values` via fillBlobByFP32 and stores it on the layer under the "custom" key.
-void fillConstBlob(CNNLayer& layer, const std::vector<float>& values) {
-    Blob::Ptr customBlob = CNNNetworkHelper::makeNewBlobPtr(layer.outData[0]->getTensorDesc());
-    customBlob->allocate();
-
-    const float* const source = values.data();
-    CNNNetworkHelper::fillBlobByFP32(customBlob, source);
-
-    layer.blobs["custom"] = customBlob;
-}
-
-// Decides whether a weightable layer can be handled by this transformation.
-//
-// Throws when the layer shape is malformed: no inputs or more than three inputs,
-// an output count other than one, or a missing parent on input 0.
-// Returns false when:
-//   - the base LayerTransformation check fails;
-//   - the parent on input 0 is not a ScaleShift;
-//   - the layer is not depthwise and the ScaleShift "weights" values are not all equal;
-//   - there is no parent on input 1 (weights), or that parent is a network output;
-//   - the weights parent has a child count other than one, or it is a Const whose
-//     output precision is not I8.
-bool WeightableLayerTransformation::canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const {
-    if (!LayerTransformation::canBeTransformed(context, layer)) {
-        return false;
-    }
-
-    // The two bounds must be joined with "or": joined with "and" the condition is
-    // never true and the check rejects nothing.
-    if ((layer.insData.size() == 0) || (layer.insData.size() > 3)) {
-        THROW_IE_EXCEPTION << "layer inputs '" << layer.insData.size() << "' is not correct";
-    }
-
-    if (layer.outData.size() != 1) {
-        THROW_IE_EXCEPTION << "layer outputs '" << layer.outData.size() << "' is not correct";
-    }
-
-    const CNNLayerPtr scaleShiftLayer = CNNNetworkHelper::getParent(layer, 0);
-    if (!scaleShiftLayer) {
-        THROW_IE_EXCEPTION << "input is absent";
-    }
-
-    // TODO: check if scaleshift is dequantization
-    // (context.dequantizationLayersNames.find(scaleShiftLayer->name) == context.dequantizationLayersNames.end())
-    if (scaleShiftLayer->type != "ScaleShift") {
-        return false;
-    }
-
-    const bool isDepthwiseConvolution = isDepthwise(layer);
-    if (!isDepthwiseConvolution) {
-        // TODO: move scale values validation to standalone method for FullyConnected & GEMM
-        // Every scale must equal its predecessor, i.e. all scales are the same value.
-        const Blob::Ptr scalesBlob = CNNNetworkHelper::getBlob(scaleShiftLayer, "weights");
-        const auto scalesBuffer = CNNNetworkHelper::getFloatData(scalesBlob);
-        for (size_t i = 1lu; i < scalesBlob->size(); ++i) {
-            if (scalesBuffer.get()[i - 1] != scalesBuffer.get()[i]) {
-                return false;
-            }
-        }
-    }
-
-    const CNNLayerPtr parentOnWeights = CNNNetworkHelper::getParent(layer, 1);
-    if (parentOnWeights == nullptr) {
-        return false;
-    }
-
-    // A weights parent that is itself a network output is not transformed.
-    OutputsDataMap outputsInfo;
-    context.network.getOutputsInfo(outputsInfo);
-    if (outputsInfo.find(parentOnWeights->name) != outputsInfo.end()) return false;
-
-    const std::vector<CNNLayerPtr> weightsChildren = CNNNetworkHelper::getChildren(*parentOnWeights);
-    if ((weightsChildren.size() != 1lu) || (CaselessEq<std::string>()(parentOnWeights->type, "Const") &&
-        (parentOnWeights->outData[0]->getPrecision() != Precision::I8))) {
-        return false;
-    }
-
-    return true;
-}
-
-// Reports whether the weightable layer counts as quantized.
-// Returns false when weights are not supported for the layer. When
-// isQuantizedConstWeights reports false the layer is accepted without further checks.
-// Otherwise the weights blob, the optional biases blob and the weights parent are checked.
-bool WeightableLayerTransformation::isQuantized(const CNNLayer& layer) const noexcept {
-    if (!CNNNetworkHelper::isWeightsSupported(layer)) {
-        return false;
-    }
-
-    if (!CNNNetworkHelper::isQuantizedConstWeights(layer)) {
-        return true;
-    }
-
-    // The weights blob must exist and have a supported precision.
-    const Blob::Ptr weights = CNNNetworkHelper::getWeights(layer, roundQuantizedValues);
-    if (weights == nullptr) {
-        return false;
-    }
-    if (!CNNNetworkHelper::isBlobPrecisionSupported(weights->getTensorDesc().getPrecision())) {
-        return false;
-    }
-
-    // Biases are optional, but when present their precision must be supported as well.
-    const Blob::Ptr biases = CNNNetworkHelper::getBiases(layer);
-    if (biases != nullptr) {
-        if (!CNNNetworkHelper::isBlobPrecisionSupported(biases->getTensorDesc().getPrecision())) {
-            return false;
-        }
-    }
-
-    const CNNLayerPtr weightsParent = CNNNetworkHelper::getParent(layer, 1);
-    if (weightsParent == nullptr) {
-        return false;
-    }
-
-    // A weights parent other than FakeQuantize must already produce I8 or U8 data.
-    if (weightsParent->type != "FakeQuantize") {
-        const Precision weightsPrecision = weightsParent->outData[0]->getPrecision();
-        const bool isInt8 = (weightsPrecision == Precision::I8) || (weightsPrecision == Precision::U8);
-        if (!isInt8) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-// Always reports false: the layer argument is not inspected.
-bool WeightableLayerTransformation::isPrecisionPreserved(const CNNLayer& /*layer*/) const noexcept {
-    const bool precisionPreserved = false;
-    return precisionPreserved;
-}
-
-// Returns true unless the first input of the layer has exactly three dimensions.
-// Throws when the first input data is no longer available.
-bool WeightableLayerTransformation::getDequantizationDimIsSupported(const CNNLayer& fullyConnected) {
-    const DataPtr firstInput = fullyConnected.insData[0].lock();
-    if (firstInput == nullptr) {
-        THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
-    }
-
-    const size_t rank = firstInput->getDims().size();
-    return rank != 3ul;
-}
-
-// Folds the dequantization shifts into the biases of a weightable layer.
-//
-// Work is done only when the dequantization shifts are not all zero, or when the
-// dequantization dimension is unsupported and the layer already has a biases parent
-// (input 2). In that case:
-//   - a zero-filled Const biases layer named "<layer>_Biases" is created if none exists;
-//   - each bias becomes (biasesShifts + bias) / dequantizationScales, using element 0 of
-//     both vectors for every channel when the input has three dimensions;
-//   - dequantizationShifts is reset to zeros.
-//
-// Throws when the input data is absent, when a biases layer has to be created for a layer
-// whose output count is not one, or when an existing biases blob size does not match
-// dequantizationShifts (the size check is skipped for three-dimensional inputs).
-void WeightableLayerTransformation::updateLayerBiases(
-    TransformationContext& context,
-    const CNNLayer& weightableLayer,
-    const bool biasesDimsAsOutput,
-    std::vector<float>& dequantizationScales,
-    std::vector<float>& dequantizationShifts,
-    std::vector<float>& biasesShifts) const {
-    const bool dequantizationShiftsAreZero = std::all_of(
-        dequantizationShifts.begin(),
-        dequantizationShifts.end(),
-        [](float value) { return value == 0.0; });
-
-    const bool dequantizationDimIsNotSupported = !getDequantizationDimIsSupported(weightableLayer);
-    CNNLayerPtr biasesLayer = CNNNetworkHelper::getParent(weightableLayer, 2);
-
-    // we need to correct biases if dequantization shifts values are not zero or
-    // dequantization dimension is not supported (as result dequantization shifts can not be calculated)
-    if ((dequantizationDimIsNotSupported && (biasesLayer != nullptr)) || (!dequantizationShiftsAreZero)) {
-        const DataPtr insData = weightableLayer.insData[0].lock();
-        if (insData == nullptr) {
-            THROW_IE_LPT_EXCEPTION(weightableLayer) << "input data is absent";
-        }
-        const std::vector<size_t> insDataDims = insData->getTensorDesc().getDims();
-
-        std::shared_ptr<float> biasesBufferPtr;
-        Blob::Ptr biasesBlob;
-        if (biasesLayer == nullptr) {
-            // No biases input yet: create a zero-filled Const layer and attach it.
-            if (weightableLayer.outData.size() != 1ul) {
-                THROW_IE_LPT_EXCEPTION(weightableLayer) << "unexpected output data count " << weightableLayer.outData.size();
-            }
-            const DataPtr outData = weightableLayer.outData[0];
-            // Biases take the output dims when requested; otherwise a single dimension:
-            // the third input dim for 3D inputs, else the number of dequantization shifts.
-            const std::vector<size_t> biasesDims = biasesDimsAsOutput ?
-                outData->getDims() :
-                std::vector<size_t>({ insDataDims.size() == 3ul ? insDataDims[2] : dequantizationShifts.size() });
-            const Layout biasesLayout = InferenceEngine::TensorDesc::getLayoutByDims(biasesDims);
-
-            biasesBlob = CNNNetworkHelper::makeNewBlobPtr(TensorDesc(Precision::FP32, biasesDims, biasesLayout));
-            biasesBlob->allocate();
-
-            biasesBufferPtr = CNNNetworkHelper::getFloatData(biasesBlob);
-            float* biasesBuffer = biasesBufferPtr.get();
-            std::fill(biasesBuffer, biasesBuffer + biasesBlob->size(), 0.f);
-
-            LayerParams biasesLayerParams{ weightableLayer.name + "_Biases", "Const", outData->getTensorDesc().getPrecision() };
-            biasesLayer = CNNNetworkHelper::addLayer(
-                context,
-                nullptr,
-                std::make_shared<CNNLayer>(weightableLayer),
-                std::make_shared<CNNLayer>(biasesLayerParams));
-            biasesLayer->blobs["custom"] = biasesBlob;
-            biasesLayer->outData[0]->reshape(biasesDims, biasesLayout);
-        } else {
-            // Existing biases: reuse the "custom" blob after validating its size.
-            biasesBlob = CNNNetworkHelper::getBlob(biasesLayer, "custom");
-            // NOTE(review): this shadows the insData already locked and checked above.
-            DataPtr insData = weightableLayer.insData[0].lock();
-            if (insData == nullptr) {
-                THROW_IE_LPT_EXCEPTION(weightableLayer) << "input data is absent";
-            }
-
-            if ((insData->getDims().size() != 3) && (biasesBlob->size() != dequantizationShifts.size())) {
-                THROW_IE_LPT_EXCEPTION(weightableLayer) <<
-                    "dequantization shifts size " << dequantizationShifts.size() <<
-                    " is not equal biases blob size " << biasesBlob->size();
-            }
-            biasesBufferPtr = CNNNetworkHelper::getFloatData(biasesBlob);
-        }
-        const float* biasesBuffer = biasesBufferPtr.get();
-        std::vector<float> biases(biasesBlob->size());
-        // 3D inputs use the first shift/scale for every channel; otherwise per-channel values.
-        const bool broadcast = insDataDims.size() == 3ul;
-        for (size_t channel = 0ul; channel < biases.size(); ++channel) {
-            biases[channel] = broadcast ?
-                (biasesShifts[0] + biasesBuffer[channel]) / dequantizationScales[0] :
-                (biasesShifts[channel] + biasesBuffer[channel]) / dequantizationScales[channel];
-        }
-        // The shifts now live in the biases, so the dequantization shifts are cleared.
-        std::fill(dequantizationShifts.begin(), dequantizationShifts.end(), 0.f);
-        CNNNetworkHelper::updateBlobs(*biasesLayer, "custom", biases);
-    }
-}
-
-// Rescales existing biases by the first dequantization scale and clears the shifts.
-//
-// Does nothing when the layer has no biases parent (input 2). Otherwise every bias is
-// divided by dequantizationScales[0], dequantizationShifts is reset to zeros and the
-// "custom" blob of the biases layer is rewritten. The context, biasesDimsAsOutput and
-// biasesShifts arguments are not used.
-//
-// Throws when the input data is absent, or when the biases blob size differs from the
-// number of dequantization shifts (the size check is skipped for three-dimensional inputs).
-void WeightableLayerTransformation::updateLayerBiasesFcSpecific(
-    TransformationContext& context,
-    const CNNLayer& weightableLayer,
-    const bool biasesDimsAsOutput,
-    std::vector<float>& dequantizationScales,
-    std::vector<float>& dequantizationShifts,
-    std::vector<float>& biasesShifts) const {
-    CNNLayerPtr biasesParent = CNNNetworkHelper::getParent(weightableLayer, 2);
-    if (biasesParent == nullptr) {
-        return;
-    }
-
-    Blob::Ptr storedBiases = CNNNetworkHelper::getBlob(biasesParent, "custom");
-    DataPtr firstInput = weightableLayer.insData[0].lock();
-    if (firstInput == nullptr) {
-        THROW_IE_LPT_EXCEPTION(weightableLayer) << "input data is absent";
-    }
-
-    const bool inputIs3D = firstInput->getDims().size() == 3;
-    if (!inputIs3D && (storedBiases->size() != dequantizationShifts.size())) {
-        THROW_IE_LPT_EXCEPTION(weightableLayer) <<
-            "dequantization shifts size " << dequantizationShifts.size() <<
-            " is not equal biases blob size " << storedBiases->size();
-    }
-
-    std::shared_ptr<float> storedValues = CNNNetworkHelper::getFloatData(storedBiases);
-    const float* const source = storedValues.get();
-
-    // The scale is read inside the loop so an empty biases blob never touches element 0.
-    std::vector<float> rescaled(storedBiases->size());
-    for (size_t index = 0ul; index < rescaled.size(); ++index) {
-        rescaled[index] = source[index] / dequantizationScales[0];
-    }
-
-    std::fill(dequantizationShifts.begin(), dequantizationShifts.end(), 0.f);
-    CNNNetworkHelper::updateBlobs(*biasesParent, "custom", rescaled);
-}
-
-// Rewrites the interval blobs of the weights FakeQuantize layer `parent` according to the
-// configured quantizedTensorAlignmentOnWeights mode.
-//   - None:  blobs 3 and 4 are replaced with outputLowValues / outputHighValues.
-//   - Mixed: input and output intervals are rescaled (see below), blobs 1-4 are rewritten,
-//            and the "levels" parameter and QuantizeLayer::levels are updated. In this mode
-//            outputLowValues / outputHighValues are modified in place.
-//   - UpdateIntervals, UpdateLevel and any other value: throws.
-// NOTE(review): blob indices 1-4 presumably map to input low/high and output low/high - confirm.
-void WeightableLayerTransformation::updateWeights(TransformationContext& context, const CNNLayerPtr parent, std::vector<float>& outputLowValues,
-                                                  std::vector<float>& outputHighValues) const {
-    const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(*parent);
-    // TODO: refactor: move to standalone method
-    switch (quantizedTensorAlignmentOnWeights) {
-    case LayerTransformation::QuantizedTensorAlignment::None: {
-        CNNNetworkHelper::updateBlobs(context, *parent, 3, outputLowValues);
-        CNNNetworkHelper::updateBlobs(context, *parent, 4, outputHighValues);
-        break;
-    }
-    case LayerTransformation::QuantizedTensorAlignment::UpdateIntervals:
-    case LayerTransformation::QuantizedTensorAlignment::UpdateLevel: {
-        THROW_IE_EXCEPTION << "not implemented for weights " << quantizedTensorAlignmentOnWeights;
-    }
-    case LayerTransformation::QuantizedTensorAlignment::Mixed: {
-        // Despite their names, both variables hold maxima: the largest |low| value and the
-        // largest high value over all intervals with a finite width.
-        float minOutputIntervalLowValue = 0.0;
-        float maxOutputIntervalHighValue = 0.0;
-
-        for (size_t i = 0lu; i < quantizationDetails.outputLowValues.size(); ++i) {
-            const float outputInterval = fabs(outputHighValues[i] - outputLowValues[i]);
-            if (std::isinf(outputInterval)) {
-                continue;
-            }
-
-            if (minOutputIntervalLowValue < fabs(outputLowValues[i])) {
-                minOutputIntervalLowValue = fabs(outputLowValues[i]);
-            }
-            if (maxOutputIntervalHighValue < outputHighValues[i]) {
-                maxOutputIntervalHighValue = outputHighValues[i];
-            }
-        }
-
-        if (quantizationDetails.inputIntervalsCount != 1) {
-            // TODO: complete later
-            THROW_IE_EXCEPTION << "multi input interval temporary is not supported, layer " << parent->name;
-        }
-
-        // Scale each interval by the ratio of the common bound to its own bound, so that all
-        // output intervals end up with the same (rounded) limits; a zero bound gets factor 0.
-        std::vector<float> inputLowValues(quantizationDetails.outputIntervalsCount);
-        std::vector<float> inputHighValues(quantizationDetails.outputIntervalsCount);
-        for (size_t i = 0; i < quantizationDetails.outputIntervalsCount; ++i) {
-            const float minK = outputLowValues[i] == 0.0 ? 0.0 : (minOutputIntervalLowValue / fabs(outputLowValues[i]));
-            inputLowValues[i] = quantizationDetails.getInputLowValue(i) * minK;
-            outputLowValues[i] = roundf(outputLowValues[i] * minK);
-
-            const float maxK =
-                outputHighValues[i] == 0.0 ? 0.0 : (maxOutputIntervalHighValue / fabs(outputHighValues[i]));
-            inputHighValues[i] = quantizationDetails.getInputHighValue(i) * maxK;
-            outputHighValues[i] = roundf(outputHighValues[i] * maxK);
-        }
-
-        CNNNetworkHelper::updateBlobs(context, *parent, 1, inputLowValues);
-        CNNNetworkHelper::updateBlobs(context, *parent, 2, inputHighValues);
-        CNNNetworkHelper::updateBlobs(context, *parent, 3, outputLowValues);
-        CNNNetworkHelper::updateBlobs(context, *parent, 4, outputHighValues);
-
-        // The new level count is written both to the string params and to the typed field.
-        const size_t levels = static_cast<size_t>(roundf(minOutputIntervalLowValue + maxOutputIntervalHighValue + 1.0));
-        parent->params["levels"] = std::to_string(levels);
-        QuantizeLayer* fakeQuantizeLayer = dynamic_cast<QuantizeLayer*>(parent.get());
-        if (fakeQuantizeLayer == nullptr) {
-            THROW_IE_EXCEPTION << "incorrect type for layer " << parent->name;
-        }
-        fakeQuantizeLayer->levels = levels;
-
-        break;
-    }
-    default: {
-        THROW_IE_EXCEPTION << "unexpected value " << quantizedTensorAlignmentOnWeights;
-    }
-    }
-}
-
-// Inserts subtraction (zero point) layers on the data and/or weights path of `layer`
-// when the corresponding shifts, converted to the low precision, are not all zero.
-//
-// Nothing is done unless the parent on input 0 is a ScaleShift. The data path is handled
-// first, then the weights path; each uses createAsymmetric.
-//
-// Throws when the parent on input 0 is absent, or when a weights subtraction is needed
-// but the parent on input 1 is absent (both were dereferenced unchecked before).
-void WeightableLayerTransformation::updateToSupportAsymmetricQuantization(
-    TransformationContext& context,
-    const CNNLayer& layer,
-    const PrecisionsInfo& dataPrecisionsInfo,
-    std::vector<float>& dataShifts,
-    const PrecisionsInfo& weightsPrecisionsInfo,
-    std::vector<float>& weightsShifts) const {
-    const CNNLayerPtr parentOnData = CNNNetworkHelper::getParent(layer, 0ul);
-    if (parentOnData == nullptr) {
-        THROW_IE_LPT_EXCEPTION(layer) << "input is absent";
-    }
-
-    if (parentOnData->type == "ScaleShift") {  // FIXME: it is always true
-        // The check is made on the converted values: shifts that become zero in the low
-        // precision need no subtraction layer.
-        const std::shared_ptr<float> dataConvertedInBlob = CNNNetworkHelper::convertFloatData(
-            dataShifts.data(),
-            dataShifts.size(),
-            dataPrecisionsInfo.low);
-        if (!std::all_of(dataConvertedInBlob.get(), dataConvertedInBlob.get() + dataShifts.size(), [](float value) { return value == 0.0; })) {
-            createAsymmetric(context, *parentOnData, layer, dataPrecisionsInfo, dataShifts, false);
-        }
-
-        const std::shared_ptr<float> weightsConvertedInBlob = CNNNetworkHelper::convertFloatData(
-            weightsShifts.data(),
-            weightsShifts.size(),
-            weightsPrecisionsInfo.low);
-        if (!std::all_of(weightsConvertedInBlob.get(), weightsConvertedInBlob.get() + weightsShifts.size(), [](float value) { return value == 0.0; })) {
-            const CNNLayerPtr parentOnWeights = CNNNetworkHelper::getParent(layer, 1ul);
-            if (parentOnWeights == nullptr) {
-                THROW_IE_LPT_EXCEPTION(layer) << "weights input is absent";
-            }
-            const bool onWeights = CNNNetworkHelper::isQuantizedConstWeights(layer);
-            createAsymmetric(context, *parentOnWeights, layer, weightsPrecisionsInfo, weightsShifts, onWeights);
-        }
-    }
-}
-
-// Inserts an Eltwise "sub" layer between `parent` and `child`, plus a Const layer feeding
-// that Eltwise and filled with `quantizationShifts`.
-//
-// New layers are named "<child>_Sub_<parent>" and "<child>_Const_<parent>". The Const
-// output is reshaped to all ones except one dimension: dimension 0 when onWeights is set,
-// dimension 1 otherwise.
-//
-// Throws when onWeights is set and the parent is not a FakeQuantize, when the child has no
-// inputs or its first input data is absent, when the input rank is not 2, 3, 4 or 5, or
-// when (for ranks other than 3) the Const layout or rank differs from the child's input.
-void WeightableLayerTransformation::createAsymmetric(TransformationContext& context, const CNNLayer& parent,
-                                                     const CNNLayer& child, const PrecisionsInfo& precisionsInfo,
-                                                     const std::vector<float>& quantizationShifts,
-                                                     const bool onWeights) const {
-    if (onWeights && (parent.type != "FakeQuantize")) {
-        THROW_IE_EXCEPTION << "unexpected layer type on weights " << parent.type;
-    }
-
-    if (child.insData.size() < 1ul) {
-        THROW_IE_EXCEPTION << "unexpected layer '" << child.name << "' inputs size " << child.insData.size();
-    }
-
-    const DataPtr insData = child.insData[0].lock();
-    if (insData == nullptr) {
-        THROW_IE_EXCEPTION << "insert data is absent for layer " << child.name;
-    }
-
-    const size_t dimsSize = insData->getDims().size();
-    if ((dimsSize != 2ul) && (dimsSize != 3ul) && (dimsSize != 4ul) && (dimsSize != 5ul)) {
-        THROW_IE_EXCEPTION << "unexpected dimensions size " << dimsSize << " layer " << child.type << " " << child.name;
-    }
-
-    // Subtraction layer placed between parent and child.
-    LayerParams eltwiseLayerParams {child.name + "_Sub_" + parent.name, "Eltwise", precisionsInfo.original};
-    std::shared_ptr<EltwiseLayer> eltwiseLayer = std::make_shared<EltwiseLayer>(eltwiseLayerParams);
-    eltwiseLayer->_operation = EltwiseLayer::eOperation::Sub;
-    eltwiseLayer->params["operation"] = "sub";
-    CNNNetworkHelper::addLayer(context, std::make_shared<CNNLayer>(parent), std::make_shared<CNNLayer>(child),
-                               eltwiseLayer);
-    if (updatePrecisions) {
-        CNNNetworkHelper::setOutDataPrecision({eltwiseLayer}, precisionsInfo.original);
-    }
-
-    // Const layer holding the shifts; it uses the low precision only when precisions are updated.
-    LayerParams constLayerParams {child.name + "_Const_" + parent.name, "Const",
-                                  updatePrecisions ? precisionsInfo.low : precisionsInfo.original};
-    CNNLayerPtr constLayer = std::make_shared<CNNLayer>(constLayerParams);
-    constLayer = CNNNetworkHelper::addLayer(context, nullptr, eltwiseLayer, constLayer);
-    if (updatePrecisions) {
-        CNNNetworkHelper::setOutDataPrecision({constLayer}, precisionsInfo.low);
-    }
-
-    // Layout and rank checks are skipped for three-dimensional inputs.
-    const TensorDesc constTensorDesc = constLayer->outData[0]->getTensorDesc();
-    if ((dimsSize != 3) && (constTensorDesc.getLayout() != insData->getTensorDesc().getLayout())) {
-        THROW_IE_EXCEPTION << "unexpected Const layer layout " << constTensorDesc.getLayout();
-    }
-    const SizeVector& constDims = constTensorDesc.getDims();
-    if ((dimsSize != 3) && (constDims.size() != insData->getTensorDesc().getDims().size())) {
-        THROW_IE_EXCEPTION << "unexpected dimension size " << constDims.size();
-    }
-
-    // Keep a single non-unit dimension: 0 for weights, 1 for activations.
-    SizeVector dims(constLayer->outData[0]->getTensorDesc().getDims().size(), 1);
-    if (onWeights) {
-        dims[0] = constDims[0];
-    } else {
-        dims[1] = constDims[1];
-    }
-    constLayer->outData[0]->setDims(dims);
-
-    fillConstBlob(*constLayer, quantizationShifts);
-}
-
-DataPrecision WeightableLayerTransformation::fillDequantizationsForWeightsPath(
- TransformationContext& context,
- const CNNLayer& weightableLayer,
- const bool supportAsymmetricQuantization,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts) const {
- if ((!CaselessEq<std::string>()(weightableLayer.type, "Convolution")) &&
- (!CaselessEq<std::string>()(weightableLayer.type, "FullyConnected")) &&
- (!CaselessEq<std::string>()(weightableLayer.type, "Gemm"))) {
- THROW_IE_EXCEPTION << "layer '" << weightableLayer.name << "' has unexpected type '" << weightableLayer.type << "'";
- }
-
- if (weightableLayer.insData.size() < 2) {
- return DataPrecision();
- }
-
- const DataPtr data = weightableLayer.insData[1].lock();
- if (data == nullptr) {
- THROW_IE_EXCEPTION << "Dequantization ScaleShift layer on weight is absent";
- }
-
- const CNNLayerPtr parent = CNNNetworkHelper::getParent(weightableLayer, 1);
- if (parent->type != "FakeQuantize") {
- THROW_IE_EXCEPTION << "Unexpected dequantization layer type " << parent->type;
- }
-
- const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(*parent);
- const DataPrecision dataPrecision = getDataPrecision(*parent, quantizationDetails, true, supportAsymmetricQuantization);
- fillFromQuantizationDetails(
- quantizationDetails,
- dataPrecision,
- dequantizationScales,
- dequantizationShifts);
-
- if ((!supportAsymmetricQuantization) && (
- std::any_of(dequantizationShifts.begin(), dequantizationShifts.end(), [](const float value) { return value != 0.f; }))) {
- return DataPrecision();
- }
-
- // TODO: going to update network: extract update weights from this method
- std::vector<float> outputLowValues(quantizationDetails.outputIntervalsCount);
- std::vector<float> outputHighValues(quantizationDetails.outputIntervalsCount);
- for (size_t i = 0; i < quantizationDetails.outputIntervalsCount; ++i) {
- if (supportAsymmetricQuantization) {
- outputLowValues[i] = dataPrecision.min;
- outputHighValues[i] = dataPrecision.max;
- } else {
- outputLowValues[i] = quantizationDetails.getOutputLowValue(i) / dequantizationScales[i];
- outputHighValues[i] = quantizationDetails.getOutputHighValue(i) / dequantizationScales[i];
- }
- }
-
- updateWeights(context, parent, outputLowValues, outputHighValues);
- return dataPrecision;
-}
-
-bool WeightableLayerTransformation::isDepthwise(const CNNLayer& layer) {
- if (layer.type != "Convolution") {
- return false;
- }
-
- if (!layer.CheckParamPresence("group")) {
- return false;
- }
-
- const size_t group = layer.GetParamAsUInt("group");
- const size_t inputChannelsCount = CNNNetworkHelper::getInputChannelsCount(layer);
- const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(layer);
- return (group == inputChannelsCount) && (inputChannelsCount == outputChannelsCount);
-}
-
-void WeightableLayerTransformation::calculateDequantizationForSymmetric(
- const CNNLayer& convolution,
- const std::vector<float>& originalDataDequantizationScales,
- const std::vector<float>& originalDataDequantizationShifts,
- const std::vector<float>& originalWeightsDequantizationScales,
- const std::vector<float>& originalWeightsDequantizationShifts,
- std::vector<float>& dequantizationScales,
- std::vector<float>& dequantizationShifts) const {
- const size_t outputChannelCount = CNNNetworkHelper::getOutputChannelsCount(convolution);
- dequantizationScales.resize(outputChannelCount);
- dequantizationShifts.resize(outputChannelCount);
-
- const Blob::Ptr convolutionWeightsBlob = CNNNetworkHelper::getWeights(convolution, roundQuantizedValues);
- const auto convolutionWeightsBuffer = CNNNetworkHelper::getFloatData(convolutionWeightsBlob);
-
- const Blob::Ptr convolutionBiasesBlob = CNNNetworkHelper::getBiases(convolution);
- const auto convolutionBiasesBuffer = convolutionBiasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(convolutionBiasesBlob);
-
-
- for (size_t i = 0lu; i < dequantizationScales.size(); ++i) {
- const float originalWeightsDequantizationScale = originalWeightsDequantizationScales.size() == 0
- ? 1.0 : (originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[i]);
- dequantizationScales[i] = originalDataDequantizationScales[0] * originalWeightsDequantizationScale;
- }
-
- const size_t inputChannelCount = CNNNetworkHelper::getInputChannelsCount(convolution);
- const size_t kernelSize = CNNNetworkHelper::getKernelSize(convolution);
-
- const size_t group = convolution.GetParamAsUInt("group", 1lu);
- const float originalDataDequantizationScale = originalDataDequantizationScales[0];
-
- const size_t outputChannelsInGroup = outputChannelCount / group;
- const size_t inputChannelsInGroup = inputChannelCount / group;
- const size_t filterSize = inputChannelsInGroup * kernelSize;
-
- for (size_t outputChannel = 0lu; outputChannel < outputChannelCount; ++outputChannel) {
- float sum = 0.0;
- const float originalWeightsDequantizationScale = originalWeightsDequantizationScales.size() == 0lu ?
- 1.0 :
- (originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[outputChannel]);
- const size_t outputChannelFilterOffset = outputChannel * filterSize;
-
- const size_t beginInputChannel = (outputChannel / outputChannelsInGroup) * inputChannelsInGroup;
- const size_t endInputChannel = beginInputChannel + inputChannelsInGroup;
- for (size_t inputChannel = beginInputChannel; inputChannel < endInputChannel; ++inputChannel) {
- const float originalDataDequantizationShift = originalDataDequantizationShifts[inputChannel];
- const size_t inputChannelKernelOffset = outputChannelFilterOffset + (inputChannel - beginInputChannel) * kernelSize;
- for (size_t kernelIndex = 0lu; kernelIndex < kernelSize; ++kernelIndex) {
- const float kernel = convolutionWeightsBuffer.get()[inputChannelKernelOffset + kernelIndex];
- sum += kernel * originalDataDequantizationShift * originalWeightsDequantizationScale;
- }
- }
-
- dequantizationShifts[outputChannel] = convolutionBiasesBuffer == nullptr
- ? sum :
- (sum + convolutionBiasesBuffer.get()[outputChannel] -
- convolutionBiasesBuffer.get()[outputChannel] * originalDataDequantizationScale * originalWeightsDequantizationScale);
- }
-}
target_compile_definitions(${TARGET_NAME} PUBLIC -DMKLDNN_THR=${MKLDNN_THR})
target_link_libraries(${TARGET_NAME} PRIVATE mkldnn inference_engine inference_engine_legacy
- inference_engine_transformations)
-
-if(USE_CNNNETWORK_LPT)
- target_link_libraries(${TARGET_NAME} PRIVATE inference_engine_lp_transformations_legacy)
-else()
- target_link_libraries(${TARGET_NAME} PRIVATE inference_engine_lp_transformations)
-endif()
+ inference_engine_transformations inference_engine_lp_transformations)
# Cross compiled function
# TODO: The same for proposal, proposalONNX, topk
target_include_directories(${TARGET_NAME}_obj PRIVATE $<TARGET_PROPERTY:inference_engine_preproc_s,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:inference_engine_legacy,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>
- $<TARGET_PROPERTY:openvino::itt,INTERFACE_INCLUDE_DIRECTORIES>)
-
-if(USE_CNNNETWORK_LPT)
- target_include_directories(${TARGET_NAME}_obj PRIVATE
- $<TARGET_PROPERTY:inference_engine_lp_transformations_legacy,INTERFACE_INCLUDE_DIRECTORIES>)
- target_compile_definitions(${TARGET_NAME}_obj PRIVATE
- $<TARGET_PROPERTY:inference_engine_lp_transformations_legacy,INTERFACE_COMPILE_DEFINITIONS>)
-else()
- target_include_directories(${TARGET_NAME}_obj PRIVATE
- $<TARGET_PROPERTY:inference_engine_lp_transformations,INTERFACE_INCLUDE_DIRECTORIES>)
-endif()
+ $<TARGET_PROPERTY:openvino::itt,INTERFACE_INCLUDE_DIRECTORIES>
+ $<TARGET_PROPERTY:inference_engine_lp_transformations,INTERFACE_INCLUDE_DIRECTORIES>)
set_ie_threading_interface_for(${TARGET_NAME}_obj)
#include <legacy/graph_tools.hpp>
#include <threading/ie_executor_manager.hpp>
-#ifdef USE_CNNNETWORK_LPT
-#include "low_precision_transformations/convolution.hpp"
-#include "low_precision_transformations/scaleshift_to_convolution.hpp"
-#include "low_precision_transformations/transformer.hpp"
-#endif
-
#include <threading/ie_cpu_streams_executor.hpp>
#include <ie_system_conf.h>
#include <threading/ie_thread_affinity.hpp>
_clonedNetwork = cloneNet(network);
if (_cfg.lpTransformsMode == Config::LPTransformsMode::On) {
-#ifdef USE_CNNNETWORK_LPT
- auto params = LayerTransformation::Params(true, // updatePrecisions
- true, // quantizeOutputs
- true, // weightsToConst
- LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations
- LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
- true, // roundQuantizedValues
- true, // updateBiases
- true); // supportAsymmetricQuantization
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params).
- add<ConvolutionTransformation>(LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), "Convolution").
- remove("ScaleShift").
- remove("Power"));
- transformer.transform(*_clonedNetwork);
-#endif
-
// Check if network is INT8 or Binary.
// BF16 transformations were disabled since CPU plug-in doesn't support mixed precision execution:
// BF16 + INT8 or BF16 + BIN.
#include <transformations/common_optimizations/lin_op_sequence_fusion.hpp>
-#ifndef USE_CNNNETWORK_LPT
# include <low_precision/transformer.hpp>
# include <low_precision/convolution.hpp>
# include <low_precision/group_convolution.hpp>
# include <low_precision/multiply_to_group_convolution.hpp>
-#endif
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
#if defined(_WIN32) || defined(WIN32)
manager.run_passes(nGraphFunc);
-#ifndef USE_CNNNETWORK_LPT
using namespace ngraph::pass::low_precision;
if (conf.lpTransformsMode == Config::LPTransformsMode::On) {
auto params = LayerTransformation::Params(
transformer.transform(nGraphFunc);
}
-#endif
ngraph::pass::Manager legacyManager;
legacyManager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
unitTestUtils
mkldnn
inference_engine_transformations
+ inference_engine_lp_transformations
ADD_CPPLINT
LABELS
CPU
)
-if(USE_CNNNETWORK_LPT)
- target_link_libraries(${TARGET_NAME} PRIVATE inference_engine_lp_transformations_legacy)
-else()
- target_link_libraries(${TARGET_NAME} PRIVATE inference_engine_lp_transformations)
-endif()
-
ie_faster_build(${TARGET_NAME}
UNITY
)
${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/ie_class/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/single_layer_tests/*.cpp)
-if(USE_CNNNETWORK_LPT)
- file(GLOB CLDNN_TEST_SOURCES
- ${CLDNN_TEST_SOURCES}
- ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/transformations/*.cpp)
-
- list(APPEND CLDNN_LIBS
- inference_engine_lp_transformations_legacy)
-endif()
-
list(APPEND TEST_SRC ${CLDNN_TEST_SOURCES})
list(APPEND CLDNN_LIBS
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include <gtest/gtest.h>
-#include <string>
-#include <memory>
-
-using namespace ::testing;
-using namespace InferenceEngine;
-
-
-TEST_P(SingleLayerTransformationsTest, LPT) {
-}
-
-INSTANTIATE_TEST_CASE_P(
- SingleLayerTransformationsTestFP32,
- SingleLayerTransformationsTest,
- ::testing::Values(
- //SingleLayerTransformationsTestParams(
- // "GPU",
- // SingleLayerTestModel::Ptr(new FullyConnectedAndScaleShiftsOnActivationsTestModel()),
- // { { 1, 2048 } },
- // { { 1, 1000 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnSignedActivationsAndWeightsPositiveTestModel()),
- { { 1, 32, 149, 149 } },
- { { 1, 32, 147, 147 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnSignedActivationsAndWeightsNegativeTestModel()),
- { { 1, 32, 149, 149 } },
- { { 1, 32, 147, 147 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnUnsignedActivationsAndWeightsTestModel()),
- { { 1, 32, 149, 149 } },
- { { 1, 32, 147, 147 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnSignedActivationsAndInvertedWeightsTestModel()),
- { { 1, 32, 149, 149 } },
- { { 1, 32, 147, 147 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FakeQuantizeReshapePoolingTestModelWithConstants()),
- { { 1, 1280, 7 } },
- { { 1, 1280, 7 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FakeQuantizeReshapePoolingTestModelWithoutConstants()),
- { { 1, 1280, 7 } },
- { { 1, 1280, 7 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FullyConnectedAndQuantizeTestModel()),
- { { 1, 32, 1, 1 } },
- { { 1, 32, 1, 1 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FullyConnectedAndScaleShiftsOnActivationsTestModel()),
- { { 1, 2048 } },
- { { 1, 1000 } }),
-
-// SingleLayerTransformationsTestParams(
-// "GPU",
-// SingleLayerTestModel::Ptr(new GemmAndQuantizeTestModel()),
-// { { 1, 32, 149, 149 } },
-// { { 1, 32, 147, 147 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new PoolingTestModel()),
- { { 149, 149, 32, 1 } },
- { { 149, 149, 32, 1 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnWeightsWithMultiOutputIntervalsTestModel()),
- { { 1, 32, 147, 147 } },
- { { 1, 64, 147, 147 } }),
-
- // Const transformation is disabled
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnWeightsWithoutConstTransformationTestModel()),
- { { 1, 32, 149, 149 } },
- { { 1, 32, 147, 147 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionAndPoolingAndQuantizeOnActivationsTestModel()),
- { { 1, 64, 147, 147 } },
- { { 1, 80, 73, 73 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnActivationsTestModel()),
- { { 1, 3, 299, 299 } },
- { { 1, 32, 149, 149 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionAndDequantizationScaleShiftsOnActivationsTestModel()),
- { { 1, 3, 299, 299 } },
- { { 1, 32, 149, 149 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionAndDequantizationScaleShiftAndQuantizeOnActivationsTestModel()),
- { { 1, 3, 299, 299 } },
- { { 1, 32, 149, 149 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionDepthwiseTestModel()),
- { { 1, 32, 112, 112 } },
- { { 1, 32, 112, 112 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionGroupedTestModel()),
- { { 1, 32, 112, 112 } },
- { { 1, 32, 112, 112 } }),
-
-// SingleLayerTransformationsTestParams(
-// "GPU",
-// SingleLayerTestModel::Ptr(new EltwiseTestModel()),
-// { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
-// { { 1, 3, 299, 299 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new EltwiseCpuTestModel()),
- { { 1, 3, 299, 299 } },
- { { 1, 3, 299, 299 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConcatTestModel(true, true, true)),
- { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
- { { 1, 6, 299, 299 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConcatTestModel(true, true, false)),
- { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
- { { 1, 6, 299, 299 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConcatTestModel(false)),
- { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
- { { 1, 6, 299, 299 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConcatMultiChannelTestModel()),
- { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
- { { 1, 6, 299, 299 } }),
-
- //SingleLayerTransformationsTestParams(
- // "GPU",
- // SingleLayerTestModel::Ptr(new ConcatMultiBranchTestModel()),
- // { { 299, 299, 3, 1 }, { 299, 299, 3, 1 } },
- // { { 299, 299, 12, 1 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new QuantizationOnWeightsTestModel()),
- { { 1, 32, 149, 149 } },
- { { 1, 32, 147, 147 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new QuantizationOnInvertedWeightsTestModel()),
- { { 1, 32, 149, 149 } },
- { { 1, 32, 147, 147 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FakeQuantizeAsOutputTest()),
- { { 1, 32, 149, 149 } },
- { { 1, 32, 147, 147 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FakeQuantizeWithMultiOutputsTest()),
- { { 1, 32, 149, 149 } },
- { { 1, 32, 147, 147 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FakeQuantizeAndScaleShiftTestModel()),
- { { 1, 3, 299, 299 } },
- { { 1, 3, 299, 299 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FakeQuantizeAndActivationTestModel({ {-10.25, 10.1641} })),
- { { 1, 3, 299, 299 } },
- { { 1, 3, 299, 299 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FakeQuantizeAndActivationTestModel({ {-0.00174255, 0.00174255} })),
- { { 1, 3, 299, 299 } },
- { { 1, 3, 299, 299 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FakeQuantizeAndActivationTestModel({ {-329.688, 327.188} })),
- { { 1, 3, 299, 299 } },
- { { 1, 3, 299, 299 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FakeQuantizeAndActivationWithNegativeScalesTestModel()),
- { { 1, 3, 299, 299 } },
- { { 1, 3, 299, 299 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FakeQuantizeAndActivationWithNegativeSlopeTestModel()),
- { { 1, 3, 299, 299 } },
- { { 1, 3, 299, 299 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ScaleShiftAndFakeQuantizeTestModel()),
- { { 1, 3, 299, 299 } },
- { { 1, 3, 299, 299 } })
-
- ),
- SingleLayerTransformationsTestParams::getLowPrecisionTransformerSingleLayerTestName);
-
-
-INSTANTIATE_TEST_CASE_P(
- SingleLayerTransformationsTestFP16,
- SingleLayerTransformationsTest,
- ::testing::Values(
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FullyConnectedAndScaleShiftsOnActivationsTestModel()),
- { { 1, 2048 } },
- { { 1, 1000 } },
- "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FullyConnectedAndQuantizeTestModel()),
- { { 1, 32, 1, 1 } },
- { { 1, 32, 1, 1 } },
- "FP16"),
-
- // TODO: uncomment after fix
- //SingleLayerTransformationsTestParams(
- // "GPU",
- // SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnSignedActivationsAndWeightsTestModel()),
- // { { 1, 32, 149, 149 } },
- // { { 1, 32, 147, 147 } },
- // "FP16"),
-
- // TODO: uncomment after fix
- //SingleLayerTransformationsTestParams(
- // "GPU",
- // SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnUnsignedActivationsAndWeightsTestModel()),
- // { { 1, 32, 149, 149 } },
- // { { 1, 32, 147, 147 } },
- // "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FakeQuantizeReshapePoolingTestModelWithConstants()),
- { { 1, 1280, 7 } },
- { { 1, 1280, 7 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FakeQuantizeReshapePoolingTestModelWithoutConstants()),
- { { 1, 1280, 7 } },
- { { 1, 1280, 7 } }),
-
-
- //Not parametrized yet. Executed on FP32
-
- //SingleLayerTransformationsTestParams(
- // "GPU",
- // SingleLayerTestModel::Ptr(new FullyConnectedAndQuantizeTestModel()),
- // { { 1, 32, 149, 149 } },
- // { { 1, 32, 147, 147 } },
- // "FP16"),
-
- //SingleLayerTransformationsTestParams(
- // "GPU",
- // SingleLayerTestModel::Ptr(new GemmAndQuantizeTestModel()),
- // { { 1, 32, 149, 149 } },
- // { { 1, 32, 147, 147 } },
- // "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new PoolingTestModel()),
- { { 149, 149, 32, 1 } },
- { { 149, 149, 32, 1 } },
- "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnWeightsWithMultiOutputIntervalsTestModel()),
- { { 1, 32, 147, 147 } },
- { { 1, 64, 147, 147 } },
- "FP16"),
-
- // TODO: uncomment after fix
- //SingleLayerTransformationsTestParams(
- // "GPU",
- // SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnWeightsWithoutConstTransformationTestModel()),
- // { { 1, 32, 149, 149 } },
- // { { 1, 32, 147, 147 } },
- // "FP16"),
-
- // TODO: uncomment after fix
- //SingleLayerTransformationsTestParams(
- // "GPU",
- // SingleLayerTestModel::Ptr(new ConvolutionAndPoolingAndQuantizeOnActivationsTestModel()),
- // { { 1, 64, 147, 147 } },
- // { { 1, 80, 73, 73 } },
- // "FP16"),
-
- // TODO: uncomment after fix
- //SingleLayerTransformationsTestParams(
- // "GPU",
- // SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnActivationsTestModel()),
- // { { 1, 3, 299, 299 } },
- // { { 1, 32, 149, 149 } },
- // "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionAndDequantizationScaleShiftsOnActivationsTestModel()),
- { { 1, 3, 299, 299 } },
- { { 1, 32, 149, 149 } },
- "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionAndDequantizationScaleShiftAndQuantizeOnActivationsTestModel()),
- { { 1, 3, 299, 299 } },
- { { 1, 32, 149, 149 } },
- "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionDepthwiseTestModel()),
- { { 1, 32, 112, 112 } },
- { { 1, 32, 112, 112 } },
- "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConvolutionGroupedTestModel()),
- { { 1, 32, 112, 112 } },
- { { 1, 32, 112, 112 } }),
-
-// SingleLayerTransformationsTestParams(
-// "GPU",
-// SingleLayerTestModel::Ptr(new EltwiseTestModel()),
-// { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
-// { { 1, 3, 299, 299 } },
-// "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new EltwiseCpuTestModel()),
- { { 1, 3, 299, 299 } },
- { { 1, 3, 299, 299 } }),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConcatTestModel(true)),
- { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
- { { 1, 6, 299, 299 } },
- "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConcatTestModel(false)),
- { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
- { { 1, 6, 299, 299 } },
- "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new ConcatMultiChannelTestModel()),
- { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
- { { 1, 6, 299, 299 } }),
-
- //SingleLayerTransformationsTestParams(
- // "GPU",
- // SingleLayerTestModel::Ptr(new ConcatMultiBranchTestModel()),
- // { { 299, 299, 3, 1 }, { 299, 299, 3, 1 } },
- // { { 299, 299, 12, 1 } },
- // "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new QuantizationOnWeightsTestModel()),
- { { 1, 32, 149, 149 } },
- { { 1, 32, 147, 147 } },
- "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new QuantizationOnInvertedWeightsTestModel()),
- { { 1, 32, 149, 149 } },
- { { 1, 32, 147, 147 } },
- "FP16"),
-
- SingleLayerTransformationsTestParams(
- "GPU",
- SingleLayerTestModel::Ptr(new FakeQuantizeAndScaleShiftTestModel()),
- { { 1, 3, 299, 299 } },
- { { 1, 3, 299, 299 } },
- "FP16")
- ),
- SingleLayerTransformationsTestParams::getLowPrecisionTransformerSingleLayerTestName);
${CMAKE_CURRENT_SOURCE_DIR}/config_param_test/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/extensions_tests/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/network_tests/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/normalization_tests/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/snippet_test/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/regression_tests/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/common_single_layer_tests/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/graph_tools/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/io_blob_tests/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/int8_tests/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/input_tests/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/inference_engine_regression_tests/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/input_tests/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/io_blob_tests/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/lstm/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/common_single_layer_tests/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/ie_class/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/single_layer_tests/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/snippet_test/*.cpp
)
-if(USE_CNNNETWORK_LPT)
- file(GLOB MKL_DNN_TEST_SOURCES
- ${MKL_DNN_TEST_SOURCES}
- ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/network_tests/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/transformations/*.cpp)
- list(APPEND MKL_DNN_LIBS
- inference_engine_lp_transformations_legacy)
-endif()
-
list(APPEND MKL_DNN_LIBS
IESharedTests
${Boost_REGEX_LIBRARY})
ngraphFunctions
)
-if(USE_CNNNETWORK_LPT)
- file(GLOB SHARED_TESTS_SRC
+file(GLOB SHARED_TESTS_SRC
${CMAKE_CURRENT_SOURCE_DIR}/common_single_layer_tests/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/lstm/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/graph_tools/*.cpp
- # requires legacy LPT
- ${CMAKE_CURRENT_SOURCE_DIR}/network_tests/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/transformations/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/transformations/*.hpp
- ${CMAKE_CURRENT_SOURCE_DIR}/transformations/common/*.cpp
)
- list(APPEND SHARED_LIBRARIES inference_engine_lp_transformations_legacy)
-else()
- file(GLOB SHARED_TESTS_SRC
- ${CMAKE_CURRENT_SOURCE_DIR}/common_single_layer_tests/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/lstm/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/graph_tools/*.cpp
- )
-endif()
add_library(${TARGET_NAME} STATIC ${SHARED_TESTS_SRC})
add_dependencies(${TARGET_NAME} inference_engine_preproc MultiDevicePlugin mock_engine)
endif()
target_include_directories(${TARGET_NAME} PUBLIC
- ${CMAKE_CURRENT_SOURCE_DIR}/network_tests
${CMAKE_CURRENT_SOURCE_DIR}/io_blob_tests
${CMAKE_CURRENT_SOURCE_DIR}/input_tests
${CMAKE_CURRENT_SOURCE_DIR}/inference_engine_regression_tests
${CMAKE_CURRENT_SOURCE_DIR}/common_single_layer_tests
${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests
${CMAKE_CURRENT_SOURCE_DIR}/graph_tools
- ${CMAKE_CURRENT_SOURCE_DIR}/transformations
$<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
)
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-#pragma once
-
-#include <memory>
-#include <unordered_set>
-
-#include <gtest/gtest.h>
-#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
-#include "ie_precision.hpp"
-#include <tests_common.hpp>
-#include <tests_common_func.hpp>
-#include <multi-device/multi_device_config.hpp>
-#include "low_precision_transformations/transformer.hpp"
-#include <regression_tests.hpp>
-#include "common/validation.hpp"
-#include "low_precision_transformations/concat_multi_channels.hpp"
-#include "low_precision_transformations/convolution.hpp"
-#include "low_precision_transformations/fully_connected.hpp"
-#include "low_precision_transformations/eltwise.hpp"
-#include "low_precision_transformations/scaleshift_to_convolution.hpp"
-#include <legacy/ie_util_internal.hpp>
-
-#include "cnn_network_ngraph_impl.hpp"
-#include <ie_system_conf.h>
-
-using namespace ::testing;
-using namespace InferenceEngine;
-
-inline CNNLayerPtr getLayer(const ICNNNetwork& network, const std::string& layerName) {
- std::vector<CNNLayerPtr> layers = InferenceEngine::details::CNNNetSortTopologically(network);
- for (CNNLayerPtr layer : layers) {
- if (layer->name == layerName) {
- return layer;
- }
- }
-
- return nullptr;
-}
-
-inline void checkLayerOuputPrecision(const ICNNNetwork& network, const std::string& layerName, Precision expectedPrecision) {
- CNNLayerPtr layer = getLayer(network, layerName);
- for (DataPtr data : layer->outData) {
- ASSERT_EQ(expectedPrecision, data->getPrecision()) << " unexpected precision " << data->getPrecision() << " for layer " << layerName;
- }
-}
-
-struct network_params {
- std::string deviceName;
- std::string modelFile;
- std::string imageName;
- std::vector<std::pair<int, float>> refValue;
- // optional config (used for multi-device)
- std::map<std::string, std::string> config;
-
- std::string model() {
- ModelsPath result;
- result += kPathSeparator;
- result += modelFile;
- return result;
- }
-
- std::string weights() {
- ModelsPath result;
- result += kPathSeparator;
- result += testing::FileUtils::fileNameNoExt(modelFile);
- result += ".bin";
- return result;
- }
-
- std::string image() {
- std::string result = TestDataHelpers::get_data_path();
- result += kPathSeparator;
- result += imageName;
- return result;
- }
-};
-
-static LayerTransformation::Params createParam() {
- return LayerTransformation::Params(
- false,
- true,
- true,
- LayerTransformation::QuantizedTensorAlignment::None,
- LayerTransformation::QuantizedTensorAlignment::None,
- false);
-}
-
-static LayerTransformation::Params createParamU8I8() {
- return LayerTransformation::Params(
- false,
- true,
- true,
- LayerTransformation::QuantizedTensorAlignment::None,
- LayerTransformation::QuantizedTensorAlignment::None,
- false,
- true,
- true,
- { Precision::U8 },
- { Precision::I8 });
-}
-
-static LayerTransformation::Params createParamU8U8() {
- return LayerTransformation::Params(
- false,
- true,
- true,
- LayerTransformation::QuantizedTensorAlignment::None,
- LayerTransformation::QuantizedTensorAlignment::None,
- false,
- true,
- true,
- { Precision::U8 },
- { Precision::U8 });
-}
-
-static LayerTransformation::Params createParamI8I8() {
- return LayerTransformation::Params(
- false,
- true,
- true,
- LayerTransformation::QuantizedTensorAlignment::None,
- LayerTransformation::QuantizedTensorAlignment::None,
- false,
- true,
- true,
- { Precision::I8 },
- { Precision::I8 });
-}
-
-static LayerTransformation::Params createParamCpu() {
- return LayerTransformation::Params(
- true,
- true,
- true,
- LayerTransformation::QuantizedTensorAlignment::UpdateLevel,
- LayerTransformation::QuantizedTensorAlignment::None,
- true,
- true,
- true);
-}
-
-static std::vector<float> generateInput(const size_t size, const bool reverse = false) {
- std::vector<float> in(size);
- for (size_t i = 0; i < in.size(); ++i) {
- in[i] = reverse ? in.size() - i : i;
- }
- return in;
-}
-
-
-class TransformationsParams;
-
-class ModelParams {
-public:
- ModelParams(
- const std::string name,
- const std::string irFilePath,
- const std::string dataFilePath,
- const std::vector<std::pair<int, float>> referenceOutputDataWithoutTransformations,
- const std::vector<std::pair<int, float>> referenceOutputDataWithTransformations = {}) :
- name(name),
- irFilePath(irFilePath),
- dataFilePath(dataFilePath),
- referenceOutputDataWithoutTransformations({ referenceOutputDataWithoutTransformations }),
- referenceOutputDataWithTransformations((referenceOutputDataWithTransformations.size() != 0ul) ?
- std::vector<std::vector<std::pair<int, float>>>({ referenceOutputDataWithTransformations }) :
- std::vector<std::vector<std::pair<int, float>>>({ referenceOutputDataWithoutTransformations })),
- validation(nullptr),
- inputs({}),
- transformations({}) {}
-
-
- ModelParams(
- const std::string name,
- const std::string irFilePath,
- const std::string dataFilePath,
- const std::vector<std::pair<int, float>> referenceOutputDataWithoutTransformations,
- const std::vector<std::pair<int, float>> referenceOutputDataWithTransformations,
- std::function<void(const TransformationsParams& params, CNNNetworkImplPtr usedNetwork)> validation,
- const std::vector<std::pair<std::string, std::vector<float>>> inputs = {},
- const std::vector<std::pair<std::string, std::shared_ptr<LayerTransformation>>> transformations = {}) :
- name(name),
- irFilePath(irFilePath),
- dataFilePath(dataFilePath),
- referenceOutputDataWithoutTransformations({ referenceOutputDataWithoutTransformations }),
- referenceOutputDataWithTransformations(referenceOutputDataWithTransformations.size() != 0ul ?
- std::vector<std::vector<std::pair<int, float>>>({ referenceOutputDataWithTransformations }) :
- std::vector<std::vector<std::pair<int, float>>>({ referenceOutputDataWithoutTransformations })),
- validation(validation),
- inputs(inputs),
- transformations(transformations) {}
-
- ModelParams(
- const std::string name,
- const std::string irFilePath,
- const std::string dataFilePath,
- const std::vector<std::vector<std::pair<int, float>>> referenceOutputDataWithoutTransformations,
- const std::vector<std::vector<std::pair<int, float>>> referenceOutputDataWithTransformations,
- std::function<void(const TransformationsParams& params, CNNNetworkImplPtr usedNetwork)> validation) :
- name(name),
- irFilePath(irFilePath),
- dataFilePath(dataFilePath),
- referenceOutputDataWithoutTransformations(referenceOutputDataWithoutTransformations),
- referenceOutputDataWithTransformations(referenceOutputDataWithTransformations.size() != 0ul ? referenceOutputDataWithTransformations : referenceOutputDataWithoutTransformations),
- validation(validation),
- inputs({}),
- transformations({}) {}
-
- const std::string name;
- const std::string irFilePath;
- const std::string dataFilePath;
- const std::vector<std::vector<std::pair<int, float>>> referenceOutputDataWithoutTransformations;
- const std::vector<std::vector<std::pair<int, float>>> referenceOutputDataWithTransformations;
- const std::function<void(const TransformationsParams& params, CNNNetworkImplPtr usedNetwork)> validation;
- const std::vector<std::pair<std::string, std::vector<float>>> inputs;
- const std::vector<std::pair<std::string, std::shared_ptr<LayerTransformation>>> transformations;
-};
-
-class TransformationsParams {
-public:
- TransformationsParams(
- const bool transformationsInPluginEnabled = true,
- const bool transformationsInTestEnabled = false,
- const LayerTransformation::Params& params = LayerTransformation::Params(),
- const std::unordered_set<std::string>& notTransformedLayers = {},
- const size_t classesCanBeChangedIndex = 9999,
- const bool compareRawValues = true,
- const std::unordered_set<std::string>& removedLayers = {}) :
- deviceName(""),
- modelParams(ModelParams("", "", "", {})),
- batchSize(1ul),
- transformationsInPluginEnabled(transformationsInPluginEnabled),
- transformationsInTestEnabled(transformationsInTestEnabled),
- params(params),
- notTransformedLayers(notTransformedLayers),
- classesCanBeChangedIndex(classesCanBeChangedIndex),
- compareRawValues(compareRawValues),
- removedLayers(removedLayers) {}
-
- TransformationsParams(
- const std::string deviceName,
- const ModelParams modelParams,
- const size_t batchSize,
- const bool transformationsInPluginEnabled = true,
- const bool transformationsInTestEnabled = false,
- const LayerTransformation::Params& params = LayerTransformation::Params(),
- const std::unordered_set<std::string>& notTransformedLayers = {},
- const size_t classesCanBeChangedIndex = 9999,
- const bool compareRawValues = true,
- const std::unordered_set<std::string>& removedLayers = {},
- const std::vector<std::pair<std::string, std::vector<float>>> inputs = {},
- const std::vector<std::pair<std::string, std::shared_ptr<LayerTransformation>>> transformations = {}) :
- deviceName(deviceName),
- modelParams(modelParams),
- batchSize(batchSize),
- transformationsInPluginEnabled(transformationsInPluginEnabled),
- transformationsInTestEnabled(transformationsInTestEnabled),
- params(params),
- notTransformedLayers(notTransformedLayers),
- classesCanBeChangedIndex(classesCanBeChangedIndex),
- compareRawValues(compareRawValues),
- removedLayers(removedLayers) {}
-
- const std::string deviceName;
- const ModelParams modelParams;
- const size_t batchSize;
-
- static std::string getLowPrecisionTransformerSingleLayerTestName(testing::TestParamInfo<TransformationsParams> params) {
- const TransformationsParams& p = params.param;
- std::stringstream ss;
- ss << p.modelParams.name <<
- "_batch" << p.batchSize <<
- "_" << (p.transformationsInPluginEnabled ? "inPluginEnabled" : "inPluginDisabled") <<
- "_" << (p.transformationsInTestEnabled ? "inTestEnabled" : "inTestDisabled") <<
- "_" << (p.params.supportAsymmetricQuantization ? "asymmetric" : "symmetric") <<
- "_" << p.params.precisionsOnActivations <<
- "_" << p.params.precisionsOnWeights <<
- "_" << p.params.quantizedTensorAlignmentOnActivations;
- return ss.str();
- }
-
- const bool transformationsInPluginEnabled;
- const bool transformationsInTestEnabled;
- const LayerTransformation::Params params;
- const std::unordered_set<std::string> notTransformedLayers;
- const size_t classesCanBeChangedIndex;
- const bool compareRawValues;
- const std::unordered_set<std::string> removedLayers;
-};
-
-class smoke_NetworkClassifyTest : public TestsCommon, public TestsCommonFunc, public WithParamInterface<TransformationsParams> {
-protected:
- void classify(
- network_params p,
- size_t batch_size = 1,
- float threshold = 0.005f,
- const TransformationsParams& transformationsParams = TransformationsParams(),
- const std::vector<std::pair<std::string, std::vector<float>>>& inputs = {},
- const std::vector<std::pair<std::string, std::shared_ptr<LayerTransformation>>>& transformations = {}) {
- CNNNetworkImplPtr usedNetwork;
- classify(p, batch_size, threshold, transformationsParams, usedNetwork, inputs, transformations);
- }
-
- void classify(
- network_params p,
- size_t batch_size,
- float threshold,
- const TransformationsParams& transformationsParams,
- CNNNetworkImplPtr& usedNetwork,
- const std::vector<std::pair<std::string, std::vector<float>>>& inputs = {},
- const std::vector<std::pair<std::string, std::shared_ptr<LayerTransformation>>>& transformations = {}) {
-
-#ifdef DISPLAY_RESULTS
- std::cout << std::endl << p.modelFile << ": was started" << std::endl;
- if (transformationsParams.transformationsInTestEnabled) {
- std::cout <<
- "\tenabled: " << (transformationsParams.transformationsInTestEnabled ? "true" : "false") << std::endl <<
- "\tbatch_size: " << batch_size << std::endl <<
- "\tupdatePrecision: " << (transformationsParams.params.updatePrecisions ? "true" : "false") << std::endl <<
- "\tquantizeOutputs: " << (transformationsParams.params.quantizeOutputs ? "true" : "false") << std::endl <<
- "\tweightsToConst: " << (transformationsParams.params.weightsToConst ? "true" : "false") << std::endl <<
- "\tquantizedTensorAlignmentOnActivations: " << transformationsParams.params.quantizedTensorAlignmentOnActivations << std::endl <<
- "\tquantizedTensorAlignmentOnWeights: " << transformationsParams.params.quantizedTensorAlignmentOnWeights << std::endl <<
- "\troundQuantizedValues: " << (transformationsParams.params.roundQuantizedValues ? "true" : "false") << std::endl <<
- "\tupdateBiases: " << (transformationsParams.params.updateBiases ? "true" : "false") << std::endl <<
- "\tsupportAsymmetricQuantization: " << (transformationsParams.params.supportAsymmetricQuantization ? "true" : "false") << std::endl <<
- "\tprecisionsOnActivations: " << transformationsParams.params.precisionsOnActivations << std::endl <<
- "\tprecisionsOnWeights: " << transformationsParams.params.precisionsOnWeights << std::endl;
- } else {
- std::cout << "\tenabled: " << (transformationsParams.transformationsInTestEnabled ? "true" : "false") << std::endl;
- }
-#endif
-
- Core ie;
- CNNNetwork network;
- if (*p.modelFile.begin() == '/') {
- network = ie.ReadNetwork(p.modelFile);
- } else {
- network = ie.ReadNetwork(p.model(), p.weights());
- }
-
- if (batch_size != 1)
- network.setBatchSize(batch_size);
-
- ie.SetConfig(p.config);
-
- if (transformationsParams.transformationsInTestEnabled) {
- ICNNNetwork& icnnnetwork = network;
- auto networkNGraph = dynamic_cast<CNNNetworkNGraphImpl*>(&icnnnetwork);
- if (networkNGraph) {
- auto netPtr = std::make_shared<details::CNNNetworkImpl>(*networkNGraph);
- network = CNNNetwork(netPtr);
- }
-
- auto originalLayersInfo = LowPrecisionTransformationValidation::getLayers(network);
- for (const std::string removedLayer : transformationsParams.removedLayers) {
- for (auto originalLayerIt = originalLayersInfo.begin(); originalLayerIt != originalLayersInfo.end(); ++originalLayerIt) {
- if (removedLayer == originalLayerIt->first) {
- originalLayersInfo.erase(originalLayerIt);
- break;
- }
- }
- }
-
- LowPrecisionTransformations lowPrecisionTransformations = LowPrecisionTransformer::getAllTransformations(transformationsParams.params).
- addBranchSpecific<EltwiseTransformation>(LayerTransformation::Params(transformationsParams.params), "Eltwise").
- add<ConvolutionTransformation>(
- LayerTransformation::Params(transformationsParams.params).setPrecisionsOnActivations({ Precision::U8 }),
- "Convolution").
- addCleanup<ScaleShiftToConvolutionTransformation>(
- LayerTransformation::Params(transformationsParams.params).setPrecisionsOnActivations({ Precision::U8 }),
- "ScaleShift");
-
- for (const auto transformation : transformations) {
- auto it = lowPrecisionTransformations.transformations.find(transformation.first);
- if (it != lowPrecisionTransformations.transformations.end()) {
- lowPrecisionTransformations.transformations.erase(it);
- }
-
- lowPrecisionTransformations.transformations.emplace(transformation.first, transformation.second);
- }
-
- LowPrecisionTransformer transformer(lowPrecisionTransformations);
- transformer.transform(network);
-
- LowPrecisionTransformationValidation::validate(
- network,
- transformationsParams.params,
- transformationsParams.notTransformedLayers,
- originalLayersInfo);
- }
-
- std::map<std::string, std::string> config;
- // config[PluginConfigInternalParams::KEY_LP_TRANSFORMS_VERSION] = PluginConfigInternalParams::LP_TRANSFORMS_NGRAPH;
- if (!transformationsParams.transformationsInPluginEnabled) {
- config.emplace(PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE, PluginConfigParams::NO);
- }
-
- // use to enable LPT ON devices with explicit KEY_LP_TRANSFORMS_MODE definition (GPU)
- //config.emplace(
- // PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE,
- // transformationsParams.transformationsInPluginEnabled ? PluginConfigParams::YES : PluginConfigParams::NO);
-
- if (network.getFunction()) {
- usedNetwork = std::make_shared<InferenceEngine::details::CNNNetworkImpl>(network);
- } else {
- usedNetwork = cloneNet(network);
- }
- ExecutableNetwork exeNetwork = ie.LoadNetwork(network, p.deviceName, config);
- InferRequest inferRequest = exeNetwork.CreateInferRequest();
- if (inputs.empty()) {
- Blob::Ptr src = readInput(p.image(), batch_size);
- ASSERT_NE(nullptr, src.get()) << "Cannot read Input " << p.image();
- auto inputsInfo = network.getInputsInfo();
- if (inputsInfo.size() == 3ul) {
- std::vector<float> data = { 1.f, 2.f, 3.f };
- Blob::Ptr blob = make_shared_blob<float>(TensorDesc(Precision::FP32, { 1ul, 3ul }, Layout::NC));
- blob->allocate();
- CNNNetworkHelper::fillBlobByFP32(blob, data.data());
-
- auto it = inputsInfo.begin();
- inferRequest.SetBlob(it->first, blob);
-
- ++it;
- inferRequest.SetBlob(it->first, src);
-
- ++it;
- inferRequest.SetBlob(it->first, src);
- } else {
- inferRequest.SetBlob(network.getInputsInfo().begin()->first, src);
- }
- } else {
- for (const auto input : inputs) {
- Blob::Ptr blob = make_shared_blob<float>(TensorDesc(Precision::FP32, { input.second.size() }, Layout::C));
- blob->allocate();
- CNNNetworkHelper::fillBlobByFP32(blob, input.second.data());
- inferRequest.SetBlob(input.first, blob);
- }
- }
-
- OutputsDataMap outInfo;
- outInfo = network.getOutputsInfo();
- ASSERT_EQ(outInfo.size(), 1);
- ASSERT_NE(outInfo.begin()->second, nullptr);
- Blob::Ptr dst = make_shared_blob<float>(outInfo.begin()->second->getTensorDesc());
- dst->allocate();
- inferRequest.SetBlob(outInfo.begin()->first, dst);
-
- inferRequest.Infer();
-
- for (size_t i = 0; i < batch_size; i++)
- ASSERT_TRUE(compareTop(*dst.get(), p.refValue, i, threshold, transformationsParams.classesCanBeChangedIndex, transformationsParams.compareRawValues)) << "Doesn't match with ref values";
- }
-
- Regression::Builder please() {
- std::shared_ptr<Core> ie = PluginCache::get().ie();
- Regression::Builder b(ie);
- b.usingDevice("CPU");
-
- return b;
- }
-
-private:
- static bool onWeights(const CNNLayer& layer) {
- const std::vector<CNNLayerPtr> children = getChildren(layer);
- return (children.size() == 1) &&
- (children[0]->type == "Convolution") &&
- (children[0]->insData.size() >= 2) &&
- (getCreatorLayer(children[0]->insData[1].lock()).lock()->name == layer.name);
- }
-
- static std::vector<CNNLayerPtr> getChildren(const CNNLayer& layer, const std::string& exceptionLayerName = "") {
- std::vector<CNNLayerPtr> children;
- for (const DataPtr outData : layer.outData) {
- const std::map<std::string, CNNLayerPtr>& inputTo = getInputTo(outData);
- for (auto it = inputTo.begin(); it != inputTo.end(); ++it) {
- CNNLayerPtr child = it->second;
- if (exceptionLayerName.empty() || child->name != exceptionLayerName) {
- children.push_back(child);
- }
- }
- }
- return children;
- }
-};
-
-class ModelTransformationsTest : public smoke_NetworkClassifyTest {
-protected:
- void SetUp() override {
- const TransformationsParams transformationsParam = ::testing::WithParamInterface<TransformationsParams>::GetParam();
- CNNNetworkImplPtr usedNetwork;
-
- std::vector<std::pair<int, float>> referenceValues;
- if (transformationsParam.params.updatePrecisions &&
- (transformationsParam.transformationsInPluginEnabled || transformationsParam.transformationsInTestEnabled)) {
- if (transformationsParam.modelParams.referenceOutputDataWithTransformations.size() == 1) {
- referenceValues = transformationsParam.modelParams.referenceOutputDataWithTransformations[0];
- } else {
- referenceValues = InferenceEngine::with_cpu_x86_avx512f() ?
- transformationsParam.modelParams.referenceOutputDataWithTransformations[1] :
- transformationsParam.modelParams.referenceOutputDataWithTransformations[0];
- }
- } else {
- if (transformationsParam.modelParams.referenceOutputDataWithoutTransformations.size() == 1) {
- referenceValues = transformationsParam.modelParams.referenceOutputDataWithoutTransformations[0];
- } else {
- referenceValues = InferenceEngine::with_cpu_x86_avx512f() ?
- transformationsParam.modelParams.referenceOutputDataWithoutTransformations[1] :
- transformationsParam.modelParams.referenceOutputDataWithoutTransformations[0];
- }
- }
-
- network_params p{
- "CPU",
- transformationsParam.modelParams.irFilePath,
- transformationsParam.modelParams.dataFilePath,
- referenceValues
- };
-
- classify(p,
- transformationsParam.batchSize,
- 1.f,
- transformationsParam,
- usedNetwork,
- transformationsParam.modelParams.inputs,
- transformationsParam.modelParams.transformations);
-
- if (transformationsParam.modelParams.validation != nullptr) {
- transformationsParam.modelParams.validation(transformationsParam, usedNetwork);
- }
- }
-};
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_tests_utils.hpp"
-
-#include <legacy/details/ie_cnn_network_tools.h>
-#include <precision_utils.h>
-#include <cmath>
-
-using InferenceEngine::CNNLayerPtr;
-using InferenceEngine::Blob;
-using InferenceEngine::details::CNNNetworkImpl;
-using InferenceEngine::CNNNetwork;
-using InferenceEngine::DataPtr;
-using InferenceEngine::Precision;
-
-// TODO: FP32 detected
-void fillDataWithInitValue(float *data, size_t size, float initValue) {
- for (size_t i = 0lu; i < size; i++) {
- data[i] = sin((i + initValue + 1.0f) * 0.03f);
- }
-}
-
-void fillDataWithInitValue(std::vector<float>& data, float initValue) {
- for (size_t i = 0; i < data.size(); i++) {
- data[i] = sin((i + initValue + 1.0) * 0.03);
- }
-}
-
-void fillDataWithInitValue(Blob::Ptr& blob, float initValue) {
- if (blob == nullptr) {
- THROW_IE_EXCEPTION << "Blob is nullable";
- }
-
- const Precision& precision = blob->getTensorDesc().getPrecision();
- const size_t dataSize = blob->size();
- if (precision == Precision::FP32) {
- float* buffer = blob->buffer().as<float*>();
- for (size_t i = 0lu; i < dataSize; i++) {
- buffer[i] = sin((float(i) + initValue + 1.f) * 0.03f);
- }
- } else if (precision == Precision::FP16) {
- short* buffer = blob->buffer().as<short*>();
- for (size_t i = 0lu; i < dataSize; i++) {
- buffer[i] = InferenceEngine::PrecisionUtils::f32tof16(sin((float(i) + initValue + 1.f) * 0.03f));
- }
- }
-}
-
-void fillDataWithInitValue(CNNLayerPtr layer, const std::string& blobName, float initValue) {
- if (layer == nullptr) {
- THROW_IE_EXCEPTION << "layer is nullable";
- }
- if (blobName.empty() && (layer->blobs.size() != 1)) {
- THROW_IE_EXCEPTION << "several blobs";
- }
-
- Blob::Ptr blob = blobName.empty() ? layer->blobs.begin()->second : layer->blobs[blobName];
- if (blob == nullptr)
- THROW_IE_EXCEPTION << "Layer '" << layer->name << "' does not have blob '" << blobName << "'";
- fillDataWithInitValue(blob, initValue);
-}
-
-void fillData(float *dst, size_t size, float value) {
- std::fill(dst, dst + size, value);
-}
-
-void fillData(float* dst, size_t size, const float* src) {
- std::copy(src, src + size, dst);
-}
-
-void fillData(float *dst, size_t size, const std::vector<float>& src) {
- if (size != src.size()) {
- THROW_IE_EXCEPTION << "values size is not correct";
- }
- fillData(dst, size, src.data());
-}
-
-void fillData(Blob::Ptr& blob, float value) {
- if (blob == nullptr) {
- THROW_IE_EXCEPTION << "Blob is nullable";
- }
-
- const Precision& precision = blob->getTensorDesc().getPrecision();
- const size_t dataSize = blob->size();
- if (precision == Precision::FP32) {
- fillData(blob->buffer().as<float*>(), dataSize, value);
- } else if (precision == Precision::FP16) {
- short* buffer = blob->buffer().as<short*>();
- for (size_t i = 0lu; i < blob->size(); i++) {
- buffer[i] = InferenceEngine::PrecisionUtils::f32tof16(value);
- }
- }
-}
-
-void fillData(Blob::Ptr& blob, const float* src) {
- if (blob == nullptr) {
- THROW_IE_EXCEPTION << "Blob is nullable";
- }
-
- const Precision& precision = blob->getTensorDesc().getPrecision();
- const size_t dataSize = blob->size();
- if (precision == Precision::FP32) {
- fillData(blob->buffer().as<float*>(), dataSize, src);
- } else if (precision == Precision::FP16) {
- short* dstData = blob->buffer().as<short*>();
- InferenceEngine::PrecisionUtils::f32tof16Arrays(dstData, src, dataSize, 1.f, 0.f);
- } else {
- THROW_IE_EXCEPTION << "Unsupported precision: " << precision;
- }
-}
-
-void fillData(Blob::Ptr& blob, const std::vector<float>& src) {
- fillData(blob, src.data());
-}
-
-void fillData(CNNLayerPtr layer, float value, const std::string& blobName) {
- if (layer == nullptr) {
- THROW_IE_EXCEPTION << "layer is nullable";
- }
- if (blobName.empty() && (layer->blobs.size() != 1)) {
- THROW_IE_EXCEPTION << "several blobs";
- }
-
- Blob::Ptr blob = blobName.empty() ? layer->blobs.begin()->second : layer->blobs[blobName];
- fillData(blob, value);
-}
-
-void fillData(CNNLayerPtr layer, const std::vector<float>& values, const std::string& blobName) {
- if (layer == nullptr) {
- THROW_IE_EXCEPTION << "layer is nullable";
- }
- if (blobName.empty() && (layer->blobs.size() != 1)) {
- THROW_IE_EXCEPTION << "several blobs";
- }
-
- Blob::Ptr blob = blobName.empty() ? layer->blobs.begin()->second : layer->blobs[blobName];
- if (blob->size() != values.size()) {
- THROW_IE_EXCEPTION << "values size is not correct";
- }
-
- fillData(blob, values);
-}
-
-CNNLayerPtr getLayer(const CNNNetwork& network, const std::string& layerName) {
- std::vector<CNNLayerPtr> layers = InferenceEngine::details::CNNNetSortTopologically(network);
- for (CNNLayerPtr& layer : layers) {
- if (layer->name == layerName) {
- return layer;
- }
- }
-
- return nullptr;
-}
-
-Blob::Ptr getBlob(CNNLayerPtr layer, const std::string& blobName) {
- if (layer == nullptr) {
- THROW_IE_EXCEPTION << "layer is nullable";
- }
- if (blobName.empty() && (layer->blobs.size() != 1)) {
- THROW_IE_EXCEPTION << "several blobs";
- }
- Blob::Ptr blob = blobName.empty() ? layer->blobs.begin()->second : layer->blobs[blobName];
- return blob;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <vector>
-
-#include <cpp/ie_cnn_network.h>
-#include <legacy/cnn_network_impl.hpp>
-
-void fillDataWithInitValue(InferenceEngine::Blob::Ptr& blob, float initValue);
-
-void fillDataWithInitValue(float *data, size_t size, float initValue = 0.0);
-
-void fillDataWithInitValue(std::vector<float>& data, float initValue = 0.0);
-
-void fillDataWithInitValue(InferenceEngine::CNNLayerPtr layer, const std::string& blobName = "", float initValue = 0.0);
-
-void fillData(InferenceEngine::CNNLayerPtr layer, float value, const std::string& blobName = "");
-void fillData(InferenceEngine::CNNLayerPtr layer, const std::vector<float>& values, const std::string& blobName = "");
-
-inline void fillData(float *dst, size_t size, float value);
-inline void fillData(float *dst, size_t size, const float* src);
-inline void fillData(float *dst, size_t size, const std::vector<float>& src);
-
-void fillData(InferenceEngine::Blob::Ptr& blob, float value);
-void fillData(InferenceEngine::Blob::Ptr& blob, const float* src);
-void fillData(InferenceEngine::Blob::Ptr& blob, const std::vector<float>& values);
-
-InferenceEngine::CNNLayerPtr getLayer(const InferenceEngine::CNNNetwork& network, const std::string& layerName);
-
-InferenceEngine::Blob::Ptr getBlob(InferenceEngine::CNNLayerPtr layer, const std::string& blobName);
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "validation.hpp"
-
-#include <algorithm>
-#include <gtest/gtest.h>
-#include <string>
-#include <vector>
-#include <unordered_set>
-
-#include "low_precision_transformations/network_helper.hpp"
-#include "low_precision_transformations/fake_quantize.hpp"
-#include "low_precision_transformations/transformer.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-void LowPrecisionTransformationValidation::validate(
- CNNNetwork& network,
- // TODO: not correct, quantization parameters are defined per transformation
- const LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers,
- const std::vector<std::pair<std::string, std::string>>& originalLayersInfo) {
- validateIntervalsAndLevel(network, params, notTransformedLayers);
- validateWeightsToConst(network, params, notTransformedLayers);
- validatePrecision(network, params, notTransformedLayers);
- validateActivations(network, params, notTransformedLayers);
- validateScaleShifts(network, params, notTransformedLayers);
- validateConvolutions(network, params, notTransformedLayers);
- validateWithReference(network, originalLayersInfo);
-
- validateAsymmetricPattern(network, params, notTransformedLayers);
-
- const std::vector<CNNLayerPtr> layers = CNNNetSortTopologically(network);
- for (const CNNLayerPtr layer : layers) {
- if (layer->type == "Eltwise") {
- validateEltwise(network, params, *layer);
- }
- }
-
- // TODO: not ready
- // validateCustomLayerHandling(network, notTransformedLayers);
-}
-
-std::vector<std::pair<std::string, std::string>> LowPrecisionTransformationValidation::getLayers(const CNNNetwork& network) {
- std::vector<std::pair<std::string, std::string>> layerNames;
- const std::vector<CNNLayerPtr> layers = CNNNetSortTopologically(network);
- for (const CNNLayerPtr layer : layers) {
- layerNames.push_back(std::pair<std::string, std::string>(layer->name, layer->type));
- }
- return layerNames;
-}
-
-void LowPrecisionTransformationValidation::validateIntervalsAndLevel(
- const CNNNetwork& network,
- const LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers) {
- const std::vector<CNNLayerPtr> layers = CNNNetSortTopologically(network);
- for (const CNNLayerPtr layer : layers) {
- if (notTransformedLayers.find(layer->name) != notTransformedLayers.end()) {
- continue;
- }
-
- if (layer->type == "FakeQuantize") {
- const size_t levelsAsParam = layer->GetParamAsUInt("levels");
- QuantizeLayer* quantizeLayer = dynamic_cast<QuantizeLayer*>(layer.get());
- if (quantizeLayer == nullptr) {
- THROW_IE_EXCEPTION << "unexpected type";
- }
-
- if (levelsAsParam != quantizeLayer->levels) {
- THROW_IE_EXCEPTION << "level as param " << levelsAsParam << " is not equal level as member " << quantizeLayer->levels;
- }
-
- //// TODO: debug only
- //QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(*layer);
- //std::cout << layer->name << (CNNNetworkHelper::onWeights(*layer) ? " on weights" : " on activations") <<
- // ": levels=" << quantizationDetails.levels <<
- // ": input [" << quantizationDetails.inputLowValues[0] << " - " << quantizationDetails.inputHighValues[0]
- // << "], output [" << quantizationDetails.outputLowValues[0] << " - " << quantizationDetails.outputHighValues[0] << "]" << std::endl;
- bool multiBranch = false;
-
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(*layer, "Pooling");
- for (const CNNLayerPtr& child : children) {
- if ((child->type == "Eltwise") || (child->type == "Concat")) {
- multiBranch = true;
- break;
- }
- }
-
- validateFakeQuantize(layer, params, multiBranch);
- } else if (layer->type == "Eltwise") {
- // TODO: FQ on Eltwise specific logic is under development
- } else if (layer->type == "Concat") {
- // TODO: FQ on Concat specific logic is under development
- }
- }
-}
-
-void LowPrecisionTransformationValidation::validateWeightsToConst(
- const CNNNetwork& network,
- const LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers) {
- if ((!params.weightsToConst) ||
- (!std::any_of(
- params.precisionsOnActivations.begin(),
- params.precisionsOnActivations.end(),
- [](const Precision precision) { return precision == Precision::U8; }))) {
- return;
- }
-
- if ((!params.supportAsymmetricQuantization) &&
- (!std::any_of(params.precisionsOnWeights.begin(), params.precisionsOnWeights.end(), [](const Precision precision) { return precision.isSigned(); }))) {
- // U8 on weights in symmetric mode is ignored, shifts on weights are not supported
- return;
- }
-
- const std::vector<CNNLayerPtr> layers = InferenceEngine::details::CNNNetSortTopologically(network);
- for (const CNNLayerPtr layer : layers) {
- if ((layer->type == "FakeQuantize") && CNNNetworkHelper::onWeights(*layer) && (layer->outData.size() == 1) &&
- (getInputTo(layer->outData[0]).begin()->second->type == "Convolution")) {
- CNNLayerPtr childLayer = CNNNetworkHelper::getChildren(*layer)[0];
- if (params.quantizeOutputs || (getInputTo(childLayer->outData[0]).size() != 0)) {
- ASSERT_TRUE(notTransformedLayers.find(childLayer->name) != notTransformedLayers.end()) <<
- "FakeQuantize on weights was found: " << layer->name <<
- " for layer " << childLayer->name;
- }
- }
- }
-}
-
-Precision getInputPrecision(const CNNLayer& layer) {
- if (layer.insData.size() < 1ul) {
- THROW_IE_EXCEPTION << "unexpected inputs count";
- }
-
- DataPtr layerParentData = layer.insData[0].lock();
- if (layerParentData == nullptr) {
- THROW_IE_EXCEPTION << "input data is nullable";
- }
-
- CNNLayerPtr layerParent = getCreatorLayer(layerParentData).lock();
- if (layerParent == nullptr) {
- THROW_IE_EXCEPTION << "parent is nullable";
- }
-
- if ((layer.type == "Convolution") && (layerParent->type == "Eltwise")) {
- DataPtr eltwiseParentData = layerParent->insData[0].lock();
- if (eltwiseParentData == nullptr) {
- THROW_IE_EXCEPTION << "Eltwise parent data is nullable";
- }
-
- // TODO: workaround for the first Convolution:
- // Issue-26622: [IE COMMON][LPT] Check if ScaleShift is dequantization ScaleShift(dequantizationLayersNames) before to apply transformation
- CNNLayerPtr eltwiseParent = getCreatorLayer(eltwiseParentData).lock();
- if (eltwiseParent->type == "Input") {
- return Precision::U8;
- }
-
- return eltwiseParentData->getTensorDesc().getPrecision();;
- } else {
- return layerParentData->getTensorDesc().getPrecision();
- }
-}
-
-Precision getOutputPrecision(const CNNLayer& layer) {
- if (layer.outData.size() < 1ul) {
- THROW_IE_EXCEPTION << "unexpected outputs count";
- }
-
- return layer.outData[0]->getTensorDesc().getPrecision();
-}
-
-// TODO: refactor (I8/U8 is used)
-void LowPrecisionTransformationValidation::validatePrecision(
- const CNNNetwork& network,
- const LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers) {
- const std::vector<CNNLayerPtr> layers = InferenceEngine::details::CNNNetSortTopologically(network);
- for (const CNNLayerPtr layer : layers) {
- if (notTransformedLayers.find(layer->name) != notTransformedLayers.end()) {
- continue;
- }
-
- if ((!params.quantizeOutputs) && (getInputTo(layer->outData[0]).size() == 0ul)) {
- continue;
- }
-
- if (CaselessEq<std::string>()(layer->type, "FakeQuantize") && !isFakeQuantizeBeforeEltwiseOnConvolutionBranch(*layer)) {
- // TODO: handle if FakeQuantize on weights -> Const on weights transformation is disabled
- //if (CNNNetworkHelper::onWeights(*layer)) {
- // for (const DataPtr data : layer->outData) {
- // ASSERT_EQ(Precision::I8, data->getPrecision()) << "FakeQuantize out data on weights has unexpected precision";
- // }
- //}
-
- if (!params.quantizeOutputs) {
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildrenRecursivelyExceptTypes(*layer, { "ScaleShift" });
- if ((children.size() == 0ul) || (children[0]->outData.size() == 0ul) || (getInputTo(children[0]->outData[0]).size() == 0ul)) {
- continue;
- }
- }
-
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(*layer);
- bool hasDequantizationSS = false;
- for (const auto& child : children) {
- if (CaselessEq<std::string>()(child->type, "ScaleShift")) {
- hasDequantizationSS = true;
- break;
- }
- }
-
- if (params.updatePrecisions && hasDequantizationSS) {
- // while S8 is not supported on activations
- for (const DataPtr data : layer->outData) {
- ASSERT_TRUE((data->getPrecision() == Precision::U8) || (data->getPrecision() == Precision::I8)) << "'" <<
- layer->type << "', name '" <<
- layer->name << "' out data on activations has unexpected precision " << data->getPrecision();
- }
- }
- } else if (layer->type == "Const") {
- if (CNNNetworkHelper::onWeights(*layer)) {
- // Note: Const layer on weights can has any original precision - check original network Const layer precision
-
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildrenRecursivelyExceptTypes(*layer, { "Eltwise" });
- if (children[0]->type == "FakeQuantize") {
- // FakeQuantize on weights is possible if weights graph is complex
- continue;
- }
-
- ASSERT_EQ(1ul, children.size()) <<
- "children count " << children.size() <<
- " is unexpected for " << layer->type << " '" << layer->name << "' layer on weights";
- ASSERT_TRUE((children[0]->type == "Convolution") || (children[0]->type == "FullyConnected") || (children[0]->type == "GEMM")) <<
- "unexpected child type " << children[0]->type << " '" << children[0]->name << "' for layer " << layer->type << " '" << layer->name << "' on weights";
-
- if (getInputTo(children[0]->outData[0]).size() == 0) {
- // output data precision depends on device
- continue;
- }
-
- const Precision originalPrecision = getOutputPrecision(*children[0]);
- const Precision inputPrecision = getInputPrecision(*children[0]);
- const Precision weightsPrecision = inputPrecision == originalPrecision ? originalPrecision : params.precisionsOnWeights[0];
-
- if (inputPrecision != originalPrecision) {
- ASSERT_TRUE((weightsPrecision == Precision::I8) || (weightsPrecision == Precision::U8)) <<
- "unexpected weights precision " << weightsPrecision <<
- " for " << children[0]->type << " " << children[0]->name;
- }
-
- for (auto it = layer->blobs.begin(); it != layer->blobs.end(); ++it) {
- ASSERT_EQ(params.updatePrecisions ? weightsPrecision : originalPrecision, it->second->getTensorDesc().getPrecision()) <<
- " constant layer on weights blob precison is not correct" <<
- " for " << layer->type << " " << layer->name;;
- }
-
- for (const DataPtr data : layer->outData) {
- ASSERT_EQ(params.updatePrecisions ? weightsPrecision : originalPrecision, data->getPrecision()) <<
- " constant layer " << layer->name << " on weights blob precison is not correct";
- }
- }
- } else if ((layer->type == "Concat") || (layer->type == "Pooling")) {
- for (const DataPtr data : layer->outData) {
- if (params.updatePrecisions && (!CNNNetworkHelper::onWeights(*layer))) {
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParentsRecursivelyExceptTypes(*layer, { "Pooling" });
- if (std::all_of(
- parents.begin(),
- parents.end(),
- [](const CNNLayerPtr parent) { return (parent->type != "FakeQuantize") || QuantizationDetails::outputLayoutIsSupported(*parent); })) {
- ASSERT_TRUE((data->getPrecision() == Precision::U8) || (data->getPrecision() == Precision::I8)) <<
- layer->type << " layer, name '" <<
- layer->name << "' out data has unexpected precision " << data->getPrecision();
- }
- }
- // ASSERT_EQ(params.updatePrecisions ? Precision::U8 : Precision::FP32, data->getPrecision()) << " " << layer->type << " out data has unexpected precision " << data->getPrecision();
- }
- } else if ((layer->type == "Eltwise") || (layer->type == "Convolution")) {
- for (const DataPtr data : layer->outData) {
- // TODO: refactor: get original layer output precision from original network
- ASSERT_TRUE((data->getPrecision() == Precision::FP16) || (data->getPrecision() == Precision::FP32)) << "'" <<
- layer->type << "', name '" <<
- layer->name << "' out data has unexpected precision " << data->getPrecision();
- }
- }
- }
-}
-
-void LowPrecisionTransformationValidation::validateActivations(
- const CNNNetwork& network,
- const LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers) {
- const std::vector<CNNLayerPtr> layers = InferenceEngine::details::CNNNetSortTopologically(network);
- for (const CNNLayerPtr layer : layers) {
- if ((notTransformedLayers.find(layer->name) != notTransformedLayers.end()) || (layer->type != "ReLU")) {
- continue;
- }
-
- const std::vector<CNNLayerPtr> reluParents = CNNNetworkHelper::getParentsRecursivelyExceptTypes(*layer, { "Pooling" });
- if ((reluParents.size() != 1) || (reluParents[0]->type != "ScaleShift")) {
- continue;
- }
-
- const CNNLayerPtr scaleShift = reluParents[0];
-
- const std::vector<CNNLayerPtr> scaleShiftParents = CNNNetworkHelper::getParentsRecursivelyExceptTypes(*scaleShift, { "Pooling" });
- // if Convolution is parent then ScaleShift can be generated by clean up transformation
- if ((scaleShiftParents.size() != 1) || (scaleShiftParents[0]->type == "Convolution")) {
- continue;
- }
-
- const float negativeSlope = layer->GetParamAsFloat("negative_slope", 0.0);
- if (negativeSlope != 0.0) {
- continue;
- }
-
- const Blob::Ptr weightsBlob = CNNNetworkHelper::getBlob(scaleShift, "weights");
- auto weights = CNNNetworkHelper::getFloatData(weightsBlob);
- const std::vector<float> scales = std::vector<float>(weights.get(), weights.get() + weightsBlob->size());
-
- const Blob::Ptr biasesBlob = CNNNetworkHelper::getBlob(scaleShift, "biases");
- auto biases = CNNNetworkHelper::getFloatData(biasesBlob);
- const std::vector<float> shifts = std::vector<float>(biases.get(), biases.get() + biasesBlob->size());
-
- if (!(std::equal(shifts.begin() + 1, shifts.end(), shifts.begin())) ||
- !(std::equal(scales.begin() + 1, scales.end(), scales.begin()))) {
- continue;
- }
-
- ASSERT_TRUE(true) << scaleShift->type << " '" << scaleShift->name << "' before " << layer->type << " '" << layer->name << "' was found";
- }
-}
-
-void LowPrecisionTransformationValidation::validateScaleShifts(
- const CNNNetwork& network,
- const LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers) {
- if (!params.updateBiases) {
- return;
- }
-
- const std::vector<CNNLayerPtr> layers = InferenceEngine::details::CNNNetSortTopologically(network);
- for (const CNNLayerPtr layer : layers) {
- if ((notTransformedLayers.find(layer->name) != notTransformedLayers.end()) || (layer->type != "ScaleShift")) {
- continue;
- }
-
- const std::vector<CNNLayerPtr> scaleShiftParents = CNNNetworkHelper::getParentsRecursivelyExceptTypes(*layer, { "Pooling" });
- if ((scaleShiftParents.size() != 1) || (scaleShiftParents[0]->type != "Convolution")) {
- continue;
- }
-
- const Blob::Ptr biasesBlob = CNNNetworkHelper::getBlob(layer, "biases");
- auto biases = CNNNetworkHelper::getFloatData(biasesBlob);
- const std::vector<float> shifts = std::vector<float>(biases.get(), biases.get() + biasesBlob->size());
-
- ASSERT_TRUE(std::all_of(shifts.begin(), shifts.end(), [](float value) { return value == 0.0; })) <<
- layer->type << " '" << layer->name << "' after " <<
- scaleShiftParents[0]->type << " '" << scaleShiftParents[0]->name << "' has not zero shift values";
- }
-}
-
-void LowPrecisionTransformationValidation::validateConvolutions(
- const CNNNetwork& network,
- const LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers) {
- if (!params.updatePrecisions) {
- return;
- }
-
- const std::vector<CNNLayerPtr> layers = InferenceEngine::details::CNNNetSortTopologically(network);
- for (const CNNLayerPtr layer : layers) {
- if (layer->type != "Convolution") {
- continue;
- }
-
- CNNLayerPtr parent = CNNNetworkHelper::getParent(*layer, 0ul);
- const CNNLayerPtr precisionLayer = (parent->type == "Eltwise") ? parent : layer;
- const Precision precision = precisionLayer->insData[0].lock()->getTensorDesc().getPrecision();
- ASSERT_NE(Precision::I8, precision) << "unexpected input precision " << precision << " for " << layer->type << " " << layer->name;
-
- //std::cout << "LowPrecisionTransformationValidation::validateConvolutions: " << layer->type << " " << layer->name << ": " << precision << std::endl;
- }
-}
-
-void LowPrecisionTransformationValidation::validateWithReference(
- CNNNetwork& network,
- const std::vector<std::pair<std::string, std::string>>& originalLayersInfo) {
- std::unordered_map<std::string, CNNLayerPtr> layersMap;
- const std::vector<CNNLayerPtr> layers = InferenceEngine::details::CNNNetSortTopologically(network);
- for (const CNNLayerPtr layer : layers) {
- layersMap.emplace(layer->name, layer);
- }
-
- for (const auto layerInfo : originalLayersInfo) {
- const auto it = layersMap.find(layerInfo.first);
-
- // TODO: refactor: transformations move all ScaleShifts
- if (layerInfo.second == "ScaleShift") {
- continue;
- }
-
- // TODO: refactor: transformations can remove FakeQuantize and Const layers on weights
- if ((layerInfo.second == "FakeQuantize") || (layerInfo.second == "Const")) {
- continue;
- }
-
- if (it == layersMap.end()) {
- THROW_IE_EXCEPTION << "Layer '" << layerInfo.first << "' (" << layerInfo.second << ") is absent in transformed network";
- // std::cout << "Layer '" << layerInfo.first << "' (" << layerInfo.second << ") is absent in transformed network" << std::endl;
- // continue;
- }
-
- // TODO: last layer is ignored
- if ((it->second->outData.size() != 0) && (getInputTo(it->second->outData[0]).size() == 0)) {
- continue;
- }
-
- if (it->second->type != layerInfo.second) {
- THROW_IE_EXCEPTION << "Layer '" << layerInfo.first << "' (" << layerInfo.second << ") has unexpected type. Expected value " << it->second->type;
- // std::cout << "Layer '" << layerInfo.first << "' (" << layerInfo.second << ") has unexpected type. Expected value " << it->second->type << std::endl;
- }
- }
-}
-
-void LowPrecisionTransformationValidation::validateCustomLayerHandling(
- const CNNNetwork& network,
- const std::unordered_set<std::string>& notTransformedLayers) {
- const std::vector<CNNLayerPtr> layers = InferenceEngine::details::CNNNetSortTopologically(network);
- for (const CNNLayerPtr layer : layers) {
- if (layer->type == "FullyConnected") {
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(*layer);
- if ((children.size() == 0) || (children[0]->type != "ScaleShift")) {
- THROW_IE_EXCEPTION << "Layer " << layer->name << " is not handled";
- }
- }
- }
-}
-
-DataPrecision LowPrecisionTransformationValidation::getDataPrecision(const CNNLayer& layer, const LayerTransformation::Params& params) {
- const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(layer);
- const bool onWeights = CNNNetworkHelper::onWeights(layer);
-
- if ((onWeights && (params.precisionsOnWeights.size() > 1ul)) ||
- ((!onWeights) && (params.precisionsOnActivations.size() > 1ul))) {
- const LayerTransformation::PrecisionDetails precisionDetails = FakeQuantizeTransformation(params).getPrecisionDetails(quantizationDetails);
- if (precisionDetails.precision != Precision::UNSPECIFIED) {
- const std::vector<Precision>& supportedPrecisions = onWeights ? params.precisionsOnWeights : params.precisionsOnActivations;
- const auto foundIt = std::find(supportedPrecisions.begin(), supportedPrecisions.end(), precisionDetails.precision);
- if (foundIt != supportedPrecisions.end()) {
- return DataPrecision(
- precisionDetails.precision,
- DataPrecision::getMinValue(precisionDetails.precision, quantizationDetails.levels),
- DataPrecision::getMaxValue(precisionDetails.precision),
- false);
- }
- }
- }
-
- const Precision precision = onWeights ? *params.precisionsOnWeights.begin() : *params.precisionsOnActivations.begin();
- return DataPrecision(
- precision,
- DataPrecision::getMinValue(precision, quantizationDetails.levels),
- DataPrecision::getMaxValue(precision),
- false);
-}
-
-// TODO: quantizedTensorAlignmentOnActivations is used
-void LowPrecisionTransformationValidation::validateFakeQuantize(
- const CNNLayerPtr& layer,
- const LayerTransformation::Params& params,
- const bool multiBranch) {
-
- if (isFakeQuantizeBeforeEltwiseOnConvolutionBranch(*layer) || isFakeQuantizeBeforeConcat(*layer)) {
- return;
- }
-
- if (!params.quantizeOutputs) {
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(*layer);
- for (const CNNLayerPtr& child : children) {
- for (const DataPtr data : child->outData) {
- if (getInputTo(data).size() == 0ul) {
- return;
- }
- }
- }
- }
-
- // TODO: Eltwise doesn't support assymetric quantization
- // TODO: make params per transformation
- // TODO: uncomment
- //if (params.supportAsymmetricQuantization) {
- // if (CNNNetworkHelper::onWeights(*layer) && (params.precisionsOnWeights.size() == 1)) {
- // const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(*layer);
- // if (params.precisionsOnWeights.begin()->isSigned()) {
- // ASSERT_TRUE(quantizationDetails.hasNegativeOutput());
- // } else {
- // ASSERT_FALSE(quantizationDetails.hasNegativeOutput());
- // }
- // } else if ((!CNNNetworkHelper::onWeights(*layer)) && (params.precisionsOnActivations.size() == 1)) {
- // const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(*layer);
- // if (params.precisionsOnActivations.begin()->isSigned()) {
- // ASSERT_TRUE(quantizationDetails.hasNegativeOutput());
- // } else {
- // ASSERT_FALSE(quantizationDetails.hasNegativeOutput());
- // }
- // }
- //}
-
- const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(*layer);
- // TODO: temporary fix: not possible to get min/max value for I8 if level was changed
- if (((quantizationDetails.levels != 255) && (quantizationDetails.levels != 256)) ||
- (!layer->outData.empty() &&
- // not quantized
- ((layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP16) ||
- (layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP32)))) {
- return;
- }
-
- const DataPrecision dataPrecision = getDataPrecision(*layer, params);
- for (size_t i = 0; i < quantizationDetails.outputLowValues.size(); ++i) {
- const auto lowValue = quantizationDetails.outputLowValues[i];
- const auto highValue = quantizationDetails.outputHighValues[i];
-
- if (((
- (params.quantizedTensorAlignmentOnActivations == LayerTransformation::QuantizedTensorAlignment::None) ||
- (params.quantizedTensorAlignmentOnActivations == LayerTransformation::QuantizedTensorAlignment::UpdateLevel)) &&
- ((!equals(dataPrecision.min, lowValue)) && (!equals(dataPrecision.max, highValue)))
- ) ||
- ((params.quantizedTensorAlignmentOnActivations == LayerTransformation::QuantizedTensorAlignment::UpdateIntervals) &&
- ((!equals(dataPrecision.min, lowValue)) || (!equals(dataPrecision.max, highValue))))
- ) {
- ASSERT_TRUE(true) <<
- "Output interval [" << lowValue << " - " << highValue <<
- "] for layer " << layer->name << " is not correct, " <<
- "expected [" << dataPrecision.min << " - " << dataPrecision.max << "]";
-
- //// TODO: debug only
- //std::cout <<
- // "Output interval [" << lowValue << " - " << highValue <<
- // "] for layer " << layer->name << " is not correct, " <<
- // "expected [" << dataPrecision.min << " - " << dataPrecision.max << "]" << std::endl;
- }
-
-
- switch (params.quantizedTensorAlignmentOnActivations) {
- case LayerTransformation::QuantizedTensorAlignment::None: {
- if ((dataPrecision.precision == Precision::U8) || (dataPrecision.precision == Precision::I8)) {
- if ((quantizationDetails.levels != 255) && (quantizationDetails.levels != 256)) {
- ASSERT_TRUE(false) << "unexpected quantization levels " << quantizationDetails.levels <<
- " for layer " << layer->name;
- }
- } else {
- ASSERT_TRUE(false) << "layer '" << layer->type << "', name '" << layer->name << "' has unexpected precision" << dataPrecision.precision;
- }
-
- break;
- }
- case LayerTransformation::QuantizedTensorAlignment::UpdateIntervals: {
- if ((dataPrecision.precision == Precision::U8) || (dataPrecision.precision == Precision::I8)) {
- if ((quantizationDetails.levels != 255) && (quantizationDetails.levels != 256)) {
- ASSERT_TRUE(false) << "unexpected quantization levels " << quantizationDetails.levels <<
- " for layer " << layer->name;
- }
- } else {
- ASSERT_TRUE(false) << "layer '" << layer->type << "', name '" << layer->name << "' has unexpected precision" << dataPrecision.precision;
- }
-
- break;
- }
- case LayerTransformation::QuantizedTensorAlignment::UpdateLevel: {
- if ((dataPrecision.precision == Precision::U8) || (dataPrecision.precision == Precision::I8)) {
- if (quantizationDetails.levels > 256) {
- ASSERT_TRUE(false) << "layer '" << layer->type << "', name '" << layer->name << "' has unexpected quantization levels " << quantizationDetails.levels;
- }
-
- if (dataPrecision.precision == Precision::U8) {
- if (quantizationDetails.outputLowValues[0] != 0.0) {
- ASSERT_TRUE(false) << "unexpected output interval low value: " << quantizationDetails << " for layer " << layer->name;
- }
- if (quantizationDetails.levels != (quantizationDetails.outputHighValues[0] + 1)) {
- ASSERT_TRUE(false) << "unexpected quantization levels " << quantizationDetails.levels <<
- " for layer " << layer->name;
- }
- } else if (dataPrecision.precision == Precision::I8) {
- // FIXME: alignment on weights is temporary unsupported
- if (CNNNetworkHelper::onWeights(*layer)) {
- break;
- }
-
- if (quantizationDetails.levels != (fabs(quantizationDetails.outputLowValues[0]) + quantizationDetails.outputHighValues[0] + 1)) {
- ASSERT_TRUE(false) << "unexpected quantization levels " << quantizationDetails.levels << " for layer " << layer->name;
- }
- }
- } else {
- ASSERT_TRUE(false) << "layer '" << layer->type << "', name '" << layer->name << "' has unexpected precision" << dataPrecision.precision;
- }
- break;
- }
- default: {
- THROW_IE_EXCEPTION << "unsupported QuantizedTensorAlignment mode";
- }
- }
-
-
- if (multiBranch) {
- if (((dataPrecision.precision == Precision::I8) || (dataPrecision.precision == Precision::U8)) &&
- (quantizationDetails.levels > 256)) {
- ASSERT_TRUE(false) << "unexpected quantization levels " << quantizationDetails.levels;
- }
-
- // TODO: FQ before Eltwise uses another algorithm - fix it
- //if ((lowValue < (dataPrecision.min - 0.0001)) || (highValue > (dataPrecision.max + 0.0001))) {
- // ASSERT_TRUE(false) <<
- // "Output interval [" << lowValue << " - " << highValue << "] for layer " << layer->name <<
- // " is not included in [" << dataPrecision.min << " - " << dataPrecision.max << "]";
-
- // //// TODO: debug only
- // //std::cout <<
- // // "Output interval [" << lowValue << " - " << highValue << "] for layer " << layer->name <<
- // // " is not included in [" << dataPrecision.min << " - " << dataPrecision.max << "]" << std::endl;
- //}
- } else {
- if ((dataPrecision.precision == Precision::I8) || (dataPrecision.precision == Precision::U8)) {
- // FIXME: alignment on weights is temporary unsupported
- if (!CNNNetworkHelper::onWeights(*layer)) {
- if ((dataPrecision.precision == Precision::U8) &&
- ((!equals(dataPrecision.min, lowValue)) || (!equals(dataPrecision.max, highValue)))) {
- ASSERT_TRUE(false) <<
- "Output interval [" << lowValue << " - " << highValue <<
- "] for layer " << layer->name << " is not correct, " <<
- "expected [" << dataPrecision.min << " - " << dataPrecision.max << "]";
- }
- }
- } else {
- ASSERT_TRUE(false) << "layer '" << layer->type << "', name '" << layer->name << "' has unexpected precision" << dataPrecision.precision;
- }
- }
- }
-}
-
-bool LowPrecisionTransformationValidation::isFakeQuantizeBeforeEltwiseOnConvolutionBranch(const CNNLayer& fakeQuantize) {
- // TODO: were is check on Convolution branch?
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(fakeQuantize);
- if (children.size() == 1lu) {
- if (CaselessEq<std::string>()(children[0]->type, "Eltwise"))
- return true;
- if (CaselessEq<std::string>()(children[0]->type, "ScaleShift")) {
- const std::vector<CNNLayerPtr> children2 = CNNNetworkHelper::getChildren(*children[0]);
- return (children2.size() == 1lu) && (CaselessEq<std::string>()(children2[0]->type, "Eltwise"));
- }
- }
- return false;
-}
-
-bool LowPrecisionTransformationValidation::isFakeQuantizeBeforeConcat(const CNNLayer& fakeQuantize) {
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildrenRecursivelyExceptTypes(fakeQuantize, { "Pooling" });
- for (const CNNLayerPtr& child : children) {
- if (child->type == "Concat") {
- return true;
- }
- }
- return false;
-}
-
-bool inline LowPrecisionTransformationValidation::equals(const float value1, const float value2, const float max_diff) {
- return (std::fabs(value1 - value2) < max_diff);
-}
-
-void LowPrecisionTransformationValidation::validateEltwise(CNNNetwork& network, const LayerTransformation::Params& params, const CNNLayer& eltwise) {
- if (params.updatePrecisions) {
- // TODO: refactor: use used transformations to identify is Eltwise transformation or Eltwise CPU transformation used
- //const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParentsRecursivelyExceptTypes(eltwise, { "Pooling", "ScaleShift" });
- //if ((parents[0]->type == "FakeQuantize") && (parents[1]->type == "FakeQuantize")) {
- // const Precision precision0 = parents[0]->outData[0]->getPrecision();
- // const Precision precision1 = parents[1]->outData[0]->getPrecision();
- // if (
- // (((precision0 != Precision::I8) && (precision0 != Precision::U8)) ||
- // ((precision1 != Precision::FP32) && (precision1 != Precision::FP16))) &&
- // (((precision0 != Precision::FP32) && (precision0 != Precision::FP16)) ||
- // ((precision1 != Precision::I8) && (precision1 != Precision::U8)))
- // ) {
- // ASSERT_TRUE(false) << "layer precisions are not correct: " <<
- // parents[0]->name << ", " << parents[0]->precision << " and " <<
- // parents[1]->name << ", " << parents[1]->precision;
- // }
- //}
- }
-}
-
-void LowPrecisionTransformationValidation::validateAsymmetricPattern(
- const CNNNetwork& network,
- const LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers) {
- const std::vector<CNNLayerPtr> layers = InferenceEngine::details::CNNNetSortTopologically(network);
- for (const CNNLayerPtr layer : layers) {
- if (notTransformedLayers.find(layer->name) != notTransformedLayers.end()) {
- continue;
- }
- validateAsymmetricPattern(*layer, params);
- }
-}
-
-void LowPrecisionTransformationValidation::validateAsymmetricPattern(const CNNLayer& layer, const LayerTransformation::Params& params) {
- if (layer.type != "Convolution") {
- return;
- }
-
- if (params.supportAsymmetricQuantization && params.updatePrecisions) {
- CNNLayerPtr parentOnData = CNNNetworkHelper::getParent(layer, 0ul);
- if (parentOnData->type == "Eltwise") {
- validateAsymmetricPatternEltwise(*parentOnData, params);
- }
-
- CNNLayerPtr parentOnWeights = CNNNetworkHelper::getParent(layer, 1ul);
- if (parentOnWeights == nullptr) {
- THROW_IE_EXCEPTION << "weights layer is absent for " << layer.type << " " << layer.name;
- // std::cout << "weights layer is absent for " << layer.type << " " << layer.name << std::endl;
- // return;
- }
- if (parentOnWeights->type == "Eltwise") {
- validateAsymmetricPatternEltwise(*parentOnWeights, params);
- }
- }
-}
-
-void LowPrecisionTransformationValidation::validateAsymmetricPatternEltwise(const CNNLayer& eltwise, const LayerTransformation::Params& params) {
- if ((!eltwise.CheckParamPresence("operation")) || (eltwise.GetParamAsString("operation") != "sub")) {
- return;
- }
-
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(eltwise);
- for (const CNNLayerPtr& parent : parents) {
- if (parent->type == "Input") {
- return;
- }
- }
-
- // TODO: hardcoded for CPU
- const Precision precision = CNNNetworkHelper::onWeights(eltwise) ? Precision::I8 : Precision::U8;
- for (const CNNLayerPtr& parent : parents) {
- if (parent->type == "Const") {
- validateEmptyConst(*parent, params);
- }
-
- ASSERT_EQ(1, parent->outData.size());
- ASSERT_EQ(precision, parent->outData[0]->getPrecision()) <<
- "layer " << parent->type << " '" << parent->name <<
- "' has unexpected precision " << parent->outData[0]->getPrecision() <<
- ", expected: " << precision;
- }
-}
-
-void LowPrecisionTransformationValidation::validateEmptyConst(const CNNLayer& layer, const LayerTransformation::Params& params) {
- if (layer.type == "Const") {
- const Precision precision = layer.outData[0]->getTensorDesc().getPrecision();
- if (params.updatePrecisions) {
- // TODO: get correct precision here
- ASSERT_TRUE((precision == Precision::U8) || (precision == Precision::I8));
- } else {
- ASSERT_TRUE((precision == Precision::FP32) || (precision == Precision::FP16));
- }
-
- const auto it = layer.blobs.find("custom");
- ASSERT_NE(layer.blobs.end(), it);
- const Blob::Ptr blob = it->second;
- std::shared_ptr<float> buffer = CNNNetworkHelper::getFloatData(blob);
- ASSERT_TRUE(std::any_of(buffer.get(), buffer.get() + blob->size(), [](const float value) { return value != 0.0; })) <<
- layer.type << " layer '" << layer.name << "' has " << blob->getTensorDesc().getPrecision() << " zero values blob";
- }
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <string>
-#include <vector>
-#include <memory>
-#include <unordered_map>
-#include <unordered_set>
-
-#include <legacy/details/ie_cnn_network_tools.h>
-#include <caseless.hpp>
-#include "low_precision_transformations/network_helper.hpp"
-#include "low_precision_transformations/layer_transformation.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-class LowPrecisionChainValidation {
-public:
- class Chain : public std::unordered_set<std::string> {
- public:
- Chain(const Precision precision) : precision(precision) {}
- const Precision precision;
- bool exist(const std::vector<std::string> layerNames) {
- for (const std::string& layerName : layerNames) {
- if (find(layerName) == end()) {
- return false;
- }
- }
- return true;
- }
- };
-
- using ChainsVector = std::vector<std::shared_ptr<Chain>>;
-
- static ChainsVector validate(
- const CNNNetwork& network,
- const CNNLayerPtr layer,
- const CNNLayerPtr endLayer) {
- std::unordered_map<std::string, Precision> precisionByPort;
- analyse(network, precisionByPort);
-
- std::unordered_map<std::string, std::shared_ptr<InternalChain>> handledLayers;
-
- InternalChainsMap chains;
- const std::shared_ptr<InternalChain> chain = std::make_shared<InternalChain>(handledLayers.size(), layer->outData[0]->getTensorDesc().getPrecision());
- chains.emplace(chain->id, chain);
-
- std::unordered_map<size_t, std::unordered_set<size_t>> hasToBeMerged;
-
- validate(
- layer,
- endLayer,
- precisionByPort,
- handledLayers,
- chains,
- chains[0],
- layer->outData[0]->getTensorDesc().getPrecision(),
- hasToBeMerged);
-
- auto it = hasToBeMerged.begin();
- while (it != hasToBeMerged.end()) {
- const size_t destinationChainId = it->first;
- const auto destinationChainIt = chains.find(destinationChainId);
- if (destinationChainIt == chains.end()) {
- THROW_IE_EXCEPTION << "chain with id was not found " << destinationChainId;
- }
-
- const std::shared_ptr<InternalChain> destinationChain = destinationChainIt->second;
-
- for (auto const sourceChainId : it->second) {
- const auto sourceChainIt = chains.find(sourceChainId);
- if (sourceChainIt == chains.end()) {
- THROW_IE_EXCEPTION << "chain with id was not found " << sourceChainId;
- }
-
- std::shared_ptr<InternalChain> sourceChain = sourceChainIt->second;
- for (auto sourceIt = sourceChain->begin(); sourceIt != sourceChain->end(); ++sourceIt) {
- destinationChain->emplace(*sourceIt);
- }
-
- chains.erase(sourceChainIt);
- }
-
- hasToBeMerged.erase(it);
- it = hasToBeMerged.begin();
- }
-
- ChainsVector resultChains;
- for (auto internalChainIt : chains) {
- auto internalChain = internalChainIt.second;
- std::shared_ptr<Chain> chain = std::make_shared<Chain>(internalChain->precision);
- resultChains.push_back(chain);
- for (auto layerNameIt = internalChain->begin(); layerNameIt != internalChain->end(); ++layerNameIt) {
- chain->insert(*layerNameIt);
- }
- }
- return resultChains;
- }
-
-private:
- class InternalChain : public std::unordered_set<std::string> {
- public:
- InternalChain(const size_t id, const Precision precision) : id(id), precision(precision) {}
- const size_t id;
- const Precision precision;
- };
-
- using InternalChainsMap = std::map<size_t, std::shared_ptr<InternalChain>>;
-
- static void validate(
- const CNNLayerPtr layer,
- const CNNLayerPtr endLayer,
- const std::unordered_map<std::string, Precision>& precisionByPort,
- std::unordered_map<std::string, std::shared_ptr<InternalChain>>& handledLayers,
- InternalChainsMap& chains,
- std::shared_ptr<InternalChain> chain,
- const Precision chainPrecision,
- std::unordered_map<std::size_t, std::unordered_set<size_t>>& hasToBeMerged) {
- const auto handledLayerIt = handledLayers.find(layer->name);
- if (handledLayerIt != handledLayers.end())
- {
- if (chain->precision == handledLayerIt->second->precision) {
- const auto it = hasToBeMerged.find(handledLayerIt->second->id);
- std::unordered_set<size_t>& fused = it == hasToBeMerged.end() ?
- hasToBeMerged.emplace(handledLayerIt->second->id, std::unordered_set<size_t>()).first->second :
- it->second;
- fused.insert(chain->id);
- }
- return;
- }
-
- handledLayers.emplace(layer->name, chain);
-
- chain->insert(layer->name);
-
- if ((endLayer != nullptr) && (layer->name == endLayer->name)) {
- return;
- }
-
- for (size_t outDataIndex = 0; outDataIndex < layer->outData.size(); ++outDataIndex) {
- DataPtr outData = layer->outData[outDataIndex];
- const std::map<std::string, CNNLayerPtr> inputTo = getInputTo(outData);
- const Precision parentOutPrecision = getDataPrecision(precisionByPort, *layer, outDataIndex);
-
- for (auto it = inputTo.begin(); it != inputTo.end(); it++) {
- const CNNLayerPtr child = it->second;
-
- for (size_t childOutDataIndex = 0ul; childOutDataIndex < child->outData.size(); ++childOutDataIndex) {
- const Precision childOutPrecision = getDataPrecision(precisionByPort, *child, childOutDataIndex);
- if (parentOutPrecision == childOutPrecision) {
- validate(child, endLayer, precisionByPort, handledLayers, chains, chain, chainPrecision, hasToBeMerged);
- } else {
- std::shared_ptr<InternalChain> childChain = std::make_shared<InternalChain>(handledLayers.size(), childOutPrecision);
- chains.emplace(childChain->id, childChain);
- validate(child, endLayer, precisionByPort, handledLayers, chains, childChain, childOutPrecision, hasToBeMerged);
- }
- }
- }
- }
- }
-
- static void analyse(const CNNNetwork& network, std::unordered_map<std::string, Precision>& precisionByPort) {
- std::unordered_set<std::string> handledLayers;
-
- const std::vector<CNNLayerPtr> layers = CNNNetSortTopologically(network);
- for (const CNNLayerPtr layer : layers) {
- if (handledLayers.find(layer->name) != handledLayers.end()) {
- continue;
- }
-
- if (analyseAsymmetricQuantizationPattern(*layer, precisionByPort, handledLayers) != Precision::UNSPECIFIED) {
- continue;
- }
-
- if (analyseSymmetricQuantizationPattern(*layer, precisionByPort, handledLayers) != Precision::UNSPECIFIED) {
- continue;
- }
-
- fillPrecisionByPort(*layer, Precision::UNSPECIFIED, precisionByPort);
- handledLayers.emplace(layer->name);
- }
- }
-
- static void fillPrecisionByPort(
- const CNNLayer& layer,
- const Precision precision,
- std::unordered_map<std::string, Precision>& precisionByPort) {
- for (size_t outDataIndex = 0; outDataIndex < layer.outData.size(); ++outDataIndex) {
- DataPtr outData = layer.outData[outDataIndex];
- const std::string outDataId = getDataId(layer, outDataIndex);
- if (precisionByPort.find(outDataId) != precisionByPort.end()) {
- continue;
- }
-
- precisionByPort.emplace(outDataId, precision == Precision::UNSPECIFIED ? outData->getTensorDesc().getPrecision() : precision);
- }
- }
-
- static std::string getDataId(const CNNLayer& layer, const size_t dataIndex) {
- return layer.name + ".outputPort" + std::to_string(dataIndex);
- }
-
- static Precision getDataPrecision(const std::unordered_map<std::string, Precision>& precisionByPort, const CNNLayer& layer, const size_t dataIndex) {
- const auto precisionIt = precisionByPort.find(getDataId(layer, dataIndex));
- if (precisionIt == precisionByPort.end()) {
- THROW_IE_EXCEPTION <<
- "Precision for data '" << getDataId(layer, dataIndex) <<
- "' was not found for layer " << layer.type << " " << layer.name;
- }
- return precisionIt->second;
- }
-
- static Precision analyseAsymmetricQuantizationPattern(
- const CNNLayer& layer,
- std::unordered_map<std::string, Precision>& precisionByPort,
- std::unordered_set<std::string>& handledLayers) {
- if (!CaselessEq<std::string>()(layer.type, "Eltwise")) {
- return Precision::UNSPECIFIED;
- }
-
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(layer);
- if ((parents.size() != 2ul) ||
- (!CaselessEq<std::string>()(parents[0]->type, "FakeQuantize")) ||
- (!CaselessEq<std::string>()(parents[1]->type, "Const")) ||
- CNNNetworkHelper::getParents(*parents[1]).size() != 0) {
- return Precision::UNSPECIFIED;
- }
-
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(layer);
- if ((children.size() != 1ul) || (!CaselessEq<std::string>()(children[0]->type, "Convolution"))) {
- return Precision::UNSPECIFIED;
- }
-
- const std::vector<CNNLayerPtr> convolutionChildren = CNNNetworkHelper::getChildren(*children[0]);
- if ((convolutionChildren.size() != 1ul) || (!CaselessEq<std::string>()(convolutionChildren[0]->type, "FakeQuantize"))) {
- return Precision::UNSPECIFIED;
- }
-
- const Precision precisionBefore = CNNNetworkHelper::getPrecisionParent(layer);
- const Precision precisionAfterFakeQuantize = convolutionChildren[0]->outData[0]->getTensorDesc().getPrecision();
- const Precision precision = (precisionBefore == precisionAfterFakeQuantize) ? precisionAfterFakeQuantize : layer.outData[0]->getTensorDesc().getPrecision();
-
- fillPrecisionByPort(layer, precision, precisionByPort);
- handledLayers.emplace(layer.name);
- handledLayers.emplace(children[0]->name);
-
- return precision;
- }
-
- static Precision analyseSymmetricQuantizationPattern(
- const CNNLayer& layer,
- std::unordered_map<std::string, Precision>& precisionByPort,
- std::unordered_set<std::string>& handledLayers) {
- if ((!CaselessEq<std::string>()(layer.type, "Convolution")) &&
- (!CaselessEq<std::string>()(layer.type, "FullyConnected")) &&
- (!CaselessEq<std::string>()(layer.type, "GEMM"))) {
- return Precision::UNSPECIFIED;
- }
-
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(layer);
- if ((children.size() != 1ul) || (!CaselessEq<std::string>()(children[0]->type, "FakeQuantize"))) {
- return Precision::UNSPECIFIED;
- }
-
- const Precision precisionBefore = CNNNetworkHelper::getPrecisionParent(layer, 0ul);
- const Precision precisionAfterFakeQuantize = children[0]->outData[0]->getTensorDesc().getPrecision();
- const Precision precision = (precisionBefore == precisionAfterFakeQuantize) ? precisionAfterFakeQuantize : layer.outData[0]->getTensorDesc().getPrecision();
-
- // TODO: convolution weights and biases layers are skipped
- fillPrecisionByPort(layer, precision, precisionByPort);
- handledLayers.emplace(layer.name);
-
- return precision;
- }
-};
-
-class LowPrecisionTransformationValidation {
-public:
- static void validate(
- InferenceEngine::CNNNetwork& network,
- // TODO: not correct, quantization parameters are defined per transformation
- const InferenceEngine::details::LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers = {},
- const std::vector<std::pair<std::string, std::string>>& originalLayersInfo = {});
-
- static std::vector<std::pair<std::string, std::string>> getLayers(const InferenceEngine::CNNNetwork& network);
-
- static void validateIntervalsAndLevel(
- const InferenceEngine::CNNNetwork& network,
- const InferenceEngine::details::LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers);
-
- static void validateWeightsToConst(
- const InferenceEngine::CNNNetwork& network,
- const InferenceEngine::details::LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers);
-
- // TODO: refactor (I8/U8 is used)
- static void validatePrecision(
- const InferenceEngine::CNNNetwork& network,
- const InferenceEngine::details::LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers);
-
- static void validateActivations(
- const InferenceEngine::CNNNetwork& network,
- const InferenceEngine::details::LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers);
-
- static void validateScaleShifts(
- const InferenceEngine::CNNNetwork& network,
- const InferenceEngine::details::LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers);
-
- static void validateConvolutions(
- const InferenceEngine::CNNNetwork& network,
- const InferenceEngine::details::LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers);
-
- static void validateWithReference(
- InferenceEngine::CNNNetwork& network,
- const std::vector<std::pair<std::string, std::string>>& originalLayersInfo);
-
- static void validateCustomLayerHandling(
- const InferenceEngine::CNNNetwork& network,
- const std::unordered_set<std::string>& notTransformedLayers);
-
-private:
- static InferenceEngine::details::DataPrecision getDataPrecision(
- const InferenceEngine::CNNLayer& layer,
- const InferenceEngine::details::LayerTransformation::Params& params);
-
- // TODO: quantizedTensorAlignmentOnActivations is used
- static void validateFakeQuantize(
- const InferenceEngine::CNNLayerPtr& layer,
- const InferenceEngine::details::LayerTransformation::Params& params,
- const bool multiBranch);
-
- static bool isFakeQuantizeBeforeEltwiseOnConvolutionBranch(const InferenceEngine::CNNLayer& fakeQuantize);
-
- static bool isFakeQuantizeBeforeConcat(const InferenceEngine::CNNLayer& fakeQuantize);
-
- static inline bool equals(const float value1, const float value2, const float max_diff = 0.0001f);
-
- static void validateEltwise(
- InferenceEngine::CNNNetwork& network,
- const InferenceEngine::details::LayerTransformation::Params& params,
- const InferenceEngine::CNNLayer& eltwise);
-
- static void validateAsymmetricPattern(
- const InferenceEngine::CNNNetwork& network,
- const InferenceEngine::details::LayerTransformation::Params& params,
- const std::unordered_set<std::string>& notTransformedLayers);
-
- static void validateAsymmetricPattern(const InferenceEngine::CNNLayer& layer, const InferenceEngine::details::LayerTransformation::Params& params);
-
- static void validateAsymmetricPatternEltwise(const InferenceEngine::CNNLayer& eltwise, const InferenceEngine::details::LayerTransformation::Params& params);
-
- static void validateEmptyConst(const InferenceEngine::CNNLayer& layer, const InferenceEngine::details::LayerTransformation::Params& params);
-};
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-std::string ConcatMultiBranchTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- std::string layers = layersTemplate;
- // TODO: hard-coded values
-
- size_t totalOffset = 0;
-
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_LOW_OFFSET_1", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_HIGHT_OFFSET_1", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_LOW_OFFSET_1", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_HIGH_OFFSET_1", totalOffset);
- totalOffset += 4;
-
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_LOW_OFFSET_2", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_HIGHT_OFFSET_2", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_LOW_OFFSET_2", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_HIGH_OFFSET_2", totalOffset);
- totalOffset += 4;
-
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_LOW_OFFSET_3", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_HIGHT_OFFSET_3", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_LOW_OFFSET_3", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_HIGH_OFFSET_3", totalOffset);
- totalOffset += 4;
-
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_OFFSET", totalOffset);
- totalOffset += 6 * 6 * 3 * 3 * 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_SIZE", 6 * 6 * 3 * 3 * 4);
-
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_LOW_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_HIGHT_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_OUTPUT_LOW_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_OUTPUT_HIGH_OFFSET", totalOffset);
- totalOffset += 4;
-
- REPLACE_WITH_NUM(layers, "BIASES_CONST_OFFSET", totalOffset);
- totalOffset += 6 * 4;
- REPLACE_WITH_NUM(layers, "BIASES_CONST_SIZE", 6 * 4);
-
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_LOW_OFFSET_4", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_HIGHT_OFFSET_4", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_LOW_OFFSET_4", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_HIGH_OFFSET_4", totalOffset);
- totalOffset += 4;
-
- REPLACE_WITH_NUM(layers, "DEQUANTIZE_SCALESHIFT_WEIGHTS_OFFSET", totalOffset);
- totalOffset += 24;
- REPLACE_WITH_NUM(layers, "DEQUANTIZE_SCALESHIFT_BIASES_OFFSET", totalOffset);
- totalOffset += 24;
-
- REPLACE_WITH_STR(layers, "_PR_", p._network_precision);
-
- const std::string model = IRTemplateGenerator::getIRTemplate(
- "TransformationsTest",
- { { 1lu, 3, 299, 299 }, { 1lu, 3, 299, 299 } },
- p._network_precision,
- layers,
- edgesTemplate,
- 6);
-
- return model;
-}
-
-std::string ConcatMultiBranchTestModel::getName() const {
- return "ConcatMultiBranchTestModel";
-}
-
-bool ConcatMultiBranchTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params));
- transformer.transform(network);
- return true;
-}
-
-void ConcatMultiBranchTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "branch1/dataConstInputLow1"), 255.0 / 200.0, "custom");
- fillData(getLayer(network, "branch1/dataConstInputHigh1"), 255.0 / 100.0, "custom");
- fillData(getLayer(network, "branch1/dataConstOutputLow1"), 255.0 / 200.0, "custom");
- fillData(getLayer(network, "branch1/dataConstOutputHigh1"), 255.0 / 100.0, "custom");
-
- fillData(getLayer(network, "branch1/dataConstInputLow2"), 255.0 / 400.0, "custom");
- fillData(getLayer(network, "branch1/dataConstInputHigh2"), 255.0 / 200.0, "custom");
- fillData(getLayer(network, "branch1/dataConstOutputLow2"), 255.0 / 400.0, "custom");
- fillData(getLayer(network, "branch1/dataConstOutputHigh2"), 255.0 / 200.0, "custom");
-
- fillData(getLayer(network, "branch2/dataConstInputLow3"), 255.0 / 200.0, "custom");
- fillData(getLayer(network, "branch2/dataConstInputHigh3"), 255.0 / 100.0, "custom");
- fillData(getLayer(network, "branch2/dataConstOutputLow3"), 255.0 / 200.0, "custom");
- fillData(getLayer(network, "branch2/dataConstOutputHigh3"), 255.0 / 100.0, "custom");
-
- fillData(getLayer(network, "branch2/weightsConstInput"), 0.0, "custom");
- fillData(getLayer(network, "branch2/weightsConstInputLow"), 0.0, "custom");
- fillData(getLayer(network, "branch2/weightsConstInputHigh"), 255.0 / 200.0, "custom");
- fillData(getLayer(network, "branch2/weightsConstOutputLow"), 0.0, "custom");
- fillData(getLayer(network, "branch2/weightsConstOutputHigh"), 255.0 / 200.0, "custom");
-
- fillData(getLayer(network, "branch2/biasesConst"), { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 });
-
- fillData(getLayer(network, "branch2/dataConstInputLow4"), 255.0 / 800.0, "custom");
- fillData(getLayer(network, "branch2/dataConstInputHigh4"), 255.0 / 400.0, "custom");
- fillData(getLayer(network, "branch2/dataConstOutputLow4"), 255.0 / 800.0, "custom");
- fillData(getLayer(network, "branch2/dataConstOutputHigh4"), 255.0 / 400.0, "custom");
-}
-
-const std::string ConcatMultiBranchTestModel::layersTemplate = R"V0G0N(
-<layer name="branch1/dataConstInputLow1" type="Const" precision="_PR_" id="102">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_LOW_OFFSET_1" size="4"/>
- </blobs>
-</layer>
-<layer name="branch1/dataConstInputHigh1" type="Const" precision="_PR_" id="103">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_HIGHT_OFFSET_1" size="4"/>
- </blobs>
-</layer>
-
-<layer name="branch1/dataConstOutputLow1" type="Const" precision="_PR_" id="104">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_LOW_OFFSET_1" size="4"/>
- </blobs>
-</layer>
-<layer name="branch1/dataConstOutputHigh1" type="Const" precision="_PR_" id="105">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_HIGH_OFFSET_1" size="4"/>
- </blobs>
-</layer>
-
-<layer name="branch1/dataFakeQuantize1" type="FakeQuantize" precision="_PR_" id="106">
- <data levels="256" />
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- <port id="1">
- <dim>1</dim>
- </port>
- <port id="2">
- <dim>1</dim>
- </port>
- <port id="3">
- <dim>1</dim>
- </port>
- <port id="4">
- <dim>1</dim>
- </port>
- </input>
- <output>
- <port id="5">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
-</layer>
-
-<layer name="branch1/dataConstInputLow2" type="Const" precision="_PR_" id="107">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_LOW_OFFSET_2" size="4"/>
- </blobs>
-</layer>
-<layer name="branch1/dataConstInputHigh2" type="Const" precision="_PR_" id="108">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_HIGHT_OFFSET_2" size="4"/>
- </blobs>
-</layer>
-
-<layer name="branch1/dataConstOutputLow2" type="Const" precision="_PR_" id="109">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_LOW_OFFSET_2" size="4"/>
- </blobs>
-</layer>
-<layer name="branch1/dataConstOutputHigh2" type="Const" precision="_PR_" id="110">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_HIGH_OFFSET_2" size="4"/>
- </blobs>
-</layer>
-
-
-<layer name="branch1/dataFakeQuantize2" type="FakeQuantize" precision="_PR_" id="111">
- <data levels="256" />
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- <port id="1">
- <dim>1</dim>
- </port>
- <port id="2">
- <dim>1</dim>
- </port>
- <port id="3">
- <dim>1</dim>
- </port>
- <port id="4">
- <dim>1</dim>
- </port>
- </input>
- <output>
- <port id="5">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
-</layer>
-
-<layer name="branch1/concat" type="Concat" precision="_PR_" id="113">
- <data axis="1" />
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- <port id="1">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
-
- </input>
- <output>
- <port id="2">
- <dim>1</dim>
- <dim>6</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
-</layer>
-
-<layer name="branch2/dataConstInputLow3" type="Const" precision="_PR_" id="207">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_LOW_OFFSET_3" size="4"/>
- </blobs>
-</layer>
-<layer name="branch2/dataConstInputHigh3" type="Const" precision="_PR_" id="208">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_HIGHT_OFFSET_3" size="4"/>
- </blobs>
-</layer>
-
-<layer name="branch2/dataConstOutputLow3" type="Const" precision="_PR_" id="209">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_LOW_OFFSET_3" size="4"/>
- </blobs>
-</layer>
-<layer name="branch2/dataConstOutputHigh3" type="Const" precision="_PR_" id="210">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_HIGH_OFFSET_3" size="4"/>
- </blobs>
-</layer>
-
-
-<layer name="branch2/dataFakeQuantize3" type="FakeQuantize" precision="_PR_" id="211">
- <data levels="256" />
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>6</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- <port id="1">
- <dim>1</dim>
- </port>
- <port id="2">
- <dim>1</dim>
- </port>
- <port id="3">
- <dim>1</dim>
- </port>
- <port id="4">
- <dim>1</dim>
- </port>
- </input>
- <output>
- <port id="5">
- <dim>1</dim>
- <dim>6</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
-</layer>
-
-
-<layer name="branch2/weightsConstInput" type="Const" precision="_PR_" id="212">
- <output>
- <port id="0">
- <dim>6</dim>
- <dim>6</dim>
- <dim>3</dim>
- <dim>3</dim>
- </port>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_INPUT_OFFSET" size="WEIGHTS_CONST_INPUT_SIZE"/>
- </blobs>
-</layer>
-<layer name="branch2/weightsConstInputLow" type="Const" precision="_PR_" id="213">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_INPUT_LOW_OFFSET" size="4"/>
- </blobs>
-</layer>
-<layer name="branch2/weightsConstInputHigh" type="Const" precision="_PR_" id="214">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_INPUT_HIGHT_OFFSET" size="4"/>
- </blobs>
-</layer>
-
-<layer name="branch2/weightsConstOutputLow" type="Const" precision="_PR_" id="215">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_OUTPUT_LOW_OFFSET" size="4"/>
- </blobs>
-</layer>
-<layer name="branch2/weightsConstOutputHigh" type="Const" precision="_PR_" id="216">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_OUTPUT_HIGH_OFFSET" size="4"/>
- </blobs>
-</layer>
-
-
-<layer name="branch2/weightsFakeQuantize" type="FakeQuantize" precision="_PR_" id="218">
- <data levels="256" />
- <input>
- <port id="0">
- <dim>6</dim>
- <dim>6</dim>
- <dim>3</dim>
- <dim>3</dim>
- </port>
- <port id="1">
- <dim>1</dim>
- </port>
- <port id="2">
- <dim>1</dim>
- </port>
- <port id="3">
- <dim>1</dim>
- </port>
- <port id="4">
- <dim>1</dim>
- </port>
- </input>
- <output>
- <port id="5">
- <dim>6</dim>
- <dim>6</dim>
- <dim>3</dim>
- <dim>3</dim>
- </port>
- </output>
-</layer>
-
-<layer name="branch2/biasesConst" type="Const" precision="_PR_" id="219">
- <output>
- <port id="0">
- <dim>6</dim>
- </port>
- </output>
- <blobs>
- <custom offset="BIASES_CONST_OFFSET" size="BIASES_CONST_SIZE"/>
- </blobs>
-</layer>
-
-
-<layer name="branch2/convolution" precision="_PR_" type="Convolution" id="220">
- <data auto_pad="valid" dilations="1,1" group="1" kernel="3,3" output="6" pads_begin="0,0" pads_end="0,0" strides="1,1"/>
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>6</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- <port id="1">
- <dim>6</dim>
- <dim>6</dim>
- <dim>3</dim>
- <dim>3</dim>
- </port>
- <port id="2">
- <dim>6</dim>
- </port>
- </input>
- <output>
- <port id="3">
- <dim>1</dim>
- <dim>6</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
- </layer>
-
-<layer name="branch2/dataConstInputLow4" type="Const" precision="_PR_" id="222">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_LOW_OFFSET_4" size="4"/>
- </blobs>
-</layer>
-<layer name="branch2/dataConstInputHigh4" type="Const" precision="_PR_" id="223">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_HIGHT_OFFSET_4" size="4"/>
- </blobs>
-</layer>
-
-<layer name="branch2/dataConstOutputLow4" type="Const" precision="_PR_" id="224">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_LOW_OFFSET_4" size="4"/>
- </blobs>
-</layer>
-<layer name="branch2/dataConstOutputHigh4" type="Const" precision="_PR_" id="225">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_HIGH_OFFSET_4" size="4"/>
- </blobs>
-</layer>
-
-<layer name="branch2/dataFakeQuantize4" type="FakeQuantize" precision="_PR_" id="226">
- <data levels="256" />
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>6</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- <port id="1">
- <dim>1</dim>
- </port>
- <port id="2">
- <dim>1</dim>
- </port>
- <port id="3">
- <dim>1</dim>
- </port>
- <port id="4">
- <dim>1</dim>
- </port>
- </input>
- <output>
- <port id="5">
- <dim>1</dim>
- <dim>6</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
-</layer>
-
-<layer name="branch2/concat" type="Concat" precision="_PR_" id="227">
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>6</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- <port id="1">
- <dim>1</dim>
- <dim>6</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
-
- </input>
- <output>
- <port id="2">
- <dim>1</dim>
- <dim>12</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
-</layer>
-
-
-<layer name="outputPower" type="Power" precision="_PR_" id="300">
- <power_data power="1" scale="1" shift="0"/>
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>12</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </input>
- <output>
- <port id="1">
- <dim>1</dim>
- <dim>12</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
-</layer>
-
-)V0G0N";
\ No newline at end of file
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "low_precision_transformations/eltwise.hpp"
-#include "low_precision_transformations/concat_multi_channels.hpp"
-
-std::string ConcatMultiChannelTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
-// ASSERT_EQ(2, p.inputDimensions.size());
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- const size_t axis = 1; // should be passed in 'p' argument
-
- std::vector<size_t> concat_out_dims = p.inputDimensions[0];
- concat_out_dims[axis] += p.inputDimensions[1][axis];
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
- std::map<std::string, std::string> concat_params = {
- {"axis", "1"}
- };
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "10,10"}, {"1,1", "11,16"}, // Inputs to FakeQuantize
- {"2,2", "10,11"}, {"3,3", "10,12"}, {"4,4", "10,13"}, {"5,5", "10,14"}, // Const layers
- {"6,6", "11,17"}, {"7,7", "11,18"}, {"8,8", "11,19"}, {"9,9", "11,20"}, // Const layers
- {"10,15", "12,22"}, {"11,21", "12,23"} // FakeQuantize to Concat
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "Concat_transformations_", p.inputDimensions[0], p._network_precision)
- .addInputLayer(p._network_precision, p.inputDimensions[1])
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[1], {1}, {1}, {1}, {1}}, {{p.inputDimensions[1]}}})
- .addLayer("Concat", p._network_precision, &concat_params, { {p.inputDimensions[0], p.inputDimensions[1]}, { concat_out_dims }})
- .finish(&edges);
-}
-
-std::string ConcatMultiChannelTestModel::getName() const {
- return "ConcatMultiChannelTestModel";
-}
-
-bool ConcatMultiChannelTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params).
- addBranchSpecific<ConcatMultiChannelsTransformation>(params, "Concat")
- );
- transformer.transform(network);
- return true;
-}
-
-void ConcatMultiChannelTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), 0.0, "custom");
- fillData(getLayer(network, "Const3"), 255.0 / 10.0, "custom");
- fillData(getLayer(network, "Const4"), 0.0, "custom");
- fillData(getLayer(network, "Const5"), 255.0 / 10.0, "custom");
-
- fillData(getLayer(network, "Const6"), -255.0 / 400.0, "custom");
- fillData(getLayer(network, "Const7"), 255.0 / 200.0, "custom");
- fillData(getLayer(network, "Const8"), -255.0 / 400.0, "custom");
- fillData(getLayer(network, "Const9"), 255.0 / 200.0, "custom");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "low_precision_transformations/concat.hpp"
-#include "low_precision_transformations/eltwise.hpp"
-#include "common_test_utils/common_utils.hpp"
-
-ConcatTestModel::ConcatTestModel(
- const bool signedIntervals,
- const bool symmetricInterval,
- const bool multiChannel,
- const std::vector<size_t>& constInputDimentions) :
- signedIntervals(signedIntervals),
- symmetricInterval(symmetricInterval),
- multiChannel(multiChannel),
- constInputDimentions(constInputDimentions) {}
-
-std::string ConcatTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
-// ASSERT_EQ(2, p.inputDimensions.size());
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- const size_t axis = 1; // should be passed in 'p' argument
-
- std::vector<size_t> concat_out_dims = p.inputDimensions[0];
- concat_out_dims[axis] += p.inputDimensions[1][axis];
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {{"levels", "256"}};
- std::map<std::string, std::string> concat_params = {{"axis", "1"}};
- std::map<std::string, std::string> power_params = { {"power", "1"}, {"scale", "1"}, {"shift", "0"} };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "10,10"}, {"1,1", "11,16"}, // Inputs to FakeQuantize
- {"2,2", "10,11"}, {"3,3", "10,12"}, {"4,4", "10,13"}, {"5,5", "10,14"}, // Const layers
- {"6,6", "11,17"}, {"7,7", "11,18"}, {"8,8", "11,19"}, {"9,9", "11,20"}, // Const layers
- {"10,15", "12,22"}, {"11,21", "12,23"} // FakeQuantize to Concat
- };
-
- size_t constSize = std::accumulate(constInputDimentions.begin(), constInputDimentions.end(), 1lu, std::multiplies<size_t>());
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "Concat_transformations_", p.inputDimensions[0], p._network_precision)
- .addInputLayer(p._network_precision, p.inputDimensions[1])
- .addLayer("Const", p._network_precision, &const_params, {{}, {constInputDimentions}}, type_size*constSize, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {constInputDimentions}}, type_size*constSize, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {constInputDimentions}}, type_size*constSize, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {constInputDimentions}}, type_size*constSize, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {constInputDimentions}}, type_size*constSize, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {constInputDimentions}}, type_size*constSize, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {constInputDimentions}}, type_size*constSize, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {constInputDimentions}}, type_size*constSize, 0)
- .addLayer(
- "FakeQuantize",
- p._network_precision,
- &fake_quantize_params,
- {{p.inputDimensions[0], constInputDimentions, constInputDimentions, constInputDimentions, constInputDimentions}, {{p.inputDimensions[0]}}},
- "fakeQuantize1")
- .addLayer(
- "FakeQuantize",
- p._network_precision,
- &fake_quantize_params,
- {{p.inputDimensions[1], constInputDimentions, constInputDimentions, constInputDimentions, constInputDimentions}, {{p.inputDimensions[1]}}},
- "fakeQuantize2")
- .addLayer("Concat", p._network_precision, &concat_params, { {p.inputDimensions[0], p.inputDimensions[1]}, { concat_out_dims }}, "concat")
- .finish(&edges);
-}
-
-std::string ConcatTestModel::getName() const {
- return std::string("ConcatTestModel") +
- (signedIntervals ? "_Signed" : "_Unsigned") +
- (symmetricInterval ? "_Symmetric" : "_Asymmetric") +
- (multiChannel ? "_MultiChannel" : "_OneChannel") +
- (constInputDimentions.size() == 1ul ? "" : ("_const" + std::to_string(constInputDimentions.size()) + "D"));
-}
-
-bool ConcatTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- // TODO: remove when updatePrecisions is configurable
- params.updatePrecisions = true;
-
- LowPrecisionTransformations transformations = getLowPrecisionTransformations(params);
-
- if (!multiChannel) {
- // avoid ConcatMultiChannelsTransformation
- transformations = transformations.
- removeBranchSpecificTransformations("Concat").
- addBranchSpecific<ConcatTransformation>(params, "Concat");
- }
-
- LowPrecisionTransformer transformer(transformations);
- transformer.transform(network);
-
- const CNNLayerPtr concatLayer = CommonTestUtils::getLayerByName(network, "concat");
- if (concatLayer == nullptr) {
- THROW_IE_EXCEPTION << "concat layer was not found";
- }
-
- const std::vector<size_t> dims = concatLayer->outData[0]->getDims();
- if (dims.size() == 4ul) {
- const CNNLayerPtr fakeQuantizeLayer1 = CommonTestUtils::getLayerByName(network, "fakeQuantize1");
- QuantizeLayer* fakeQuantize1 = dynamic_cast<QuantizeLayer*>(fakeQuantizeLayer1.get());
- if (fakeQuantize1 == nullptr) {
- THROW_IE_EXCEPTION << "incorrect type for layer " << fakeQuantizeLayer1->name;
- }
- if (fakeQuantize1->levels == 0) {
- //
- }
-
- const CNNLayerPtr fakeQuantizeLayer2 = CommonTestUtils::getLayerByName(network, "fakeQuantize2");
- QuantizeLayer* fakeQuantize2 = dynamic_cast<QuantizeLayer*>(fakeQuantizeLayer2.get());
- if (fakeQuantize2 == nullptr) {
- THROW_IE_EXCEPTION << "incorrect type for layer " << fakeQuantizeLayer2->name;
- }
- if (fakeQuantize2->levels == 0) {
- //
- }
- } else if (dims.size() == 2ul) {
- if (getInputTo(concatLayer->outData[0]).size() != 0ul) {
- THROW_IE_EXCEPTION << "2D is not supported";
- }
- }
- return true;
-}
-
-void ConcatTestModel::resetTransformation(CNNNetwork& network) const {
- const float intervalsCoefficient = 0.5f;
- if (signedIntervals) {
- const float symmetricCoefficient = symmetricInterval ? 1.f : 0.5f;
- fillData(getLayer(network, "Const2"), (-128.f / 20.0) * symmetricCoefficient * intervalsCoefficient, "custom");
- fillData(getLayer(network, "Const3"), (127.f / 20.0) * symmetricCoefficient * intervalsCoefficient, "custom");
- fillData(getLayer(network, "Const4"), (-128.f / 20.0) * symmetricCoefficient * intervalsCoefficient, "custom");
- fillData(getLayer(network, "Const5"), (127.f / 20.0) * symmetricCoefficient * intervalsCoefficient, "custom");
-
- fillData(getLayer(network, "Const6"), (-128.f / 20.0) * symmetricCoefficient, "custom");
- fillData(getLayer(network, "Const7"), 127.f / 20.0, "custom");
- fillData(getLayer(network, "Const8"), (-128.f / 20.0) * symmetricCoefficient, "custom");
- fillData(getLayer(network, "Const9"), 127.f / 20.0, "custom");
-
- } else {
- const float shift = symmetricInterval ? 0.f : (255.f / 20.0) / 4.f;
- fillData(getLayer(network, "Const2"), (0.0 + shift) * intervalsCoefficient, "custom");
- fillData(getLayer(network, "Const3"), (255.f / 20.0) * intervalsCoefficient, "custom");
- fillData(getLayer(network, "Const4"), (0.0 + shift) * intervalsCoefficient, "custom");
- fillData(getLayer(network, "Const5"), (255.f / 20.0) * intervalsCoefficient, "custom");
-
- fillData(getLayer(network, "Const6"), 0.f, "custom");
- fillData(getLayer(network, "Const7"), 255.f / 20.0, "custom");
- fillData(getLayer(network, "Const8"), 0.f, "custom");
- fillData(getLayer(network, "Const9"), 255.f / 20.0, "custom");
- }
-}
-
-float ConcatTestModel::getThreshold(const std::string& device_name, const Precision precision, LayerTransformation::Params& params) const {
- if (device_name == "CPU") {
- if (params.updatePrecisions) {
- // FakeQuantize intervals are rounded in INT8 and as result threshold is increased
- return 0.0250001f;
- }
- }
-
- if (device_name == "GPU") {
- if (precision == Precision::FP32) {
- return 0.00200001f;
- } else {
- return 0.00062f;
- }
- }
-
- return SingleLayerTestModel::getThreshold(device_name, precision, params);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "low_precision_transformations/concat.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-std::string ConcatWithPoolingTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(PrecisionTrait<Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(PrecisionTrait<Precision::FP16>::value_type);
-
- std::map<std::string, std::string> constParams = {};
- std::map<std::string, std::string> fakeQuantizeParams = { {"levels", "256"} };
- std::map<std::string, std::string> concatParams = { {"axis", "1"} };
- std::map<std::string, std::string> powerParams = { {"power", "1"}, {"scale", "1"}, {"shift", "0"} };
- std::map<std::string, std::string> poolingParams = {
- {"kernel", "1,1"},
- {"pool-method", "max"},
- {"exclude-pad", "false"}
- };
-
- CommonTestUtils::conv_common_params convolutionParams = { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 3, false, false };
- std::vector<size_t> weightsConstInputDims = { 3lu, 3lu, 1lu, 1lu };
- std::vector<size_t> biasesConvolutionConstDims = { convolutionParams.out_c };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "11,17"}, {"1,2", "6,7"}, // Inputs
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"7,13", "11,18"}, {"8,14", "11,19"}, {"9,15", "11,20"}, {"10,16", "11,21"}, // Const layers
- {"6,12", "17,33"}, {"11,22", "12,23"}, // Pooling12
- {"12,24", "15,27"}, // Pooling12 -> Convolution15
- {"13,25", "15,28"}, // Const13 -> Convolution15
- {"14,26", "15,29"}, // Const14 -> Convolution15
- {"15,30", "1,1"}, // Convolution15 -> Power
- {"12,24", "16,31"}, // Pooling12 -> Pooling16
- {"16,32", "17,34"} // Pooling16 -> FakeQuantize20
- };
-
- auto modelBuilder = CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("ConcatWithPoolingTestModel", p.inputDimensions[0], p._network_precision)
- // 1
- //.addInputLayer(p._network_precision, p.inputDimensions[1])
- .addLayer("Power", p._network_precision, &powerParams, { {p.inputDimensions[1]}, {p.inputDimensions[1]} })
- // 2
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 3
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 4
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 5
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 6
- .addLayer("FakeQuantize", p._network_precision, &fakeQuantizeParams, { {p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}} })
- // 7
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 8
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 9
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 10
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 11
- .addLayer("FakeQuantize", p._network_precision, &fakeQuantizeParams, { {p.inputDimensions[1], {1}, {1}, {1}, {1}}, {{p.inputDimensions[1]}} })
- // 12
- .addLayer("Pooling", p._network_precision, &poolingParams, { {p.inputDimensions[1]}, {p.inputDimensions[1]} })
- // 13
- .addLayer("Const", p._network_precision, &constParams, { {}, {weightsConstInputDims} },
- std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size)
- // 14
- .addLayer("Const", p._network_precision, &constParams, { {}, {biasesConvolutionConstDims} }, type_size * convolutionParams.out_c, 0)
- // 15
- .convolutionLayer(p._network_precision, { {p.inputDimensions[0], weightsConstInputDims, biasesConvolutionConstDims }, {p.inputDimensions[0]} }, convolutionParams)
- // 16
- .addLayer("Pooling", p._network_precision, &poolingParams, { {p.inputDimensions[1]}, {p.inputDimensions[1]} })
- // 17
- .addLayer("Concat", p._network_precision, &concatParams, { {p.inputDimensions[0], p.inputDimensions[1]}, {{p.outputDimensions[0]}} }, 0, 0);
-
- auto modelString = modelBuilder.finish(&edges);
- return modelString;
-}
-
-std::string ConcatWithPoolingTestModel::getName() const {
- return std::string("ConcatWithPoolingTestModel") +
- (multiChannel ? "_multiChannel" : "_oneChannel") +
- (signedIntervals ? "_signedInterval" : "_notSignedInterval") +
- (shift ? "_withShift" : "") +
- "_" + std::to_string(dequantizationIntervalsDifference);
-}
-
-bool ConcatWithPoolingTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- // TODO: remove when updatePrecisions is configurable
- params.updatePrecisions = true;
-
- LowPrecisionTransformations transformations = getLowPrecisionTransformations(params);
- if (!multiChannel) {
- // avoid ConcatMultiChannelsTransformation
- transformations = transformations.
- removeBranchSpecificTransformations("Concat").
- addBranchSpecific<ConcatTransformation>(params, "Concat");
- }
-
- LowPrecisionTransformer transformer(transformations);
- transformer.transform(network);
-
- const std::string intermediateDequantizationLayerName = "Pooling12_ScaleShift_Convolution15";
- const CNNLayerPtr intermediateDequantizationLayer = CNNNetworkHelper::getLayer(network, intermediateDequantizationLayerName);
- if (intermediateDequantizationLayer == nullptr) {
- THROW_IE_EXCEPTION << "DequantizationLayer '" << intermediateDequantizationLayerName << "' was not found";
- }
-
- return true;
-}
-
-void ConcatWithPoolingTestModel::resetTransformation(CNNNetwork& network) const {
- const float low = signedIntervals ? -128 : 0.f;
- const float high = signedIntervals ? 127 : 255.f;
-
- const float coefficient1 = 10.f;
- const float coefficient2 = coefficient1 * dequantizationIntervalsDifference;
- const float shift1 = shift ? (low / coefficient1) / 3 : 0.f;
- const float shift2 = shift ? (low / coefficient1) / 3 : 0.f;
-
- fillData(getLayer(network, "Const2"), low / coefficient1 + shift1, "custom");
- fillData(getLayer(network, "Const3"), high / coefficient1, "custom");
- fillData(getLayer(network, "Const4"), low / coefficient1 + shift1, "custom");
- fillData(getLayer(network, "Const5"), high / coefficient1, "custom");
-
- fillData(getLayer(network, "Const7"), low / coefficient2 + shift2, "custom");
- fillData(getLayer(network, "Const8"), high / coefficient2, "custom");
- fillData(getLayer(network, "Const9"), low / coefficient2 + shift2, "custom");
- fillData(getLayer(network, "Const10"), high / coefficient2, "custom");
-
- fillData(getLayer(network, "Const13"), 3.f, "custom");
- fillData(getLayer(network, "Const14"), 2.f, "custom");
-}
-
-float ConcatWithPoolingTestModel::getThreshold(
- const std::string& deviceName,
- const Precision precision,
- LayerTransformation::Params& params) const {
- if (params.quantizeOutputs && signedIntervals && shift && (dequantizationIntervalsDifference != 0.f)) {
- return 0.0153;
- }
-
- return SingleLayerTestModel::getThreshold(deviceName, precision, params);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-std::string ConvolutionAndDequantizationScaleShiftAndQuantizeOnActivationsTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(float);
- if (p._network_precision == "FP16")
- type_size = sizeof(short);
-
- CommonTestUtils::conv_common_params conv =
- { {2, 2}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "", 1, 32, true, true };
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"},
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"6,12", "7,13"}, // Fake quantize to Convolution
- {"7,14", "8,15"} // Convolution to Power
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "dequantizeScaleShift_", p.inputDimensions[0], p._network_precision)
- .addLayer("ScaleShift", p._network_precision, &const_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}}, p.inputDimensions[0][1] * type_size, p.inputDimensions[0][1] * type_size)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .convolutionLayer(p._network_precision, {{p.inputDimensions[0]}, {convOutShape}}, conv)
- .addLayer("Power", p._network_precision, &power_params, {{convOutShape}, {convOutShape}})
- .finish(&edges);
-}
-
-std::string ConvolutionAndDequantizationScaleShiftAndQuantizeOnActivationsTestModel::getName() const {
- return "ConvolutionAndDequantizationScaleShiftAndQuantizeOnActivationsTestModel";
-}
-
-bool ConvolutionAndDequantizationScaleShiftAndQuantizeOnActivationsTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
- return true;
-}
-
-void ConvolutionAndDequantizationScaleShiftAndQuantizeOnActivationsTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "ScaleShift1"), 3, "weights");
- fillData(getLayer(network, "ScaleShift1"), 5, "biases");
- fillData(getLayer(network, "Const2"), -128.0, "custom");
- fillData(getLayer(network, "Const3"), 127.0, "custom");
- fillData(getLayer(network, "Const4"), -128.0, "custom");
- fillData(getLayer(network, "Const5"), 127.0, "custom");
- fillDataWithInitValue(getLayer(network, "Convolution7"), "weights", 1.234);
- fillDataWithInitValue(getLayer(network, "Convolution7"), "biases", 5.678);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-std::string ConvolutionAndDequantizationScaleShiftsOnActivationsTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- std::map<std::string, std::string> scale_shift_params = {};
- CommonTestUtils::conv_common_params conv =
- { {1, 1}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "", 1, 32, true, true };
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "2,3"}, {"2,4", "3,5"}
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "Conv_ScaleShift_transformations", p.inputDimensions[0], p._network_precision)
- .addLayer("ScaleShift", p._network_precision, &scale_shift_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}}, p.inputDimensions[0][1] * type_size, p.inputDimensions[0][1] * type_size)
- .convolutionLayer(p._network_precision, {{p.inputDimensions[0]}, {convOutShape}}, conv)
- .addLayer("Power", p._network_precision, &power_params, {{convOutShape}, {convOutShape}})
- .finish(&edges);
-}
-
-std::string ConvolutionAndDequantizationScaleShiftsOnActivationsTestModel::getName() const {
- return "ConvolutionAndDequantizationScaleShiftsOnActivationsTestModel";
-}
-
-bool ConvolutionAndDequantizationScaleShiftsOnActivationsTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
- return true;
-}
-
-void ConvolutionAndDequantizationScaleShiftsOnActivationsTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "ScaleShift1"), 3.f, "weights");
- fillData(getLayer(network, "ScaleShift1"), 4.f, "biases");
-
- fillDataWithInitValue(getLayer(network, "Convolution2"), "weights", 1.234f);
- fillDataWithInitValue(getLayer(network, "Convolution2"), "biases", 5.678f);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-std::string ConvolutionAndPoolingAndQuantizeOnActivationsTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- CommonTestUtils::pool_common_params pooling =
- { {2, 2}, {3, 3}, {0, 0}, {0, 0}, "valid", false, true };
- std::vector<size_t> poolOutShape(p.inputDimensions[0].size());
- getPoolOutShape(p.inputDimensions[0], pooling, poolOutShape);
-
- CommonTestUtils::conv_common_params conv =
- { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 80, true, true };
- std::vector<size_t> convOutShape(poolOutShape.size());
- getConvOutShape(poolOutShape, conv, convOutShape);
-
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "5,5"}, // FQ
- {"1,1", "5,6"}, {"2,2", "5,7"}, {"3,3", "5,8"}, {"4,4", "5,9"}, // const
- {"5,10", "6,11"}, {"6,12", "7,13"} // Pool, Conv
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "Conv_ScaleShift_transformations", p.inputDimensions[0], p._network_precision)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .poolingLayer(p._network_precision, {{p.inputDimensions[0]}, {poolOutShape}}, pooling)
- .convolutionLayer(p._network_precision, {{poolOutShape}, {convOutShape}}, conv)
- .finish(&edges);
-}
-
-std::string ConvolutionAndPoolingAndQuantizeOnActivationsTestModel::getName() const {
- return "ConvolutionAndPoolingAndQuantizeOnActivationsTestModel";
-}
-
-bool ConvolutionAndPoolingAndQuantizeOnActivationsTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
- return true;
-}
-
-void ConvolutionAndPoolingAndQuantizeOnActivationsTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const1"), -128.f / 20.f, "custom");
- fillData(getLayer(network, "Const2"), 127.f / 20.f, "custom");
- fillData(getLayer(network, "Const3"), -128.f / 20.f, "custom");
- fillData(getLayer(network, "Const4"), 127.f / 20.f, "custom");
- fillDataWithInitValue(getLayer(network, "Convolution7"), "weights", 1.234f);
- fillDataWithInitValue(getLayer(network, "Convolution7"), "biases", 5.678f);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-std::string ConvolutionAndQuantizeOnActivationsAndWeightsBaseTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- CommonTestUtils::conv_common_params conv =
- { {1, 1}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 32, false, false };
-
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::vector<size_t> weightsConstInputDims = { 32lu, 32lu, 3lu, 3lu };
- std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"7,13", "12,18"}, {"8,14", "12,19"}, {"9,15", "12,20"}, {"10,16", "12,21"}, {"11,17", "12,22"}, // Const layers
- {"6,12", "14,25"}, {"12,23", "14,26"}, // Fake quantize to Conv
- {"13,24", "14,27"} // biases to Conv
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "QuantizationOnWeights", p.inputDimensions[0], p._network_precision)
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {weightsConstInputDims}},
- std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {biasesConvolutionConstDims}}, type_size * conv.out_c, 0)
- .convolutionLayer(p._network_precision, {{p.inputDimensions[0], weightsConstInputDims, biasesConvolutionConstDims }, {convOutShape}}, conv)
- .finish(&edges);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-std::string ConvolutionAndQuantizeOnActivationsTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- CommonTestUtils::conv_common_params conv =
- { {2, 2}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "", 1, 32, true, true };
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "5,5"}, // FQ
- {"1,1", "5,6"}, {"2,2", "5,7"}, {"3,3", "5,8"}, {"4,4", "5,9"}, // const
- {"5,10", "6,11"}, {"6,12", "7,13"} // Pool, Conv, power
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "Conv_ScaleShift_transformations", p.inputDimensions[0], p._network_precision)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .convolutionLayer(p._network_precision, {{p.inputDimensions[0]}, {convOutShape}}, conv)
- .addLayer("Power", p._network_precision, &power_params, {{convOutShape}, {convOutShape}})
- .finish(&edges);
-}
-
-std::string ConvolutionAndQuantizeOnActivationsTestModel::getName() const {
- return "ConvolutionAndQuantizeOnActivationsTestModel";
-}
-
-bool ConvolutionAndQuantizeOnActivationsTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
- return true;
-}
-
-void ConvolutionAndQuantizeOnActivationsTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const1"), -128.0 / 20.0, "custom");
- fillData(getLayer(network, "Const2"), 127.0 / 20.0, "custom");
- fillData(getLayer(network, "Const3"), -128.0 / 20.0, "custom");
- fillData(getLayer(network, "Const4"), 127.0 / 20.0, "custom");
- fillDataWithInitValue(getLayer(network, "Convolution6"), "weights", 1.234);
- fillDataWithInitValue(getLayer(network, "Convolution6"), "biases", 5.678);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-void ConvolutionAndQuantizeOnSignedActivationsAndInvertedWeightsTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const3"), 127.f / 4.f, "custom");
- fillData(getLayer(network, "Const4"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const5"), 127.f / 4.f, "custom");
-
- fillDataWithInitValue(getLayer(network, "Const7"), "custom", 1.234);
-
- fillData(getLayer(network, "Const8"), 1.28f, "custom");
- fillData(getLayer(network, "Const9"), -1.27f, "custom");
- fillData(getLayer(network, "Const10"), 1.28f, "custom");
- fillData(getLayer(network, "Const11"), -1.27f, "custom");
-
- fillDataWithInitValue(getLayer(network, "Const13"), "custom", 2.123f);
-}
-
-std::string ConvolutionAndQuantizeOnSignedActivationsAndInvertedWeightsTestModel::getName() const {
- return "ConvolutionAndQuantizeOnSignedActivationsAndInvertedWeightsTestModel";
-}
-
-bool ConvolutionAndQuantizeOnSignedActivationsAndInvertedWeightsTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
-
- if (std::any_of(
- params.precisionsOnActivations.begin(),
- params.precisionsOnActivations.end(),
- [](const Precision precision) { return precision == Precision::U8; }) &&
- params.quantizeOutputs) {
- CNNLayerPtr scaleShfit = CNNNetworkHelper::getLayer(network, "Convolution14");
- if (scaleShfit->type != "ScaleShift") {
- THROW_IE_EXCEPTION << "unexpected last output dequantization layer type " << scaleShfit->name;
- }
-
- if (params.updateBiases) {
- const Blob::Ptr shiftsBlob = CNNNetworkHelper::getBlob(scaleShfit, "biases");
- std::shared_ptr<float> shiftsBuffer = CNNNetworkHelper::getFloatData(shiftsBlob);
- for (size_t i = 0ul; i < shiftsBlob->size(); ++i) {
- if (shiftsBuffer.get()[i] != 0.0) {
- THROW_IE_EXCEPTION << "unexpected dequantization shift value";
- }
- }
- }
- }
-
- return true;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-void ConvolutionAndQuantizeOnSignedActivationsAndWeightsNegativeTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const3"), 127.f / 4.f, "custom");
- fillData(getLayer(network, "Const4"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const5"), 127.f / 4.f, "custom");
-
- fillDataWithInitValue(getLayer(network, "Const7"), "custom", 1.234);
-
- //fillData(getLayer(network, "Const8"), 0.f, "custom");
- //fillData(getLayer(network, "Const9"), 255.f / 40.f, "custom");
- //fillData(getLayer(network, "Const10"), 0.f, "custom");
- //fillData(getLayer(network, "Const11"), 255.f / 40.f, "custom");
-
- fillData(getLayer(network, "Const8"), -255.f / 40.f, "custom");
- fillData(getLayer(network, "Const9"), 0.f, "custom");
- fillData(getLayer(network, "Const10"), -255.f / 40.f, "custom");
- fillData(getLayer(network, "Const11"), 0.f, "custom");
-
-
- fillDataWithInitValue(getLayer(network, "Const13"), "custom", 2.123f);
-}
-
-std::string ConvolutionAndQuantizeOnSignedActivationsAndWeightsNegativeTestModel::getName() const {
- return "ConvolutionAndQuantizeOnSignedActivationsAndWeightsNegativeTestModel";
-}
-
-bool ConvolutionAndQuantizeOnSignedActivationsAndWeightsNegativeTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
-
- if (std::any_of(
- params.precisionsOnActivations.begin(),
- params.precisionsOnActivations.end(),
- [](const Precision precision) { return precision == Precision::U8; }) &&
- params.quantizeOutputs) {
- CNNLayerPtr scaleShfit = CNNNetworkHelper::getLayer(network, "Convolution14");
- if (scaleShfit->type != "ScaleShift") {
- THROW_IE_EXCEPTION << "unexpected last output dequantization layer type " << scaleShfit->name;
- }
-
- if (params.updateBiases) {
- const Blob::Ptr shiftsBlob = CNNNetworkHelper::getBlob(scaleShfit, "biases");
- std::shared_ptr<float> shiftsBuffer = CNNNetworkHelper::getFloatData(shiftsBlob);
- for (size_t i = 0ul; i < shiftsBlob->size(); ++i) {
- if (shiftsBuffer.get()[i] != 0.0) {
- THROW_IE_EXCEPTION << "unexpected dequantization shift value";
- }
- }
- }
- }
-
- return true;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-void ConvolutionAndQuantizeOnSignedActivationsAndWeightsPositiveTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const3"), 127.f / 4.f, "custom");
- fillData(getLayer(network, "Const4"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const5"), 127.f / 4.f, "custom");
-
- fillDataWithInitValue(getLayer(network, "Const7"), "custom", 1.234);
-
- fillData(getLayer(network, "Const8"), 0.f, "custom");
- fillData(getLayer(network, "Const9"), 255.f / 40.f, "custom");
- fillData(getLayer(network, "Const10"), 0.f, "custom");
- fillData(getLayer(network, "Const11"), 255.f / 40.f, "custom");
-
- fillDataWithInitValue(getLayer(network, "Const13"), "custom", 2.123f);
-}
-
-std::string ConvolutionAndQuantizeOnSignedActivationsAndWeightsPositiveTestModel::getName() const {
- return "ConvolutionAndQuantizeOnSignedActivationsAndWeightsPositiveTestModel";
-}
-
-bool ConvolutionAndQuantizeOnSignedActivationsAndWeightsPositiveTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
-
- if (std::any_of(
- params.precisionsOnActivations.begin(),
- params.precisionsOnActivations.end(),
- [](const Precision precision) { return precision == Precision::U8;}) &&
- params.quantizeOutputs) {
- CNNLayerPtr scaleShfit = CNNNetworkHelper::getLayer(network, "Convolution14");
- if (scaleShfit->type != "ScaleShift") {
- THROW_IE_EXCEPTION << "unexpected last output dequantization layer type " << scaleShfit->name;
- }
-
- if (params.updateBiases) {
- const Blob::Ptr shiftsBlob = CNNNetworkHelper::getBlob(scaleShfit, "biases");
- std::shared_ptr<float> shiftsBuffer = CNNNetworkHelper::getFloatData(shiftsBlob);
- for (size_t i = 0ul; i < shiftsBlob->size(); ++i) {
- if (shiftsBuffer.get()[i] != 0.0) {
- THROW_IE_EXCEPTION << "unexpected dequantization shift value";
- }
- }
- }
- }
-
- return true;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-void ConvolutionAndQuantizeOnUnsignedActivationsAndWeightsTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), 63.5f, "custom");
- fillData(getLayer(network, "Const3"), 127.f, "custom");
- fillData(getLayer(network, "Const4"), 63.5f, "custom");
- fillData(getLayer(network, "Const5"), 127.f, "custom");
-
- fillDataWithInitValue(getLayer(network, "Const7"), "custom", 1.234f);
-
- fillData(getLayer(network, "Const8"), -1.275f / 2.f, "custom");
- fillData(getLayer(network, "Const9"), 1.275f, "custom");
- fillData(getLayer(network, "Const10"), -1.275f / 2.f, "custom");
- fillData(getLayer(network, "Const11"), 1.275f, "custom");
-
- fillDataWithInitValue(getLayer(network, "Const13"), "custom", 2.123f);
-}
-
-std::string ConvolutionAndQuantizeOnUnsignedActivationsAndWeightsTestModel::getName() const {
- return "ConvolutionAndQuantizeOnUnsignedActivationsAndWeightsTestModel";
-}
-
-bool ConvolutionAndQuantizeOnUnsignedActivationsAndWeightsTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
-
- if (params.quantizeOutputs) {
- const std::vector<CNNLayerPtr> layers = CNNNetSortTopologically(network);
-
- const CNNLayerPtr convolution = layers[layers.size() - 2];
- if ((convolution->type != "Convolution") || (convolution->name != "Convolution14_original")) {
- THROW_IE_EXCEPTION << "unexpected layer type '" << convolution->type << "' or name '" << convolution->name << "'";
- }
-
- const CNNLayerPtr dequantizationScaleShift = layers[layers.size() - 1];
- if ((dequantizationScaleShift->type != "ScaleShift") || (dequantizationScaleShift->name != "Convolution14")) {
- THROW_IE_EXCEPTION << "unexpected layer type '" << dequantizationScaleShift->type << "' or name '" << dequantizationScaleShift->name << "'";
- }
- }
-
- return true;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-std::string ConvolutionAndQuantizeOnWeightsWithMultiOutputIntervalsTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- CommonTestUtils::conv_common_params conv =
- { {1, 1}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 64, false, false };
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::vector<size_t> weightsConstInputDims = { 64lu, 32lu, 3lu, 3lu };
- std::vector<size_t> weightsConstOutputDims = { 64lu, 1lu, 1lu, 1lu };
- std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"7,13", "12,18"}, {"8,14", "12,19"}, {"9,15", "12,20"}, {"10,16", "12,21"}, {"11,17", "12,22"}, // Const layers
- {"6,12", "14,25"}, {"12,23", "14,26"}, // Fake quantize to Conv
- {"13,24", "14,27"} // biases to Conv
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "QuantizationOnWeights", p.inputDimensions[0], p._network_precision)
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {weightsConstInputDims}},
- std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {weightsConstOutputDims}},
- std::accumulate(weightsConstOutputDims.begin(), weightsConstOutputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {weightsConstOutputDims}},
- std::accumulate(weightsConstOutputDims.begin(), weightsConstOutputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{weightsConstInputDims, {1}, {1}, weightsConstOutputDims, weightsConstOutputDims}, {{weightsConstInputDims}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {biasesConvolutionConstDims}}, type_size * conv.out_c, 0)
- .convolutionLayer(p._network_precision, {{p.inputDimensions[0], weightsConstInputDims, biasesConvolutionConstDims }, {convOutShape}}, conv)
- .finish(&edges);
-}
-
-std::string ConvolutionAndQuantizeOnWeightsWithMultiOutputIntervalsTestModel::getName() const {
- return "ConvolutionAndQuantizeOnWeightsWithMultiOutputIntervalsTestModel";
-}
-
-bool ConvolutionAndQuantizeOnWeightsWithMultiOutputIntervalsTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
- return true;
-}
-
-void ConvolutionAndQuantizeOnWeightsWithMultiOutputIntervalsTestModel::resetTransformation(CNNNetwork& network) const {
- // int values for range test
- fillData(getLayer(network, "Const2"), 0.0, "custom");
- fillData(getLayer(network, "Const3"), 255.0, "custom");
- fillData(getLayer(network, "Const4"), 0.0, "custom");
- fillData(getLayer(network, "Const5"), 255.0, "custom");
-
- fillData(getLayer(network, "Const7"), 4.0, "custom");
-
- fillData(getLayer(network, "Const8"), -128.0, "custom");
- fillData(getLayer(network, "Const9"), 127.0, "custom");
- fillData(getLayer(network, "Const10"), -128.0, "custom");
- fillData(getLayer(network, "Const11"), 127.0, "custom");
-
- fillData(getLayer(network, "Const13"), 5.0, "custom");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-std::string ConvolutionAndQuantizeOnWeightsWithoutConstTransformationTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- CommonTestUtils::conv_common_params conv =
- { {1, 1}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 32, false, false };
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::vector<size_t> weightsConstInputDims = { 32lu, 32lu, 3lu, 3lu };
- std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"7,13", "12,18"}, {"8,14", "12,19"}, {"9,15", "12,20"}, {"10,16", "12,21"}, {"11,17", "12,22"}, // Const layers
- {"6,12", "14,25"}, {"12,23", "14,26"}, // Fake quantize to Conv
- {"13,24", "14,27"} // biases to Conv
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "QuantizationOnWeights", p.inputDimensions[0], p._network_precision)
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {weightsConstInputDims}},
- std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {biasesConvolutionConstDims}}, type_size * conv.out_c, 0)
- .convolutionLayer(p._network_precision, {{p.inputDimensions[0], weightsConstInputDims, biasesConvolutionConstDims }, {convOutShape}}, conv)
- .finish(&edges);
-}
-
-std::string ConvolutionAndQuantizeOnWeightsWithoutConstTransformationTestModel::getName() const {
- return "ConvolutionAndQuantizeOnWeightsWithoutConstTransformationTestModel";
-}
-
-bool ConvolutionAndQuantizeOnWeightsWithoutConstTransformationTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- auto transformationsWithoutConst = getLowPrecisionTransformations(params);
- transformationsWithoutConst.remove("Const");
-
- LowPrecisionTransformer transformer(transformationsWithoutConst);
- transformer.transform(network);
-
- return true;
-}
-
-void ConvolutionAndQuantizeOnWeightsWithoutConstTransformationTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), 63.5f, "custom");
- fillData(getLayer(network, "Const3"), 127.f, "custom");
- fillData(getLayer(network, "Const4"), 63.5f, "custom");
- fillData(getLayer(network, "Const5"), 127.f, "custom");
-
- fillDataWithInitValue(getLayer(network, "Const7"), "custom", 1.234f);
-
- fillData(getLayer(network, "Const8"), -1.275f / 2.f, "custom");
- fillData(getLayer(network, "Const9"), 1.275f, "custom");
- fillData(getLayer(network, "Const10"), -1.275f / 2.f, "custom");
- fillData(getLayer(network, "Const11"), 1.275f, "custom");
-
- fillDataWithInitValue(getLayer(network, "Const13"), "custom", 2.123f);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-//const size_t channelsCount = 32ul;
-//const size_t group = channelsCount;
-//std::vector<size_t> weightsConstInputDims = { channelsCount, 1lu, 3lu, 3lu };
-
-ConvolutionBaseTestModel::ConvolutionBaseTestModel(const bool addBiasesLayer) : addBiasesLayer(addBiasesLayer) {}
-
-std::string ConvolutionBaseTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- const size_t group = getGroupsCount(p);
- const size_t inputChannelsCount = p.inputDimensions[0][1];
- const size_t outputChannelsCount = p.outputDimensions[0][1];
- CommonTestUtils::conv_common_params conv = { {1, 1}, {3, 3}, {1, 1}, {1, 1}, {1, 1}, "valid", group, outputChannelsCount, false, false };
- std::vector<size_t> weightsConstInputDims = { outputChannelsCount, inputChannelsCount / group, 3lu, 3lu };
-
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = { {"levels", "256"} };
- std::map<std::string, std::string> fake_quantize_params2 = { {"levels", "255"} };
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
-
- const std::vector<std::vector<size_t>> convolutionDims = addBiasesLayer ?
- std::vector<std::vector<size_t>>({p.inputDimensions[0], weightsConstInputDims, biasesConvolutionConstDims }) :
- std::vector<std::vector<size_t>>({p.inputDimensions[0], weightsConstInputDims });
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"7,13", "12,18"}, {"8,14", "12,19"}, {"9,15", "12,20"}, {"10,16", "12,21"}, {"11,17", "12,22"}, // Const layers
- {"6,12", "13,24"}, {"12,23", "13,25"} // Fake quantize to Conv
- };
-
- if (addBiasesLayer) {
- edges.push_back({ "14,28", "13,26" }); // biases to Conv
- }
-
- std::vector<size_t> quantizationParamsDims(p.inputDimensions[0].size(), 1);
- quantizationParamsDims[1] = inputChannelsCount;
-
- CommonTestUtils::DefaultNetBuilder builder = CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "QuantizationOnWeights", p.inputDimensions[0], p._network_precision)
- .addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
- .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataInputLowConst")
- .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataInputHighConst")
- .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataOutputLowConst")
- .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataOutputHighConst")
- .addLayer("FakeQuantize",
- p._network_precision,
- &fake_quantize_params,
- { {p.inputDimensions[0], quantizationParamsDims, quantizationParamsDims, quantizationParamsDims, quantizationParamsDims},
- {{p.inputDimensions[0]}} },
- "fakeQuantizeOnActivations")
- .addLayer("Const", p._network_precision, &const_params, { {}, {weightsConstInputDims} },
- std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, "weigthsConst")
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsInputLowConst")
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsInputHighConst")
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsOutputLowConst")
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsOutputHighConst")
- .addLayer(
- "FakeQuantize",
- p._network_precision,
- &fake_quantize_params,
- { {weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}} },
- "fakeQuantizeOnWeights")
- .convolutionLayer(p._network_precision, { convolutionDims, {convOutShape} }, conv, "Convolution");
-
- if (addBiasesLayer) {
- builder.addLayer("Const", p._network_precision, &const_params, { {}, {biasesConvolutionConstDims} }, type_size * conv.out_c, "biasesConst");
- }
-
- return builder.finish(&edges);
-}
-
-bool ConvolutionBaseTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
- return true;
-}
-
-void ConvolutionBaseTestModel::resetTransformation(CNNNetwork& network) const {
- CNNLayerPtr convolution = CNNNetworkHelper::getLayer(network, "Convolution");
-
- const size_t channelsCount = convolution->GetParamAsUInt("output");
- const size_t groupsCount = convolution->GetParamAsUInt("group");
- const size_t filtersCountPerOutputChannel = channelsCount / groupsCount;
- const size_t kernelH = convolution->GetParamAsUInts("kernel")[0];
- const size_t kernelW = convolution->GetParamAsUInts("kernel")[1];
-
- // Const on activations
- std::vector<float> lowValues(channelsCount); // to have shifts
- std::vector<float> highValues(channelsCount);
- if (areScalesOnActivationsDifferent()) {
- for (size_t inputChannel = 0; inputChannel < highValues.size(); ++inputChannel) {
- highValues[inputChannel] = 255.f / (1.f + inputChannel);
- }
- } else {
- highValues = std::vector<float>(channelsCount, 255.f);
- }
-
- fillData(getLayer(network, "dataInputLowConst"), lowValues, "custom");
- fillData(getLayer(network, "dataInputHighConst"), highValues, "custom");
- fillData(getLayer(network, "dataOutputLowConst"), lowValues, "custom");
- fillData(getLayer(network, "dataOutputHighConst"), highValues, "custom");
-
- // Const on weights
- std::vector<float> weights(channelsCount * filtersCountPerOutputChannel * kernelH * kernelW);
- for (size_t outputChannel = 0ul; outputChannel < channelsCount; ++outputChannel) {
- for (size_t filter = 0ul; filter < filtersCountPerOutputChannel; ++filter) {
- for (size_t kernel = 0ul; kernel < kernelH * kernelW; ++kernel) {
- weights[outputChannel * filtersCountPerOutputChannel * kernelH * kernelW + filter * kernelH * kernelW + kernel] =
- static_cast<float>(outputChannel * filtersCountPerOutputChannel + filter) + 1.f;
- }
- }
- }
- fillData(getLayer(network, "weigthsConst"), weights, "custom");
-
- fillData(getLayer(network, "weigthsInputLowConst"), -128.f / 4.0, "custom");
- fillData(getLayer(network, "weigthsInputHighConst"), 127.f / 4.0, "custom");
- fillData(getLayer(network, "weigthsOutputLowConst"), -128.f / 4.0, "custom");
- fillData(getLayer(network, "weigthsOutputHighConst"), 127.f / 4.0, "custom");
-
- if (addBiasesLayer) {
- fillData(getLayer(network, "biasesConst"), 2.f, "custom");
- }
-}
-
-size_t ConvolutionBaseTestModel::getGroupsCount(SingleLayerTransformationsTestParams& p) const {
- return 1ul;
-}
-
-bool ConvolutionBaseTestModel::areScalesOnActivationsDifferent() const {
- return false;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-std::string ConvolutionDepthwiseTestModel::getName() const {
- return "ConvolutionDepthwiseTestModel";
-}
-
-size_t ConvolutionDepthwiseTestModel::getGroupsCount(SingleLayerTransformationsTestParams& p) const {
- return p.inputDimensions[0][1];
-}
-
-bool ConvolutionDepthwiseTestModel::areScalesOnActivationsDifferent() const {
- return true;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-std::string ConvolutionGroupedTestModel::getName() const {
- return "ConvolutionGroupedTestModel";
-}
-
-void ConvolutionGroupedTestModel::initInput(Blob::Ptr input) const {
- fillDataWithInitValue(input, -1.f);
-}
-
-size_t ConvolutionGroupedTestModel::getGroupsCount(SingleLayerTransformationsTestParams& p) const {
- const size_t channelsPerGroup = 8ul;
- const size_t inputChannelsCount = p.inputDimensions[0][1];
- if ((inputChannelsCount % channelsPerGroup) != 0ul) {
- THROW_IE_EXCEPTION << "not possible to divide " << inputChannelsCount << " channels to groups";
- }
-
- return inputChannelsCount / channelsPerGroup;
-}
-
-bool ConvolutionGroupedTestModel::areScalesOnActivationsDifferent() const {
- return false;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-std::string EltwiseBroadcastTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(PrecisionTrait<Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(PrecisionTrait<Precision::FP16>::value_type);
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
- std::map<std::string, std::string> eltwise_params = {
- {"operation", "sum"}
- };
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "6,6"}, {"1,1", "11,16"}, // Inputs
- {"2,2", "6,7"}, {"3,3", "6,8"}, {"4,4", "6,9"}, {"5,5", "6,10"}, // Const layers
- {"7,12", "11,17"}, {"8,13", "11,18"}, {"9,14", "11,19"}, {"10,15", "11,20"}, // Const layers
- {"6,11", "12,22"}, {"11,21", "12,23"} // Fake quantize to Convolution
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "Eltwise", p.inputDimensions[0], p._network_precision)
- .addLayer("Const", p._network_precision, &const_params, {{}, {p.inputDimensions[1]}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[1], {1}, {1}, {1}, {1}}, {{p.inputDimensions[1]}}})
- .addLayer("Eltwise", p._network_precision, &eltwise_params, {{p.inputDimensions[0], p.inputDimensions[1]}, {{p.outputDimensions[0]}}}, 0, 0)
- .finish(&edges);
-}
-
-std::string EltwiseBroadcastTestModel::getName() const {
- return "EltwiseBroadcastTestModel";
-}
-
-bool EltwiseBroadcastTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(
- LayerTransformation::Params(params)));
- transformer.transform(network);
- return true;
-}
-
-void EltwiseBroadcastTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), 255.f / 10.0, "custom");
- fillData(getLayer(network, "Const3"), 255.f / 4.0, "custom");
- fillData(getLayer(network, "Const4"), 255.f / 10.0, "custom");
- fillData(getLayer(network, "Const5"), 255.f / 4.0, "custom");
-
- fillData(getLayer(network, "Const7"), 255.f / 10.0, "custom");
- fillData(getLayer(network, "Const8"), 255.f / 2.0, "custom");
- fillData(getLayer(network, "Const9"), 255.f / 10.0, "custom");
- fillData(getLayer(network, "Const10"), 255.f / 2.0, "custom");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-std::string EltwiseFqWithChildrenTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(PrecisionTrait<Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(PrecisionTrait<Precision::FP16>::value_type);
-
- std::map<std::string, std::string> constParams = {};
- std::map<std::string, std::string> fakeQuantizeParams = { {"levels", "256"} };
- std::map<std::string, std::string> eltwiseParams = { {"operation", operation} };
- std::map<std::string, std::string> poolingParams = { {"kernel", "1,1"}, {"pool-method", "max"}, {"exclude-pad", "false"} };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "5,5"}, {"5,10", "12,24"}, // Inputs
- {"1,1", "5,6"}, {"2,2", "5,7"}, {"3,3", "5,8"}, {"4,4", "5,9"}, // Const layers
- {"6,11", "10,16"}, {"7,12", "10,17"}, {"8,13", "10,18"}, {"9,14", "10,19"}, // Const layers
- {"5,10", "11,21"}, {"10,20", "11,22"}, // Fake quantize to Eltwise
- {"12,25", "10,15"},
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("EltwiseTestModel", p.inputDimensions[0], p._network_precision)
- // 1
- .addLayer("Const", p._network_precision, &constParams, {{}, {{1}}}, type_size, 0)
- // 2
- .addLayer("Const", p._network_precision, &constParams, {{}, {{1}}}, type_size, 0)
- // 3
- .addLayer("Const", p._network_precision, &constParams, {{}, {{1}}}, type_size, 0)
- // 4
- .addLayer("Const", p._network_precision, &constParams, {{}, {{1}}}, type_size, 0)
- // 5
- .addLayer("FakeQuantize", p._network_precision, &fakeQuantizeParams, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}}, "fakeQuantize1")
- // 6
- .addLayer("Const", p._network_precision, &constParams, {{}, {{1}}}, type_size, 0)
- // 7
- .addLayer("Const", p._network_precision, &constParams, {{}, {{1}}}, type_size, 0)
- // 8
- .addLayer("Const", p._network_precision, &constParams, {{}, {{1}}}, type_size, 0)
- // 9
- .addLayer("Const", p._network_precision, &constParams, {{}, {{1}}}, type_size, 0)
- // 10
- .addLayer("FakeQuantize", p._network_precision, &fakeQuantizeParams, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}}, "fakeQuantize2")
- // 11
- .addLayer("Eltwise", p._network_precision, &eltwiseParams, {{p.inputDimensions[0], p.inputDimensions[0]}, {{p.inputDimensions[0]}}}, 0, "eltwise")
-
- // 12
- .addLayer("Pooling", p._network_precision, &poolingParams, {p.inputDimensions, {p.inputDimensions}}, 0, "pooling")
- .finish(&edges);
-}
-
-std::string EltwiseFqWithChildrenTestModel::getName() const {
- return std::string("EltwiseFqWithChildrenTestModel") +
- (cpuSpecific ? "_cpuSpecific" : "") +
- "_" + operation +
- (signedIntervals ? "_signedInterval" : "_notsignedInterval") +
- (minLevels != 2ul ? ("_minLevels" + std::to_string(minLevels)) : "");
-}
-
-bool EltwiseFqWithChildrenTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- params.updatePrecisions = true;
- LowPrecisionTransformations transformations = getLowPrecisionTransformations(params);
- if (!cpuSpecific) {
- THROW_IE_EXCEPTION << "not CPU/GPU specific Eltwise is not supported";
- }
-
- LayerTransformationPtr eltwiseTransformation = transformations.find("Eltwise");
- eltwiseTransformation->setMinQuantizationLevels(minLevels);
-
- LowPrecisionTransformer transformer(transformations);
- transformer.transform(network);
-
- if (params.quantizeOutputs) {
- if ((params.quantizedTensorAlignmentOnActivations == LayerTransformation::QuantizedTensorAlignment::UpdateLevel) && (minLevels != 2ul)) {
- const CNNLayerPtr eltwise = getLayer(network, "eltwise");
- if (eltwise->type != "Eltwise") {
- THROW_IE_EXCEPTION << "layer " << eltwise->type << " " << eltwise->name << " was quantized";
- }
- }
-
- if (params.updatePrecisions) {
- {
- const CNNLayerPtr fakeQuantize1 = getLayer(network, "fakeQuantize1");
- const Precision defaultPrecision = signedIntervals ? Precision::I8 : Precision::U8;
- const Precision expectedPrecision = params.precisionsOnActivations.size() == 1 ? params.precisionsOnActivations[0] : defaultPrecision;
- if (fakeQuantize1->outData[0]->getPrecision() != expectedPrecision) {
- THROW_IE_EXCEPTION << "unexpected precision " << fakeQuantize1->outData[0]->getPrecision() << " for " << fakeQuantize1->type << " " << fakeQuantize1->name;
- }
- }
-
- {
- const CNNLayerPtr fakeQuantize2 = getLayer(network, "fakeQuantize2");
- const CNNLayerPtr input = getLayer(network, "Input0");
- const Precision originalPrecision = input->outData[0]->getTensorDesc().getPrecision();
- if (fakeQuantize2->outData[0]->getPrecision() != originalPrecision) {
- THROW_IE_EXCEPTION << "unexpected precision " << fakeQuantize2->outData[0]->getPrecision() << " for " << fakeQuantize2->type << " " << fakeQuantize2->name;
- }
- }
- }
- }
- return true;
-}
-
-void EltwiseFqWithChildrenTestModel::resetTransformation(CNNNetwork& network) const {
- const float low = signedIntervals ? -128 : 0.f;
- const float high = signedIntervals ? 127 : 255.f;
-
- fillData(getLayer(network, "Const1"), low / 4.f, "custom");
- fillData(getLayer(network, "Const2"), high / 4.f, "custom");
- fillData(getLayer(network, "Const3"), low / 4.f, "custom");
- fillData(getLayer(network, "Const4"), high / 4.f, "custom");
-
- fillData(getLayer(network, "Const6"), low / 2.f, "custom");
- fillData(getLayer(network, "Const7"), high / 2.f, "custom");
- fillData(getLayer(network, "Const8"), low / 2.f, "custom");
- fillData(getLayer(network, "Const9"), high / 2.f, "custom");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-std::string EltwiseTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(PrecisionTrait<Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(PrecisionTrait<Precision::FP16>::value_type);
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = { {"levels", "256"} };
- std::map<std::string, std::string> eltwise_params = { {"operation", operation} };
- std::map<std::string, std::string> power_params = { {"power", "1"}, {"scale", "1"}, {"shift", "0"} };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "6,6"}, {"1,1", "11,16"}, // Inputs
- {"2,2", "6,7"}, {"3,3", "6,8"}, {"4,4", "6,9"}, {"5,5", "6,10"}, // Const layers
- {"7,12", "11,17"}, {"8,13", "11,18"}, {"9,14", "11,19"}, {"10,15", "11,20"}, // Const layers
- {"6,11", "12,22"}, {"11,21", "12,23"} // Fake quantize to Convolution
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("EltwiseTestModel", p.inputDimensions[0], p._network_precision)
- .addInputLayer(p._network_precision, p.inputDimensions[1])
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[1], {1}, {1}, {1}, {1}}, {{p.inputDimensions[1]}}})
- .addLayer("Eltwise", p._network_precision, &eltwise_params, {{p.inputDimensions[0], p.inputDimensions[1]}, {{p.inputDimensions[0]}}}, 0, 0)
- .finish(&edges);
-}
-
-std::string EltwiseTestModel::getName() const {
- return std::string("EltwiseTestModel") +
- (cpuSpecific ? "_cpuSpecific" : "") +
- "_" + operation +
- (signedIntervals ? "_signedInterval" : "_notsignedInterval") +
- (minLevels != 2ul ? ("_minLevels" + std::to_string(minLevels)) : "");
-}
-
-bool EltwiseTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformations transformations = getLowPrecisionTransformations(params);
- if (!cpuSpecific) {
- THROW_IE_EXCEPTION << "not CPU/GPU specific Eltwise is not supported";
- }
-
- LayerTransformationPtr eltwiseTransformation = transformations.find("Eltwise");
- eltwiseTransformation->setMinQuantizationLevels(minLevels);
-
- LowPrecisionTransformer transformer(transformations);
- transformer.transform(network);
-
- if (params.quantizeOutputs) {
- if ((params.quantizedTensorAlignmentOnActivations == LayerTransformation::QuantizedTensorAlignment::UpdateLevel) && (minLevels != 2ul)) {
- const CNNLayerPtr eltwise = getLayer(network, "Eltwise12");
- if (eltwise->type != "Eltwise") {
- THROW_IE_EXCEPTION << "layer " << eltwise->type << " " << eltwise->name << " was quantized";
- }
- }
-
- if (params.updatePrecisions) {
- const CNNLayerPtr fakeQuantize1 = getLayer(network, "FakeQuantize6");
- const CNNLayerPtr fakeQuantize2 = getLayer(network, "FakeQuantize11");
-
- const Precision expectedPrecision = signedIntervals ? Precision::I8 : Precision::U8;
- if (fakeQuantize1->outData[0]->getPrecision() != expectedPrecision) {
- THROW_IE_EXCEPTION << "unexpected precision " << fakeQuantize1->outData[0]->getPrecision() << " for " << fakeQuantize1->type << " " << fakeQuantize1->name;
- }
- if (fakeQuantize2->outData[0]->getPrecision() != expectedPrecision) {
- THROW_IE_EXCEPTION << "unexpected precision " << fakeQuantize2->outData[0]->getPrecision() << " for " << fakeQuantize2->type << " " << fakeQuantize2->name;
- }
- }
- }
- return true;
-}
-
-void EltwiseTestModel::resetTransformation(CNNNetwork& network) const {
- const float low = signedIntervals ? -128 : 0.f;
- const float high = signedIntervals ? 127 : 255.f;
-
- fillData(getLayer(network, "Const2"), low / 4.f, "custom");
- fillData(getLayer(network, "Const3"), high / 4.f, "custom");
- fillData(getLayer(network, "Const4"), low / 4.f, "custom");
- fillData(getLayer(network, "Const5"), high / 4.f, "custom");
-
- fillData(getLayer(network, "Const7"), low / 2.f, "custom");
- fillData(getLayer(network, "Const8"), high / 2.f, "custom");
- fillData(getLayer(network, "Const9"), low / 2.f, "custom");
- fillData(getLayer(network, "Const10"), high / 2.f, "custom");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-std::string EltwiseWithPoolingTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(PrecisionTrait<Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(PrecisionTrait<Precision::FP16>::value_type);
-
- std::map<std::string, std::string> constParams = {};
- std::map<std::string, std::string> fakeQuantizeParams = { {"levels", "256"} };
- std::map<std::string, std::string> eltwiseParams = { {"operation", operation} };
- std::map<std::string, std::string> powerParams = { {"power", "1"}, {"scale", "1"}, {"shift", "0"} };
- std::map<std::string, std::string> poolingParams = {
- {"kernel", "1,1"},
- {"pool-method", "max"},
- {"exclude-pad", "false"}
- };
-
- CommonTestUtils::conv_common_params convolutionParams = { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 3, false, false };
- std::vector<size_t> weightsConstInputDims = { 3lu, 3lu, 1lu, 1lu };
- std::vector<size_t> biasesConvolutionConstDims = { convolutionParams.out_c };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "11,17"}, {"1,2", "6,7"}, // Inputs
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"7,13", "11,18"}, {"8,14", "11,19"}, {"9,15", "11,20"}, {"10,16", "11,21"}, // Const layers
- {"6,12", "17,33"}, {"11,22", "12,23"}, // Pooling12
- {"12,24", "15,27"}, // Pooling12 -> Convolution15
- {"13,25", "15,28"}, // Const13 -> Convolution15
- {"14,26", "15,29"}, // Const14 -> Convolution15
- {"15,30", "1,1"}, // Convolution15 -> Power
- {"12,24", "16,31"}, // Pooling12 -> Pooling16
- {"16,32", "17,34"} // Pooling16 -> FakeQuantize20
- };
-
- auto modelBuilder = CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("EltwiseWithPoolingTestModel", p.inputDimensions[0], p._network_precision)
- // 1
- //.addInputLayer(p._network_precision, p.inputDimensions[1])
- .addLayer("Power", p._network_precision, &powerParams, { {p.inputDimensions[1]}, {p.inputDimensions[1]} })
- // 2
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 3
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 4
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 5
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 6
- .addLayer("FakeQuantize", p._network_precision, &fakeQuantizeParams, { {p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}} })
- // 7
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 8
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 9
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 10
- .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
- // 11
- .addLayer("FakeQuantize", p._network_precision, &fakeQuantizeParams, { {p.inputDimensions[1], {1}, {1}, {1}, {1}}, {{p.inputDimensions[1]}} })
- // 12
- .addLayer("Pooling", p._network_precision, &poolingParams, { {p.inputDimensions[1]}, {p.inputDimensions[1]} })
- // 13
- .addLayer("Const", p._network_precision, &constParams, { {}, {weightsConstInputDims} },
- std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size)
- // 14
- .addLayer("Const", p._network_precision, &constParams, { {}, {biasesConvolutionConstDims} }, type_size * convolutionParams.out_c, 0)
- // 15
- .convolutionLayer(p._network_precision, { {p.inputDimensions[0], weightsConstInputDims, biasesConvolutionConstDims }, {p.inputDimensions[0]} }, convolutionParams)
- // 16
- .addLayer("Pooling", p._network_precision, &poolingParams, { {p.inputDimensions[1]}, {p.inputDimensions[1]} })
- // 17
- .addLayer("Eltwise", p._network_precision, &eltwiseParams, { {p.inputDimensions[0], p.inputDimensions[1]}, {{p.inputDimensions[0]}} }, 0, 0);
-
- auto modelString = modelBuilder.finish(&edges);
- return modelString;
-}
-
-std::string EltwiseWithPoolingTestModel::getName() const {
- return std::string("EltwiseWithPoolingTestModel") +
- (cpuSpecific ? "_cpuSpecific" : "") +
- "_" + operation +
- (signedIntervals ? "_signedInterval" : "_notSignedInterval") +
- (minLevels != 2ul ? ("_minLevels" + std::to_string(minLevels)) : "");
-}
-
-bool EltwiseWithPoolingTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- if (std::any_of(
- params.precisionsOnActivations.begin(),
- params.precisionsOnActivations.end(),
- [](const Precision precision) { return precision == Precision::U8; })) {
- params.updatePrecisions = true;
- }
-
- LowPrecisionTransformations transformations = getLowPrecisionTransformations(params);
- if (cpuSpecific) {
- transformations = transformations.
- remove("Eltwise").
- add<EltwiseTransformation>(LayerTransformation::Params(params), "Eltwise");
- } else {
- THROW_IE_EXCEPTION << "not CPU/GPU specific Eltwise is not supported";
- }
-
- LayerTransformationPtr eltwiseTransformation = transformations.find("Eltwise");
- eltwiseTransformation->setMinQuantizationLevels(minLevels);
-
- LowPrecisionTransformer transformer(transformations);
- transformer.transform(network);
-
- if (params.quantizeOutputs) {
- if (params.updatePrecisions) {
- // INT8 way
- const CNNLayerPtr fakeQuantize11 = getLayer(network, "FakeQuantize11");
- if ((fakeQuantize11->outData[0]->getPrecision() != Precision::U8) && (fakeQuantize11->outData[0]->getPrecision() != Precision::I8)) {
- THROW_IE_EXCEPTION <<
- "layer " << fakeQuantize11->type << " " << fakeQuantize11->name <<
- " was not quantized " << fakeQuantize11->outData[0]->getPrecision();
- }
-
- const CNNLayerPtr pooling12 = getLayer(network, "Pooling16");
- if ((pooling12->outData[0]->getPrecision() != Precision::U8) && (pooling12->outData[0]->getPrecision() != Precision::I8)) {
- THROW_IE_EXCEPTION <<
- "layer " << pooling12->type << " " << pooling12->name <<
- " was not quantized " << pooling12->outData[0]->getPrecision();
- }
-
- const CNNLayerPtr pooling16 = getLayer(network, "Pooling16");
- if ((pooling16->outData[0]->getPrecision() != Precision::U8) && (pooling16->outData[0]->getPrecision() != Precision::I8)) {
- THROW_IE_EXCEPTION <<
- "layer " << pooling16->type << " " << pooling16->name <<
- " was not quantized " << pooling16->outData[0]->getPrecision();
- }
-
- if (operation == "sum") {
- const CNNLayerPtr eltwise = getLayer(network, "Eltwise17_original");
- if (eltwise->type != "Eltwise") {
- THROW_IE_EXCEPTION << "layer type " << eltwise->type << " " << eltwise->name << " is not correct";
- }
-
- if ((eltwise->outData[0]->getPrecision() != Precision::FP32) && (eltwise->outData[0]->getPrecision() != Precision::FP16)) {
- THROW_IE_EXCEPTION << "layer " << eltwise->type << " " << eltwise->name << " output port precision is not correct";
- }
-
- const CNNLayerPtr dequantizationScaleShift = getLayer(network, "Eltwise17");
- if (dequantizationScaleShift == nullptr) {
- THROW_IE_EXCEPTION << "dequantization layer was not found";
- }
-
- Blob::Ptr shiftsBlob = CNNNetworkHelper::getBlob(dequantizationScaleShift, "biases");
- const auto shiftsBuffer = CNNNetworkHelper::getFloatData(shiftsBlob);
- const size_t shiftsBlobSize = shiftsBlob->size();
- for (size_t i = 0; i < shiftsBlobSize; ++i) {
- if (shiftsBuffer.get()[i] != 0.f) {
- THROW_IE_EXCEPTION << "unexpected shift value " << shiftsBuffer.get()[i] << " for dequantization layer";
- }
- }
- } else if ((operation == "mul") || (operation == "prod")) {
- const CNNLayerPtr eltwise = getLayer(network, "Eltwise17");
- if (eltwise->type != "Eltwise") {
- THROW_IE_EXCEPTION << "layer type " << eltwise->type << " " << eltwise->name << " is not correct";
- }
-
- const CNNLayerPtr dequantizationScaleShift = getLayer(network, "Eltwise17_original");
- if (dequantizationScaleShift != nullptr) {
- THROW_IE_EXCEPTION
- << "dequantization layer " << dequantizationScaleShift->type << " " << dequantizationScaleShift->name
- << " has to be absent (moved to full path branch)";
- }
- }
- }
- } else {
- const CNNLayerPtr eltwise = getLayer(network, "Eltwise17");
- if (eltwise->type != "Eltwise") {
- THROW_IE_EXCEPTION << "layer type " << eltwise->type << " " << eltwise->name << " is not correct";
- }
-
- if ((eltwise->outData[0]->getPrecision() != Precision::FP32) && (eltwise->outData[0]->getPrecision() != Precision::FP16)) {
- THROW_IE_EXCEPTION << "layer " << eltwise->type << " " << eltwise->name << " output port precision is not correct";
- }
- }
-
- // FP32 way
- const CNNLayerPtr fakeQuantize6 = getLayer(network, "FakeQuantize6");
- if ((fakeQuantize6->outData[0]->getPrecision() != Precision::FP32) && (fakeQuantize6->outData[0]->getPrecision() != Precision::FP16)) {
- THROW_IE_EXCEPTION << "layer " << fakeQuantize6->type << " " << fakeQuantize6->name << " was quantized";
- }
-
-
- return true;
-}
-
-void EltwiseWithPoolingTestModel::resetTransformation(CNNNetwork& network) const {
- const float low = signedIntervals ? -128 : 0.f;
- const float high = signedIntervals ? 127 : 255.f;
-
- fillData(getLayer(network, "Const2"), low / 4.f, "custom");
- fillData(getLayer(network, "Const3"), high / 4.f, "custom");
- fillData(getLayer(network, "Const4"), low / 4.f, "custom");
- fillData(getLayer(network, "Const5"), high / 4.f, "custom");
-
- fillData(getLayer(network, "Const7"), low / 2.f, "custom");
- fillData(getLayer(network, "Const8"), high / 2.f, "custom");
- fillData(getLayer(network, "Const9"), low / 2.f, "custom");
- fillData(getLayer(network, "Const10"), high / 2.f, "custom");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include <vector>
-
-FakeQuantizeAndActivationTestModel::FakeQuantizeAndActivationTestModel(const std::vector<std::pair<float, float>>& intervals) :
- intervals(intervals) {}
-
-void FakeQuantizeAndActivationTestModel::initInput(Blob::Ptr input) const {
- const Precision& precision = input->getTensorDesc().getPrecision();
- const size_t dataSize = input->size();
-
- std::vector<float> data(input->size(), 4.0);
- const float step = (intervals[0].second - intervals[0].first) / dataSize;
- float value = intervals[0].first;
- for (size_t i = 0ul; i < dataSize; ++i) {
- if (precision == Precision::FP32) {
- float* buffer = input->buffer().as<float*>();
- buffer[i] = InferenceEngine::PrecisionUtils::f32tof16(value);
- } else if (precision == Precision::FP16) {
- short* buffer = input->buffer().as<short*>();
- buffer[i] = InferenceEngine::PrecisionUtils::f32tof16(value);
- }
-
- value += step;
- if (value > intervals[0].second) {
- value = intervals[0].first;
- }
- }
-}
-
-float FakeQuantizeAndActivationTestModel::getZeroThreshold() const {
- const float interval = intervals[0].second - intervals[0].first;
- return interval / (256.f * 1.e3f);
-}
-
-std::string FakeQuantizeAndActivationTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {{"levels", "256"}};
- std::map<std::string, std::string> power_params = {{"power", "1"}, {"scale", "1"}, {"shift", "0"}};
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"6,12", "7,13"}, // Fake quantize to ReLU
- {"7,14", "8,15"}
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("FakeQuantizeAndActivationTestModel", p.inputDimensions[0], p._network_precision)
- // 1
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- // 2
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 3
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 4
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 5
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 6
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- // 7
- .addLayer("ReLU", p._network_precision, {}, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
- // 8
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- .finish(&edges);
-}
-
-std::string FakeQuantizeAndActivationTestModel::getName() const {
- return
- "FakeQuantizeAndActivationTestModel_" +
- std::to_string(intervals.size()) + "_" +
- std::to_string(intervals[0].first) + "_" + std::to_string(intervals[0].second);
-}
-
-bool FakeQuantizeAndActivationTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
- return true;
-}
-
-void FakeQuantizeAndActivationTestModel::resetTransformation(CNNNetwork& network) const {
- std::vector<float> low(intervals.size());
- std::vector<float> high(intervals.size());
- for (size_t i = 0ul; i < intervals.size(); ++i) {
- const std::pair<float, float> interval = intervals[i];
- low[i] = interval.first;
- high[i] = interval.second;
- }
-
- fillData(getLayer(network, "Const2"), low, "custom");
- fillData(getLayer(network, "Const3"), high, "custom");
- fillData(getLayer(network, "Const4"), low, "custom");
- fillData(getLayer(network, "Const5"), high, "custom");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-void FakeQuantizeAndActivationWithNegativeScalesTestModel::initInput(Blob::Ptr input) const {
- const Precision& precision = input->getTensorDesc().getPrecision();
- const size_t dataSize = input->size();
-
- std::vector<float> data(input->size(), 4.0);
- float value = -64.0;
- for (size_t i = 0ul; i < std::min(static_cast<size_t>(256), dataSize); ++i) {
- if (precision == Precision::FP32) {
- float* buffer = input->buffer().as<float*>();
- buffer[i] = InferenceEngine::PrecisionUtils::f32tof16(value);
- } else if (precision == Precision::FP16) {
- short* buffer = input->buffer().as<short*>();
- buffer[i] = InferenceEngine::PrecisionUtils::f32tof16(value);
- }
- value += 1.0;
- }
-}
-
-std::string FakeQuantizeAndActivationWithNegativeScalesTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = { {"levels", "256"} };
- std::map<std::string, std::string> scale_shift_params = {};
- std::map<std::string, std::string> power_params = {{"power", "1"}, {"scale", "1"}, {"shift", "0"}};
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, // Input -> Power
- {"1,2", "6,7"}, // Power -> FakeQuantize
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"6,12", "7,13"}, // FakeQuantize -> ScaleShift
- {"7,14", "8,15"}, // ScaleShift -> ReLU
- {"8,16", "9,17"} // ReLU -> Power
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("FakeQuantizeAndActivationWithNegativeScalesTestModel", p.inputDimensions[0], p._network_precision)
- // 1
- .addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
- // 2
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 3
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 4
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 5
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 6
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, { {p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}} })
- // 7
- .addLayer("ScaleShift", p._network_precision, {}, {{p.inputDimensions[0]}, {p.inputDimensions[0]}}, p.inputDimensions[0][1] * type_size, p.inputDimensions[0][1] * type_size)
- // 8
- .addLayer("ReLU", p._network_precision, {}, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- // 9
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- .finish(&edges);
-}
-
-std::string FakeQuantizeAndActivationWithNegativeScalesTestModel::getName() const {
- return "FakeQuantizeAndActivationWithNegativeScalesTestModel";
-}
-
-bool FakeQuantizeAndActivationWithNegativeScalesTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
- return true;
-}
-
-void FakeQuantizeAndActivationWithNegativeScalesTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const3"), 127.f / 4.f, "custom");
- fillData(getLayer(network, "Const4"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const5"), 127.f / 4.f, "custom");
-
- fillData(getLayer(network, "ScaleShift7"), -1.f, "weights");
- fillData(getLayer(network, "ScaleShift7"), 0.f, "biases");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-void FakeQuantizeAndActivationWithNegativeSlopeTestModel::initInput(Blob::Ptr input) const {
- const Precision& precision = input->getTensorDesc().getPrecision();
- const size_t dataSize = input->size();
-
- std::vector<float> data(input->size(), 4.0);
- float value = -64.0;
- for (size_t i = 0ul; i < std::min(static_cast<size_t>(256), dataSize); ++i) {
- if (precision == Precision::FP32) {
- float* buffer = input->buffer().as<float*>();
- buffer[i] = InferenceEngine::PrecisionUtils::f32tof16(value);
- } else if (precision == Precision::FP16) {
- short* buffer = input->buffer().as<short*>();
- buffer[i] = InferenceEngine::PrecisionUtils::f32tof16(value);
- }
- value += 1.0;
- }
-}
-
-std::string FakeQuantizeAndActivationWithNegativeSlopeTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = { {"levels", "256"} };
- std::map<std::string, std::string> power_params = {{"power", "1"}, {"scale", "1"}, {"shift", "0"}};
- std::map<std::string, std::string> reluParams = { {"negative_slope", "-1.0"} };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, // Input -> Power
- {"1,2", "6,7"}, // Power -> FakeQuantize
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"6,12", "7,13"}, // FakeQuantize -> ScaleShift
- {"7,14", "8,15"}, // ScaleShift -> ReLU
- {"8,16", "9,17"} // ReLU -> Power
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("FakeQuantizeAndActivationWithNegativeSlopeTestModel", p.inputDimensions[0], p._network_precision)
- // 1
- .addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
- // 2
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 3
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 4
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 5
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 6
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, { {p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}} })
- // 7
- .addLayer("ScaleShift", p._network_precision, {}, {{p.inputDimensions[0]}, {p.inputDimensions[0]}}, p.inputDimensions[0][1] * type_size, p.inputDimensions[0][1] * type_size)
- // 8
- .addLayer("ReLU", p._network_precision, &reluParams, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
- // 9
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- .finish(&edges);
-}
-
-std::string FakeQuantizeAndActivationWithNegativeSlopeTestModel::getName() const {
- return "FakeQuantizeAndActivationWithNegativeSlopeTestModel";
-}
-
-bool FakeQuantizeAndActivationWithNegativeSlopeTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
-
- CNNLayerPtr relu = getLayer(network, "ReLU8");
- if (relu == nullptr) {
- THROW_IE_EXCEPTION << "layer was not found " << relu->name;
- }
-
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(*relu);
- if (parents.size() != 1) {
- THROW_IE_EXCEPTION << "unexpected parent layers size " << parents.size();
- }
-
- if (parents[0]->name != "FakeQuantize6") {
- // FQ -> dequantization -> ReLU
- if (parents[0]->name != "ScaleShift7") {
- THROW_IE_EXCEPTION << "unexpected parent layer " << parents[0]->name;
- }
-
- if (parents[0]->type == "ScaleShift") {
- CNNLayerPtr dequantizationScaleShift = parents[0];
- const Blob::Ptr weightsBlob = CNNNetworkHelper::getBlob(dequantizationScaleShift, "weights");
- auto weights = CNNNetworkHelper::getFloatData(weightsBlob);
- const std::vector<float> scales = std::vector<float>(weights.get(), weights.get() + weightsBlob->size());
-
- const Blob::Ptr biasesBlob = CNNNetworkHelper::getBlob(dequantizationScaleShift, "biases");
- auto biases = CNNNetworkHelper::getFloatData(biasesBlob);
- const std::vector<float> shifts = std::vector<float>(biases.get(), biases.get() + biasesBlob->size());
-
- if ((std::all_of(shifts.begin(), shifts.end(), [](float value) { return value == 0.0; })) &&
- (std::all_of(scales.begin(), scales.end(), [](float value) { return value >= 0.0; }))) {
- THROW_IE_EXCEPTION << "dequantization " << parents[0]->type << " " << parents[0]->name << " was not moved via " << " " << relu->type << " " << relu->name;
- }
- } else if (parents[0]->type == "Convolution") {
- const CNNLayerPtr convolution = parents[0];
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(*convolution);
-
- const Blob::Ptr weightsBlob = CNNNetworkHelper::getBlob(parents[1], "custom");
- if (weightsBlob == nullptr) {
- THROW_IE_EXCEPTION << "weights are absent";
- }
- const std::shared_ptr<float> weights = CNNNetworkHelper::getFloatData(weightsBlob);
- if (weights == nullptr) {
- THROW_IE_EXCEPTION << "weights are not received";
- }
- const std::vector<float> scales = std::vector<float>(weights.get(), weights.get() + weightsBlob->size());
-
-
- if (std::any_of(scales.begin(), scales.end(), [](float value) { return value < 0.0; })) {
- THROW_IE_EXCEPTION << "dequantization scales are not correct";
- }
-
- const Blob::Ptr biasesBlob = CNNNetworkHelper::getBlob(parents[2], "custom");
- if (biasesBlob == nullptr) {
- THROW_IE_EXCEPTION << "biases are absent";
- }
- const std::shared_ptr<float> biases = CNNNetworkHelper::getFloatData(biasesBlob);
- if (biases == nullptr) {
- THROW_IE_EXCEPTION << "biases are not received";
- }
- } else {
- THROW_IE_EXCEPTION << "unexpected parent layer type " << parents[0]->type;
- }
- } else {
- // FQ -> ReLU -> dequantization or FQ -> ReLU -> Power
- const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(*relu);
- if (children.size() != 1lu) {
- THROW_IE_EXCEPTION << "unexpected children layers size " << children.size();
- }
- if (children[0]->name != "Power9" && children[0]->name != "ReLU8_ScaleShift_Power9") {
- THROW_IE_EXCEPTION << "Unexpected child layer '" << children[0]->name << "'";
- }
- }
-
- return true;
-}
-
-void FakeQuantizeAndActivationWithNegativeSlopeTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), 0.f, "custom");
- fillData(getLayer(network, "Const3"), 255.f / 8.f, "custom");
- fillData(getLayer(network, "Const4"), 0.f, "custom");
- fillData(getLayer(network, "Const5"), 255.f / 8.f, "custom");
-
- fillData(getLayer(network, "ScaleShift7"), 3.f, "weights");
- fillData(getLayer(network, "ScaleShift7"), 0.f, "biases");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-std::string FakeQuantizeAndScaleShiftTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- std::map<std::string, std::string> scale_shift_params = {};
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
-
- std::map<std::string, std::string> power_params = {
- {"power", "2"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"}, // ScaleShift
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"6,12", "7,13"} // Fake quantize to Power
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "DWConvFQ", p.inputDimensions[0], p._network_precision)
- .addLayer("ScaleShift", p._network_precision, &scale_shift_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}}, p.inputDimensions[0][1] * type_size, p.inputDimensions[0][1] * type_size)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- .finish(&edges);
-}
-
-std::string FakeQuantizeAndScaleShiftTestModel::getName() const {
- return "FakeQuantizeAndScaleShiftTestModel";
-}
-
-bool FakeQuantizeAndScaleShiftTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
- return true;
-}
-
-void FakeQuantizeAndScaleShiftTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const3"), 127.f / 4.f, "custom");
- fillData(getLayer(network, "Const4"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const5"), 127.f / 4.f, "custom");
-
- fillDataWithInitValue(getLayer(network, "ScaleShift1"), "weights", 1.234f);
- fillDataWithInitValue(getLayer(network, "ScaleShift1"), "biases", 5.678f);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-void FakeQuantizeReshapePoolingTestModelWithConstants::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "inputLow"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "inputHigh"), 127.f / 4.f, "custom");
- fillData(getLayer(network, "outputLow"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "outputHigh"), 127.f / 4.f, "custom");
-
- fillDataMy(getLayer(network, "reshapeConst1"), { 0, 1280, 7, 1 }, "custom");
- fillDataMy(getLayer(network, "reshapeConst2"), { 0, 1280 }, "custom");
-}
-
-std::string FakeQuantizeReshapePoolingTestModelWithConstants::getName() const {
- return "FakeQuantizeReshapePoolingTestModelWithConstants";
-}
-
-bool FakeQuantizeReshapePoolingTestModelWithConstants::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
- return true;
-}
-
-std::string FakeQuantizeReshapePoolingTestModelWithConstants::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- CommonTestUtils::conv_common_params conv =
- { {1, 1}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 32, false, false };
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::vector<size_t> weightsConstInputDims = { 32lu, 32lu, 3lu, 3lu };
- std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fakeQuantizeParams = {{ "levels", "256" }};
- std::map<std::string, std::string> power_params = {{"power", "1"}, {"scale", "1"}, {"shift", "0"}};
- std::map<std::string, std::string> poolingParams = { {"kernel", "7,1"}, { "pool-method", "avg" }, { "strides", "1,1" } };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, // input => inputPower
- {"1,2", "6,7"}, // inputPower => fakeQuantize
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers => fakeQuantize
- {"6,12", "8,14"}, // fakeQuantize => reshape1
- {"7,13", "8,15"}, // reshapeConst1 => reshape1
- {"8,16", "9,17"}, // reshape1 => pooling
- {"9,18", "11,20"}, // pooling => reshape2
- {"10,19", "11,21"}, // reshapeConst2 => reshape2
- {"11,22", "12,23"}, // reshape2 => outputPower
- };
-
- auto network = CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "QuantizationOnWeights", p.inputDimensions[0], p._network_precision)
- // inputPower: id=1
- .addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} }, "inputPower")
- // inputLow: id=2
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "inputLow")
- // inputHigh: id=3
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "inputHigh")
- // outputLow: id=4
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "outputLow")
- // outputHigh: id=5
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "outputHigh")
- // fakeQuantize: id=6
- .addLayer("FakeQuantize", p._network_precision, &fakeQuantizeParams, { {p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}} }, "fakeQuantize")
- // reshapeConst1: id=7
- .addLayer("Const", "I32", {}, { {}, {{4}} }, 4 * 4, "reshapeConst1")
- // reshape1: id=8
- .addLayer("Reshape", p._network_precision, {}, { {{ 1, 1280, 7 }, {4}}, {{1, 1280, 7, 1}} }, "reshape1")
- // pooling: id=9
- .addLayer("Pooling", p._network_precision, &poolingParams, { {{ 1, 1280, 7, 1 }}, {{1, 1280, 1, 1}} }, "pooling")
- // reshapeConst2: id=10
- .addLayer("Const", "I32", {}, { {}, {{2}} }, 2 * 4, "reshapeConst2")
- // reshape2: id=11
- .addLayer("Reshape", p._network_precision, {}, { {{ 1, 1280, 1, 1 }, {2}}, {{1, 1280 }} }, "reshape2")
- // outputPower: id=12
- .addLayer("Power", p._network_precision, &power_params, { {{ 1, 1280 }}, {{1, 1280}} }, "outputPower")
- .finish(&edges);
- return network;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-void FakeQuantizeReshapePoolingTestModelWithoutConstants::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "inputLow"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "inputHigh"), 127.f / 4.f, "custom");
- fillData(getLayer(network, "outputLow"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "outputHigh"), 127.f / 4.f, "custom");
-}
-
-std::string FakeQuantizeReshapePoolingTestModelWithoutConstants::getName() const {
- return "FakeQuantizeReshapePoolingTestModelWithoutConstants";
-}
-
-bool FakeQuantizeReshapePoolingTestModelWithoutConstants::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
- return true;
-}
-
-std::string FakeQuantizeReshapePoolingTestModelWithoutConstants::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- CommonTestUtils::conv_common_params conv =
- { {1, 1}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 32, false, false };
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::vector<size_t> weightsConstInputDims = { 32lu, 32lu, 3lu, 3lu };
- std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fakeQuantizeParams = {{ "levels", "256" }};
- std::map<std::string, std::string> power_params = {{"power", "2"}, {"scale", "1"}, {"shift", "0"}};
- std::map<std::string, std::string> poolingParams = { {"kernel", "7,1"}, { "pool-method", "avg" }, { "strides", "1,1" } };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, // input => inputPower
- {"1,2", "6,7"}, // inputPower => fakeQuantize
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers => fakeQuantize
- {"6,12", "7,13"}, // fakeQuantize => reshape1
- {"7,14", "8,15"}, // reshape1 => pooling
- {"8,16", "9,17"}, // pooling => reshape2
- {"9,18", "10,19"}, // reshape2 => outputPower
- };
-
- auto network = CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "QuantizationOnWeights", p.inputDimensions[0], p._network_precision)
- // inputPower: id=1
- .addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} }, "inputPower")
- // inputLow: id=2
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "inputLow")
- // inputHigh: id=3
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "inputHigh")
- // outputLow: id=4
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "outputLow")
- // outputHigh: id=5
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "outputHigh")
- // fakeQuantize: id=6
- .addLayer("FakeQuantize", p._network_precision, &fakeQuantizeParams, { {p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}} }, "fakeQuantize")
- // reshape1: id=7
- .addLayer("Reshape", p._network_precision, {}, { {{ 1, 1280, 7 }}, {{1, 1280, 7, 1}} }, "reshape1")
- // pooling: id=8
- .addLayer("Pooling", p._network_precision, &poolingParams, { {{ 1, 1280, 7, 1 }}, {{1, 1280, 1, 1}} }, "pooling")
- // reshape2: id=9
- .addLayer("Reshape", p._network_precision, {}, { {{ 1, 1280, 1, 1 }}, {{1, 1280 }} }, "reshape2")
- // outputPower: id=10
- .addLayer("Power", p._network_precision, &power_params, { {{ 1, 1280 }}, {{1, 1280}} }, "outputPower")
- .finish(&edges);
- return network;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-void FakeQuantizeReshapeTestModelWithConstants::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "inputLow"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "inputHigh"), 127.f / 4.f, "custom");
- fillData(getLayer(network, "outputLow"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "outputHigh"), 127.f / 4.f, "custom");
-
- fillDataMy(getLayer(network, "reshapeConst"), { 0, -1 }, "custom");
-}
-
-std::string FakeQuantizeReshapeTestModelWithConstants::getName() const {
- return "FakeQuantizeReshapeTestModelWithConstants";
-}
-
-bool FakeQuantizeReshapeTestModelWithConstants::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
- return true;
-}
-
-std::string FakeQuantizeReshapeTestModelWithConstants::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- CommonTestUtils::conv_common_params conv =
- { {1, 1}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 32, false, false };
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::vector<size_t> weightsConstInputDims = { 32lu, 32lu, 3lu, 3lu };
- std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fakeQuantizeParams = {{ "levels", "256" }};
- std::map<std::string, std::string> power_params = {{"power", "1"}, {"scale", "1"}, {"shift", "0"}};
- std::map<std::string, std::string> poolingParams = { {"kernel", "7,1"}, { "pool-method", "avg" }, { "strides", "1,1" } };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, // input => inputPower
- {"1,2", "6,7"}, // inputPower => fakeQuantize
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers => fakeQuantize
- {"6,12", "8,14"}, // fakeQuantize => reshape1
- {"7,13", "8,15"}, // reshapeConst1 => reshape1
- {"8,16", "9,17"}, // reshape => outputPower
- };
-
- auto network = CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "QuantizationOnWeights", p.inputDimensions[0], p._network_precision)
- // inputPower: id=1
- .addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} }, "inputPower")
- // inputLow: id=2
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "inputLow")
- // inputHigh: id=3
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "inputHigh")
- // outputLow: id=4
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "outputLow")
- // outputHigh: id=5
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "outputHigh")
- // fakeQuantize: id=6
- .addLayer("FakeQuantize", p._network_precision, &fakeQuantizeParams, { {p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}} }, "fakeQuantize")
- // reshapeConst1: id=7
- .addLayer("Const", "I32", {}, { {}, {{2}} }, 2 * 4, "reshapeConst")
- // reshape1: id=8
- .addLayer("Reshape", p._network_precision, {}, { {{ 1, 256, 6, 6 }, {2}}, {{1, 9216}} }, "reshape")
- // outputPower: id=9
- .addLayer("Power", p._network_precision, &power_params, { {{ 1, 9216 }}, {{1, 9216}} }, "outputPower")
- .finish(&edges);
- return network;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-std::string FullyConnectedAndScaleShiftsOnActivationsTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- std::vector<size_t> const_1_dims = {1000, 2048};
- std::vector<size_t> const_2_dims = {1000};
- std::map<std::string, std::string> scale_shift_params = {};
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fc_params = {
- { "out-size", "1000" }
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "4,5"}, // ScaleShift
- {"2,3", "4,6"}, {"3,4", "4,7"}, // Const layers
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "FCandScaleShift", p.inputDimensions[0], p._network_precision)
- .addLayer("ScaleShift", p._network_precision, &scale_shift_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}}, p.inputDimensions[0][1] * type_size, p.inputDimensions[0][1] * type_size)
- .addLayer("Const", p._network_precision, &const_params, {{}, {const_1_dims}},
- std::accumulate(const_1_dims.begin(), const_1_dims.end(), 1lu, std::multiplies<size_t>()) * type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {const_2_dims}},
- std::accumulate(const_2_dims.begin(), const_2_dims.end(), 1lu, std::multiplies<size_t>()) * type_size, 0)
- .addLayer("FullyConnected", p._network_precision, &fc_params, {{p.inputDimensions[0], const_1_dims, const_2_dims}, {{1, 1000}}})
- .finish(&edges);
-}
-
-std::string FullyConnectedAndScaleShiftsOnActivationsTestModel::getName() const {
- return "FullyConnectedAndScaleShiftsOnActivationsTestModel";
-}
-
-bool FullyConnectedAndScaleShiftsOnActivationsTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
- return true;
-}
-
-void FullyConnectedAndScaleShiftsOnActivationsTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "ScaleShift1"), 0.4f, "weights");
- fillData(getLayer(network, "ScaleShift1"), 0.3f, "biases");
-
- fillDataWithInitValue(getLayer(network, "Const2"), "custom", 0.2f);
- fillDataWithInitValue(getLayer(network, "Const3"), "custom", 0.3f);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "common_test_utils/common_utils.hpp"
-
-std::string FakeQuantizeAsOutputTest::getName() const {
- return "FakeQuantizeAsOutputTest";
-}
-
-bool FakeQuantizeAsOutputTest::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- network.addOutput("FakeQuantize12");
-
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params));
- transformer.transform(network);
-
- const auto fq = CommonTestUtils::getLayerByName(network, "FakeQuantize12");
- if (fq == nullptr)
- THROW_IE_EXCEPTION << "Layer 'FakeQuantize12' should not be transformed";
-
- return true;
-}
-
-std::unordered_set<std::string> FakeQuantizeAsOutputTest::getNotTransformedLayers() const {
- return { "Convolution14" };
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-std::string FakeQuantizeWithMultiOutputsTest::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(PrecisionTrait<Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(PrecisionTrait<Precision::FP16>::value_type);
-
- CommonTestUtils::conv_common_params conv =
- { {1, 1}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 32, false, false };
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::vector<size_t> weightsConstInputDims = { 32lu, 32lu, 3lu, 3lu };
- std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"7,13", "12,18"}, {"8,14", "12,19"}, {"9,15", "12,20"}, {"10,16", "12,21"}, {"11,17", "12,22"}, // Const layers
- {"6,12", "14,25"}, {"12,23", "14,26"}, // Fake quantize to Conv1
- {"13,24", "14,27"}, // biases to Conv
- {"14,28", "15,29"}, // Conv to Power1
- {"12,23", "16,31"} // FQ to Power2
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "QuantizationOnWeights", p.inputDimensions[0], p._network_precision)
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {weightsConstInputDims}},
- std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {biasesConvolutionConstDims}}, type_size * conv.out_c, 0)
- .convolutionLayer(p._network_precision, {{p.inputDimensions[0], weightsConstInputDims, biasesConvolutionConstDims }, {convOutShape}}, conv)
- .addLayer("Power", p._network_precision, &power_params, {{convOutShape}, {convOutShape}})
- .addLayer("Power", p._network_precision, &power_params, {{weightsConstInputDims}, {weightsConstInputDims}})
- .finish(&edges);
-}
-
-std::string FakeQuantizeWithMultiOutputsTest::getName() const {
- return "FakeQuantizeWithMultiOutputsTest";
-}
-
-bool FakeQuantizeWithMultiOutputsTest::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params));
- transformer.transform(network);
-
- return true;
-}
-
-std::unordered_set<std::string> FakeQuantizeWithMultiOutputsTest::getNotTransformedLayers() const {
- return { "Convolution14" };
-}
-
-void FakeQuantizeWithMultiOutputsTest::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), 0.0, "custom");
- fillData(getLayer(network, "Const3"), 127.5, "custom");
- fillData(getLayer(network, "Const4"), 0.0, "custom");
- fillData(getLayer(network, "Const5"), 127.5, "custom");
-
- fillData(getLayer(network, "Const7"), 3.0, "custom");
-
- fillData(getLayer(network, "Const8"), -1.275 / 2.0, "custom");
- fillData(getLayer(network, "Const9"), 1.275, "custom");
- fillData(getLayer(network, "Const10"), -1.275 / 2.0, "custom");
- fillData(getLayer(network, "Const11"), 1.275, "custom");
-
- fillData(getLayer(network, "Const13"), 5.0, "custom");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-std::string FakeQuantizeWithTwoScaleShiftsAsOutput::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(PrecisionTrait<Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(PrecisionTrait<Precision::FP16>::value_type);
-
- std::map<std::string, std::string> scale_shift_params = {};
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "5,5"}, // input -> fq
- {"1,1", "5,6"}, {"2,2", "5,7"}, {"3,3", "5,8"}, {"4,4", "5,9"}, // Const layers
- {"5,10", "6,11"}, {"5,10", "7,13"}, // FQ -> SS
- {"6,12", "8,15"}, {"7,14", "9,17"} // SS -> Power
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "FakeQuantizeWithTwoScaleShiftsAsOutput", p.inputDimensions[0], p._network_precision)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, "inputLow")
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, "inputHigh")
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, "outputLow")
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, "outputHigh")
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .addLayer("ScaleShift", p._network_precision, &scale_shift_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}}, p.inputDimensions[0][1] * type_size, p.inputDimensions[0][1] * type_size)
- .addLayer("ScaleShift", p._network_precision, &scale_shift_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}}, p.inputDimensions[0][1] * type_size, p.inputDimensions[0][1] * type_size)
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- .finish(&edges);
-}
-
-std::string FakeQuantizeWithTwoScaleShiftsAsOutput::getName() const {
- return "FakeQuantizeWithTwoScaleShiftsAsOutput";
-}
-
-bool FakeQuantizeWithTwoScaleShiftsAsOutput::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params));
- transformer.transform(network);
-
- return true;
-}
-
-void FakeQuantizeWithTwoScaleShiftsAsOutput::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "inputLow"), 0.f, "custom");
- fillData(getLayer(network, "inputHigh"), 5.f, "custom");
- fillData(getLayer(network, "outputLow"), 0.f, "custom");
- fillData(getLayer(network, "outputHigh"), 5.f, "custom");
-
- fillData(getLayer(network, "ScaleShift6"), 3.f, "weights");
- fillData(getLayer(network, "ScaleShift6"), 3.f, "biases");
- fillData(getLayer(network, "ScaleShift7"), 1.5f, "weights");
- fillData(getLayer(network, "ScaleShift7"), 1.5f, "biases");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-//const size_t channelsCount = 32ul;
-//const size_t group = channelsCount;
-//std::vector<size_t> weightsConstInputDims = { channelsCount, 1lu, 3lu, 3lu };
-
-FullyConnectedBaseTestModel::FullyConnectedBaseTestModel(const bool addBiasesLayer) : addBiasesLayer(addBiasesLayer) {}
-
-std::string FullyConnectedBaseTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- const size_t inputChannelsCount = p.inputDimensions[0][1];
- const size_t outputChannelsCount = p.outputDimensions[0][1];
- //conv_common_params conv = { {1, 1}, {3, 3}, {1, 1}, {1, 1}, {1, 1}, "valid", group, outputChannelsCount, false, false };
- std::vector<size_t> weightsConstInputDims = { outputChannelsCount, inputChannelsCount };
-
- //std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- //getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = { {"levels", "256"} };
- std::map<std::string, std::string> fake_quantize_params2 = { {"levels", "255"} };
- std::map<std::string, std::string> power_params = { {"power", "1"}, {"scale", "1"}, {"shift", "0"} };
- std::map<std::string, std::string> poolingParams = { {"kernel", "112,112"}, {"pool-method", "max"} };
- std::map<std::string, std::string> reshapeParams = { };
- std::map<std::string, std::string> fullyConnectedParams = { {"out-size", std::to_string(p.outputDimensions[0][1])} };
-
- std::vector<size_t> biasesConstDims = { p.outputDimensions[0][1] };
-
- const std::vector<std::vector<size_t>> convolutionDims = addBiasesLayer ?
- std::vector<std::vector<size_t>>({ p.inputDimensions[0], weightsConstInputDims, biasesConstDims }) :
- std::vector<std::vector<size_t>>({p.inputDimensions[0], weightsConstInputDims });
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"6,12", "7,13"}, // FakeQuantize to Pooling
- {"7,14", "8,15"}, // Pooling to Reshape
- {"8,16", "15,28"}, // Reshape to FullyConnected
- {"9,17", "14,22"}, {"10,18", "14,23"}, {"11,19", "14,24"}, {"12,20", "14,25"}, {"13,21", "14,26"}, // Const layers
- {"14,27", "15,29"}
- };
-
- if (addBiasesLayer) {
- edges.push_back({ "16,32", "15,30" }); // biases to Conv
- }
-
- const std::vector<std::vector<size_t>> fullyConnectedDims = addBiasesLayer ?
- std::vector<std::vector<size_t>>({ {p.inputDimensions[0][0], p.inputDimensions[0][1]}, weightsConstInputDims, biasesConstDims }) :
- std::vector<std::vector<size_t>>({ {p.inputDimensions[0][0], p.inputDimensions[0][1]}, weightsConstInputDims });
-
- std::vector<size_t> quantizationParamsDims(p.inputDimensions[0].size(), 1);
- quantizationParamsDims[1] = inputChannelsCount;
-
- CommonTestUtils::DefaultNetBuilder builder = CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "FullyConnectedBaseTestModel", p.inputDimensions[0], p._network_precision)
- // 1
- .addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
- // 2
- .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataInputLowConst")
- // 3
- .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataInputHighConst")
- // 4
- .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataOutputLowConst")
- // 5
- .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataOutputHighConst")
- // 6
- .addLayer("FakeQuantize",
- p._network_precision,
- &fake_quantize_params,
- { {p.inputDimensions[0], quantizationParamsDims, quantizationParamsDims, quantizationParamsDims, quantizationParamsDims}, {{p.inputDimensions[0]}} },
- "fakeQuantize")
- // 7
- .addLayer("Pooling", p._network_precision, &poolingParams, { {p.inputDimensions[0]}, {{1, 32, 1, 1}} }, "pooling")
- // 8
- .addLayer("Reshape", p._network_precision, &reshapeParams, { {{1, 32, 1, 1}}, {{1, 32}} }, "reshape")
- // 9
- .addLayer("Const", p._network_precision, &const_params, { {}, {weightsConstInputDims} },
- std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, "weigthsConst")
- // 10
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsInputLowConst")
- // 11
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsInputHighConst")
- // 12
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsOutputLowConst")
- // 13
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsOutputHighConst")
- // 14
- .addLayer(
- "FakeQuantize",
- p._network_precision,
- &fake_quantize_params,
- { {weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}} },
- "fakeQuantizeOnWeights")
- // 15
- .addLayer("FullyConnected", p._network_precision, &fullyConnectedParams, { fullyConnectedDims, {p.outputDimensions[0]} }, "fullyConnected");
-
- if (addBiasesLayer) {
- // 16
- builder.addLayer("Const", p._network_precision, &const_params, { {}, {biasesConstDims} }, type_size * biasesConstDims[0], "biasesConst");
- }
-
- return builder.finish(&edges);
-}
-
-bool FullyConnectedBaseTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params));
- transformer.transform(network);
- return true;
-}
-
-void FullyConnectedBaseTestModel::resetTransformation(CNNNetwork& network) const {
- CNNLayerPtr fakeQuantize = CNNNetworkHelper::getLayer(network, "fakeQuantize");
- const size_t inputChannels = fakeQuantize->outData[0]->getTensorDesc().getDims()[1];
-
- CNNLayerPtr fullyConnected = CNNNetworkHelper::getLayer(network, "fullyConnected");
- const size_t outputChannels = fullyConnected->outData[0]->getTensorDesc().getDims()[1];
-
- // Const on activations
- std::vector<float> lowValues(inputChannels, 1.0); // to have shifts
- std::vector<float> highValues(inputChannels);
- if (areScalesOnActivationsDifferent()) {
- for (size_t inputChannel = 0; inputChannel < highValues.size(); ++inputChannel) {
- highValues[inputChannel] = static_cast<float>(inputChannel);
- }
- } else {
- highValues = std::vector<float>(inputChannels, 255.f);
- }
-
- fillData(getLayer(network, "dataInputLowConst"), lowValues, "custom");
- fillData(getLayer(network, "dataInputHighConst"), highValues, "custom");
- fillData(getLayer(network, "dataOutputLowConst"), lowValues, "custom");
- fillData(getLayer(network, "dataOutputHighConst"), highValues, "custom");
-
- // Const on weights
- std::vector<float> weights(outputChannels * inputChannels);
- for (size_t outputChannel = 0ul; outputChannel < outputChannels; ++outputChannel) {
- for (size_t inputChannel = 0ul; inputChannel < inputChannels; ++inputChannel) {
- weights[outputChannel * inputChannels + inputChannel] = inputChannel;
- }
- }
- fillData(getLayer(network, "weigthsConst"), weights, "custom");
-
- fillData(getLayer(network, "weigthsInputLowConst"), -128.f, "custom");
- fillData(getLayer(network, "weigthsInputHighConst"), 127.f, "custom");
- fillData(getLayer(network, "weigthsOutputLowConst"), -128.f, "custom");
- fillData(getLayer(network, "weigthsOutputHighConst"), 127.f, "custom");
-
- if (addBiasesLayer) {
- std::vector<float> biases(outputChannels);
- for (size_t i = 0ul; i < outputChannels; ++i) {
- biases[i] = static_cast<float>(i);
- }
- fillData(getLayer(network, "biasesConst"), biases, "custom");
- }
-}
-
-bool FullyConnectedBaseTestModel::areScalesOnActivationsDifferent() const {
- return false;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "low_precision_transformations/fake_quantize.hpp"
-#include "low_precision_transformations/convolution.hpp"
-#include "low_precision_transformations/fully_connected.hpp"
-#include "low_precision_transformations/scaleshift_to_convolution.hpp"
-
-FullyConnectedTestModel::FullyConnectedTestModel(
- const std::vector<size_t>& inputDimentions,
- const std::vector<size_t>& outputDimentions) :
- addBiasesLayer(false),
- inputDimentions(inputDimentions),
- outputDimentions(outputDimentions) {}
-
-std::string FullyConnectedTestModel::getName() const {
- return std::string("FullyConnectedTestModel") +
- (addBiasesLayer ? "WithBiases" : "") +
- "_D" + std::to_string(inputDimentions.size()) +
- "_D" + std::to_string(outputDimentions.size());
-}
-
-void FullyConnectedTestModel::initInput(Blob::Ptr input) const {
- fillDataWithInitValue(input, -1.f);
-}
-
-bool FullyConnectedTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- params.updatePrecisions = true;
-
- // TODO: use getLowPrecisionTransformer(params) instead
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params).
- add<FullyConnectedTransformation>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false), "FullyConnected").
- add<ConvolutionTransformation>(LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), "Convolution").
- addCleanup<ScaleShiftToConvolutionTransformation>(
- LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
- "ScaleShift"));
-
- transformer.transform(network);
-
- if (params.quantizeOutputs) {
- const CNNLayerPtr dequantizationLayer = getLayer(network, "fullyConnected");
- if (dequantizationLayer->type != "ScaleShift") {
- THROW_IE_EXCEPTION << "was not quantized";
- }
-
- const Blob::Ptr biases = CNNNetworkHelper::getBiases(*dequantizationLayer);
- const std::shared_ptr<float> biasesData = CNNNetworkHelper::getFloatData(biases);
- if (params.updateBiases) {
- for (size_t i = 0ul; i < biases->size(); ++i) {
- if (biasesData.get()[i] != 0.f) {
- THROW_IE_EXCEPTION << "biases value is not zero";
- }
- }
- } else {
- // FakeQuantize layer has to have shift
- for (size_t i = 0ul; i < biases->size(); ++i) {
- if (biasesData.get()[i] == 0.f) {
- THROW_IE_EXCEPTION << "biases value is zero";
- }
- }
- }
- }
-
- return true;
-}
-
-std::string FullyConnectedTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- const size_t inputChannelsCount = p.inputDimensions[0][1];
- const size_t outputChannelsCount = p.outputDimensions[0][1];
- std::vector<size_t> weightsConstInputDims = {
- p.inputDimensions[0][2] * p.inputDimensions[0][3],
- p.outputDimensions[0][p.outputDimensions[0].size() == 2ul ? 1ul : 2ul] };
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = { {"levels", "256"} };
- std::map<std::string, std::string> fake_quantize_params2 = { {"levels", "255"} };
- std::map<std::string, std::string> power_params = { {"power", "1"}, {"scale", "1"}, {"shift", "0"} };
- std::map<std::string, std::string> poolingParams = { {"kernel", "112,112"}, {"pool-method", "max"} };
- std::map<std::string, std::string> reshapeParams = { };
- std::map<std::string, std::string> fullyConnectedParams = { {"out-size", std::to_string(p.outputDimensions[0][1])} };
-
- std::vector<size_t> biasesConstDims = { p.outputDimensions[0][1] };
-
- const std::vector<std::vector<size_t>> convolutionDims = addBiasesLayer ?
- std::vector<std::vector<size_t>>({ p.inputDimensions[0], weightsConstInputDims, biasesConstDims }) :
- std::vector<std::vector<size_t>>({ p.inputDimensions[0], weightsConstInputDims });
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"6,12", "7,13"}, // FakeQuantize to Pooling
- {"7,14", "8,15"}, // Pooling to Reshape
- {"8,16", "15,28"}, // Reshape to FullyConnected
- {"9,17", "14,22"}, {"10,18", "14,23"}, {"11,19", "14,24"}, {"12,20", "14,25"}, {"13,21", "14,26"}, // Const layers
- {"14,27", "15,29"}
- };
-
- if (addBiasesLayer) {
- edges.push_back({ "16,32", "15,30" }); // biases to Conv
- }
-
- const std::vector<std::vector<size_t>> fullyConnectedDims = addBiasesLayer ?
- std::vector<std::vector<size_t>>({ p.outputDimensions[0], weightsConstInputDims, biasesConstDims }) :
- std::vector<std::vector<size_t>>({ p.outputDimensions[0], weightsConstInputDims });
-
- std::vector<size_t> quantizationParamsDims(p.inputDimensions[0].size(), 1);
- quantizationParamsDims[1] = inputChannelsCount;
-
- const std::vector<size_t> reshape1OuputDims = { p.inputDimensions[0][0], p.inputDimensions[0][1], p.inputDimensions[0][2] * p.inputDimensions[0][3] };
- const std::vector<size_t> reshape2OuputDims = p.outputDimensions[0].size() == 2ul ?
- std::vector<size_t>({ p.inputDimensions[0][0] * p.inputDimensions[0][1], p.inputDimensions[0][2] * p.inputDimensions[0][3] }) :
- std::vector<size_t>({ p.inputDimensions[0][0], p.inputDimensions[0][1], p.inputDimensions[0][2] * p.inputDimensions[0][3] });
-
- CommonTestUtils::DefaultNetBuilder builder = CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "FullyConnectedTestModel", p.inputDimensions[0], p._network_precision)
- // 1
- .addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
- // 2
- .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataInputLowConst")
- // 3
- .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataInputHighConst")
- // 4
- .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataOutputLowConst")
- // 5
- .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataOutputHighConst")
- // 6
- .addLayer("FakeQuantize",
- p._network_precision,
- &fake_quantize_params,
- { {p.inputDimensions[0], quantizationParamsDims, quantizationParamsDims, quantizationParamsDims, quantizationParamsDims}, {{p.inputDimensions[0]}} },
- "fakeQuantize")
- // 7
- .addLayer("Reshape", p._network_precision, &reshapeParams, { { p.inputDimensions[0] }, { reshape1OuputDims } }, "reshape1")
- // 8
- .addLayer("Reshape", p._network_precision, &reshapeParams, { {{ reshape1OuputDims }}, { reshape2OuputDims } }, "reshape2")
- // 9
- .addLayer("Const", p._network_precision, &const_params, { {}, {weightsConstInputDims} },
- std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, "weigthsConst")
- // 10
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsInputLowConst")
- // 11
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsInputHighConst")
- // 12
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsOutputLowConst")
- // 13
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsOutputHighConst")
- // 14
- .addLayer(
- "FakeQuantize",
- p._network_precision,
- &fake_quantize_params,
- { {weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}} },
- "fakeQuantizeOnWeights")
- // 15
- .addLayer("FullyConnected", p._network_precision, &fullyConnectedParams, { fullyConnectedDims, {p.outputDimensions[0]} }, "fullyConnected");
-
- if (addBiasesLayer) {
- // 16
- builder.addLayer("Const", p._network_precision, &const_params, { {}, {biasesConstDims} }, type_size * biasesConstDims[0], "biasesConst");
- }
-
- return builder.finish(&edges);
-}
-
-void FullyConnectedTestModel::resetTransformation(CNNNetwork& network) const {
- CNNLayerPtr fakeQuantize = CNNNetworkHelper::getLayer(network, "fakeQuantize");
- const size_t inputChannels = fakeQuantize->outData[0]->getTensorDesc().getDims()[1];
-
- CNNLayerPtr fullyConnected = CNNNetworkHelper::getLayer(network, "fullyConnected");
- const size_t outputChannels = fullyConnected->outData[0]->getTensorDesc().getDims()[1];
-
- // Const on activations
- //std::vector<float> lowValues(inputChannels, 1.0); // to have shifts
- //std::vector<float> highValues(inputChannels);
- //if (areScalesOnActivationsDifferent()) {
- // for (size_t inputChannel = 0; inputChannel < highValues.size(); ++inputChannel) {
- // highValues[inputChannel] = static_cast<float>(inputChannel);
- // }
- //}
- //else {
- // highValues = std::vector<float>(inputChannels, 255.f);
- //}
-
- //std::vector<float> lowValues(inputChannels, 1.275f);
- //std::vector<float> highValues(inputChannels, 2.55f);
-
- std::vector<float> lowValues(inputChannels, 127.5f);
- std::vector<float> highValues(inputChannels, 255.f);
-
- fillData(getLayer(network, "dataInputLowConst"), lowValues, "custom");
- fillData(getLayer(network, "dataInputHighConst"), highValues, "custom");
- fillData(getLayer(network, "dataOutputLowConst"), lowValues, "custom");
- fillData(getLayer(network, "dataOutputHighConst"), highValues, "custom");
-
-
- const size_t fakeQuantizeInputChannel = outputChannels;
-
- // Const on weights
- //std::vector<float> weights(
- // fakeQuantize->outData[0]->getTensorDesc().getDims()[2] *
- // fakeQuantize->outData[0]->getTensorDesc().getDims()[3] *
- // fullyConnected->outData[0]->getTensorDesc().getDims()[fullyConnected->outData[0]->getTensorDesc().getDims().size() == 2ul ? 1 : 2]);
- //for (size_t outputChannel = 0ul; outputChannel < outputChannels; ++outputChannel) {
- // for (size_t inputChannel = 0ul; inputChannel < fakeQuantizeInputChannel; ++inputChannel) {
- // weights[outputChannel * fakeQuantizeInputChannel + inputChannel] = inputChannel;
- // }
- //}
-
- const std::vector<size_t> dims = fakeQuantize->outData[0]->getTensorDesc().getDims();
- // const size_t weightsSize = dims[2] * dims[3] * dims[dims.size() == 2ul ? 1 : 2];
- const size_t weightsSize = (dims[2] * dims[3]) * (dims[2] * dims[3]);
- std::vector<float> weights(weightsSize, 2.f);
-
- fillData(getLayer(network, "weigthsConst"), weights, "custom");
-
- fillData(getLayer(network, "weigthsInputLowConst"), -128.f, "custom");
- fillData(getLayer(network, "weigthsInputHighConst"), 127.f, "custom");
- fillData(getLayer(network, "weigthsOutputLowConst"), -128.f, "custom");
- fillData(getLayer(network, "weigthsOutputHighConst"), 127.f, "custom");
-
- if (addBiasesLayer) {
- std::vector<float> biases(outputChannels);
- for (size_t i = 0ul; i < outputChannels; ++i) {
- biases[i] = static_cast<float>(i);
- }
- fillData(getLayer(network, "biasesConst"), biases, "custom");
- }
-}
-
-bool FullyConnectedTestModel::areScalesOnActivationsDifferent() const {
- return false;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <gtest/gtest.h>
-#include <string>
-#include <unordered_map>
-
-#include <ie_core.hpp>
-#include <legacy/details/ie_cnn_network_tools.h>
-#include "cpp_interfaces/impl/ie_plugin_internal.hpp"
-
-#include "common/low_precision_tests_utils.hpp"
-
-#include "low_precision_transformations/transformer.hpp"
-#include "low_precision_transformations/convolution.hpp"
-#include "low_precision_transformations/network_helper.hpp"
-#include "low_precision_transformations/eltwise.hpp"
-
-#include "tests_common.hpp"
-#include "ir_gen_helper.hpp"
-
-using namespace ::testing;
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-using namespace single_layer_tests;
-
-inline void fillDataMy(CNNLayerPtr layer, std::vector<int> values, const std::string& blobName = "") {
- if (layer == nullptr) {
- THROW_IE_EXCEPTION << "layer is nullable";
- }
- if (blobName.empty() && (layer->blobs.size() != 1)) {
- THROW_IE_EXCEPTION << "several blobs";
- }
-
- Blob::Ptr blob = blobName.empty() ? layer->blobs.begin()->second : layer->blobs[blobName];
- if (blob->size() != values.size()) {
- THROW_IE_EXCEPTION << "values size is not correct";
- }
-
- int* buffer = blob->buffer().as<int*>();
- for (size_t i = 0; i < blob->size(); i++) {
- buffer[i] = values[i];
- }
-}
-
-/**
- * @brief base class for test model.
- */
-class SingleLayerTransformationsTestParams;
-
-class SingleLayerTestModel {
-public:
- typedef std::shared_ptr<SingleLayerTestModel> Ptr;
-
- LowPrecisionTransformations getLowPrecisionTransformations(const LayerTransformation::Params& params) const;
- LowPrecisionTransformer getLowPrecisionTransformer(const LayerTransformation::Params& params) const;
-
- virtual std::string getModel(SingleLayerTransformationsTestParams& p) const = 0;
- virtual std::string getName() const = 0;
-
- virtual void initInput(Blob::Ptr input) const {}
- virtual float getZeroThreshold() const {
- return 1e-7;
- }
- virtual bool transform(CNNNetwork& network, LayerTransformation::Params& params) const = 0;
- virtual void resetTransformation(CNNNetwork& network) const = 0;
- virtual std::unordered_set<std::string> getNotTransformedLayers() const {
- return {};
- }
-
- virtual float getThreshold(const std::string& device_name, const Precision precision, LayerTransformation::Params& params) const {
- return precision == Precision::FP16 ? 0.0005f : 0.0003f;
- }
-
-protected:
- // TODO: pass as parameter: 22403
- const std::string device_name = "CPU";
-};
-
-class SingleLayerTransformationsTestParams {
-public:
- SingleLayerTransformationsTestParams(
- const std::string& name,
- SingleLayerTestModel::Ptr model,
- const std::vector<std::vector<size_t>>& inputDimensions,
- const std::vector<std::vector<size_t>>& outputDimensions,
- const std::string& network_precision = "FP32") :
- device_name(name),
- model(model),
- inputDimensions(inputDimensions),
- outputDimensions(outputDimensions),
- _network_precision(network_precision) {}
-
- const std::string device_name;
- SingleLayerTestModel::Ptr model;
- const std::vector<std::vector<size_t>> inputDimensions;
- const std::vector<std::vector<size_t>> outputDimensions;
- std::string _network_precision;
-
-
- static std::string getLowPrecisionTransformerSingleLayerTestName(testing::TestParamInfo<SingleLayerTransformationsTestParams> p) {
- return p.param.model->getName();
- }
-};
-
-class FullyConnectedAndScaleShiftsOnActivationsTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-class ResampleTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-
-class ConvolutionAndQuantizeOnActivationsAndWeightsBaseTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
-};
-
-class ConvolutionAndQuantizeOnSignedActivationsAndWeightsPositiveTestModel : public ConvolutionAndQuantizeOnActivationsAndWeightsBaseTestModel {
-public:
- void resetTransformation(CNNNetwork& network) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
-};
-
-class ConvolutionAndQuantizeOnSignedActivationsAndWeightsNegativeTestModel : public ConvolutionAndQuantizeOnActivationsAndWeightsBaseTestModel {
-public:
- void resetTransformation(CNNNetwork& network) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
-};
-
-class ConvolutionAndQuantizeOnUnsignedActivationsAndWeightsTestModel : public ConvolutionAndQuantizeOnActivationsAndWeightsBaseTestModel {
-public:
- void resetTransformation(CNNNetwork& network) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
-};
-
-class ConvolutionAndQuantizeOnSignedActivationsAndInvertedWeightsTestModel : public ConvolutionAndQuantizeOnActivationsAndWeightsBaseTestModel {
-public:
- void resetTransformation(CNNNetwork& network) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
-};
-
-class FakeQuantizeReshapePoolingTestModelWithConstants : public SingleLayerTestModel {
-public:
- void resetTransformation(CNNNetwork& network) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
-};
-
-class FakeQuantizeReshapePoolingTestModelWithoutConstants : public SingleLayerTestModel {
-public:
- void resetTransformation(CNNNetwork& network) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
-};
-
-class FakeQuantizeReshapeTestModelWithConstants : public SingleLayerTestModel {
-public:
- void resetTransformation(CNNNetwork& network) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
-};
-
-class ScaleShiftToConvolutionTestModel : public SingleLayerTestModel {
-public:
- void resetTransformation(CNNNetwork& network) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
-};
-
-class ScaleShiftToConvolutionAfterNotConcatIgnoreTestModel : public SingleLayerTestModel {
-public:
- void resetTransformation(CNNNetwork& network) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
-};
-
-class ScaleShiftToConvolutionAfterFakeQuantizeIgnoreTestModel : public SingleLayerTestModel {
-public:
- void resetTransformation(CNNNetwork& network) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
-};
-
-class ScaleShiftToConvolutionAfterConcatTestModel : public SingleLayerTestModel {
-public:
- ScaleShiftToConvolutionAfterConcatTestModel(const bool scaleShiftIsOutput);
- void resetTransformation(CNNNetwork& network) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
-
-private:
- const bool scaleShiftIsOutput;
-};
-
-class FullyConnectedAndQuantizeTestModel : public SingleLayerTestModel {
-public:
- void resetTransformation(CNNNetwork& network) const override {
- fillData(getLayer(network, "dataConstInputLow"), 63.5, "custom");
- fillData(getLayer(network, "dataConstInputHigh"), 127.0, "custom");
- fillData(getLayer(network, "dataConstOutputLow"), 63.5, "custom");
- fillData(getLayer(network, "dataConstOutputHigh"), 127.0, "custom");
-
- //fillData(getLayer(network, "weightsConstInput"), 3.0, "custom");
- fillDataWithInitValue(getLayer(network, "weightsConstInput"), "custom", 1.234);
-
- fillData(getLayer(network, "weightsConstInputLow"), -1.275 / 2.0, "custom");
- fillData(getLayer(network, "weightsConstInputHigh"), 1.275, "custom");
- fillData(getLayer(network, "weightsConstOutputLow"), -1.275 / 2.0, "custom");
- fillData(getLayer(network, "weightsConstOutputHigh"), 1.275, "custom");
-
- //fillData(getLayer(network, "biasesConvolutionConst"), 5.0, "custom");
- fillDataWithInitValue(getLayer(network, "biasesConvolutionConst"), "custom", 2.123);
-
- fillDataMy(getLayer(network, "reshapeConst"), { 1, -1 });
- }
-
- std::string getName() const override {
- return "FullyConnectedAndQuantizeTestModel";
- }
-
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override {
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params));
- transformer.transform(network);
-
- const std::vector<CNNLayerPtr> layers = CNNNetSortTopologically(network);
-
- const CNNLayerPtr convolution = layers[layers.size() - 2];
- if ((convolution->type != "FullyConnected") || (convolution->name != "fullyconnected_original")) {
- THROW_IE_EXCEPTION << "unexpected layer type '" << convolution->type << "' or name '" << convolution->name << "'";
- }
-
- const CNNLayerPtr dequantizationScaleShift = layers[layers.size() - 1];
- if ((dequantizationScaleShift->type != "ScaleShift") || (dequantizationScaleShift->name != "fullyconnected")) {
- THROW_IE_EXCEPTION << "unexpected layer type '" << dequantizationScaleShift->type << "' or name '" << dequantizationScaleShift->name << "'";
- }
-
- return true;
- }
-
- std::string getModel(SingleLayerTransformationsTestParams& p) const override {
- std::string layers = layersTemplate;
- auto inputSizes = p.inputDimensions.at(0);
- auto inBatch = inputSizes.at(0);
- auto inChannel = inputSizes.at(1);
- auto inX = inputSizes.at(2);
- auto inY = inputSizes.at(3);
-
- REPLACE_WITH_NUM(layers, "IN_BATCH", inBatch);
- REPLACE_WITH_NUM(layers, "IN_CHANNEL", inChannel);
- REPLACE_WITH_NUM(layers, "IN_X", inX);
- REPLACE_WITH_NUM(layers, "IN_Y", inY);
- REPLACE_WITH_NUM(layers, "RESHAPED_CH_X_Y", inChannel * inX * inY);
-
- auto outputSizes = p.outputDimensions.at(0);
- auto outBatch = outputSizes.at(0);
- auto outChannel = outputSizes.at(1);
- REPLACE_WITH_NUM(layers, "OUT_BATCH", outBatch);
- REPLACE_WITH_NUM(layers, "OUT_CHANNEL", outChannel);
-
- size_t totalOffset = 0;
-
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_LOW_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_HIGH_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_LOW_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_HIGH_OFFSET", totalOffset);
- totalOffset += 4;
-
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_OFFSET", totalOffset);
- totalOffset += inChannel * outChannel * 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_LOW_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_HIGH_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_OUTPUT_LOW_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_OUTPUT_HIGH_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "RESHAPE_CONST_OFFSET", totalOffset);
- totalOffset += 8;
- REPLACE_WITH_NUM(layers, "FULLYCONNECTED_BIASES_CONST_OFFSET", totalOffset);
- totalOffset += 128;
-
-
- const std::string model = IRTemplateGenerator::getIRTemplate(
- "TransformationsTest",
- p.inputDimensions,
- "FP32",
- layers,
- edgesTemplate,
- 6);
-
- return model;
- }
-
-private:
- const std::string layersTemplate = R"V0G0N(
- <layer name="inputPower" type="Power" precision="FP32" id="1">
- <power_data power="1" scale="1" shift="0"/>
- <input>
- <port id="0">
- <dim>IN_BATCH</dim>
- <dim>IN_CHANNEL</dim>
- <dim>IN_X</dim>
- <dim>IN_Y</dim>
- </port>
- </input>
- <output>
- <port id="1">
- <dim>IN_BATCH</dim>
- <dim>IN_CHANNEL</dim>
- <dim>IN_X</dim>
- <dim>IN_Y</dim>
- </port>
- </output>
- </layer>
-
-
- <layer id="9" name="dataConstInputLow" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_LOW_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="10" name="dataConstInputHigh" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_HIGH_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="11" name="dataConstOutputLow" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_LOW_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="12" name="dataConstOutputHigh" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_HIGH_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="13" name="dataFakeQuantize" precision="FP32" type="FakeQuantize">
- <data levels="256"/>
- <input>
- <port id="0">
- <dim>IN_BATCH</dim>
- <dim>IN_CHANNEL</dim>
- <dim>IN_X</dim>
- <dim>IN_Y</dim>
- </port>
- <port id="1"/>
- <port id="2"/>
- <port id="3"/>
- <port id="4"/>
- </input>
- <output>
- <port id="5">
- <dim>IN_BATCH</dim>
- <dim>IN_CHANNEL</dim>
- <dim>IN_X</dim>
- <dim>IN_Y</dim>
- </port>
- </output>
- </layer>
- <layer id="14" name="weightsConstInput" precision="FP32" type="Const">
- <output>
- <port id="1">
- <dim>OUT_CHANNEL</dim>
- <dim>IN_CHANNEL</dim>
- </port>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_INPUT_OFFSET" size="4096"/>
- </blobs>
- </layer>
- <layer id="15" name="weightsConstInputLow" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_INPUT_LOW_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="16" name="weightsConstInputHigh" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_INPUT_HIGH_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="17" name="weightsConstOutputLow" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_OUTPUT_LOW_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="18" name="weightsConstOutputHigh" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_OUTPUT_HIGH_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="19" name="weightsFakeQuantize" precision="FP32" type="FakeQuantize">
- <data levels="256"/>
- <input>
- <port id="0">
- <dim>OUT_CHANNEL</dim>
- <dim>IN_CHANNEL</dim>
- </port>
- <port id="1"/>
- <port id="2"/>
- <port id="3"/>
- <port id="4"/>
- </input>
- <output>
- <port id="5">
- <dim>OUT_CHANNEL</dim>
- <dim>IN_CHANNEL</dim>
- </port>
- </output>
- </layer>
- <layer id="20" name="biasesConvolutionConst" precision="FP32" type="Const">
- <output>
- <port id="1">
- <dim>OUT_CHANNEL</dim>
- </port>
- </output>
- <blobs>
- <custom offset="FULLYCONNECTED_BIASES_CONST_OFFSET" size="128"/>
- </blobs>
- </layer>
- <layer id="211" name="reshapeConst" precision="I32" type="Const">
- <output>
- <port id="1">
- <dim>2</dim>
- </port>
- </output>
- <blobs>
- <custom offset="RESHAPE_CONST_OFFSET" size="8"/>
- </blobs>
- </layer>
- <layer id="21" name="reshape" precision="FP32" type="Reshape">
- <input>
- <port id="0">
- <dim>IN_BATCH</dim>
- <dim>IN_CHANNEL</dim>
- <dim>IN_X</dim>
- <dim>IN_Y</dim>
- </port>
- <port id="1">
- <dim>2</dim>
- </port>
- </input>
- <output>
- <port id="2">
- <dim>IN_BATCH</dim>
- <dim>RESHAPED_CH_X_Y</dim>
- </port>
- </output>
- </layer>
- <layer id="22" name="fullyconnected" precision="FP32" type="FullyConnected">
- <data out-size="OUT_CHANNEL"/>
- <input>
- <port id="0">
- <dim>IN_BATCH</dim>
- <dim>RESHAPED_CH_X_Y</dim>
- </port>
- <port id="1">
- <dim>OUT_CHANNEL</dim>
- <dim>IN_CHANNEL</dim>
- </port>
- <port id="2">
- <dim>OUT_CHANNEL</dim>
- </port>
- </input>
- <output>
- <port id="3">
- <dim>OUT_BATCH</dim>
- <dim>OUT_CHANNEL</dim>
- </port>
- </output>
- </layer>
- )V0G0N";
-
- const std::string edgesTemplate = R"V0G0N(
- <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
- <edge from-layer="1" from-port="1" to-layer="13" to-port="0"/>
-
- <!-- data FakeQuantize -->
- <edge from-layer="9" from-port="1" to-layer="13" to-port="1"/>
- <edge from-layer="10" from-port="1" to-layer="13" to-port="2"/>
- <edge from-layer="11" from-port="1" to-layer="13" to-port="3"/>
- <edge from-layer="12" from-port="1" to-layer="13" to-port="4"/>
-
- <!-- weights FakeQuantize -->
- <edge from-layer="14" from-port="1" to-layer="19" to-port="0"/>
- <edge from-layer="15" from-port="1" to-layer="19" to-port="1"/>
- <edge from-layer="16" from-port="1" to-layer="19" to-port="2"/>
- <edge from-layer="17" from-port="1" to-layer="19" to-port="3"/>
- <edge from-layer="18" from-port="1" to-layer="19" to-port="4"/>
-
- <edge from-layer="13" from-port="5" to-layer="21" to-port="0"/>
- <edge from-layer="211" from-port="1" to-layer="21" to-port="1"/>
- <edge from-layer="21" from-port="2" to-layer="22" to-port="0"/>
-
- <!-- FullyConnected -->
- <edge from-layer="21" from-port="2" to-layer="22" to-port="0"/>
- <edge from-layer="19" from-port="5" to-layer="22" to-port="1"/>
- <edge from-layer="20" from-port="1" to-layer="22" to-port="2"/>
- )V0G0N";
-};
-
-class GemmAndQuantizeTestModel : public SingleLayerTestModel {
-public:
- void resetTransformation(CNNNetwork& network) const override {
- fillData(getLayer(network, "dataConstInputLow"), 63.5, "custom");
- fillData(getLayer(network, "dataConstInputHigh"), 127.0, "custom");
- fillData(getLayer(network, "dataConstOutputLow"), 63.5, "custom");
- fillData(getLayer(network, "dataConstOutputHigh"), 127.0, "custom");
-
- //fillData(getLayer(network, "weightsConstInput"), 3.0, "custom");
- fillDataWithInitValue(getLayer(network, "weightsConstInput"), "custom", 1.234);
-
- fillData(getLayer(network, "weightsConstInputLow"), -1.275 / 2.0, "custom");
- fillData(getLayer(network, "weightsConstInputHigh"), 1.275, "custom");
- fillData(getLayer(network, "weightsConstOutputLow"), -1.275 / 2.0, "custom");
- fillData(getLayer(network, "weightsConstOutputHigh"), 1.275, "custom");
-
- fillDataMy(getLayer(network, "reshapeConst"), { 1, -1 });
- }
-
- std::string getName() const override {
- return "GemmAndQuantizeTestModel";
- }
-
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override {
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params));
- transformer.transform(network);
-
- const std::vector<CNNLayerPtr> layers = CNNNetSortTopologically(network);
-
- const CNNLayerPtr convolution = layers[layers.size() - 2];
- if ((convolution->type != "GEMM") || (convolution->name != "gemm_original")) {
- THROW_IE_EXCEPTION << "unexpected layer type '" << convolution->type << "' or name '" << convolution->name << "'";
- }
-
- const CNNLayerPtr dequantizationScaleShift = layers[layers.size() - 1];
- if ((dequantizationScaleShift->type != "ScaleShift") || (dequantizationScaleShift->name != "gemm")) {
- THROW_IE_EXCEPTION << "unexpected layer type '" << dequantizationScaleShift->type << "' or name '" << dequantizationScaleShift->name << "'";
- }
-
- return true;
- }
-
- std::string getModel(SingleLayerTransformationsTestParams& p) const override {
- std::string layers = layersTemplate;
- size_t totalOffset = 0;
-
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_LOW_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_HIGH_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_LOW_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_HIGH_OFFSET", totalOffset);
- totalOffset += 4;
-
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_OFFSET", totalOffset);
- totalOffset += 32 * 32 * 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_LOW_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_HIGH_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_OUTPUT_LOW_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_OUTPUT_HIGH_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "RESHAPE_CONST_OFFSET", totalOffset);
- totalOffset += 8;
-
- const std::string model = IRTemplateGenerator::getIRTemplate(
- "TransformationsTest",
- { 1, 32, 149, 149 },
- "FP32",
- layers,
- edgesTemplate,
- 6);
-
- return model;
- }
-
-private:
- const std::string layersTemplate = R"V0G0N(
- <layer name="inputPower" type="Power" precision="FP32" id="1">
- <power_data power="1" scale="1" shift="0"/>
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>32</dim>
- <dim>149</dim>
- <dim>149</dim>
- </port>
- </input>
- <output>
- <port id="1">
- <dim>1</dim>
- <dim>32</dim>
- <dim>149</dim>
- <dim>149</dim>
- </port>
- </output>
- </layer>
-
-
- <layer id="9" name="dataConstInputLow" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_LOW_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="10" name="dataConstInputHigh" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_HIGH_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="11" name="dataConstOutputLow" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_LOW_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="12" name="dataConstOutputHigh" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_HIGH_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="13" name="dataFakeQuantize" precision="FP32" type="FakeQuantize">
- <data levels="256"/>
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>32</dim>
- <dim>149</dim>
- <dim>149</dim>
- </port>
- <port id="1"/>
- <port id="2"/>
- <port id="3"/>
- <port id="4"/>
- </input>
- <output>
- <port id="5">
- <dim>1</dim>
- <dim>32</dim>
- <dim>149</dim>
- <dim>149</dim>
- </port>
- </output>
- </layer>
- <layer id="14" name="weightsConstInput" precision="FP32" type="Const">
- <output>
- <port id="1">
- <dim>32</dim>
- <dim>32</dim>
- </port>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_INPUT_OFFSET" size="4096"/>
- </blobs>
- </layer>
- <layer id="15" name="weightsConstInputLow" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_INPUT_LOW_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="16" name="weightsConstInputHigh" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_INPUT_HIGH_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="17" name="weightsConstOutputLow" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_OUTPUT_LOW_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="18" name="weightsConstOutputHigh" precision="FP32" type="Const">
- <output>
- <port id="1"/>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_OUTPUT_HIGH_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer id="19" name="weightsFakeQuantize" precision="FP32" type="FakeQuantize">
- <data levels="256"/>
- <input>
- <port id="0">
- <dim>32</dim>
- <dim>32</dim>
- </port>
- <port id="1"/>
- <port id="2"/>
- <port id="3"/>
- <port id="4"/>
- </input>
- <output>
- <port id="5">
- <dim>32</dim>
- <dim>32</dim>
- </port>
- </output>
- </layer>
- <layer id="211" name="reshapeConst" precision="I32" type="Const">
- <output>
- <port id="1">
- <dim>2</dim>
- </port>
- </output>
- <blobs>
- <custom offset="RESHAPE_CONST_OFFSET" size="8"/>
- </blobs>
- </layer>
- <layer id="21" name="reshape" precision="FP32" type="Reshape">
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>32</dim>
- <dim>149</dim>
- <dim>149</dim>
- </port>
- <port id="1">
- <dim>2</dim>
- </port>
- </input>
- <output>
- <port id="2">
- <dim>1</dim>
- <dim>32</dim>
- </port>
- </output>
- </layer>
- <layer id="22" name="gemm" precision="FP32" type="GEMM">
- <data transpose_a="0" transpose_b="1"/>
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>32</dim>
- </port>
- <port id="1">
- <dim>32</dim>
- <dim>32</dim>
- </port>
- </input>
- <output>
- <port id="3">
- <dim>1</dim>
- <dim>32</dim>
- </port>
- </output>
- </layer>
- )V0G0N";
-
- const std::string edgesTemplate = R"V0G0N(
- <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
- <edge from-layer="1" from-port="1" to-layer="13" to-port="0"/>
-
- <!-- data FakeQuantize -->
- <edge from-layer="9" from-port="1" to-layer="13" to-port="1"/>
- <edge from-layer="10" from-port="1" to-layer="13" to-port="2"/>
- <edge from-layer="11" from-port="1" to-layer="13" to-port="3"/>
- <edge from-layer="12" from-port="1" to-layer="13" to-port="4"/>
-
- <!-- weights FakeQuantize -->
- <edge from-layer="14" from-port="1" to-layer="19" to-port="0"/>
- <edge from-layer="15" from-port="1" to-layer="19" to-port="1"/>
- <edge from-layer="16" from-port="1" to-layer="19" to-port="2"/>
- <edge from-layer="17" from-port="1" to-layer="19" to-port="3"/>
- <edge from-layer="18" from-port="1" to-layer="19" to-port="4"/>
-
- <edge from-layer="13" from-port="5" to-layer="21" to-port="0"/>
- <edge from-layer="211" from-port="1" to-layer="21" to-port="1"/>
- <edge from-layer="21" from-port="2" to-layer="22" to-port="0"/>
-
- <!-- FullyConnected -->
- <edge from-layer="21" from-port="2" to-layer="22" to-port="0"/>
- <edge from-layer="19" from-port="5" to-layer="22" to-port="1"/>
- )V0G0N";
-};
-
-class PoolingTestModel : public SingleLayerTestModel {
-public:
- void resetTransformation(CNNNetwork& network) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
-
- mutable InferenceEngine::Precision netPrecision;
-};
-
-class PowerTestModel : public SingleLayerTestModel {
-public:
- PowerTestModel(const float& power, const float& scale, const float& shift) : power(power), scale(scale), shift(shift) {}
- void resetTransformation(CNNNetwork& network) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
-
-private:
- const float power;
- const float scale;
- const float shift;
-};
-
-class ConvolutionAndQuantizeOnWeightsWithMultiOutputIntervalsTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-class ConvolutionAndQuantizeOnWeightsWithoutConstTransformationTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-// Base test class to manually quantize weights and biases
-class QuantizationOnWeightsTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- virtual std::unordered_set<std::string> getNotTransformedLayers() const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-class QuantizationOnInvertedWeightsTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- virtual std::unordered_set<std::string> getNotTransformedLayers() const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-class FakeQuantizeAsOutputTest : public QuantizationOnWeightsTestModel {
-public:
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- virtual std::unordered_set<std::string> getNotTransformedLayers() const override;
-};
-
-class FakeQuantizeWithMultiOutputsTest : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- virtual std::unordered_set<std::string> getNotTransformedLayers() const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-class FakeQuantizeWithTwoScaleShiftsAsOutput : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-class ConvolutionAndPoolingAndQuantizeOnActivationsTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-class ConvolutionAndQuantizeOnActivationsTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-class ConvolutionAndDequantizationScaleShiftsOnActivationsTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-// base test type for FullyConnected test
-class FullyConnectedBaseTestModel : public SingleLayerTestModel {
-public:
- FullyConnectedBaseTestModel(const bool addBiasesLayer = true);
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-protected:
- virtual bool areScalesOnActivationsDifferent() const;
- const bool addBiasesLayer;
-};
-
-// base test type for convolution test
-class ConvolutionBaseTestModel : public SingleLayerTestModel {
-public:
- ConvolutionBaseTestModel(const bool addBiasesLayer = true);
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-protected:
- virtual size_t getGroupsCount(SingleLayerTransformationsTestParams& p) const;
- virtual bool areScalesOnActivationsDifferent() const;
- const bool addBiasesLayer;
-};
-
-class ConvolutionDepthwiseTestModel : public ConvolutionBaseTestModel {
-public:
- std::string getName() const override;
-protected:
- size_t getGroupsCount(SingleLayerTransformationsTestParams& p) const override;
- bool areScalesOnActivationsDifferent() const override;
-};
-
-class ConvolutionGroupedTestModel : public ConvolutionBaseTestModel {
-public:
- std::string getName() const override;
- void initInput(Blob::Ptr input) const override;
-protected:
- size_t getGroupsCount(SingleLayerTransformationsTestParams& p) const override;
- bool areScalesOnActivationsDifferent() const override;
-};
-
-class UpdateBiasesConvolutionTestModel : public ConvolutionBaseTestModel {
-public:
- UpdateBiasesConvolutionTestModel(const bool addBiasesLayer = false);
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void initInput(Blob::Ptr input) const override;
-};
-
-class UpdateBiasesFullyConnectedTestModel : public FullyConnectedBaseTestModel {
-public:
- UpdateBiasesFullyConnectedTestModel(const bool addBiasesLayer = false);
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void initInput(Blob::Ptr input) const override;
-};
-
-class FullyConnectedTestModel : public SingleLayerTestModel {
-public:
- FullyConnectedTestModel(const std::vector<size_t>& inputDimentions, const std::vector<size_t>& outputDimentions);
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void initInput(Blob::Ptr input) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- void resetTransformation(CNNNetwork& network) const override;
-protected:
- virtual bool areScalesOnActivationsDifferent() const;
- const bool addBiasesLayer;
-
-private:
- const std::vector<size_t> inputDimentions;
- const std::vector<size_t> outputDimentions;
-};
-
-class EltwiseTestModel : public SingleLayerTestModel {
-public:
- EltwiseTestModel(
- const bool cpuSpecific,
- const std::string& operation,
- const bool signedIntervals,
- const size_t minLevels = 2ul,
- const bool addPooling = true) :
- SingleLayerTestModel(),
- cpuSpecific(cpuSpecific),
- operation(operation),
- signedIntervals(signedIntervals),
- minLevels(minLevels),
- addPooling(addPooling) {}
-
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-
-private:
- const bool cpuSpecific;
- const std::string operation;
- const bool signedIntervals;
- const size_t minLevels;
- const bool addPooling;
-};
-
-class EltwiseFqWithChildrenTestModel : public SingleLayerTestModel {
-public:
- EltwiseFqWithChildrenTestModel(
- const bool cpuSpecific,
- const std::string& operation,
- const bool signedIntervals,
- const size_t minLevels = 2ul,
- const bool addPooling = true) :
- SingleLayerTestModel(),
- cpuSpecific(cpuSpecific),
- operation(operation),
- signedIntervals(signedIntervals),
- minLevels(minLevels),
- addPooling(addPooling) {}
-
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-
-private:
- const bool cpuSpecific;
- const std::string operation;
- const bool signedIntervals;
- const size_t minLevels;
- const bool addPooling;
-};
-
-
-class EltwiseWithPoolingTestModel : public SingleLayerTestModel {
-public:
- EltwiseWithPoolingTestModel(
- const bool cpuSpecific,
- const std::string& operation,
- const bool signedIntervals,
- const size_t minLevels = 2ul) :
- SingleLayerTestModel(),
- cpuSpecific(cpuSpecific),
- operation(operation),
- signedIntervals(signedIntervals),
- minLevels(minLevels) {}
-
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-
-private:
- const bool cpuSpecific;
- const std::string operation;
- const bool signedIntervals;
- const size_t minLevels;
-};
-
-class EltwiseBroadcastTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-class EltwiseCpuTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override {
-
- std::string layers = layersTemplate;
- // TODO: hard-coded values
-
- size_t totalOffset = 0;
-
-
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_LOW_OFFSET_1", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_HIGHT_OFFSET_1", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_LOW_OFFSET_1", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_HIGH_OFFSET_1", totalOffset);
- totalOffset += 4;
-
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_LOW_OFFSET_3", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_HIGHT_OFFSET_3", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_LOW_OFFSET_3", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_HIGH_OFFSET_3", totalOffset);
- totalOffset += 4;
-
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_OFFSET", totalOffset);
- totalOffset += 3 * 3 * 3 * 3 * 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_SIZE", 3 * 3 * 3 * 3 * 4);
-
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_LOW_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_INPUT_HIGHT_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_OUTPUT_LOW_OFFSET", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "WEIGHTS_CONST_OUTPUT_HIGH_OFFSET", totalOffset);
- totalOffset += 4;
-
- REPLACE_WITH_NUM(layers, "BIASES_CONST_OFFSET", totalOffset);
- totalOffset += 3 * 4;
- REPLACE_WITH_NUM(layers, "BIASES_CONST_SIZE", 3 * 4);
-
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_LOW_OFFSET_4", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_INPUT_HIGHT_OFFSET_4", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_LOW_OFFSET_4", totalOffset);
- totalOffset += 4;
- REPLACE_WITH_NUM(layers, "DATA_CONST_OUTPUT_HIGH_OFFSET_4", totalOffset);
- totalOffset += 4;
-
- REPLACE_WITH_NUM(layers, "DEQUANTIZE_SCALESHIFT_WEIGHTS_OFFSET", totalOffset);
- totalOffset += 12;
- REPLACE_WITH_NUM(layers, "DEQUANTIZE_SCALESHIFT_BIASES_OFFSET", totalOffset);
- totalOffset += 12;
-
- const std::string model = IRTemplateGenerator::getIRTemplate(
- "TransformationsTest",
- { 1, 3, 299, 299 },
- "FP32",
- layers,
- edgesTemplate,
- 6);
-
- return model;
- }
-
- std::string getName() const override {
- return "EltwiseCpuTestModel";
- }
-
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override {
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
-
- // TODO: skip interval validation - not completed
- return false;
- }
-
- void resetTransformation(CNNNetwork& network) const override {
- fillData(getLayer(network, "branch1/dataConstInputLow1"), 255.0 / 200.0, "custom");
- fillData(getLayer(network, "branch1/dataConstInputHigh1"), 255.0 / 100.0, "custom");
- fillData(getLayer(network, "branch1/dataConstOutputLow1"), 255.0 / 200.0, "custom");
- fillData(getLayer(network, "branch1/dataConstOutputHigh1"), 255.0 / 100.0, "custom");
-
- fillData(getLayer(network, "branch2/dataConstInputLow3"), 255.0 / 200.0, "custom");
- fillData(getLayer(network, "branch2/dataConstInputHigh3"), 255.0 / 100.0, "custom");
- fillData(getLayer(network, "branch2/dataConstOutputLow3"), 255.0 / 200.0, "custom");
- fillData(getLayer(network, "branch2/dataConstOutputHigh3"), 255.0 / 100.0, "custom");
-
- fillData(getLayer(network, "branch2/weightsConstInput"), 0.0, "custom");
- fillData(getLayer(network, "branch2/weightsConstInputLow"), 0.0, "custom");
- fillData(getLayer(network, "branch2/weightsConstInputHigh"), 255.0 / 200.0, "custom");
- fillData(getLayer(network, "branch2/weightsConstOutputLow"), 0.0, "custom");
- fillData(getLayer(network, "branch2/weightsConstOutputHigh"), 255.0 / 200.0, "custom");
-
- fillData(getLayer(network, "branch2/biasesConst"), { 1.0, 2.0, 3.0 });
-
- fillData(getLayer(network, "branch2/dataConstInputLow4"), 255.0 / 800.0, "custom");
- fillData(getLayer(network, "branch2/dataConstInputHigh4"), 255.0 / 400.0, "custom");
- fillData(getLayer(network, "branch2/dataConstOutputLow4"), 255.0 / 800.0, "custom");
- fillData(getLayer(network, "branch2/dataConstOutputHigh4"), 255.0 / 400.0, "custom");
- }
-
-private:
- const std::string layersTemplate = R"V0G0N(
- <layer name="branch1/dataConstInputLow1" type="Const" precision="FP32" id="102">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_LOW_OFFSET_1" size="4"/>
- </blobs>
- </layer>
- <layer name="branch1/dataConstInputHigh1" type="Const" precision="FP32" id="103">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_HIGHT_OFFSET_1" size="4"/>
- </blobs>
- </layer>
-
- <layer name="branch1/dataConstOutputLow1" type="Const" precision="FP32" id="104">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_LOW_OFFSET_1" size="4"/>
- </blobs>
- </layer>
- <layer name="branch1/dataConstOutputHigh1" type="Const" precision="FP32" id="105">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_HIGH_OFFSET_1" size="4"/>
- </blobs>
- </layer>
-
- <layer name="branch1/dataFakeQuantize1" type="FakeQuantize" precision="FP32" id="106">
- <data levels="256" />
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- <port id="1">
- <dim>1</dim>
- </port>
- <port id="2">
- <dim>1</dim>
- </port>
- <port id="3">
- <dim>1</dim>
- </port>
- <port id="4">
- <dim>1</dim>
- </port>
- </input>
- <output>
- <port id="5">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
- </layer>
-
- <layer name="branch2/dataConstInputLow3" type="Const" precision="FP32" id="207">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_LOW_OFFSET_3" size="4"/>
- </blobs>
- </layer>
- <layer name="branch2/dataConstInputHigh3" type="Const" precision="FP32" id="208">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_HIGHT_OFFSET_3" size="4"/>
- </blobs>
- </layer>
-
- <layer name="branch2/dataConstOutputLow3" type="Const" precision="FP32" id="209">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_LOW_OFFSET_3" size="4"/>
- </blobs>
- </layer>
- <layer name="branch2/dataConstOutputHigh3" type="Const" precision="FP32" id="210">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_HIGH_OFFSET_3" size="4"/>
- </blobs>
- </layer>
-
-
- <layer name="branch2/dataFakeQuantize3" type="FakeQuantize" precision="FP32" id="211">
- <data levels="256" />
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- <port id="1">
- <dim>1</dim>
- </port>
- <port id="2">
- <dim>1</dim>
- </port>
- <port id="3">
- <dim>1</dim>
- </port>
- <port id="4">
- <dim>1</dim>
- </port>
- </input>
- <output>
- <port id="5">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
- </layer>
-
-
- <layer name="branch2/weightsConstInput" type="Const" precision="FP32" id="212">
- <output>
- <port id="0">
- <dim>3</dim>
- <dim>3</dim>
- <dim>3</dim>
- <dim>3</dim>
- </port>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_INPUT_OFFSET" size="WEIGHTS_CONST_INPUT_SIZE"/>
- </blobs>
- </layer>
- <layer name="branch2/weightsConstInputLow" type="Const" precision="FP32" id="213">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_INPUT_LOW_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer name="branch2/weightsConstInputHigh" type="Const" precision="FP32" id="214">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_INPUT_HIGHT_OFFSET" size="4"/>
- </blobs>
- </layer>
-
- <layer name="branch2/weightsConstOutputLow" type="Const" precision="FP32" id="215">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_OUTPUT_LOW_OFFSET" size="4"/>
- </blobs>
- </layer>
- <layer name="branch2/weightsConstOutputHigh" type="Const" precision="FP32" id="216">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="WEIGHTS_CONST_OUTPUT_HIGH_OFFSET" size="4"/>
- </blobs>
- </layer>
-
-
- <layer name="branch2/weightsFakeQuantize" type="FakeQuantize" precision="FP32" id="218">
- <data levels="256" />
- <input>
- <port id="0">
- <dim>3</dim>
- <dim>3</dim>
- <dim>3</dim>
- <dim>3</dim>
- </port>
- <port id="1">
- <dim>1</dim>
- </port>
- <port id="2">
- <dim>1</dim>
- </port>
- <port id="3">
- <dim>1</dim>
- </port>
- <port id="4">
- <dim>1</dim>
- </port>
- </input>
- <output>
- <port id="5">
- <dim>3</dim>
- <dim>3</dim>
- <dim>3</dim>
- <dim>3</dim>
- </port>
- </output>
- </layer>
-
- <layer name="branch2/biasesConst" type="Const" precision="FP32" id="219">
- <output>
- <port id="0">
- <dim>3</dim>
- </port>
- </output>
- <blobs>
- <custom offset="BIASES_CONST_OFFSET" size="BIASES_CONST_SIZE"/>
- </blobs>
- </layer>
-
-
- <layer name="branch2/convolution" precision="FP32" type="Convolution" id="220">
- <data dilations="1,1" group="1" kernel="3,3" output="3" pads_begin="1,1" pads_end="1,1" strides="1,1"/>
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- <port id="1">
- <dim>3</dim>
- <dim>3</dim>
- <dim>3</dim>
- <dim>3</dim>
- </port>
- <port id="2">
- <dim>3</dim>
- </port>
- </input>
- <output>
- <port id="3">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
- </layer>
-
- <layer name="branch2/dataConstInputLow4" type="Const" precision="FP32" id="222">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_LOW_OFFSET_4" size="4"/>
- </blobs>
- </layer>
- <layer name="branch2/dataConstInputHigh4" type="Const" precision="FP32" id="223">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_INPUT_HIGHT_OFFSET_4" size="4"/>
- </blobs>
- </layer>
-
- <layer name="branch2/dataConstOutputLow4" type="Const" precision="FP32" id="224">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_LOW_OFFSET_4" size="4"/>
- </blobs>
- </layer>
- <layer name="branch2/dataConstOutputHigh4" type="Const" precision="FP32" id="225">
- <output>
- <port id="0">
- <dim>1</dim>
- </port>
- </output>
- <blobs>
- <custom offset="DATA_CONST_OUTPUT_HIGH_OFFSET_4" size="4"/>
- </blobs>
- </layer>
-
- <layer name="branch2/dataFakeQuantize4" type="FakeQuantize" precision="FP32" id="226">
- <data levels="256" />
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- <port id="1">
- <dim>1</dim>
- </port>
- <port id="2">
- <dim>1</dim>
- </port>
- <port id="3">
- <dim>1</dim>
- </port>
- <port id="4">
- <dim>1</dim>
- </port>
- </input>
- <output>
- <port id="5">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
- </layer>
-
- <layer name="branch2/eltwise" type="Eltwise" precision="FP32" id="227">
- <data operation="sum"/>
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- <port id="1">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
-
- </input>
- <output>
- <port id="2">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
- </layer>
-
-
- <layer name="outputPower" type="Power" precision="FP32" id="300">
- <power_data power="1" scale="1" shift="0"/>
- <input>
- <port id="0">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </input>
- <output>
- <port id="1">
- <dim>1</dim>
- <dim>3</dim>
- <dim>299</dim>
- <dim>299</dim>
- </port>
- </output>
- </layer>
-
- )V0G0N";
-
- const std::string edgesTemplate = R"V0G0N(
- <!-- branch 1 -->
-
- <edge from-layer="0" from-port="0" to-layer="106" to-port="0"/>
- <edge from-layer="102" from-port="0" to-layer="106" to-port="1"/>
- <edge from-layer="103" from-port="0" to-layer="106" to-port="2"/>
- <edge from-layer="104" from-port="0" to-layer="106" to-port="3"/>
- <edge from-layer="105" from-port="0" to-layer="106" to-port="4"/>
- <edge from-layer="106" from-port="5" to-layer="211" to-port="0"/>
- <edge from-layer="106" from-port="5" to-layer="227" to-port="0"/>
-
- <!-- branch 2 -->
-
- <!-- FakeQuantize on activations -->
- <edge from-layer="207" from-port="0" to-layer="211" to-port="1"/>
- <edge from-layer="208" from-port="0" to-layer="211" to-port="2"/>
- <edge from-layer="209" from-port="0" to-layer="211" to-port="3"/>
- <edge from-layer="210" from-port="0" to-layer="211" to-port="4"/>
- <edge from-layer="211" from-port="5" to-layer="220" to-port="0"/>
-
- <!-- FakeQuantize on weights -->
- <edge from-layer="212" from-port="0" to-layer="218" to-port="0"/>
- <edge from-layer="213" from-port="0" to-layer="218" to-port="1"/>
- <edge from-layer="214" from-port="0" to-layer="218" to-port="2"/>
- <edge from-layer="215" from-port="0" to-layer="218" to-port="3"/>
- <edge from-layer="216" from-port="0" to-layer="218" to-port="4"/>
- <edge from-layer="218" from-port="5" to-layer="220" to-port="1"/>
-
- <!-- Const on biases -->
- <edge from-layer="219" from-port="0" to-layer="220" to-port="2"/>
-
- <!-- Convolution -->
- <edge from-layer="220" from-port="3" to-layer="226" to-port="0"/>
-
- <!-- FakeQuantize on activations -->
- <edge from-layer="222" from-port="0" to-layer="226" to-port="1"/>
- <edge from-layer="223" from-port="0" to-layer="226" to-port="2"/>
- <edge from-layer="224" from-port="0" to-layer="226" to-port="3"/>
- <edge from-layer="225" from-port="0" to-layer="226" to-port="4"/>
- <edge from-layer="226" from-port="5" to-layer="227" to-port="1"/>
-
- <!-- Eltwise -->
- <edge from-layer="227" from-port="2" to-layer="300" to-port="0"/>
- )V0G0N";
-
- const std::map<std::string, std::vector<size_t>> dimensions = {
- {{ "in1", { 299, 299, 3, 1 } },
- { "in2", { 299, 299, 3, 1 } } }
- };
-};
-
-// Test model derived from SingleLayerTestModel and configured by three boolean switches
-// plus a list of dimensions. Only the interface is declared in this block.
-class ConcatTestModel : public SingleLayerTestModel {
-public:
- // Every argument except signedIntervals has a default value.
- // NOTE(review): the arguments presumably initialize the same-named private members;
- // the constructor definition is not part of this block - confirm.
- ConcatTestModel(
- const bool signedIntervals,
- const bool symmetricInterval = true,
- const bool multiChannel = true,
- const std::vector<size_t>& constInputDimentions = { 1 });
-
- // Overrides of the SingleLayerTestModel virtual interface.
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
- float getThreshold(const std::string& device_name, const Precision precision, LayerTransformation::Params& params) const override;
-private:
- // Configuration values; all are const, so they cannot change after construction.
- const bool signedIntervals;
- const bool symmetricInterval;
- const bool multiChannel;
- const std::vector<size_t> constInputDimentions;
-};
-
-// Test model derived from SingleLayerTestModel whose configuration (three boolean switches
-// and one float) is fixed at construction time.
-class ConcatWithPoolingTestModel : public SingleLayerTestModel {
-public:
- // Copies each argument into the same-named const member; no argument has a default.
- ConcatWithPoolingTestModel(
- const bool multiChannel,
- const bool signedIntervals,
- const bool shift,
- const float dequantizationIntervalsDifference) :
- SingleLayerTestModel(),
- multiChannel(multiChannel),
- signedIntervals(signedIntervals),
- shift(shift),
- dequantizationIntervalsDifference(dequantizationIntervalsDifference) {}
-
- // Overrides of the SingleLayerTestModel virtual interface (defined out of line).
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
- float getThreshold(const std::string& pluginName, const Precision precision, LayerTransformation::Params& params) const override;
-
-private:
- // Declared in the same order as the constructor's initializer list.
- const bool multiChannel;
- const bool signedIntervals;
- const bool shift;
- const float dequantizationIntervalsDifference;
-};
-
-// Stateless test model: it declares no data members and only overrides four
-// SingleLayerTestModel virtual functions, all defined out of line.
-class ConcatMultiChannelTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-// TODO: remove, not used
-// Test model that carries its IR fragments as string templates: a public static layers
-// template (defined elsewhere) and a private per-instance edges template.
-class ConcatMultiBranchTestModel : public SingleLayerTestModel {
-public:
- // Overrides of the SingleLayerTestModel virtual interface (defined out of line).
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-
- // Declared here only; the definition is outside this block.
- const static std::string layersTemplate;
-private:
-
- // Raw-string list of <edge> elements. Layer 113 receives layers 106 and 111 and feeds
- // both layer 211 and layer 227; layer 227 also receives layer 226 and feeds layer 300.
- const std::string edgesTemplate = R"V0G0N(
- <!-- branch 1 -->
-
- <edge from-layer="0" from-port="0" to-layer="106" to-port="0"/>
- <edge from-layer="102" from-port="0" to-layer="106" to-port="1"/>
- <edge from-layer="103" from-port="0" to-layer="106" to-port="2"/>
- <edge from-layer="104" from-port="0" to-layer="106" to-port="3"/>
- <edge from-layer="105" from-port="0" to-layer="106" to-port="4"/>
- <edge from-layer="106" from-port="5" to-layer="113" to-port="0"/>
-
- <edge from-layer="1" from-port="0" to-layer="111" to-port="0"/>
- <edge from-layer="107" from-port="0" to-layer="111" to-port="1"/>
- <edge from-layer="108" from-port="0" to-layer="111" to-port="2"/>
- <edge from-layer="109" from-port="0" to-layer="111" to-port="3"/>
- <edge from-layer="110" from-port="0" to-layer="111" to-port="4"/>
- <edge from-layer="111" from-port="5" to-layer="113" to-port="1"/>
-
- <edge from-layer="113" from-port="2" to-layer="227" to-port="0"/>
-
- <!-- branch 2 -->
-
- <!-- FakeQuantize on activations -->
- <edge from-layer="113" from-port="2" to-layer="211" to-port="0"/>
- <edge from-layer="207" from-port="0" to-layer="211" to-port="1"/>
- <edge from-layer="208" from-port="0" to-layer="211" to-port="2"/>
- <edge from-layer="209" from-port="0" to-layer="211" to-port="3"/>
- <edge from-layer="210" from-port="0" to-layer="211" to-port="4"/>
- <edge from-layer="211" from-port="5" to-layer="220" to-port="0"/>
-
- <!-- FakeQuantize on weights -->
- <edge from-layer="212" from-port="0" to-layer="218" to-port="0"/>
- <edge from-layer="213" from-port="0" to-layer="218" to-port="1"/>
- <edge from-layer="214" from-port="0" to-layer="218" to-port="2"/>
- <edge from-layer="215" from-port="0" to-layer="218" to-port="3"/>
- <edge from-layer="216" from-port="0" to-layer="218" to-port="4"/>
- <edge from-layer="218" from-port="5" to-layer="220" to-port="1"/>
-
- <!-- Const on biases -->
- <edge from-layer="219" from-port="0" to-layer="220" to-port="2"/>
-
- <!-- Convolution -->
- <edge from-layer="220" from-port="3" to-layer="226" to-port="0"/>
-
- <!-- FakeQuantize on activations -->
- <edge from-layer="222" from-port="0" to-layer="226" to-port="1"/>
- <edge from-layer="223" from-port="0" to-layer="226" to-port="2"/>
- <edge from-layer="224" from-port="0" to-layer="226" to-port="3"/>
- <edge from-layer="225" from-port="0" to-layer="226" to-port="4"/>
- <edge from-layer="226" from-port="5" to-layer="227" to-port="1"/>
-
- <!-- Concat -->
- <edge from-layer="227" from-port="2" to-layer="300" to-port="0"/>
- )V0G0N";
-
- // Dimension lists keyed by "in1" and "in2"; both are { 299, 299, 3, 1 }.
- // NOTE(review): the keys presumably name the two network inputs - confirm against getModel().
- const std::map<std::string, std::vector<size_t>> dimensions = {
- {{ "in1", { 299, 299, 3, 1 } },
- { "in2", { 299, 299, 3, 1 } } }
- };
-};
-
-// Stateless test model: no data members, only four overrides of the SingleLayerTestModel
-// virtual interface, all defined out of line.
-class FakeQuantizeAndScaleShiftTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-// Test model parameterized by a list of (float, float) pairs supplied at construction.
-// In addition to the four overrides most sibling models declare, it also overrides
-// initInput() and getZeroThreshold().
-class FakeQuantizeAndActivationTestModel : public SingleLayerTestModel {
-public:
- // NOTE(review): presumably stores the argument in the private `intervals` member;
- // the definition is outside this block - confirm.
- FakeQuantizeAndActivationTestModel(const std::vector<std::pair<float, float>>& intervals);
- void initInput(Blob::Ptr input) const override;
- float getZeroThreshold() const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-
-private:
- // Const list of float pairs; cannot change after construction.
- const std::vector<std::pair<float, float>> intervals;
-};
-
-// Stateless test model: no data members; overrides initInput() plus the four
-// model/name/transform/reset functions of SingleLayerTestModel (defined out of line).
-class ScaleShiftAndFakeQuantizeTestModel : public SingleLayerTestModel {
-public:
- void initInput(Blob::Ptr input) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-// Stateless test model: no data members; overrides initInput() plus the four
-// model/name/transform/reset functions of SingleLayerTestModel (defined out of line).
-class FakeQuantizeAndActivationWithNegativeScalesTestModel : public SingleLayerTestModel {
-public:
- void initInput(Blob::Ptr input) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-// Stateless test model: no data members; overrides initInput() plus the four
-// model/name/transform/reset functions of SingleLayerTestModel (defined out of line).
-class FakeQuantizeAndActivationWithNegativeSlopeTestModel : public SingleLayerTestModel {
-public:
- void initInput(Blob::Ptr input) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-// Stateless test model: no data members, only four overrides of the SingleLayerTestModel
-// virtual interface, all defined out of line.
-class ConvolutionAndDequantizationScaleShiftAndQuantizeOnActivationsTestModel : public SingleLayerTestModel {
-public:
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-};
-
-// Test model parameterized by two size_t switches that are fixed at construction.
-// Overrides initInput() plus the four model/name/transform/reset functions of
-// SingleLayerTestModel; all member functions are defined out of line.
-class MvnTestModel : public SingleLayerTestModel {
-public:
- MvnTestModel(const size_t acrossChannels, const size_t normalizeVariance);
- void initInput(Blob::Ptr input) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-
-private:
- // Const switches; cannot change after construction.
- const size_t acrossChannels;
- const size_t normalizeVariance;
-};
-
-// Test model configured by one boolean (signedIntervalOnActivation) passed to the
-// constructor. Overrides initInput() plus the four model/name/transform/reset functions
-// of SingleLayerTestModel; all member functions are defined out of line.
-class PrecisionSelectionMultibranchPreservedTestModel : public SingleLayerTestModel {
-public:
- PrecisionSelectionMultibranchPreservedTestModel(const bool signedIntervalOnActivation);
- void initInput(Blob::Ptr input) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-
-private:
- // Members are initialized in this declaration order, regardless of the order used in
- // a constructor's initializer list.
- // NOTE(review): acrossChannels / normalizeVariance have no matching constructor
- // argument; they look like leftovers from MvnTestModel - confirm before removing.
- const size_t acrossChannels;
- const size_t normalizeVariance;
- const bool signedIntervalOnActivation;
-};
-
-// Test model configured by one boolean (signedIntervalOnActivation) passed to the
-// constructor. Overrides initInput() plus the four model/name/transform/reset functions
-// of SingleLayerTestModel; all member functions are defined out of line.
-class PrecisionSelectionMultibranchNotPreservedTestModel : public SingleLayerTestModel {
-public:
- PrecisionSelectionMultibranchNotPreservedTestModel(const bool signedIntervalOnActivation);
- void initInput(Blob::Ptr input) const override;
- std::string getModel(SingleLayerTransformationsTestParams& p) const override;
- std::string getName() const override;
- bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
- void resetTransformation(CNNNetwork& network) const override;
-
-private:
- // Members are initialized in this declaration order, regardless of the order used in
- // a constructor's initializer list.
- // NOTE(review): acrossChannels / normalizeVariance have no matching constructor
- // argument; they look like leftovers from MvnTestModel - confirm before removing.
- const size_t acrossChannels;
- const size_t normalizeVariance;
- const bool signedIntervalOnActivation;
-};
-
-// Parameterized test fixture: derives from TestsCommon and from
-// WithParamInterface<SingleLayerTransformationsTestParams>.
-class SingleLayerTransformationsTest : public TestsCommon, public WithParamInterface<SingleLayerTransformationsTestParams> {
- // The members below precede any access specifier in a `class`, so they are private.
- TBlob<uint8_t>::Ptr generateWeights(const CNNNetwork& network);
- void checkNetworkWithFakeQuantize(const CNNNetwork& network);
- void checkNetworkWithQuantize(const CNNNetwork& network);
- //void sortBlobs(CNNLayer& layer);
- CNNNetwork createNetwork();
- // Returns a map from string to blob; every argument is taken by non-const reference.
- // NOTE(review): the map presumably holds output blobs keyed by output name - confirm.
- std::unordered_map<std::string, InferenceEngine::Blob::Ptr> infer(
- CNNNetwork& network,
- std::unordered_map<std::string, Blob::Ptr>& inputBlobs,
- Core & plugin, const std::string & device_name,
- ExecutableNetwork & executableNetwork,
- InferRequest & inferRequest);
-
-protected:
- // Static helper taking a result blob, a reference blob, a difference-count limit and a
- // tolerance that defaults to 0.01f.
- static void compareInDetails(
- InferenceEngine::Blob &res,
- InferenceEngine::Blob &ref,
- const size_t maxDifferenceCounts,
- float max_diff = 0.01f);
- virtual void SetUp();
-};
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-// Fills the input blob with a repeating ramp 0, 1, ..., 255, 0, 1, ...
-// The value is incremented first and reset once it exceeds 255, which is the same
-// wrap-around rule the other ramp generators in this test suite use; checking before
-// the increment would emit 256 and restart the ramp from 1 instead of 0.
-void MvnTestModel::initInput(Blob::Ptr input) const {
-    const size_t dataSize = input->size();
-    // std::vector owns the temporary buffer, so no manual array deleter is needed.
-    std::vector<float> values(dataSize);
-
-    float value = 0.f;
-    for (size_t i = 0ul; i < dataSize; ++i) {
-        values[i] = value;
-        value += 1.f;
-        if (value > 255.f) {
-            value = 0.f;
-        }
-    }
-
-    CNNNetworkHelper::fillBlobByFP32(input, values.data());
-}
-
-MvnTestModel::MvnTestModel(const size_t acrossChannels, const size_t normalizeVariance) :
- acrossChannels(acrossChannels),
- normalizeVariance(normalizeVariance) {}
-
-// Builds the serialized test network: input -> Power -> FakeQuantize (fed by four scalar
-// Const layers) -> MVN -> Power. All layers use the dimensions in p.outputDimensions[0].
-// The MVN attributes come from this model's acrossChannels / normalizeVariance switches.
-std::string MvnTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
-    // Element size of a Const blob; depends on the requested network precision.
-    size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
-    if (p._network_precision == "FP16") {
-        type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-    }
-
-    std::map<std::string, std::string> power_params = {{"power", "1"}, {"scale", "1"}, {"shift", "0"}};
-    std::map<std::string, std::string> const_params = {};
-    std::map<std::string, std::string> fake_quantize_params = {{"levels", "256"}};
-    std::map<std::string, std::string> mvn_params = {
-        {"eps", "0.001"},
-        {"across_channels", std::to_string(acrossChannels)},
-        // Must come from normalizeVariance: using acrossChannels here would tie the two
-        // attributes together and ignore the second constructor argument.
-        {"normalize_variance", std::to_string(normalizeVariance)}
-    };
-
-    // Each pair is "layer,port" -> "layer,port".
-    std::vector<std::pair<std::string, std::string>> edges = {
-        {"0,0", "1,1"}, {"1,2", "6,7"}, // power
-        {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // const
-        {"6,12", "7,13"}, {"7,14", "8,15"} // MVN, power
-    };
-
-    const std::vector<size_t> dimensions = p.outputDimensions[0];
-
-    // The order of the calls below defines the layer ids (1..8) referenced by `edges`.
-    return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("MvnTestModel", dimensions, p._network_precision)
-        .addLayer("Power", p._network_precision, &power_params, {{dimensions}, {dimensions}})
-        .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
-        .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
-        .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
-        .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
-        .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{dimensions, {1}, {1}, {1}, {1}}, {{dimensions}}})
-        .addLayer("MVN", p._network_precision, &mvn_params, { {dimensions}, {dimensions} })
-        .addLayer("Power", p._network_precision, &power_params, {{dimensions}, {dimensions}})
-        .finish(&edges);
-}
-
-// Applies every low-precision transformation returned by getAllTransformations(params)
-// to the network. Always reports success.
-bool MvnTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
-    LowPrecisionTransformer lptPipeline(LowPrecisionTransformer::getAllTransformations(params));
-    lptPipeline.transform(network);
-
-    return true;
-}
-
-// Returns "MvnTestModel" followed by one suffix per switch that is equal to 1.
-std::string MvnTestModel::getName() const {
-    std::string name("MvnTestModel");
-    if (acrossChannels == 1ul) {
-        name += "_AcrossChannels";
-    }
-    if (normalizeVariance == 1ul) {
-        name += "_NormalizeVariance";
-    }
-    return name;
-}
-
-// Writes the four scalar constants of the network: Const2 and Const4 get 0,
-// Const3 and Const5 get 255 / 2.
-void MvnTestModel::resetTransformation(CNNNetwork& network) const {
-    const std::pair<const char*, float> constants[] = {
-        {"Const2", 0.f},
-        {"Const3", 255.f / 2.f},
-        {"Const4", 0.f},
-        {"Const5", 255.f / 2.f}
-    };
-
-    for (const auto& constant : constants) {
-        fillData(getLayer(network, constant.first), constant.second, "custom");
-    }
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-// Builds the serialized test network: input -> Power -> FakeQuantize (fed by four scalar
-// Const layers) -> Pooling -> Power. Also records the requested network precision in
-// netPrecision, which transform() reads later.
-std::string PoolingTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
-    // TODO: don't use network precision
-    size_t type_size;
-    if (p._network_precision == "FP16") {
-        netPrecision = Precision::FP16;
-        type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-    } else {
-        netPrecision = Precision::FP32;
-        type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
-    }
-
-    auto& inputShape = p.inputDimensions[0];
-
-    CommonTestUtils::pool_common_params pooling = { {1, 1}, {1, 1}, {0, 0}, {0, 0}, "valid", false, true };
-    std::vector<size_t> poolOutShape(inputShape.size());
-    getPoolOutShape(inputShape, pooling, poolOutShape);
-
-    std::map<std::string, std::string> powerAttrs = {{"power", "1"}, {"scale", "1"}, {"shift", "0"}};
-    std::map<std::string, std::string> constAttrs = {};
-    std::map<std::string, std::string> fakeQuantizeAttrs = {{"levels", "256"}};
-
-    // Each pair is "layer,port" -> "layer,port".
-    std::vector<std::pair<std::string, std::string>> edges = {
-        // power
-        {"0,0", "1,1"}, {"1,2", "6,7"},
-        // const
-        {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"},
-        // pool, power
-        {"6,12", "7,13"}, {"7,14", "8,15"}
-    };
-
-    // The order of the calls below defines the layer ids (1..8) referenced by `edges`.
-    return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
-            "Conv_ScaleShift_transformations", inputShape, p._network_precision)
-        .addLayer("Power", p._network_precision, &powerAttrs, {{inputShape}, {inputShape}})
-        .addLayer("Const", p._network_precision, &constAttrs, {{}, {{1}}}, type_size, 0)
-        .addLayer("Const", p._network_precision, &constAttrs, {{}, {{1}}}, type_size, 0)
-        .addLayer("Const", p._network_precision, &constAttrs, {{}, {{1}}}, type_size, 0)
-        .addLayer("Const", p._network_precision, &constAttrs, {{}, {{1}}}, type_size, 0)
-        .addLayer("FakeQuantize", p._network_precision, &fakeQuantizeAttrs, {{inputShape, {1}, {1}, {1}, {1}}, {{inputShape}}})
-        .poolingLayer(p._network_precision, {{inputShape}, {poolOutShape}}, pooling)
-        .addLayer("Power", p._network_precision, &powerAttrs, {{poolOutShape}, {poolOutShape}})
-        .finish(&edges);
-}
-
-// Writes the four scalar constants of the network: Const2 and Const4 get 63.5,
-// Const3 and Const5 get 127.0.
-void PoolingTestModel::resetTransformation(CNNNetwork& network) const {
-    const std::pair<const char*, double> constants[] = {
-        {"Const2", 63.5},
-        {"Const3", 127.0},
-        {"Const4", 63.5},
-        {"Const5", 127.0}
-    };
-
-    for (const auto& constant : constants) {
-        fillData(getLayer(network, constant.first), constant.second, "custom");
-    }
-}
-
-// Fixed display name of this test model.
-std::string PoolingTestModel::getName() const {
-    return std::string("PoolingTestModel");
-}
-
-// Runs all low-precision transformations, then verifies the output precision of the
-// "FakeQuantize6" and "Pooling7" layers: U8 when params.updatePrecisions is set,
-// otherwise the precision remembered in netPrecision. Throws on mismatch, else returns true.
-bool PoolingTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
-    LowPrecisionTransformer lptPipeline(LowPrecisionTransformer::getAllTransformations(params));
-    lptPipeline.transform(network);
-
-    // TODO: don't use network precision
-    const Precision precision = params.updatePrecisions ? Precision(Precision::U8) : netPrecision;
-
-    // Shared check for both layers; the message reports the expected precision.
-    const auto verifyOutputPrecision = [&precision](const CNNLayerPtr& layer) {
-        if (layer->outData[0]->getPrecision() != precision) {
-            THROW_IE_EXCEPTION << layer->name << " precision " << precision << " is not correct";
-        }
-    };
-
-    verifyOutputPrecision(getLayer(network, "FakeQuantize6"));
-    verifyOutputPrecision(getLayer(network, "Pooling7"));
-
-    return true;
-}
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-// Builds the serialized test network: input -> Power -> FakeQuantize (fed by four scalar
-// Const layers) -> Pooling -> Power. Both Power layers take their attributes from this
-// model's power / scale / shift members.
-std::string PowerTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
-    // Element size of a Const blob; depends on the requested network precision.
-    const size_t type_size = (p._network_precision == "FP16")
-        ? sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type)
-        : sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
-
-    auto& inputShape = p.inputDimensions[0];
-
-    CommonTestUtils::pool_common_params pooling = { {1, 1}, {1, 1}, {0, 0}, {0, 0}, "valid", false, true };
-    std::vector<size_t> poolOutShape(inputShape.size());
-    CommonTestUtils::getPoolOutShape(inputShape, pooling, poolOutShape);
-
-    std::map<std::string, std::string> powerAttrs = {
-        {"power", std::to_string(power)},
-        {"scale", std::to_string(scale)},
-        {"shift", std::to_string(shift)}
-    };
-    std::map<std::string, std::string> constAttrs = {};
-    std::map<std::string, std::string> fakeQuantizeAttrs = {{"levels", "256"}};
-
-    // Each pair is "layer,port" -> "layer,port".
-    std::vector<std::pair<std::string, std::string>> edges = {
-        // power
-        {"0,0", "1,1"}, {"1,2", "6,7"},
-        // const
-        {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"},
-        // pool, power
-        {"6,12", "7,13"}, {"7,14", "8,15"}
-    };
-
-    // The order of the calls below defines the layer ids (1..8) referenced by `edges`.
-    return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
-            "Conv_ScaleShift_transformations", inputShape, p._network_precision)
-        .addLayer("Power", p._network_precision, &powerAttrs, {{inputShape}, {inputShape}})
-        .addLayer("Const", p._network_precision, &constAttrs, {{}, {{1}}}, type_size, 0)
-        .addLayer("Const", p._network_precision, &constAttrs, {{}, {{1}}}, type_size, 0)
-        .addLayer("Const", p._network_precision, &constAttrs, {{}, {{1}}}, type_size, 0)
-        .addLayer("Const", p._network_precision, &constAttrs, {{}, {{1}}}, type_size, 0)
-        .addLayer("FakeQuantize", p._network_precision, &fakeQuantizeAttrs, {{inputShape, {1}, {1}, {1}, {1}}, {{inputShape}}})
-        .poolingLayer(p._network_precision, {{inputShape}, {poolOutShape}}, pooling)
-        .addLayer("Power", p._network_precision, &powerAttrs, {{poolOutShape}, {poolOutShape}})
-        .finish(&edges);
-}
-
-// Writes the four scalar constants of the network: Const2 and Const4 get 63.5,
-// Const3 and Const5 get 127.0.
-void PowerTestModel::resetTransformation(CNNNetwork& network) const {
-    const std::pair<const char*, double> constants[] = {
-        {"Const2", 63.5},
-        {"Const3", 127.0},
-        {"Const4", 63.5},
-        {"Const5", 127.0}
-    };
-
-    for (const auto& constant : constants) {
-        fillData(getLayer(network, constant.first), constant.second, "custom");
-    }
-}
-
-// Returns "PowerTestModel" followed by one suffix for every attribute that differs from
-// its neutral value (power 1, scale 1, shift 0).
-std::string PowerTestModel::getName() const {
-    std::string name("PowerTestModel");
-    if (!(power == 1.f)) {
-        name += "_power!=1";
-    }
-    if (!(scale == 1.f)) {
-        name += "_scale=" + std::to_string(scale);
-    }
-    if (!(shift == 0)) {
-        name += "_shift!=" + std::to_string(shift);
-    }
-    return name;
-}
-
-// Runs all low-precision transformations, then verifies:
-//  * "FakeQuantize6" and "Pooling7" output the expected precision (U8 when
-//    params.updatePrecisions is set, otherwise the precision of the first network input);
-//  * when outputs are quantized and power == 1, no layer of type "Power" named "Power8"
-//    is left in the network.
-// Throws on any violation, otherwise returns true.
-bool PowerTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
-    LowPrecisionTransformer lptPipeline(LowPrecisionTransformer::getAllTransformations(params));
-    lptPipeline.transform(network);
-
-    const Precision precision = params.updatePrecisions ?
-        Precision(Precision::U8) :
-        network.getInputsInfo().begin()->second->getPrecision();
-
-    // Shared check for both layers; the message reports the expected precision.
-    const auto verifyOutputPrecision = [&precision](const CNNLayerPtr& layer) {
-        if (layer->outData[0]->getPrecision() != precision) {
-            THROW_IE_EXCEPTION << layer->name << " precision " << precision << " is not correct";
-        }
-    };
-
-    verifyOutputPrecision(getLayer(network, "FakeQuantize6"));
-    verifyOutputPrecision(getLayer(network, "Pooling7"));
-
-    const CNNLayerPtr trailingPower = getLayer(network, "Power8");
-    const bool powerMustBeRemoved = params.quantizeOutputs && (power == 1.f);
-    if (powerMustBeRemoved && (trailingPower != nullptr) && (trailingPower->type == "Power")) {
-        THROW_IE_EXCEPTION << "Power layer is present after transformation";
-    }
-
-    return true;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "low_precision_transformations/fully_connected.hpp"
-
-// Fills the whole input blob with the constant 2.
-// The ramp generator that used to follow an unconditional `return` could never execute,
-// so it has been dropped; the observable behavior is unchanged.
-void PrecisionSelectionMultibranchNotPreservedTestModel::initInput(Blob::Ptr input) const {
-    fillData(input, 2.f);
-}
-
-// Stores the signed/unsigned switch and zero-initializes the two size_t members.
-// The initializers are listed in member declaration order (acrossChannels,
-// normalizeVariance, signedIntervalOnActivation), which is the order C++ actually
-// initializes them in; this avoids -Wreorder warnings.
-PrecisionSelectionMultibranchNotPreservedTestModel::PrecisionSelectionMultibranchNotPreservedTestModel(const bool signedIntervalOnActivation) :
-    acrossChannels(0),
-    normalizeVariance(0),
-    signedIntervalOnActivation(signedIntervalOnActivation) {}
-
-// Builds the serialized test network named "QuantizationOnWeights". The FakeQuantize
-// layer named "fakeQuantize" (id 6) feeds two consumers: the convolution (id 14), whose
-// weights come from a second FakeQuantize (id 12), and a Pooling layer (id 15).
-// The numbered comments in the builder chain are the layer ids used by `edges`, so the
-// order of the builder calls must not change.
-std::string PrecisionSelectionMultibranchNotPreservedTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- // Element size of a Const blob; depends on the requested network precision.
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- CommonTestUtils::conv_common_params conv =
- { {1, 1}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 32, false, false };
- // convOutShape is filled by getConvOutShape from the input dimensions and `conv`.
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::vector<size_t> weightsConstInputDims = { 32lu, 32lu, 3lu, 3lu };
- std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
- std::map<std::string, std::string> power_params = { {"power", "1"}, {"scale", "1"}, {"shift", "0"}};
- std::map<std::string, std::string> poolingParams = {
- {"kernel", "1,1"},
- {"pool-method", "max"},
- {"exclude-pad", "false"}
- };
- // Shape used for the Pooling layer's input and output.
- const std::vector<size_t> dimensions = p.outputDimensions[0];
-
- // Each pair is "layer,port" -> "layer,port".
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"7,13", "12,18"}, {"8,14", "12,19"}, {"9,15", "12,20"}, {"10,16", "12,21"}, {"11,17", "12,22"}, // Const layers
- {"6,12", "14,25"}, {"12,23", "14,26"}, // Fake quantize to Conv
- {"13,24", "14,27"}, // biases to Conv
- {"6,12", "15,29"} // Fake quantize to Pooling
- //{"14,28", "15,29"} // Fake quantize to Power
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("QuantizationOnWeights", p.inputDimensions[0], p._network_precision)
- // 1
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- // 2
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 3
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 4
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 5
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 6
- .addLayer(
- "FakeQuantize",
- p._network_precision,
- &fake_quantize_params,
- {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}},
- "fakeQuantize")
- // 7
- .addLayer("Const", p._network_precision, &const_params, {{}, {weightsConstInputDims}},
- std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size)
- // 8
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 9
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 10
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 11
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 12
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}}})
- // 13
- .addLayer("Const", p._network_precision, &const_params, {{}, {biasesConvolutionConstDims}}, type_size * conv.out_c, 0)
- // 14
- .convolutionLayer(
- p._network_precision,
- { {p.inputDimensions[0], weightsConstInputDims, biasesConvolutionConstDims },
- {convOutShape} }, conv, "convolution")
- // 15
- .addLayer("Pooling", p._network_precision, &poolingParams, { {dimensions}, {dimensions} })
- .finish(&edges);
-}
-
-// Rewrites every constant of the network:
-//  * Const2..Const5 (written as low, high, low, high) use [-128/4, 127/4] for signed
-//    activations and [0, 255/4] otherwise;
-//  * Const7 and Const13 are filled through fillDataWithInitValue (initial values 2 and 1);
-//  * Const8..Const11 (low, high, low, high) always use [-128/4, 127/4].
-void PrecisionSelectionMultibranchNotPreservedTestModel::resetTransformation(CNNNetwork& network) const {
-    const float activationsLow = signedIntervalOnActivation ? (-128.f / 4.f) : 0.f;
-    const float activationsHigh = signedIntervalOnActivation ? (127.f / 4.f) : (255.f / 4.f);
-
-    fillData(getLayer(network, "Const2"), activationsLow, "custom");
-    fillData(getLayer(network, "Const3"), activationsHigh, "custom");
-    fillData(getLayer(network, "Const4"), activationsLow, "custom");
-    fillData(getLayer(network, "Const5"), activationsHigh, "custom");
-
-    fillDataWithInitValue(getLayer(network, "Const7"), "custom", 2.f);
-
-    const float weightsLow = -128.f / 4.f;
-    const float weightsHigh = 127.f / 4.f;
-
-    fillData(getLayer(network, "Const8"), weightsLow, "custom");
-    fillData(getLayer(network, "Const9"), weightsHigh, "custom");
-    fillData(getLayer(network, "Const10"), weightsLow, "custom");
-    fillData(getLayer(network, "Const11"), weightsHigh, "custom");
-
-    fillDataWithInitValue(getLayer(network, "Const13"), "custom", 1.f);
-}
-
-// Returns the class name with a "_Signed" or "_Unsigned" suffix chosen by
-// signedIntervalOnActivation.
-std::string PrecisionSelectionMultibranchNotPreservedTestModel::getName() const {
-    std::string name("PrecisionSelectionMultibranchNotPreservedTestModel");
-    if (signedIntervalOnActivation) {
-        name += "_Signed";
-    } else {
-        name += "_Unsigned";
-    }
-    return name;
-}
-
-// Forces weightsToConst and updatePrecisions, runs the transformer returned by
-// getLowPrecisionTransformer(params), then validates the result.
-//
-// When U8 is not among params.precisionsOnActivations:
-//   "fakeQuantize" must still output FP16/FP32 and "convolution" must still be a
-//   Convolution with three parents, the first being a FakeQuantize. Returns false.
-// When U8 is allowed:
-//   if params.quantizeOutputs is set, "fakeQuantize" must output U8 and the layer named
-//   "convolution" must have type ScaleShift; with params.updateBiases its "biases" blob
-//   must be all zeros. Returns true.
-// Any violated expectation throws.
-bool PrecisionSelectionMultibranchNotPreservedTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
-    params.weightsToConst = true;
-    params.updatePrecisions = true;
-
-    LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
-    transformer.transform(network);
-
-    const CNNLayerPtr fakeQuantize = CNNNetworkHelper::getLayer(network, "fakeQuantize");
-    const Precision actualPrecision = fakeQuantize->outData[0]->getTensorDesc().getPrecision();
-
-    const bool u8OnActivationsAllowed = std::any_of(
-        params.precisionsOnActivations.begin(),
-        params.precisionsOnActivations.end(),
-        [](const Precision& candidate) { return candidate == Precision::U8; });
-
-    if (!u8OnActivationsAllowed) {
-        if ((actualPrecision != Precision::FP16) && (actualPrecision != Precision::FP32)) {
-            THROW_IE_EXCEPTION << "unexpected precision " << actualPrecision << "";
-        }
-
-        // convolution can not be quantized
-        CNNLayerPtr convolution = CNNNetworkHelper::getLayer(network, "convolution");
-        if (convolution->type != "Convolution") {
-            THROW_IE_EXCEPTION << "unexpected last output dequantization layer type " << convolution->type << " " << convolution->name;
-        }
-
-        const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(*convolution);
-        if (parents.size() != 3ul) {
-            THROW_IE_EXCEPTION << "unexpected parents count " << parents.size();
-        }
-
-        if (parents[0]->type != "FakeQuantize") {
-            THROW_IE_EXCEPTION << "unexpected parents type " << parents[0]->type;
-        }
-
-        return false;
-    }
-
-    if (!params.quantizeOutputs) {
-        return true;
-    }
-
-    if (actualPrecision != Precision::U8) {
-        THROW_IE_EXCEPTION << "expected precision " << Precision::U8 << ", actual " << actualPrecision << "";
-    }
-
-    // Convolution has to be quantized
-    CNNLayerPtr dequantization = CNNNetworkHelper::getLayer(network, "convolution");
-    if (dequantization->type != "ScaleShift") {
-        THROW_IE_EXCEPTION << "unexpected last output dequantization layer type " << dequantization->type << " " << dequantization->name;
-    }
-
-    if (params.updateBiases) {
-        const Blob::Ptr shiftsBlob = CNNNetworkHelper::getBlob(dequantization, "biases");
-        std::shared_ptr<float> shiftsBuffer = CNNNetworkHelper::getFloatData(shiftsBlob);
-        const float* shifts = shiftsBuffer.get();
-        for (size_t i = 0ul; i < shiftsBlob->size(); ++i) {
-            if (shifts[i] != 0.0) {
-                THROW_IE_EXCEPTION << "unexpected dequantization shift value";
-            }
-        }
-    }
-
-    return true;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "low_precision_transformations/fully_connected.hpp"
-
-// Fills the whole input blob with the constant 2.
-// The ramp generator that used to follow an unconditional `return` could never execute,
-// so it has been dropped; the observable behavior is unchanged.
-void PrecisionSelectionMultibranchPreservedTestModel::initInput(Blob::Ptr input) const {
-    fillData(input, 2.f);
-}
-
-// Stores the signed/unsigned switch and zero-initializes the two size_t members.
-// The initializers are listed in member declaration order (acrossChannels,
-// normalizeVariance, signedIntervalOnActivation), which is the order C++ actually
-// initializes them in; this avoids -Wreorder warnings.
-PrecisionSelectionMultibranchPreservedTestModel::PrecisionSelectionMultibranchPreservedTestModel(const bool signedIntervalOnActivation) :
-    acrossChannels(0),
-    normalizeVariance(0),
-    signedIntervalOnActivation(signedIntervalOnActivation) {}
-
-// Builds the serialized test network named "QuantizationOnWeights":
-// input -> Power -> FakeQuantize ("fakeQuantize", fed by four scalar Const layers), whose
-// output feeds two Pooling layers (ids 7 and 8).
-// The network contains no convolution, so the convolution parameters, weight/bias
-// dimensions and output-shape computation that were never read have been removed.
-std::string PrecisionSelectionMultibranchPreservedTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
-    // Element size of a Const blob; depends on the requested network precision.
-    size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
-    if (p._network_precision == "FP16")
-        type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
-    std::map<std::string, std::string> const_params = {};
-    std::map<std::string, std::string> fake_quantize_params = {
-        {"levels", "256"}
-    };
-    std::map<std::string, std::string> power_params = { {"power", "1"}, {"scale", "1"}, {"shift", "0"}};
-    std::map<std::string, std::string> poolingParams = {
-        {"kernel", "1,1"},
-        {"pool-method", "max"},
-        {"exclude-pad", "false"}
-    };
-    // Shape used for both Pooling layers' inputs and outputs.
-    const std::vector<size_t> dimensions = p.outputDimensions[0];
-
-    // Each pair is "layer,port" -> "layer,port".
-    std::vector<std::pair<std::string, std::string>> edges = {
-        {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
-        {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
-        {"6,12", "7,13"}, // Fake quantize to Pooling7
-        {"6,12", "8,15"}  // Fake quantize to Pooling8
-    };
-
-    // The numbered comments are the layer ids referenced by `edges`; keep the call order.
-    return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("QuantizationOnWeights", p.inputDimensions[0], p._network_precision)
-        // 1
-        .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
-        // 2
-        .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
-        // 3
-        .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
-        // 4
-        .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
-        // 5
-        .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
-        // 6
-        .addLayer(
-            "FakeQuantize",
-            p._network_precision,
-            &fake_quantize_params,
-            {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}},
-            "fakeQuantize")
-        // 7
-        .addLayer("Pooling", p._network_precision, &poolingParams, { {dimensions}, {dimensions} })
-        // 8
-        .addLayer("Pooling", p._network_precision, &poolingParams, { {dimensions}, {dimensions} })
-        .finish(&edges);
-}
-
-void PrecisionSelectionMultibranchPreservedTestModel::resetTransformation(CNNNetwork& network) const {
- if (signedIntervalOnActivation) {
- fillData(getLayer(network, "Const2"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const3"), 127.f / 4.f, "custom");
- fillData(getLayer(network, "Const4"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const5"), 127.f / 4.f, "custom");
- } else {
- fillData(getLayer(network, "Const2"), 0.f, "custom");
- fillData(getLayer(network, "Const3"), 255.f / 4.f, "custom");
- fillData(getLayer(network, "Const4"), 0.f, "custom");
- fillData(getLayer(network, "Const5"), 255.f / 4.f, "custom");
- }
-}
-
-std::string PrecisionSelectionMultibranchPreservedTestModel::getName() const {
- return std::string("PrecisionSelectionMultibranchPreservedTestModel") + (signedIntervalOnActivation ? "_Signed" : "_Unsigned");
-}
-
-bool PrecisionSelectionMultibranchPreservedTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- params.updatePrecisions = true;
-
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
-
- if (params.quantizeOutputs && params.updatePrecisions) {
- Precision expectedPrecision;
- if (params.precisionsOnActivations.size() == 1ul) {
- expectedPrecision = params.precisionsOnActivations[0];
- } else {
- expectedPrecision = signedIntervalOnActivation ? Precision::I8 : Precision::U8;
- }
- const CNNLayerPtr fakeQuantize = CNNNetworkHelper::getLayer(network, "fakeQuantize");
- const Precision actualPrecision = fakeQuantize->outData[0]->getTensorDesc().getPrecision();
- if (actualPrecision != expectedPrecision) {
- THROW_IE_EXCEPTION << "expected precision " << expectedPrecision << ", actual " << actualPrecision << "";
- }
- }
-
- return true;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "common_test_utils/common_utils.hpp"
-
-std::string QuantizationOnInvertedWeightsTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- CommonTestUtils::conv_common_params conv =
- { {1, 1}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 32, false, false };
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::vector<size_t> weightsConstInputDims = { 32lu, 32lu, 3lu, 3lu };
- std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"7,13", "12,18"}, {"8,14", "12,19"}, {"9,15", "12,20"}, {"10,16", "12,21"}, {"11,17", "12,22"}, // Const layers
- {"6,12", "14,25"}, {"12,23", "14,26"}, // Fake quantize to Conv
- {"13,24", "14,27"}, // biases to Conv
- {"14,28", "15,29"} // Conv to Power
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "QuantizationOnWeights", p.inputDimensions[0], p._network_precision)
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {weightsConstInputDims}},
- std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {biasesConvolutionConstDims}}, type_size * conv.out_c, 0)
- .convolutionLayer(p._network_precision, {{p.inputDimensions[0], weightsConstInputDims, biasesConvolutionConstDims }, {convOutShape}}, conv)
- .addLayer("Power", p._network_precision, &power_params, {{convOutShape}, {convOutShape}})
- .finish(&edges);
-}
-
-std::string QuantizationOnInvertedWeightsTestModel::getName() const {
- return "QuantizationOnInvertedWeightsTestModel";
-}
-
-bool QuantizationOnInvertedWeightsTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- CNNLayerPtr weightsFakeQuantize = CommonTestUtils::getLayerByName(network, "FakeQuantize12");
- Blob::Ptr weights = CNNNetworkHelper::quantizeWeights(*weightsFakeQuantize, false);
-
- CNNLayerPtr biasesConvolutionConst = CommonTestUtils::getLayerByName(network, "Const13");
- Blob::Ptr biases = getBlob(biasesConvolutionConst, "custom");
-
- CNNLayerPtr convolution = CommonTestUtils::getLayerByName(network, "Convolution14");
- convolution->blobs.emplace("weights", weights);
- convolution->blobs.emplace("biases", biases);
-
- WeightableLayer* weightableLayer = dynamic_cast<WeightableLayer*>(convolution.get());
- weightableLayer->_weights = weights;
- weightableLayer->_biases = biases;
-
- CNNLayerPtr weightsConstInput = CommonTestUtils::getLayerByName(network, "Const7");
- CNNNetworkHelper::removeLayer(network, weightsConstInput);
- CNNLayerPtr weightsConstInputLow = CommonTestUtils::getLayerByName(network, "Const8");
- CNNNetworkHelper::removeLayer(network, weightsConstInputLow);
- CNNLayerPtr weightsConstInputHigh = CommonTestUtils::getLayerByName(network, "Const9");
- CNNNetworkHelper::removeLayer(network, weightsConstInputHigh);
- CNNLayerPtr weightsConstOutputLow = CommonTestUtils::getLayerByName(network, "Const10");
- CNNNetworkHelper::removeLayer(network, weightsConstOutputLow);
- CNNLayerPtr weightsConstOutputHigh = CommonTestUtils::getLayerByName(network, "Const11");
- CNNNetworkHelper::removeLayer(network, weightsConstOutputHigh);
-
- CNNNetworkHelper::removeLayer(network, weightsFakeQuantize);
- CNNNetworkHelper::removeLayer(network, biasesConvolutionConst);
-
- return false;
-}
-
-std::unordered_set<std::string> QuantizationOnInvertedWeightsTestModel::getNotTransformedLayers() const {
- return { "dataFakeQuantize" };
-}
-
-void QuantizationOnInvertedWeightsTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), 0.0, "custom");
- fillData(getLayer(network, "Const3"), 127.5, "custom");
- fillData(getLayer(network, "Const4"), 0.0, "custom");
- fillData(getLayer(network, "Const5"), 127.5, "custom");
-
- fillData(getLayer(network, "Const7"), 3.0, "custom");
-
- fillData(getLayer(network, "Const8"), 1.278 / 2.0, "custom");
- fillData(getLayer(network, "Const9"), -1.27, "custom");
- fillData(getLayer(network, "Const10"), 1.278 / 2.0, "custom");
- fillData(getLayer(network, "Const11"), -1.27, "custom");
-
- fillData(getLayer(network, "Const13"), 5.0, "custom");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "common_test_utils/common_utils.hpp"
-
-std::string QuantizationOnWeightsTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- CommonTestUtils::conv_common_params conv =
- { {1, 1}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 32, false, false };
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::vector<size_t> weightsConstInputDims = { 32lu, 32lu, 3lu, 3lu };
- std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"7,13", "12,18"}, {"8,14", "12,19"}, {"9,15", "12,20"}, {"10,16", "12,21"}, {"11,17", "12,22"}, // Const layers
- {"6,12", "14,25"}, {"12,23", "14,26"}, // Fake quantize to Conv
- {"13,24", "14,27"}, // biases to Conv
- {"14,28", "15,29"} // Conv to Power
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "QuantizationOnWeights", p.inputDimensions[0], p._network_precision)
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {weightsConstInputDims}},
- std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}}})
- .addLayer("Const", p._network_precision, &const_params, {{}, {biasesConvolutionConstDims}}, type_size * conv.out_c, 0)
- .convolutionLayer(p._network_precision, {{p.inputDimensions[0], weightsConstInputDims, biasesConvolutionConstDims }, {convOutShape}}, conv)
- .addLayer("Power", p._network_precision, &power_params, {{convOutShape}, {convOutShape}})
- .finish(&edges);
-}
-
-std::string QuantizationOnWeightsTestModel::getName() const {
- return "QuantizationOnWeightsTestModel";
-}
-
-bool QuantizationOnWeightsTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- CNNLayerPtr weightsFakeQuantize = CommonTestUtils::getLayerByName(network, "FakeQuantize12");
- Blob::Ptr weights = CNNNetworkHelper::quantizeWeights(*weightsFakeQuantize, false);
-
- CNNLayerPtr biasesConvolutionConst = CommonTestUtils::getLayerByName(network, "Const13");
- Blob::Ptr biases = getBlob(biasesConvolutionConst, "custom");
-
- CNNLayerPtr convolution = CommonTestUtils::getLayerByName(network, "Convolution14");
- convolution->blobs.emplace("weights", weights);
- convolution->blobs.emplace("biases", biases);
-
- WeightableLayer* weightableLayer = dynamic_cast<WeightableLayer*>(convolution.get());
- weightableLayer->_weights = weights;
- weightableLayer->_biases = biases;
-
- CNNLayerPtr weightsConstInput = CommonTestUtils::getLayerByName(network, "Const7");
- CNNNetworkHelper::removeLayer(network, weightsConstInput);
- CNNLayerPtr weightsConstInputLow = CommonTestUtils::getLayerByName(network, "Const8");
- CNNNetworkHelper::removeLayer(network, weightsConstInputLow);
- CNNLayerPtr weightsConstInputHigh = CommonTestUtils::getLayerByName(network, "Const9");
- CNNNetworkHelper::removeLayer(network, weightsConstInputHigh);
- CNNLayerPtr weightsConstOutputLow = CommonTestUtils::getLayerByName(network, "Const10");
- CNNNetworkHelper::removeLayer(network, weightsConstOutputLow);
- CNNLayerPtr weightsConstOutputHigh = CommonTestUtils::getLayerByName(network, "Const11");
- CNNNetworkHelper::removeLayer(network, weightsConstOutputHigh);
-
- CNNNetworkHelper::removeLayer(network, weightsFakeQuantize);
- CNNNetworkHelper::removeLayer(network, biasesConvolutionConst);
-
- return false;
-}
-
-std::unordered_set<std::string> QuantizationOnWeightsTestModel::getNotTransformedLayers() const {
- return { "dataFakeQuantize" };
-}
-
-void QuantizationOnWeightsTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), 0.0, "custom");
- fillData(getLayer(network, "Const3"), 127.5, "custom");
- fillData(getLayer(network, "Const4"), 0.0, "custom");
- fillData(getLayer(network, "Const5"), 127.5, "custom");
-
- fillData(getLayer(network, "Const7"), 3.0, "custom");
-
- fillData(getLayer(network, "Const8"), -1.275 / 2.0, "custom");
- fillData(getLayer(network, "Const9"), 1.275, "custom");
- fillData(getLayer(network, "Const10"), -1.275 / 2.0, "custom");
- fillData(getLayer(network, "Const11"), 1.275, "custom");
-
- fillData(getLayer(network, "Const13"), 5.0, "custom");
-}
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-std::string ResampleTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- CommonTestUtils::conv_common_params conv = { {1, 1}, {3, 3}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 32, false, false };
- std::vector<size_t> convOutShape(p.inputDimensions[0].size());
- getConvOutShape(p.inputDimensions[0], conv, convOutShape);
-
- std::vector<size_t> weightsConstInputDims = { 32lu, 32lu, 3lu, 3lu };
- std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
- std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::map<std::string, std::string> resampleParams = {
- {"antialias", "0"}, {"factor", "2"}, {"type", "caffe.ResampleParameter.NEAREST"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "5,5"}, // Power
- {"1,1", "5,6"}, {"2,2", "5,7"}, {"3,3", "5,8"}, {"4,4", "5,9"}, // Const layers
- {"5,10", "6,11"}
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("QuantizationOnWeights", p.inputDimensions[0], p._network_precision)
- // 1
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 2
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 3
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 4
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 5
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- // 6
- .addLayer("Resample", p._network_precision, &resampleParams, {{p.inputDimensions[0]}, {{p.inputDimensions[0]}}})
- .finish(&edges);
-}
-
-std::string ResampleTestModel::getName() const {
- return "ResampleTestModel";
-}
-
-bool ResampleTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params));
- transformer.transform(network);
- return true;
-}
-
-void ResampleTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const1"), -128.0 / 20.0, "custom");
- fillData(getLayer(network, "Const2"), 127.0 / 20.0, "custom");
- fillData(getLayer(network, "Const3"), -128.0 / 20.0, "custom");
- fillData(getLayer(network, "Const4"), 127.0 / 20.0, "custom");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-void ScaleShiftAndFakeQuantizeTestModel::initInput(Blob::Ptr input) const {
- const Precision& precision = input->getTensorDesc().getPrecision();
- const size_t dataSize = input->size();
-
- std::vector<float> data(input->size(), 4.0);
- float value = -64.0;
- for (size_t i = 0ul; i < std::min(static_cast<size_t>(256), dataSize); ++i) {
- if (precision == Precision::FP32) {
- float* buffer = input->buffer().as<float*>();
- buffer[i] = InferenceEngine::PrecisionUtils::f32tof16(value);
- } else if (precision == Precision::FP16) {
- short* buffer = input->buffer().as<short*>();
- buffer[i] = InferenceEngine::PrecisionUtils::f32tof16(value);
- }
- value += 1.0;
- }
-}
-
-std::string ScaleShiftAndFakeQuantizeTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {{"levels", "256"}};
- std::map<std::string, std::string> power_params = {{"power", "2"}, {"scale", "1"}, {"shift", "0"}};
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"6,12", "7,13"}, // Fake quantize to ScaleShift
- {"7,14", "8,15"}
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("FakeQuantizeAndActivationTestModel", p.inputDimensions[0], p._network_precision)
- // 1
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- // 2
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 3
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 4
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 5
- .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
- // 6
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
- // 7
- .addLayer("ScaleShift", p._network_precision, {}, { {p.inputDimensions[0]}, {p.inputDimensions[0]} }, 3 * type_size, 3 * type_size)
- // 8
- .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
- .finish(&edges);
-}
-
-std::string ScaleShiftAndFakeQuantizeTestModel::getName() const {
- return "ScaleShiftAndFakeQuantizeTestModel";
-}
-
-bool ScaleShiftAndFakeQuantizeTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params));
- transformer.transform(network);
- return true;
-}
-
-void ScaleShiftAndFakeQuantizeTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const3"), 127.f / 4.f, "custom");
- fillData(getLayer(network, "Const4"), -128.f / 4.f, "custom");
- fillData(getLayer(network, "Const5"), 127.f / 4.f, "custom");
-
- fillData(getLayer(network, "ScaleShift7"), 1.0, "weights");
- fillData(getLayer(network, "ScaleShift7"), 0.0, "biases");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "low_precision_transformations/scaleshift_to_convolution.hpp"
-
-ScaleShiftToConvolutionAfterConcatTestModel::ScaleShiftToConvolutionAfterConcatTestModel(const bool scaleShiftIsOutput) :
- scaleShiftIsOutput(scaleShiftIsOutput) {}
-
-std::string ScaleShiftToConvolutionAfterConcatTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
-// ASSERT_EQ(2, p.inputDimensions.size());
- size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- if (p._network_precision == "FP16")
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
-
- const size_t axis = 1; // should be passed in 'p' argument
-
- std::vector<size_t> concat_out_dims = p.inputDimensions[0];
- concat_out_dims[axis] += p.inputDimensions[1][axis];
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = {
- {"levels", "256"}
- };
- std::map<std::string, std::string> concat_params = {
- {"axis", "1"}
- };
- std::map<std::string, std::string> power_params = {
- {"power", "2"}, {"scale", "1"}, {"shift", "0"}
- };
-
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "10,10"}, {"1,1", "11,16"}, // Inputs to FakeQuantize
- {"2,2", "10,11"}, {"3,3", "10,12"}, {"4,4", "10,13"}, {"5,5", "10,14"}, // Const layers
- {"6,6", "11,17"}, {"7,7", "11,18"}, {"8,8", "11,19"}, {"9,9", "11,20"}, // Const layers
- {"10,15", "12,22"}, {"11,21", "12,23"}, // FakeQuantize to Concat
- {"12,24", "13,25"} // Concat to ScaleShift
- };
-
- if (!scaleShiftIsOutput) {
- edges.push_back({ "13,26", "14,27" });
- }
-
- auto layers = CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("ScaleShiftToConvolutionAfterConcatTestModel", p.inputDimensions[0], p._network_precision)
- .addInputLayer(p._network_precision, p.inputDimensions[1])
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, { {p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}} })
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, { {p.inputDimensions[1], {1}, {1}, {1}, {1}}, {{p.inputDimensions[1]}} })
- .addLayer("Concat", p._network_precision, &concat_params, { {p.inputDimensions[0], p.inputDimensions[1]}, { concat_out_dims } })
- .addLayer("ScaleShift", p._network_precision, {}, { {p.outputDimensions[0]}, {p.outputDimensions[0]} }, p.outputDimensions[0][1] * type_size, p.outputDimensions[0][1] * type_size);
-
- if (!scaleShiftIsOutput) {
- layers.addLayer("Power", p._network_precision, &power_params, { {p.outputDimensions[0]}, {p.outputDimensions[0]} });
- }
-
- return layers.finish(&edges);
-}
-
-std::string ScaleShiftToConvolutionAfterConcatTestModel::getName() const {
- return std::string("ScaleShiftToConvolutionAfterConcatTestModel") +
- (scaleShiftIsOutput ? "_scaleShiftIsOutput" : "_scaleShiftIsNotOutput");
-}
-
-bool ScaleShiftToConvolutionAfterConcatTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- if (std::any_of(
- params.precisionsOnActivations.begin(),
- params.precisionsOnActivations.end(),
- [](const Precision precision) { return precision == Precision::U8; })) {
- params.updatePrecisions = true;
- }
-
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params).
- addCleanup<ScaleShiftToConvolutionTransformation>(
- LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
- "ScaleShift"));
-
- transformer.transform(network);
-
- if (scaleShiftIsOutput || (!params.updatePrecisions)) {
- CNNLayerPtr scaleShift = CNNNetworkHelper::getLayer(network, "ScaleShift13");
- if (scaleShift->type != "ScaleShift") {
- THROW_IE_EXCEPTION << "unexpected layer type " << scaleShift->type << " '" << scaleShift->name << "'";
- }
- } else {
- CNNLayerPtr convolution = CNNNetworkHelper::getLayer(network, "ScaleShift13");
- if (convolution->type != "Convolution") {
- THROW_IE_EXCEPTION << "unexpected layer type " << convolution->type << " '" << convolution->name << "'";
- }
-
- if (CNNNetworkHelper::getInputChannelsCount(*convolution) != CNNNetworkHelper::getOutputChannelsCount(*convolution)) {
- THROW_IE_EXCEPTION <<
- "input channels count " << CNNNetworkHelper::getInputChannelsCount(*convolution) <<
- " is not not equal output channels count " << CNNNetworkHelper::getOutputChannelsCount(*convolution);
- }
-
- const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParents(*convolution);
-
- const Blob::Ptr weightsBlob = CNNNetworkHelper::getBlob(parents[1], "custom");
- if (weightsBlob == nullptr) {
- THROW_IE_EXCEPTION << "weights are absent";
- }
- if (weightsBlob->getTensorDesc().getPrecision() != Precision::FP16) {
- const std::shared_ptr<float> weightsData = CNNNetworkHelper::getFloatData(weightsBlob);
- if (weightsData == nullptr) {
- THROW_IE_EXCEPTION << "weights are not received";
- }
- const float* weights = weightsData.get();
- size_t notZeroWeightsValues = 0ul;
- for (size_t i = 0ul; i < weightsBlob->size(); ++i) {
- if (weights[i] != 0.f) {
- notZeroWeightsValues++;
- }
- }
- if (notZeroWeightsValues != CNNNetworkHelper::getOutputChannelsCount(*convolution)) {
- THROW_IE_EXCEPTION << "unexpected weights not zero values " << notZeroWeightsValues;
- }
- }
-
- const Blob::Ptr biasesBlob = CNNNetworkHelper::getBlob(parents[2], "custom");
- if (biasesBlob == nullptr) {
- THROW_IE_EXCEPTION << "biases are absent";
- }
- const std::shared_ptr<float> biases = CNNNetworkHelper::getFloatData(biasesBlob);
- if (biases == nullptr) {
- THROW_IE_EXCEPTION << "biases are not received";
- }
- }
-
- return true;
-}
-
-void ScaleShiftToConvolutionAfterConcatTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), 0.0, "custom");
- fillData(getLayer(network, "Const3"), 255.0 / 10.0, "custom");
- fillData(getLayer(network, "Const4"), 0.0, "custom");
- fillData(getLayer(network, "Const5"), 255.0 / 10.0, "custom");
-
- fillData(getLayer(network, "Const6"), -255.0 / 400.0, "custom");
- fillData(getLayer(network, "Const7"), 255.0 / 200.0, "custom");
- fillData(getLayer(network, "Const8"), -255.0 / 400.0, "custom");
- fillData(getLayer(network, "Const9"), 255.0 / 200.0, "custom");
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-void ScaleShiftToConvolutionAfterFakeQuantizeIgnoreTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), 0.f, "custom");
- fillData(getLayer(network, "Const3"), 255.f / 8.f, "custom");
- fillData(getLayer(network, "Const4"), 0.f, "custom");
- fillData(getLayer(network, "Const5"), 255.f / 8.f, "custom");
-
- fillData(getLayer(network, "ScaleShift7"), 3.f, "weights");
- fillData(getLayer(network, "ScaleShift7"), 0.f, "biases");
-}
-
-std::string ScaleShiftToConvolutionAfterFakeQuantizeIgnoreTestModel::getName() const {
- return "ScaleShiftToConvolutionAfterFakeQuantizeIgnoreTestModel";
-}
-
-bool ScaleShiftToConvolutionAfterFakeQuantizeIgnoreTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params));
- transformer.transform(network);
-
- CNNLayerPtr scaleShift = CNNNetworkHelper::getLayer(network, "ScaleShift7");
- if (scaleShift != nullptr) {
- THROW_IE_EXCEPTION << "unexpected layer " << scaleShift->type << " '" << scaleShift->name << "'";
- }
-
- return true;
-}
-
-std::string ScaleShiftToConvolutionAfterFakeQuantizeIgnoreTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size;
- if (p._network_precision == "FP16") {
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
- } else if (p._network_precision == "FP32") {
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- } else {
- THROW_IE_EXCEPTION << "unexpected network precision " << p._network_precision;
- }
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = { {"levels", "256"} };
- std::map<std::string, std::string> power_params = { {"power", "2"}, {"scale", "1"}, {"shift", "0"} };
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, // Input -> Power
- {"1,2", "6,7"}, // Power -> FakeQuantize
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"6,12", "7,13"}, // FakeQuantize -> ScaleShift
- {"7,14", "8,15"}, // FakeQuantize -> ScaleShift
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("ScaleShiftToConvolutionAfterFakeQuantizeIgnoreTestModel", p.inputDimensions[0], p._network_precision)
- // 1
- .addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
- // 2
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 3
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 4
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 5
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 6
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, { {p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}} })
- // 7
- .addLayer("ScaleShift", p._network_precision, {}, { {p.inputDimensions[0]}, {p.inputDimensions[0]} }, p.inputDimensions[0][1] * type_size, p.outputDimensions[0][1] * type_size)
- // 8
- .addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
- .finish(&edges);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-
-void ScaleShiftToConvolutionAfterNotConcatIgnoreTestModel::resetTransformation(CNNNetwork& network) const {
- fillData(getLayer(network, "Const2"), 0.f, "custom");
- fillData(getLayer(network, "Const3"), 255.f / 8.f, "custom");
- fillData(getLayer(network, "Const4"), 0.f, "custom");
- fillData(getLayer(network, "Const5"), 255.f / 8.f, "custom");
-
- fillData(getLayer(network, "ScaleShift8"), 3.f, "weights");
- fillData(getLayer(network, "ScaleShift8"), 0.f, "biases");
-}
-
-std::string ScaleShiftToConvolutionAfterNotConcatIgnoreTestModel::getName() const {
- return "ScaleShiftToConvolutionAfterNotConcatIgnoreTestModel";
-}
-
-bool ScaleShiftToConvolutionAfterNotConcatIgnoreTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params));
- transformer.transform(network);
-
- CNNLayerPtr scaleShift = CNNNetworkHelper::getLayer(network, "ScaleShift8");
- if (scaleShift->type != "ScaleShift") {
- THROW_IE_EXCEPTION << "unexpected layer type " << scaleShift->type << " '" << scaleShift->name << "'";
- }
-
- return true;
-}
-
-std::string ScaleShiftToConvolutionAfterNotConcatIgnoreTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
- size_t type_size;
- if (p._network_precision == "FP16") {
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
- } else if (p._network_precision == "FP32") {
- type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
- } else {
- THROW_IE_EXCEPTION << "unexpected network precision " << p._network_precision;
- }
-
- std::map<std::string, std::string> const_params = {};
- std::map<std::string, std::string> fake_quantize_params = { {"levels", "256"} };
- std::map<std::string, std::string> power_params = { {"power", "1"}, {"scale", "1"}, {"shift", "0"} };
- std::vector<std::pair<std::string, std::string>> edges = {
- {"0,0", "1,1"}, // Input -> Power
- {"1,2", "6,7"}, // Power -> FakeQuantize
- {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
- {"6,12", "7,13"}, // FakeQuantize -> ReLU
- {"7,14", "8,15"}, // ReLU -> ScaleShift
- };
-
- return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("ScaleShiftToConvolutionAfterNotConcatTestModel", p.inputDimensions[0], p._network_precision)
- // 1
- .addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
- // 2
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 3
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 4
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 5
- .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, 0)
- // 6
- .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, { {p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}} })
- // 7
- .addLayer("ReLU", p._network_precision, {}, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
- // 8
- .addLayer("ScaleShift", p._network_precision, {}, { {p.inputDimensions[0]}, {p.inputDimensions[0]} }, p.inputDimensions[0][1] * type_size, p.outputDimensions[0][1] * type_size)
- .finish(&edges);
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "low_precision_transformations/convolution.hpp"
-#include "low_precision_transformations/fully_connected.hpp"
-#include "low_precision_transformations/scaleshift_to_convolution.hpp"
-
-LowPrecisionTransformations SingleLayerTestModel::getLowPrecisionTransformations(const LayerTransformation::Params& params) const {
- if (device_name == "CPU") {
- return LowPrecisionTransformer::getAllTransformations(params).
- add<ConvolutionTransformation>(LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), "Convolution").
- addCleanup<ScaleShiftToConvolutionTransformation>(
- LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
- "ScaleShift");
- } else if (device_name == "GPU") {
- return LowPrecisionTransformer::getAllTransformations(params);
- } else {
- THROW_IE_EXCEPTION << "unknown plugin " << device_name;
- }
-}
-
-LowPrecisionTransformer SingleLayerTestModel::getLowPrecisionTransformer(const LayerTransformation::Params& params) const {
- LowPrecisionTransformer transformer(getLowPrecisionTransformations(params));
- return transformer;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
-#include "common/validation.hpp"
-#include "tests_common_func.hpp"
-
-TBlob<uint8_t>::Ptr SingleLayerTransformationsTest::generateWeights(const CNNNetwork& network) {
- std::vector<Blob::Ptr> blobs;
- const auto net_precision = network.getInputsInfo().begin()->second->getPrecision();
-
- std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network);
- for (CNNLayerPtr layer : sortedLayers) {
- auto weightableLayer = std::dynamic_pointer_cast<WeightableLayer>(layer);
- const std::string& type = layer->type;
- if ((weightableLayer == nullptr) && !CaselessEq<std::string>()(type, "Const")) {
- continue;
- }
-
- size_t blobSize = 0lu;
- if (CaselessEq<std::string>()(type, "Convolution")) {
- const size_t kernelSize = CNNNetworkHelper::getKernelSize(*layer);
- const size_t inputChannelsCount = CNNNetworkHelper::getInputChannelsCount(*layer);
- const size_t outputChannelsCount = CNNNetworkHelper::getOutputChannelsCount(*layer);
- blobSize = kernelSize * inputChannelsCount * outputChannelsCount;
- } else if (CaselessEq<std::string>()(type, "Const")) {
- const std::vector<size_t>& dims = layer->outData[0]->getDims();
- blobSize = std::accumulate(dims.begin(), dims.end(), 1lu, std::multiplies<size_t>());
- } else if (CaselessEq<std::string>()(type, "ScaleShift")) {
- blobSize = 2 * layer->outData[0]->getDims()[1]; // weights and biases
- }
-
- Blob::Ptr weights = CNNNetworkHelper::makeNewBlobPtr({ net_precision, { blobSize }, C });
- weights->allocate();
- fillDataWithInitValue(weights, 1.23f);
- blobs.push_back(weights);
-
- if (CaselessEq<std::string>()(type, "Convolution")) {
- Blob::Ptr bias = CNNNetworkHelper::makeNewBlobPtr({ net_precision, { CNNNetworkHelper::getOutputChannelsCount(*layer) }, C });
- bias->allocate();
- fillDataWithInitValue(bias, 3.21f);
- blobs.push_back(bias);
- }
- }
- size_t totalSize = 0lu;
- for (auto& blob : blobs) totalSize += (blob->byteSize());
-
- TBlob<uint8_t>::Ptr modelBlob = make_shared_blob<uint8_t>({ Precision::U8, { totalSize }, C });
- modelBlob->allocate();
- uint8_t* modelBlobBuffer = modelBlob->buffer().as<uint8_t *>();
- for (Blob::Ptr blob : blobs) {
- memcpy(modelBlobBuffer, blob->buffer().as<uint8_t *>(), blob->byteSize());
- modelBlobBuffer += blob->byteSize();
- }
-
- return modelBlob;
-}
-
-// TODO: not completed
-void SingleLayerTransformationsTest::checkNetworkWithFakeQuantize(const CNNNetwork& network) {
- size_t total_size_in_bytes = 0;
- std::vector<Blob::Ptr> blob_to_model;
-
- std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network);
- for (CNNLayerPtr layer : sortedLayers) {
- if ((layer->type != "Convolution") && (layer->type != "Const")) {
- continue;
- }
- }
-}
-
-// TODO: not completed
-void SingleLayerTransformationsTest::checkNetworkWithQuantize(const CNNNetwork& network) {
- size_t total_size_in_bytes = 0;
- std::vector<Blob::Ptr> blob_to_model;
-
- std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network);
- for (CNNLayerPtr layer : sortedLayers) {
- if ((layer->type != "Convolution") && (layer->type != "Const")) {
- continue;
- }
- }
-}
-
-//void SingleLayerTransformationsTest::sortBlobs(CNNLayer& layer) {
-// auto it = layer.blobs.begin();
-// if (it == layer.blobs.end()) {
-// THROW_IE_EXCEPTION << "there is no blobs";
-// }
-
-// const auto size = it->second->size();
-// const auto byteSize = it->second->byteSize();
-// if ((it->second->size() != 2) || (it->second->byteSize() != 16)) {
-// THROW_IE_EXCEPTION << "not supported - under development";
-// }
-
-// float* buffer = it->second->buffer().as<float*>();
-// if (buffer[0] > buffer[1]) {
-// const float tmp = buffer[0];
-// buffer[0] = buffer[1];
-// buffer[1] = tmp;
-// }
-//}
-
-CNNNetwork SingleLayerTransformationsTest::createNetwork() {
- SingleLayerTransformationsTestParams p = ::testing::WithParamInterface<SingleLayerTransformationsTestParams>::GetParam();
- std::string model = p.model->getModel(p);
-
- Core reader;
- auto weights_fake = make_shared_blob<uint8_t>(TensorDesc(Precision::U8,
- SizeVector({std::numeric_limits<uint32_t>::max()/2}), Layout::C));
- weights_fake->allocate();
- CNNNetwork network = reader.ReadNetwork(model, weights_fake);
-
- auto modelBlob = generateWeights(network);
- return reader.ReadNetwork(model, modelBlob);
-}
-
-std::unordered_map<std::string, InferenceEngine::Blob::Ptr> SingleLayerTransformationsTest::infer(
- CNNNetwork& network,
- std::unordered_map<std::string, Blob::Ptr>& inputBlobs,
- Core & core,
- const std::string & device_name,
- ExecutableNetwork & executableNetwork,
- InferRequest & inferRequest) {
- const SingleLayerTransformationsTestParams p = ::testing::WithParamInterface<SingleLayerTransformationsTestParams>::GetParam();
-
- std::map<std::string, std::string> config;
- config.emplace(PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE, PluginConfigParams::NO);
- //config.emplace(PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT, "SingleLayerTransformationsTest");
-
- executableNetwork = core.LoadNetwork(network, device_name, config);
- inferRequest = executableNetwork.CreateInferRequest();
-
- for (auto& item : inputBlobs) {
- inferRequest.SetBlob(item.first.c_str(), item.second);
- }
-
- inferRequest.Infer();
-
- const std::map<std::string, DataPtr> outputsInfo = network.getOutputsInfo();
- std::unordered_map<std::string, InferenceEngine::Blob::Ptr> outputs_blob_map;
- for (auto& info : outputsInfo) {
- Blob::Ptr output_blob = inferRequest.GetBlob(info.first.c_str());
- outputs_blob_map.insert({info.first, output_blob});
- }
-
- return outputs_blob_map;
-}
-
-void SingleLayerTransformationsTest::compareInDetails(
- InferenceEngine::Blob &res,
- InferenceEngine::Blob &ref,
- const size_t maxDifferenceCounts,
- float max_diff) {
- float *res_ptr = res.buffer().as<float*>();
- size_t res_size = res.size();
-
- float *ref_ptr = ref.buffer().as<float*>();
- size_t ref_size = ref.size();
-
- ASSERT_EQ(res_size, ref_size);
-
- size_t differenceCount = 0;
- std::stringstream log;
- for (size_t i = 0; i < ref_size; i++) {
- const float difference = fabs((res_ptr[i] - ref_ptr[i]) / ref_ptr[i]) * 100.0;
- if ((difference >= max_diff) && (fabs(res_ptr[i] - ref_ptr[i]) > 0.0003)) {
- log << "i=" << i << ": " << res_ptr[i] << " VS " << ref_ptr[i] << ": " << difference << "%, " << fabs(res_ptr[i] - ref_ptr[i]) << std::endl;
-
- differenceCount++;
- if (differenceCount > maxDifferenceCounts) {
- std::cout << log.str();
- std::cout << differenceCount << " differences are detected" << std::endl;
- ASSERT_TRUE(difference < max_diff);
- break;
- }
- }
- }
-}
-
-static void relative_compare(
- const float* res,
- const float* ref,
- size_t size,
- float max_diff = 0.01f,
- const std::string assertDetails = "",
- float zero_diff = 1e-7f) {
- for (size_t i = 0lu; i < size; i++) {
- if (std::isnan(res[i]) && std::isnan(ref[i])) {
- continue;
- }
-
- if ((ref[i] == 0.f) || (res[i] == 0.f)) {
- const float diff = fabs(res[i] - ref[i]);
- ASSERT_TRUE(diff < zero_diff) <<
- "\nAbsolute comparison of values ref: " << ref[i] << " and res: " << res[i] <<
- ", diff: " << diff <<
- ", index: " << i << "\n" << assertDetails;
- } else {
- const float diff = fabs((res[i] - ref[i]) / (std::max)(ref[i], res[i]));
- ASSERT_LT(diff, max_diff) <<
- "\nRelative comparison of values ref: " << ref[i] << " and res: " << res[i] <<
- ", diff: " << diff <<
- ", max_diff: " << max_diff <<
- ", index: " << i << "\n" << assertDetails;
- }
- }
-}
-
-void SingleLayerTransformationsTest::SetUp() {
- try {
- const SingleLayerTransformationsTestParams p = ::testing::WithParamInterface<SingleLayerTransformationsTestParams>::GetParam();
- // TODO: ONNX enabling
- CNNNetwork network = createNetwork();
- ASSERT_EQ(nullptr, network.getFunction());
-
- const auto inputsInfo = network.getInputsInfo();
- std::unordered_map<std::string, Blob::Ptr> inputBlobs;
- for (auto& inputInfo : inputsInfo) {
- const TensorDesc& desc = inputInfo.second->getTensorDesc();
- Blob::Ptr input = CNNNetworkHelper::makeNewBlobPtr(desc);
- input->allocate();
-
- fillData(input, 4.f);
- p.model->initInput(input);
-
- inputBlobs.insert(std::pair<std::string, Blob::Ptr>(inputInfo.first, input));
- }
-
- p.model->resetTransformation(network);
-
- //network.serialize(
- // p.model->getName() + "_original.xml",
- // p.model->getName() + "_original.bin");
-
- Core core;
- ExecutableNetwork executableNetwork;
- InferRequest inferRequest;
- const auto originalOutputMap = infer(network, inputBlobs, core,
- p.device_name, executableNetwork, inferRequest);
-
- const std::vector<bool> updatePrecisionsValues = { false };
- const std::vector<bool> quantizeOutputsValues = { true, false };
- const std::vector<bool> weightsToConstValues = { true, false };
- const std::vector<LayerTransformation::QuantizedTensorAlignment> quantizedTensorAlignmentOnActivationsValues = {
- LayerTransformation::QuantizedTensorAlignment::None,
- LayerTransformation::QuantizedTensorAlignment::UpdateLevel
- };
- const std::vector<LayerTransformation::QuantizedTensorAlignment> quantizedTensorAlignmentOnWeightsValues = {
- LayerTransformation::QuantizedTensorAlignment::None,
- //LayerTransformation::QuantizedTensorAlignment::Mixed
- };
- const std::vector<bool> roundQuantizedValues = { false, true };
- const std::vector<bool> updateBiasesValues = { true, false };
- const std::vector<bool> supportAsymmetricQuantizationValues = { true /*, false*/ };
- const std::vector<std::vector<Precision>> precisionOnActivationsValues = {
- { Precision::I8 },
- { Precision::I8, Precision::U8 },
- { Precision::U8 },
- { Precision::U8, Precision::I8 }
- };
- const std::vector<std::vector<Precision>> precisionOnWeightsValues = { { Precision::I8 } };
-
- for (const bool updatePrecision : updatePrecisionsValues) {
- for (const bool quantizeOutputs : quantizeOutputsValues) {
- for (const bool weightsToConst : weightsToConstValues) {
- for (const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnActivations : quantizedTensorAlignmentOnActivationsValues) {
- for (const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnWeights : quantizedTensorAlignmentOnWeightsValues) {
- for (const bool roundQuantizedValue : roundQuantizedValues) {
- for (const bool updateBiases : updateBiasesValues) {
- for (const bool supportAsymmetricQuantization : supportAsymmetricQuantizationValues) {
- for (const std::vector<Precision> precisionOnActivations : precisionOnActivationsValues) {
- for (const std::vector<Precision> precisionOnWeights : precisionOnWeightsValues) {
- network = createNetwork();
-
- p.model->resetTransformation(network);
- auto param = LayerTransformation::Params(
- updatePrecision,
- quantizeOutputs,
- weightsToConst,
- quantizedTensorAlignmentOnActivations,
- quantizedTensorAlignmentOnWeights,
- roundQuantizedValue,
- updateBiases,
- supportAsymmetricQuantization,
- precisionOnActivations,
- precisionOnWeights);
-
- const bool validate = p.model->transform(network, param);
-
-#ifdef DISPLAY_RESULTS
- // TODO: separate each usecase to standalone parameterized test
- std::cout << std::endl <<
- "\tupdatePrecision=" << (param.updatePrecisions ? "true" : "false") << std::endl <<
- "\tquantizeOutputs=" << (param.quantizeOutputs ? "true" : "false") << std::endl <<
- "\tweightsToConst=" << (param.weightsToConst ? "true" : "false") << std::endl <<
- "\tquantizedTensorAlignmentOnActivations=" << param.quantizedTensorAlignmentOnActivations << std::endl <<
- "\tquantizedTensorAlignmentOnWeights=" << param.quantizedTensorAlignmentOnWeights << std::endl <<
- "\troundQuantizedValues: " << (param.roundQuantizedValues ? "true" : "false") << std::endl <<
- "\tupdateBiases: " << (param.updateBiases ? "true" : "false") << std::endl <<
- "\tsupportAsymmetricQuantization: " << (param.supportAsymmetricQuantization ? "true" : "false") << std::endl <<
- "\tprecisionsOnActivations: " << param.precisionsOnActivations << std::endl <<
- "\tprecisionsOnWeights: " << param.precisionsOnWeights << std::endl <<
- "\tnetworkPrecision=" << p._network_precision << std::endl;
-#endif
-
- //network.serialize(
- // p.model->getName() + "_transformed.xml",
- // p.model->getName() + "_transformed.bin");
-
- if (validate) {
- LowPrecisionTransformationValidation::validate(
- network,
- param,
- p.model->getNotTransformedLayers());
- }
-
- ExecutableNetwork executableNetworkTransformed;
- InferRequest inferRequestTransformed;
- const auto transformedOutput = infer(network, inputBlobs, core, p.device_name, executableNetworkTransformed, inferRequestTransformed);
-
- //compareInDetails(originalOutputMap, *transformedOutput, 70, 0.5);
- auto net_precision = network.getInputsInfo().begin()->second->getPrecision();
- for (auto& originalOutput : originalOutputMap) {
- const auto& name = originalOutput.first;
- const auto outSize = originalOutput.second->size();
-
- auto transformed = CNNNetworkHelper::getFloatData(transformedOutput.find(name)->second);
- auto original = CNNNetworkHelper::getFloatData(originalOutput.second);
-
- const float threshold = p.model->getThreshold(p.device_name, net_precision, param);
- const float zeroThreshold = p.model->getZeroThreshold();
-
- const auto outName = transformedOutput.find(name);
- if (outName == transformedOutput.end()) {
- THROW_IE_EXCEPTION << "Original output name " + name + " doesn't exist in transformed model";
- }
-
- relative_compare(
- CNNNetworkHelper::getFloatData(outName->second).get(),
- CNNNetworkHelper::getFloatData(originalOutput.second).get(),
- outSize,
- threshold,
- updatePrecision ? "failed with precisions" : "failed without precisions",
- zeroThreshold);
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
- } catch (const InferenceEngine::details::InferenceEngineException &e) {
- FAIL() << e.what();
- }
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "low_precision_transformations/fake_quantize.hpp"
-#include "low_precision_transformations/convolution.hpp"
-
-UpdateBiasesConvolutionTestModel::UpdateBiasesConvolutionTestModel(const bool addBiasesLayer) : ConvolutionBaseTestModel(addBiasesLayer) {}
-
-std::string UpdateBiasesConvolutionTestModel::getName() const {
- return std::string("UpdateBiasesConvolutionTestModel") +
- (addBiasesLayer ? "" : "_withoutBiases");
-}
-
-void UpdateBiasesConvolutionTestModel::initInput(Blob::Ptr input) const {
- fillDataWithInitValue(input, -1.f);
-}
-
-bool UpdateBiasesConvolutionTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- params.supportAsymmetricQuantization = false;
-
- LowPrecisionTransformer transformer = getLowPrecisionTransformer(params);
- transformer.transform(network);
-
- if (std::any_of(
- params.precisionsOnActivations.begin(),
- params.precisionsOnActivations.end(),
- [](const Precision precision) { return precision == Precision::U8; }) &&
- params.quantizeOutputs) {
- const CNNLayerPtr dequantizationLayer = getLayer(network, "Convolution");
- if (dequantizationLayer->type != "ScaleShift") {
- THROW_IE_EXCEPTION << "was not quantized";
- }
-
- const Blob::Ptr biases = CNNNetworkHelper::getBiases(*dequantizationLayer);
- const std::shared_ptr<float> biasesData = CNNNetworkHelper::getFloatData(biases);
- if (params.updateBiases) {
- for (size_t i = 0ul; i < biases->size(); ++i) {
- if (biasesData.get()[i] != 0.f) {
- THROW_IE_EXCEPTION << "biases value is not zero";
- }
- }
-
- //CNNLayerPtr convolution = getCreatorLayer(dequantizationLayer->insData[0].lock()).lock();
- //CNNLayerPtr convolutionBiases = CNNNetworkHelper::getParent(*convolution, 2);
- //if (convolutionBiases == nullptr) {
- // THROW_IE_EXCEPTION << "biases const layer was not added";
- //}
- }
- }
-
- return true;
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision_transformer_single_layer_tests.hpp"
-#include "low_precision_transformations/fake_quantize.hpp"
-#include "low_precision_transformations/convolution.hpp"
-#include "low_precision_transformations/fully_connected.hpp"
-#include "low_precision_transformations/scaleshift_to_convolution.hpp"
-
-UpdateBiasesFullyConnectedTestModel::UpdateBiasesFullyConnectedTestModel(const bool addBiasesLayer) : FullyConnectedBaseTestModel(addBiasesLayer) {}
-
-std::string UpdateBiasesFullyConnectedTestModel::getName() const {
- return std::string("UpdateBiasesFullyConnectedTestModel") +
- (addBiasesLayer ? "WithBiases" : "WithoutBiases");
-}
-
-void UpdateBiasesFullyConnectedTestModel::initInput(Blob::Ptr input) const {
- fillDataWithInitValue(input, -1.f);
-}
-
-bool UpdateBiasesFullyConnectedTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
- // TODO: use getLowPrecisionTransformer(params) instead
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params).
- add<FullyConnectedTransformation>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false), "FullyConnected").
- add<ConvolutionTransformation>(LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), "Convolution").
- addCleanup<ScaleShiftToConvolutionTransformation>(
- LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
- "ScaleShift"));
-
- transformer.transform(network);
-
- if (params.quantizeOutputs) {
- const CNNLayerPtr dequantizationLayer = getLayer(network, "fullyConnected");
- if (dequantizationLayer->type != "ScaleShift") {
- THROW_IE_EXCEPTION << "was not quantized";
- }
-
- const Blob::Ptr biases = CNNNetworkHelper::getBiases(*dequantizationLayer);
- const std::shared_ptr<float> biasesData = CNNNetworkHelper::getFloatData(biases);
- if (params.updateBiases) {
- for (size_t i = 0ul; i < biases->size(); ++i) {
- if (biasesData.get()[i] != 0.f) {
- THROW_IE_EXCEPTION << "biases value is not zero";
- }
- }
- } else {
- for (size_t i = 0ul; i < biases->size(); ++i) {
- if (biasesData.get()[i] == 0.f) {
- THROW_IE_EXCEPTION << "biases value is zero";
- }
- }
- }
- }
-
- return true;
-}
target_include_directories(${target_name} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}"
"${IE_MAIN_SOURCE_DIR}/src/inference_engine"
- $<TARGET_PROPERTY:inference_engine_lp_transformations_legacy,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:pugixml,INTERFACE_INCLUDE_DIRECTORIES>
"${IE_MAIN_SOURCE_DIR}/src/vpu/"
"${IE_MAIN_SOURCE_DIR}/src/plugin_api"
# dynamic libraries
inference_engine_transformations
- inference_engine_ir_v7_reader)
-
-if(USE_CNNNETWORK_LPT)
- target_link_libraries(${TARGET_NAME} PRIVATE inference_engine_lp_transformations_legacy)
-else()
- target_link_libraries(${TARGET_NAME} PRIVATE inference_engine_lp_transformations)
-endif()
+ inference_engine_ir_v7_reader
+ inference_engine_lp_transformations)
if(TARGET libGNAStubs)
target_link_libraries(${TARGET_NAME} PRIVATE libGNAStubs)