From e8aed763d2bf9acd575c74b71e003541f1411542 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 19 Jun 2020 15:10:21 +0300 Subject: [PATCH] Removed Int8 normalizer and statistics (#919) * Removed Int8 normalizer and statistics * Removed statistics handling from tests utils * Fixed tests compilation with statistics --- .../src/openvino/inference_engine/ie_api.pxd | 3 - .../src/openvino/inference_engine/ie_api.pyx | 63 - .../src/openvino/inference_engine/ie_api_impl.cpp | 41 - .../src/openvino/inference_engine/ie_api_impl.hpp | 4 - .../openvino/inference_engine/ie_api_impl_defs.pxd | 2 - .../ie_bridges/python/tests/test_IENetwork.py | 50 +- inference-engine/include/ie_icnn_network.hpp | 17 - inference-engine/include/ie_icnn_network_stats.hpp | 108 -- inference-engine/include/inference_engine.hpp | 1 - inference-engine/include/vpu/vpu_plugin_config.hpp | 2 + .../src/cldnn_engine/cldnn_program.cpp | 1 - .../hetero_plugin/hetero_executable_network.cpp | 7 +- .../inference_engine/cnn_network_ngraph_impl.hpp | 4 - .../src/legacy_api/include/cnn_network_impl.hpp | 8 - .../include/cnn_network_int8_normalizer.hpp | 262 --- .../legacy_api/include/cnn_network_stats_impl.hpp | 50 - .../src/legacy_api/include/ie_util_internal.hpp | 2 +- .../src/legacy_api/src/cnn_network_impl.cpp | 2 +- .../legacy_api/src/cnn_network_int8_normalizer.cpp | 1774 -------------------- .../src/legacy_api/src/cnn_network_stats_impl.cpp | 33 - inference-engine/src/legacy_api/src/data_stats.h | 85 - .../src/legacy_api/src/ie_util_internal.cpp | 18 +- .../src/legacy_api/src/network_serializer.cpp | 40 - .../src/mkldnn_plugin/mkldnn_exec_network.cpp | 87 +- .../src/mkldnn_plugin/mkldnn_graph.cpp | 2 - .../src/readers/ir_reader/ie_format_parser.cpp | 40 - .../src/readers/ir_reader/ie_format_parser.h | 1 - .../include/vpu/graph_transformer.hpp | 2 - .../graph_transformer/include/vpu/model/model.hpp | 4 - .../graph_transformer/src/frontend/frontend.cpp | 9 - .../src/middleend/passes/weights_analysis.cpp | 32 +- .../vpu/graph_transformer/src/parsed_config.cpp | 2 - .../src/vpu/myriad_plugin/myriad_metrics.cpp | 1 - .../src/vpu/myriad_plugin/myriad_plugin.cpp | 1 - .../cnn_network/cnn_ngraph_impl_tests.cpp | 3 - .../shared_tests_instances/behavior/config.cpp | 8 - .../behavior/infer_request_config.cpp | 2 - .../common_test_utils/common_layers_params.cpp | 10 - .../common_test_utils/common_layers_params.hpp | 29 - .../xml_net_builder/xml_net_builder.cpp | 4 +- .../xml_net_builder/xml_net_builder.hpp | 64 +- .../unit_test_utils/mocks/mock_icnn_network.hpp | 2 - .../plugin_tests/vpu_test_data.hpp | 8 - .../ie_tests/include/regression_config.hpp | 1 - .../ie_tests/include/regression_tests.hpp | 19 +- .../ie_tests/src/classification_matcher.cpp | 17 - .../ie_tests/src/object_detection_matcher.cpp | 19 - .../mkldnn/single_layer_tests/conv_tests_int8.cpp | 27 - .../mkldnn/single_layer_tests/network_stats.cpp | 396 ----- .../mkldnn/single_layer_tests/network_stats.h | 44 - .../shared_tests/network_tests/network_i8.hpp | 16 - .../transformations/conv_base_test.cpp | 2 +- ...ecision_selection_multibranch_not_preserved.cpp | 2 +- .../common/regression/helpers/vpu_case_common.hpp | 1 - .../common/regression/helpers/vpu_case_params.hpp | 15 +- .../regression/helpers/vpu_classification_case.cpp | 12 +- .../regression/helpers/vpu_classification_case.hpp | 2 - .../tests_deprecated/helpers/xml_helper.cpp | 55 - .../tests_deprecated/helpers/xml_helper.hpp | 3 - .../tests_deprecated/unit/CMakeLists.txt | 10 +- 
.../unit/engines/mkldnn/dumper_test.cpp | 2 - .../mkldnn/normalizer/supported_fusions_test.cpp | 76 +- .../normalization/latest_in_fuse_test.cpp | 168 -- .../unit/inference_engine_tests/util_test.cpp | 14 +- 64 files changed, 108 insertions(+), 3681 deletions(-) delete mode 100644 inference-engine/include/ie_icnn_network_stats.hpp delete mode 100644 inference-engine/src/legacy_api/include/cnn_network_int8_normalizer.hpp delete mode 100644 inference-engine/src/legacy_api/include/cnn_network_stats_impl.hpp delete mode 100644 inference-engine/src/legacy_api/src/cnn_network_int8_normalizer.cpp delete mode 100644 inference-engine/src/legacy_api/src/cnn_network_stats_impl.cpp delete mode 100644 inference-engine/src/legacy_api/src/data_stats.h delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/network_stats.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/network_stats.h delete mode 100644 inference-engine/tests_deprecated/unit/inference_engine_tests/normalization/latest_in_fuse_test.cpp diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd index 7b50eec..9ce6ef4 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd @@ -48,9 +48,6 @@ cdef class ExecutableNetwork: cdef public: _requests, _infer_requests -cdef class LayersStatsMap(dict): - cdef C.IENetwork net_impl - cdef class IECore: cdef C.IECore impl cpdef IENetwork read_network(self, model : [str, bytes, Path], weights : [str, bytes, Path] = ?, bool init_from_buffer = ?) diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx index d367241..de74257 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx @@ -1235,42 +1235,6 @@ cdef class InferRequest: self.input_blobs[k].buffer[:] = v -## Layer calibration statistic container. -class LayerStats: - - ## Class constructor - # - # @param min: Tuple with per-channel minimum layer activation values - # @param max: Tuple with per-channel maximum layer activation values - # @return An instance of LayerStats class - def __init__(self, min: tuple = (), max: tuple = ()): - self._min = min - self._max = max - - ## Tuple with per-channel minimum layer activation values - @property - def min(self): - return self._min - - ## Tuple with per-channel maximum layer activation values - @property - def max(self): - return self._max - - -## Class inherited from built-in python `dict` class and overrides default `update()`method to allow -# to set or modify layers calibration statistics. 
-cdef class LayersStatsMap(dict): - def update(self, other=None, **kwargs): - super(LayersStatsMap, self).update(other, **kwargs) - cdef map[string, map[string, vector[float]]] c_stats_map - cdef map[string, vector[float]] c_node_stats - for k, v in self.items(): - c_node_stats["min".encode()] = v.min - c_node_stats["max".encode()] = v.max - c_stats_map[k.encode()] = c_node_stats - self.net_impl.setStats(c_stats_map) - ## This class represents a main layer information and providing setters allowing to modify layer properties cdef class IENetLayer: ## Name of the layer @@ -1586,33 +1550,6 @@ cdef class IENetwork: layers[deref(l).name.decode()] = net_l return layers - ## \note This property is deprecated. - # New Calibration Tool doesn't generate statistics - # - # Returns `LayersStatsMap` object containing dictionary that maps network layer names to calibration statistics - # represented by `LayerStats` objects. - # - # Usage example:\n - # ```python - # ie = IECore() - # net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file) - # net.stats.update({"conv1_2d" : LayserStats(min=(-25, -1, 0), max=(63, 124, 70)), - # "conv2_2d" : LayserStats(min=(-5, -1, 0, 1, -7, 2), max=(63, 124, 70, 174, 99, 106)) - # }) - # ``` - @property - def stats(self): - warnings.warn("stats property of IENetwork is deprecated.", - DeprecationWarning) - cdef map[string, map[string, vector[float]]] c_stats_map = self.impl.getStats() - py_stats_map = LayersStatsMap() - py_stats_map.net_impl = self.impl - for it in c_stats_map: - py_stats_map[it.first.decode()] = LayerStats(min=tuple(it.second["min".encode()]), - max=tuple(it.second["max".encode()])) - return py_stats_map - - ## Marks any intermediate layer as output layer to retrieve the inference results from the specified layers. # @param outputs: List of layers to be set as model outputs. The list can contain strings with layer names to be set # as outputs or tuples with layer name as first element and output port id as second element. 
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp index 97361dc..d7e0cd9 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp @@ -295,47 +295,6 @@ void InferenceEnginePython::IENetwork::reshape(const std::map reshape(input_shapes); } -const std::map >> - -InferenceEnginePython::IENetwork::getStats() { - IE_SUPPRESS_DEPRECATED_START - std::map < std::string, std::map < std::string, std::vector < float >> > map; - InferenceEngine::ICNNNetworkStats *pstats = nullptr; - InferenceEngine::ResponseDesc response; - auto retCode = ((InferenceEngine::ICNNNetwork &) *actual).getStats(&pstats, &response); - if (retCode == InferenceEngine::OK) { - auto statsMap = pstats->getNodesStats(); - for (const auto &it : statsMap) { - std::map > stats; - stats.emplace("min", it.second->_minOutputs); - stats.emplace("max", it.second->_maxOutputs); - map.emplace(it.first, stats); - } - } - return map; - IE_SUPPRESS_DEPRECATED_END -} - -void InferenceEnginePython::IENetwork::setStats(const std::map>> &stats) { - IE_SUPPRESS_DEPRECATED_START - InferenceEngine::ICNNNetworkStats *pstats = nullptr; - InferenceEngine::ResponseDesc response; - auto retCode = ((InferenceEngine::ICNNNetwork &) *actual).getStats(&pstats, &response); - if (retCode == InferenceEngine::OK) { - std::map newNetNodesStats; - for (const auto &it : stats) { - InferenceEngine::NetworkNodeStatsPtr nodeStats = InferenceEngine::NetworkNodeStatsPtr( - new InferenceEngine::NetworkNodeStats()); - newNetNodesStats.emplace(it.first, nodeStats); - nodeStats->_minOutputs = it.second.at("min"); - nodeStats->_maxOutputs = it.second.at("max"); - } - pstats->setNodesStats(newNetNodesStats); - } - IE_SUPPRESS_DEPRECATED_END -} - InferenceEnginePython::IEExecNetwork::IEExecNetwork(const std::string &name, size_t num_requests) : infer_requests(num_requests), name(name) { request_queue_ptr = std::make_shared(); diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp index 0f136ac..33a65c4 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp @@ -61,10 +61,6 @@ struct IENetwork { void serialize(const std::string &path_to_xml, const std::string &path_to_bin); - void setStats(const std::map>> &stats); - - const std::map>> getStats(); - void load_from_buffer(const char* xml, size_t xml_size, uint8_t* bin, size_t bin_size); IENetwork(const std::string &model, const std::string &weights); diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd index 2d40b6f..784463a 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd @@ -189,8 +189,6 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": void setLayerParams(map[string, map[string, string]] params_map) except + void serialize(const string& path_to_xml, const string& path_to_bin) except + void reshape(map[string, vector[size_t]] input_shapes) except 
+ - void setStats(map[string, map[string, vector[float]]] & stats) except + - map[string, map[string, vector[float]]] getStats() except + void load_from_buffer(const char*xml, size_t xml_size, uint8_t*bin, size_t bin_size) except + object getFunction() except + diff --git a/inference-engine/ie_bridges/python/tests/test_IENetwork.py b/inference-engine/ie_bridges/python/tests/test_IENetwork.py index 33b1254..49ed978 100644 --- a/inference-engine/ie_bridges/python/tests/test_IENetwork.py +++ b/inference-engine/ie_bridges/python/tests/test_IENetwork.py @@ -4,7 +4,7 @@ import warnings import numpy as np from openvino.inference_engine import IECore, IENetwork, IENetLayer, DataPtr, \ - LayersStatsMap, LayerStats, InputInfoPtr, PreProcessInfo + InputInfoPtr, PreProcessInfo from conftest import model_path @@ -190,54 +190,6 @@ def test_layers(): assert isinstance(net.layers['19/Fused_Add_'], IENetLayer) -def test_get_stats_deprecated(): - with warnings.catch_warnings(record=True) as w: - ie = IECore() - net = ie.read_network(model=test_net_xml, weights=test_net_bin) - stats = net.stats - assert isinstance(stats, LayersStatsMap) - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - assert "stats property of IENetwork is deprecated." in str(w[-1].message) - - -@pytest.mark.skip(reason="Test is failed due-to ngraph conversion") -def test_set_new_stats_deprecated(): - with warnings.catch_warnings(record=True) as w: - ie = IECore() - net = ie.read_network(model=test_net_xml, weights=test_net_bin) - new_stats = LayerStats(min=(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0), - max=(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0)) - stats = net.stats - stats.update({"fc_out": new_stats}) - assert net.stats["fc_out"].min == new_stats.min - assert net.stats["fc_out"].max == new_stats.max - assert len(w) == 3 - for warns in w: - assert issubclass(warns.category, DeprecationWarning) - assert "stats property of IENetwork is deprecated." in str(warns.message) - - -@pytest.mark.skip(reason="Test is failed due-to ngraph conversion") -def test_update_stats_deprecated(): - with warnings.catch_warnings(record=True) as w: - ie = IECore() - net = ie.read_network(model=test_net_xml, weights=test_net_bin) - initial_stats = LayerStats(min=(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0), - max=(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0)) - stats = net.stats - stats.update({"fc_out": initial_stats}) - new_stats = LayerStats(min=(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0), - max=(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0)) - stats.update({"fc_out": new_stats}) - assert net.stats["fc_out"].min == new_stats.min - assert net.stats["fc_out"].max == new_stats.max - assert len(w) == 3 - for warns in w: - assert issubclass(warns.category, DeprecationWarning) - assert "stats property of IENetwork is deprecated." 
in str(warns.message) - - @pytest.mark.skip(reason="Test is failed due-to ngraph conversion") def test_serialize(): ie = IECore() diff --git a/inference-engine/include/ie_icnn_network.hpp b/inference-engine/include/ie_icnn_network.hpp index cd26138..1df57ad 100644 --- a/inference-engine/include/ie_icnn_network.hpp +++ b/inference-engine/include/ie_icnn_network.hpp @@ -17,7 +17,6 @@ #include "ie_blob.h" #include "ie_common.h" #include "ie_data.h" -#include "ie_icnn_network_stats.hpp" #include "ie_iextension.h" #include "ie_input_info.hpp" #include "ie_layers.h" @@ -219,22 +218,6 @@ public: }; /** - * @deprecated Migrate to IR v10 and use quantization approach with FakeQuantize - * @brief Gets the statistics. - * @param stats The statistics - * @param resp Pointer to the response message that holds a description of an error if any occurred - * @return Status code of the operation - */ - IE_SUPPRESS_DEPRECATED_START - INFERENCE_ENGINE_INTERNAL("Migrate to IR v10 and use quantization approach with FakeQuantize") - virtual StatusCode getStats(ICNNNetworkStats** stats, ResponseDesc* resp) const noexcept { - (void)stats; - (void)resp; - return NOT_IMPLEMENTED; - }; - IE_SUPPRESS_DEPRECATED_END - - /** * @brief Serialize network to IR and weights files. * * @param xmlPath Path to output IR file. diff --git a/inference-engine/include/ie_icnn_network_stats.hpp b/inference-engine/include/ie_icnn_network_stats.hpp deleted file mode 100644 index 105fe11..0000000 --- a/inference-engine/include/ie_icnn_network_stats.hpp +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/** - * @brief This is a header file for the ICNNNetworkStats class - * - * @file ie_icnn_network_stats.hpp - */ -#pragma once - -#include -#include -#include -#include -#include - -#include "details/ie_irelease.hpp" - -namespace InferenceEngine { - -class NetworkNodeStats; - -/** - * @brief A shared pointer to the NetworkNodeStats object - */ -using NetworkNodeStatsPtr = std::shared_ptr; - -/** - * @brief A smart pointer to the NetworkNodeStats object - */ -using NetworkNodeStatsWeakPtr = std::weak_ptr; - -/** - * @brief A map of pairs: name of a layer and related statistics - */ -using NetworkStatsMap = std::map; - -/** - * @deprecated Migrate to IR v10 and use quantization approach with FakeQuantize - * @class ICNNNetworkStats - * @brief This is the interface to describe the NN topology scoring statistics - */ -class INFERENCE_ENGINE_INTERNAL("Migrate to IR v10 and use quantization approach with FakeQuantize") ICNNNetworkStats : public details::IRelease { -public: - /** - * @brief Sets a map which contains layers with statistics - * - * @param stats A map which is set - * Abstract method - */ - virtual void setNodesStats(const NetworkStatsMap& stats) = 0; - /** - * @brief Gets a map which contains layers with statistics - * - * Abstract method - * @return A NetworkStatsMap object - */ - virtual const NetworkStatsMap& getNodesStats() const = 0; - /** - * @brief Checks if a container is empty - * - * Abstract method - * @return A bool value which shows whether a container is empty - */ - virtual bool isEmpty() const = 0; -}; - -/** - * @deprecated Migrate to IR v10 and use quantization approach with FakeQuantize - * @class NetworkNodeStats - * @brief This class implements a container which stores statistics for a layer - */ -class INFERENCE_ENGINE_INTERNAL("Migrate to IR v10 and use quantization approach with FakeQuantize") NetworkNodeStats { -public: - /** - * 
@brief The constructor which creates NetworkNodeStats object - */ - NetworkNodeStats() {} - /** - * @brief The constructor which creates NetworkNodeStats object with filled statistics - * - * @param statCount The number of minimum/maximum values in statistics - */ - explicit NetworkNodeStats(int statCount) { - float mn = (std::numeric_limits::max)(); - float mx = (std::numeric_limits::min)(); - - IE_SUPPRESS_DEPRECATED_START_WIN - for (int i = 0; i < statCount; i++) { - _minOutputs.push_back(mn); - _maxOutputs.push_back(mx); - } - IE_SUPPRESS_DEPRECATED_END_WIN - } - -public: - /** - * @brief Vector of floats which contains minimum values of layers activations - */ - std::vector _minOutputs; - /** - * @brief Vector of floats which contains maximum values of layers activations - */ - std::vector _maxOutputs; -}; - -} // namespace InferenceEngine diff --git a/inference-engine/include/inference_engine.hpp b/inference-engine/include/inference_engine.hpp index 1e90694..70820e5 100644 --- a/inference-engine/include/inference_engine.hpp +++ b/inference-engine/include/inference_engine.hpp @@ -16,6 +16,5 @@ #include #include #include -#include #include #include diff --git a/inference-engine/include/vpu/vpu_plugin_config.hpp b/inference-engine/include/vpu/vpu_plugin_config.hpp index 5662cfc..adebe22 100644 --- a/inference-engine/include/vpu/vpu_plugin_config.hpp +++ b/inference-engine/include/vpu/vpu_plugin_config.hpp @@ -123,11 +123,13 @@ DECLARE_VPU_CONFIG_VALUE(NDHWC); DECLARE_VPU_CONFIG_KEY(CUSTOM_LAYERS); /** + * @deprecated IR statistic is not available in IR v10. * @brief Ignore statistic in IR by plugin. * Plugin could use statistic present in IR in order to try to improve calculations precision. * If you don't want statistic to be used enable this option. 
* This option should be used with values: CONFIG_VALUE(YES) or CONFIG_VALUE(NO) (default) */ +INFERENCE_ENGINE_DEPRECATED("IR statistic is not available in IR v10") DECLARE_VPU_CONFIG_KEY(IGNORE_IR_STATISTIC); /** diff --git a/inference-engine/src/cldnn_engine/cldnn_program.cpp b/inference-engine/src/cldnn_engine/cldnn_program.cpp index 3b6b37a..27b11a3 100644 --- a/inference-engine/src/cldnn_engine/cldnn_program.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_program.cpp @@ -84,7 +84,6 @@ #include #include #include -#include "cnn_network_int8_normalizer.hpp" #include "low_precision_transformations/transformer.hpp" #include "low_precision_transformations/eltwise.hpp" diff --git a/inference-engine/src/hetero_plugin/hetero_executable_network.cpp b/inference-engine/src/hetero_plugin/hetero_executable_network.cpp index 686f728..5c3e404 100644 --- a/inference-engine/src/hetero_plugin/hetero_executable_network.cpp +++ b/inference-engine/src/hetero_plugin/hetero_executable_network.cpp @@ -236,17 +236,12 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::ICNNNetw dumpGraph(network, subgraphs, file); } - InferenceEngine::ICNNNetworkStats* networkStats = nullptr; - if (StatusCode::OK != network.getStats(&networkStats, nullptr)) { - networkStats = nullptr; - } - std::vector descs; std::vector tempLayers; for (auto &&subgraph : subgraphs) { auto affinity = (*subgraph.begin())->affinity; tempLayers.assign(subgraph.begin(), subgraph.end()); - auto tempNetwork = cloneNet(tempLayers, networkStats); + auto tempNetwork = cloneNet(tempLayers); auto name = network.getName() + "_" + std::to_string(std::distance(subgraphs.data(), &subgraph)); tempNetwork->setName(name); // restoring some outputs from original net if they are not marked as output automatically diff --git a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.hpp b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.hpp index 562deca..53cce1d 100644 --- a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.hpp +++ b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.hpp @@ -91,10 +91,6 @@ public: void addOutput(const std::string& dataName); - StatusCode getStats(ICNNNetworkStats** stats, ResponseDesc* resp) const noexcept override { - return StatusCode::NOT_FOUND; - } - void Release() noexcept override { delete this; } diff --git a/inference-engine/src/legacy_api/include/cnn_network_impl.hpp b/inference-engine/src/legacy_api/include/cnn_network_impl.hpp index 4082a2e..ce511b7 100644 --- a/inference-engine/src/legacy_api/include/cnn_network_impl.hpp +++ b/inference-engine/src/legacy_api/include/cnn_network_impl.hpp @@ -11,7 +11,6 @@ #include #include "ie_ishape_infer_extension.hpp" -#include "cnn_network_stats_impl.hpp" #include "description_buffer.hpp" #include "ie_api.h" #include "ie_blob.h" @@ -131,12 +130,6 @@ public: void removeOutput(const std::string& dataName); - StatusCode getStats(ICNNNetworkStats** stats, ResponseDesc* /* resp */) const noexcept override { - if (stats == nullptr) return StatusCode::PARAMETER_MISMATCH; - *stats = _stats.get(); - return StatusCode::OK; - } - void Release() noexcept override { delete this; } @@ -161,7 +154,6 @@ protected: std::string _name; DataPtr _emptyData; ShapeInfer::ReshaperPtr _reshaper; - CNNNetworkStatsImplPtr _stats; }; IE_SUPPRESS_DEPRECATED_END diff --git a/inference-engine/src/legacy_api/include/cnn_network_int8_normalizer.hpp b/inference-engine/src/legacy_api/include/cnn_network_int8_normalizer.hpp deleted file mode 100644 
index 36392de..0000000 --- a/inference-engine/src/legacy_api/include/cnn_network_int8_normalizer.hpp +++ /dev/null @@ -1,262 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace InferenceEngine { -namespace details { - -/** - * We have raw statistic from stat collection tool and this statistic should be processed to get best - * accuracy. This transformation depends on the topology, depends on the parameters of layers. - * i.e. data going to regular and depth-wise convolution would be scaled differently. In case of - * regular convolution it should be scaled for tensor wide approach, for depth-wise convolution it - * should be scaled by channel approach. - * This class contains logic of getting scales - */ -class CNNStatisticHelper { -public: - /** - * We need to have topology to make a decision about scales - * @param network initial network to be quantized, the topology can be changed during quantization - * @param internalNodesStats initial statistic - * @param maxSign - maximal signed value to be used for calculation of scales - * @param maxUnsign - maximal unsigned value to be used for calculation of scales - * - */ - CNNStatisticHelper(CNNNetwork& network, const std::map& internalNodesStats, - int maxSign, int maxUnsign); - - /** - * Returns if we can quantize layer basing on information of existing statistic before and after - * layers - */ - bool canLayerBeQuantized(CNNLayer::Ptr layer) const; - - /** - * The topology is allowed to be changed, we need to modify statistic accordingly - * - * Currently there is a need in copy of statistic only - - * @param srcName name of layer from statistic needs to be taken - * @param dstName name of layer which statistic will be applied - */ - void copyStatistics(const std::string& srcName, const std::string& dstName); - - /** - * Returns boolean values if layer produce negative data according collected statistic - * true means that layer produices negative values - * false means that layer produces only positive numbers - * @param layer - layer of interest - * @param outputPort - number of port to verify. -1 stands forverification of all outputs from - * layer - */ - bool hasNegativeOutput(const std::string& layerName, int outputPort = -1) const; - - /** - * Returns input scale for layer based on statistic - * @return blob with scales per channel - */ - InferenceEngine::Blob::Ptr getInputScale(CNNLayer::Ptr layer) const; - - /** - * Returns output scale for layer based on statistic - * @return blob with scales per channel - */ - InferenceEngine::Blob::Ptr getOutputScale(CNNLayer::Ptr layer) const; - - /** - * provides max signed value as the only place for synchronization with other algorithms in - * normalizer which require this - */ - int getMaxSignValue() const; - - /** - * Returns a latest layer in fusion, the data from returned layer will go to anopther, this mean - * that for all layers which will be fused we will have to use only statistic from that latest layer - * @param layer - layer of interest - * - * @return returns layer which statistic should be used for calculatio of all scales for layer - * passed as a parameter for this method - */ - CNNLayer::Ptr getLatestInFuse(CNNLayer::Ptr layer) const; - -private: - /** - * Calculates scale factor according statistic for layer passed to this function. No other logic for - * selection another layer is implemented here. 
- * - * @param channels redundant parameter, should be removed - * @param stats redundant parameter, should be removed - * @param maxInt - we can quantize to I8 even if data is unsigned, need to provide such max number - * explicitly - * - * @return InferenceEngine::Blob::Ptr - */ - InferenceEngine::Blob::Ptr calculateScaleFactor(size_t channels, NetworkNodeStatsPtr stats, int maxInt) const; - - /** - * Select the latet layer in the fusion and returns its statistic - */ - NetworkNodeStatsPtr getStatistic(CNNLayer::Ptr layer) const; - - /** - * Pass over alls statistic and normalize it to the only scale per tenso, individual per channel or - * mix depenging on the pattern in the network - */ - void NormalizeStatistic(); - - CNNNetwork network_; - std::map internalNodesStats_; - int maxSign_; - int maxUnsign_; -}; - -/** - * This class normalizes and quantizes network to "Int8" state - * The converted network will have - * 1) scaleshifts which will normalize activation values to int8 (S8/U8) range - * 2) quantize weigths and biases of convolution - * 3) adds special attributes to layers because semantic of int8 layer are different vs floating - * point ones. For example, after convolution we need to return back to denormalized values and - * there should be special scale here - * 4) Transforms some layers to another ones. For example if i8 to i8 Scaleshift is not supported - * by backend, this scaleshift will be converted to grouped/(depth-wise in ideal case) convolution - * - * This class very depends on backend and its fusion. It assumes that fusion must be executed all - * the time, we cannot for split it to independent execution of two layers in int8 mode. This is - * done to calculate normalization factors the most optimal way to save accuracy. - * Currently supported fusion - * 1. Conv-ReLU - * 2. Conv-Sum-ReLU which is appeared from the pattern - * Conv Something - * \ / - * Eltwise - * ReLU - * Here, the output form "Something" will be used as in-place storge for accumulation of the - * results for convolution. That lead to tricky case in int8 when we have signed int8 input and - * unsigned u8 output - * */ -class INFERENCE_ENGINE_API_CLASS(CNNNetworkInt8Normalizer) { -public: - CNNNetworkInt8Normalizer() {} - -private: - /** Helper function for filling of scaleshift weights for normalization of activation */ - static void fillInScaleShift(ScaleShiftLayer* scshLayer, size_t c, float* weightsN, float* weightsD); - -public: - /** main function for calling of quantization */ - static void NormalizeNetwork(ICNNNetwork& network, ICNNNetworkStats& netStats); - -protected: - /** Helper function to add scaleshifts and other layers for transformatin of topology */ - static void AddLayerToCNNNetworkBeforeLayer(CNNLayer::Ptr newLayer, CNNLayer::Ptr successor, size_t port); - /** Helper function to add scaleshifts and other layers for transformatin of topology */ - static void AddLayerToCNNNetworkAfterData(DataPtr pData, CNNLayer::Ptr layer, const std::string& nextLayerName); - /** Adds ScaleShift between two specified layers */ - static void AddScaleShiftBetween(CNNNetwork& net, const CNNLayerPtr layer1, const CNNLayerPtr layer2, - CNNStatisticHelper& statHelper); - - /** creates dw convolution with unary weights and zero biases with i8 output and the same - * statistic. 
it will provide requantization from U8 to I8*/ - static CNNLayer::Ptr addU8ToI8Conversion(DataPtr data, CNNLayer::Ptr successor, CNNStatisticHelper& statHelper); - - /** - * Function which recalculate weights according to input scales, and quantize weights, biases and - * adds o-scale and w-scale - * w-scale - multiplication on this scale of i8 convolution result will produce denormalized fp32 - * data - * o-scale - multiplication on this scale will convert above denormalized fp32 to i8 for next layer - */ - static void QuantizeConvolutionOrFullyConnected(CNNLayer::Ptr convolution, CNNStatisticHelper& statHelper); - - /** Adds ScaleShifts everywhere */ - static void AddScaleShifts(CNNNetwork& net, CNNStatisticHelper& statHelper); - - /** Convert ReLu-like Clamps to ReLu layers */ - static void ClampsToReLU(CNNNetwork& net, CNNStatisticHelper& statHelper); - - /** - * Goes over all layers and mark which layers will be executed in FP32/I8 and marks data between - * layers to I8/U8/FP32 - */ - static void DefinesExecutionPrecision(CNNNetwork& net, CNNStatisticHelper& statHelper); - - /** - * Since o-scales exist only for convolutins, we need to propagate them down oever concats and - * linear layers - */ - static void PropagateScaleFactors(CNNNetwork& net, const CNNStatisticHelper& statHelper); - - /** - * Normalizes and quantizes srcData using scales for normalization and int8blob precision for - * quantization - */ - static void ScaleDataToInt(const float* srcData, size_t srcSize, Blob::Ptr int8blob, - const std::vector& scales); - - /** - * Replaces all ScaleShifts layers met in the model to the depth-wise convolution with the same - * weights and biases. - * - * Exceptions: - * 1. ScaleShift following after Input layer, it is not converted to depth-wise convolution - * 2. Scaleshift producing output of network - * 3. Scaleshift passing data to Priorbox - * - * This conversion allows to avoid introductin one more i8 primitive - ScaleShift accepting i8 input - * and producing i8 output - */ - static void replaceScaleShiftByDWConvolution(CNNNetwork& net); - - /** Helper function which creates DW/Grouped/regular convolution by passed weights and biases */ - static CNNLayer::Ptr createDWConvolutionForScale(const std::string& layerName, size_t channels, float* weights, - float* biases); - - /** - * Verifies if layer produces data to layers which marked as float - */ - static bool layerProducesFloat(const CNNLayer::Ptr layer); - - /** - * Returns tails from I8 to FP32 until convolution - it is the most performed approach because - * convolution can convert to FP32 for free, while adding one more scale will decrease performance - */ - static void returnTailToFP32(const CNNLayer::Ptr layer); - - /** - * Verifies whether layer can be potentially int8 - * @return true if layer does not have improper activation for fusion - */ - static bool canLayerBeI8(const CNNLayer::Ptr& layer); - - /** - * Verifies if next layer has type which potentially can be fused with convolution - * and if activation is supported for int8 - * @return true if layer does not have improper activation for fusion - */ - static bool isNextFusionAllowed(const CNNLayer::Ptr& layer); - -public: - /** - * Returns true for a "relu-like" clamp layer i.e. 
a clamp with minimum = 0 - */ - static bool isReLULikeClamp(CNNLayer::Ptr layer); -}; - -typedef std::shared_ptr CNNNetworkNormalizerPtr; - -} // namespace details -} // namespace InferenceEngine diff --git a/inference-engine/src/legacy_api/include/cnn_network_stats_impl.hpp b/inference-engine/src/legacy_api/include/cnn_network_stats_impl.hpp deleted file mode 100644 index 423cf1c..0000000 --- a/inference-engine/src/legacy_api/include/cnn_network_stats_impl.hpp +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "description_buffer.hpp" -#include "ie_api.h" -#include "ie_blob.h" -#include "ie_common.h" -#include "ie_data.h" - -namespace InferenceEngine { -namespace details { - -IE_SUPPRESS_DEPRECATED_START - -class INFERENCE_ENGINE_API_CLASS(CNNNetworkStatsImpl): public ICNNNetworkStats { -public: - CNNNetworkStatsImpl() = default; - virtual ~CNNNetworkStatsImpl(); - -public: - const NetworkStatsMap& getNodesStats() const override; - void setNodesStats(const NetworkStatsMap& stats) override; - bool isEmpty() const override { - return netNodesStats.empty(); - } - - void Release() noexcept override { - delete this; - } - -protected: - std::map netNodesStats; -}; - -typedef std::shared_ptr CNNNetworkStatsImplPtr; - -IE_SUPPRESS_DEPRECATED_END - -} // namespace details -} // namespace InferenceEngine diff --git a/inference-engine/src/legacy_api/include/ie_util_internal.hpp b/inference-engine/src/legacy_api/include/ie_util_internal.hpp index c22e41a..5f6a6d5 100644 --- a/inference-engine/src/legacy_api/include/ie_util_internal.hpp +++ b/inference-engine/src/legacy_api/include/ie_util_internal.hpp @@ -46,7 +46,7 @@ INFERENCE_ENGINE_API_CPP(CNNLayerPtr) clonelayer(const CNNLayer& source); * @return Cloned network */ INFERENCE_ENGINE_API_CPP(InferenceEngine::details::CNNNetworkImplPtr) -cloneNet(const std::vector& layers, const ICNNNetworkStats* networkStats); +cloneNet(const std::vector& layers); IE_SUPPRESS_DEPRECATED_END diff --git a/inference-engine/src/legacy_api/src/cnn_network_impl.cpp b/inference-engine/src/legacy_api/src/cnn_network_impl.cpp index 75d98e4..8870430 100644 --- a/inference-engine/src/legacy_api/src/cnn_network_impl.cpp +++ b/inference-engine/src/legacy_api/src/cnn_network_impl.cpp @@ -76,7 +76,7 @@ std::map getConstLayersMap(const ICNNNetwork& network) { ICNNNetwork::~ICNNNetwork() {} -CNNNetworkImpl::CNNNetworkImpl(): _stats(new CNNNetworkStatsImpl()) {} +CNNNetworkImpl::CNNNetworkImpl() {} CNNNetworkImpl::~CNNNetworkImpl() { // In case of cycles, memory leaks occur: Layer holds shared_ptr, and vice versa. diff --git a/inference-engine/src/legacy_api/src/cnn_network_int8_normalizer.cpp b/inference-engine/src/legacy_api/src/cnn_network_int8_normalizer.cpp deleted file mode 100644 index 5ef2672..0000000 --- a/inference-engine/src/legacy_api/src/cnn_network_int8_normalizer.cpp +++ /dev/null @@ -1,1774 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "cnn_network_int8_normalizer.hpp" - -#include -#include
-#include - -#include -#include -#include -#include -#include
-#include -#include -#include -#include -#include -#include -#include -#include - -#include "cnn_network_impl.hpp" -#include "cnn_network_stats_impl.hpp" -#include "ie_util_internal.hpp" - -IE_SUPPRESS_DEPRECATED_START - -using namespace std; -using namespace InferenceEngine; -using namespace InferenceEngine::details; - -using StatsMap = std::map; - -CNNStatisticHelper::CNNStatisticHelper(CNNNetwork& network, - const std::map& internalNodesStats, - int maxSign, int maxUnsign) { - internalNodesStats_ = internalNodesStats; - network_ = network; - maxSign_ = maxSign; - maxUnsign_ = maxUnsign; - - NormalizeStatistic(); -} - -bool CNNStatisticHelper::canLayerBeQuantized(CNNLayer::Ptr layer) const { - // verification of existing statistic for all inputs - for (const auto i : layer->insData) { - if (internalNodesStats_.find(i.lock()->getCreatorLayer().lock()->name) == internalNodesStats_.end()) { - return false; - } - } - // verification if there is a statistic for output of the layer - if ((layer->outData.size() > 1) && (internalNodesStats_.find(layer->name) == internalNodesStats_.end())) { - return false; - } - return true; -} - -void CNNStatisticHelper::copyStatistics(const std::string& srcName, const std::string& dstName) { - internalNodesStats_[dstName] = internalNodesStats_[srcName]; -} - -bool CNNStatisticHelper::hasNegativeOutput(const std::string& layerName, int outputPort) const { - // TODO(amalyshe) parameter outputPort is not used yet, logic of dedication to the port - // should be implemented - - NetworkNodeStatsPtr layerStat = internalNodesStats_.at(layerName); - for (auto v : layerStat->_minOutputs) { - if (v < 0.f) { - return true; - } - } - return false; -} - -InferenceEngine::Blob::Ptr CNNStatisticHelper::getInputScale(CNNLayer::Ptr layer) const { - auto inDataPtr = layer->insData[0].lock(); - if (inDataPtr == nullptr) - return nullptr; - auto previousLayer = inDataPtr->getCreatorLayer().lock(); - std::string inputLayerName = previousLayer->name; - - // for case when we have the only average pooling before, we need to take this - // statistic from input of avg pooling to compensate work of average pooling - // and to stay in int8 as much as we can - if (previousLayer->type == "Pooling" && - (previousLayer->precision == Precision::I8 || previousLayer->precision == Precision::U8)) { - // take input name to the pooling - auto prevInDataPtr = previousLayer->insData[0].lock(); - if (prevInDataPtr == nullptr) - return nullptr; - inputLayerName = prevInDataPtr->getCreatorLayer().lock()->name; - } - size_t inputChannels = inDataPtr->getTensorDesc().getDims()[1]; - if (getStatistic(previousLayer)->_minOutputs.size() != inputChannels || - getStatistic(previousLayer)->_maxOutputs.size() != inputChannels) { - THROW_IE_EXCEPTION << "min and max sizes should be equal to input channels count for " << previousLayer->name; - } - - // current normalization algorithm can have nodes with fp32 edges. it can happen only in places - // of initial quantization of int8 chains. Currently adding scaleshift adds certain I8/U8 precision - // but calcualtion of scales happens before adding of scale shifts. - // for fixing problem with cases of not determined yet presision and for following of - // quantizatoin scheme defined by normalizer, we are adding here verification of negative output - // in some cases and then verify exact precision of I8/U8 on node for covering of fully determined cases - int maxValue = hasNegativeOutput(previousLayer->name) ? 
maxSign_ : maxUnsign_; - if (previousLayer->outData[0]->getPrecision() == Precision::U8) { - maxValue = maxUnsign_; - } else if (previousLayer->outData[0]->getPrecision() == Precision::I8) { - maxValue = maxSign_; - } - - return calculateScaleFactor(inputChannels, getStatistic(previousLayer), maxValue); -} - -InferenceEngine::Blob::Ptr CNNStatisticHelper::getOutputScale(CNNLayer::Ptr layer) const { - // TODO(amalyshe) for now we are looking to precision on the data node - size_t outputChannels = layer->outData[0]->getTensorDesc().getDims()[1]; - if (layer->outData.size() != 1) { - THROW_IE_EXCEPTION << "Trying to get scales after layer having multiple output ports"; - } - - auto it = internalNodesStats_.find(layer->name); - if (it == internalNodesStats_.end()) { - return std::shared_ptr(); - } - - if (getStatistic(layer)->_minOutputs.size() != outputChannels || - getStatistic(layer)->_maxOutputs.size() != outputChannels) { - THROW_IE_EXCEPTION << "min and max sizes should be equal to output channels count for " << layer->name; - } - - return calculateScaleFactor(outputChannels, getStatistic(layer), - layer->outData[0]->getPrecision() == Precision::I8 ? maxSign_ : maxUnsign_); -} - -int CNNStatisticHelper::getMaxSignValue() const { - return maxSign_; -} - -InferenceEngine::Blob::Ptr CNNStatisticHelper::calculateScaleFactor(size_t channels, NetworkNodeStatsPtr stats, - int maxInt) const { - if (stats->_minOutputs.size() != channels || stats->_maxOutputs.size() != channels) { - THROW_IE_EXCEPTION << "min and max sizes should be equal to channels count"; - } - - // Creating i-scale blob - std::shared_ptr iScaleData = - std::shared_ptr(new Data("scale", {Precision::FP32, {channels}, Layout::C})); - auto iScale = CreateBlobFromData(iScaleData); - iScale->allocate(); - float* iScaleMemory = static_cast(iScale->buffer()); - - for (int c = 0; c < channels; c++) { - // maxc = fmax(maxc, fabs(stats[k]->_minOutputs[c])); // TODO Check if we should take minimums into - // account - float maxc = fabs(stats->_maxOutputs[c]); - maxc = fmax(maxc, fabs(stats->_minOutputs[c])); - - iScaleMemory[c] = maxc / static_cast(maxInt); - - if (fabs(iScaleMemory[c]) < 1e-7) { - iScaleMemory[c] = 1.0f; - } - } - return iScale; -} - -NetworkNodeStatsPtr CNNStatisticHelper::getStatistic(CNNLayer::Ptr layer) const { - // TODO(amalyshe) all logic of traversing over network and get apropriate statistics should be here - // for now it is a stub - auto it = internalNodesStats_.find(getLatestInFuse(layer)->name); - if (it != internalNodesStats_.end()) { - return it->second; - } - THROW_IE_EXCEPTION << "no stat for layer " << getLatestInFuse(layer)->name; -} - -CNNLayer::Ptr CNNStatisticHelper::getLatestInFuse(CNNLayer::Ptr layer) const { - if (layer->outData[0]->getInputTo().size() == 1 && - (CaselessEq()(layer->outData[0]->getInputTo().begin()->second->type, "relu") || - CNNNetworkInt8Normalizer::isReLULikeClamp(layer->outData[0]->getInputTo().begin()->second))) { - return layer->outData[0]->getInputTo().begin()->second; - } - // Conv-Sum-ReLU fuse - // We need to return original layer if it will be used as a sum parame and ReLU if - // iterating over outputs of pointed layer and look for the only eltwise - CNNLayer::Ptr eltwise = nullptr; - if (layer->outData.size() == 1) { - for (auto it : layer->outData[0]->getInputTo()) { - if (CaselessEq()(it.second->type, "eltwise")) { - if (eltwise) { - THROW_IE_EXCEPTION << "Pattern when one layer pass data to several eltwise layers are not " - "supported in int8 quantization"; - } - 
eltwise = it.second; - } - } - } - - if (eltwise) { - // if current layer is not a convolution return it as finish of fuse - if (!CaselessEq()(layer->type, "convolution")) { - return layer; - } else { - // look to the ports of eltwise - if (eltwise->insData[0].lock() != nullptr - && eltwise->insData[1].lock() != nullptr - && eltwise->insData[1].lock()->getCreatorLayer().lock() == layer - && CaselessEq()(eltwise->insData[0].lock()->getCreatorLayer().lock()->type, "convolution") - && eltwise->insData[0].lock()->getInputTo().size() == 1) { - // this is a case when two convolutions come to eltwise, the second one will be selected for fuse, - // first will be used as sum operator - return layer; - } - // given layer is a convolution and will be used for fuse, but we need to verify if there is ReLU after - // eltwise - if (eltwise->outData[0]->getInputTo().size() == 1 && - (CaselessEq()(eltwise->outData[0]->getInputTo().begin()->second->type, "relu") || - CNNNetworkInt8Normalizer::isReLULikeClamp(eltwise->outData[0]->getInputTo().begin()->second))) { - return eltwise->outData[0]->getInputTo().begin()->second; - } - return eltwise; - } - } - - return layer; -} - -void CNNStatisticHelper::NormalizeStatistic() { - StatsMap newMap; - - // In case when we have statistics in negative range when min clamped value is 0, - // we are changing statistics here to non negative. This is not fully correct behaviour since - // it can extend range and affect accuracy, but this approach works quite well - std::vector sortedLayersRC = CNNNetSortTopologically(network_); - for (auto l : sortedLayersRC) { - if (CNNNetworkInt8Normalizer::isReLULikeClamp(l)) { - if (l->outData.size() == 1) { - size_t outputChannels = l->outData[0]->getTensorDesc().getDims()[1]; - auto oldStat = internalNodesStats_.find(l->name); - if ((oldStat != internalNodesStats_.end()) && outputChannels > 1) { - for (size_t q = 0; q < oldStat->second->_minOutputs.size(); q++) { - oldStat->second->_minOutputs[q] = 0.f; - } - } - } - } - } - - float dummy = 0.0f; - - std::vector sortedLayers = CNNNetSortTopologically(network_); - for (auto l : sortedLayers) { - // if layer's statistic exists in the newMap, ignore it - if (newMap.find(l->name) != newMap.end()) { - continue; - } - // verify if layer is starter layer for propagating of statistic - bool isStarterLayer = false; - - // a case if we do not have converted statistic before the current layer - // go over all inputs and verify if statistic exists for all of inputs - bool allInputsHaveStatistics = true; - for (auto i : l->insData) { - if (newMap.find(i.lock()->getCreatorLayer().lock()->name) == newMap.end()) { - allInputsHaveStatistics = false; - break; - } - } - // if we do not have statistic - verify who is consumer of this layer - if (!allInputsHaveStatistics) { - if (l->outData.size() == 1) { - for (auto it : l->outData[0]->getInputTo()) { - if (CaselessEq()(it.second->type, "scaleshift") || - CaselessEq()(it.second->type, "convolution") || - CaselessEq()(it.second->type, "fullyconnected")) { - isStarterLayer = true; - break; - } - } - } - } else { - isStarterLayer = true; - } - if (CaselessEq()(l->type, "scaleshift") || CaselessEq()(l->type, "convolution") || - CaselessEq()(l->type, "fullyconnected")) { - isStarterLayer = true; - } - - if (!isStarterLayer) { - continue; - } - - // we do not support yet layers for quantization which split data - if (l->outData.size() != 1) { - continue; - } - - InferenceEngine::NetworkNodeStatsPtr currentStat = std::make_shared(); - - bool perChannelScale = 
true; - - if (CaselessEq()(l->type, "concat") && l->outData.size() == 1 && - l->outData[0]->getTensorDesc().getDims().size() == 4 && allInputsHaveStatistics) { - size_t concatLayerIdx = 0; - for (int k = 0; k < l->insData.size(); k++) { - auto prevKLayer = l->insData[k].lock()->getCreatorLayer().lock(); - // looking for the statistic for prevKLayer - auto kLayerStat = newMap.find(prevKLayer->name); - if (kLayerStat != newMap.end()) { - for (size_t ikStat = 0; ikStat < kLayerStat->second->_maxOutputs.size(); - ikStat++, concatLayerIdx++) { - currentStat->_maxOutputs.push_back(kLayerStat->second->_maxOutputs[ikStat]); - currentStat->_minOutputs.push_back(kLayerStat->second->_minOutputs[ikStat]); - } - } else { - THROW_IE_EXCEPTION << "We have incomplete statistic for predecessors of concat layer " << l->name; - } - } - } else if (CaselessEq()(l->type, "resample")) { - if (l->insData.size() == 1) { - CNNLayerPtr creator = l->insData[0].lock()->getCreatorLayer().lock(); - if (CaselessEq()(creator->type, "concat")) { - auto concatStat = newMap[creator->name]; - currentStat->_maxOutputs = concatStat->_maxOutputs; - currentStat->_minOutputs = concatStat->_minOutputs; - newMap[l->name] = currentStat; - } else { - auto itOld = internalNodesStats_.find(l->name); - if (itOld != internalNodesStats_.end()) { - currentStat->_maxOutputs = itOld->second->_maxOutputs; - currentStat->_minOutputs = itOld->second->_minOutputs; - newMap[l->name] = currentStat; - } - } - } - } else { - // go over all children until we get convoluition, scaleshift, eltwise or unknown layer - // layers Pooling and ReLU are passthrough - // to understand the granularity of the scaling - // layer concat is a layer which produce statistics and waterfall it down - std::vector toAnalyze; - for (auto it : l->outData[0]->getInputTo()) { - toAnalyze.push_back(it.second); - } - - if (CaselessEq()(l->type, "eltwise")) { - perChannelScale = false; - } - while (!toAnalyze.empty() && perChannelScale) { - CNNLayer::Ptr tl = toAnalyze.back(); - toAnalyze.pop_back(); - if (CaselessEq()(tl->type, "pooling") || CaselessEq()(tl->type, "relu") || - CNNNetworkInt8Normalizer::isReLULikeClamp(tl) || CaselessEq()(tl->type, "concat")) { - if (tl->outData.size() == 1) { - for (auto it : tl->outData[0]->getInputTo()) { - toAnalyze.push_back(it.second); - } - } - } else if (CaselessEq()(tl->type, "convolution")) { - // verify number of groups - ConvolutionLayer* pConv = dynamic_cast(tl.get()); - if (pConv == nullptr) { - THROW_IE_EXCEPTION << "Layer " << tl->name << " is not instance of ConvolutionLayer class"; - } - if (pConv->_group != pConv->_out_depth) { - perChannelScale = false; - } - } else if (CaselessEq()(tl->type, "eltwise")) { - perChannelScale = false; - } - } - - auto itOld = internalNodesStats_.find(getLatestInFuse(l)->name); - if (itOld == internalNodesStats_.end()) { - itOld = internalNodesStats_.find(l->name); - } - if (itOld != internalNodesStats_.end()) { - if (!perChannelScale) { - currentStat->_maxOutputs.resize(itOld->second->_maxOutputs.size()); - if (!itOld->second->_maxOutputs.empty()) { - float max = FLT_MIN; - DataStats::GetDataAbsMax(&itOld->second->_maxOutputs[0], itOld->second->_maxOutputs.size(), - max); - std::fill(currentStat->_maxOutputs.begin(), currentStat->_maxOutputs.end(), max); - } - - currentStat->_minOutputs.resize(itOld->second->_minOutputs.size()); - if (!itOld->second->_minOutputs.empty()) { - float min = FLT_MAX; - DataStats::GetDataMinMax(&itOld->second->_minOutputs[0], itOld->second->_minOutputs.size(), min, - 
dummy); - std::fill(currentStat->_minOutputs.begin(), currentStat->_minOutputs.end(), min); - } - } else { - currentStat->_maxOutputs = itOld->second->_maxOutputs; - currentStat->_minOutputs = itOld->second->_minOutputs; - } - } - - if (l->outData.size() == 1) { - size_t ch_indx = l->outData[0]->getTensorDesc().getDims().size() > 1 ? 1 : 0; - size_t outputChannels = l->outData[0]->getTensorDesc().getDims()[ch_indx]; - auto oldStat = internalNodesStats_.find(l->name); - if ((oldStat != internalNodesStats_.end()) && outputChannels > 1 && - oldStat->second->_minOutputs.size() == 1) { - auto min = oldStat->second->_minOutputs[0]; - auto max = oldStat->second->_maxOutputs[0]; - - currentStat->_minOutputs = std::vector(outputChannels); - currentStat->_maxOutputs = std::vector(outputChannels); - std::fill(currentStat->_minOutputs.begin(), currentStat->_minOutputs.end(), min); - std::fill(currentStat->_maxOutputs.begin(), currentStat->_maxOutputs.end(), max); - } - } - } - - // propagate this statistic to all layers without scale in primitives - if (!currentStat->_maxOutputs.empty() && !currentStat->_minOutputs.empty()) { - std::vector toAnalyze; - toAnalyze.push_back(l); - while (!toAnalyze.empty()) { - CNNLayer::Ptr tl = toAnalyze.back(); - toAnalyze.pop_back(); - newMap[tl->name] = currentStat; - if (tl->outData.size() == 1) { - for (auto it : tl->outData[0]->getInputTo()) { - if (CaselessEq()(it.second->type, "pooling") || - CaselessEq()(it.second->type, "relu") || - CNNNetworkInt8Normalizer::isReLULikeClamp(it.second)) { - toAnalyze.push_back(it.second); - } - } - } - } - } - } - - internalNodesStats_ = newMap; -} - -void CNNNetworkInt8Normalizer::AddLayerToCNNNetworkBeforeLayer(CNNLayer::Ptr newLayer, CNNLayer::Ptr successor, - size_t port) { - // verify if data exists - if (newLayer && successor && successor->insData.size() > port) { - // get the insData - DataPtr pData = successor->insData[port].lock(); - - Data* edge2 = new Data(*pData.get()); - DataPtr newEdge(edge2); - newEdge->getInputTo().clear(); - newEdge->getInputTo()[successor->name] = successor; - newEdge->setName(newLayer->name); - newEdge->getCreatorLayer() = newLayer; - successor->insData[port] = newEdge; - newLayer->outData.push_back(newEdge); - - newLayer->insData.push_back(pData); - pData->getInputTo().erase(successor->name); - pData->getInputTo()[newLayer->name] = newLayer; - } else { - THROW_IE_EXCEPTION << "Invalid argument"; - } -} - -CNNLayer::Ptr CNNNetworkInt8Normalizer::addU8ToI8Conversion(DataPtr data, CNNLayer::Ptr successor, - CNNStatisticHelper& statHelper) { - if (data->getPrecision() == Precision::U8 || data->getPrecision() == Precision::I8) { - size_t c = static_cast(data->getDims()[1]); - - std::vector ssWValues; - std::vector ssSValues; - for (auto i = 0; i < c; i++) { - ssWValues.push_back(1.0f); - ssSValues.push_back(0.0f); - } - std::string layerName = data->getCreatorLayer().lock()->name + "_Eltwise_ScaleShift_U8I8_" + successor->name; - CNNLayer::Ptr newLayer = createDWConvolutionForScale(layerName, c, ssWValues.data(), ssSValues.data()); - newLayer->precision = Precision::I8; - - for (size_t i = 0; i < successor->insData.size(); i++) { - if (successor->insData[i].lock() == data) { - AddLayerToCNNNetworkBeforeLayer(newLayer, successor, i); - - // update statistic to pass quantization smoothly - if (newLayer->insData[0].lock() == nullptr) - continue; - std::string inputLayerName = newLayer->insData[0].lock()->getCreatorLayer().lock()->name; - statHelper.copyStatistics(inputLayerName, layerName); - if 
(data->getPrecision() == Precision::U8) { - newLayer->outData[0]->setPrecision(Precision::I8); - } else { - newLayer->outData[0]->setPrecision(Precision::U8); - } - } - } - return newLayer; - } - return nullptr; -} - -void CNNNetworkInt8Normalizer::AddLayerToCNNNetworkAfterData(DataPtr pData, CNNLayer::Ptr layer, - const std::string& nextLayerName) { - // verify if data exists - if (pData && layer && pData->getCreatorLayer().lock() && - pData->getInputTo().find(nextLayerName) != pData->getInputTo().end()) { - CNNLayerPtr nextLayer = pData->getInputTo()[nextLayerName]; - - DataPtr newEdgeAfterLayer(new Data(*pData.get())); - newEdgeAfterLayer->setName(layer->name); - newEdgeAfterLayer->getCreatorLayer() = layer; - newEdgeAfterLayer->getInputTo().clear(); - newEdgeAfterLayer->getInputTo()[nextLayerName] = nextLayer; - newEdgeAfterLayer->setPrecision(Precision::FP32); - - pData->getInputTo().erase(nextLayerName); - pData->getInputTo()[layer->name] = layer; - - layer->insData.push_back(pData); - layer->outData.push_back(newEdgeAfterLayer); - - for (size_t i = 0; i < nextLayer->insData.size(); i++) { - if (nextLayer->insData[i].lock() == pData) { - nextLayer->insData[i] = newEdgeAfterLayer; - } - } - } else { - THROW_IE_EXCEPTION << "Invalid argument"; - } -} - -void CNNNetworkInt8Normalizer::fillInScaleShift(ScaleShiftLayer* scshLayer, size_t c, float* weightsN, - float* weightsD) { - // Setting "scales" - SizeVector weightsSize = {c}; - TensorDesc weightsDesc(Precision::FP32, weightsSize, InferenceEngine::C); - scshLayer->_weights = InferenceEngine::make_shared_blob(weightsDesc); - scshLayer->_weights->allocate(); - float* weightsData = scshLayer->_weights->buffer(); - for (size_t i = 0; i < c; i++) { - if (weightsN == nullptr && weightsD != nullptr) { - weightsData[i] = 1.0 / weightsD[i]; - } else if (weightsD == nullptr && weightsN != nullptr) { - weightsData[i] = weightsN[i]; - } else if (weightsN != nullptr && weightsD != nullptr) { - weightsData[i] = weightsN[i] / weightsD[i]; - } else { - weightsData[i] = 1.0; - } - } - - // Setting "shifts" - SizeVector shiftsSize = {c}; - TensorDesc shiftsDesc(Precision::FP32, shiftsSize, InferenceEngine::C); - scshLayer->_biases = InferenceEngine::make_shared_blob(shiftsDesc); - scshLayer->_biases->allocate(); - float* biasesData = scshLayer->_biases->buffer(); - for (size_t i = 0; i < c; i++) { - biasesData[i] = 0.f; // Setting to constant "0" - } -} - -void CNNNetworkInt8Normalizer::AddScaleShiftBetween(CNNNetwork& net, const CNNLayerPtr layer1, const CNNLayerPtr layer2, - CNNStatisticHelper& statHelper) { - if (CaselessEq()(layer2->type, "priorbox") || - CaselessEq()(layer2->type, "priorboxclustered")) { - return; - } - - // Searching the connection between the layers - int l1_out_i = 0; - for (; l1_out_i < layer1->outData.size(); l1_out_i++) { - if (layer1->outData[l1_out_i]->getInputTo().find(layer2->name) != - layer1->outData[l1_out_i]->getInputTo().end()) { - break; - } - } - if (l1_out_i == layer1->outData.size()) { - THROW_IE_EXCEPTION << "Can't find layer " << layer2->name << " among layer " << layer1->name << " outputs"; - } - - int l2_in_i = 0; - for (; l2_in_i < layer2->insData.size(); l2_in_i++) { - if (layer2->insData[l2_in_i].lock() != nullptr - && layer2->insData[l2_in_i].lock()->getCreatorLayer().lock() == layer1) { - break; - } - } - if (l2_in_i == layer2->insData.size()) { - THROW_IE_EXCEPTION << "Can't find layer " << layer2->name << " among layer " << layer1->name << " inputs"; - } - - DataPtr outData = 
layer1->outData[l1_out_i]; - - Blob::Ptr oScaleBlob = nullptr; - if (layer1->blobs.find("o-scale") != layer1->blobs.end()) { - oScaleBlob = layer1->blobs["o-scale"]; - } - - Blob::Ptr iScaleBlob = nullptr; - if (layer2->blobs.find("i-scale") != layer2->blobs.end()) { - iScaleBlob = layer2->blobs["i-scale"]; - } - - if (iScaleBlob == nullptr && oScaleBlob == nullptr) { - return; // No multipliers found around this edge. We can't create a ScaleShift here; - } else { - // Creating a ScaleShiftLayer - std::string prefix; - float *iScaleBuffer = nullptr, *oScaleBuffer = nullptr; - if (oScaleBlob != nullptr) { - oScaleBuffer = static_cast(oScaleBlob->buffer()); - prefix += "o"; - } - if (iScaleBlob != nullptr) { - iScaleBuffer = static_cast(iScaleBlob->buffer()); - prefix += "i"; - } - - std::string layerName = layer1->name + "_" + prefix + "ScaleShift_" + layer2->name; - LayerParams ssCnnLayerParams {layerName, "ScaleShift", Precision::FP32}; - CNNLayerPtr ssCnnLayer(new ScaleShiftLayer(ssCnnLayerParams)); - - AddLayerToCNNNetworkAfterData(outData, ssCnnLayer, layer2->name); - - size_t c = static_cast(outData->getDims()[1]); - - { - ScaleShiftLayer* scshLayer = dynamic_cast(ssCnnLayer.get()); - if (scshLayer == nullptr) { - THROW_IE_EXCEPTION << "Layer " << ssCnnLayer->name << " is not instance of ScaleShiftLayer class"; - } - fillInScaleShift(scshLayer, c, oScaleBuffer, iScaleBuffer); - } - - Precision odPrecision = Precision::FP32; - if (layer2->precision == Precision::I8) { - odPrecision = statHelper.hasNegativeOutput(layer1->name) ? Precision::I8 : Precision::U8; - } - ssCnnLayer->outData[0]->setPrecision(odPrecision); - } -} - -void CNNNetworkInt8Normalizer::AddScaleShifts(CNNNetwork& net, CNNStatisticHelper& statHelper) { - std::vector sortedLayers = CNNNetSortTopologically(net); - - std::vector> pairs; - - for (auto iter : sortedLayers) { - for (int l1_out_i = 0; l1_out_i < iter->outData.size(); l1_out_i++) { - for (auto nextIter : iter->outData[l1_out_i]->getInputTo()) { - CNNLayer::Ptr next = nextIter.second; - - // Checking for an INT8 convolution or fully connected with FP32 output - if ((CaselessEq()(iter->type, "Convolution") || - CaselessEq()(iter->type, "FullyConnected")) && - iter->precision == Precision::I8 && next->precision == Precision::FP32 && - iter->outData[l1_out_i]->getPrecision() == Precision::FP32) { - // Do nothing here only if iter provides data to fp32 layers - // MKLDNNPlugin will generate x8->f32 convolution - - } else if ((iter->precision != Precision::FP32 && next->precision == Precision::FP32) || - (iter->precision == Precision::FP32 && next->precision != Precision::FP32)) { - pairs.push_back(std::pair(iter, next)); - } - } - } - } - - for (auto& pair : pairs) { - AddScaleShiftBetween(net, pair.first, pair.second, statHelper); - } -} - -void CNNNetworkInt8Normalizer::ClampsToReLU(CNNNetwork& net, CNNStatisticHelper& statHelper) { - std::vector sortedLayers = CNNNetSortTopologically(net); - - for (auto iter : sortedLayers) { - if (isReLULikeClamp(iter) && (iter->precision == Precision::I8 || iter->precision == Precision::U8)) { - std::string layerName = iter->name + "_ReLU"; - LayerParams ssCnnLayerParams {layerName, "ReLU", iter->precision}; - CNNLayerPtr ssCnnLayer(new ReLULayer(ssCnnLayerParams)); - - auto previousLayer = iter->insData[0].lock()->getCreatorLayer().lock(); - ssCnnLayer->insData.push_back(iter->insData[0]); - if (ssCnnLayer->insData[0].lock() == nullptr) - continue; - ssCnnLayer->insData[0].lock()->getInputTo().erase(iter->name); - 
ssCnnLayer->insData[0].lock()->getInputTo()[iter->name] = ssCnnLayer; - - ssCnnLayer->outData.push_back(iter->outData[0]); - ssCnnLayer->outData[0]->getCreatorLayer() = ssCnnLayer; - - iter->insData.clear(); - iter->outData.clear(); - } - } -} - -void CNNNetworkInt8Normalizer::ScaleDataToInt(const float* srcData, size_t srcSize, Blob::Ptr int8blob, - const std::vector& scales) { - if (scales.size() == 0 || /*srcblob->size()*/ srcSize % scales.size() != 0) { - THROW_IE_EXCEPTION << "Wrong number of scale factors"; - } - - size_t channels = scales.size(); - size_t channelSize = /*srcblob->size()*/ srcSize / channels; - - const float* data = srcData; - if (int8blob->getTensorDesc().getPrecision() == Precision::I8) { - int8_t* int8data = static_cast(int8blob->buffer()); - int minValue = std::numeric_limits::min(); - int maxValue = std::numeric_limits::max(); - - size_t offset; - - float val; - - for (size_t ch = 0; ch < channels; ch++) { - offset = channelSize * ch; - - for (size_t i = 0; i < channelSize; i++) { - val = data[offset + i] * scales[ch]; - - if (val > maxValue) { - val = maxValue; - } else if (val < minValue) { - val = minValue; - } - - int8data[offset + i] = round(val); - } - } - } else if (int8blob->getTensorDesc().getPrecision() == Precision::I32) { - int32_t* int32data = static_cast(int8blob->buffer()); - int maxValue = std::numeric_limits::max(); - int minValue = std::numeric_limits::min(); - - size_t offset; - - float val; - - for (size_t ch = 0; ch < channels; ch++) { - offset = channelSize * ch; - - for (size_t i = 0; i < channelSize; i++) { - val = data[offset + i] * scales[ch]; - - if (val > maxValue) { - val = maxValue; - } else if (val < minValue) { - val = minValue; - } - - int32data[offset + i] = round(val); - } - } - } -} - -CNNLayer::Ptr CNNNetworkInt8Normalizer::createDWConvolutionForScale(const std::string& layerName, size_t channels, - float* ssWValues, float* ssSValues) { - // create new Convolution layer - LayerParams params; - params.name = layerName; - params.precision = Precision::FP32; - params.type = "Convolution"; - - CNNLayerPtr lptr = std::make_shared(params); - auto* pConv = dynamic_cast(lptr.get()); - if (pConv == nullptr) { - THROW_IE_EXCEPTION << "Layer " << lptr->name << " is not instance of ConvolutionLayer class"; - } - - pConv->_kernel.insert(X_AXIS, 1); - pConv->_kernel.insert(Y_AXIS, 1); - pConv->_stride.insert(X_AXIS, 1); - pConv->_stride.insert(Y_AXIS, 1); - pConv->_padding.insert(X_AXIS, 0); - pConv->_padding.insert(Y_AXIS, 0); - pConv->_pads_end.insert(X_AXIS, 0); - pConv->_pads_end.insert(Y_AXIS, 0); - pConv->_dilation.insert(X_AXIS, 1); - pConv->_dilation.insert(Y_AXIS, 1); - - pConv->_out_depth = channels; - // mkl-dnn does not have i8 depthwise convolution accepting signed i8 input - // when it is available, need to uncomment below lines - - // workaround - creation of new weights for simple convolution - if (pConv->_out_depth % 16 == 0) { - pConv->_group = pConv->_out_depth / 16; - Blob::Ptr weights = nullptr; - std::shared_ptr wData = - std::shared_ptr(new Data("weights", {Precision::FP32, {pConv->_out_depth * 16}, Layout::C})); - weights = CreateBlobFromData(wData); - weights->allocate(); - float* buffer = weights->buffer().as(); - size_t iDist = 0, iSrc = 0; - for (size_t g = 0; g < pConv->_group; g++) { - for (size_t k = 0; k < 16; k++) { - for (size_t s = 0; s < 16; s++) { - buffer[iDist++] = (s == k) ? 
ssWValues[iSrc++] : 0.f; - } - } - } - pConv->_weights = weights; - pConv->blobs["weights"] = weights; - } else { - Blob::Ptr weights = nullptr; - std::shared_ptr wData = std::shared_ptr( - new Data("weights", {Precision::FP32, {pConv->_out_depth * pConv->_out_depth}, Layout::C})); - weights = CreateBlobFromData(wData); - weights->allocate(); - float* buffer = weights->buffer().as(); - for (size_t i = 0, idx = 0; i < pConv->_out_depth; i++) { - for (size_t j = 0; j < pConv->_out_depth; j++) { - if (i == j) { - buffer[idx] = ssWValues[i]; - } else { - buffer[idx] = 0.f; - } - idx++; - } - } - pConv->_weights = weights; - pConv->blobs["weights"] = weights; - pConv->_group = 1; - } - // end of workaround - - // fililng of biases - Blob::Ptr biasesBlob = nullptr; - std::shared_ptr bData = - std::shared_ptr(new Data("biases", {Precision::FP32, {pConv->_out_depth}, Layout::C})); - biasesBlob = CreateBlobFromData(bData); - biasesBlob->allocate(); - float* bufferBiases = biasesBlob->buffer().as(); - for (size_t c = 0; c < pConv->_out_depth; c++) { - bufferBiases[c] = ssSValues[c]; - } - pConv->_biases = biasesBlob; - - pConv->blobs["weights"] = pConv->_weights; - pConv->blobs["biases"] = pConv->_biases; - return lptr; -} - -void CNNNetworkInt8Normalizer::replaceScaleShiftByDWConvolution(CNNNetwork& net) { - std::vector sortedLayers = CNNNetSortTopologically(net); - for (auto layer : sortedLayers) { - if (CaselessEq()(layer->type, "scaleshift") && - layer->insData[0].lock()->getCreatorLayer().lock() && - !CaselessEq()(layer->insData[0].lock()->getCreatorLayer().lock()->type, "input") && - layer->outData[0]->getInputTo().size() > 0) { - const auto dims = layer->insData[0].lock()->getTensorDesc().getDims(); - // only four or five dimensions Convolution layers are supported - if ((dims.size() == 4) || (dims.size() == 5)) { - // verification if this layer does not pass data to PriorBox, if it passes, we do not substitute - bool notToPriorBox = true; - for (auto o : layer->outData[0]->getInputTo()) { - if (CaselessEq()(o.second->type, "priorbox") || - CaselessEq()(o.second->type, "priorboxclustered")) { - notToPriorBox = false; - } - } - if (notToPriorBox) { - ScaleShiftLayer* pSS = dynamic_cast(layer.get()); - float* ssWValues = pSS->_weights->buffer().as(); - float* ssSValues = pSS->_biases->buffer().as(); - CNNLayer::Ptr newLayer = createDWConvolutionForScale( - layer->name, layer->outData[0]->getTensorDesc().getDims()[1], ssWValues, ssSValues); - - newLayer->outData = layer->outData; - newLayer->outData[0]->getCreatorLayer() = newLayer; - newLayer->insData = layer->insData; - if (newLayer->insData[0].lock() == nullptr) - continue; - newLayer->insData[0].lock()->getInputTo().erase(layer->name); - newLayer->insData[0].lock()->getInputTo()[newLayer->name] = newLayer; - } - } - } - } -} - -void CNNNetworkInt8Normalizer::QuantizeConvolutionOrFullyConnected(CNNLayer::Ptr target_layer, - CNNStatisticHelper& statHelper) { - size_t inputChannels = target_layer->insData[0].lock()->getTensorDesc().getDims()[1]; - size_t outputChannels = target_layer->outData[0]->getTensorDesc().getDims()[1]; - - auto iScale = statHelper.getInputScale(target_layer); - if (iScale == nullptr) - THROW_IE_EXCEPTION << "Layer '" << target_layer->name << "'has invalid scale"; - - target_layer->blobs["i-scale"] = iScale; - - Blob::Ptr weights = nullptr; - Blob::Ptr biases = nullptr; - - Blob::Ptr int8weights = nullptr; - Blob::Ptr int32biases = nullptr; - - if (target_layer->blobs.find("weights") != target_layer->blobs.end()) { - 
weights = target_layer->blobs["weights"]; - - // Creating int8 weights blob - std::shared_ptr int8WeightsData = - std::shared_ptr(new Data("weights", TensorDesc(Precision::I8, weights->getTensorDesc().getDims(), - weights->getTensorDesc().getLayout()))); - int8weights = CreateBlobFromData(int8WeightsData); - int8weights->allocate(); - target_layer->blobs["weights"] = int8weights; - } - - if (target_layer->blobs.find("biases") != target_layer->blobs.end()) { - biases = target_layer->blobs["biases"]; - - // Creating int8 biases blob - std::shared_ptr int32BiasesData = - std::shared_ptr(new Data("biases", TensorDesc(Precision::I32, biases->getTensorDesc().getDims(), - biases->getTensorDesc().getLayout()))); - int32biases = CreateBlobFromData(int32BiasesData); - int32biases->allocate(); - target_layer->blobs["biases"] = int32biases; - } - - std::vector weightScalers; - - // Creating w-scale blob - if (weights) { - const float* weight = static_cast(weights->buffer()); - - ConvolutionLayer* pConv1 = dynamic_cast(target_layer.get()); - - if (pConv1 != nullptr && pConv1->_group == 0) { - THROW_IE_EXCEPTION << "Convolution '" << target_layer->name << "'has wrong groups number == 0"; - } - int group = 1; - if (pConv1 != nullptr && pConv1->_group != 1) { - group = pConv1->_group; - } - - std::vector newWeights; // "new" weights are weights multiplied by i-scale - - size_t W_CO = outputChannels / group, W_CI = inputChannels / group, - W_HW = weights->size() / W_CI / W_CO / group; - - { - float* iScaleMemory = static_cast(iScale->buffer()); - for (size_t g = 0; g < group; g++) { - for (size_t co = 0; co < W_CO; co++) { - for (size_t ci = 0; ci < W_CI; ci++) { - size_t kernelBase = g * W_CO * W_CI * W_HW + co * W_CI * W_HW + ci * W_HW; - for (size_t hw = 0; hw < W_HW; hw++) { - newWeights.push_back(weight[kernelBase + hw] * iScaleMemory[g * W_CI + ci]); - } - } - } - } - } - if (newWeights.empty()) - THROW_IE_EXCEPTION << "Could not quantize layer '" << target_layer->name << "'. Invalid layer parameters."; - size_t outChannelSize = weights->getTensorDesc().getDims().back() / W_CO / group; - - // Calculating weights normalization scale factor (w-scale) - - std::set individualsG; - size_t co; - float* weight_convolution; - bool bwquantized = false; - double symQuant = 0.f; - - for (co = 0, weight_convolution = &newWeights[0]; co < outputChannels; - co++, weight_convolution += outChannelSize) { - for (size_t i = 0; i < outChannelSize && individualsG.size() < 256; i++) { - individualsG.insert(static_cast(weight_convolution[i])); - } - } - // If we have 256 quantums for all filters in convolution, it can be already int8 quantized weights - // We can support symmetric quantization - // Below conditions verify if weights are symmetric quantized around 0, what are min/max borders - // These parameters are required to repeat exactly the same quantum as model was trained - // The algorithm of restoring min/max parameters has couple assumptions which might not work for 100% - // cases. We want to explicitly define them. We assume that - // 1. All convolutions have 1st quantum either from positive or negative side. See how we calculate symQuant - // 2. 
If quantization is not symmetric, there should be quant on one of the side which demonstrate this - if (individualsG.size() < 256) { - // going over weights and verify that weights stay on quant positions - std::set intervals; - double prev = 0.f; - for (auto it = individualsG.begin(); it != individualsG.end(); it++) { - if (prev) { - intervals.insert(*it - prev); - } - prev = *it; - } - if (!intervals.empty()) { - symQuant = *(intervals.begin()); - } - std::set divs; - if (symQuant != 0.) { - prev = 0.f; - for (auto it = individualsG.begin(); it != individualsG.end(); it++) { - if (prev) { - divs.insert((*it - prev) / symQuant); - } - prev = *it; - } - } - - bwquantized = true; - for (auto it3 = divs.begin(); it3 != divs.end(); it3++) { - if (fabs(round(*it3) - *it3) > 0.001) { - bwquantized = false; - } - } - - // we want to make sure that quantization is symmetric. this way we are looking for the - // value in weights matching to the quant (positive or negative - if (bwquantized) { - // take the minimal and maximum values on calculated symQuant and compare with data from individuals - double minCalc = symQuant * -128.0f; - double maxCalc = symQuant * 128.0f; - for (auto it = individualsG.begin(); it != individualsG.end(); it++) { - if (*it < minCalc || *it > maxCalc) { - bwquantized = false; - } - } - } - } - if (bwquantized && symQuant != 0.0f) { - float max = symQuant * 127.0f; - for (co = 0, weight_convolution = &newWeights[0]; co < outputChannels; - co++, weight_convolution += outChannelSize) { - float scaler = static_cast(statHelper.getMaxSignValue()) / max; - weightScalers.push_back(scaler); - } - } else { - for (co = 0, weight_convolution = &newWeights[0]; co < outputChannels; - co++, weight_convolution += outChannelSize) { - float max = FLT_MIN; - DataStats::GetDataAbsMax(weight_convolution, outChannelSize, max); - - float scaler = static_cast(statHelper.getMaxSignValue()) / max; - weightScalers.push_back(scaler); - } - } - - std::shared_ptr wScaleData = - std::shared_ptr(new Data("w-scale", {Precision::FP32, {outputChannels}, Layout::C})); - auto wScale = CreateBlobFromData(wScaleData); - wScale->allocate(); - - float* wScaleMemory = static_cast(wScale->buffer()); - - for (size_t i = 0; i < outputChannels; i++) { - wScaleMemory[i] = 1.0 / weightScalers[i]; - } - target_layer->blobs["w-scale"] = wScale; - - auto oScale = statHelper.getOutputScale(statHelper.getLatestInFuse(target_layer)); - if (oScale) { - // there might not be o-scale if we do not have statistic after convolution that means - // returning to float precision after convolution - target_layer->blobs["o-scale"] = oScale; - - // debug scales. 
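// For reference: a minimal standalone sketch of the per-output-channel symmetric
// quantization performed around here. Each channel's weights are scaled by
// maxSign / absmax(channel), then rounded and clamped into int8. Names are
// illustrative only and do not belong to the normalizer's API; maxSign mirrors
// the 0x7F bound used above.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

inline std::vector<int8_t> quantizeChannelSketch(const std::vector<float>& weights,
                                                 float maxSign = 127.0f) {
    float absMax = 0.0f;
    for (float v : weights) absMax = std::max(absMax, std::fabs(v));
    const float scale = (absMax > 0.0f) ? maxSign / absMax : 1.0f;

    std::vector<int8_t> quantized(weights.size());
    for (std::size_t i = 0; i < weights.size(); ++i) {
        float val = std::round(weights[i] * scale);
        val = std::min(127.0f, std::max(-128.0f, val));  // clamp to int8 range
        quantized[i] = static_cast<int8_t>(val);
    }
    return quantized;
}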
Need to compare with actual values in FP32 scoring - target_layer->blobs["ext-scale"] = target_layer->blobs["o-scale"]; - } else { - // we do not have statistics here, we cannot calculate requantizatin scales, - // next layer will be calculated in fp32 - // it's time to return forcedly edge to fp32 as well - target_layer->outData[0]->setPrecision(Precision::FP32); - } - - // Normalizing the weights - ScaleDataToInt(&newWeights[0], weights->size(), int8weights, weightScalers); - } - - // Normalizing the biases - if (biases) { - const float* bias = static_cast(biases->buffer()); - ScaleDataToInt(bias, biases->size(), int32biases, weightScalers); - } -} - -bool CNNNetworkInt8Normalizer::layerProducesFloat(const CNNLayer::Ptr layer) { - // currently we support only case of layers which have one output port - if (layer->outData.size() > 1) { - return false; - } - - bool consumersFP32 = true; - for (const auto dOut : layer->outData[0]->getInputTo()) { - if (dOut.second->precision != Precision::FP32) { - consumersFP32 = false; - } - } - return consumersFP32; -} - -void CNNNetworkInt8Normalizer::returnTailToFP32(const CNNLayer::Ptr layer) { - std::set layersToReturn; - if (layerProducesFloat(layer)) { - layersToReturn.insert(layer); - } - - while (!layersToReturn.empty()) { - CNNLayer::Ptr layerA = *layersToReturn.begin(); - layersToReturn.erase(layerA); - // 1. if it is Pooling layer, or concat layer, we can return it to FP32 as well - // we need to return it's out data - if ((CaselessEq()(layerA->type, "pooling") || CaselessEq()(layerA->type, "concat")) && - layerA->outData.size() == 1) { - layerA->precision = Precision::FP32; - layerA->outData[0]->setPrecision(Precision::FP32); - } - - if ((CaselessEq()(layerA->type, "convolution") || - CaselessEq()(layerA->type, "fullyconnected") || - CaselessEq()(layerA->type, "relu") || isReLULikeClamp(layerA)) && - layerA->outData.size() == 1) { - layerA->outData[0]->setPrecision(Precision::FP32); - if (CaselessEq()(layerA->type, "relu") - && layerA->insData[0].lock() != nullptr - && canLayerBeI8(layerA->insData[0].lock()->getCreatorLayer().lock())) { - layerA->precision = Precision::FP32; - layerA->insData[0].lock()->getCreatorLayer().lock()->outData[0]->setPrecision(Precision::FP32); - } - } - - // adding parents for analysis - if (!CaselessEq()(layerA->type, "convolution") && - !CaselessEq()(layerA->type, "fullyconnected")) { - // for all parents, if they produce data to only FP32 layers - for (auto i : layerA->insData) { - DataPtr d = i.lock(); - if (d != nullptr && d->getCreatorLayer().lock()->precision != Precision::FP32 && - (CaselessEq()(layerA->type, "pooling") || - CaselessEq()(layerA->type, "relu") || isReLULikeClamp(layerA) || - CaselessEq()(layerA->type, "concat"))) { - if (layerProducesFloat(d->getCreatorLayer().lock())) { - layersToReturn.insert(d->getCreatorLayer().lock()); - } - } - } - } - } -} - -bool CNNNetworkInt8Normalizer::canLayerBeI8(const CNNLayer::Ptr& layer) { - // fusion can happen only if initial layer supplies data to only one layer - // if it sends to several layers - it is safe to execute initial layer in any precision - if (layer->outData[0]->getInputTo().size() == 1) { - std::string aType = layer->outData[0]->getInputTo().begin()->second->type; - if (CaselessEq()(aType, "relu")) { - return true; - } else if (CaselessEq()(aType, "clamp")) { - if (!isReLULikeClamp(layer->outData[0]->getInputTo().begin()->second)) { - return false; - } - } else { - static const InferenceEngine::details::caseless_set nonSuportedActivations = { 
- "elu", "clamp", "tanh", "logistic", "square", "abs", - "sqrt", "linear", "bounded_elu", "sort_relu", "relu6"}; - return nonSuportedActivations.find(aType) == nonSuportedActivations.end(); - } - } - return true; -} - -bool CNNNetworkInt8Normalizer::isNextFusionAllowed(const CNNLayer::Ptr& layer) { - // fusion can happen only if initial layer supplies data to only one layer - // if it sends to several layers - it is safe to execute initial layer in any precision - if (layer->outData[0]->getInputTo().size() == 1) { - std::string aType = layer->outData[0]->getInputTo().begin()->second->type; - if (CaselessEq()(aType, "relu")) { - ReLULayer* rL = dynamic_cast(layer->outData[0]->getInputTo().begin()->second.get()); - if (rL == nullptr) { - THROW_IE_EXCEPTION << "Layer " << layer->outData[0]->getInputTo().begin()->second->name - << " is not instance of ReLULayer class"; - } - if (rL->negative_slope != 0.f) { - return false; - } - } else if (CaselessEq()(aType, "clamp")) { - if (!isReLULikeClamp(layer->outData[0]->getInputTo().begin()->second)) { - return false; - } - } else { - static const InferenceEngine::details::caseless_set nonSuportedActivations = { - "elu", "clamp", "tanh", "logistic", "square", "abs", - "sqrt", "linear", "bounded_elu", "sort_relu", "relu6"}; - return nonSuportedActivations.find(aType) == nonSuportedActivations.end(); - } - } else { - if (CaselessEq()(layer->type, "eltwise")) { - return false; - } - } - return true; -} - -bool CNNNetworkInt8Normalizer::isReLULikeClamp(CNNLayer::Ptr layer) { - if (CaselessEq()(layer->type, "Clamp")) { - ClampLayer* clamp = dynamic_cast(layer.get()); - if (clamp == nullptr) { - THROW_IE_EXCEPTION << "Int8 Normalizer error: cannot cast layer '" << layer->name << "' to Clamp"; - } - return clamp->min_value == 0; - } - return false; -} - -void CNNNetworkInt8Normalizer::DefinesExecutionPrecision(CNNNetwork& net, CNNStatisticHelper& statHelper) { - std::vector sortedLayers = CNNNetSortTopologically(net); - - // Converting layers to Int8. Calculating the multipliers if needed - for (auto iter : sortedLayers) { - if (iter->params.find("quantization_level") != iter->params.end() && - (iter->params["quantization_level"] == "FP32" || iter->params["quantization_level"] == "FP16")) { - continue; - } - - // Legacy: FullyConnected should not be converted to Int8, - // if it isn't explicitly marked to. - if (iter->params.find("quantization_level") == iter->params.end() && - CaselessEq()(iter->type, "fullyconnected")) { - continue; - } - - if (!statHelper.canLayerBeQuantized(iter)) { - continue; - } - - if (CaselessEq()(iter->type, "convolution") || - CaselessEq()(iter->type, "fullyconnected")) { - if (canLayerBeI8(iter)) { - iter->precision = Precision::I8; - // we will override I8 to U8 during analysing of Conv-ReLU and Conv-Sum-ReLU fusions - iter->outData[0]->setPrecision(Precision::I8); - } - } else if (CaselessEq()(iter->type, "relu") || isReLULikeClamp(iter)) { - // casting to ReLU - ReLULayer* rL = dynamic_cast(iter.get()); - DataPtr outData = iter->outData.size() ? 
iter->outData[0] : nullptr; - auto inputData = iter->insData[0].lock(); - if (inputData && inputData->getCreatorLayer().lock()->precision != Precision::FP32 && - outData->getPrecision() == Precision::FP32) { - iter->precision = Precision::I8; - if (rL != nullptr && rL->negative_slope != 0.0f) { - outData->setPrecision(Precision::I8); - } else { - outData->setPrecision(Precision::U8); - // if convolution is a predecessor, change its data to U8 also - CNNLayer::Ptr prevLayer = inputData->getCreatorLayer().lock(); - if (prevLayer && (CaselessEq()(prevLayer->type, "convolution") || - CaselessEq()(prevLayer->type, "fullyconnected") || - CaselessEq()(prevLayer->type, "eltwise"))) { - if (!isNextFusionAllowed(prevLayer) && inputData->getPrecision() == Precision::I8) { - outData->setPrecision(Precision::I8); - } else { - inputData->setPrecision(Precision::U8); - } - } - // if there is a patter A0 -> Eltwise -> ReLU and Convolution -> Eltwise -> ReLU, - // need to mark data after conv as U8 - if (prevLayer && CaselessEq()(prevLayer->type, "eltwise")) { - // decising which input will be used for fusion conv-sum-relu - CNNLayer::Ptr input1 = prevLayer->insData[0].lock()->getCreatorLayer().lock(); - CNNLayer::Ptr input2 = prevLayer->insData[1].lock()->getCreatorLayer().lock(); - CNNLayer::Ptr convLayer = nullptr; - CNNLayer::Ptr sumLayer = nullptr; - - if (!CaselessEq()(input1->type, "convolution")) { - sumLayer = input1; - convLayer = input2; - } else { - // it covers a case when both inputs are convolutions or when first input is not convolution - convLayer = input1; - sumLayer = input2; - } - convLayer->outData[0]->setPrecision(sumLayer->outData[0]->getPrecision()); - } - } - } - } else if (CaselessEq()(iter->type, "pooling")) { - auto pool = dynamic_cast(iter.get()); - if (pool == nullptr) { - THROW_IE_EXCEPTION << "Int8 Normalizer error: cannot cast layer '" << iter->name << "' to pooling"; - } - - if (pool->_type == PoolingLayer::MAX || (pool->_type == PoolingLayer::AVG && pool->outData.size() == 1)) { - auto prevLayer = iter->insData[0].lock()->getCreatorLayer().lock(); - if (prevLayer && (prevLayer->precision == Precision::I8 || prevLayer->precision == Precision::U8)) { - iter->precision = Precision::I8; - iter->outData[0]->setPrecision(statHelper.hasNegativeOutput(iter->name) ? Precision::I8 - : Precision::U8); - } - } - } else if (CaselessEq()(iter->type, "concat")) { - // we can do safe - // casting to concat and take axis parameter - // we can concat scales only if concat does concatination by feature maps - bool axisFeatureMaps = false; - auto concatLayer = dynamic_cast(iter.get()); - if (concatLayer) { - if (concatLayer->_axis == 1 && concatLayer->insData.size() && - concatLayer->insData[0].lock()->getTensorDesc().getDims().size() == 4) { - axisFeatureMaps = true; - } - } else { - THROW_IE_EXCEPTION << "Int8 Normalizer error: cannot cast layer " << iter->name << " to concat"; - } - - if (axisFeatureMaps) { - // verification of input data types - bool inputFP32 = false; - bool inputI8 = false; - bool inputU8 = false; - - for (auto inputData : iter->insData) { - auto data = inputData.lock(); - if (data->getPrecision() == Precision::FP32) { - inputFP32 = true; - } else if (data->getPrecision() == Precision::I8) { - inputI8 = true; - } else if (data->getPrecision() == Precision::U8) { - inputU8 = true; - } else { - // Is it a case of input, i.e. passing I16 to concat? 
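// For reference: a compact sketch of the concat precision rule implemented below.
// The output stays U8 only when every input edge is U8; a single signed input makes
// the output I8, and any FP32 input returns the whole concat to FP32. Type names
// here are illustrative placeholders, not Inference Engine types.
#include <vector>

enum class PrecSketch { FP32, I8, U8 };

inline PrecSketch concatOutputPrecisionSketch(const std::vector<PrecSketch>& inputs) {
    bool anyFP32 = false;
    bool anyI8 = false;
    for (PrecSketch p : inputs) {
        anyFP32 |= (p == PrecSketch::FP32);
        anyI8   |= (p == PrecSketch::I8);
    }
    if (anyFP32) return PrecSketch::FP32;                 // fall back to float
    return anyI8 ? PrecSketch::I8 : PrecSketch::U8;       // U8 only if all inputs are U8
}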
- // TODO(amalyshe) to handle inputs as a separate usecase - THROW_IE_EXCEPTION << "I8 normalizer: input data has unknown precision on the edge for concat: " - << data->getName(); - } - } - - if (inputFP32) { - for (auto i : iter->insData) { - if (i.lock()->getCreatorLayer().lock()->precision != Precision::FP32) { - returnTailToFP32(i.lock()->getCreatorLayer().lock()); - } - } - } else { - iter->precision = Precision::I8; - - // we set outpout precision to U8 only if all inputs are U8, in other case it will be I8 - auto outputPrecision = (inputU8 && !inputI8) ? Precision::U8 : Precision::I8; - - // if we have mixed input for I8 and U8, we have to insert scale to edges having U8 to convert to I8 - // Yes, it leads to loosing of some precision and might lead to some performance degradation - // until we have scale supporting s8/u8 input and s8/u8 output. - if (inputU8 && inputI8) { - // looking for all edges having U8 - for (size_t d = 0; d < iter->insData.size(); d++) { - auto data = iter->insData[d].lock(); - if (data->getPrecision() == Precision::U8) { - const size_t c = static_cast(data->getDims()[1]); - std::vector ssWValues(c, 1.0f); - std::vector ssSValues(c, 0.0f); - - std::string layerName = - data->getCreatorLayer().lock()->name + "_Concat_ScaleShift_U8I8_" + iter->name; - CNNLayer::Ptr newLayer = - createDWConvolutionForScale(layerName, c, ssWValues.data(), ssSValues.data()); - newLayer->precision = Precision::I8; - AddLayerToCNNNetworkBeforeLayer(newLayer, iter, d); - - // update statistic to pass quantization smoothly - std::string inputLayerName = - newLayer->insData[0].lock()->getCreatorLayer().lock()->name; - statHelper.copyStatistics(inputLayerName, layerName); - newLayer->outData[0]->setPrecision(Precision::I8); - } - } - } - - if (iter->outData.size() == 1) { - for (auto&& out : iter->outData) { - out->setPrecision(outputPrecision); - } - } - } - } - } else if (CaselessEq()(iter->type, "eltwise")) { - // we decide which of the layers will be in int-8 mode and initialize special scale which will be used - // later in "conv-sum-relu" fuse. 
i8 execution of eltwise always assume this fusion - if (canLayerBeI8(iter)) { - if (iter->insData.size() == 2) { - CNNLayer::Ptr input1 = iter->insData[0].lock()->getCreatorLayer().lock(); - CNNLayer::Ptr input2 = iter->insData[1].lock()->getCreatorLayer().lock(); - if ((CaselessEq()(input1->type, "convolution") || - CaselessEq()(input2->type, "convolution")) && - !CaselessEq()(input1->type, "concat") && - !CaselessEq()(input2->type, "concat") && input1->precision != Precision::FP32 && - input2->precision != Precision::FP32) { - // understand which layer will be used for sum - CNNLayer::Ptr sumLayer = nullptr; - CNNLayer::Ptr convLayer = nullptr; - - if (!CaselessEq()(input1->type, "convolution")) { - sumLayer = input1; - convLayer = input2; - } else { - // it covers a case when both inputs are convolutions or when first input is not convolution - sumLayer = input2; - convLayer = input1; - } - - // if we find supported activation, mark it's output as I8 or U8 depending on statistics - if (iter->outData.size() == 1 && iter->outData[0]->getInputTo().size() == 1 && - (CaselessEq()(iter->outData[0]->getInputTo().begin()->second->type, "ReLU") || - CNNNetworkInt8Normalizer::isReLULikeClamp( - iter->outData[0]->getInputTo().begin()->second))) { - auto activation = iter->outData[0]->getInputTo().begin()->second; - activation->precision = Precision::I8; - if (!statHelper.hasNegativeOutput(statHelper.getLatestInFuse(convLayer)->name)) { - activation->outData[0]->setPrecision(Precision::U8); - iter->outData[0]->setPrecision(Precision::U8); - } else { - activation->outData[0]->setPrecision(Precision::I8); - iter->outData[0]->setPrecision(Precision::I8); - } - } else { - iter->outData[0]->setPrecision(Precision::I8); - } - - if (convLayer->outData[0]->getTensorDesc().getPrecision() == Precision::I8) { - // verify precision on input edges before and after eltwise fusion - // if we have i8/u8 missmatch between sum layer input and conv-sum-activation output, - // then in this case we have to add requantization to i8 on sum input edge - auto latestInFuse = statHelper.getLatestInFuse(convLayer); - if (latestInFuse->outData[0]->getTensorDesc().getPrecision() == Precision::I8) { - if (input1 == sumLayer && - iter->insData[0].lock()->getTensorDesc().getPrecision() == Precision::U8) { - sumLayer = addU8ToI8Conversion(iter->insData[0].lock(), iter, statHelper); - } else if (input2 == sumLayer && - iter->insData[1].lock()->getTensorDesc().getPrecision() == Precision::U8) { - sumLayer = addU8ToI8Conversion(iter->insData[0].lock(), iter, statHelper); - } - if (!sumLayer) { - THROW_IE_EXCEPTION << "I8 normalizer had to add U8->I8 conversion before " - << iter->name << " but failed to do this"; - } - } - - // mark eltwise as a I8 executable, mark out data as I8 - iter->precision = Precision::I8; - convLayer->outData[0]->setPrecision(sumLayer->outData[0]->getPrecision()); - // calculate the only scale - Blob::Ptr sumLayerScales = statHelper.getOutputScale(statHelper.getLatestInFuse(sumLayer)); - Blob::Ptr convLayerScales = - statHelper.getOutputScale(statHelper.getLatestInFuse(convLayer)); - float* sumScale = sumLayerScales->buffer().as(); - float* convScale = convLayerScales->buffer().as(); - for (size_t i = 0; i < sumLayerScales->size(); i++) { - sumScale[i] /= convScale[i]; - } - - iter->blobs["eltwise-sum-scale"] = sumLayerScales; - } - } - } - } else { - // if there are convolutions are inputs to this eltwise, we forcedly move them to FP32 - for (auto i : iter->insData) { - auto type = 
i.lock()->getCreatorLayer().lock()->type; - if (CaselessEq()(type, "convolution") || - CaselessEq()(type, "fullyconnected")) { - i.lock()->getCreatorLayer().lock()->precision = Precision::FP32; - i.lock()->setPrecision(Precision::FP32); - } - } - } - } else if (CaselessEq()(iter->type, "resample")) { - iter->precision = Precision::I8; - iter->outData[0]->setPrecision(iter->insData[0].lock()->getPrecision()); - } - } - - // quantization of weights/biases - sortedLayers = CNNNetSortTopologically(net); - for (auto iter : sortedLayers) { - if (iter->precision == Precision::I8 && (CaselessEq()(iter->type, "convolution") || - CaselessEq()(iter->type, "fullyconnected"))) { - QuantizeConvolutionOrFullyConnected(iter, statHelper); - } - } - - // Returning of tails to FP32 mode if optimistic approach marked them as I8 - // no sense to do pooling in i8, we can return just after convolution - for (auto iter : sortedLayers) { - // TODO(amalyshe) here is a handling of case when iter provides data to the only one next layer - // need to extend to cases when it provides data to many layers - if (iter->precision == Precision::I8 && iter->outData.size() == 1) { - if ((iter->outData[0]->getInputTo().size() == 1 && - iter->outData[0]->getInputTo().begin()->second->precision == Precision::FP32) || - iter->outData[0]->getInputTo().size() == 0) { - returnTailToFP32(iter); - } - } - } -} - -void CNNNetworkInt8Normalizer::PropagateScaleFactors(CNNNetwork& net, const CNNStatisticHelper& statHelper) { - std::vector sortedLayers = CNNNetSortTopologically(net); - - // Moving o-scales down - for (auto iter : sortedLayers) { - if (iter->type == "Concat" && iter->precision == Precision::I8) { - // Checking if all inputs are INT8 - bool all_inputs_are_int8 = true; - for (int k = 0; k < iter->insData.size(); k++) { - auto prevKLayer = iter->insData[k].lock()->getCreatorLayer().lock(); - if ((prevKLayer->precision != Precision::I8 && prevKLayer->precision != Precision::U8) || - prevKLayer->blobs.find("i-concat-scale") == prevKLayer->blobs.end()) { - all_inputs_are_int8 = false; - break; - } - } - - if (all_inputs_are_int8) { - // Merging o-scales of the inputs to make one for the Concat - // Creating the o-scale for the Concat by concatenating the input concats - size_t outputChannels = iter->outData[0]->getTensorDesc().getDims()[1]; - - std::shared_ptr oScaleData = - std::shared_ptr(new Data("o-scale", {Precision::FP32, {outputChannels}, Layout::C})); - auto oScale = CreateBlobFromData(oScaleData); - oScale->allocate(); - - float* oScaleMemory = static_cast(oScale->buffer()); - int cc = 0; - for (int in = 0; in < iter->insData.size(); in++) { - auto prevOScale = iter->insData[in].lock()->getCreatorLayer().lock()->blobs["i-concat-scale"]; - float* prevOScaleMemory = static_cast(prevOScale->buffer()); - - for (int c = 0; c < prevOScale->size(); c++) { - oScaleMemory[cc] = prevOScaleMemory[c]; - cc++; - } - } - if (cc != outputChannels) - THROW_IE_EXCEPTION << "Size of o-scale after " << iter->name - << " isn't equal to the channels count"; - - iter->precision = Precision::I8; - iter->blobs["o-scale"] = oScale; - } - } - - if (iter->blobs.find("o-scale") != iter->blobs.end()) { - int int8Consumers = 0; - int fp32Consumers = 0; - if (iter->outData.size() > 1) { - THROW_IE_EXCEPTION << "normalization algorithm for int8 found layer having o-scale and multiple ports"; - } - if (iter->outData.size() == 1) { - for (auto l : iter->outData[0]->getInputTo()) { - if (l.second->precision == Precision::I8 || l.second->precision == 
Precision::U8) { - if (CaselessEq()(l.second->type, "Pooling") || - CaselessEq()(l.second->type, "ReLU") || - CNNNetworkInt8Normalizer::isReLULikeClamp(l.second)) { - l.second->blobs["o-scale"] = iter->blobs["o-scale"]; - // debug scales. Need to compare with actual values in FP32 scoring - l.second->blobs["ext-scale"] = l.second->blobs["o-scale"]; - int8Consumers++; - } else if (l.second->type == "Convolution") { - l.second->blobs.erase("i-scale"); - int8Consumers++; - } else if (CaselessEq()(l.second->type, "Eltwise")) { - if (statHelper.getLatestInFuse(iter) != iter) { - l.second->blobs["o-scale"] = iter->blobs["o-scale"]; - } - int8Consumers++; - } else if ((l.second->precision == Precision::I8 || l.second->precision == Precision::U8) && - CaselessEq()(l.second->type, "Resample")) { - // If resample has concat as input layer it should inherit it's - // output scale - if (l.second->insData.size() == 1) { - CNNLayerPtr creator = l.second->insData[0].lock()->getCreatorLayer().lock(); - if (CaselessEq()(creator->type, "Concat")) { - l.second->blobs["o-scale"] = creator->blobs["o-scale"]; - l.second->blobs["i-concat-scale"] = l.second->blobs["o-scale"]; - } - } - - // No concat found, let use statistics - if (l.second->blobs.find("o-scale") == l.second->blobs.end()) { - auto oScale = statHelper.getOutputScale(l.second); - l.second->blobs["o-scale"] = oScale; - l.second->blobs["i-concat-scale"] = l.second->blobs["o-scale"]; - } - int8Consumers++; - } else if ((l.second->precision == Precision::I8) && - CaselessEq()(l.second->type, "concat")) { - // if concat is i8, we can propagate oscale further to concat. - // The logic around o-scale assumes that if we have it in the layer after iteration - // in this loop it means that it must not be removed and we need to place - // scale. While for concat we return to one layer back and again need to analyze o-scale - // and it is not clear if we need to return o-scale or it was only for concat. - // Having all of this in mind, it's better to rename o-scale to i-concat-scale - iter->blobs["i-concat-scale"] = iter->blobs["o-scale"]; - int8Consumers++; - } else { - fp32Consumers++; - } - } else if (CaselessEq()(l.second->type, "priorbox") || - CaselessEq()(l.second->type, "priorboxclustered")) { - } else { - // we are leaving o-scale still for adding of scale-shift before FP32 layer - fp32Consumers++; - } - } - - if (iter->outData[0]->getInputTo().empty()) { - fp32Consumers++; - } - - if (CaselessEq()(iter->type, "Convolution") || - CaselessEq()(iter->type, "FullyConnected")) { - if (int8Consumers) { - iter->blobs["oi-scale"] = iter->blobs["o-scale"]; - } else { - iter->outData[0]->setPrecision(Precision::FP32); - } - } - if (!fp32Consumers) { - iter->blobs.erase("o-scale"); - } - } - } - } - - // fixing cornercases when o-scale was propagated through linear tail but it is more efficient to leave - // conversion to de-normalized values in convolution - for (auto iter : sortedLayers) { - if (iter->blobs.find("o-scale") != iter->blobs.end()) { - // go over out data. 
if all outputs are fp32, continue this optimization - bool canOptimize = true; - - // current layer must not be convolution - if (CaselessEq()(iter->type, "convolution")) { - canOptimize = false; - } - for (auto o : iter->outData) { - for (auto ol : o->getInputTo()) { - if (ol.second->precision == Precision::I8) { - canOptimize = false; - } - } - } - if (!canOptimize) { - continue; - } - // trying to go up until convolution - auto curLayer = iter; - bool eliminateOScale = true; - while (curLayer && curLayer->blobs.find("oi-scale") == curLayer->blobs.end() && eliminateOScale) { - if (curLayer->insData.size() == 1 && curLayer->insData[0].lock()->getCreatorLayer().lock() && - curLayer->insData[0].lock()->getCreatorLayer().lock()->outData.size() == 1 && - curLayer->insData[0].lock()->getInputTo().size() == 1) { - curLayer = curLayer->insData[0].lock()->getCreatorLayer().lock(); - if (!CaselessEq()(curLayer->type, "Pooling") && - !CaselessEq()(curLayer->type, "ReLU") && !isReLULikeClamp(curLayer) && - !CaselessEq()(curLayer->type, "Convolution")) { - eliminateOScale = false; - } - } else { - eliminateOScale = false; - } - } - if (eliminateOScale && curLayer) { - for (auto o : iter->outData) { - o->setPrecision(Precision::FP32); - } - for (auto o : curLayer->outData) { - o->setPrecision(Precision::FP32); - } - - curLayer->blobs.erase("oi-scale"); - iter->blobs.erase("o-scale"); - auto iLayer = iter; - while (iLayer != curLayer) { - if (iLayer->type == "Pooling") { - iLayer->precision = Precision::FP32; - } - iLayer = iLayer->insData[0].lock()->getCreatorLayer().lock(); - } - } - } - } -} - -std::string getBlobDimention(const Blob::Ptr blob) { - size_t idx = blob->getTensorDesc().getDims().size(); - - std::stringstream blobDimention; - blobDimention << "["; - for (auto& dim : blob->getTensorDesc().getDims()) { - blobDimention << dim << ((--idx) != 0u ? ", " : ""); - } - blobDimention << "]"; - - return blobDimention.str(); -} - -void precisionColoring(const CNNLayerPtr layer, ordered_properties& printed_properties, - ordered_properties& node_properties) { - // looking for the w-scale - if (layer->blobs.find("w-scale") != layer->blobs.end()) { - printed_properties.insert( - printed_properties.begin(), - std::pair("w-scale", getBlobDimention(layer->blobs.find("w-scale")->second))); - } - - // looking for the oi-scale - if (layer->blobs.find("oi-scale") != layer->blobs.end()) { - printed_properties.insert( - printed_properties.begin(), - std::pair("oi-scale", getBlobDimention(layer->blobs.find("oi-scale")->second))); - } - - // looking for the o-scale - if (layer->blobs.find("o-scale") != layer->blobs.end()) { - printed_properties.insert( - printed_properties.begin(), - std::pair("o-scale", getBlobDimention(layer->blobs.find("o-scale")->second))); - } - // looking for the i-scale - if (layer->blobs.find("i-scale") != layer->blobs.end()) { - printed_properties.insert( - printed_properties.begin(), - std::pair("i-scale", getBlobDimention(layer->blobs.find("i-scale")->second))); - } - - printed_properties.insert( - printed_properties.begin(), - std::pair("Precision", layer->precision == Precision::FP32 ? 
"FP32" : "I8")); - - if (layer->precision == Precision::FP32) { - node_properties.emplace_back("fillcolor", "#5A5DF0"); - } else { - node_properties.emplace_back("fillcolor", "#20F608"); - } -} - -void CNNNetworkInt8Normalizer::NormalizeNetwork(ICNNNetwork& network, ICNNNetworkStats& netStats) { - CNNNetwork cnnn(ICNNNetwork::Ptr(&network, [](void*) {})); - - int maxSign = 0x7F; - int maxUnsign = 0xFF; - - // Applying int8-conversion - StatsMap statsMap = netStats.getNodesStats(); - - CNNStatisticHelper statHelper(cnnn, statsMap, maxSign, maxUnsign); - - replaceScaleShiftByDWConvolution(cnnn); - - DefinesExecutionPrecision(cnnn, statHelper); - PropagateScaleFactors(cnnn, statHelper); - ClampsToReLU(cnnn, statHelper); - AddScaleShifts(cnnn, statHelper); -#ifndef NDEBUG - std::ofstream file("i8_normalized.dot"); - saveGraphToDot(cnnn, file, precisionColoring); -#endif -} diff --git a/inference-engine/src/legacy_api/src/cnn_network_stats_impl.cpp b/inference-engine/src/legacy_api/src/cnn_network_stats_impl.cpp deleted file mode 100644 index 4a5758e..0000000 --- a/inference-engine/src/legacy_api/src/cnn_network_stats_impl.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "cnn_network_stats_impl.hpp" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; -namespace InferenceEngine { -namespace details { - -CNNNetworkStatsImpl::~CNNNetworkStatsImpl() {} - -void CNNNetworkStatsImpl::setNodesStats(const NetworkStatsMap& stats) { - netNodesStats = stats; -} - -const NetworkStatsMap& CNNNetworkStatsImpl::getNodesStats() const { - return netNodesStats; -} - -} // namespace details -} // namespace InferenceEngine diff --git a/inference-engine/src/legacy_api/src/data_stats.h b/inference-engine/src/legacy_api/src/data_stats.h deleted file mode 100644 index edc323c..0000000 --- a/inference-engine/src/legacy_api/src/data_stats.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -#include -#include - -#include "ie_api.h" - -class DataStats { -public: - template - static void GetDataMinMax(const T* data, size_t count, T& min, T& max); - - template - static void GetDataAverage(const T* data, size_t count, T& ave); - - template - static void GetDataAbsMax(const T* data, size_t count, T& max); - - template - static T GetAbsMax(T min, T max); -}; - -template -void DataStats::GetDataMinMax(const T* data, size_t count, T& min, T& max) { - for (size_t i = 0; i < count; i++) { - T val = data[i]; - - if (min > val) { - min = val; - } - - if (max < val) { - max = val; - } - } -} - -template -void DataStats::GetDataAbsMax(const T* data, size_t count, T& max) { - T min = FLT_MAX; - - GetDataMinMax(data, count, min, max); - - max = GetAbsMax(min, max); -} - -template void DataStats::GetDataMinMax(const float* data, size_t count, float& min, float& max); -template void DataStats::GetDataMinMax(const uint8_t* data, size_t count, uint8_t& min, uint8_t& max); - -template void DataStats::GetDataAbsMax(const float* data, size_t count, float& max); - -template -void DataStats::GetDataAverage(const T* data, size_t count, T& ave) { - ave = 0; - - for (size_t i = 0; i < count; i++) { - ave += data[i]; - } - - ave /= count; -} - -template void DataStats::GetDataAverage(const float* data, size_t count, float& ave); - -template -T DataStats::GetAbsMax(T min, T max) { - if (min 
< 0) { - min *= -1; - } - - if (max < 0) { - max *= -1; - } - - return (max > min) ? max : min; -} - -template float DataStats::GetAbsMax(float min, float max); diff --git a/inference-engine/src/legacy_api/src/ie_util_internal.cpp b/inference-engine/src/legacy_api/src/ie_util_internal.cpp index d90af89..bda1203 100644 --- a/inference-engine/src/legacy_api/src/ie_util_internal.cpp +++ b/inference-engine/src/legacy_api/src/ie_util_internal.cpp @@ -21,7 +21,6 @@ #include "details/os/os_filesystem.hpp" #include "file_utils.h" #include "graph_tools.hpp" -#include "ie_icnn_network_stats.hpp" #include "net_pass.h" #include "precision_utils.h" @@ -192,12 +191,8 @@ details::CNNNetworkImplPtr cloneNet(const ICNNNetwork& origin_network) { i++; } - InferenceEngine::ICNNNetworkStats* pstatsSrc = nullptr; - if (StatusCode::OK != network.getStats(&pstatsSrc, nullptr)) { - pstatsSrc = nullptr; - } // copy of the network - details::CNNNetworkImplPtr net = cloneNet(layers, pstatsSrc); + details::CNNNetworkImplPtr net = cloneNet(layers); // going over output layers and aligning output ports and outputs OutputsDataMap outputs; network.getOutputsInfo(outputs); @@ -236,7 +231,7 @@ details::CNNNetworkImplPtr cloneNet(const ICNNNetwork& origin_network) { return net; } -details::CNNNetworkImplPtr cloneNet(const std::vector& layers, const ICNNNetworkStats* networkStats) { +details::CNNNetworkImplPtr cloneNet(const std::vector& layers) { auto net = std::make_shared(); // Src to cloned data map @@ -342,15 +337,6 @@ details::CNNNetworkImplPtr cloneNet(const std::vector& layers, cons net->resolveOutput(); - // cloning of statistics - InferenceEngine::ICNNNetworkStats* pstatsTarget = nullptr; - if (networkStats != nullptr && !networkStats->isEmpty()) { - StatusCode st = net->getStats(&pstatsTarget, nullptr); - if (st == StatusCode::OK && pstatsTarget) { - pstatsTarget->setNodesStats(networkStats->getNodesStats()); - } - } - return net; } diff --git a/inference-engine/src/legacy_api/src/network_serializer.cpp b/inference-engine/src/legacy_api/src/network_serializer.cpp index 12ad778..02a37b4 100644 --- a/inference-engine/src/legacy_api/src/network_serializer.cpp +++ b/inference-engine/src/legacy_api/src/network_serializer.cpp @@ -85,40 +85,6 @@ std::size_t updatePreProcInfo(const InferenceEngine::ICNNNetwork& network, pugi: return dataOffset; } -void UpdateStatisticsInfo(const InferenceEngine::ICNNNetwork& network, pugi::xml_node& netXml) { - // If statistics exists, add it to the file - ICNNNetworkStats* netNodesStats = nullptr; - auto stats = netXml.append_child("statistics"); - auto resultCode = network.getStats(&netNodesStats, nullptr); - if (resultCode != StatusCode::OK) { - THROW_IE_EXCEPTION << InferenceEngine::details::as_status << resultCode - << "Can't get statistics info for serialization of the model"; - } - const NetworkStatsMap statsmap = netNodesStats->getNodesStats(); - - auto joinCommas = [&](const std::vector& v) -> std::string { - std::string res; - - for (size_t i = 0; i < v.size(); ++i) { - res += std::to_string(v[i]); - if (i < v.size() - 1) { - res += ", "; - } - } - - return res; - }; - - for (const auto& itStats : statsmap) { - auto layer = stats.append_child("layer"); - - layer.append_child("name").text().set(itStats.first.c_str()); - - layer.append_child("min").text().set(joinCommas(itStats.second->_minOutputs).c_str()); - layer.append_child("max").text().set(joinCommas(itStats.second->_maxOutputs).c_str()); - } -} - void UpdateStdLayerParams(const CNNLayer::Ptr& layer) { auto layerPtr = 
layer.get(); auto& params = layer->params; @@ -652,12 +618,6 @@ std::size_t FillXmlDoc(const InferenceEngine::ICNNNetwork& network, pugi::xml_do } } - // no need to print this info in case of executable graph info serialization - if (!execGraphInfoSerialization) { - dataOffset = updatePreProcInfo(network, netXml, dataOffset); - UpdateStatisticsInfo(network, netXml); - } - return dataOffset; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp index b8f0cec..48f70e7 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp @@ -13,7 +13,6 @@ #include "bf16transformer.h" #include #include -#include #include #include "low_precision_transformations/convolution.hpp" #include "low_precision_transformations/eltwise.hpp" @@ -29,7 +28,6 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -using InferenceEngine::details::CNNNetworkInt8Normalizer; using namespace InferenceEngine::details; InferenceEngine::InferRequestInternal::Ptr @@ -46,8 +44,6 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network extensionManager(extMgr), _cfg{cfg}, _name{network.getName()} { - ICNNNetworkStats* pstats = nullptr; - StatusCode s = network.getStats(&pstats, nullptr); // we are cloning network if we have statistics and we can transform network. _clonedNetwork = cloneNet(network); @@ -66,52 +62,47 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network NetPass::ConvertPrecision(*_clonedNetwork, Precision::BOOL, Precision::U8); NetPass::ConvertPrecision(*_clonedNetwork, Precision::U16, Precision::I32); - if (s == StatusCode::OK && pstats && !pstats->isEmpty()) { - CNNNetworkInt8Normalizer cnnorm; - cnnorm.NormalizeNetwork(*_clonedNetwork, *pstats); - } else { - if (_cfg.lpTransformsMode == Config::LPTransformsMode::On) { - auto params = LayerTransformation::Params(true, // updatePrecisions - true, // quantizeOutputs - true, // weightsToConst - LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations - LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights - true, // roundQuantizedValues - true, // updateBiases - true); // supportAsymmetricQuantization - LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params). - add(LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), "Convolution"). - addCleanup( - LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), - "ScaleShift")); - transformer.transform(*_clonedNetwork); - - // Check if network is INT8 or Binary. - // BF16 transformations were disabled since CPU plug-in doesn't support mixed precision execution: - // BF16 + INT8 or BF16 + BIN. 
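// For reference: a condensed sketch of the precision gating this hunk keeps once the
// statistics path is gone. Whole-network BF16 conversion is applied only on
// BF16-capable CPUs, for pure-float models (no FakeQuantize layers), and only when
// explicitly enforced; if the CPU lacks BF16 or the model is quantized, the network
// is converted back to FP32. Helper names below are hypothetical.
#include <string>
#include <vector>

inline bool containsFakeQuantize(const std::vector<std::string>& layerTypes) {
    for (const auto& type : layerTypes)
        if (type == "FakeQuantize") return true;   // INT8/BIN model
    return false;
}

inline bool shouldConvertToBF16(bool cpuHasBF16, bool enforceBF16,
                                const std::vector<std::string>& layerTypes) {
    return cpuHasBF16 && enforceBF16 && !containsFakeQuantize(layerTypes);
}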
- bool isFloatModel = true; - CNNNetworkIterator i(&network); - while (i != CNNNetworkIterator()) { - if (CaselessEq()((*i)->type, "FakeQuantize")) { - isFloatModel = false; - break; - } - i++; + if (_cfg.lpTransformsMode == Config::LPTransformsMode::On) { + auto params = LayerTransformation::Params(true, // updatePrecisions + true, // quantizeOutputs + true, // weightsToConst + LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations + LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights + true, // roundQuantizedValues + true, // updateBiases + true); // supportAsymmetricQuantization + LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params). + add(LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), "Convolution"). + addCleanup( + LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), + "ScaleShift")); + transformer.transform(*_clonedNetwork); + + // Check if network is INT8 or Binary. + // BF16 transformations were disabled since CPU plug-in doesn't support mixed precision execution: + // BF16 + INT8 or BF16 + BIN. + bool isFloatModel = true; + CNNNetworkIterator i(&network); + while (i != CNNNetworkIterator()) { + if (CaselessEq()((*i)->type, "FakeQuantize")) { + isFloatModel = false; + break; } + i++; + } - if (with_cpu_x86_bfloat16() && isFloatModel) { - BF16Transformer bf16Transformer; - CNNNetwork cnnetwork(_clonedNetwork); - // If enforceBF16 flag was set, BF16 transformation applies for all layers supported by CPU plugin. - // Overwise, only layers marked as BF16 in 'cnnetwork' will be performed in bfloat16 mode. - // CPU plugin throws an exception, if marked as BF16 layers have not supported by CPU plugin. - if (cfg.enforceBF16 == true) - bf16Transformer.convertToBFloat16(cnnetwork); - } else { - BF16Transformer bf16Transformer; - CNNNetwork cnnetwork(_clonedNetwork); - bf16Transformer.convertToFloat(cnnetwork); - } + if (with_cpu_x86_bfloat16() && isFloatModel) { + BF16Transformer bf16Transformer; + CNNNetwork cnnetwork(_clonedNetwork); + // If enforceBF16 flag was set, BF16 transformation applies for all layers supported by CPU plugin. + // Overwise, only layers marked as BF16 in 'cnnetwork' will be performed in bfloat16 mode. + // CPU plugin throws an exception, if marked as BF16 layers have not supported by CPU plugin. + if (cfg.enforceBF16 == true) + bf16Transformer.convertToBFloat16(cnnetwork); + } else { + BF16Transformer bf16Transformer; + CNNNetwork cnnetwork(_clonedNetwork); + bf16Transformer.convertToFloat(cnnetwork); } } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 7218645..93e54da 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -29,8 +29,6 @@ #include
#include -#include "cnn_network_int8_normalizer.hpp" - #include "precision_utils.h" #include #include "low_precision_transformations/transformer.hpp" diff --git a/inference-engine/src/readers/ir_reader/ie_format_parser.cpp b/inference-engine/src/readers/ir_reader/ie_format_parser.cpp index 32e4ee3..749aeba 100644 --- a/inference-engine/src/readers/ir_reader/ie_format_parser.cpp +++ b/inference-engine/src/readers/ir_reader/ie_format_parser.cpp @@ -10,7 +10,6 @@ #include #include "ie_blob_proxy.hpp" -#include "ie_icnn_network_stats.hpp" #include "ie_layer_parsers.h" #include "ie_profiling.hpp" #include "xml_parse_utils.h" @@ -423,9 +422,6 @@ CNNNetworkImplPtr FormatParser::Parse(pugi::xml_node& root) { } } - auto statNode = root.child("statistics"); - ParseStatisticSection(statNode); - if (!_network->allLayers().size()) THROW_IE_EXCEPTION << "Incorrect model! Network doesn't contain layers."; size_t inputLayersNum(0); @@ -777,39 +773,3 @@ void FormatParser::ParsePreProcess(pugi::xml_node& root) { << validMeanImageIds; } } - -void FormatParser::ParseStatisticSection(const pugi::xml_node& statNode) { - auto splitParseCommas = [&](const string& s) -> vector { - vector res; - stringstream ss(s); - - float val; - - while (ss >> val) { - res.push_back(val); - - if (ss.peek() == ',') ss.ignore(); - } - - return res; - }; - - map newNetNodesStats; - - for (auto layer : statNode.children("layer")) { - NetworkNodeStatsPtr nodeStats = NetworkNodeStatsPtr(new NetworkNodeStats()); - - string name = layer.child("name").text().get(); - - newNetNodesStats[name] = nodeStats; - - nodeStats->_minOutputs = splitParseCommas(layer.child("min").text().get()); - nodeStats->_maxOutputs = splitParseCommas(layer.child("max").text().get()); - } - - ICNNNetworkStats* pstats = nullptr; - StatusCode s = _network->getStats(&pstats, nullptr); - if (s == StatusCode::OK && pstats) { - pstats->setNodesStats(newNetNodesStats); - } -} diff --git a/inference-engine/src/readers/ir_reader/ie_format_parser.h b/inference-engine/src/readers/ir_reader/ie_format_parser.h index 7067040..0f9eb44 100644 --- a/inference-engine/src/readers/ir_reader/ie_format_parser.h +++ b/inference-engine/src/readers/ir_reader/ie_format_parser.h @@ -102,7 +102,6 @@ private: DataPtr ParseInputData(pugi::xml_node& root) const; void ParsePreProcess(pugi::xml_node& node); - void ParseStatisticSection(const pugi::xml_node& statNode); // Generate different set of creators depending on required IR version static std::vector> generateCreators(int version); diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp index 2da4699..11d1523 100644 --- a/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp +++ b/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp @@ -47,8 +47,6 @@ struct CompilationConfig final { bool hwOptimization = true; bool hwExtraSplit = false; - bool ignoreIRStatistic = false; - std::string irWithVpuScalesDir; std::string customLayers; diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/model.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/model.hpp index 8b9c869..63ca82a 100644 --- a/inference-engine/src/vpu/graph_transformer/include/vpu/model/model.hpp +++ b/inference-engine/src/vpu/graph_transformer/include/vpu/model/model.hpp @@ -59,8 +59,6 @@ private: VPU_MODEL_ATTRIBUTE(int, batchSize, 1) - VPU_MODEL_ATTRIBUTE(InferenceEngine::NetworkStatsMap, 
nodesStats, {}) - public: // // Constructor @@ -78,8 +76,6 @@ public: void setBatchSize(int batchSize); - inline void setNodesStats(const ie::NetworkStatsMap& stats) { _nodesStats = stats; } - // // Data nodes // diff --git a/inference-engine/src/vpu/graph_transformer/src/frontend/frontend.cpp b/inference-engine/src/vpu/graph_transformer/src/frontend/frontend.cpp index 16f18e9..8496684 100644 --- a/inference-engine/src/vpu/graph_transformer/src/frontend/frontend.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/frontend/frontend.cpp @@ -359,15 +359,6 @@ ModelPtr FrontEnd::runCommonPasses(ie::ICNNNetwork& network, const UnsupportedLa model->attrs().set("index", g_counter.fetch_add(1)); model->attrs().set("resources", env.resources); - if (!env.config.ignoreIRStatistic) { - ie::ICNNNetworkStats* stats = nullptr; - // V10 IRs doesn't contain stats - if (originalOrConvertNetwork->getStats(&stats, nullptr) == InferenceEngine::OK && !stats->isEmpty()) { - env.log->trace("Use node statistics from the IR"); - model->setNodesStats(stats->getNodesStats()); - } - } - // // Update IE Network // diff --git a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp index cffe70a..10b512a 100644 --- a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp @@ -156,30 +156,6 @@ int correctShift(int shift, bool firstStage, const std::string& type) { return shift; } -int maxOutputExponent(const std::string& name, const InferenceEngine::NetworkStatsMap& stats) { - auto node_stats_it = stats.find(name); - IE_ASSERT(node_stats_it != stats.end()); - - auto& max = node_stats_it->second->_maxOutputs; - auto& min = node_stats_it->second->_maxOutputs; - - IE_ASSERT(max.size() > 0 && min.size() > 0); - auto max_value = *std::max_element(max.begin(), max.end()); - auto min_value = *std::min_element(min.begin(), min.end()); - - max_value = std::max(fabsf(max_value), fabsf(min_value)); - IE_ASSERT(max_value > 0); - int exp = 0; - - // frexp fractions float into two parts: - // [0.5, 1)* 2^exp - // while float stores value in format - // [1, 2) * 2^f_exp - // which means exp returned by frexp is f_exp + 1 - frexp(max_value, &exp); - return exp - 1; -} - void scaleBlobByIdx(const Model& model, const Stage& stage, int index, float scale) { const auto& original = stage->input(index); IE_ASSERT(original->usage() == DataUsage::Fake || original->usage() == DataUsage::Const); @@ -231,11 +207,8 @@ public: void PassImpl::run(const Model& model) { VPU_PROFILE(analyzeWeightableLayers); - static const int scaleToExp = 8; // get from config? static const int scaleThreshold = 1; - auto& stats = model->nodesStats(); - bool isGrowingOutput = checkGrowingOutput(model); bool firstStage = true; @@ -267,16 +240,13 @@ void PassImpl::run(const Model& model) { auto meanExp = getMeanValue(exponents); shift = std::min(-meanExp, shift); - if (stats.empty()) { + { if (firstStage && shift < 4 && isGrowingOutput && weights->desc().dim(Dim::C) > 1) { normalVal = 5; } shift = correctShift(shift, firstStage, stage->origLayer()->type); shift -= normalVal; - } else { - int outExp = maxOutputExponent(stage->origLayer()->name, stats); // what if outExp == 15? 
- shift = std::min(scaleToExp - outExp, shift); } firstStage = false; diff --git a/inference-engine/src/vpu/graph_transformer/src/parsed_config.cpp b/inference-engine/src/vpu/graph_transformer/src/parsed_config.cpp index b00c95b..ccf1841 100644 --- a/inference-engine/src/vpu/graph_transformer/src/parsed_config.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/parsed_config.cpp @@ -34,7 +34,6 @@ IE_SUPPRESS_DEPRECATED_START VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), VPU_CONFIG_KEY(HW_EXTRA_SPLIT), VPU_CONFIG_KEY(CUSTOM_LAYERS), - VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), VPU_CONFIG_KEY(INPUT_NORM), VPU_CONFIG_KEY(INPUT_BIAS), @@ -160,7 +159,6 @@ void ParsedConfig::parse(const std::map& config) { setOption(_compileConfig.hwExtraSplit, switches, config, VPU_CONFIG_KEY(HW_EXTRA_SPLIT)); setOption(_compileConfig.injectSwOps, switches, config, VPU_CONFIG_KEY(HW_INJECT_STAGES)); setOption(_compileConfig.mergeHwPoolToConv, switches, config, VPU_CONFIG_KEY(HW_POOL_CONV_MERGE)); - setOption(_compileConfig.ignoreIRStatistic, switches, config, VPU_CONFIG_KEY(IGNORE_IR_STATISTIC)); setOption(_compileConfig.hwDilation, switches, config, VPU_CONFIG_KEY(HW_DILATION)); setOption(_compileConfig.forceDeprecatedCnnConversion, switches, config, VPU_CONFIG_KEY(FORCE_DEPRECATED_CNN_CONVERSION)); setOption(_compileConfig.disableReorder, switches, config, VPU_CONFIG_KEY(DISABLE_REORDER)); diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_metrics.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_metrics.cpp index 3a363f2..a3a1ab7 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_metrics.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_metrics.cpp @@ -33,7 +33,6 @@ IE_SUPPRESS_DEPRECATED_START KEY_LOG_LEVEL, KEY_VPU_PRINT_RECEIVE_TENSOR_TIME, KEY_VPU_CUSTOM_LAYERS, - KEY_VPU_IGNORE_IR_STATISTIC, KEY_VPU_MYRIAD_FORCE_RESET, KEY_VPU_MYRIAD_PLATFORM, KEY_EXCLUSIVE_ASYNC_REQUESTS, diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp index 1d30a75..2b1f7b9 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp @@ -112,7 +112,6 @@ Engine::Engine(std::shared_ptr mvnc) : { KEY_LOG_LEVEL, "LOG_NONE" }, { KEY_VPU_PRINT_RECEIVE_TENSOR_TIME, "OFF" }, { KEY_VPU_CUSTOM_LAYERS, "" }, - { KEY_VPU_IGNORE_IR_STATISTIC, "OFF" }, { KEY_VPU_MYRIAD_FORCE_RESET, "OFF" }, { KEY_VPU_MYRIAD_PLATFORM, "" }, { KEY_EXCLUSIVE_ASYNC_REQUESTS, "OFF" }, diff --git a/inference-engine/tests/functional/inference_engine/cnn_network/cnn_ngraph_impl_tests.cpp b/inference-engine/tests/functional/inference_engine/cnn_network/cnn_ngraph_impl_tests.cpp index fce269a..3ef0137 100644 --- a/inference-engine/tests/functional/inference_engine/cnn_network/cnn_ngraph_impl_tests.cpp +++ b/inference-engine/tests/functional/inference_engine/cnn_network/cnn_ngraph_impl_tests.cpp @@ -675,9 +675,6 @@ TEST(CNNNGraphImplTests, TestCheckStats) { } InferenceEngine::details::CNNNetworkNGraphImpl cnnNet(ngraph); - InferenceEngine::ICNNNetworkStats* _stats = nullptr; - ASSERT_EQ(NOT_FOUND, cnnNet.getStats(&_stats, nullptr)); - ASSERT_EQ(nullptr, _stats); } IE_SUPPRESS_DEPRECATED_END diff --git a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/config.cpp b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/config.cpp index d0cb33f..de90651 100644 --- a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/config.cpp 
+++ b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/config.cpp @@ -14,9 +14,6 @@ namespace { }; const std::vector> Configs = { - {{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), CONFIG_VALUE(YES)}}, - {{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), CONFIG_VALUE(NO)}}, - {{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(YES)}}, {{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(NO)}}, @@ -64,9 +61,6 @@ namespace { {{VPU_MYRIAD_CONFIG_KEY(PROTOCOL), "BLUETOOTH"}}, {{VPU_MYRIAD_CONFIG_KEY(PROTOCOL), "LAN"}}, - {{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "ON"}}, - {{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "OFF"}}, - {{VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), "ON"}}, {{VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), "OFF"}}, @@ -89,8 +83,6 @@ namespace { {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, CommonTestUtils::DEVICE_MYRIAD}, {CONFIG_KEY(LOG_LEVEL), "VERBOSE"}}, {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, CommonTestUtils::DEVICE_MYRIAD}, - {VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "ON"}}, - {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, CommonTestUtils::DEVICE_MYRIAD}, {VPU_MYRIAD_CONFIG_KEY(PLATFORM), "-1"}}, {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, CommonTestUtils::DEVICE_MYRIAD}, {VPU_MYRIAD_CONFIG_KEY(PLATFORM), "0"}}, diff --git a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/infer_request_config.cpp b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/infer_request_config.cpp index b72a324..c31a35b 100644 --- a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/infer_request_config.cpp +++ b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/infer_request_config.cpp @@ -21,8 +21,6 @@ namespace { const std::vector> Inconfigs = { {}, - {{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), CONFIG_VALUE(YES)}}, - {{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), CONFIG_VALUE(NO)}}, {{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(YES)}}, {{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(NO)}}, diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/common_layers_params.cpp b/inference-engine/tests/ie_test_utils/common_test_utils/common_layers_params.cpp index cc769c5..a3414a9 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/common_layers_params.cpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/common_layers_params.cpp @@ -209,14 +209,4 @@ void get_common_dims(const InferenceEngine::Blob &blob, } } -void fillStatistic(Statistic &out, size_t size, float min, float max) { - float ampl = (max - min) / 4.f; - float center1 = min + ampl; - float center2 = max - ampl; - out.min.resize(size); - out.max.resize(size); - CommonTestUtils::fill_data_sine(out.min.data(), size, center1, ampl, 1); - CommonTestUtils::fill_data_sine(out.max.data(), size, center2, ampl, 1); -} - } // namespace CommonTestUtils diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/common_layers_params.hpp b/inference-engine/tests/ie_test_utils/common_test_utils/common_layers_params.hpp index 6dc7473..54f6583 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/common_layers_params.hpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/common_layers_params.hpp @@ -59,33 +59,6 @@ struct def_conv_common_params : conv_common_params { size_t deformable_group; }; -struct Statistic { - std::vector min; - std::vector max; - - bool empty() const 
{ - return min.empty() || max.empty(); - } - - std::string serialize_min() const { - return serialize(min); - } - - std::string serialize_max() const { - return serialize(max); - } - -protected: - std::string serialize(const std::vector &in) const { - if (in.empty()) - return ""; - std::string out = std::to_string(in[0lu]); - for (size_t i = 1lu; i < in.size(); i++) - out += ", " + std::to_string(in[i]); - return out; - } -}; - void getConvOutShape(const std::vector &inShape, const conv_common_params ¶ms, std::vector &outShape); @@ -123,6 +96,4 @@ void get_common_dims(const InferenceEngine::Blob &blob, int32_t &dimz, int32_t &dimn); -void fillStatistic(Statistic &out, size_t size, float min, float max); - } // namespace CommonTestUtils diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_net_builder.cpp b/inference-engine/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_net_builder.cpp index 2c98d21..fff43fd 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_net_builder.cpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_net_builder.cpp @@ -22,8 +22,8 @@ void IDManager::reset() { portID = layerID = 0; } -LayerDesc::LayerDesc(std::string type, InOutShapes &shapes, IDManager &id_manager, const Statistic &statistic) - : _type(std::move(type)), _statistic(statistic) { +LayerDesc::LayerDesc(std::string type, InOutShapes &shapes, IDManager &id_manager) + : _type(std::move(type)) { _layerID = id_manager.getNextLayerID(); auto inDims = shapes.inDims; auto outDims = shapes.outDims; diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_net_builder.hpp b/inference-engine/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_net_builder.hpp index 6a86195..6de59a7 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_net_builder.hpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_net_builder.hpp @@ -4,8 +4,6 @@ #pragma once -#include "xml_father.hpp" -#include "common_test_utils/common_layers_params.hpp" #include #include @@ -13,6 +11,9 @@ #include #include +#include "xml_father.hpp" +#include "common_test_utils/common_layers_params.hpp" + namespace CommonTestUtils { struct CropData { @@ -131,7 +132,6 @@ class LayerDesc { std::vector _inPortsID; std::vector _outPortsID; std::string _type; - Statistic _statistic; public: using Ptr = std::shared_ptr; @@ -141,7 +141,7 @@ public: * @param type - string with type of the layer * @param shapes - reference to the structure with input and output shapes */ - explicit LayerDesc(std::string type, InOutShapes &shapes, IDManager &id_manager, const Statistic &statistic); + explicit LayerDesc(std::string type, InOutShapes &shapes, IDManager &id_manager); /** * @brief Resets current input and output ports to iterate over all input and output ports @@ -179,10 +179,6 @@ public: * @brief Returns number of outputs */ size_t getOutputsSize() const; - - const Statistic &getStatistic() { - return _statistic; - } }; @@ -235,13 +231,12 @@ public: static XmlNetBuilder buildNetworkWithOneInput( std::string name = "AlexNet", std::vector dims = {1, 3, 227, 227}, - std::string precision = "Q78", - const Statistic &statistic = {}) { + std::string precision = "Q78") { std::shared_ptr root = std::make_shared(); auto &exp = root->node("net").attr("name", name).attr("precision", precision).attr("version", Version); auto &expFinal = exp.attr("batch", 1); - return 
XmlNetBuilder(root, expFinal.node("layers")).addInputLayer(precision, dims, statistic); + return XmlNetBuilder(root, expFinal.node("layers")).addInputLayer(precision, dims); } static XmlNetBuilder buildBody() { @@ -273,7 +268,6 @@ public: const std::string &precision, const InOutShapes &inout, const conv_common_params &conv_params = {}, - const Statistic &statistic = {}, const std::string &name = "") { std::map params; if (Version == 2) { @@ -317,15 +311,13 @@ public: } int weights_size = getConvWeightsSize(inout.inDims[0], conv_params, precision); int biases_size = getConvBiasesSize(conv_params, precision); - return addLayer("Convolution", precision, ¶ms, inout, weights_size, biases_size, "convolution_data", "", - statistic, name); + return addLayer("Convolution", precision, ¶ms, inout, weights_size, biases_size, "convolution_data", "", name); } XmlNetBuilder &poolingLayer( const std::string &precision, const InOutShapes &inout, const pool_common_params &pool_params = {}, - const Statistic &statistics = {}, const std::string &name = "") { std::map params; if (Version == 2) { @@ -366,7 +358,7 @@ public: else params["exclude-pad"] = "false"; } - return addLayer("Pooling", precision, ¶ms, inout, 0, 0, "pooling_data", "", statistics, name); + return addLayer("Pooling", precision, ¶ms, inout, 0, 0, "pooling_data", "", name); } struct TIPortMap { @@ -416,7 +408,7 @@ public: std::map *params, InOutShapes inout, const std::string &name) { - return addLayer(type, precision, params, inout, 0, 0, "data", "", {}, name); + return addLayer(type, precision, params, inout, 0, 0, "data", "", name); } XmlNetBuilder &addLayer( @@ -426,7 +418,7 @@ public: InOutShapes inout, int weightsSize, const std::string &name) { - return addLayer(type, precision, params, inout, weightsSize, 0, "data", "", {}, name); + return addLayer(type, precision, params, inout, weightsSize, 0, "data", "", name); } XmlNetBuilder &addLayer(const std::string &type, @@ -437,10 +429,9 @@ public: int biasesSize = 0, std::string layerDataName = "data", std::string content = "", - const Statistic &statistic = {}, const std::string &name = "") { layersNum++; - auto layerDesc = std::make_shared(type, inout, id_manager, statistic); + auto layerDesc = std::make_shared(type, inout, id_manager); layersDesc.push_back(layerDesc); auto &layer = xml.node("layer").attr("name", name.empty() ? 
layerDesc->getLayerName() : name).attr("precision", @@ -474,11 +465,10 @@ public: } XmlNetBuilder &addInputLayer(const std::string &precision, - const std::vector &out, - const Statistic &statistic = {}) { + const std::vector &out) { InOutShapes inout{}; inout.outDims.push_back(out); - return addLayer("Input", precision, nullptr, inout, 0, 0, "data", "", statistic); + return addLayer("Input", precision, nullptr, inout, 0, 0, "data", ""); } std::string finish(std::vector> *edges) { @@ -495,14 +485,12 @@ public: } // node_edges.close(); - addStatistic(node_edges.close()); return exp; } std::string finish(bool addInputPreProcess = true) { auto &exp = xml.close(); addEdges(exp); - addStatistic(exp); if (addInputPreProcess) { addPreProcess(exp); } @@ -571,32 +559,6 @@ private: } preProcess.close(); } - - template - void addStatistic(T &mainContent) { - bool addStatistic = false; - for (size_t i = 0lu; i < layersDesc.size() - 1lu; i++) { - if (!layersDesc[i]->getStatistic().empty()) { - addStatistic = true; - break; - } - } - if (!addStatistic) - return; - - auto &statistics = mainContent.node("statistics"); - for (size_t i = 0lu; i < layersDesc.size(); i++) { - if (!layersDesc[i]->getStatistic().empty()) { - auto &layer = statistics.node("layer"); - layer - .node("name", layersDesc[i]->getLayerName()) - .node("min", layersDesc[i]->getStatistic().serialize_min()) - .node("max", layersDesc[i]->getStatistic().serialize_max()) - .close(); - } - } - statistics.close(); - } }; typedef XmlNetBuilder<2> V2NetBuilder; diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_icnn_network.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_icnn_network.hpp index ff4585b..174ad03 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_icnn_network.hpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_icnn_network.hpp @@ -42,8 +42,6 @@ class MockICNNNetwork : public InferenceEngine::ICNNNetwork { MOCK_QUALIFIED_METHOD1(setBatchSize, noexcept, InferenceEngine::StatusCode(const size_t size)); MOCK_QUALIFIED_METHOD2(setBatchSize, noexcept, InferenceEngine::StatusCode(const size_t size, InferenceEngine::ResponseDesc*)); MOCK_QUALIFIED_METHOD0(getBatchSize, const noexcept, size_t()); - MOCK_QUALIFIED_METHOD2(getStats, const noexcept, InferenceEngine::StatusCode(InferenceEngine::ICNNNetworkStats** /*stats*/, - InferenceEngine::ResponseDesc* /*resp*/)); MOCK_QUALIFIED_METHOD0(Release, noexcept, void()); MOCK_QUALIFIED_METHOD1(getInputShapes, const noexcept, void(InferenceEngine::ICNNNetwork::InputShapes&)); MOCK_QUALIFIED_METHOD2(reshape, noexcept, InferenceEngine::StatusCode(const InferenceEngine::ICNNNetwork::InputShapes &, InferenceEngine::ResponseDesc *)); diff --git a/inference-engine/tests_deprecated/behavior/vpu/shared_tests_instances/plugin_tests/vpu_test_data.hpp b/inference-engine/tests_deprecated/behavior/vpu/shared_tests_instances/plugin_tests/vpu_test_data.hpp index 9d5e464..0fa2f84 100644 --- a/inference-engine/tests_deprecated/behavior/vpu/shared_tests_instances/plugin_tests/vpu_test_data.hpp +++ b/inference-engine/tests_deprecated/behavior/vpu/shared_tests_instances/plugin_tests/vpu_test_data.hpp @@ -85,9 +85,6 @@ const std::vector deviceSpecificConfigurations = { }; const std::vector deviceAgnosticConfigurations = { - BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), CONFIG_VALUE(YES)}}), - BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), CONFIG_VALUE(NO)}}), - 
BEH_MYRIAD.withConfig({{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(YES)}}), BEH_MYRIAD.withConfig({{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(NO)}}), @@ -127,9 +124,6 @@ const BehTestParams withIncorrectConfValues[] = { BEH_MYRIAD.withConfig({{VPU_MYRIAD_CONFIG_KEY(PROTOCOL), "BLUETOOTH"}}), BEH_MYRIAD.withConfig({{VPU_MYRIAD_CONFIG_KEY(PROTOCOL), "LAN"}}), - BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "ON"}}), - BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "OFF"}}), - BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), "ON"}}), BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), "OFF"}}), @@ -150,8 +144,6 @@ const BehTestParams withIncorrectConfValues[] = { BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"}, {CONFIG_KEY(LOG_LEVEL), "VERBOSE"}}), BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"}, - {VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "ON"}}), - BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"}, {VPU_MYRIAD_CONFIG_KEY(PLATFORM), "-1"}}), BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"}, {VPU_MYRIAD_CONFIG_KEY(PLATFORM), "0"}}), diff --git a/inference-engine/tests_deprecated/functional/ie_tests/include/regression_config.hpp b/inference-engine/tests_deprecated/functional/ie_tests/include/regression_config.hpp index 17b0001..0f30e4d 100644 --- a/inference-engine/tests_deprecated/functional/ie_tests/include/regression_config.hpp +++ b/inference-engine/tests_deprecated/functional/ie_tests/include/regression_config.hpp @@ -139,7 +139,6 @@ struct RegressionConfig { string _device_name; string _firmware; string _tmp_firmware; - string _stat_file; vector labels; double nearValue = 0.0; double nearAvgValue = 0.0; diff --git a/inference-engine/tests_deprecated/functional/ie_tests/include/regression_tests.hpp b/inference-engine/tests_deprecated/functional/ie_tests/include/regression_tests.hpp index fb6e5a9..effe09c 100644 --- a/inference-engine/tests_deprecated/functional/ie_tests/include/regression_tests.hpp +++ b/inference-engine/tests_deprecated/functional/ie_tests/include/regression_tests.hpp @@ -123,7 +123,7 @@ class ModelSelector { } - Model model, statFile; + Model model; RegressionConfig config; EMean isMean = eValues; EPrecision precision = eq78; @@ -151,16 +151,6 @@ class ModelSelector { return path_to_model.str(); } - std::string prepareStatMatching() { - if (statFile.fileName() == "") return ""; - ModelsPath path_to_stat; - path_to_stat << kPathSeparator - << statFile.folderName() << kPathSeparator - << statFile.fileName(); - - return path_to_stat.str(); - } - ModelSelector() = default; std::string getReferenceResultsLabel() { @@ -511,14 +501,12 @@ class ModelSelector { config.referenceOutput.push_back(v); } config._path_to_models = prepareModelMatching(); - config._stat_file = prepareStatMatching(); return M(config); } M to(Blob::Ptr rhs) { config.outputBlob = rhs; config._path_to_models = prepareModelMatching(); - config._stat_file = prepareStatMatching(); return M(config); } @@ -533,7 +521,6 @@ class ModelSelector { } } config._path_to_models = prepareModelMatching(); - config._stat_file = prepareStatMatching(); return M(config); } @@ -548,14 +535,12 @@ class ModelSelector { config.meanRelativeError = meanRelativeError; config.maxRelativeError = maxRelativeError; config._path_to_models = prepareModelMatching(); - config._stat_file = 
prepareStatMatching(); return M(config); } void equalToReferenceWithDelta(double nearValue) { config.nearValue = nearValue; config._path_to_models = prepareModelMatching(); - config._stat_file = prepareStatMatching(); M(config).to(getReferenceResultsLabel()); } @@ -565,14 +550,12 @@ class ModelSelector { config.referenceOutput.push_back(v); } config._path_to_models = prepareModelMatching(); - config._stat_file = prepareStatMatching(); return M(config, true); } // place holder to run the matcher without providing any reference void possible() { config._path_to_models = prepareModelMatching(); - config._stat_file = prepareStatMatching(); auto tmp = M(config); ASSERT_NO_FATAL_FAILURE(tmp.match()); } diff --git a/inference-engine/tests_deprecated/functional/ie_tests/src/classification_matcher.cpp b/inference-engine/tests_deprecated/functional/ie_tests/src/classification_matcher.cpp index a99446e..e27b1f3 100644 --- a/inference-engine/tests_deprecated/functional/ie_tests/src/classification_matcher.cpp +++ b/inference-engine/tests_deprecated/functional/ie_tests/src/classification_matcher.cpp @@ -22,23 +22,6 @@ ClassificationMatcher::ClassificationMatcher(RegressionConfig &config) // Try to read labels file readLabels(labelFileName); - if (config._stat_file != "") { - InferenceEngine::NetworkStatsMap stat = testing::loadStatisticFromFile(config._stat_file); - - ICNNNetworkStats *pstats; - ((ICNNNetwork&)cnnNetwork).getStats(&pstats, nullptr); - pstats->setNodesStats(stat); - - // iterating over layers and fixing suppress_normalization->quantization_level - // because we have in tests IR which has old name for fp32 layers - for (auto& layer : cnnNetwork) { - if (layer->params.find("suppress_normalization") != layer->params.end() && - layer->params["suppress_normalization"] == "I8") { - layer->params["quantization_level"] = "FP32"; - } - } - } - if (config._reshape) { auto inputShapes = cnnNetwork.getInputShapes(); inputShapes.begin()->second[0] = config.batchSize; diff --git a/inference-engine/tests_deprecated/functional/ie_tests/src/object_detection_matcher.cpp b/inference-engine/tests_deprecated/functional/ie_tests/src/object_detection_matcher.cpp index 0a794cb..32e57a6 100644 --- a/inference-engine/tests_deprecated/functional/ie_tests/src/object_detection_matcher.cpp +++ b/inference-engine/tests_deprecated/functional/ie_tests/src/object_detection_matcher.cpp @@ -182,25 +182,6 @@ void ObjectDetectionMatcher::match(const ScoreFunction& score_function) { string binFileName = testing::FileUtils::fileNameNoExt(config._path_to_models) + ".bin"; auto cnnNetwork = config.ie_core->ReadNetwork(config._path_to_models, binFileName); - if (!config._stat_file.empty()) { - InferenceEngine::NetworkStatsMap stat = testing::loadStatisticFromFile(config._stat_file); - - IE_SUPPRESS_DEPRECATED_START - ICNNNetworkStats *pstats; - ((ICNNNetwork&)cnnNetwork).getStats(&pstats, nullptr); - pstats->setNodesStats(stat); - - // iterating over layers and fixing suppress_normalization->quantization_level - // because we have in tests IR which has old name for fp32 layers - for (auto layer : cnnNetwork) { - if (layer->params.find("suppress_normalization") != layer->params.end() && - layer->params["suppress_normalization"] == "I8") { - layer->params["quantization_level"] = "FP32"; - } - } - IE_SUPPRESS_DEPRECATED_END - } - if (config._reshape) { auto inputShapes = cnnNetwork.getInputShapes(); for (auto & shape : inputShapes) { diff --git 
a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_tests_int8.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_tests_int8.cpp index b4c53ae..7a15862 100644 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_tests_int8.cpp +++ b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_tests_int8.cpp @@ -8,11 +8,8 @@ #include "tests_common.hpp" #include "single_layer_common.hpp" -#include - #include -#include "network_stats.h" #include #include "common_test_utils/data_utils.hpp" @@ -209,25 +206,6 @@ class smoke_ConvolutionInt8Test: public TestsCommon, protected: const char* DEFAULT_PATH_P = "./lib"; - std::map collectStatistics(const void *model, size_t size, const InferenceEngine::TBlob::Ptr &weights, const std::vector outputNodes, const std::vector images) { - InferenceEngine::Core ie; - - std::shared_ptr netStats = std::shared_ptr(new NetworkStatsCollector(ie, "CPU")); - - size_t batchSize = images.size(); - - std::cout << "Batch size: " << batchSize << std::endl; - - std::map netNodesStats; - - netStats->ReadNetworkAndSetWeights(model, size, weights, batchSize); - - std::cout << "Inferencing and collecting statistics..." << std::endl; - netStats->InferAndCollectStats(images, netNodesStats); - - return netNodesStats; - } - static void compare_NRMSD(InferenceEngine::Blob &res, InferenceEngine::Blob &ref, float max_nrmsd = 0.01f) { float *res_ptr = res.buffer().as(); size_t res_size = res.size(); @@ -291,7 +269,6 @@ protected: // TODO Load nodes stats from file std::string imageFilename = TestDataHelpers::get_data_path() + "/validation_set/224x224/dog.bmp"; std::cout << "Using image file: " << imageFilename << std::endl; - std::map netNodesStats = collectStatistics(model.data(), model.length(), weights_ptr, { "conv1" }, { imageFilename }); Core ie; auto network = ie.ReadNetwork(model, weights_ptr); @@ -307,10 +284,6 @@ protected: CNNNetwork myNetwork = ie.ReadNetwork(model, weights_ptr); - ICNNNetworkStats* pstats; - ((ICNNNetwork&)myNetwork).getStats(&pstats, nullptr); - pstats->setNodesStats(netNodesStats); - SizeVector dims_src = {p.in.w, p.in.h, p.in.c, diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/network_stats.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/network_stats.cpp deleted file mode 100644 index 35d7557..0000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/network_stats.cpp +++ /dev/null @@ -1,396 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include - -#include - -#include - -#include "network_stats.h" -#include - -using namespace InferenceEngine; - -class DataStats { -public: - template - static void GetDataMinMax(const T* data, size_t count, T& min, T& max); - - template - static void GetDataAverage(const T* data, size_t count, T& ave); - - template - static void GetDataAbsMax(const T* data, size_t count, T& max); - - template - static T GetAbsMax(T min, T max); -}; - -template -void DataStats::GetDataMinMax(const T* data, size_t count, T& min, T& max) { - for (size_t i = 0; i < count; i++) { - T val = data[i]; - - if (min > val) { - min = val; - } - - if (max < val) { - max = val; - } - } -} - -template -void DataStats::GetDataAbsMax(const T* data, size_t count, T& max) { - T min = FLT_MAX; - - GetDataMinMax(data, count, min, max); - - max = GetAbsMax(min, max); -} - -template void 
DataStats::GetDataMinMax(const float* data, size_t count, float& min, float& max); -template void DataStats::GetDataMinMax(const uint8_t* data, size_t count, uint8_t& min, uint8_t& max); - -template void DataStats::GetDataAbsMax(const float* data, size_t count, float& max); - -template -void DataStats::GetDataAverage(const T* data, size_t count, T& ave) { - ave = 0; - - for (size_t i = 0; i < count; i++) { - ave += data[i]; - } - - ave /= count; -} - -template void DataStats::GetDataAverage(const float* data, size_t count, float& ave); - -template -T DataStats::GetAbsMax(T min, T max) { - if (min < 0) { - min *= -1; - } - - if (max < 0) { - max *= -1; - } - - return (max > min) ? max : min; -} - -template float DataStats::GetAbsMax(float min, float max); - - -CNNLayerPtr NetworkStatsCollector::addScaleShiftBeforeLayer(std::string name, CNNLayer::Ptr beforeLayer, size_t port, std::vector scale) { - if (beforeLayer->insData.size() < port) { - THROW_IE_EXCEPTION << "cannot find appropraite port for addScaleShiftBeforeLayer"; - } - - DataPtr pData = beforeLayer->insData[port].lock(); - LayerParams params; - params.name = name; - params.precision = Precision::FP32; - params.type = "ScaleShift"; - CNNLayerPtr lptr = std::make_shared(params); - ScaleShiftLayer *pScaleShift = dynamic_cast(lptr.get()); - - IE_ASSERT(4 == pData->getDims().size()); - std::size_t num_chanels = pData->getDims().at(1); - SizeVector wdims({ num_chanels }); - - if (scale.size() == 1) { - scale.resize(wdims[0]); - for (int i = 1; i < wdims[0]; i++) { - scale[i] = scale[0]; - } - } - - if (scale.size() != num_chanels) { - THROW_IE_EXCEPTION << "Failed to add scaleshift before " << beforeLayer->name << " due to scales and layer output dims incossitency"; - } - - Blob::Ptr weights = nullptr; - weights = make_shared_blob({Precision::FP32, wdims, Layout::C}); - weights->allocate(); - float *buffer = weights->buffer().as(); - for (size_t i = 0; i < num_chanels; i++) { - buffer[i] = scale[i]; - } - pScaleShift->_weights = weights; - - - SizeVector bdims({ num_chanels }); - Blob::Ptr biases = nullptr; - biases = make_shared_blob({Precision::FP32, bdims, Layout::C}); - biases->allocate(); - buffer = biases->buffer().as(); - for (size_t i = 0; i < num_chanels; i++) { - buffer[i] = 0.f; - } - pScaleShift->_biases = biases; - - Data *edge2 = new Data(*pData.get()); - DataPtr newEdge(edge2); - lptr->insData.push_back(pData); - lptr->outData.push_back(newEdge); - newEdge->setName(/*"EdgeAfter_" +*/ params.name); - newEdge->getCreatorLayer() = lptr; - newEdge->getInputTo().clear(); - newEdge->getInputTo()[beforeLayer->name] = beforeLayer; - - pData->getInputTo().erase(beforeLayer->name); - pData->getInputTo()[params.name] = lptr; - - for (size_t i = 0; i < beforeLayer->insData.size(); i++) { - DataPtr d = beforeLayer->insData[i].lock(); - if (d == pData) { - beforeLayer->insData[i] = newEdge; - break; - } - } - return lptr; -} - -NetworkStatsCollector::NetworkStatsCollector(const InferenceEngine::Core & ie, const std::string & deviceName) : - _ie(ie), _deviceName(deviceName) { -} - -NetworkStatsCollector::~NetworkStatsCollector() { -} - -void NetworkStatsCollector::ReadNetworkAndSetWeights(const void *model, size_t size, const InferenceEngine::TBlob::Ptr &weights, size_t batch) { - /** Reading network model **/ - _network = _ie.ReadNetwork((const char*)model, weights); - _network.setBatchSize(batch); -} - -std::string FileNameNoExt(const std::string& filePath) { - auto pos = filePath.rfind('.'); - - if (pos == std::string::npos) { - 
return filePath; - } - - return filePath.substr(0, pos); -} - -void NetworkStatsCollector::LoadNetwork(const std::string& modelPath, size_t batch) { - /** Reading network model **/ - _network = _ie.ReadNetwork(modelPath); - _network.setBatchSize(batch); -} - -void NetworkStatsCollector::InferAndCollectStats(const std::vector& images, - std::map& netNodesStats) { - slog::info << "Collecting statistics for layers:" << slog::endl; - - std::vector layersAfterInputs; - - std::string hackPrefix = "scaleshifted_input:"; - - std::map inputsFromLayers; - for (auto&& layer : _network) { - if (layer->insData.size() > 0) { - std::string inName = layer->input()->getName(); - for (auto&& input : _network.getInputsInfo()) { - if (inName == input.first) { - layersAfterInputs.push_back(layer); - inputsFromLayers[hackPrefix + layer->name] = inName; - } - } - } - } - - for (auto&& layer : layersAfterInputs) { - std::string firstInputName = hackPrefix + layer->name; - auto scaleShiftLayer = addScaleShiftBeforeLayer(firstInputName, layer, 0, { 1.f }); - ((ICNNNetwork&)_network).addLayer(scaleShiftLayer); - } - - // Adding output to every layer - for (auto&& layer : _network) { - slog::info << "\t" << layer->name << slog::endl; - - std::string layerType = _network.getLayerByName(layer->name.c_str())->type; - if (/*layerType != "Split" &&*/ layerType != "Input") { - _network.addOutput(layer->name); - } - } - - NetworkNodeStatsPtr nodeStats; - - const size_t batchSize = _network.getBatchSize(); - - std::vector imageNames; - - size_t rounded = images.size() - images.size() % batchSize; - - auto executable_network = _ie.LoadNetwork(_network, _deviceName); - - std::map> min_outputs, max_outputs; - - for (size_t i = 0; i < rounded; i += batchSize) { - slog::info << "Inferring image " << i+1 << " of " << rounded << slog::endl; - - imageNames.clear(); - - for (size_t img = 0; img < batchSize; img++) { - imageNames.push_back(images[i + img]); - } - - - /** Taking information about all topology inputs **/ - InputsDataMap inputInfo(_network.getInputsInfo()); - - if (inputInfo.size() != 1) throw std::logic_error("Sample supports topologies only with 1 input"); - auto inputInfoItem = *inputInfo.begin(); - - /** Specifying the precision of input data provided by the user. - * This should be called before load of the network to the device **/ - inputInfoItem.second->setPrecision(Precision::FP32); - inputInfoItem.second->setLayout(Layout::NCHW); - - std::vector> imagesData; - for (auto & i : imageNames) { - FormatReader::ReaderPtr reader(i.c_str()); - if (reader.get() == nullptr) { - slog::warn << "Image " + i + " cannot be read!" 
<< slog::endl; - continue; - } - /** Store image data **/ - auto data_dims = inputInfoItem.second->getTensorDesc().getDims(); - std::shared_ptr data(reader->getData(data_dims.back(), data_dims.at(data_dims.size() - 2))); - if (data.get() != nullptr) { - imagesData.push_back(data); - } - } - if (imagesData.empty()) throw std::logic_error("Valid input images were not found!"); - - OutputsDataMap outputInfo(_network.getOutputsInfo()); - for (auto itOut : outputInfo) { - itOut.second->setPrecision(Precision::FP32); - } - - auto infer_request = executable_network.CreateInferRequest(); - - // -------------------------------Set input data---------------------------------------------------- - /** Iterate over all the input blobs **/ - - /** Creating input blob **/ - Blob::Ptr input = infer_request.GetBlob(inputInfoItem.first); - if (!input) { - throw std::logic_error("Invalid input blob " + inputInfoItem.first + " pointer"); - } - - /** Filling input tensor with images. First b channel, then g and r channels **/ - auto input_dims = input->getTensorDesc().getDims(); - size_t num_chanels = input_dims.at(1); - size_t image_size = input_dims.at(input_dims.size() - 2) * input_dims.back(); - - auto data = input->buffer().as::value_type*>(); - - /** Iterate over all input images **/ - for (size_t image_id = 0; image_id < imagesData.size(); ++image_id) { - /** Iterate over all pixel in image (b,g,r) **/ - for (size_t pid = 0; pid < image_size; pid++) { - /** Iterate over all channels **/ - for (size_t ch = 0; ch < num_chanels; ++ch) { - /** [images stride + channels stride + pixel id ] all in bytes **/ - data[image_id * image_size * num_chanels + ch * image_size + pid ] = imagesData.at(image_id).get()[pid*num_chanels + ch]; - } - } - } - - infer_request.Infer(); - - - for (auto itOut : outputInfo) { - auto outBlob = infer_request.GetBlob(itOut.first); - - std::string outName = itOut.first; - if (inputsFromLayers.find(itOut.first) != inputsFromLayers.end()) { - outName = inputsFromLayers[itOut.first]; - } - - size_t N, C, statCount; - auto output_dims = outBlob->getTensorDesc().getDims(); - if (output_dims.size() == 4 && outBlob->getTensorDesc().getLayout() == Layout::NCHW) { - N = output_dims[0]; - C = output_dims[1]; - statCount = C; - } else if (output_dims.size() == 2 && outBlob->getTensorDesc().getLayout() == Layout::NC) { - N = output_dims[0]; - C = output_dims[1]; - statCount = 1; - } else { - slog::warn << "Only NCHW and NC layouts are supported. 
Skipping layer \"" << outName << "\"" << slog::endl; - continue; - } - - - if (netNodesStats.find(outName) == netNodesStats.end()) { - nodeStats = NetworkNodeStatsPtr(new NetworkNodeStats(statCount)); - - netNodesStats[outName] = nodeStats; - } else { - nodeStats = netNodesStats[outName]; - } - - // Counting min/max outputs per channel - for (size_t n = 0; n < N; n++) { - if (output_dims.size() == 4) { - size_t _HW = output_dims.back() * output_dims.at(output_dims.size() - 2); - for (size_t c = 0; c < C; c++) { - if (outBlob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) { - float* ptr = &outBlob->buffer().as()[(n * C + c) * _HW]; - - float min = nodeStats->_minOutputs[c]; - float max = nodeStats->_maxOutputs[c]; - DataStats::GetDataMinMax(ptr, _HW, min, max); - nodeStats->_minOutputs[c] = min; - nodeStats->_maxOutputs[c] = max; - } else if (outBlob->getTensorDesc().getPrecision() == InferenceEngine::Precision::U8) { - uint8_t* ptr = &outBlob->buffer().as()[(n * C + c) * _HW]; - - uint8_t min = nodeStats->_minOutputs[c]; - uint8_t max = nodeStats->_maxOutputs[c]; - DataStats::GetDataMinMax(ptr, _HW, min, max); - nodeStats->_minOutputs[c] = min; - nodeStats->_maxOutputs[c] = max; - } else { - throw std::logic_error(std::string("Unsupported precision: ") + outBlob->getTensorDesc().getPrecision().name()); - } - } - } else if (output_dims.size() == 2) { - if (outBlob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) { - float* ptr = &outBlob->buffer().as()[n * C]; - - float min = nodeStats->_minOutputs[0]; - float max = nodeStats->_maxOutputs[0]; - DataStats::GetDataMinMax(ptr, C, min, max); - nodeStats->_minOutputs[0] = min; - nodeStats->_maxOutputs[0] = max; - } else if (outBlob->getTensorDesc().getPrecision() == InferenceEngine::Precision::U8) { - uint8_t* ptr = &outBlob->buffer().as()[n * C]; - - uint8_t min = nodeStats->_minOutputs[0]; - uint8_t max = nodeStats->_maxOutputs[0]; - DataStats::GetDataMinMax(ptr, C, min, max); - nodeStats->_minOutputs[0] = min; - nodeStats->_maxOutputs[0] = max; - } else { - throw std::logic_error(std::string("Unsupported precision: ") + outBlob->getTensorDesc().getPrecision().name()); - } - } - } - } - } -} \ No newline at end of file diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/network_stats.h b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/network_stats.h deleted file mode 100644 index 03a91f8..0000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/network_stats.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include - -#include -#include - -class NetworkStatsCollector { -public: - NetworkStatsCollector(const InferenceEngine::Core & ie, const std::string & deviceName); - ~NetworkStatsCollector(); - -public: - void ReadNetworkAndSetWeights(const void *model, size_t size, const InferenceEngine::TBlob::Ptr &weights, size_t batch); - void LoadNetwork(const std::string& modelPath, size_t batch); - - void InferAndCollectStats(const std::vector& images, - std::map& netNodesStats); - -/* void InferAndCollectHistogram(const std::vector& images, - const std::vector& layerNames, - std::map& netNodesStats); - - void InferAndFindOptimalThreshold(const std::vector& images, - const std::vector& layerNames, - std::map& netNodesStats); - - void CalculateThreshold(std::map& netNodesStats);*/ - - void CalculatePotentialMax(const 
float* weights, const InferenceEngine::SizeVector& weightDism, float& max); - static InferenceEngine::CNNLayerPtr addScaleShiftBeforeLayer(std::string name, InferenceEngine::CNNLayer::Ptr beforeLayer, - size_t port, std::vector scale); - -private: - InferenceEngine::Core _ie; - InferenceEngine::CNNNetwork _network; - std::string _deviceName; -}; diff --git a/inference-engine/tests_deprecated/functional/shared_tests/network_tests/network_i8.hpp b/inference-engine/tests_deprecated/functional/shared_tests/network_tests/network_i8.hpp index f4ff8e9..41ad505 100644 --- a/inference-engine/tests_deprecated/functional/shared_tests/network_tests/network_i8.hpp +++ b/inference-engine/tests_deprecated/functional/shared_tests/network_tests/network_i8.hpp @@ -51,7 +51,6 @@ struct network_params { std::string deviceName; std::string modelFile; std::string imageName; - std::string statFile; std::vector> refValue; // optional config (used for multi-device) std::map config; @@ -77,13 +76,6 @@ struct network_params { result += imageName; return result; } - - std::string stat() { - ModelsPath result; - result += kPathSeparator; - result += statFile; - return result; - } }; static LayerTransformation::Params createParam() { @@ -354,13 +346,6 @@ protected: network.setBatchSize(batch_size); ie.SetConfig(p.config); - if (p.statFile != "") { - InferenceEngine::NetworkStatsMap stat = testing::loadStatisticFromFile(p.stat()); - - ICNNNetworkStats *pstats; - ((ICNNNetwork&)network).getStats(&pstats, nullptr); - pstats->setNodesStats(stat); - } if (transformationsParams.transformationsInTestEnabled) { ICNNNetwork& icnnnetwork = network; @@ -527,7 +512,6 @@ protected: "CPU", transformationsParam.modelParams.irFilePath, transformationsParam.modelParams.dataFilePath, - "", referenceValues }; diff --git a/inference-engine/tests_deprecated/functional/shared_tests/transformations/conv_base_test.cpp b/inference-engine/tests_deprecated/functional/shared_tests/transformations/conv_base_test.cpp index a82af43..1e31941 100644 --- a/inference-engine/tests_deprecated/functional/shared_tests/transformations/conv_base_test.cpp +++ b/inference-engine/tests_deprecated/functional/shared_tests/transformations/conv_base_test.cpp @@ -76,7 +76,7 @@ std::string ConvolutionBaseTestModel::getModel(SingleLayerTransformationsTestPar &fake_quantize_params, { {weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}} }, "fakeQuantizeOnWeights") - .convolutionLayer(p._network_precision, { convolutionDims, {convOutShape} }, conv, {}, "Convolution"); + .convolutionLayer(p._network_precision, { convolutionDims, {convOutShape} }, conv, "Convolution"); if (addBiasesLayer) { builder.addLayer("Const", p._network_precision, &const_params, { {}, {biasesConvolutionConstDims} }, type_size * conv.out_c, "biasesConst"); diff --git a/inference-engine/tests_deprecated/functional/shared_tests/transformations/precision_selection_multibranch_not_preserved.cpp b/inference-engine/tests_deprecated/functional/shared_tests/transformations/precision_selection_multibranch_not_preserved.cpp index edde971..ff82419 100644 --- a/inference-engine/tests_deprecated/functional/shared_tests/transformations/precision_selection_multibranch_not_preserved.cpp +++ b/inference-engine/tests_deprecated/functional/shared_tests/transformations/precision_selection_multibranch_not_preserved.cpp @@ -103,7 +103,7 @@ std::string PrecisionSelectionMultibranchNotPreservedTestModel::getModel(SingleL .convolutionLayer( p._network_precision, { {p.inputDimensions[0], 
weightsConstInputDims, biasesConvolutionConstDims }, - {convOutShape} }, conv, {}, "convolution") + {convOutShape} }, conv, "convolution") // 15 .addLayer("Pooling", p._network_precision, &poolingParams, { {dimensions}, {dimensions} }) .finish(&edges); diff --git a/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_case_common.hpp b/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_case_common.hpp index 0a80bad..cd81b3e 100644 --- a/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_case_common.hpp +++ b/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_case_common.hpp @@ -56,7 +56,6 @@ extern bool CheckMA2085(); using Batch = int; using DoReshape = bool; using Resources = int; -using IsIgnoreStatistic = bool; using PluginDevicePair = std::pair; //------------------------------------------------------------------------------ diff --git a/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_case_params.hpp b/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_case_params.hpp index 4b7b63c..4a7528b 100644 --- a/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_case_params.hpp +++ b/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_case_params.hpp @@ -54,12 +54,10 @@ public: std::string model_name, std::string img_name, double reference_delta, - Regression::EMean mean = Regression::EMean::eValues, - bool with_stat_file = false); + Regression::EMean mean = Regression::EMean::eValues); // Accessors inline Regression::EMean mean() const; - inline bool withStatFile() const; // Operations inline std::string name() const override; @@ -72,7 +70,6 @@ public: private: //Data section Regression::EMean mean_; - bool with_stat_file_; }; //------------------------------------------------------------------------------ @@ -140,21 +137,15 @@ inline ClassificationSrcParam::ClassificationSrcParam( std::string model_name, std::string img_name, double reference_delta, - Regression::EMean mean, - bool with_stat_file): + Regression::EMean mean): SourceParameterBase(model_name, img_name, reference_delta), - mean_(mean), - with_stat_file_(with_stat_file) { + mean_(mean) { } inline Regression::EMean ClassificationSrcParam::mean() const { return mean_; } -inline bool ClassificationSrcParam::withStatFile() const { - return with_stat_file_; -} - inline std::string ClassificationSrcParam::name() const { return SourceParameterBase::name() + "_Mean=" + format_mean(mean_); diff --git a/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_classification_case.cpp b/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_classification_case.cpp index 81169ef..3b08936 100644 --- a/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_classification_case.cpp +++ b/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_classification_case.cpp @@ -15,8 +15,7 @@ std::string VpuNoClassificationRegression::getTestCaseName( get<2>(param.param), get<3>(param.param)) + "_SHAVES=" + (get<4>(param.param) == -1 ? 
"AUTO" : std::to_string(get<4>(param.param))) + - "_IsIgnoreStatistic=" + std::to_string(get<5>(param.param)) + - "_" + get<6>(param.param).name(); + "_" + get<5>(param.param).name(); } void VpuNoClassificationRegression::SetUp() { @@ -28,8 +27,7 @@ void VpuNoClassificationRegression::SetUp() { batch_= get<2>(ClassificationTestVpuParam::GetParam()); do_reshape_= get<3>(ClassificationTestVpuParam::GetParam()); resources_= get<4>(ClassificationTestVpuParam::GetParam()); - is_ignore_statistic_ = get<5>(ClassificationTestVpuParam::GetParam()); - source_param_= get<6>(ClassificationTestVpuParam::GetParam()); + source_param_= get<5>(ClassificationTestVpuParam::GetParam()); InitConfig(); } @@ -41,12 +39,6 @@ void VpuNoClassificationRegression::InitConfig() { config_["VPU_NUMBER_OF_CMX_SLICES"] = std::to_string(resources_); config_["VPU_NUMBER_OF_SHAVES"] = std::to_string(resources_); } - - if (is_ignore_statistic_) { - config_["VPU_IGNORE_IR_STATISTIC"] = CONFIG_VALUE(YES); - } else { - config_["VPU_IGNORE_IR_STATISTIC"] = CONFIG_VALUE(NO); - } } //------------------------------------------------------------------------------ diff --git a/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_classification_case.hpp b/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_classification_case.hpp index 181d3e5..eeb4551 100644 --- a/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_classification_case.hpp +++ b/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_classification_case.hpp @@ -13,7 +13,6 @@ using ClassificationTestVpuParam = WithParamInterface>; using ClassificationSpecificTestVpuParam = WithParamInterface #include "cpp/ie_cnn_network.h" #include -#include "ie_icnn_network_stats.hpp" #include "xml_helper.hpp" #include @@ -92,58 +91,4 @@ std::string XMLHelper::getXmlPath(const std::string & filePath){ return xmlPath; } -InferenceEngine::NetworkStatsMap loadStatisticFromFile(const std::string& xmlPath) { - auto splitParseCommas = [&](const std::string& s) ->std::vector { - std::vector res; - std::stringstream ss(s); - - float val; - - while (ss >> val) { - res.push_back(val); - - if (ss.peek() == ',') - ss.ignore(); - } - - return res; - }; - - InferenceEngine::NetworkStatsMap newNetNodesStats; - - pugi::xml_document doc; - - pugi::xml_parse_result pr = doc.load_file(xmlPath.c_str()); - - - if (!pr) { - THROW_IE_EXCEPTION << "Can't load stat file " << xmlPath; - } - - auto stats = doc.child("stats"); - auto layers = stats.child("layers"); - - InferenceEngine::NetworkNodeStatsPtr nodeStats; - size_t offset; - size_t size; - size_t count; - - IE_SUPPRESS_DEPRECATED_START - - for (auto layer : layers.children("layer")) { - nodeStats = InferenceEngine::NetworkNodeStatsPtr(new InferenceEngine::NetworkNodeStats()); - - std::string name = layer.child("name").text().get(); - - newNetNodesStats[name] = nodeStats; - - nodeStats->_minOutputs = splitParseCommas(layer.child("min").text().get()); - nodeStats->_maxOutputs = splitParseCommas(layer.child("max").text().get()); - } - - IE_SUPPRESS_DEPRECATED_END - - return newNetNodesStats; -} - } diff --git a/inference-engine/tests_deprecated/helpers/xml_helper.hpp b/inference-engine/tests_deprecated/helpers/xml_helper.hpp index 86e93c3..8bd4e93 100644 --- a/inference-engine/tests_deprecated/helpers/xml_helper.hpp +++ b/inference-engine/tests_deprecated/helpers/xml_helper.hpp @@ -14,7 +14,6 @@ #include #include "cpp/ie_cnn_network.h" #include -#include 
namespace testing { @@ -54,6 +53,4 @@ private: std::shared_ptr _impl; }; -InferenceEngine::NetworkStatsMap loadStatisticFromFile(const std::string& xmlPath); - } diff --git a/inference-engine/tests_deprecated/unit/CMakeLists.txt b/inference-engine/tests_deprecated/unit/CMakeLists.txt index 48afcbb..96e24dc 100644 --- a/inference-engine/tests_deprecated/unit/CMakeLists.txt +++ b/inference-engine/tests_deprecated/unit/CMakeLists.txt @@ -13,17 +13,11 @@ SET (CMAKE_SKIP_RPATH OFF) file(GLOB TEST_SRC + cnn_network/*.cpp graph_tools/*.cpp - http_client/*.cpp inference_engine_tests/*.cpp - inference_engine_tests/cpp_interfaces/*.cpp - inference_engine_tests/normalization/*.cpp - inference_engine_tests/transformations/*.cpp - inference_engine_tests/transformations/*.hpp - cnn_network/*.cpp - topology_verification_tests/*.cpp stress_tests/*.cpp - cpp_api/*.cpp + topology_verification_tests/*.cpp ) if (ENABLE_GNA) diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/dumper_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/dumper_test.cpp index 32d1f12..19f5d4d 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/dumper_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/dumper_test.cpp @@ -7,10 +7,8 @@ #include "mkldnn_graph.h" #include "mkldnn_graph_dumper.h" #include "ie_blob.h" -#include "ie_util_internal.hpp" #include "details/ie_cnn_network_tools.h" #include "common_test_utils/xml_net_builder/xml_net_builder.hpp" -#include "graph_tools.hpp" #include #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/normalizer/supported_fusions_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/normalizer/supported_fusions_test.cpp index 8a1000d..0de2209 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/normalizer/supported_fusions_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/normalizer/supported_fusions_test.cpp @@ -53,12 +53,10 @@ class ConvSum: public TestsCommon, public ::testing::WithParamInterface max_stat(p.in1[1]); CommonTestUtils::fill_data_sine(min_stat.data(), p.in1[1], -1, 1, 1); CommonTestUtils::fill_data_sine(max_stat.data(), p.in1[1], 1, 1, -1); - CommonTestUtils::Statistic in_stat = {min_stat, max_stat}; std::vector conv_min_stat(convOutShape[1]); std::vector conv_max_stat(convOutShape[1]); CommonTestUtils::fill_data_sine(conv_min_stat.data(), convOutShape[1], -1, 1, 1); CommonTestUtils::fill_data_sine(conv_max_stat.data(), convOutShape[1], 1, 1, -1); - CommonTestUtils::Statistic conv_stat = {conv_min_stat, conv_max_stat}; std::map elt_params = { {"operation", "sum"} @@ -66,10 +64,10 @@ class ConvSum: public TestsCommon, public ::testing::WithParamInterface> edges = { {"0,0", "2,2"}, {"2,3", "3,4"}, {"1,1", "3,5"} }; return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput( - "Fusion_conv_sum", p.in1, precision, in_stat) - .addInputLayer(precision, convOutShape, in_stat) - .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv, conv_stat) - .addLayer("Eltwise", precision, &elt_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "", conv_stat) + "Fusion_conv_sum", p.in1, precision) + .addInputLayer(precision, convOutShape) + .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv) + .addLayer("Eltwise", precision, &elt_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "") .finish(&edges); } @@ -114,12 +112,10 @@ class ConvSumReLU: public TestsCommon, public ::testing::WithParamInterface 
max_stat(p.in1[1]); CommonTestUtils::fill_data_sine(min_stat.data(), p.in1[1], -1, 1, 1); CommonTestUtils::fill_data_sine(max_stat.data(), p.in1[1], 1, 1, -1); - CommonTestUtils::Statistic in_stat = {min_stat, max_stat}; std::vector conv_min_stat(convOutShape[1]); std::vector conv_max_stat(convOutShape[1]); CommonTestUtils::fill_data_sine(conv_min_stat.data(), convOutShape[1], -1, 1, 1); CommonTestUtils::fill_data_sine(conv_max_stat.data(), convOutShape[1], 1, 1, -1); - CommonTestUtils::Statistic conv_stat = {conv_min_stat, conv_max_stat}; std::map elt_params = { {"operation", "sum"} @@ -127,11 +123,11 @@ class ConvSumReLU: public TestsCommon, public ::testing::WithParamInterface relu_params = {}; std::vector> edges = { {"0,0", "2,2"}, {"2,3", "3,4"}, {"1,1", "3,5"}, {"3,6", "4,7"} }; return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput( - "Fusion_conv_sum", p.in1, precision, in_stat) - .addInputLayer(precision, convOutShape, in_stat) - .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv, conv_stat) - .addLayer("Eltwise", precision, &elt_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "", conv_stat) - .addLayer("ReLU", precision, &relu_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "", conv_stat) + "Fusion_conv_sum", p.in1, precision) + .addInputLayer(precision, convOutShape) + .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv) + .addLayer("Eltwise", precision, &elt_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "") + .addLayer("ReLU", precision, &relu_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "") .finish(&edges); } @@ -179,23 +175,21 @@ class ConvConvSum: public TestsCommon, public ::testing::WithParamInterface max_stat(p.in1[1]); CommonTestUtils::fill_data_sine(min_stat.data(), p.in1[1], -1, 1, 1); CommonTestUtils::fill_data_sine(max_stat.data(), p.in1[1], 1, 1, -1); - CommonTestUtils::Statistic in_stat = {min_stat, max_stat}; std::vector conv_min_stat(convOutShape[1]); std::vector conv_max_stat(convOutShape[1]); CommonTestUtils::fill_data_sine(conv_min_stat.data(), convOutShape[1], -1, 1, 1); CommonTestUtils::fill_data_sine(conv_max_stat.data(), convOutShape[1], 1, 1, -1); - CommonTestUtils::Statistic conv_stat = {conv_min_stat, conv_max_stat}; std::map elt_params = { {"operation", "sum"} }; std::vector> edges = { {"0,0", "2,2"}, {"2,3", "4,6"}, {"1,1", "3,4"}, {"3,5", "4,7"} }; return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput( - "Fusion_conv_sum", p.in1, precision, in_stat) - .addInputLayer(precision, p.in1, in_stat) - .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv, conv_stat) - .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv, conv_stat) - .addLayer("Eltwise", precision, &elt_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "", conv_stat) + "Fusion_conv_sum", p.in1, precision) + .addInputLayer(precision, p.in1) + .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv) + .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv) + .addLayer("Eltwise", precision, &elt_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "") .finish(&edges); } @@ -242,24 +236,18 @@ class ConvConvSumReLU: public TestsCommon, public ::testing::WithParamInterface< getConvOutShape(p.in1, p.conv1, convOutShape1); getConvOutShape(p.in2, p.conv2, convOutShape2); - CommonTestUtils::Statistic in1_stat, in2_stat, conv1_stat, conv2_stat; - fillStatistic(in1_stat, p.in1[1], -2, 2); - 
-        fillStatistic(in2_stat, p.in2[1], -2, 2);
-        fillStatistic(conv1_stat, p.conv1.out_c, -2, 2);
-        fillStatistic(conv2_stat, p.conv2.out_c, -2, 2);
-
         std::map elt_params = {
                 {"operation", "sum"}
         };
         std::map relu_params = {};
         std::vector> edges = { {"0,0", "2,2"}, {"2,3", "4,6"}, {"1,1", "3,4"}, {"3,5", "4,7"}, {"4,8", "5,9"} };
         return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
-                "Fusion_conv_sum", p.in1, precision, in1_stat)
-                .addInputLayer(precision, p.in2, in2_stat)
-                .convolutionLayer(precision, {{p.in1}, {convOutShape1}}, p.conv1, conv1_stat)
-                .convolutionLayer(precision, {{p.in2}, {convOutShape2}}, p.conv2, conv2_stat)
-                .addLayer("Eltwise", precision, &elt_params, {{convOutShape1, convOutShape2}, {convOutShape1}}, 0, 0, "data", "", conv1_stat)
-                .addLayer("ReLU", precision, &relu_params, {{convOutShape1}, {convOutShape1}}, 0, 0, "data", "", conv1_stat)
+                "Fusion_conv_sum", p.in1, precision)
+                .addInputLayer(precision, p.in2)
+                .convolutionLayer(precision, {{p.in1}, {convOutShape1}}, p.conv1)
+                .convolutionLayer(precision, {{p.in2}, {convOutShape2}}, p.conv2)
+                .addLayer("Eltwise", precision, &elt_params, {{convOutShape1, convOutShape2}, {convOutShape1}}, 0, 0, "data", "")
+                .addLayer("ReLU", precision, &relu_params, {{convOutShape1}, {convOutShape1}}, 0, 0, "data", "")
                 .finish(&edges);
     }
 
@@ -314,14 +302,6 @@ class ConvConvSumReLUPoolConv: public TestsCommon, public ::testing::WithParamIn
         getConvOutShape(convOutShape1, p.conv3, convOutShape3);
         getPoolOutShape(convOutShape1, p.pool, poolOutShape);
 
-        CommonTestUtils::Statistic in1_stat, in2_stat, conv1_stat, conv2_stat, conv3_stat, pool_stat;
-        fillStatistic(in1_stat, p.in1[1], -2.f, 2.f);
-        fillStatistic(in2_stat, p.in2[1], -2.f, 2.f);
-        fillStatistic(conv1_stat, p.conv1.out_c, -2.f, 2.f);
-        fillStatistic(conv2_stat, p.conv2.out_c, -2.f, 2.f);
-        fillStatistic(conv3_stat, p.conv3.out_c, -2.f, 2.f);
-        fillStatistic(pool_stat, poolOutShape[1], 0.f, 3.f);
-
         std::map elt_params = {
                 {"operation", "sum"}
         };
@@ -334,14 +314,14 @@ class ConvConvSumReLUPoolConv: public TestsCommon, public ::testing::WithParamIn
                 {"5,10", "7,13"}, {"4,8", "6,11"} };
         return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
-                "Fusion_conv_sum", p.in1, precision, in1_stat)
-                .addInputLayer(precision, p.in2, in2_stat)
-                .convolutionLayer(precision, {{p.in1}, {convOutShape1}}, p.conv1, conv1_stat)
-                .convolutionLayer(precision, {{p.in2}, {convOutShape2}}, p.conv2, conv2_stat)
-                .addLayer("Eltwise", precision, &elt_params, {{convOutShape1, convOutShape2}, {convOutShape1}}, 0, 0, "data", "", conv1_stat)
-                .addLayer("ReLU", precision, &relu_params, {{convOutShape1}, {convOutShape1}}, 0, 0, "data", "", pool_stat)
-                .convolutionLayer(precision, {{convOutShape1}, {convOutShape3}}, p.conv3, conv3_stat)
-                .addLayer("Pooling", precision, &relu_params, {{convOutShape1}, {poolOutShape}}, 0, 0, "data", "", pool_stat)
+                "Fusion_conv_sum", p.in1, precision)
+                .addInputLayer(precision, p.in2)
+                .convolutionLayer(precision, {{p.in1}, {convOutShape1}}, p.conv1)
+                .convolutionLayer(precision, {{p.in2}, {convOutShape2}}, p.conv2)
+                .addLayer("Eltwise", precision, &elt_params, {{convOutShape1, convOutShape2}, {convOutShape1}}, 0, 0, "data", "")
+                .addLayer("ReLU", precision, &relu_params, {{convOutShape1}, {convOutShape1}}, 0, 0, "data", "")
+                .convolutionLayer(precision, {{convOutShape1}, {convOutShape3}}, p.conv3)
+                .addLayer("Pooling", precision, &relu_params, {{convOutShape1}, {poolOutShape}}, 0, 0, "data", "")
                 .finish(&edges);
     }
 
diff --git
a/inference-engine/tests_deprecated/unit/inference_engine_tests/normalization/latest_in_fuse_test.cpp b/inference-engine/tests_deprecated/unit/inference_engine_tests/normalization/latest_in_fuse_test.cpp deleted file mode 100644 index bfea14b..0000000 --- a/inference-engine/tests_deprecated/unit/inference_engine_tests/normalization/latest_in_fuse_test.cpp +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include - -#include -#include "tests_common.hpp" -#include "ir_gen_helper.hpp" - -#include "common_test_utils/common_layers_params.hpp" - -using namespace ::testing; -using namespace single_layer_tests; - -struct conv_conv_eltwise_params { - // Formats: NCHW, NCDHW - std::vector in; - - CommonTestUtils::conv_common_params conv; - CommonTestUtils::eltwise_common_params eltwise; -}; - -class NormalizationConvConvEltwiseTests: public TestsCommon, - public WithParamInterface { - std::string layers_t = R"V0G0N( - - - - - __INP_DIMS__ - - - - - __CONV_OUT_DIMS__ - - - - - - - - - - - - __INP_DIMS__ - - - - - __CONV_OUT_DIMS__ - - - - - - - - - - - - __CONV_OUT_DIMS__ - - - __CONV_OUT_DIMS__ - - - - - __CONV_OUT_DIMS__ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - - - - -)V0G0N"; - - std::string getModel(conv_conv_eltwise_params p) { - std::string model = layers_t; - - std::string s_dims; - for (auto& dim : p.in) { - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__INP_DIMS__", s_dims); - - s_dims = "\n "; - s_dims += std::to_string(p.in[0]) + ""; - s_dims += "\n "; - s_dims += std::to_string(p.conv.out_c) + ""; - int k_len = p.conv.kernel.size(); - for (size_t i = 2; i < p.in.size(); i++) { - size_t inx = k_len - i + 1; - size_t dim = (p.in[i] + 2lu * p.conv.pads_begin[inx] - p.conv.kernel[inx]) / p.conv.stride[inx] + 1lu; - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__CONV_OUT_DIMS__", s_dims); - - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_K_", p.conv.kernel); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_KS_", p.conv.stride); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PB_", p.conv.pads_begin); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PE_", p.conv.pads_end); - REPLACE_WITH_NUM(model, "_GC_", p.conv.group); - REPLACE_WITH_NUM(model, "_OC_", p.conv.out_c); - - model = IRTemplateGenerator::getIRTemplate("Deconvolution_Concat", p.in, "FP32", model, edges_t); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - conv_conv_eltwise_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core ie; - InferenceEngine::CNNNetwork network; - auto blob = InferenceEngine::make_shared_blob(InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, - {9}, InferenceEngine::Layout::C)); - blob->allocate(); - ASSERT_NO_THROW(network = ie.ReadNetwork(model, blob)); - - int maxSign = 0x7F; - int maxUnsign = 0xFF; - - InferenceEngine::details::CNNStatisticHelper statHelper(network, {}, maxSign, maxUnsign); - auto conv_1 = network.getLayerByName("conv_1"); - auto conv_2 = network.getLayerByName("conv_2"); - auto eltwise = network.getLayerByName("eltwise_block"); - - ASSERT_EQ(eltwise, statHelper.getLatestInFuse(conv_1)); - ASSERT_EQ(conv_2, statHelper.getLatestInFuse(conv_2)); - ASSERT_EQ(eltwise, statHelper.getLatestInFuse(eltwise)); - } catch (const InferenceEngine::details::InferenceEngineException &e) { - 
-            FAIL() << e.what();
-        }
-    }
-};
-
-TEST_P(NormalizationConvConvEltwiseTests, TestsConvConvEltwise) {}
-
-INSTANTIATE_TEST_CASE_P(
-        TestsConvConvEltwise, NormalizationConvConvEltwiseTests,
-        ::testing::Values(
-                conv_conv_eltwise_params{{1, 16, 4, 4},
-                        { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "", 1, 32, true },
-                        {"sum", {}} },
-                conv_conv_eltwise_params{{1, 16, 4, 4, 4},
-                        { {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, "", 1, 32, true },
-                        {"sum", {}} }
-        ));
diff --git a/inference-engine/tests_deprecated/unit/inference_engine_tests/util_test.cpp b/inference-engine/tests_deprecated/unit/inference_engine_tests/util_test.cpp
index c12dd81..3fc681e 100644
--- a/inference-engine/tests_deprecated/unit/inference_engine_tests/util_test.cpp
+++ b/inference-engine/tests_deprecated/unit/inference_engine_tests/util_test.cpp
@@ -182,7 +182,7 @@ TEST(UtilTests, cloneNet) {
     {
         auto layer = getLayer(net, "layer1");
-        auto cloned = IE::cloneNet({layer}, nullptr);
+        auto cloned = IE::cloneNet({layer});
         EXPECT_EQ(2, cloned->layerCount());
         auto clonedLayer = getLayer(cloned, "layer1");
         ASSERT_NE(nullptr, clonedLayer);
@@ -200,7 +200,7 @@ TEST(UtilTests, cloneNet) {
     {
         auto layer1 = getLayer(net, "layer1");
         auto layer2 = getLayer(net, "layer2");
-        auto cloned = IE::cloneNet({layer1,layer2}, nullptr);
+        auto cloned = IE::cloneNet({layer1,layer2});
         EXPECT_EQ(4, cloned->layerCount());
         auto clonedLayer1 = getLayer(cloned, "layer1");
         auto clonedLayer2 = getLayer(cloned, "layer2");
@@ -221,7 +221,7 @@ TEST(UtilTests, cloneNet) {
     {
         auto layer4 = getLayer(net, "layer4");
         auto layer5 = getLayer(net, "layer5");
-        auto cloned = IE::cloneNet({layer4,layer5}, nullptr);
+        auto cloned = IE::cloneNet({layer4,layer5});
         EXPECT_EQ(4, cloned->layerCount());
         auto clonedLayer4 = getLayer(cloned, "layer4");
         auto clonedLayer5 = getLayer(cloned, "layer5");
@@ -253,7 +253,7 @@ TEST(UtilTests, cloneNet) {
     }
     {
         auto layer3 = getLayer(net, "layer3");
-        auto cloned = IE::cloneNet({layer3}, nullptr);
+        auto cloned = IE::cloneNet({layer3});
         EXPECT_EQ(2, cloned->layerCount());
         auto clonedLayer3 = getLayer(cloned, "layer3");
         ASSERT_NE(nullptr, clonedLayer3);
@@ -283,7 +283,7 @@ TEST(UtilTests, cloneNet) {
         auto layer5 = getLayer(net, "layer5");
         auto layer6 = getLayer(net, "layer6");
         auto layer7 = getLayer(net, "layer7");
-        auto cloned = IE::cloneNet({layer1,layer2,layer3,layer4,layer5,layer6,layer7}, nullptr);
+        auto cloned = IE::cloneNet({layer1,layer2,layer3,layer4,layer5,layer6,layer7});
         EXPECT_EQ(9, cloned->layerCount());
         auto clonedLayer1 = getLayer(cloned, "layer1");
         auto clonedLayer2 = getLayer(cloned, "layer2");
@@ -414,7 +414,7 @@ TEST(UtilTests, cloneNet_input) {
     auto cloned = IE::cloneNet({getLayer(net, "layer1"),
                                 getLayer(net, "layer2"),
-                                getLayer(net, "layer3")}, nullptr);
+                                getLayer(net, "layer3")});
 
     ASSERT_EQ(6, cloned->layerCount());
 
     ASSERT_NE(nullptr, getLayer(cloned, "input1"));
@@ -468,7 +468,7 @@ TEST(UtilTests, cloneNet_const) {
     auto cloned = IE::cloneNet({getLayer(net, "layer1"),
                                 getLayer(net, "layer2"),
-                                getLayer(net, "layer3")}, nullptr);
+                                getLayer(net, "layer3")});
 
     ASSERT_EQ(6, cloned->layerCount());
 
     ASSERT_NE(nullptr, getLayer(cloned, "input1"));
-- 
2.7.4