cdef public:
_requests, _infer_requests
-cdef class LayersStatsMap(dict):
- cdef C.IENetwork net_impl
-
cdef class IECore:
cdef C.IECore impl
cpdef IENetwork read_network(self, model : [str, bytes, Path], weights : [str, bytes, Path] = ?, bool init_from_buffer = ?)
self.input_blobs[k].buffer[:] = v
-## Layer calibration statistic container.
-class LayerStats:
-
- ## Class constructor
- #
- # @param min: Tuple with per-channel minimum layer activation values
- # @param max: Tuple with per-channel maximum layer activation values
- # @return An instance of LayerStats class
- def __init__(self, min: tuple = (), max: tuple = ()):
- self._min = min
- self._max = max
-
- ## Tuple with per-channel minimum layer activation values
- @property
- def min(self):
- return self._min
-
- ## Tuple with per-channel maximum layer activation values
- @property
- def max(self):
- return self._max
-
-
-## Class inherited from built-in python `dict` class and overrides default `update()`method to allow
-# to set or modify layers calibration statistics.
-cdef class LayersStatsMap(dict):
- def update(self, other=None, **kwargs):
- super(LayersStatsMap, self).update(other, **kwargs)
- cdef map[string, map[string, vector[float]]] c_stats_map
- cdef map[string, vector[float]] c_node_stats
- for k, v in self.items():
- c_node_stats["min".encode()] = v.min
- c_node_stats["max".encode()] = v.max
- c_stats_map[k.encode()] = c_node_stats
- self.net_impl.setStats(c_stats_map)
-
## This class represents a main layer information and providing setters allowing to modify layer properties
cdef class IENetLayer:
## Name of the layer
layers[deref(l).name.decode()] = net_l
return layers
- ## \note This property is deprecated.
- # New Calibration Tool doesn't generate statistics
- #
- # Returns `LayersStatsMap` object containing dictionary that maps network layer names to calibration statistics
- # represented by `LayerStats` objects.
- #
- # Usage example:\n
- # ```python
- # ie = IECore()
- # net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
- # net.stats.update({"conv1_2d" : LayserStats(min=(-25, -1, 0), max=(63, 124, 70)),
- # "conv2_2d" : LayserStats(min=(-5, -1, 0, 1, -7, 2), max=(63, 124, 70, 174, 99, 106))
- # })
- # ```
- @property
- def stats(self):
- warnings.warn("stats property of IENetwork is deprecated.",
- DeprecationWarning)
- cdef map[string, map[string, vector[float]]] c_stats_map = self.impl.getStats()
- py_stats_map = LayersStatsMap()
- py_stats_map.net_impl = self.impl
- for it in c_stats_map:
- py_stats_map[it.first.decode()] = LayerStats(min=tuple(it.second["min".encode()]),
- max=tuple(it.second["max".encode()]))
- return py_stats_map
-
-
## Marks any intermediate layer as output layer to retrieve the inference results from the specified layers.
# @param outputs: List of layers to be set as model outputs. The list can contain strings with layer names to be set
# as outputs or tuples with layer name as first element and output port id as second element.
actual->reshape(input_shapes);
}
-const std::map <std::string, std::map<std::string, std::vector < float>>>
-
-InferenceEnginePython::IENetwork::getStats() {
- IE_SUPPRESS_DEPRECATED_START
- std::map < std::string, std::map < std::string, std::vector < float >> > map;
- InferenceEngine::ICNNNetworkStats *pstats = nullptr;
- InferenceEngine::ResponseDesc response;
- auto retCode = ((InferenceEngine::ICNNNetwork &) *actual).getStats(&pstats, &response);
- if (retCode == InferenceEngine::OK) {
- auto statsMap = pstats->getNodesStats();
- for (const auto &it : statsMap) {
- std::map <std::string, std::vector<float>> stats;
- stats.emplace("min", it.second->_minOutputs);
- stats.emplace("max", it.second->_maxOutputs);
- map.emplace(it.first, stats);
- }
- }
- return map;
- IE_SUPPRESS_DEPRECATED_END
-}
-
-void InferenceEnginePython::IENetwork::setStats(const std::map<std::string, std::map<std::string,
- std::vector<float>>> &stats) {
- IE_SUPPRESS_DEPRECATED_START
- InferenceEngine::ICNNNetworkStats *pstats = nullptr;
- InferenceEngine::ResponseDesc response;
- auto retCode = ((InferenceEngine::ICNNNetwork &) *actual).getStats(&pstats, &response);
- if (retCode == InferenceEngine::OK) {
- std::map<std::string, InferenceEngine::NetworkNodeStatsPtr> newNetNodesStats;
- for (const auto &it : stats) {
- InferenceEngine::NetworkNodeStatsPtr nodeStats = InferenceEngine::NetworkNodeStatsPtr(
- new InferenceEngine::NetworkNodeStats());
- newNetNodesStats.emplace(it.first, nodeStats);
- nodeStats->_minOutputs = it.second.at("min");
- nodeStats->_maxOutputs = it.second.at("max");
- }
- pstats->setNodesStats(newNetNodesStats);
- }
- IE_SUPPRESS_DEPRECATED_END
-}
-
InferenceEnginePython::IEExecNetwork::IEExecNetwork(const std::string &name, size_t num_requests) :
infer_requests(num_requests), name(name) {
request_queue_ptr = std::make_shared<IdleInferRequestQueue>();
void serialize(const std::string &path_to_xml, const std::string &path_to_bin);
- void setStats(const std::map<std::string, std::map<std::string, std::vector<float>>> &stats);
-
- const std::map<std::string, std::map<std::string, std::vector<float>>> getStats();
-
void load_from_buffer(const char* xml, size_t xml_size, uint8_t* bin, size_t bin_size);
IENetwork(const std::string &model, const std::string &weights);
void setLayerParams(map[string, map[string, string]] params_map) except +
void serialize(const string& path_to_xml, const string& path_to_bin) except +
void reshape(map[string, vector[size_t]] input_shapes) except +
- void setStats(map[string, map[string, vector[float]]] & stats) except +
- map[string, map[string, vector[float]]] getStats() except +
void load_from_buffer(const char*xml, size_t xml_size, uint8_t*bin, size_t bin_size) except +
object getFunction() except +
import numpy as np
from openvino.inference_engine import IECore, IENetwork, IENetLayer, DataPtr, \
- LayersStatsMap, LayerStats, InputInfoPtr, PreProcessInfo
+ InputInfoPtr, PreProcessInfo
from conftest import model_path
assert isinstance(net.layers['19/Fused_Add_'], IENetLayer)
-def test_get_stats_deprecated():
- with warnings.catch_warnings(record=True) as w:
- ie = IECore()
- net = ie.read_network(model=test_net_xml, weights=test_net_bin)
- stats = net.stats
- assert isinstance(stats, LayersStatsMap)
- assert len(w) == 1
- assert issubclass(w[-1].category, DeprecationWarning)
- assert "stats property of IENetwork is deprecated." in str(w[-1].message)
-
-
-@pytest.mark.skip(reason="Test is failed due-to ngraph conversion")
-def test_set_new_stats_deprecated():
- with warnings.catch_warnings(record=True) as w:
- ie = IECore()
- net = ie.read_network(model=test_net_xml, weights=test_net_bin)
- new_stats = LayerStats(min=(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0),
- max=(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0))
- stats = net.stats
- stats.update({"fc_out": new_stats})
- assert net.stats["fc_out"].min == new_stats.min
- assert net.stats["fc_out"].max == new_stats.max
- assert len(w) == 3
- for warns in w:
- assert issubclass(warns.category, DeprecationWarning)
- assert "stats property of IENetwork is deprecated." in str(warns.message)
-
-
-@pytest.mark.skip(reason="Test is failed due-to ngraph conversion")
-def test_update_stats_deprecated():
- with warnings.catch_warnings(record=True) as w:
- ie = IECore()
- net = ie.read_network(model=test_net_xml, weights=test_net_bin)
- initial_stats = LayerStats(min=(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0),
- max=(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0))
- stats = net.stats
- stats.update({"fc_out": initial_stats})
- new_stats = LayerStats(min=(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0),
- max=(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0))
- stats.update({"fc_out": new_stats})
- assert net.stats["fc_out"].min == new_stats.min
- assert net.stats["fc_out"].max == new_stats.max
- assert len(w) == 3
- for warns in w:
- assert issubclass(warns.category, DeprecationWarning)
- assert "stats property of IENetwork is deprecated." in str(warns.message)
-
-
@pytest.mark.skip(reason="Test is failed due-to ngraph conversion")
def test_serialize():
ie = IECore()
#include "ie_blob.h"
#include "ie_common.h"
#include "ie_data.h"
-#include "ie_icnn_network_stats.hpp"
#include "ie_iextension.h"
#include "ie_input_info.hpp"
#include "ie_layers.h"
};
/**
- * @deprecated Migrate to IR v10 and use quantization approach with FakeQuantize
- * @brief Gets the statistics.
- * @param stats The statistics
- * @param resp Pointer to the response message that holds a description of an error if any occurred
- * @return Status code of the operation
- */
- IE_SUPPRESS_DEPRECATED_START
- INFERENCE_ENGINE_INTERNAL("Migrate to IR v10 and use quantization approach with FakeQuantize")
- virtual StatusCode getStats(ICNNNetworkStats** stats, ResponseDesc* resp) const noexcept {
- (void)stats;
- (void)resp;
- return NOT_IMPLEMENTED;
- };
- IE_SUPPRESS_DEPRECATED_END
-
- /**
* @brief Serialize network to IR and weights files.
*
* @param xmlPath Path to output IR file.
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-/**
- * @brief This is a header file for the ICNNNetworkStats class
- *
- * @file ie_icnn_network_stats.hpp
- */
-#pragma once
-
-#include <limits>
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "details/ie_irelease.hpp"
-
-namespace InferenceEngine {
-
-class NetworkNodeStats;
-
-/**
- * @brief A shared pointer to the NetworkNodeStats object
- */
-using NetworkNodeStatsPtr = std::shared_ptr<NetworkNodeStats>;
-
-/**
- * @brief A smart pointer to the NetworkNodeStats object
- */
-using NetworkNodeStatsWeakPtr = std::weak_ptr<NetworkNodeStats>;
-
-/**
- * @brief A map of pairs: name of a layer and related statistics
- */
-using NetworkStatsMap = std::map<std::string, NetworkNodeStatsPtr>;
-
-/**
- * @deprecated Migrate to IR v10 and use quantization approach with FakeQuantize
- * @class ICNNNetworkStats
- * @brief This is the interface to describe the NN topology scoring statistics
- */
-class INFERENCE_ENGINE_INTERNAL("Migrate to IR v10 and use quantization approach with FakeQuantize") ICNNNetworkStats : public details::IRelease {
-public:
- /**
- * @brief Sets a map which contains layers with statistics
- *
- * @param stats A map which is set
- * Abstract method
- */
- virtual void setNodesStats(const NetworkStatsMap& stats) = 0;
- /**
- * @brief Gets a map which contains layers with statistics
- *
- * Abstract method
- * @return A NetworkStatsMap object
- */
- virtual const NetworkStatsMap& getNodesStats() const = 0;
- /**
- * @brief Checks if a container is empty
- *
- * Abstract method
- * @return A bool value which shows whether a container is empty
- */
- virtual bool isEmpty() const = 0;
-};
-
-/**
- * @deprecated Migrate to IR v10 and use quantization approach with FakeQuantize
- * @class NetworkNodeStats
- * @brief This class implements a container which stores statistics for a layer
- */
-class INFERENCE_ENGINE_INTERNAL("Migrate to IR v10 and use quantization approach with FakeQuantize") NetworkNodeStats {
-public:
- /**
- * @brief The constructor which creates NetworkNodeStats object
- */
- NetworkNodeStats() {}
- /**
- * @brief The constructor which creates NetworkNodeStats object with filled statistics
- *
- * @param statCount The number of minimum/maximum values in statistics
- */
- explicit NetworkNodeStats(int statCount) {
- float mn = (std::numeric_limits<float>::max)();
- float mx = (std::numeric_limits<float>::min)();
-
- IE_SUPPRESS_DEPRECATED_START_WIN
- for (int i = 0; i < statCount; i++) {
- _minOutputs.push_back(mn);
- _maxOutputs.push_back(mx);
- }
- IE_SUPPRESS_DEPRECATED_END_WIN
- }
-
-public:
- /**
- * @brief Vector of floats which contains minimum values of layers activations
- */
- std::vector<float> _minOutputs;
- /**
- * @brief Vector of floats which contains maximum values of layers activations
- */
- std::vector<float> _maxOutputs;
-};
-
-} // namespace InferenceEngine
#include <cpp/ie_plugin_cpp.hpp>
#include <ie_core.hpp>
#include <ie_icnn_network.hpp>
-#include <ie_icnn_network_stats.hpp>
#include <ie_plugin_config.hpp>
#include <ie_version.hpp>
DECLARE_VPU_CONFIG_KEY(CUSTOM_LAYERS);
/**
+ * @deprecated IR statistics are not available in IR v10.
* @brief Ignore statistic in IR by plugin.
* Plugin could use statistic present in IR in order to try to improve calculations precision.
* If you don't want statistic to be used enable this option.
* This option should be used with values: CONFIG_VALUE(YES) or CONFIG_VALUE(NO) (default)
*/
+INFERENCE_ENGINE_DEPRECATED("IR statistic is not available in IR v10")
DECLARE_VPU_CONFIG_KEY(IGNORE_IR_STATISTIC);
/**
#include <sys/types.h>
#include <sys/stat.h>
#include <exec_graph_info.hpp>
-#include "cnn_network_int8_normalizer.hpp"
#include "low_precision_transformations/transformer.hpp"
#include "low_precision_transformations/eltwise.hpp"
dumpGraph(network, subgraphs, file);
}
- InferenceEngine::ICNNNetworkStats* networkStats = nullptr;
- if (StatusCode::OK != network.getStats(&networkStats, nullptr)) {
- networkStats = nullptr;
- }
-
std::vector<NetworkDesc> descs;
std::vector<CNNLayerPtr> tempLayers;
for (auto &&subgraph : subgraphs) {
auto affinity = (*subgraph.begin())->affinity;
tempLayers.assign(subgraph.begin(), subgraph.end());
- auto tempNetwork = cloneNet(tempLayers, networkStats);
+ auto tempNetwork = cloneNet(tempLayers);
auto name = network.getName() + "_" + std::to_string(std::distance(subgraphs.data(), &subgraph));
tempNetwork->setName(name);
// restoring some outputs from original net if they are not marked as output automatically
void addOutput(const std::string& dataName);
- StatusCode getStats(ICNNNetworkStats** stats, ResponseDesc* resp) const noexcept override {
- return StatusCode::NOT_FOUND;
- }
-
void Release() noexcept override {
delete this;
}
#include <vector>
#include "ie_ishape_infer_extension.hpp"
-#include "cnn_network_stats_impl.hpp"
#include "description_buffer.hpp"
#include "ie_api.h"
#include "ie_blob.h"
void removeOutput(const std::string& dataName);
- StatusCode getStats(ICNNNetworkStats** stats, ResponseDesc* /* resp */) const noexcept override {
- if (stats == nullptr) return StatusCode::PARAMETER_MISMATCH;
- *stats = _stats.get();
- return StatusCode::OK;
- }
-
void Release() noexcept override {
delete this;
}
std::string _name;
DataPtr _emptyData;
ShapeInfer::ReshaperPtr _reshaper;
- CNNNetworkStatsImplPtr _stats;
};
IE_SUPPRESS_DEPRECATED_END
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <cpp/ie_cnn_network.h>
-#include <float.h>
-
-#include <ie_icnn_network.hpp>
-#include <ie_icnn_network_stats.hpp>
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-namespace InferenceEngine {
-namespace details {
-
-/**
- * We have raw statistic from stat collection tool and this statistic should be processed to get best
- * accuracy. This transformation depends on the topology, depends on the parameters of layers.
- * i.e. data going to regular and depth-wise convolution would be scaled differently. In case of
- * regular convolution it should be scaled for tensor wide approach, for depth-wise convolution it
- * should be scaled by channel approach.
- * This class contains logic of getting scales
- */
-class CNNStatisticHelper {
-public:
- /**
- * We need to have topology to make a decision about scales
- * @param network initial network to be quantized, the topology can be changed during quantization
- * @param internalNodesStats initial statistic
- * @param maxSign - maximal signed value to be used for calculation of scales
- * @param maxUnsign - maximal unsigned value to be used for calculation of scales
- *
- */
- CNNStatisticHelper(CNNNetwork& network, const std::map<std::string, NetworkNodeStatsPtr>& internalNodesStats,
- int maxSign, int maxUnsign);
-
- /**
- * Returns if we can quantize layer basing on information of existing statistic before and after
- * layers
- */
- bool canLayerBeQuantized(CNNLayer::Ptr layer) const;
-
- /**
- * The topology is allowed to be changed, we need to modify statistic accordingly
- *
- * Currently there is a need in copy of statistic only
-
- * @param srcName name of layer from statistic needs to be taken
- * @param dstName name of layer which statistic will be applied
- */
- void copyStatistics(const std::string& srcName, const std::string& dstName);
-
- /**
- * Returns boolean values if layer produce negative data according collected statistic
- * true means that layer produices negative values
- * false means that layer produces only positive numbers
- * @param layer - layer of interest
- * @param outputPort - number of port to verify. -1 stands forverification of all outputs from
- * layer
- */
- bool hasNegativeOutput(const std::string& layerName, int outputPort = -1) const;
-
- /**
- * Returns input scale for layer based on statistic
- * @return blob with scales per channel
- */
- InferenceEngine::Blob::Ptr getInputScale(CNNLayer::Ptr layer) const;
-
- /**
- * Returns output scale for layer based on statistic
- * @return blob with scales per channel
- */
- InferenceEngine::Blob::Ptr getOutputScale(CNNLayer::Ptr layer) const;
-
- /**
- * provides max signed value as the only place for synchronization with other algorithms in
- * normalizer which require this
- */
- int getMaxSignValue() const;
-
- /**
- * Returns a latest layer in fusion, the data from returned layer will go to anopther, this mean
- * that for all layers which will be fused we will have to use only statistic from that latest layer
- * @param layer - layer of interest
- *
- * @return returns layer which statistic should be used for calculatio of all scales for layer
- * passed as a parameter for this method
- */
- CNNLayer::Ptr getLatestInFuse(CNNLayer::Ptr layer) const;
-
-private:
- /**
- * Calculates scale factor according statistic for layer passed to this function. No other logic for
- * selection another layer is implemented here.
- *
- * @param channels redundant parameter, should be removed
- * @param stats redundant parameter, should be removed
- * @param maxInt - we can quantize to I8 even if data is unsigned, need to provide such max number
- * explicitly
- *
- * @return InferenceEngine::Blob::Ptr
- */
- InferenceEngine::Blob::Ptr calculateScaleFactor(size_t channels, NetworkNodeStatsPtr stats, int maxInt) const;
-
- /**
- * Select the latet layer in the fusion and returns its statistic
- */
- NetworkNodeStatsPtr getStatistic(CNNLayer::Ptr layer) const;
-
- /**
- * Pass over alls statistic and normalize it to the only scale per tenso, individual per channel or
- * mix depenging on the pattern in the network
- */
- void NormalizeStatistic();
-
- CNNNetwork network_;
- std::map<std::string, NetworkNodeStatsPtr> internalNodesStats_;
- int maxSign_;
- int maxUnsign_;
-};
-
-/**
- * This class normalizes and quantizes network to "Int8" state
- * The converted network will have
- * 1) scaleshifts which will normalize activation values to int8 (S8/U8) range
- * 2) quantize weigths and biases of convolution
- * 3) adds special attributes to layers because semantic of int8 layer are different vs floating
- * point ones. For example, after convolution we need to return back to denormalized values and
- * there should be special scale here
- * 4) Transforms some layers to another ones. For example if i8 to i8 Scaleshift is not supported
- * by backend, this scaleshift will be converted to grouped/(depth-wise in ideal case) convolution
- *
- * This class very depends on backend and its fusion. It assumes that fusion must be executed all
- * the time, we cannot for split it to independent execution of two layers in int8 mode. This is
- * done to calculate normalization factors the most optimal way to save accuracy.
- * Currently supported fusion
- * 1. Conv-ReLU
- * 2. Conv-Sum-ReLU which is appeared from the pattern
- * Conv Something
- * \ /
- * Eltwise
- * ReLU
- * Here, the output form "Something" will be used as in-place storge for accumulation of the
- * results for convolution. That lead to tricky case in int8 when we have signed int8 input and
- * unsigned u8 output
- * */
-class INFERENCE_ENGINE_API_CLASS(CNNNetworkInt8Normalizer) {
-public:
- CNNNetworkInt8Normalizer() {}
-
-private:
- /** Helper function for filling of scaleshift weights for normalization of activation */
- static void fillInScaleShift(ScaleShiftLayer* scshLayer, size_t c, float* weightsN, float* weightsD);
-
-public:
- /** main function for calling of quantization */
- static void NormalizeNetwork(ICNNNetwork& network, ICNNNetworkStats& netStats);
-
-protected:
- /** Helper function to add scaleshifts and other layers for transformatin of topology */
- static void AddLayerToCNNNetworkBeforeLayer(CNNLayer::Ptr newLayer, CNNLayer::Ptr successor, size_t port);
- /** Helper function to add scaleshifts and other layers for transformatin of topology */
- static void AddLayerToCNNNetworkAfterData(DataPtr pData, CNNLayer::Ptr layer, const std::string& nextLayerName);
- /** Adds ScaleShift between two specified layers */
- static void AddScaleShiftBetween(CNNNetwork& net, const CNNLayerPtr layer1, const CNNLayerPtr layer2,
- CNNStatisticHelper& statHelper);
-
- /** creates dw convolution with unary weights and zero biases with i8 output and the same
- * statistic. it will provide requantization from U8 to I8*/
- static CNNLayer::Ptr addU8ToI8Conversion(DataPtr data, CNNLayer::Ptr successor, CNNStatisticHelper& statHelper);
-
- /**
- * Function which recalculate weights according to input scales, and quantize weights, biases and
- * adds o-scale and w-scale
- * w-scale - multiplication on this scale of i8 convolution result will produce denormalized fp32
- * data
- * o-scale - multiplication on this scale will convert above denormalized fp32 to i8 for next layer
- */
- static void QuantizeConvolutionOrFullyConnected(CNNLayer::Ptr convolution, CNNStatisticHelper& statHelper);
-
- /** Adds ScaleShifts everywhere */
- static void AddScaleShifts(CNNNetwork& net, CNNStatisticHelper& statHelper);
-
- /** Convert ReLu-like Clamps to ReLu layers */
- static void ClampsToReLU(CNNNetwork& net, CNNStatisticHelper& statHelper);
-
- /**
- * Goes over all layers and mark which layers will be executed in FP32/I8 and marks data between
- * layers to I8/U8/FP32
- */
- static void DefinesExecutionPrecision(CNNNetwork& net, CNNStatisticHelper& statHelper);
-
- /**
- * Since o-scales exist only for convolutins, we need to propagate them down oever concats and
- * linear layers
- */
- static void PropagateScaleFactors(CNNNetwork& net, const CNNStatisticHelper& statHelper);
-
- /**
- * Normalizes and quantizes srcData using scales for normalization and int8blob precision for
- * quantization
- */
- static void ScaleDataToInt(const float* srcData, size_t srcSize, Blob::Ptr int8blob,
- const std::vector<float>& scales);
-
- /**
- * Replaces all ScaleShifts layers met in the model to the depth-wise convolution with the same
- * weights and biases.
- *
- * Exceptions:
- * 1. ScaleShift following after Input layer, it is not converted to depth-wise convolution
- * 2. Scaleshift producing output of network
- * 3. Scaleshift passing data to Priorbox
- *
- * This conversion allows to avoid introductin one more i8 primitive - ScaleShift accepting i8 input
- * and producing i8 output
- */
- static void replaceScaleShiftByDWConvolution(CNNNetwork& net);
-
- /** Helper function which creates DW/Grouped/regular convolution by passed weights and biases */
- static CNNLayer::Ptr createDWConvolutionForScale(const std::string& layerName, size_t channels, float* weights,
- float* biases);
-
- /**
- * Verifies if layer produces data to layers which marked as float
- */
- static bool layerProducesFloat(const CNNLayer::Ptr layer);
-
- /**
- * Returns tails from I8 to FP32 until convolution - it is the most performed approach because
- * convolution can convert to FP32 for free, while adding one more scale will decrease performance
- */
- static void returnTailToFP32(const CNNLayer::Ptr layer);
-
- /**
- * Verifies whether layer can be potentially int8
- * @return true if layer does not have improper activation for fusion
- */
- static bool canLayerBeI8(const CNNLayer::Ptr& layer);
-
- /**
- * Verifies if next layer has type which potentially can be fused with convolution
- * and if activation is supported for int8
- * @return true if layer does not have improper activation for fusion
- */
- static bool isNextFusionAllowed(const CNNLayer::Ptr& layer);
-
-public:
- /**
- * Returns true for a "relu-like" clamp layer i.e. a clamp with minimum = 0
- */
- static bool isReLULikeClamp(CNNLayer::Ptr layer);
-};
-
-typedef std::shared_ptr<CNNNetworkInt8Normalizer> CNNNetworkNormalizerPtr;
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <ie_icnn_network.hpp>
-#include <ie_icnn_network_stats.hpp>
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "description_buffer.hpp"
-#include "ie_api.h"
-#include "ie_blob.h"
-#include "ie_common.h"
-#include "ie_data.h"
-
-namespace InferenceEngine {
-namespace details {
-
-IE_SUPPRESS_DEPRECATED_START
-
-class INFERENCE_ENGINE_API_CLASS(CNNNetworkStatsImpl): public ICNNNetworkStats {
-public:
- CNNNetworkStatsImpl() = default;
- virtual ~CNNNetworkStatsImpl();
-
-public:
- const NetworkStatsMap& getNodesStats() const override;
- void setNodesStats(const NetworkStatsMap& stats) override;
- bool isEmpty() const override {
- return netNodesStats.empty();
- }
-
- void Release() noexcept override {
- delete this;
- }
-
-protected:
- std::map<std::string, NetworkNodeStatsPtr> netNodesStats;
-};
-
-typedef std::shared_ptr<CNNNetworkStatsImpl> CNNNetworkStatsImplPtr;
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace details
-} // namespace InferenceEngine
* @return Cloned network
*/
INFERENCE_ENGINE_API_CPP(InferenceEngine::details::CNNNetworkImplPtr)
-cloneNet(const std::vector<InferenceEngine::CNNLayerPtr>& layers, const ICNNNetworkStats* networkStats);
+cloneNet(const std::vector<InferenceEngine::CNNLayerPtr>& layers);
IE_SUPPRESS_DEPRECATED_END
ICNNNetwork::~ICNNNetwork() {}
-CNNNetworkImpl::CNNNetworkImpl(): _stats(new CNNNetworkStatsImpl()) {}
+// Default constructor: performs no explicit member initialisation.
+CNNNetworkImpl::CNNNetworkImpl() = default;
CNNNetworkImpl::~CNNNetworkImpl() {
// In case of cycles, memory leaks occur: Layer holds shared_ptr<Data>, and vice versa.
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "cnn_network_int8_normalizer.hpp"
-
-#include <data_stats.h>
-#include <details/ie_cnn_network_tools.h>
-#include <ie_common.h>
-
-#include <algorithm>
-#include <blob_factory.hpp>
-#include <cassert>
-#include <cmath>
-#include <details/caseless.hpp>
-#include <fstream>
-#include <limits>
-#include <map>
-#include <memory>
-#include <set>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "cnn_network_impl.hpp"
-#include "cnn_network_stats_impl.hpp"
-#include "ie_util_internal.hpp"
-
-IE_SUPPRESS_DEPRECATED_START
-
-using namespace std;
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-using StatsMap = std::map<std::string, InferenceEngine::NetworkNodeStatsPtr>;
-
-CNNStatisticHelper::CNNStatisticHelper(CNNNetwork& network,
- const std::map<std::string, NetworkNodeStatsPtr>& internalNodesStats,
- int maxSign, int maxUnsign) {
- internalNodesStats_ = internalNodesStats;
- network_ = network;
- maxSign_ = maxSign;
- maxUnsign_ = maxUnsign;
-
- NormalizeStatistic();
-}
-
-bool CNNStatisticHelper::canLayerBeQuantized(CNNLayer::Ptr layer) const {
- // verification of existing statistic for all inputs
- for (const auto i : layer->insData) {
- if (internalNodesStats_.find(i.lock()->getCreatorLayer().lock()->name) == internalNodesStats_.end()) {
- return false;
- }
- }
- // verification if there is a statistic for output of the layer
- if ((layer->outData.size() > 1) && (internalNodesStats_.find(layer->name) == internalNodesStats_.end())) {
- return false;
- }
- return true;
-}
-
-void CNNStatisticHelper::copyStatistics(const std::string& srcName, const std::string& dstName) {
- internalNodesStats_[dstName] = internalNodesStats_[srcName];
-}
-
-bool CNNStatisticHelper::hasNegativeOutput(const std::string& layerName, int outputPort) const {
- // TODO(amalyshe) parameter outputPort is not used yet, logic of dedication to the port
- // should be implemented
-
- NetworkNodeStatsPtr layerStat = internalNodesStats_.at(layerName);
- for (auto v : layerStat->_minOutputs) {
- if (v < 0.f) {
- return true;
- }
- }
- return false;
-}
-
-InferenceEngine::Blob::Ptr CNNStatisticHelper::getInputScale(CNNLayer::Ptr layer) const {
- auto inDataPtr = layer->insData[0].lock();
- if (inDataPtr == nullptr)
- return nullptr;
- auto previousLayer = inDataPtr->getCreatorLayer().lock();
- std::string inputLayerName = previousLayer->name;
-
- // for case when we have the only average pooling before, we need to take this
- // statistic from input of avg pooling to compensate work of average pooling
- // and to stay in int8 as much as we can
- if (previousLayer->type == "Pooling" &&
- (previousLayer->precision == Precision::I8 || previousLayer->precision == Precision::U8)) {
- // take input name to the pooling
- auto prevInDataPtr = previousLayer->insData[0].lock();
- if (prevInDataPtr == nullptr)
- return nullptr;
- inputLayerName = prevInDataPtr->getCreatorLayer().lock()->name;
- }
- size_t inputChannels = inDataPtr->getTensorDesc().getDims()[1];
- if (getStatistic(previousLayer)->_minOutputs.size() != inputChannels ||
- getStatistic(previousLayer)->_maxOutputs.size() != inputChannels) {
- THROW_IE_EXCEPTION << "min and max sizes should be equal to input channels count for " << previousLayer->name;
- }
-
- // current normalization algorithm can have nodes with fp32 edges. it can happen only in places
- // of initial quantization of int8 chains. Currently adding scaleshift adds certain I8/U8 precision
- // but calcualtion of scales happens before adding of scale shifts.
- // for fixing problem with cases of not determined yet presision and for following of
- // quantizatoin scheme defined by normalizer, we are adding here verification of negative output
- // in some cases and then verify exact precision of I8/U8 on node for covering of fully determined cases
- int maxValue = hasNegativeOutput(previousLayer->name) ? maxSign_ : maxUnsign_;
- if (previousLayer->outData[0]->getPrecision() == Precision::U8) {
- maxValue = maxUnsign_;
- } else if (previousLayer->outData[0]->getPrecision() == Precision::I8) {
- maxValue = maxSign_;
- }
-
- return calculateScaleFactor(inputChannels, getStatistic(previousLayer), maxValue);
-}
-
-InferenceEngine::Blob::Ptr CNNStatisticHelper::getOutputScale(CNNLayer::Ptr layer) const {
- // TODO(amalyshe) for now we are looking to precision on the data node
- size_t outputChannels = layer->outData[0]->getTensorDesc().getDims()[1];
- if (layer->outData.size() != 1) {
- THROW_IE_EXCEPTION << "Trying to get scales after layer having multiple output ports";
- }
-
- auto it = internalNodesStats_.find(layer->name);
- if (it == internalNodesStats_.end()) {
- return std::shared_ptr<Blob>();
- }
-
- if (getStatistic(layer)->_minOutputs.size() != outputChannels ||
- getStatistic(layer)->_maxOutputs.size() != outputChannels) {
- THROW_IE_EXCEPTION << "min and max sizes should be equal to output channels count for " << layer->name;
- }
-
- return calculateScaleFactor(outputChannels, getStatistic(layer),
- layer->outData[0]->getPrecision() == Precision::I8 ? maxSign_ : maxUnsign_);
-}
-
-int CNNStatisticHelper::getMaxSignValue() const {
- return maxSign_;
-}
-
-InferenceEngine::Blob::Ptr CNNStatisticHelper::calculateScaleFactor(size_t channels, NetworkNodeStatsPtr stats,
- int maxInt) const {
- if (stats->_minOutputs.size() != channels || stats->_maxOutputs.size() != channels) {
- THROW_IE_EXCEPTION << "min and max sizes should be equal to channels count";
- }
-
- // Creating i-scale blob
- std::shared_ptr<Data> iScaleData =
- std::shared_ptr<Data>(new Data("scale", {Precision::FP32, {channels}, Layout::C}));
- auto iScale = CreateBlobFromData(iScaleData);
- iScale->allocate();
- float* iScaleMemory = static_cast<float*>(iScale->buffer());
-
- for (int c = 0; c < channels; c++) {
- // maxc = fmax(maxc, fabs(stats[k]->_minOutputs[c])); // TODO Check if we should take minimums into
- // account
- float maxc = fabs(stats->_maxOutputs[c]);
- maxc = fmax(maxc, fabs(stats->_minOutputs[c]));
-
- iScaleMemory[c] = maxc / static_cast<float>(maxInt);
-
- if (fabs(iScaleMemory[c]) < 1e-7) {
- iScaleMemory[c] = 1.0f;
- }
- }
- return iScale;
-}
-
-NetworkNodeStatsPtr CNNStatisticHelper::getStatistic(CNNLayer::Ptr layer) const {
- // TODO(amalyshe) all logic of traversing over network and get apropriate statistics should be here
- // for now it is a stub
- auto it = internalNodesStats_.find(getLatestInFuse(layer)->name);
- if (it != internalNodesStats_.end()) {
- return it->second;
- }
- THROW_IE_EXCEPTION << "no stat for layer " << getLatestInFuse(layer)->name;
-}
-
-CNNLayer::Ptr CNNStatisticHelper::getLatestInFuse(CNNLayer::Ptr layer) const {
- if (layer->outData[0]->getInputTo().size() == 1 &&
- (CaselessEq<std::string>()(layer->outData[0]->getInputTo().begin()->second->type, "relu") ||
- CNNNetworkInt8Normalizer::isReLULikeClamp(layer->outData[0]->getInputTo().begin()->second))) {
- return layer->outData[0]->getInputTo().begin()->second;
- }
- // Conv-Sum-ReLU fuse
- // We need to return original layer if it will be used as a sum parame and ReLU if
- // iterating over outputs of pointed layer and look for the only eltwise
- CNNLayer::Ptr eltwise = nullptr;
- if (layer->outData.size() == 1) {
- for (auto it : layer->outData[0]->getInputTo()) {
- if (CaselessEq<std::string>()(it.second->type, "eltwise")) {
- if (eltwise) {
- THROW_IE_EXCEPTION << "Pattern when one layer pass data to several eltwise layers are not "
- "supported in int8 quantization";
- }
- eltwise = it.second;
- }
- }
- }
-
- if (eltwise) {
- // if current layer is not a convolution return it as finish of fuse
- if (!CaselessEq<std::string>()(layer->type, "convolution")) {
- return layer;
- } else {
- // look to the ports of eltwise
- if (eltwise->insData[0].lock() != nullptr
- && eltwise->insData[1].lock() != nullptr
- && eltwise->insData[1].lock()->getCreatorLayer().lock() == layer
- && CaselessEq<std::string>()(eltwise->insData[0].lock()->getCreatorLayer().lock()->type, "convolution")
- && eltwise->insData[0].lock()->getInputTo().size() == 1) {
- // this is a case when two convolutions come to eltwise, the second one will be selected for fuse,
- // first will be used as sum operator
- return layer;
- }
- // given layer is a convolution and will be used for fuse, but we need to verify if there is ReLU after
- // eltwise
- if (eltwise->outData[0]->getInputTo().size() == 1 &&
- (CaselessEq<std::string>()(eltwise->outData[0]->getInputTo().begin()->second->type, "relu") ||
- CNNNetworkInt8Normalizer::isReLULikeClamp(eltwise->outData[0]->getInputTo().begin()->second))) {
- return eltwise->outData[0]->getInputTo().begin()->second;
- }
- return eltwise;
- }
- }
-
- return layer;
-}
-
-void CNNStatisticHelper::NormalizeStatistic() {
- StatsMap newMap;
-
- // In case when we have statistics in negative range when min clamped value is 0,
- // we are changing statistics here to non negative. This is not fully correct behaviour since
- // it can extend range and affect accuracy, but this approach works quite well
- std::vector<CNNLayerPtr> sortedLayersRC = CNNNetSortTopologically(network_);
- for (auto l : sortedLayersRC) {
- if (CNNNetworkInt8Normalizer::isReLULikeClamp(l)) {
- if (l->outData.size() == 1) {
- size_t outputChannels = l->outData[0]->getTensorDesc().getDims()[1];
- auto oldStat = internalNodesStats_.find(l->name);
- if ((oldStat != internalNodesStats_.end()) && outputChannels > 1) {
- for (size_t q = 0; q < oldStat->second->_minOutputs.size(); q++) {
- oldStat->second->_minOutputs[q] = 0.f;
- }
- }
- }
- }
- }
-
- float dummy = 0.0f;
-
- std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network_);
- for (auto l : sortedLayers) {
- // if layer's statistic exists in the newMap, ignore it
- if (newMap.find(l->name) != newMap.end()) {
- continue;
- }
- // verify if layer is starter layer for propagating of statistic
- bool isStarterLayer = false;
-
- // a case if we do not have converted statistic before the current layer
- // go over all inputs and verify if statistic exists for all of inputs
- bool allInputsHaveStatistics = true;
- for (auto i : l->insData) {
- if (newMap.find(i.lock()->getCreatorLayer().lock()->name) == newMap.end()) {
- allInputsHaveStatistics = false;
- break;
- }
- }
- // if we do not have statistic - verify who is consumer of this layer
- if (!allInputsHaveStatistics) {
- if (l->outData.size() == 1) {
- for (auto it : l->outData[0]->getInputTo()) {
- if (CaselessEq<std::string>()(it.second->type, "scaleshift") ||
- CaselessEq<std::string>()(it.second->type, "convolution") ||
- CaselessEq<std::string>()(it.second->type, "fullyconnected")) {
- isStarterLayer = true;
- break;
- }
- }
- }
- } else {
- isStarterLayer = true;
- }
- if (CaselessEq<std::string>()(l->type, "scaleshift") || CaselessEq<std::string>()(l->type, "convolution") ||
- CaselessEq<std::string>()(l->type, "fullyconnected")) {
- isStarterLayer = true;
- }
-
- if (!isStarterLayer) {
- continue;
- }
-
- // we do not support yet layers for quantization which split data
- if (l->outData.size() != 1) {
- continue;
- }
-
- InferenceEngine::NetworkNodeStatsPtr currentStat = std::make_shared<NetworkNodeStats>();
-
- bool perChannelScale = true;
-
- if (CaselessEq<std::string>()(l->type, "concat") && l->outData.size() == 1 &&
- l->outData[0]->getTensorDesc().getDims().size() == 4 && allInputsHaveStatistics) {
- size_t concatLayerIdx = 0;
- for (int k = 0; k < l->insData.size(); k++) {
- auto prevKLayer = l->insData[k].lock()->getCreatorLayer().lock();
- // looking for the statistic for prevKLayer
- auto kLayerStat = newMap.find(prevKLayer->name);
- if (kLayerStat != newMap.end()) {
- for (size_t ikStat = 0; ikStat < kLayerStat->second->_maxOutputs.size();
- ikStat++, concatLayerIdx++) {
- currentStat->_maxOutputs.push_back(kLayerStat->second->_maxOutputs[ikStat]);
- currentStat->_minOutputs.push_back(kLayerStat->second->_minOutputs[ikStat]);
- }
- } else {
- THROW_IE_EXCEPTION << "We have incomplete statistic for predecessors of concat layer " << l->name;
- }
- }
- } else if (CaselessEq<std::string>()(l->type, "resample")) {
- if (l->insData.size() == 1) {
- CNNLayerPtr creator = l->insData[0].lock()->getCreatorLayer().lock();
- if (CaselessEq<std::string>()(creator->type, "concat")) {
- auto concatStat = newMap[creator->name];
- currentStat->_maxOutputs = concatStat->_maxOutputs;
- currentStat->_minOutputs = concatStat->_minOutputs;
- newMap[l->name] = currentStat;
- } else {
- auto itOld = internalNodesStats_.find(l->name);
- if (itOld != internalNodesStats_.end()) {
- currentStat->_maxOutputs = itOld->second->_maxOutputs;
- currentStat->_minOutputs = itOld->second->_minOutputs;
- newMap[l->name] = currentStat;
- }
- }
- }
- } else {
- // go over all children until we get convoluition, scaleshift, eltwise or unknown layer
- // layers Pooling and ReLU are passthrough
- // to understand the granularity of the scaling
- // layer concat is a layer which produce statistics and waterfall it down
- std::vector<CNNLayer::Ptr> toAnalyze;
- for (auto it : l->outData[0]->getInputTo()) {
- toAnalyze.push_back(it.second);
- }
-
- if (CaselessEq<std::string>()(l->type, "eltwise")) {
- perChannelScale = false;
- }
- while (!toAnalyze.empty() && perChannelScale) {
- CNNLayer::Ptr tl = toAnalyze.back();
- toAnalyze.pop_back();
- if (CaselessEq<std::string>()(tl->type, "pooling") || CaselessEq<std::string>()(tl->type, "relu") ||
- CNNNetworkInt8Normalizer::isReLULikeClamp(tl) || CaselessEq<std::string>()(tl->type, "concat")) {
- if (tl->outData.size() == 1) {
- for (auto it : tl->outData[0]->getInputTo()) {
- toAnalyze.push_back(it.second);
- }
- }
- } else if (CaselessEq<std::string>()(tl->type, "convolution")) {
- // verify number of groups
- ConvolutionLayer* pConv = dynamic_cast<ConvolutionLayer*>(tl.get());
- if (pConv == nullptr) {
- THROW_IE_EXCEPTION << "Layer " << tl->name << " is not instance of ConvolutionLayer class";
- }
- if (pConv->_group != pConv->_out_depth) {
- perChannelScale = false;
- }
- } else if (CaselessEq<std::string>()(tl->type, "eltwise")) {
- perChannelScale = false;
- }
- }
-
- auto itOld = internalNodesStats_.find(getLatestInFuse(l)->name);
- if (itOld == internalNodesStats_.end()) {
- itOld = internalNodesStats_.find(l->name);
- }
- if (itOld != internalNodesStats_.end()) {
- if (!perChannelScale) {
- currentStat->_maxOutputs.resize(itOld->second->_maxOutputs.size());
- if (!itOld->second->_maxOutputs.empty()) {
- float max = FLT_MIN;
- DataStats::GetDataAbsMax(&itOld->second->_maxOutputs[0], itOld->second->_maxOutputs.size(),
- max);
- std::fill(currentStat->_maxOutputs.begin(), currentStat->_maxOutputs.end(), max);
- }
-
- currentStat->_minOutputs.resize(itOld->second->_minOutputs.size());
- if (!itOld->second->_minOutputs.empty()) {
- float min = FLT_MAX;
- DataStats::GetDataMinMax(&itOld->second->_minOutputs[0], itOld->second->_minOutputs.size(), min,
- dummy);
- std::fill(currentStat->_minOutputs.begin(), currentStat->_minOutputs.end(), min);
- }
- } else {
- currentStat->_maxOutputs = itOld->second->_maxOutputs;
- currentStat->_minOutputs = itOld->second->_minOutputs;
- }
- }
-
- if (l->outData.size() == 1) {
- size_t ch_indx = l->outData[0]->getTensorDesc().getDims().size() > 1 ? 1 : 0;
- size_t outputChannels = l->outData[0]->getTensorDesc().getDims()[ch_indx];
- auto oldStat = internalNodesStats_.find(l->name);
- if ((oldStat != internalNodesStats_.end()) && outputChannels > 1 &&
- oldStat->second->_minOutputs.size() == 1) {
- auto min = oldStat->second->_minOutputs[0];
- auto max = oldStat->second->_maxOutputs[0];
-
- currentStat->_minOutputs = std::vector<float>(outputChannels);
- currentStat->_maxOutputs = std::vector<float>(outputChannels);
- std::fill(currentStat->_minOutputs.begin(), currentStat->_minOutputs.end(), min);
- std::fill(currentStat->_maxOutputs.begin(), currentStat->_maxOutputs.end(), max);
- }
- }
- }
-
- // propagate this statistic to all layers without scale in primitives
- if (!currentStat->_maxOutputs.empty() && !currentStat->_minOutputs.empty()) {
- std::vector<CNNLayer::Ptr> toAnalyze;
- toAnalyze.push_back(l);
- while (!toAnalyze.empty()) {
- CNNLayer::Ptr tl = toAnalyze.back();
- toAnalyze.pop_back();
- newMap[tl->name] = currentStat;
- if (tl->outData.size() == 1) {
- for (auto it : tl->outData[0]->getInputTo()) {
- if (CaselessEq<std::string>()(it.second->type, "pooling") ||
- CaselessEq<std::string>()(it.second->type, "relu") ||
- CNNNetworkInt8Normalizer::isReLULikeClamp(it.second)) {
- toAnalyze.push_back(it.second);
- }
- }
- }
- }
- }
- }
-
- internalNodesStats_ = newMap;
-}
-
-void CNNNetworkInt8Normalizer::AddLayerToCNNNetworkBeforeLayer(CNNLayer::Ptr newLayer, CNNLayer::Ptr successor,
- size_t port) {
- // verify if data exists
- if (newLayer && successor && successor->insData.size() > port) {
- // get the insData
- DataPtr pData = successor->insData[port].lock();
-
- Data* edge2 = new Data(*pData.get());
- DataPtr newEdge(edge2);
- newEdge->getInputTo().clear();
- newEdge->getInputTo()[successor->name] = successor;
- newEdge->setName(newLayer->name);
- newEdge->getCreatorLayer() = newLayer;
- successor->insData[port] = newEdge;
- newLayer->outData.push_back(newEdge);
-
- newLayer->insData.push_back(pData);
- pData->getInputTo().erase(successor->name);
- pData->getInputTo()[newLayer->name] = newLayer;
- } else {
- THROW_IE_EXCEPTION << "Invalid argument";
- }
-}
-
-CNNLayer::Ptr CNNNetworkInt8Normalizer::addU8ToI8Conversion(DataPtr data, CNNLayer::Ptr successor,
- CNNStatisticHelper& statHelper) {
- if (data->getPrecision() == Precision::U8 || data->getPrecision() == Precision::I8) {
- size_t c = static_cast<size_t>(data->getDims()[1]);
-
- std::vector<float> ssWValues;
- std::vector<float> ssSValues;
- for (auto i = 0; i < c; i++) {
- ssWValues.push_back(1.0f);
- ssSValues.push_back(0.0f);
- }
- std::string layerName = data->getCreatorLayer().lock()->name + "_Eltwise_ScaleShift_U8I8_" + successor->name;
- CNNLayer::Ptr newLayer = createDWConvolutionForScale(layerName, c, ssWValues.data(), ssSValues.data());
- newLayer->precision = Precision::I8;
-
- for (size_t i = 0; i < successor->insData.size(); i++) {
- if (successor->insData[i].lock() == data) {
- AddLayerToCNNNetworkBeforeLayer(newLayer, successor, i);
-
- // update statistic to pass quantization smoothly
- if (newLayer->insData[0].lock() == nullptr)
- continue;
- std::string inputLayerName = newLayer->insData[0].lock()->getCreatorLayer().lock()->name;
- statHelper.copyStatistics(inputLayerName, layerName);
- if (data->getPrecision() == Precision::U8) {
- newLayer->outData[0]->setPrecision(Precision::I8);
- } else {
- newLayer->outData[0]->setPrecision(Precision::U8);
- }
- }
- }
- return newLayer;
- }
- return nullptr;
-}
-
-void CNNNetworkInt8Normalizer::AddLayerToCNNNetworkAfterData(DataPtr pData, CNNLayer::Ptr layer,
- const std::string& nextLayerName) {
- // verify if data exists
- if (pData && layer && pData->getCreatorLayer().lock() &&
- pData->getInputTo().find(nextLayerName) != pData->getInputTo().end()) {
- CNNLayerPtr nextLayer = pData->getInputTo()[nextLayerName];
-
- DataPtr newEdgeAfterLayer(new Data(*pData.get()));
- newEdgeAfterLayer->setName(layer->name);
- newEdgeAfterLayer->getCreatorLayer() = layer;
- newEdgeAfterLayer->getInputTo().clear();
- newEdgeAfterLayer->getInputTo()[nextLayerName] = nextLayer;
- newEdgeAfterLayer->setPrecision(Precision::FP32);
-
- pData->getInputTo().erase(nextLayerName);
- pData->getInputTo()[layer->name] = layer;
-
- layer->insData.push_back(pData);
- layer->outData.push_back(newEdgeAfterLayer);
-
- for (size_t i = 0; i < nextLayer->insData.size(); i++) {
- if (nextLayer->insData[i].lock() == pData) {
- nextLayer->insData[i] = newEdgeAfterLayer;
- }
- }
- } else {
- THROW_IE_EXCEPTION << "Invalid argument";
- }
-}
-
-void CNNNetworkInt8Normalizer::fillInScaleShift(ScaleShiftLayer* scshLayer, size_t c, float* weightsN,
- float* weightsD) {
- // Setting "scales"
- SizeVector weightsSize = {c};
- TensorDesc weightsDesc(Precision::FP32, weightsSize, InferenceEngine::C);
- scshLayer->_weights = InferenceEngine::make_shared_blob<float>(weightsDesc);
- scshLayer->_weights->allocate();
- float* weightsData = scshLayer->_weights->buffer();
- for (size_t i = 0; i < c; i++) {
- if (weightsN == nullptr && weightsD != nullptr) {
- weightsData[i] = 1.0 / weightsD[i];
- } else if (weightsD == nullptr && weightsN != nullptr) {
- weightsData[i] = weightsN[i];
- } else if (weightsN != nullptr && weightsD != nullptr) {
- weightsData[i] = weightsN[i] / weightsD[i];
- } else {
- weightsData[i] = 1.0;
- }
- }
-
- // Setting "shifts"
- SizeVector shiftsSize = {c};
- TensorDesc shiftsDesc(Precision::FP32, shiftsSize, InferenceEngine::C);
- scshLayer->_biases = InferenceEngine::make_shared_blob<float>(shiftsDesc);
- scshLayer->_biases->allocate();
- float* biasesData = scshLayer->_biases->buffer();
- for (size_t i = 0; i < c; i++) {
- biasesData[i] = 0.f; // Setting to constant "0"
- }
-}
-
-void CNNNetworkInt8Normalizer::AddScaleShiftBetween(CNNNetwork& net, const CNNLayerPtr layer1, const CNNLayerPtr layer2,
- CNNStatisticHelper& statHelper) {
- if (CaselessEq<std::string>()(layer2->type, "priorbox") ||
- CaselessEq<std::string>()(layer2->type, "priorboxclustered")) {
- return;
- }
-
- // Searching the connection between the layers
- int l1_out_i = 0;
- for (; l1_out_i < layer1->outData.size(); l1_out_i++) {
- if (layer1->outData[l1_out_i]->getInputTo().find(layer2->name) !=
- layer1->outData[l1_out_i]->getInputTo().end()) {
- break;
- }
- }
- if (l1_out_i == layer1->outData.size()) {
- THROW_IE_EXCEPTION << "Can't find layer " << layer2->name << " among layer " << layer1->name << " outputs";
- }
-
- int l2_in_i = 0;
- for (; l2_in_i < layer2->insData.size(); l2_in_i++) {
- if (layer2->insData[l2_in_i].lock() != nullptr
- && layer2->insData[l2_in_i].lock()->getCreatorLayer().lock() == layer1) {
- break;
- }
- }
- if (l2_in_i == layer2->insData.size()) {
- THROW_IE_EXCEPTION << "Can't find layer " << layer2->name << " among layer " << layer1->name << " inputs";
- }
-
- DataPtr outData = layer1->outData[l1_out_i];
-
- Blob::Ptr oScaleBlob = nullptr;
- if (layer1->blobs.find("o-scale") != layer1->blobs.end()) {
- oScaleBlob = layer1->blobs["o-scale"];
- }
-
- Blob::Ptr iScaleBlob = nullptr;
- if (layer2->blobs.find("i-scale") != layer2->blobs.end()) {
- iScaleBlob = layer2->blobs["i-scale"];
- }
-
- if (iScaleBlob == nullptr && oScaleBlob == nullptr) {
- return; // No multipliers found around this edge. We can't create a ScaleShift here;
- } else {
- // Creating a ScaleShiftLayer
- std::string prefix;
- float *iScaleBuffer = nullptr, *oScaleBuffer = nullptr;
- if (oScaleBlob != nullptr) {
- oScaleBuffer = static_cast<float*>(oScaleBlob->buffer());
- prefix += "o";
- }
- if (iScaleBlob != nullptr) {
- iScaleBuffer = static_cast<float*>(iScaleBlob->buffer());
- prefix += "i";
- }
-
- std::string layerName = layer1->name + "_" + prefix + "ScaleShift_" + layer2->name;
- LayerParams ssCnnLayerParams {layerName, "ScaleShift", Precision::FP32};
- CNNLayerPtr ssCnnLayer(new ScaleShiftLayer(ssCnnLayerParams));
-
- AddLayerToCNNNetworkAfterData(outData, ssCnnLayer, layer2->name);
-
- size_t c = static_cast<size_t>(outData->getDims()[1]);
-
- {
- ScaleShiftLayer* scshLayer = dynamic_cast<ScaleShiftLayer*>(ssCnnLayer.get());
- if (scshLayer == nullptr) {
- THROW_IE_EXCEPTION << "Layer " << ssCnnLayer->name << " is not instance of ScaleShiftLayer class";
- }
- fillInScaleShift(scshLayer, c, oScaleBuffer, iScaleBuffer);
- }
-
- Precision odPrecision = Precision::FP32;
- if (layer2->precision == Precision::I8) {
- odPrecision = statHelper.hasNegativeOutput(layer1->name) ? Precision::I8 : Precision::U8;
- }
- ssCnnLayer->outData[0]->setPrecision(odPrecision);
- }
-}
-
-void CNNNetworkInt8Normalizer::AddScaleShifts(CNNNetwork& net, CNNStatisticHelper& statHelper) {
- std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(net);
-
- std::vector<std::pair<CNNLayerPtr, CNNLayerPtr>> pairs;
-
- for (auto iter : sortedLayers) {
- for (int l1_out_i = 0; l1_out_i < iter->outData.size(); l1_out_i++) {
- for (auto nextIter : iter->outData[l1_out_i]->getInputTo()) {
- CNNLayer::Ptr next = nextIter.second;
-
- // Checking for an INT8 convolution or fully connected with FP32 output
- if ((CaselessEq<std::string>()(iter->type, "Convolution") ||
- CaselessEq<std::string>()(iter->type, "FullyConnected")) &&
- iter->precision == Precision::I8 && next->precision == Precision::FP32 &&
- iter->outData[l1_out_i]->getPrecision() == Precision::FP32) {
- // Do nothing here only if iter provides data to fp32 layers
- // MKLDNNPlugin will generate x8->f32 convolution
-
- } else if ((iter->precision != Precision::FP32 && next->precision == Precision::FP32) ||
- (iter->precision == Precision::FP32 && next->precision != Precision::FP32)) {
- pairs.push_back(std::pair<CNNLayerPtr, CNNLayerPtr>(iter, next));
- }
- }
- }
- }
-
- for (auto& pair : pairs) {
- AddScaleShiftBetween(net, pair.first, pair.second, statHelper);
- }
-}
-
-void CNNNetworkInt8Normalizer::ClampsToReLU(CNNNetwork& net, CNNStatisticHelper& statHelper) {
- std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(net);
-
- for (auto iter : sortedLayers) {
- if (isReLULikeClamp(iter) && (iter->precision == Precision::I8 || iter->precision == Precision::U8)) {
- std::string layerName = iter->name + "_ReLU";
- LayerParams ssCnnLayerParams {layerName, "ReLU", iter->precision};
- CNNLayerPtr ssCnnLayer(new ReLULayer(ssCnnLayerParams));
-
- auto previousLayer = iter->insData[0].lock()->getCreatorLayer().lock();
- ssCnnLayer->insData.push_back(iter->insData[0]);
- if (ssCnnLayer->insData[0].lock() == nullptr)
- continue;
- ssCnnLayer->insData[0].lock()->getInputTo().erase(iter->name);
- ssCnnLayer->insData[0].lock()->getInputTo()[iter->name] = ssCnnLayer;
-
- ssCnnLayer->outData.push_back(iter->outData[0]);
- ssCnnLayer->outData[0]->getCreatorLayer() = ssCnnLayer;
-
- iter->insData.clear();
- iter->outData.clear();
- }
- }
-}
-
-void CNNNetworkInt8Normalizer::ScaleDataToInt(const float* srcData, size_t srcSize, Blob::Ptr int8blob,
- const std::vector<float>& scales) {
- if (scales.size() == 0 || /*srcblob->size()*/ srcSize % scales.size() != 0) {
- THROW_IE_EXCEPTION << "Wrong number of scale factors";
- }
-
- size_t channels = scales.size();
- size_t channelSize = /*srcblob->size()*/ srcSize / channels;
-
- const float* data = srcData;
- if (int8blob->getTensorDesc().getPrecision() == Precision::I8) {
- int8_t* int8data = static_cast<int8_t*>(int8blob->buffer());
- int minValue = std::numeric_limits<int8_t>::min();
- int maxValue = std::numeric_limits<int8_t>::max();
-
- size_t offset;
-
- float val;
-
- for (size_t ch = 0; ch < channels; ch++) {
- offset = channelSize * ch;
-
- for (size_t i = 0; i < channelSize; i++) {
- val = data[offset + i] * scales[ch];
-
- if (val > maxValue) {
- val = maxValue;
- } else if (val < minValue) {
- val = minValue;
- }
-
- int8data[offset + i] = round(val);
- }
- }
- } else if (int8blob->getTensorDesc().getPrecision() == Precision::I32) {
- int32_t* int32data = static_cast<int32_t*>(int8blob->buffer());
- int maxValue = std::numeric_limits<int32_t>::max();
- int minValue = std::numeric_limits<int32_t>::min();
-
- size_t offset;
-
- float val;
-
- for (size_t ch = 0; ch < channels; ch++) {
- offset = channelSize * ch;
-
- for (size_t i = 0; i < channelSize; i++) {
- val = data[offset + i] * scales[ch];
-
- if (val > maxValue) {
- val = maxValue;
- } else if (val < minValue) {
- val = minValue;
- }
-
- int32data[offset + i] = round(val);
- }
- }
- }
-}
-
-CNNLayer::Ptr CNNNetworkInt8Normalizer::createDWConvolutionForScale(const std::string& layerName, size_t channels,
- float* ssWValues, float* ssSValues) {
- // create new Convolution layer
- LayerParams params;
- params.name = layerName;
- params.precision = Precision::FP32;
- params.type = "Convolution";
-
- CNNLayerPtr lptr = std::make_shared<ConvolutionLayer>(params);
- auto* pConv = dynamic_cast<ConvolutionLayer*>(lptr.get());
- if (pConv == nullptr) {
- THROW_IE_EXCEPTION << "Layer " << lptr->name << " is not instance of ConvolutionLayer class";
- }
-
- pConv->_kernel.insert(X_AXIS, 1);
- pConv->_kernel.insert(Y_AXIS, 1);
- pConv->_stride.insert(X_AXIS, 1);
- pConv->_stride.insert(Y_AXIS, 1);
- pConv->_padding.insert(X_AXIS, 0);
- pConv->_padding.insert(Y_AXIS, 0);
- pConv->_pads_end.insert(X_AXIS, 0);
- pConv->_pads_end.insert(Y_AXIS, 0);
- pConv->_dilation.insert(X_AXIS, 1);
- pConv->_dilation.insert(Y_AXIS, 1);
-
- pConv->_out_depth = channels;
- // mkl-dnn does not have i8 depthwise convolution accepting signed i8 input
- // when it is available, need to uncomment below lines
-
- // workaround - creation of new weights for simple convolution
- if (pConv->_out_depth % 16 == 0) {
- pConv->_group = pConv->_out_depth / 16;
- Blob::Ptr weights = nullptr;
- std::shared_ptr<Data> wData =
- std::shared_ptr<Data>(new Data("weights", {Precision::FP32, {pConv->_out_depth * 16}, Layout::C}));
- weights = CreateBlobFromData(wData);
- weights->allocate();
- float* buffer = weights->buffer().as<float*>();
- size_t iDist = 0, iSrc = 0;
- for (size_t g = 0; g < pConv->_group; g++) {
- for (size_t k = 0; k < 16; k++) {
- for (size_t s = 0; s < 16; s++) {
- buffer[iDist++] = (s == k) ? ssWValues[iSrc++] : 0.f;
- }
- }
- }
- pConv->_weights = weights;
- pConv->blobs["weights"] = weights;
- } else {
- Blob::Ptr weights = nullptr;
- std::shared_ptr<Data> wData = std::shared_ptr<Data>(
- new Data("weights", {Precision::FP32, {pConv->_out_depth * pConv->_out_depth}, Layout::C}));
- weights = CreateBlobFromData(wData);
- weights->allocate();
- float* buffer = weights->buffer().as<float*>();
- for (size_t i = 0, idx = 0; i < pConv->_out_depth; i++) {
- for (size_t j = 0; j < pConv->_out_depth; j++) {
- if (i == j) {
- buffer[idx] = ssWValues[i];
- } else {
- buffer[idx] = 0.f;
- }
- idx++;
- }
- }
- pConv->_weights = weights;
- pConv->blobs["weights"] = weights;
- pConv->_group = 1;
- }
- // end of workaround
-
- // fililng of biases
- Blob::Ptr biasesBlob = nullptr;
- std::shared_ptr<Data> bData =
- std::shared_ptr<Data>(new Data("biases", {Precision::FP32, {pConv->_out_depth}, Layout::C}));
- biasesBlob = CreateBlobFromData(bData);
- biasesBlob->allocate();
- float* bufferBiases = biasesBlob->buffer().as<float*>();
- for (size_t c = 0; c < pConv->_out_depth; c++) {
- bufferBiases[c] = ssSValues[c];
- }
- pConv->_biases = biasesBlob;
-
- pConv->blobs["weights"] = pConv->_weights;
- pConv->blobs["biases"] = pConv->_biases;
- return lptr;
-}
-
-void CNNNetworkInt8Normalizer::replaceScaleShiftByDWConvolution(CNNNetwork& net) {
- std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(net);
- for (auto layer : sortedLayers) {
- if (CaselessEq<std::string>()(layer->type, "scaleshift") &&
- layer->insData[0].lock()->getCreatorLayer().lock() &&
- !CaselessEq<std::string>()(layer->insData[0].lock()->getCreatorLayer().lock()->type, "input") &&
- layer->outData[0]->getInputTo().size() > 0) {
- const auto dims = layer->insData[0].lock()->getTensorDesc().getDims();
- // only four or five dimensions Convolution layers are supported
- if ((dims.size() == 4) || (dims.size() == 5)) {
- // verification if this layer does not pass data to PriorBox, if it passes, we do not substitute
- bool notToPriorBox = true;
- for (auto o : layer->outData[0]->getInputTo()) {
- if (CaselessEq<std::string>()(o.second->type, "priorbox") ||
- CaselessEq<std::string>()(o.second->type, "priorboxclustered")) {
- notToPriorBox = false;
- }
- }
- if (notToPriorBox) {
- ScaleShiftLayer* pSS = dynamic_cast<ScaleShiftLayer*>(layer.get());
- float* ssWValues = pSS->_weights->buffer().as<float*>();
- float* ssSValues = pSS->_biases->buffer().as<float*>();
- CNNLayer::Ptr newLayer = createDWConvolutionForScale(
- layer->name, layer->outData[0]->getTensorDesc().getDims()[1], ssWValues, ssSValues);
-
- newLayer->outData = layer->outData;
- newLayer->outData[0]->getCreatorLayer() = newLayer;
- newLayer->insData = layer->insData;
- if (newLayer->insData[0].lock() == nullptr)
- continue;
- newLayer->insData[0].lock()->getInputTo().erase(layer->name);
- newLayer->insData[0].lock()->getInputTo()[newLayer->name] = newLayer;
- }
- }
- }
- }
-}
-
-void CNNNetworkInt8Normalizer::QuantizeConvolutionOrFullyConnected(CNNLayer::Ptr target_layer,
- CNNStatisticHelper& statHelper) {
- size_t inputChannels = target_layer->insData[0].lock()->getTensorDesc().getDims()[1];
- size_t outputChannels = target_layer->outData[0]->getTensorDesc().getDims()[1];
-
- auto iScale = statHelper.getInputScale(target_layer);
- if (iScale == nullptr)
- THROW_IE_EXCEPTION << "Layer '" << target_layer->name << "'has invalid scale";
-
- target_layer->blobs["i-scale"] = iScale;
-
- Blob::Ptr weights = nullptr;
- Blob::Ptr biases = nullptr;
-
- Blob::Ptr int8weights = nullptr;
- Blob::Ptr int32biases = nullptr;
-
- if (target_layer->blobs.find("weights") != target_layer->blobs.end()) {
- weights = target_layer->blobs["weights"];
-
- // Creating int8 weights blob
- std::shared_ptr<Data> int8WeightsData =
- std::shared_ptr<Data>(new Data("weights", TensorDesc(Precision::I8, weights->getTensorDesc().getDims(),
- weights->getTensorDesc().getLayout())));
- int8weights = CreateBlobFromData(int8WeightsData);
- int8weights->allocate();
- target_layer->blobs["weights"] = int8weights;
- }
-
- if (target_layer->blobs.find("biases") != target_layer->blobs.end()) {
- biases = target_layer->blobs["biases"];
-
- // Creating int8 biases blob
- std::shared_ptr<Data> int32BiasesData =
- std::shared_ptr<Data>(new Data("biases", TensorDesc(Precision::I32, biases->getTensorDesc().getDims(),
- biases->getTensorDesc().getLayout())));
- int32biases = CreateBlobFromData(int32BiasesData);
- int32biases->allocate();
- target_layer->blobs["biases"] = int32biases;
- }
-
- std::vector<float> weightScalers;
-
- // Creating w-scale blob
- if (weights) {
- const float* weight = static_cast<const float*>(weights->buffer());
-
- ConvolutionLayer* pConv1 = dynamic_cast<ConvolutionLayer*>(target_layer.get());
-
- if (pConv1 != nullptr && pConv1->_group == 0) {
- THROW_IE_EXCEPTION << "Convolution '" << target_layer->name << "'has wrong groups number == 0";
- }
- int group = 1;
- if (pConv1 != nullptr && pConv1->_group != 1) {
- group = pConv1->_group;
- }
-
- std::vector<float> newWeights; // "new" weights are weights multiplied by i-scale
-
- size_t W_CO = outputChannels / group, W_CI = inputChannels / group,
- W_HW = weights->size() / W_CI / W_CO / group;
-
- {
- float* iScaleMemory = static_cast<float*>(iScale->buffer());
- for (size_t g = 0; g < group; g++) {
- for (size_t co = 0; co < W_CO; co++) {
- for (size_t ci = 0; ci < W_CI; ci++) {
- size_t kernelBase = g * W_CO * W_CI * W_HW + co * W_CI * W_HW + ci * W_HW;
- for (size_t hw = 0; hw < W_HW; hw++) {
- newWeights.push_back(weight[kernelBase + hw] * iScaleMemory[g * W_CI + ci]);
- }
- }
- }
- }
- }
- if (newWeights.empty())
- THROW_IE_EXCEPTION << "Could not quantize layer '" << target_layer->name << "'. Invalid layer parameters.";
- size_t outChannelSize = weights->getTensorDesc().getDims().back() / W_CO / group;
-
- // Calculating weights normalization scale factor (w-scale)
-
- std::set<double> individualsG;
- size_t co;
- float* weight_convolution;
- bool bwquantized = false;
- double symQuant = 0.f;
-
- for (co = 0, weight_convolution = &newWeights[0]; co < outputChannels;
- co++, weight_convolution += outChannelSize) {
- for (size_t i = 0; i < outChannelSize && individualsG.size() < 256; i++) {
- individualsG.insert(static_cast<double>(weight_convolution[i]));
- }
- }
- // If we have 256 quantums for all filters in convolution, it can be already int8 quantized weights
- // We can support symmetric quantization
- // Below conditions verify if weights are symmetric quantized around 0, what are min/max borders
- // These parameters are required to repeat exactly the same quantum as model was trained
- // The algorithm of restoring min/max parameters has couple assumptions which might not work for 100%
- // cases. We want to explicitly define them. We assume that
- // 1. All convolutions have 1st quantum either from positive or negative side. See how we calculate symQuant
- // 2. If quantization is not symmetric, there should be quant on one of the side which demonstrate this
- if (individualsG.size() < 256) {
- // going over weights and verify that weights stay on quant positions
- std::set<double> intervals;
- double prev = 0.f;
- for (auto it = individualsG.begin(); it != individualsG.end(); it++) {
- if (prev) {
- intervals.insert(*it - prev);
- }
- prev = *it;
- }
- if (!intervals.empty()) {
- symQuant = *(intervals.begin());
- }
- std::set<double> divs;
- if (symQuant != 0.) {
- prev = 0.f;
- for (auto it = individualsG.begin(); it != individualsG.end(); it++) {
- if (prev) {
- divs.insert((*it - prev) / symQuant);
- }
- prev = *it;
- }
- }
-
- bwquantized = true;
- for (auto it3 = divs.begin(); it3 != divs.end(); it3++) {
- if (fabs(round(*it3) - *it3) > 0.001) {
- bwquantized = false;
- }
- }
-
- // we want to make sure that quantization is symmetric. this way we are looking for the
- // value in weights matching to the quant (positive or negative
- if (bwquantized) {
- // take the minimal and maximum values on calculated symQuant and compare with data from individuals
- double minCalc = symQuant * -128.0f;
- double maxCalc = symQuant * 128.0f;
- for (auto it = individualsG.begin(); it != individualsG.end(); it++) {
- if (*it < minCalc || *it > maxCalc) {
- bwquantized = false;
- }
- }
- }
- }
- if (bwquantized && symQuant != 0.0f) {
- float max = symQuant * 127.0f;
- for (co = 0, weight_convolution = &newWeights[0]; co < outputChannels;
- co++, weight_convolution += outChannelSize) {
- float scaler = static_cast<float>(statHelper.getMaxSignValue()) / max;
- weightScalers.push_back(scaler);
- }
- } else {
- for (co = 0, weight_convolution = &newWeights[0]; co < outputChannels;
- co++, weight_convolution += outChannelSize) {
- float max = FLT_MIN;
- DataStats::GetDataAbsMax(weight_convolution, outChannelSize, max);
-
- float scaler = static_cast<float>(statHelper.getMaxSignValue()) / max;
- weightScalers.push_back(scaler);
- }
- }
-
- std::shared_ptr<Data> wScaleData =
- std::shared_ptr<Data>(new Data("w-scale", {Precision::FP32, {outputChannels}, Layout::C}));
- auto wScale = CreateBlobFromData(wScaleData);
- wScale->allocate();
-
- float* wScaleMemory = static_cast<float*>(wScale->buffer());
-
- for (size_t i = 0; i < outputChannels; i++) {
- wScaleMemory[i] = 1.0 / weightScalers[i];
- }
- target_layer->blobs["w-scale"] = wScale;
-
- auto oScale = statHelper.getOutputScale(statHelper.getLatestInFuse(target_layer));
- if (oScale) {
- // there might not be o-scale if we do not have statistic after convolution that means
- // returning to float precision after convolution
- target_layer->blobs["o-scale"] = oScale;
-
- // debug scales. Need to compare with actual values in FP32 scoring
- target_layer->blobs["ext-scale"] = target_layer->blobs["o-scale"];
- } else {
- // we do not have statistics here, we cannot calculate requantizatin scales,
- // next layer will be calculated in fp32
- // it's time to return forcedly edge to fp32 as well
- target_layer->outData[0]->setPrecision(Precision::FP32);
- }
-
- // Normalizing the weights
- ScaleDataToInt(&newWeights[0], weights->size(), int8weights, weightScalers);
- }
-
- // Normalizing the biases
- if (biases) {
- const float* bias = static_cast<const float*>(biases->buffer());
- ScaleDataToInt(bias, biases->size(), int32biases, weightScalers);
- }
-}
-
-bool CNNNetworkInt8Normalizer::layerProducesFloat(const CNNLayer::Ptr layer) {
- // currently we support only case of layers which have one output port
- if (layer->outData.size() > 1) {
- return false;
- }
-
- bool consumersFP32 = true;
- for (const auto dOut : layer->outData[0]->getInputTo()) {
- if (dOut.second->precision != Precision::FP32) {
- consumersFP32 = false;
- }
- }
- return consumersFP32;
-}
-
-void CNNNetworkInt8Normalizer::returnTailToFP32(const CNNLayer::Ptr layer) {
- std::set<CNNLayer::Ptr> layersToReturn;
- if (layerProducesFloat(layer)) {
- layersToReturn.insert(layer);
- }
-
- while (!layersToReturn.empty()) {
- CNNLayer::Ptr layerA = *layersToReturn.begin();
- layersToReturn.erase(layerA);
- // 1. if it is Pooling layer, or concat layer, we can return it to FP32 as well
- // we need to return it's out data
- if ((CaselessEq<std::string>()(layerA->type, "pooling") || CaselessEq<std::string>()(layerA->type, "concat")) &&
- layerA->outData.size() == 1) {
- layerA->precision = Precision::FP32;
- layerA->outData[0]->setPrecision(Precision::FP32);
- }
-
- if ((CaselessEq<std::string>()(layerA->type, "convolution") ||
- CaselessEq<std::string>()(layerA->type, "fullyconnected") ||
- CaselessEq<std::string>()(layerA->type, "relu") || isReLULikeClamp(layerA)) &&
- layerA->outData.size() == 1) {
- layerA->outData[0]->setPrecision(Precision::FP32);
- if (CaselessEq<std::string>()(layerA->type, "relu")
- && layerA->insData[0].lock() != nullptr
- && canLayerBeI8(layerA->insData[0].lock()->getCreatorLayer().lock())) {
- layerA->precision = Precision::FP32;
- layerA->insData[0].lock()->getCreatorLayer().lock()->outData[0]->setPrecision(Precision::FP32);
- }
- }
-
- // adding parents for analysis
- if (!CaselessEq<std::string>()(layerA->type, "convolution") &&
- !CaselessEq<std::string>()(layerA->type, "fullyconnected")) {
- // for all parents, if they produce data to only FP32 layers
- for (auto i : layerA->insData) {
- DataPtr d = i.lock();
- if (d != nullptr && d->getCreatorLayer().lock()->precision != Precision::FP32 &&
- (CaselessEq<std::string>()(layerA->type, "pooling") ||
- CaselessEq<std::string>()(layerA->type, "relu") || isReLULikeClamp(layerA) ||
- CaselessEq<std::string>()(layerA->type, "concat"))) {
- if (layerProducesFloat(d->getCreatorLayer().lock())) {
- layersToReturn.insert(d->getCreatorLayer().lock());
- }
- }
- }
- }
- }
-}
-
-bool CNNNetworkInt8Normalizer::canLayerBeI8(const CNNLayer::Ptr& layer) {
- // fusion can happen only if initial layer supplies data to only one layer
- // if it sends to several layers - it is safe to execute initial layer in any precision
- if (layer->outData[0]->getInputTo().size() == 1) {
- std::string aType = layer->outData[0]->getInputTo().begin()->second->type;
- if (CaselessEq<std::string>()(aType, "relu")) {
- return true;
- } else if (CaselessEq<std::string>()(aType, "clamp")) {
- if (!isReLULikeClamp(layer->outData[0]->getInputTo().begin()->second)) {
- return false;
- }
- } else {
- static const InferenceEngine::details::caseless_set<std::string> nonSuportedActivations = {
- "elu", "clamp", "tanh", "logistic", "square", "abs",
- "sqrt", "linear", "bounded_elu", "sort_relu", "relu6"};
- return nonSuportedActivations.find(aType) == nonSuportedActivations.end();
- }
- }
- return true;
-}
-
-bool CNNNetworkInt8Normalizer::isNextFusionAllowed(const CNNLayer::Ptr& layer) {
- // fusion can happen only if initial layer supplies data to only one layer
- // if it sends to several layers - it is safe to execute initial layer in any precision
- if (layer->outData[0]->getInputTo().size() == 1) {
- std::string aType = layer->outData[0]->getInputTo().begin()->second->type;
- if (CaselessEq<std::string>()(aType, "relu")) {
- ReLULayer* rL = dynamic_cast<ReLULayer*>(layer->outData[0]->getInputTo().begin()->second.get());
- if (rL == nullptr) {
- THROW_IE_EXCEPTION << "Layer " << layer->outData[0]->getInputTo().begin()->second->name
- << " is not instance of ReLULayer class";
- }
- if (rL->negative_slope != 0.f) {
- return false;
- }
- } else if (CaselessEq<std::string>()(aType, "clamp")) {
- if (!isReLULikeClamp(layer->outData[0]->getInputTo().begin()->second)) {
- return false;
- }
- } else {
- static const InferenceEngine::details::caseless_set<std::string> nonSuportedActivations = {
- "elu", "clamp", "tanh", "logistic", "square", "abs",
- "sqrt", "linear", "bounded_elu", "sort_relu", "relu6"};
- return nonSuportedActivations.find(aType) == nonSuportedActivations.end();
- }
- } else {
- if (CaselessEq<std::string>()(layer->type, "eltwise")) {
- return false;
- }
- }
- return true;
-}
-
-bool CNNNetworkInt8Normalizer::isReLULikeClamp(CNNLayer::Ptr layer) {
- if (CaselessEq<std::string>()(layer->type, "Clamp")) {
- ClampLayer* clamp = dynamic_cast<ClampLayer*>(layer.get());
- if (clamp == nullptr) {
- THROW_IE_EXCEPTION << "Int8 Normalizer error: cannot cast layer '" << layer->name << "' to Clamp";
- }
- return clamp->min_value == 0;
- }
- return false;
-}
-
-void CNNNetworkInt8Normalizer::DefinesExecutionPrecision(CNNNetwork& net, CNNStatisticHelper& statHelper) {
- std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(net);
-
- // Converting layers to Int8. Calculating the multipliers if needed
- for (auto iter : sortedLayers) {
- if (iter->params.find("quantization_level") != iter->params.end() &&
- (iter->params["quantization_level"] == "FP32" || iter->params["quantization_level"] == "FP16")) {
- continue;
- }
-
- // Legacy: FullyConnected should not be converted to Int8,
- // if it isn't explicitly marked to.
- if (iter->params.find("quantization_level") == iter->params.end() &&
- CaselessEq<std::string>()(iter->type, "fullyconnected")) {
- continue;
- }
-
- if (!statHelper.canLayerBeQuantized(iter)) {
- continue;
- }
-
- if (CaselessEq<std::string>()(iter->type, "convolution") ||
- CaselessEq<std::string>()(iter->type, "fullyconnected")) {
- if (canLayerBeI8(iter)) {
- iter->precision = Precision::I8;
- // we will override I8 to U8 during analysing of Conv-ReLU and Conv-Sum-ReLU fusions
- iter->outData[0]->setPrecision(Precision::I8);
- }
- } else if (CaselessEq<std::string>()(iter->type, "relu") || isReLULikeClamp(iter)) {
- // casting to ReLU
- ReLULayer* rL = dynamic_cast<ReLULayer*>(iter.get());
- DataPtr outData = iter->outData.size() ? iter->outData[0] : nullptr;
- auto inputData = iter->insData[0].lock();
- if (inputData && inputData->getCreatorLayer().lock()->precision != Precision::FP32 &&
- outData->getPrecision() == Precision::FP32) {
- iter->precision = Precision::I8;
- if (rL != nullptr && rL->negative_slope != 0.0f) {
- outData->setPrecision(Precision::I8);
- } else {
- outData->setPrecision(Precision::U8);
- // if convolution is a predecessor, change its data to U8 also
- CNNLayer::Ptr prevLayer = inputData->getCreatorLayer().lock();
- if (prevLayer && (CaselessEq<std::string>()(prevLayer->type, "convolution") ||
- CaselessEq<std::string>()(prevLayer->type, "fullyconnected") ||
- CaselessEq<std::string>()(prevLayer->type, "eltwise"))) {
- if (!isNextFusionAllowed(prevLayer) && inputData->getPrecision() == Precision::I8) {
- outData->setPrecision(Precision::I8);
- } else {
- inputData->setPrecision(Precision::U8);
- }
- }
- // if there is a patter A0 -> Eltwise -> ReLU and Convolution -> Eltwise -> ReLU,
- // need to mark data after conv as U8
- if (prevLayer && CaselessEq<std::string>()(prevLayer->type, "eltwise")) {
- // decising which input will be used for fusion conv-sum-relu
- CNNLayer::Ptr input1 = prevLayer->insData[0].lock()->getCreatorLayer().lock();
- CNNLayer::Ptr input2 = prevLayer->insData[1].lock()->getCreatorLayer().lock();
- CNNLayer::Ptr convLayer = nullptr;
- CNNLayer::Ptr sumLayer = nullptr;
-
- if (!CaselessEq<std::string>()(input1->type, "convolution")) {
- sumLayer = input1;
- convLayer = input2;
- } else {
- // it covers a case when both inputs are convolutions or when first input is not convolution
- convLayer = input1;
- sumLayer = input2;
- }
- convLayer->outData[0]->setPrecision(sumLayer->outData[0]->getPrecision());
- }
- }
- }
- } else if (CaselessEq<std::string>()(iter->type, "pooling")) {
- auto pool = dynamic_cast<PoolingLayer*>(iter.get());
- if (pool == nullptr) {
- THROW_IE_EXCEPTION << "Int8 Normalizer error: cannot cast layer '" << iter->name << "' to pooling";
- }
-
- if (pool->_type == PoolingLayer::MAX || (pool->_type == PoolingLayer::AVG && pool->outData.size() == 1)) {
- auto prevLayer = iter->insData[0].lock()->getCreatorLayer().lock();
- if (prevLayer && (prevLayer->precision == Precision::I8 || prevLayer->precision == Precision::U8)) {
- iter->precision = Precision::I8;
- iter->outData[0]->setPrecision(statHelper.hasNegativeOutput(iter->name) ? Precision::I8
- : Precision::U8);
- }
- }
- } else if (CaselessEq<std::string>()(iter->type, "concat")) {
- // we can do safe
- // casting to concat and take axis parameter
- // we can concat scales only if concat does concatination by feature maps
- bool axisFeatureMaps = false;
- auto concatLayer = dynamic_cast<ConcatLayer*>(iter.get());
- if (concatLayer) {
- if (concatLayer->_axis == 1 && concatLayer->insData.size() &&
- concatLayer->insData[0].lock()->getTensorDesc().getDims().size() == 4) {
- axisFeatureMaps = true;
- }
- } else {
- THROW_IE_EXCEPTION << "Int8 Normalizer error: cannot cast layer " << iter->name << " to concat";
- }
-
- if (axisFeatureMaps) {
- // verification of input data types
- bool inputFP32 = false;
- bool inputI8 = false;
- bool inputU8 = false;
-
- for (auto inputData : iter->insData) {
- auto data = inputData.lock();
- if (data->getPrecision() == Precision::FP32) {
- inputFP32 = true;
- } else if (data->getPrecision() == Precision::I8) {
- inputI8 = true;
- } else if (data->getPrecision() == Precision::U8) {
- inputU8 = true;
- } else {
- // Is it a case of input, i.e. passing I16 to concat?
- // TODO(amalyshe) to handle inputs as a separate usecase
- THROW_IE_EXCEPTION << "I8 normalizer: input data has unknown precision on the edge for concat: "
- << data->getName();
- }
- }
-
- if (inputFP32) {
- for (auto i : iter->insData) {
- if (i.lock()->getCreatorLayer().lock()->precision != Precision::FP32) {
- returnTailToFP32(i.lock()->getCreatorLayer().lock());
- }
- }
- } else {
- iter->precision = Precision::I8;
-
- // we set outpout precision to U8 only if all inputs are U8, in other case it will be I8
- auto outputPrecision = (inputU8 && !inputI8) ? Precision::U8 : Precision::I8;
-
- // if we have mixed input for I8 and U8, we have to insert scale to edges having U8 to convert to I8
- // Yes, it leads to loosing of some precision and might lead to some performance degradation
- // until we have scale supporting s8/u8 input and s8/u8 output.
- if (inputU8 && inputI8) {
- // looking for all edges having U8
- for (size_t d = 0; d < iter->insData.size(); d++) {
- auto data = iter->insData[d].lock();
- if (data->getPrecision() == Precision::U8) {
- const size_t c = static_cast<size_t>(data->getDims()[1]);
- std::vector<float> ssWValues(c, 1.0f);
- std::vector<float> ssSValues(c, 0.0f);
-
- std::string layerName =
- data->getCreatorLayer().lock()->name + "_Concat_ScaleShift_U8I8_" + iter->name;
- CNNLayer::Ptr newLayer =
- createDWConvolutionForScale(layerName, c, ssWValues.data(), ssSValues.data());
- newLayer->precision = Precision::I8;
- AddLayerToCNNNetworkBeforeLayer(newLayer, iter, d);
-
- // update statistic to pass quantization smoothly
- std::string inputLayerName =
- newLayer->insData[0].lock()->getCreatorLayer().lock()->name;
- statHelper.copyStatistics(inputLayerName, layerName);
- newLayer->outData[0]->setPrecision(Precision::I8);
- }
- }
- }
-
- if (iter->outData.size() == 1) {
- for (auto&& out : iter->outData) {
- out->setPrecision(outputPrecision);
- }
- }
- }
- }
- } else if (CaselessEq<std::string>()(iter->type, "eltwise")) {
- // we decide which of the layers will be in int-8 mode and initialize special scale which will be used
- // later in "conv-sum-relu" fuse. i8 execution of eltwise always assume this fusion
- if (canLayerBeI8(iter)) {
- if (iter->insData.size() == 2) {
- CNNLayer::Ptr input1 = iter->insData[0].lock()->getCreatorLayer().lock();
- CNNLayer::Ptr input2 = iter->insData[1].lock()->getCreatorLayer().lock();
- if ((CaselessEq<std::string>()(input1->type, "convolution") ||
- CaselessEq<std::string>()(input2->type, "convolution")) &&
- !CaselessEq<std::string>()(input1->type, "concat") &&
- !CaselessEq<std::string>()(input2->type, "concat") && input1->precision != Precision::FP32 &&
- input2->precision != Precision::FP32) {
- // understand which layer will be used for sum
- CNNLayer::Ptr sumLayer = nullptr;
- CNNLayer::Ptr convLayer = nullptr;
-
- if (!CaselessEq<std::string>()(input1->type, "convolution")) {
- sumLayer = input1;
- convLayer = input2;
- } else {
- // it covers a case when both inputs are convolutions or when first input is not convolution
- sumLayer = input2;
- convLayer = input1;
- }
-
- // if we find supported activation, mark it's output as I8 or U8 depending on statistics
- if (iter->outData.size() == 1 && iter->outData[0]->getInputTo().size() == 1 &&
- (CaselessEq<std::string>()(iter->outData[0]->getInputTo().begin()->second->type, "ReLU") ||
- CNNNetworkInt8Normalizer::isReLULikeClamp(
- iter->outData[0]->getInputTo().begin()->second))) {
- auto activation = iter->outData[0]->getInputTo().begin()->second;
- activation->precision = Precision::I8;
- if (!statHelper.hasNegativeOutput(statHelper.getLatestInFuse(convLayer)->name)) {
- activation->outData[0]->setPrecision(Precision::U8);
- iter->outData[0]->setPrecision(Precision::U8);
- } else {
- activation->outData[0]->setPrecision(Precision::I8);
- iter->outData[0]->setPrecision(Precision::I8);
- }
- } else {
- iter->outData[0]->setPrecision(Precision::I8);
- }
-
- if (convLayer->outData[0]->getTensorDesc().getPrecision() == Precision::I8) {
- // verify precision on input edges before and after eltwise fusion
- // if we have i8/u8 missmatch between sum layer input and conv-sum-activation output,
- // then in this case we have to add requantization to i8 on sum input edge
- auto latestInFuse = statHelper.getLatestInFuse(convLayer);
- if (latestInFuse->outData[0]->getTensorDesc().getPrecision() == Precision::I8) {
- if (input1 == sumLayer &&
- iter->insData[0].lock()->getTensorDesc().getPrecision() == Precision::U8) {
- sumLayer = addU8ToI8Conversion(iter->insData[0].lock(), iter, statHelper);
- } else if (input2 == sumLayer &&
- iter->insData[1].lock()->getTensorDesc().getPrecision() == Precision::U8) {
- sumLayer = addU8ToI8Conversion(iter->insData[0].lock(), iter, statHelper);
- }
- if (!sumLayer) {
- THROW_IE_EXCEPTION << "I8 normalizer had to add U8->I8 conversion before "
- << iter->name << " but failed to do this";
- }
- }
-
- // mark eltwise as a I8 executable, mark out data as I8
- iter->precision = Precision::I8;
- convLayer->outData[0]->setPrecision(sumLayer->outData[0]->getPrecision());
- // calculate the only scale
- Blob::Ptr sumLayerScales = statHelper.getOutputScale(statHelper.getLatestInFuse(sumLayer));
- Blob::Ptr convLayerScales =
- statHelper.getOutputScale(statHelper.getLatestInFuse(convLayer));
- float* sumScale = sumLayerScales->buffer().as<float*>();
- float* convScale = convLayerScales->buffer().as<float*>();
- for (size_t i = 0; i < sumLayerScales->size(); i++) {
- sumScale[i] /= convScale[i];
- }
-
- iter->blobs["eltwise-sum-scale"] = sumLayerScales;
- }
- }
- }
- } else {
- // if there are convolutions are inputs to this eltwise, we forcedly move them to FP32
- for (auto i : iter->insData) {
- auto type = i.lock()->getCreatorLayer().lock()->type;
- if (CaselessEq<std::string>()(type, "convolution") ||
- CaselessEq<std::string>()(type, "fullyconnected")) {
- i.lock()->getCreatorLayer().lock()->precision = Precision::FP32;
- i.lock()->setPrecision(Precision::FP32);
- }
- }
- }
- } else if (CaselessEq<std::string>()(iter->type, "resample")) {
- iter->precision = Precision::I8;
- iter->outData[0]->setPrecision(iter->insData[0].lock()->getPrecision());
- }
- }
-
- // quantization of weights/biases
- sortedLayers = CNNNetSortTopologically(net);
- for (auto iter : sortedLayers) {
- if (iter->precision == Precision::I8 && (CaselessEq<std::string>()(iter->type, "convolution") ||
- CaselessEq<std::string>()(iter->type, "fullyconnected"))) {
- QuantizeConvolutionOrFullyConnected(iter, statHelper);
- }
- }
-
- // Returning of tails to FP32 mode if optimistic approach marked them as I8
- // no sense to do pooling in i8, we can return just after convolution
- for (auto iter : sortedLayers) {
- // TODO(amalyshe) here is a handling of case when iter provides data to the only one next layer
- // need to extend to cases when it provides data to many layers
- if (iter->precision == Precision::I8 && iter->outData.size() == 1) {
- if ((iter->outData[0]->getInputTo().size() == 1 &&
- iter->outData[0]->getInputTo().begin()->second->precision == Precision::FP32) ||
- iter->outData[0]->getInputTo().size() == 0) {
- returnTailToFP32(iter);
- }
- }
- }
-}
-
-void CNNNetworkInt8Normalizer::PropagateScaleFactors(CNNNetwork& net, const CNNStatisticHelper& statHelper) {
- std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(net);
-
- // Moving o-scales down
- for (auto iter : sortedLayers) {
- if (iter->type == "Concat" && iter->precision == Precision::I8) {
- // Checking if all inputs are INT8
- bool all_inputs_are_int8 = true;
- for (int k = 0; k < iter->insData.size(); k++) {
- auto prevKLayer = iter->insData[k].lock()->getCreatorLayer().lock();
- if ((prevKLayer->precision != Precision::I8 && prevKLayer->precision != Precision::U8) ||
- prevKLayer->blobs.find("i-concat-scale") == prevKLayer->blobs.end()) {
- all_inputs_are_int8 = false;
- break;
- }
- }
-
- if (all_inputs_are_int8) {
- // Merging o-scales of the inputs to make one for the Concat
- // Creating the o-scale for the Concat by concatenating the input concats
- size_t outputChannels = iter->outData[0]->getTensorDesc().getDims()[1];
-
- std::shared_ptr<Data> oScaleData =
- std::shared_ptr<Data>(new Data("o-scale", {Precision::FP32, {outputChannels}, Layout::C}));
- auto oScale = CreateBlobFromData(oScaleData);
- oScale->allocate();
-
- float* oScaleMemory = static_cast<float*>(oScale->buffer());
- int cc = 0;
- for (int in = 0; in < iter->insData.size(); in++) {
- auto prevOScale = iter->insData[in].lock()->getCreatorLayer().lock()->blobs["i-concat-scale"];
- float* prevOScaleMemory = static_cast<float*>(prevOScale->buffer());
-
- for (int c = 0; c < prevOScale->size(); c++) {
- oScaleMemory[cc] = prevOScaleMemory[c];
- cc++;
- }
- }
- if (cc != outputChannels)
- THROW_IE_EXCEPTION << "Size of o-scale after " << iter->name
- << " isn't equal to the channels count";
-
- iter->precision = Precision::I8;
- iter->blobs["o-scale"] = oScale;
- }
- }
-
- if (iter->blobs.find("o-scale") != iter->blobs.end()) {
- int int8Consumers = 0;
- int fp32Consumers = 0;
- if (iter->outData.size() > 1) {
- THROW_IE_EXCEPTION << "normalization algorithm for int8 found layer having o-scale and multiple ports";
- }
- if (iter->outData.size() == 1) {
- for (auto l : iter->outData[0]->getInputTo()) {
- if (l.second->precision == Precision::I8 || l.second->precision == Precision::U8) {
- if (CaselessEq<std::string>()(l.second->type, "Pooling") ||
- CaselessEq<std::string>()(l.second->type, "ReLU") ||
- CNNNetworkInt8Normalizer::isReLULikeClamp(l.second)) {
- l.second->blobs["o-scale"] = iter->blobs["o-scale"];
- // debug scales. Need to compare with actual values in FP32 scoring
- l.second->blobs["ext-scale"] = l.second->blobs["o-scale"];
- int8Consumers++;
- } else if (l.second->type == "Convolution") {
- l.second->blobs.erase("i-scale");
- int8Consumers++;
- } else if (CaselessEq<std::string>()(l.second->type, "Eltwise")) {
- if (statHelper.getLatestInFuse(iter) != iter) {
- l.second->blobs["o-scale"] = iter->blobs["o-scale"];
- }
- int8Consumers++;
- } else if ((l.second->precision == Precision::I8 || l.second->precision == Precision::U8) &&
- CaselessEq<std::string>()(l.second->type, "Resample")) {
- // If resample has concat as input layer it should inherit it's
- // output scale
- if (l.second->insData.size() == 1) {
- CNNLayerPtr creator = l.second->insData[0].lock()->getCreatorLayer().lock();
- if (CaselessEq<std::string>()(creator->type, "Concat")) {
- l.second->blobs["o-scale"] = creator->blobs["o-scale"];
- l.second->blobs["i-concat-scale"] = l.second->blobs["o-scale"];
- }
- }
-
- // No concat found, let use statistics
- if (l.second->blobs.find("o-scale") == l.second->blobs.end()) {
- auto oScale = statHelper.getOutputScale(l.second);
- l.second->blobs["o-scale"] = oScale;
- l.second->blobs["i-concat-scale"] = l.second->blobs["o-scale"];
- }
- int8Consumers++;
- } else if ((l.second->precision == Precision::I8) &&
- CaselessEq<std::string>()(l.second->type, "concat")) {
- // if concat is i8, we can propagate oscale further to concat.
- // The logic around o-scale assumes that if we have it in the layer after iteration
- // in this loop it means that it must not be removed and we need to place
- // scale. While for concat we return to one layer back and again need to analyze o-scale
- // and it is not clear if we need to return o-scale or it was only for concat.
- // Having all of this in mind, it's better to rename o-scale to i-concat-scale
- iter->blobs["i-concat-scale"] = iter->blobs["o-scale"];
- int8Consumers++;
- } else {
- fp32Consumers++;
- }
- } else if (CaselessEq<std::string>()(l.second->type, "priorbox") ||
- CaselessEq<std::string>()(l.second->type, "priorboxclustered")) {
- } else {
- // we are leaving o-scale still for adding of scale-shift before FP32 layer
- fp32Consumers++;
- }
- }
-
- if (iter->outData[0]->getInputTo().empty()) {
- fp32Consumers++;
- }
-
- if (CaselessEq<std::string>()(iter->type, "Convolution") ||
- CaselessEq<std::string>()(iter->type, "FullyConnected")) {
- if (int8Consumers) {
- iter->blobs["oi-scale"] = iter->blobs["o-scale"];
- } else {
- iter->outData[0]->setPrecision(Precision::FP32);
- }
- }
- if (!fp32Consumers) {
- iter->blobs.erase("o-scale");
- }
- }
- }
- }
-
- // fixing cornercases when o-scale was propagated through linear tail but it is more efficient to leave
- // conversion to de-normalized values in convolution
- for (auto iter : sortedLayers) {
- if (iter->blobs.find("o-scale") != iter->blobs.end()) {
- // go over out data. if all outputs are fp32, continue this optimization
- bool canOptimize = true;
-
- // current layer must not be convolution
- if (CaselessEq<std::string>()(iter->type, "convolution")) {
- canOptimize = false;
- }
- for (auto o : iter->outData) {
- for (auto ol : o->getInputTo()) {
- if (ol.second->precision == Precision::I8) {
- canOptimize = false;
- }
- }
- }
- if (!canOptimize) {
- continue;
- }
- // trying to go up until convolution
- auto curLayer = iter;
- bool eliminateOScale = true;
- while (curLayer && curLayer->blobs.find("oi-scale") == curLayer->blobs.end() && eliminateOScale) {
- if (curLayer->insData.size() == 1 && curLayer->insData[0].lock()->getCreatorLayer().lock() &&
- curLayer->insData[0].lock()->getCreatorLayer().lock()->outData.size() == 1 &&
- curLayer->insData[0].lock()->getInputTo().size() == 1) {
- curLayer = curLayer->insData[0].lock()->getCreatorLayer().lock();
- if (!CaselessEq<std::string>()(curLayer->type, "Pooling") &&
- !CaselessEq<std::string>()(curLayer->type, "ReLU") && !isReLULikeClamp(curLayer) &&
- !CaselessEq<std::string>()(curLayer->type, "Convolution")) {
- eliminateOScale = false;
- }
- } else {
- eliminateOScale = false;
- }
- }
- if (eliminateOScale && curLayer) {
- for (auto o : iter->outData) {
- o->setPrecision(Precision::FP32);
- }
- for (auto o : curLayer->outData) {
- o->setPrecision(Precision::FP32);
- }
-
- curLayer->blobs.erase("oi-scale");
- iter->blobs.erase("o-scale");
- auto iLayer = iter;
- while (iLayer != curLayer) {
- if (iLayer->type == "Pooling") {
- iLayer->precision = Precision::FP32;
- }
- iLayer = iLayer->insData[0].lock()->getCreatorLayer().lock();
- }
- }
- }
- }
-}
-
-std::string getBlobDimention(const Blob::Ptr blob) {
- size_t idx = blob->getTensorDesc().getDims().size();
-
- std::stringstream blobDimention;
- blobDimention << "[";
- for (auto& dim : blob->getTensorDesc().getDims()) {
- blobDimention << dim << ((--idx) != 0u ? ", " : "");
- }
- blobDimention << "]";
-
- return blobDimention.str();
-}
-
-void precisionColoring(const CNNLayerPtr layer, ordered_properties& printed_properties,
- ordered_properties& node_properties) {
- // looking for the w-scale
- if (layer->blobs.find("w-scale") != layer->blobs.end()) {
- printed_properties.insert(
- printed_properties.begin(),
- std::pair<std::string, std::string>("w-scale", getBlobDimention(layer->blobs.find("w-scale")->second)));
- }
-
- // looking for the oi-scale
- if (layer->blobs.find("oi-scale") != layer->blobs.end()) {
- printed_properties.insert(
- printed_properties.begin(),
- std::pair<std::string, std::string>("oi-scale", getBlobDimention(layer->blobs.find("oi-scale")->second)));
- }
-
- // looking for the o-scale
- if (layer->blobs.find("o-scale") != layer->blobs.end()) {
- printed_properties.insert(
- printed_properties.begin(),
- std::pair<std::string, std::string>("o-scale", getBlobDimention(layer->blobs.find("o-scale")->second)));
- }
- // looking for the i-scale
- if (layer->blobs.find("i-scale") != layer->blobs.end()) {
- printed_properties.insert(
- printed_properties.begin(),
- std::pair<std::string, std::string>("i-scale", getBlobDimention(layer->blobs.find("i-scale")->second)));
- }
-
- printed_properties.insert(
- printed_properties.begin(),
- std::pair<std::string, std::string>("Precision", layer->precision == Precision::FP32 ? "FP32" : "I8"));
-
- if (layer->precision == Precision::FP32) {
- node_properties.emplace_back("fillcolor", "#5A5DF0");
- } else {
- node_properties.emplace_back("fillcolor", "#20F608");
- }
-}
-
-void CNNNetworkInt8Normalizer::NormalizeNetwork(ICNNNetwork& network, ICNNNetworkStats& netStats) {
- CNNNetwork cnnn(ICNNNetwork::Ptr(&network, [](void*) {}));
-
- int maxSign = 0x7F;
- int maxUnsign = 0xFF;
-
- // Applying int8-conversion
- StatsMap statsMap = netStats.getNodesStats();
-
- CNNStatisticHelper statHelper(cnnn, statsMap, maxSign, maxUnsign);
-
- replaceScaleShiftByDWConvolution(cnnn);
-
- DefinesExecutionPrecision(cnnn, statHelper);
- PropagateScaleFactors(cnnn, statHelper);
- ClampsToReLU(cnnn, statHelper);
- AddScaleShifts(cnnn, statHelper);
-#ifndef NDEBUG
- std::ofstream file("i8_normalized.dot");
- saveGraphToDot(cnnn, file, precisionColoring);
-#endif
-}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "cnn_network_stats_impl.hpp"
-
-#include <ie_common.h>
-
-#include <cassert>
-#include <cfloat>
-#include <fstream>
-#include <map>
-#include <memory>
-#include <pugixml.hpp>
-#include <string>
-#include <vector>
-
-using namespace std;
-namespace InferenceEngine {
-namespace details {
-
-CNNNetworkStatsImpl::~CNNNetworkStatsImpl() {}
-
-void CNNNetworkStatsImpl::setNodesStats(const NetworkStatsMap& stats) {
- netNodesStats = stats;
-}
-
-const NetworkStatsMap& CNNNetworkStatsImpl::getNodesStats() const {
- return netNodesStats;
-}
-
-} // namespace details
-} // namespace InferenceEngine
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <float.h>
-#include <stdlib.h>
-
-#include <cstdint>
-#include <vector>
-
-#include "ie_api.h"
-
-class DataStats {
-public:
- template <typename T>
- static void GetDataMinMax(const T* data, size_t count, T& min, T& max);
-
- template <typename T>
- static void GetDataAverage(const T* data, size_t count, T& ave);
-
- template <typename T>
- static void GetDataAbsMax(const T* data, size_t count, T& max);
-
- template <typename T>
- static T GetAbsMax(T min, T max);
-};
-
-template <typename T>
-void DataStats::GetDataMinMax(const T* data, size_t count, T& min, T& max) {
- for (size_t i = 0; i < count; i++) {
- T val = data[i];
-
- if (min > val) {
- min = val;
- }
-
- if (max < val) {
- max = val;
- }
- }
-}
-
-template <typename T>
-void DataStats::GetDataAbsMax(const T* data, size_t count, T& max) {
- T min = FLT_MAX;
-
- GetDataMinMax(data, count, min, max);
-
- max = GetAbsMax(min, max);
-}
-
-template void DataStats::GetDataMinMax<float>(const float* data, size_t count, float& min, float& max);
-template void DataStats::GetDataMinMax<uint8_t>(const uint8_t* data, size_t count, uint8_t& min, uint8_t& max);
-
-template void DataStats::GetDataAbsMax<float>(const float* data, size_t count, float& max);
-
-template <typename T>
-void DataStats::GetDataAverage(const T* data, size_t count, T& ave) {
- ave = 0;
-
- for (size_t i = 0; i < count; i++) {
- ave += data[i];
- }
-
- ave /= count;
-}
-
-template void DataStats::GetDataAverage<float>(const float* data, size_t count, float& ave);
-
-template <typename T>
-T DataStats::GetAbsMax(T min, T max) {
- if (min < 0) {
- min *= -1;
- }
-
- if (max < 0) {
- max *= -1;
- }
-
- return (max > min) ? max : min;
-}
-
-template float DataStats::GetAbsMax<float>(float min, float max);
#include "details/os/os_filesystem.hpp"
#include "file_utils.h"
#include "graph_tools.hpp"
-#include "ie_icnn_network_stats.hpp"
#include "net_pass.h"
#include "precision_utils.h"
i++;
}
- InferenceEngine::ICNNNetworkStats* pstatsSrc = nullptr;
- if (StatusCode::OK != network.getStats(&pstatsSrc, nullptr)) {
- pstatsSrc = nullptr;
- }
// copy of the network
- details::CNNNetworkImplPtr net = cloneNet(layers, pstatsSrc);
+ details::CNNNetworkImplPtr net = cloneNet(layers);
// going over output layers and aligning output ports and outputs
OutputsDataMap outputs;
network.getOutputsInfo(outputs);
return net;
}
-details::CNNNetworkImplPtr cloneNet(const std::vector<CNNLayerPtr>& layers, const ICNNNetworkStats* networkStats) {
+details::CNNNetworkImplPtr cloneNet(const std::vector<CNNLayerPtr>& layers) {
auto net = std::make_shared<InferenceEngine::details::CNNNetworkImpl>();
// Src to cloned data map
net->resolveOutput();
- // cloning of statistics
- InferenceEngine::ICNNNetworkStats* pstatsTarget = nullptr;
- if (networkStats != nullptr && !networkStats->isEmpty()) {
- StatusCode st = net->getStats(&pstatsTarget, nullptr);
- if (st == StatusCode::OK && pstatsTarget) {
- pstatsTarget->setNodesStats(networkStats->getNodesStats());
- }
- }
-
return net;
}
return dataOffset;
}
-void UpdateStatisticsInfo(const InferenceEngine::ICNNNetwork& network, pugi::xml_node& netXml) {
- // If statistics exists, add it to the file
- ICNNNetworkStats* netNodesStats = nullptr;
- auto stats = netXml.append_child("statistics");
- auto resultCode = network.getStats(&netNodesStats, nullptr);
- if (resultCode != StatusCode::OK) {
- THROW_IE_EXCEPTION << InferenceEngine::details::as_status << resultCode
- << "Can't get statistics info for serialization of the model";
- }
- const NetworkStatsMap statsmap = netNodesStats->getNodesStats();
-
- auto joinCommas = [&](const std::vector<float>& v) -> std::string {
- std::string res;
-
- for (size_t i = 0; i < v.size(); ++i) {
- res += std::to_string(v[i]);
- if (i < v.size() - 1) {
- res += ", ";
- }
- }
-
- return res;
- };
-
- for (const auto& itStats : statsmap) {
- auto layer = stats.append_child("layer");
-
- layer.append_child("name").text().set(itStats.first.c_str());
-
- layer.append_child("min").text().set(joinCommas(itStats.second->_minOutputs).c_str());
- layer.append_child("max").text().set(joinCommas(itStats.second->_maxOutputs).c_str());
- }
-}
-
void UpdateStdLayerParams(const CNNLayer::Ptr& layer) {
auto layerPtr = layer.get();
auto& params = layer->params;
}
}
- // no need to print this info in case of executable graph info serialization
- if (!execGraphInfoSerialization) {
- dataOffset = updatePreProcInfo(network, netXml, dataOffset);
- UpdateStatisticsInfo(network, netXml);
- }
-
return dataOffset;
}
#include "bf16transformer.h"
#include <ie_util_internal.hpp>
#include <graph_tools.hpp>
-#include <cnn_network_int8_normalizer.hpp>
#include <threading/ie_executor_manager.hpp>
#include "low_precision_transformations/convolution.hpp"
#include "low_precision_transformations/eltwise.hpp"
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
-using InferenceEngine::details::CNNNetworkInt8Normalizer;
using namespace InferenceEngine::details;
InferenceEngine::InferRequestInternal::Ptr
extensionManager(extMgr),
_cfg{cfg},
_name{network.getName()} {
- ICNNNetworkStats* pstats = nullptr;
- StatusCode s = network.getStats(&pstats, nullptr);
// we are cloning network if we have statistics and we can transform network.
_clonedNetwork = cloneNet(network);
NetPass::ConvertPrecision(*_clonedNetwork, Precision::BOOL, Precision::U8);
NetPass::ConvertPrecision(*_clonedNetwork, Precision::U16, Precision::I32);
- if (s == StatusCode::OK && pstats && !pstats->isEmpty()) {
- CNNNetworkInt8Normalizer cnnorm;
- cnnorm.NormalizeNetwork(*_clonedNetwork, *pstats);
- } else {
- if (_cfg.lpTransformsMode == Config::LPTransformsMode::On) {
- auto params = LayerTransformation::Params(true, // updatePrecisions
- true, // quantizeOutputs
- true, // weightsToConst
- LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations
- LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
- true, // roundQuantizedValues
- true, // updateBiases
- true); // supportAsymmetricQuantization
- LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params).
- add<ConvolutionTransformation>(LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), "Convolution").
- addCleanup<ScaleShiftToConvolutionTransformation>(
- LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
- "ScaleShift"));
- transformer.transform(*_clonedNetwork);
-
- // Check if network is INT8 or Binary.
- // BF16 transformations were disabled since CPU plug-in doesn't support mixed precision execution:
- // BF16 + INT8 or BF16 + BIN.
- bool isFloatModel = true;
- CNNNetworkIterator i(&network);
- while (i != CNNNetworkIterator()) {
- if (CaselessEq<std::string>()((*i)->type, "FakeQuantize")) {
- isFloatModel = false;
- break;
- }
- i++;
+ if (_cfg.lpTransformsMode == Config::LPTransformsMode::On) {
+ auto params = LayerTransformation::Params(true, // updatePrecisions
+ true, // quantizeOutputs
+ true, // weightsToConst
+ LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations
+ LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
+ true, // roundQuantizedValues
+ true, // updateBiases
+ true); // supportAsymmetricQuantization
+ LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params).
+ add<ConvolutionTransformation>(LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), "Convolution").
+ addCleanup<ScaleShiftToConvolutionTransformation>(
+ LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
+ "ScaleShift"));
+ transformer.transform(*_clonedNetwork);
+
+ // Check if network is INT8 or Binary.
+ // BF16 transformations were disabled since CPU plug-in doesn't support mixed precision execution:
+ // BF16 + INT8 or BF16 + BIN.
+ bool isFloatModel = true;
+ CNNNetworkIterator i(&network);
+ while (i != CNNNetworkIterator()) {
+ if (CaselessEq<std::string>()((*i)->type, "FakeQuantize")) {
+ isFloatModel = false;
+ break;
}
+ i++;
+ }
- if (with_cpu_x86_bfloat16() && isFloatModel) {
- BF16Transformer bf16Transformer;
- CNNNetwork cnnetwork(_clonedNetwork);
- // If enforceBF16 flag was set, BF16 transformation applies for all layers supported by CPU plugin.
- // Overwise, only layers marked as BF16 in 'cnnetwork' will be performed in bfloat16 mode.
- // CPU plugin throws an exception, if marked as BF16 layers have not supported by CPU plugin.
- if (cfg.enforceBF16 == true)
- bf16Transformer.convertToBFloat16(cnnetwork);
- } else {
- BF16Transformer bf16Transformer;
- CNNNetwork cnnetwork(_clonedNetwork);
- bf16Transformer.convertToFloat(cnnetwork);
- }
+ if (with_cpu_x86_bfloat16() && isFloatModel) {
+ BF16Transformer bf16Transformer;
+ CNNNetwork cnnetwork(_clonedNetwork);
+ // If enforceBF16 flag was set, BF16 transformation applies for all layers supported by CPU plugin.
+ // Otherwise, only layers marked as BF16 in 'cnnetwork' will be executed in bfloat16 mode.
+ // CPU plugin throws an exception if layers marked as BF16 are not supported by CPU plugin.
+ if (cfg.enforceBF16 == true)
+ bf16Transformer.convertToBFloat16(cnnetwork);
+ } else {
+ BF16Transformer bf16Transformer;
+ CNNNetwork cnnetwork(_clonedNetwork);
+ bf16Transformer.convertToFloat(cnnetwork);
}
}
#include <details/ie_cnn_network_tools.h>
#include <ie_memcpy.h>
-#include "cnn_network_int8_normalizer.hpp"
-
#include "precision_utils.h"
#include <ie_plugin_config.hpp>
#include "low_precision_transformations/transformer.hpp"
#include <unordered_set>
#include "ie_blob_proxy.hpp"
-#include "ie_icnn_network_stats.hpp"
#include "ie_layer_parsers.h"
#include "ie_profiling.hpp"
#include "xml_parse_utils.h"
}
}
- auto statNode = root.child("statistics");
- ParseStatisticSection(statNode);
-
if (!_network->allLayers().size()) THROW_IE_EXCEPTION << "Incorrect model! Network doesn't contain layers.";
size_t inputLayersNum(0);
<< validMeanImageIds;
}
}
-
-void FormatParser::ParseStatisticSection(const pugi::xml_node& statNode) {
- auto splitParseCommas = [&](const string& s) -> vector<float> {
- vector<float> res;
- stringstream ss(s);
-
- float val;
-
- while (ss >> val) {
- res.push_back(val);
-
- if (ss.peek() == ',') ss.ignore();
- }
-
- return res;
- };
-
- map<string, NetworkNodeStatsPtr> newNetNodesStats;
-
- for (auto layer : statNode.children("layer")) {
- NetworkNodeStatsPtr nodeStats = NetworkNodeStatsPtr(new NetworkNodeStats());
-
- string name = layer.child("name").text().get();
-
- newNetNodesStats[name] = nodeStats;
-
- nodeStats->_minOutputs = splitParseCommas(layer.child("min").text().get());
- nodeStats->_maxOutputs = splitParseCommas(layer.child("max").text().get());
- }
-
- ICNNNetworkStats* pstats = nullptr;
- StatusCode s = _network->getStats(&pstats, nullptr);
- if (s == StatusCode::OK && pstats) {
- pstats->setNodesStats(newNetNodesStats);
- }
-}
DataPtr ParseInputData(pugi::xml_node& root) const;
void ParsePreProcess(pugi::xml_node& node);
- void ParseStatisticSection(const pugi::xml_node& statNode);
// Generate different set of creators depending on required IR version
static std::vector<std::shared_ptr<BaseCreator>> generateCreators(int version);
bool hwOptimization = true;
bool hwExtraSplit = false;
- bool ignoreIRStatistic = false;
-
std::string irWithVpuScalesDir;
std::string customLayers;
VPU_MODEL_ATTRIBUTE(int, batchSize, 1)
- VPU_MODEL_ATTRIBUTE(InferenceEngine::NetworkStatsMap, nodesStats, {})
-
public:
//
// Constructor
void setBatchSize(int batchSize);
- inline void setNodesStats(const ie::NetworkStatsMap& stats) { _nodesStats = stats; }
-
//
// Data nodes
//
model->attrs().set<int>("index", g_counter.fetch_add(1));
model->attrs().set<Resources>("resources", env.resources);
- if (!env.config.ignoreIRStatistic) {
- ie::ICNNNetworkStats* stats = nullptr;
- // V10 IRs doesn't contain stats
- if (originalOrConvertNetwork->getStats(&stats, nullptr) == InferenceEngine::OK && !stats->isEmpty()) {
- env.log->trace("Use node statistics from the IR");
- model->setNodesStats(stats->getNodesStats());
- }
- }
-
//
// Update IE Network
//
return shift;
}
-int maxOutputExponent(const std::string& name, const InferenceEngine::NetworkStatsMap& stats) {
- auto node_stats_it = stats.find(name);
- IE_ASSERT(node_stats_it != stats.end());
-
- auto& max = node_stats_it->second->_maxOutputs;
- auto& min = node_stats_it->second->_maxOutputs;
-
- IE_ASSERT(max.size() > 0 && min.size() > 0);
- auto max_value = *std::max_element(max.begin(), max.end());
- auto min_value = *std::min_element(min.begin(), min.end());
-
- max_value = std::max(fabsf(max_value), fabsf(min_value));
- IE_ASSERT(max_value > 0);
- int exp = 0;
-
- // frexp fractions float into two parts:
- // [0.5, 1)* 2^exp
- // while float stores value in format
- // [1, 2) * 2^f_exp
- // which means exp returned by frexp is f_exp + 1
- frexp(max_value, &exp);
- return exp - 1;
-}
-
void scaleBlobByIdx(const Model& model, const Stage& stage, int index, float scale) {
const auto& original = stage->input(index);
IE_ASSERT(original->usage() == DataUsage::Fake || original->usage() == DataUsage::Const);
void PassImpl::run(const Model& model) {
VPU_PROFILE(analyzeWeightableLayers);
- static const int scaleToExp = 8; // get from config?
static const int scaleThreshold = 1;
- auto& stats = model->nodesStats();
-
bool isGrowingOutput = checkGrowingOutput(model);
bool firstStage = true;
auto meanExp = getMeanValue(exponents);
shift = std::min(-meanExp, shift);
- if (stats.empty()) {
+ {
if (firstStage && shift < 4 && isGrowingOutput && weights->desc().dim(Dim::C) > 1) {
normalVal = 5;
}
shift = correctShift(shift, firstStage, stage->origLayer()->type);
shift -= normalVal;
- } else {
- int outExp = maxOutputExponent(stage->origLayer()->name, stats); // what if outExp == 15?
- shift = std::min(scaleToExp - outExp, shift);
}
firstStage = false;
VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION),
VPU_CONFIG_KEY(HW_EXTRA_SPLIT),
VPU_CONFIG_KEY(CUSTOM_LAYERS),
- VPU_CONFIG_KEY(IGNORE_IR_STATISTIC),
VPU_CONFIG_KEY(INPUT_NORM),
VPU_CONFIG_KEY(INPUT_BIAS),
setOption(_compileConfig.hwExtraSplit, switches, config, VPU_CONFIG_KEY(HW_EXTRA_SPLIT));
setOption(_compileConfig.injectSwOps, switches, config, VPU_CONFIG_KEY(HW_INJECT_STAGES));
setOption(_compileConfig.mergeHwPoolToConv, switches, config, VPU_CONFIG_KEY(HW_POOL_CONV_MERGE));
- setOption(_compileConfig.ignoreIRStatistic, switches, config, VPU_CONFIG_KEY(IGNORE_IR_STATISTIC));
setOption(_compileConfig.hwDilation, switches, config, VPU_CONFIG_KEY(HW_DILATION));
setOption(_compileConfig.forceDeprecatedCnnConversion, switches, config, VPU_CONFIG_KEY(FORCE_DEPRECATED_CNN_CONVERSION));
setOption(_compileConfig.disableReorder, switches, config, VPU_CONFIG_KEY(DISABLE_REORDER));
KEY_LOG_LEVEL,
KEY_VPU_PRINT_RECEIVE_TENSOR_TIME,
KEY_VPU_CUSTOM_LAYERS,
- KEY_VPU_IGNORE_IR_STATISTIC,
KEY_VPU_MYRIAD_FORCE_RESET,
KEY_VPU_MYRIAD_PLATFORM,
KEY_EXCLUSIVE_ASYNC_REQUESTS,
{ KEY_LOG_LEVEL, "LOG_NONE" },
{ KEY_VPU_PRINT_RECEIVE_TENSOR_TIME, "OFF" },
{ KEY_VPU_CUSTOM_LAYERS, "" },
- { KEY_VPU_IGNORE_IR_STATISTIC, "OFF" },
{ KEY_VPU_MYRIAD_FORCE_RESET, "OFF" },
{ KEY_VPU_MYRIAD_PLATFORM, "" },
{ KEY_EXCLUSIVE_ASYNC_REQUESTS, "OFF" },
}
InferenceEngine::details::CNNNetworkNGraphImpl cnnNet(ngraph);
- InferenceEngine::ICNNNetworkStats* _stats = nullptr;
- ASSERT_EQ(NOT_FOUND, cnnNet.getStats(&_stats, nullptr));
- ASSERT_EQ(nullptr, _stats);
}
IE_SUPPRESS_DEPRECATED_END
};
const std::vector<std::map<std::string, std::string>> Configs = {
- {{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), CONFIG_VALUE(YES)}},
- {{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), CONFIG_VALUE(NO)}},
-
{{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(YES)}},
{{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(NO)}},
{{VPU_MYRIAD_CONFIG_KEY(PROTOCOL), "BLUETOOTH"}},
{{VPU_MYRIAD_CONFIG_KEY(PROTOCOL), "LAN"}},
- {{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "ON"}},
- {{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "OFF"}},
-
{{VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), "ON"}},
{{VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), "OFF"}},
{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, CommonTestUtils::DEVICE_MYRIAD},
{CONFIG_KEY(LOG_LEVEL), "VERBOSE"}},
{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, CommonTestUtils::DEVICE_MYRIAD},
- {VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "ON"}},
- {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, CommonTestUtils::DEVICE_MYRIAD},
{VPU_MYRIAD_CONFIG_KEY(PLATFORM), "-1"}},
{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, CommonTestUtils::DEVICE_MYRIAD},
{VPU_MYRIAD_CONFIG_KEY(PLATFORM), "0"}},
const std::vector<std::map<std::string, std::string>> Inconfigs = {
{},
- {{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), CONFIG_VALUE(YES)}},
- {{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), CONFIG_VALUE(NO)}},
{{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(YES)}},
{{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(NO)}},
}
}
-void fillStatistic(Statistic &out, size_t size, float min, float max) {
- float ampl = (max - min) / 4.f;
- float center1 = min + ampl;
- float center2 = max - ampl;
- out.min.resize(size);
- out.max.resize(size);
- CommonTestUtils::fill_data_sine(out.min.data(), size, center1, ampl, 1);
- CommonTestUtils::fill_data_sine(out.max.data(), size, center2, ampl, 1);
-}
-
} // namespace CommonTestUtils
size_t deformable_group;
};
-struct Statistic {
- std::vector<float> min;
- std::vector<float> max;
-
- bool empty() const {
- return min.empty() || max.empty();
- }
-
- std::string serialize_min() const {
- return serialize(min);
- }
-
- std::string serialize_max() const {
- return serialize(max);
- }
-
-protected:
- std::string serialize(const std::vector<float> &in) const {
- if (in.empty())
- return "";
- std::string out = std::to_string(in[0lu]);
- for (size_t i = 1lu; i < in.size(); i++)
- out += ", " + std::to_string(in[i]);
- return out;
- }
-};
-
void getConvOutShape(const std::vector<size_t> &inShape,
const conv_common_params ¶ms,
std::vector<size_t> &outShape);
int32_t &dimz,
int32_t &dimn);
-void fillStatistic(Statistic &out, size_t size, float min, float max);
-
} // namespace CommonTestUtils
portID = layerID = 0;
}
-LayerDesc::LayerDesc(std::string type, InOutShapes &shapes, IDManager &id_manager, const Statistic &statistic)
- : _type(std::move(type)), _statistic(statistic) {
+LayerDesc::LayerDesc(std::string type, InOutShapes &shapes, IDManager &id_manager)
+ : _type(std::move(type)) {
_layerID = id_manager.getNextLayerID();
auto inDims = shapes.inDims;
auto outDims = shapes.outDims;
#pragma once
-#include "xml_father.hpp"
-#include "common_test_utils/common_layers_params.hpp"
#include <memory>
#include <string>
#include <vector>
#include <map>
+#include "xml_father.hpp"
+#include "common_test_utils/common_layers_params.hpp"
+
namespace CommonTestUtils {
struct CropData {
std::vector<LayerPortData> _inPortsID;
std::vector<LayerPortData> _outPortsID;
std::string _type;
- Statistic _statistic;
public:
using Ptr = std::shared_ptr<LayerDesc>;
* @param type - string with type of the layer
* @param shapes - reference to the structure with input and output shapes
*/
- explicit LayerDesc(std::string type, InOutShapes &shapes, IDManager &id_manager, const Statistic &statistic);
+ explicit LayerDesc(std::string type, InOutShapes &shapes, IDManager &id_manager);
/**
* @brief Resets current input and output ports to iterate over all input and output ports
* @brief Returns number of outputs
*/
size_t getOutputsSize() const;
-
- const Statistic &getStatistic() {
- return _statistic;
- }
};
static XmlNetBuilder buildNetworkWithOneInput(
std::string name = "AlexNet",
std::vector<size_t> dims = {1, 3, 227, 227},
- std::string precision = "Q78",
- const Statistic &statistic = {}) {
+ std::string precision = "Q78") {
std::shared_ptr<XMLFather> root = std::make_shared<XMLFather>();
auto &exp = root->node("net").attr("name", name).attr("precision", precision).attr("version", Version);
auto &expFinal = exp.attr("batch", 1);
- return XmlNetBuilder(root, expFinal.node("layers")).addInputLayer(precision, dims, statistic);
+ return XmlNetBuilder(root, expFinal.node("layers")).addInputLayer(precision, dims);
}
static XmlNetBuilder buildBody() {
const std::string &precision,
const InOutShapes &inout,
const conv_common_params &conv_params = {},
- const Statistic &statistic = {},
const std::string &name = "") {
std::map<std::string, std::string> params;
if (Version == 2) {
}
int weights_size = getConvWeightsSize(inout.inDims[0], conv_params, precision);
int biases_size = getConvBiasesSize(conv_params, precision);
- return addLayer("Convolution", precision, ¶ms, inout, weights_size, biases_size, "convolution_data", "",
- statistic, name);
+ return addLayer("Convolution", precision, ¶ms, inout, weights_size, biases_size, "convolution_data", "", name);
}
XmlNetBuilder &poolingLayer(
const std::string &precision,
const InOutShapes &inout,
const pool_common_params &pool_params = {},
- const Statistic &statistics = {},
const std::string &name = "") {
std::map<std::string, std::string> params;
if (Version == 2) {
else
params["exclude-pad"] = "false";
}
- return addLayer("Pooling", precision, ¶ms, inout, 0, 0, "pooling_data", "", statistics, name);
+ return addLayer("Pooling", precision, ¶ms, inout, 0, 0, "pooling_data", "", name);
}
struct TIPortMap {
std::map<std::string, std::string> *params,
InOutShapes inout,
const std::string &name) {
- return addLayer(type, precision, params, inout, 0, 0, "data", "", {}, name);
+ return addLayer(type, precision, params, inout, 0, 0, "data", "", name);
}
XmlNetBuilder &addLayer(
InOutShapes inout,
int weightsSize,
const std::string &name) {
- return addLayer(type, precision, params, inout, weightsSize, 0, "data", "", {}, name);
+ return addLayer(type, precision, params, inout, weightsSize, 0, "data", "", name);
}
XmlNetBuilder &addLayer(const std::string &type,
int biasesSize = 0,
std::string layerDataName = "data",
std::string content = "",
- const Statistic &statistic = {},
const std::string &name = "") {
layersNum++;
- auto layerDesc = std::make_shared<LayerDesc>(type, inout, id_manager, statistic);
+ auto layerDesc = std::make_shared<LayerDesc>(type, inout, id_manager);
layersDesc.push_back(layerDesc);
auto &layer = xml.node("layer").attr("name", name.empty() ? layerDesc->getLayerName() : name).attr("precision",
}
XmlNetBuilder &addInputLayer(const std::string &precision,
- const std::vector<size_t> &out,
- const Statistic &statistic = {}) {
+ const std::vector<size_t> &out) {
InOutShapes inout{};
inout.outDims.push_back(out);
- return addLayer("Input", precision, nullptr, inout, 0, 0, "data", "", statistic);
+ return addLayer("Input", precision, nullptr, inout, 0, 0, "data", "");
}
std::string finish(std::vector<std::pair<std::string, std::string>> *edges) {
}
// node_edges.close();
- addStatistic(node_edges.close());
return exp;
}
std::string finish(bool addInputPreProcess = true) {
auto &exp = xml.close();
addEdges(exp);
- addStatistic(exp);
if (addInputPreProcess) {
addPreProcess(exp);
}
}
preProcess.close();
}
-
- template<class T>
- void addStatistic(T &mainContent) {
- bool addStatistic = false;
- for (size_t i = 0lu; i < layersDesc.size() - 1lu; i++) {
- if (!layersDesc[i]->getStatistic().empty()) {
- addStatistic = true;
- break;
- }
- }
- if (!addStatistic)
- return;
-
- auto &statistics = mainContent.node("statistics");
- for (size_t i = 0lu; i < layersDesc.size(); i++) {
- if (!layersDesc[i]->getStatistic().empty()) {
- auto &layer = statistics.node("layer");
- layer
- .node("name", layersDesc[i]->getLayerName())
- .node("min", layersDesc[i]->getStatistic().serialize_min())
- .node("max", layersDesc[i]->getStatistic().serialize_max())
- .close();
- }
- }
- statistics.close();
- }
};
typedef XmlNetBuilder<2> V2NetBuilder;
MOCK_QUALIFIED_METHOD1(setBatchSize, noexcept, InferenceEngine::StatusCode(const size_t size));
MOCK_QUALIFIED_METHOD2(setBatchSize, noexcept, InferenceEngine::StatusCode(const size_t size, InferenceEngine::ResponseDesc*));
MOCK_QUALIFIED_METHOD0(getBatchSize, const noexcept, size_t());
- MOCK_QUALIFIED_METHOD2(getStats, const noexcept, InferenceEngine::StatusCode(InferenceEngine::ICNNNetworkStats** /*stats*/,
- InferenceEngine::ResponseDesc* /*resp*/));
MOCK_QUALIFIED_METHOD0(Release, noexcept, void());
MOCK_QUALIFIED_METHOD1(getInputShapes, const noexcept, void(InferenceEngine::ICNNNetwork::InputShapes&));
MOCK_QUALIFIED_METHOD2(reshape, noexcept, InferenceEngine::StatusCode(const InferenceEngine::ICNNNetwork::InputShapes &, InferenceEngine::ResponseDesc *));
};
const std::vector<BehTestParams> deviceAgnosticConfigurations = {
- BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), CONFIG_VALUE(YES)}}),
- BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), CONFIG_VALUE(NO)}}),
-
BEH_MYRIAD.withConfig({{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(YES)}}),
BEH_MYRIAD.withConfig({{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(NO)}}),
BEH_MYRIAD.withConfig({{VPU_MYRIAD_CONFIG_KEY(PROTOCOL), "BLUETOOTH"}}),
BEH_MYRIAD.withConfig({{VPU_MYRIAD_CONFIG_KEY(PROTOCOL), "LAN"}}),
- BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "ON"}}),
- BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "OFF"}}),
-
BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), "ON"}}),
BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), "OFF"}}),
BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
{CONFIG_KEY(LOG_LEVEL), "VERBOSE"}}),
BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
- {VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "ON"}}),
- BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
{VPU_MYRIAD_CONFIG_KEY(PLATFORM), "-1"}}),
BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
{VPU_MYRIAD_CONFIG_KEY(PLATFORM), "0"}}),
string _device_name;
string _firmware;
string _tmp_firmware;
- string _stat_file;
vector<string> labels;
double nearValue = 0.0;
double nearAvgValue = 0.0;
}
- Model model, statFile;
+ Model model;
RegressionConfig config;
EMean isMean = eValues;
EPrecision precision = eq78;
return path_to_model.str();
}
- std::string prepareStatMatching() {
- if (statFile.fileName() == "") return "";
- ModelsPath path_to_stat;
- path_to_stat << kPathSeparator
- << statFile.folderName() << kPathSeparator
- << statFile.fileName();
-
- return path_to_stat.str();
- }
-
ModelSelector() = default;
std::string getReferenceResultsLabel() {
config.referenceOutput.push_back(v);
}
config._path_to_models = prepareModelMatching();
- config._stat_file = prepareStatMatching();
return M(config);
}
M to(Blob::Ptr rhs) {
config.outputBlob = rhs;
config._path_to_models = prepareModelMatching();
- config._stat_file = prepareStatMatching();
return M(config);
}
}
}
config._path_to_models = prepareModelMatching();
- config._stat_file = prepareStatMatching();
return M(config);
}
config.meanRelativeError = meanRelativeError;
config.maxRelativeError = maxRelativeError;
config._path_to_models = prepareModelMatching();
- config._stat_file = prepareStatMatching();
return M(config);
}
void equalToReferenceWithDelta(double nearValue) {
config.nearValue = nearValue;
config._path_to_models = prepareModelMatching();
- config._stat_file = prepareStatMatching();
M(config).to(getReferenceResultsLabel());
}
config.referenceOutput.push_back(v);
}
config._path_to_models = prepareModelMatching();
- config._stat_file = prepareStatMatching();
return M(config, true);
}
// place holder to run the matcher without providing any reference
void possible() {
config._path_to_models = prepareModelMatching();
- config._stat_file = prepareStatMatching();
auto tmp = M(config);
ASSERT_NO_FATAL_FAILURE(tmp.match());
}
// Try to read labels file
readLabels(labelFileName);
- if (config._stat_file != "") {
- InferenceEngine::NetworkStatsMap stat = testing::loadStatisticFromFile(config._stat_file);
-
- ICNNNetworkStats *pstats;
- ((ICNNNetwork&)cnnNetwork).getStats(&pstats, nullptr);
- pstats->setNodesStats(stat);
-
- // iterating over layers and fixing suppress_normalization->quantization_level
- // because we have in tests IR which has old name for fp32 layers
- for (auto& layer : cnnNetwork) {
- if (layer->params.find("suppress_normalization") != layer->params.end() &&
- layer->params["suppress_normalization"] == "I8") {
- layer->params["quantization_level"] = "FP32";
- }
- }
- }
-
if (config._reshape) {
auto inputShapes = cnnNetwork.getInputShapes();
inputShapes.begin()->second[0] = config.batchSize;
string binFileName = testing::FileUtils::fileNameNoExt(config._path_to_models) + ".bin";
auto cnnNetwork = config.ie_core->ReadNetwork(config._path_to_models, binFileName);
- if (!config._stat_file.empty()) {
- InferenceEngine::NetworkStatsMap stat = testing::loadStatisticFromFile(config._stat_file);
-
- IE_SUPPRESS_DEPRECATED_START
- ICNNNetworkStats *pstats;
- ((ICNNNetwork&)cnnNetwork).getStats(&pstats, nullptr);
- pstats->setNodesStats(stat);
-
- // iterating over layers and fixing suppress_normalization->quantization_level
- // because we have in tests IR which has old name for fp32 layers
- for (auto layer : cnnNetwork) {
- if (layer->params.find("suppress_normalization") != layer->params.end() &&
- layer->params["suppress_normalization"] == "I8") {
- layer->params["quantization_level"] = "FP32";
- }
- }
- IE_SUPPRESS_DEPRECATED_END
- }
-
if (config._reshape) {
auto inputShapes = cnnNetwork.getInputShapes();
for (auto & shape : inputShapes) {
#include "tests_common.hpp"
#include "single_layer_common.hpp"
-#include <cnn_network_stats_impl.hpp>
-
#include <string>
-#include "network_stats.h"
#include <format_reader/format_reader_ptr.h>
#include "common_test_utils/data_utils.hpp"
protected:
const char* DEFAULT_PATH_P = "./lib";
- std::map<std::string, NetworkNodeStatsPtr> collectStatistics(const void *model, size_t size, const InferenceEngine::TBlob<uint8_t>::Ptr &weights, const std::vector<std::string> outputNodes, const std::vector<std::string> images) {
- InferenceEngine::Core ie;
-
- std::shared_ptr<NetworkStatsCollector> netStats = std::shared_ptr<NetworkStatsCollector>(new NetworkStatsCollector(ie, "CPU"));
-
- size_t batchSize = images.size();
-
- std::cout << "Batch size: " << batchSize << std::endl;
-
- std::map<std::string, NetworkNodeStatsPtr> netNodesStats;
-
- netStats->ReadNetworkAndSetWeights(model, size, weights, batchSize);
-
- std::cout << "Inferencing and collecting statistics..." << std::endl;
- netStats->InferAndCollectStats(images, netNodesStats);
-
- return netNodesStats;
- }
-
static void compare_NRMSD(InferenceEngine::Blob &res, InferenceEngine::Blob &ref, float max_nrmsd = 0.01f) {
float *res_ptr = res.buffer().as<float*>();
size_t res_size = res.size();
// TODO Load nodes stats from file
std::string imageFilename = TestDataHelpers::get_data_path() + "/validation_set/224x224/dog.bmp";
std::cout << "Using image file: " << imageFilename << std::endl;
- std::map<std::string, NetworkNodeStatsPtr> netNodesStats = collectStatistics(model.data(), model.length(), weights_ptr, { "conv1" }, { imageFilename });
Core ie;
auto network = ie.ReadNetwork(model, weights_ptr);
CNNNetwork myNetwork = ie.ReadNetwork(model, weights_ptr);
- ICNNNetworkStats* pstats;
- ((ICNNNetwork&)myNetwork).getStats(&pstats, nullptr);
- pstats->setNodesStats(netNodesStats);
-
SizeVector dims_src = {p.in.w,
p.in.h,
p.in.c,
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <cfloat>
-#include <fstream>
-#include <limits>
-#include <memory>
-
-#include <pugixml.hpp>
-
-#include <format_reader_ptr.h>
-
-#include "network_stats.h"
-#include <samples/slog.hpp>
-
-using namespace InferenceEngine;
-
-class DataStats {
-public:
- template <typename T>
- static void GetDataMinMax(const T* data, size_t count, T& min, T& max);
-
- template <typename T>
- static void GetDataAverage(const T* data, size_t count, T& ave);
-
- template <typename T>
- static void GetDataAbsMax(const T* data, size_t count, T& max);
-
- template <typename T>
- static T GetAbsMax(T min, T max);
-};
-
-template <typename T>
-void DataStats::GetDataMinMax(const T* data, size_t count, T& min, T& max) {
- for (size_t i = 0; i < count; i++) {
- T val = data[i];
-
- if (min > val) {
- min = val;
- }
-
- if (max < val) {
- max = val;
- }
- }
-}
-
-template <typename T>
-void DataStats::GetDataAbsMax(const T* data, size_t count, T& max) {
- T min = FLT_MAX;
-
- GetDataMinMax(data, count, min, max);
-
- max = GetAbsMax(min, max);
-}
-
-template void DataStats::GetDataMinMax<float>(const float* data, size_t count, float& min, float& max);
-template void DataStats::GetDataMinMax<uint8_t>(const uint8_t* data, size_t count, uint8_t& min, uint8_t& max);
-
-template void DataStats::GetDataAbsMax<float>(const float* data, size_t count, float& max);
-
-template <typename T>
-void DataStats::GetDataAverage(const T* data, size_t count, T& ave) {
- ave = 0;
-
- for (size_t i = 0; i < count; i++) {
- ave += data[i];
- }
-
- ave /= count;
-}
-
-template void DataStats::GetDataAverage<float>(const float* data, size_t count, float& ave);
-
-template <typename T>
-T DataStats::GetAbsMax(T min, T max) {
- if (min < 0) {
- min *= -1;
- }
-
- if (max < 0) {
- max *= -1;
- }
-
- return (max > min) ? max : min;
-}
-
-template float DataStats::GetAbsMax<float>(float min, float max);
-
-
-CNNLayerPtr NetworkStatsCollector::addScaleShiftBeforeLayer(std::string name, CNNLayer::Ptr beforeLayer, size_t port, std::vector<float> scale) {
- if (beforeLayer->insData.size() < port) {
- THROW_IE_EXCEPTION << "cannot find appropraite port for addScaleShiftBeforeLayer";
- }
-
- DataPtr pData = beforeLayer->insData[port].lock();
- LayerParams params;
- params.name = name;
- params.precision = Precision::FP32;
- params.type = "ScaleShift";
- CNNLayerPtr lptr = std::make_shared<ScaleShiftLayer>(params);
- ScaleShiftLayer *pScaleShift = dynamic_cast<ScaleShiftLayer *>(lptr.get());
-
- IE_ASSERT(4 == pData->getDims().size());
- std::size_t num_chanels = pData->getDims().at(1);
- SizeVector wdims({ num_chanels });
-
- if (scale.size() == 1) {
- scale.resize(wdims[0]);
- for (int i = 1; i < wdims[0]; i++) {
- scale[i] = scale[0];
- }
- }
-
- if (scale.size() != num_chanels) {
- THROW_IE_EXCEPTION << "Failed to add scaleshift before " << beforeLayer->name << " due to scales and layer output dims incossitency";
- }
-
- Blob::Ptr weights = nullptr;
- weights = make_shared_blob<float>({Precision::FP32, wdims, Layout::C});
- weights->allocate();
- float *buffer = weights->buffer().as<float *>();
- for (size_t i = 0; i < num_chanels; i++) {
- buffer[i] = scale[i];
- }
- pScaleShift->_weights = weights;
-
-
- SizeVector bdims({ num_chanels });
- Blob::Ptr biases = nullptr;
- biases = make_shared_blob<float>({Precision::FP32, bdims, Layout::C});
- biases->allocate();
- buffer = biases->buffer().as<float *>();
- for (size_t i = 0; i < num_chanels; i++) {
- buffer[i] = 0.f;
- }
- pScaleShift->_biases = biases;
-
- Data *edge2 = new Data(*pData.get());
- DataPtr newEdge(edge2);
- lptr->insData.push_back(pData);
- lptr->outData.push_back(newEdge);
- newEdge->setName(/*"EdgeAfter_" +*/ params.name);
- newEdge->getCreatorLayer() = lptr;
- newEdge->getInputTo().clear();
- newEdge->getInputTo()[beforeLayer->name] = beforeLayer;
-
- pData->getInputTo().erase(beforeLayer->name);
- pData->getInputTo()[params.name] = lptr;
-
- for (size_t i = 0; i < beforeLayer->insData.size(); i++) {
- DataPtr d = beforeLayer->insData[i].lock();
- if (d == pData) {
- beforeLayer->insData[i] = newEdge;
- break;
- }
- }
- return lptr;
-}
-
-NetworkStatsCollector::NetworkStatsCollector(const InferenceEngine::Core & ie, const std::string & deviceName) :
- _ie(ie), _deviceName(deviceName) {
-}
-
-NetworkStatsCollector::~NetworkStatsCollector() {
-}
-
-void NetworkStatsCollector::ReadNetworkAndSetWeights(const void *model, size_t size, const InferenceEngine::TBlob<uint8_t>::Ptr &weights, size_t batch) {
- /** Reading network model **/
- _network = _ie.ReadNetwork((const char*)model, weights);
- _network.setBatchSize(batch);
-}
-
-std::string FileNameNoExt(const std::string& filePath) {
- auto pos = filePath.rfind('.');
-
- if (pos == std::string::npos) {
- return filePath;
- }
-
- return filePath.substr(0, pos);
-}
-
-void NetworkStatsCollector::LoadNetwork(const std::string& modelPath, size_t batch) {
- /** Reading network model **/
- _network = _ie.ReadNetwork(modelPath);
- _network.setBatchSize(batch);
-}
-
-void NetworkStatsCollector::InferAndCollectStats(const std::vector<std::string>& images,
- std::map<std::string, NetworkNodeStatsPtr>& netNodesStats) {
- slog::info << "Collecting statistics for layers:" << slog::endl;
-
- std::vector<CNNLayerPtr> layersAfterInputs;
-
- std::string hackPrefix = "scaleshifted_input:";
-
- std::map<std::string, std::string> inputsFromLayers;
- for (auto&& layer : _network) {
- if (layer->insData.size() > 0) {
- std::string inName = layer->input()->getName();
- for (auto&& input : _network.getInputsInfo()) {
- if (inName == input.first) {
- layersAfterInputs.push_back(layer);
- inputsFromLayers[hackPrefix + layer->name] = inName;
- }
- }
- }
- }
-
- for (auto&& layer : layersAfterInputs) {
- std::string firstInputName = hackPrefix + layer->name;
- auto scaleShiftLayer = addScaleShiftBeforeLayer(firstInputName, layer, 0, { 1.f });
- ((ICNNNetwork&)_network).addLayer(scaleShiftLayer);
- }
-
- // Adding output to every layer
- for (auto&& layer : _network) {
- slog::info << "\t" << layer->name << slog::endl;
-
- std::string layerType = _network.getLayerByName(layer->name.c_str())->type;
- if (/*layerType != "Split" &&*/ layerType != "Input") {
- _network.addOutput(layer->name);
- }
- }
-
- NetworkNodeStatsPtr nodeStats;
-
- const size_t batchSize = _network.getBatchSize();
-
- std::vector<std::string> imageNames;
-
- size_t rounded = images.size() - images.size() % batchSize;
-
- auto executable_network = _ie.LoadNetwork(_network, _deviceName);
-
- std::map<std::string, std::vector<float>> min_outputs, max_outputs;
-
- for (size_t i = 0; i < rounded; i += batchSize) {
- slog::info << "Inferring image " << i+1 << " of " << rounded << slog::endl;
-
- imageNames.clear();
-
- for (size_t img = 0; img < batchSize; img++) {
- imageNames.push_back(images[i + img]);
- }
-
-
- /** Taking information about all topology inputs **/
- InputsDataMap inputInfo(_network.getInputsInfo());
-
- if (inputInfo.size() != 1) throw std::logic_error("Sample supports topologies only with 1 input");
- auto inputInfoItem = *inputInfo.begin();
-
- /** Specifying the precision of input data provided by the user.
- * This should be called before load of the network to the device **/
- inputInfoItem.second->setPrecision(Precision::FP32);
- inputInfoItem.second->setLayout(Layout::NCHW);
-
- std::vector<std::shared_ptr<unsigned char>> imagesData;
- for (auto & i : imageNames) {
- FormatReader::ReaderPtr reader(i.c_str());
- if (reader.get() == nullptr) {
- slog::warn << "Image " + i + " cannot be read!" << slog::endl;
- continue;
- }
- /** Store image data **/
- auto data_dims = inputInfoItem.second->getTensorDesc().getDims();
- std::shared_ptr<unsigned char> data(reader->getData(data_dims.back(), data_dims.at(data_dims.size() - 2)));
- if (data.get() != nullptr) {
- imagesData.push_back(data);
- }
- }
- if (imagesData.empty()) throw std::logic_error("Valid input images were not found!");
-
- OutputsDataMap outputInfo(_network.getOutputsInfo());
- for (auto itOut : outputInfo) {
- itOut.second->setPrecision(Precision::FP32);
- }
-
- auto infer_request = executable_network.CreateInferRequest();
-
- // -------------------------------Set input data----------------------------------------------------
- /** Iterate over all the input blobs **/
-
- /** Creating input blob **/
- Blob::Ptr input = infer_request.GetBlob(inputInfoItem.first);
- if (!input) {
- throw std::logic_error("Invalid input blob " + inputInfoItem.first + " pointer");
- }
-
- /** Filling input tensor with images. First b channel, then g and r channels **/
- auto input_dims = input->getTensorDesc().getDims();
- size_t num_chanels = input_dims.at(1);
- size_t image_size = input_dims.at(input_dims.size() - 2) * input_dims.back();
-
- auto data = input->buffer().as<PrecisionTrait<Precision::FP32>::value_type*>();
-
- /** Iterate over all input images **/
- for (size_t image_id = 0; image_id < imagesData.size(); ++image_id) {
- /** Iterate over all pixel in image (b,g,r) **/
- for (size_t pid = 0; pid < image_size; pid++) {
- /** Iterate over all channels **/
- for (size_t ch = 0; ch < num_chanels; ++ch) {
- /** [images stride + channels stride + pixel id ] all in bytes **/
- data[image_id * image_size * num_chanels + ch * image_size + pid ] = imagesData.at(image_id).get()[pid*num_chanels + ch];
- }
- }
- }
-
- infer_request.Infer();
-
-
- for (auto itOut : outputInfo) {
- auto outBlob = infer_request.GetBlob(itOut.first);
-
- std::string outName = itOut.first;
- if (inputsFromLayers.find(itOut.first) != inputsFromLayers.end()) {
- outName = inputsFromLayers[itOut.first];
- }
-
- size_t N, C, statCount;
- auto output_dims = outBlob->getTensorDesc().getDims();
- if (output_dims.size() == 4 && outBlob->getTensorDesc().getLayout() == Layout::NCHW) {
- N = output_dims[0];
- C = output_dims[1];
- statCount = C;
- } else if (output_dims.size() == 2 && outBlob->getTensorDesc().getLayout() == Layout::NC) {
- N = output_dims[0];
- C = output_dims[1];
- statCount = 1;
- } else {
- slog::warn << "Only NCHW and NC layouts are supported. Skipping layer \"" << outName << "\"" << slog::endl;
- continue;
- }
-
-
- if (netNodesStats.find(outName) == netNodesStats.end()) {
- nodeStats = NetworkNodeStatsPtr(new NetworkNodeStats(statCount));
-
- netNodesStats[outName] = nodeStats;
- } else {
- nodeStats = netNodesStats[outName];
- }
-
- // Counting min/max outputs per channel
- for (size_t n = 0; n < N; n++) {
- if (output_dims.size() == 4) {
- size_t _HW = output_dims.back() * output_dims.at(output_dims.size() - 2);
- for (size_t c = 0; c < C; c++) {
- if (outBlob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
- float* ptr = &outBlob->buffer().as<float*>()[(n * C + c) * _HW];
-
- float min = nodeStats->_minOutputs[c];
- float max = nodeStats->_maxOutputs[c];
- DataStats::GetDataMinMax<float>(ptr, _HW, min, max);
- nodeStats->_minOutputs[c] = min;
- nodeStats->_maxOutputs[c] = max;
- } else if (outBlob->getTensorDesc().getPrecision() == InferenceEngine::Precision::U8) {
- uint8_t* ptr = &outBlob->buffer().as<uint8_t*>()[(n * C + c) * _HW];
-
- uint8_t min = nodeStats->_minOutputs[c];
- uint8_t max = nodeStats->_maxOutputs[c];
- DataStats::GetDataMinMax<uint8_t>(ptr, _HW, min, max);
- nodeStats->_minOutputs[c] = min;
- nodeStats->_maxOutputs[c] = max;
- } else {
- throw std::logic_error(std::string("Unsupported precision: ") + outBlob->getTensorDesc().getPrecision().name());
- }
- }
- } else if (output_dims.size() == 2) {
- if (outBlob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
- float* ptr = &outBlob->buffer().as<float*>()[n * C];
-
- float min = nodeStats->_minOutputs[0];
- float max = nodeStats->_maxOutputs[0];
- DataStats::GetDataMinMax<float>(ptr, C, min, max);
- nodeStats->_minOutputs[0] = min;
- nodeStats->_maxOutputs[0] = max;
- } else if (outBlob->getTensorDesc().getPrecision() == InferenceEngine::Precision::U8) {
- uint8_t* ptr = &outBlob->buffer().as<uint8_t*>()[n * C];
-
- uint8_t min = nodeStats->_minOutputs[0];
- uint8_t max = nodeStats->_maxOutputs[0];
- DataStats::GetDataMinMax<uint8_t>(ptr, C, min, max);
- nodeStats->_minOutputs[0] = min;
- nodeStats->_maxOutputs[0] = max;
- } else {
- throw std::logic_error(std::string("Unsupported precision: ") + outBlob->getTensorDesc().getPrecision().name());
- }
- }
- }
- }
- }
-}
\ No newline at end of file
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <string>
-#include <vector>
-#include <map>
-
-#include <ie_core.hpp>
-#include <ie_icnn_network_stats.hpp>
-
-class NetworkStatsCollector {
-public:
- NetworkStatsCollector(const InferenceEngine::Core & ie, const std::string & deviceName);
- ~NetworkStatsCollector();
-
-public:
- void ReadNetworkAndSetWeights(const void *model, size_t size, const InferenceEngine::TBlob<uint8_t>::Ptr &weights, size_t batch);
- void LoadNetwork(const std::string& modelPath, size_t batch);
-
- void InferAndCollectStats(const std::vector<std::string>& images,
- std::map<std::string, InferenceEngine::NetworkNodeStatsPtr>& netNodesStats);
-
-/* void InferAndCollectHistogram(const std::vector<std::string>& images,
- const std::vector<std::string>& layerNames,
- std::map<std::string, InferenceEngine::NetworkNodeStatsPtr>& netNodesStats);
-
- void InferAndFindOptimalThreshold(const std::vector<std::string>& images,
- const std::vector<std::string>& layerNames,
- std::map<std::string, InferenceEngine::NetworkNodeStatsPtr>& netNodesStats);
-
- void CalculateThreshold(std::map<std::string, InferenceEngine::NetworkNodeStatsPtr>& netNodesStats);*/
-
- void CalculatePotentialMax(const float* weights, const InferenceEngine::SizeVector& weightDism, float& max);
- static InferenceEngine::CNNLayerPtr addScaleShiftBeforeLayer(std::string name, InferenceEngine::CNNLayer::Ptr beforeLayer,
- size_t port, std::vector<float> scale);
-
-private:
- InferenceEngine::Core _ie;
- InferenceEngine::CNNNetwork _network;
- std::string _deviceName;
-};
std::string deviceName;
std::string modelFile;
std::string imageName;
- std::string statFile;
std::vector<std::pair<int, float>> refValue;
// optional config (used for multi-device)
std::map<std::string, std::string> config;
result += imageName;
return result;
}
-
- std::string stat() {
- ModelsPath result;
- result += kPathSeparator;
- result += statFile;
- return result;
- }
};
static LayerTransformation::Params createParam() {
network.setBatchSize(batch_size);
ie.SetConfig(p.config);
- if (p.statFile != "") {
- InferenceEngine::NetworkStatsMap stat = testing::loadStatisticFromFile(p.stat());
-
- ICNNNetworkStats *pstats;
- ((ICNNNetwork&)network).getStats(&pstats, nullptr);
- pstats->setNodesStats(stat);
- }
if (transformationsParams.transformationsInTestEnabled) {
ICNNNetwork& icnnnetwork = network;
"CPU",
transformationsParam.modelParams.irFilePath,
transformationsParam.modelParams.dataFilePath,
- "",
referenceValues
};
&fake_quantize_params,
{ {weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}} },
"fakeQuantizeOnWeights")
- .convolutionLayer(p._network_precision, { convolutionDims, {convOutShape} }, conv, {}, "Convolution");
+ .convolutionLayer(p._network_precision, { convolutionDims, {convOutShape} }, conv, "Convolution");
if (addBiasesLayer) {
builder.addLayer("Const", p._network_precision, &const_params, { {}, {biasesConvolutionConstDims} }, type_size * conv.out_c, "biasesConst");
.convolutionLayer(
p._network_precision,
{ {p.inputDimensions[0], weightsConstInputDims, biasesConvolutionConstDims },
- {convOutShape} }, conv, {}, "convolution")
+ {convOutShape} }, conv, "convolution")
// 15
.addLayer("Pooling", p._network_precision, &poolingParams, { {dimensions}, {dimensions} })
.finish(&edges);
using Batch = int;
using DoReshape = bool;
using Resources = int;
-using IsIgnoreStatistic = bool;
using PluginDevicePair = std::pair<std::string, std::string>;
//------------------------------------------------------------------------------
std::string model_name,
std::string img_name,
double reference_delta,
- Regression::EMean mean = Regression::EMean::eValues,
- bool with_stat_file = false);
+ Regression::EMean mean = Regression::EMean::eValues);
// Accessors
inline Regression::EMean mean() const;
- inline bool withStatFile() const;
// Operations
inline std::string name() const override;
private:
//Data section
Regression::EMean mean_;
- bool with_stat_file_;
};
//------------------------------------------------------------------------------
std::string model_name,
std::string img_name,
double reference_delta,
- Regression::EMean mean,
- bool with_stat_file):
+ Regression::EMean mean):
SourceParameterBase(model_name, img_name, reference_delta),
- mean_(mean),
- with_stat_file_(with_stat_file) {
+ mean_(mean) {
}
inline Regression::EMean ClassificationSrcParam::mean() const {
return mean_;
}
-inline bool ClassificationSrcParam::withStatFile() const {
- return with_stat_file_;
-}
-
inline std::string ClassificationSrcParam::name() const {
return SourceParameterBase::name() +
"_Mean=" + format_mean(mean_);
get<2>(param.param),
get<3>(param.param)) +
"_SHAVES=" + (get<4>(param.param) == -1 ? "AUTO" : std::to_string(get<4>(param.param))) +
- "_IsIgnoreStatistic=" + std::to_string(get<5>(param.param)) +
- "_" + get<6>(param.param).name();
+ "_" + get<5>(param.param).name();
}
void VpuNoClassificationRegression::SetUp() {
batch_= get<2>(ClassificationTestVpuParam::GetParam());
do_reshape_= get<3>(ClassificationTestVpuParam::GetParam());
resources_= get<4>(ClassificationTestVpuParam::GetParam());
- is_ignore_statistic_ = get<5>(ClassificationTestVpuParam::GetParam());
- source_param_= get<6>(ClassificationTestVpuParam::GetParam());
+ source_param_= get<5>(ClassificationTestVpuParam::GetParam());
InitConfig();
}
config_["VPU_NUMBER_OF_CMX_SLICES"] = std::to_string(resources_);
config_["VPU_NUMBER_OF_SHAVES"] = std::to_string(resources_);
}
-
- if (is_ignore_statistic_) {
- config_["VPU_IGNORE_IR_STATISTIC"] = CONFIG_VALUE(YES);
- } else {
- config_["VPU_IGNORE_IR_STATISTIC"] = CONFIG_VALUE(NO);
- }
}
//------------------------------------------------------------------------------
Batch,
DoReshape,
Resources,
- IsIgnoreStatistic,
ClassificationSrcParam>>;
using ClassificationSpecificTestVpuParam = WithParamInterface<std::tuple<
protected:
// Data section
int resources_;
- bool is_ignore_statistic_;
ClassificationSrcParam source_param_;
//Operations
#include <stdio.h>
#include "cpp/ie_cnn_network.h"
#include <gtest/gtest.h>
-#include "ie_icnn_network_stats.hpp"
#include "xml_helper.hpp"
#include <pugixml.hpp>
return xmlPath;
}
-InferenceEngine::NetworkStatsMap loadStatisticFromFile(const std::string& xmlPath) {
- auto splitParseCommas = [&](const std::string& s) ->std::vector<float> {
- std::vector<float> res;
- std::stringstream ss(s);
-
- float val;
-
- while (ss >> val) {
- res.push_back(val);
-
- if (ss.peek() == ',')
- ss.ignore();
- }
-
- return res;
- };
-
- InferenceEngine::NetworkStatsMap newNetNodesStats;
-
- pugi::xml_document doc;
-
- pugi::xml_parse_result pr = doc.load_file(xmlPath.c_str());
-
-
- if (!pr) {
- THROW_IE_EXCEPTION << "Can't load stat file " << xmlPath;
- }
-
- auto stats = doc.child("stats");
- auto layers = stats.child("layers");
-
- InferenceEngine::NetworkNodeStatsPtr nodeStats;
- size_t offset;
- size_t size;
- size_t count;
-
- IE_SUPPRESS_DEPRECATED_START
-
- for (auto layer : layers.children("layer")) {
- nodeStats = InferenceEngine::NetworkNodeStatsPtr(new InferenceEngine::NetworkNodeStats());
-
- std::string name = layer.child("name").text().get();
-
- newNetNodesStats[name] = nodeStats;
-
- nodeStats->_minOutputs = splitParseCommas(layer.child("min").text().get());
- nodeStats->_maxOutputs = splitParseCommas(layer.child("max").text().get());
- }
-
- IE_SUPPRESS_DEPRECATED_END
-
- return newNetNodesStats;
-}
-
}
#include <stdio.h>
#include "cpp/ie_cnn_network.h"
#include <gtest/gtest.h>
-#include <ie_icnn_network_stats.hpp>
namespace testing {
std::shared_ptr<impl> _impl;
};
-InferenceEngine::NetworkStatsMap loadStatisticFromFile(const std::string& xmlPath);
-
}
file(GLOB
TEST_SRC
+ cnn_network/*.cpp
graph_tools/*.cpp
- http_client/*.cpp
inference_engine_tests/*.cpp
- inference_engine_tests/cpp_interfaces/*.cpp
- inference_engine_tests/normalization/*.cpp
- inference_engine_tests/transformations/*.cpp
- inference_engine_tests/transformations/*.hpp
- cnn_network/*.cpp
- topology_verification_tests/*.cpp
stress_tests/*.cpp
- cpp_api/*.cpp
+ topology_verification_tests/*.cpp
)
if (ENABLE_GNA)
#include "mkldnn_graph.h"
#include "mkldnn_graph_dumper.h"
#include "ie_blob.h"
-#include "ie_util_internal.hpp"
#include "details/ie_cnn_network_tools.h"
#include "common_test_utils/xml_net_builder/xml_net_builder.hpp"
-#include "graph_tools.hpp"
#include <ie_core.hpp>
#include <string>
std::vector<float> max_stat(p.in1[1]);
CommonTestUtils::fill_data_sine(min_stat.data(), p.in1[1], -1, 1, 1);
CommonTestUtils::fill_data_sine(max_stat.data(), p.in1[1], 1, 1, -1);
- CommonTestUtils::Statistic in_stat = {min_stat, max_stat};
std::vector<float> conv_min_stat(convOutShape[1]);
std::vector<float> conv_max_stat(convOutShape[1]);
CommonTestUtils::fill_data_sine(conv_min_stat.data(), convOutShape[1], -1, 1, 1);
CommonTestUtils::fill_data_sine(conv_max_stat.data(), convOutShape[1], 1, 1, -1);
- CommonTestUtils::Statistic conv_stat = {conv_min_stat, conv_max_stat};
std::map<std::string, std::string> elt_params = {
{"operation", "sum"}
std::vector<std::pair<std::string, std::string>> edges = { {"0,0", "2,2"}, {"2,3", "3,4"}, {"1,1", "3,5"} };
return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "Fusion_conv_sum", p.in1, precision, in_stat)
- .addInputLayer(precision, convOutShape, in_stat)
- .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv, conv_stat)
- .addLayer("Eltwise", precision, &elt_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "", conv_stat)
+ "Fusion_conv_sum", p.in1, precision)
+ .addInputLayer(precision, convOutShape)
+ .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv)
+ .addLayer("Eltwise", precision, &elt_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "")
.finish(&edges);
}
std::vector<float> max_stat(p.in1[1]);
CommonTestUtils::fill_data_sine(min_stat.data(), p.in1[1], -1, 1, 1);
CommonTestUtils::fill_data_sine(max_stat.data(), p.in1[1], 1, 1, -1);
- CommonTestUtils::Statistic in_stat = {min_stat, max_stat};
std::vector<float> conv_min_stat(convOutShape[1]);
std::vector<float> conv_max_stat(convOutShape[1]);
CommonTestUtils::fill_data_sine(conv_min_stat.data(), convOutShape[1], -1, 1, 1);
CommonTestUtils::fill_data_sine(conv_max_stat.data(), convOutShape[1], 1, 1, -1);
- CommonTestUtils::Statistic conv_stat = {conv_min_stat, conv_max_stat};
std::map<std::string, std::string> elt_params = {
{"operation", "sum"}
std::map<std::string, std::string> relu_params = {};
std::vector<std::pair<std::string, std::string>> edges = { {"0,0", "2,2"}, {"2,3", "3,4"}, {"1,1", "3,5"}, {"3,6", "4,7"} };
return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "Fusion_conv_sum", p.in1, precision, in_stat)
- .addInputLayer(precision, convOutShape, in_stat)
- .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv, conv_stat)
- .addLayer("Eltwise", precision, &elt_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "", conv_stat)
- .addLayer("ReLU", precision, &relu_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "", conv_stat)
+ "Fusion_conv_sum", p.in1, precision)
+ .addInputLayer(precision, convOutShape)
+ .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv)
+ .addLayer("Eltwise", precision, &elt_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "")
+ .addLayer("ReLU", precision, &relu_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "")
.finish(&edges);
}
std::vector<float> max_stat(p.in1[1]);
CommonTestUtils::fill_data_sine(min_stat.data(), p.in1[1], -1, 1, 1);
CommonTestUtils::fill_data_sine(max_stat.data(), p.in1[1], 1, 1, -1);
- CommonTestUtils::Statistic in_stat = {min_stat, max_stat};
std::vector<float> conv_min_stat(convOutShape[1]);
std::vector<float> conv_max_stat(convOutShape[1]);
CommonTestUtils::fill_data_sine(conv_min_stat.data(), convOutShape[1], -1, 1, 1);
CommonTestUtils::fill_data_sine(conv_max_stat.data(), convOutShape[1], 1, 1, -1);
- CommonTestUtils::Statistic conv_stat = {conv_min_stat, conv_max_stat};
std::map<std::string, std::string> elt_params = {
{"operation", "sum"}
};
std::vector<std::pair<std::string, std::string>> edges = { {"0,0", "2,2"}, {"2,3", "4,6"}, {"1,1", "3,4"}, {"3,5", "4,7"} };
return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "Fusion_conv_sum", p.in1, precision, in_stat)
- .addInputLayer(precision, p.in1, in_stat)
- .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv, conv_stat)
- .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv, conv_stat)
- .addLayer("Eltwise", precision, &elt_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "", conv_stat)
+ "Fusion_conv_sum", p.in1, precision)
+ .addInputLayer(precision, p.in1)
+ .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv)
+ .convolutionLayer(precision, {{p.in1}, {convOutShape}}, p.conv)
+ .addLayer("Eltwise", precision, &elt_params, {{convOutShape, convOutShape}, {convOutShape}}, 0, 0, "data", "")
.finish(&edges);
}
getConvOutShape(p.in1, p.conv1, convOutShape1);
getConvOutShape(p.in2, p.conv2, convOutShape2);
- CommonTestUtils::Statistic in1_stat, in2_stat, conv1_stat, conv2_stat;
- fillStatistic(in1_stat, p.in1[1], -2, 2);
- fillStatistic(in2_stat, p.in2[1], -2, 2);
- fillStatistic(conv1_stat, p.conv1.out_c, -2, 2);
- fillStatistic(conv2_stat, p.conv2.out_c, -2, 2);
-
std::map<std::string, std::string> elt_params = {
{"operation", "sum"}
};
std::map<std::string, std::string> relu_params = {};
std::vector<std::pair<std::string, std::string>> edges = { {"0,0", "2,2"}, {"2,3", "4,6"}, {"1,1", "3,4"}, {"3,5", "4,7"}, {"4,8", "5,9"} };
return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "Fusion_conv_sum", p.in1, precision, in1_stat)
- .addInputLayer(precision, p.in2, in2_stat)
- .convolutionLayer(precision, {{p.in1}, {convOutShape1}}, p.conv1, conv1_stat)
- .convolutionLayer(precision, {{p.in2}, {convOutShape2}}, p.conv2, conv2_stat)
- .addLayer("Eltwise", precision, &elt_params, {{convOutShape1, convOutShape2}, {convOutShape1}}, 0, 0, "data", "", conv1_stat)
- .addLayer("ReLU", precision, &relu_params, {{convOutShape1}, {convOutShape1}}, 0, 0, "data", "", conv1_stat)
+ "Fusion_conv_sum", p.in1, precision)
+ .addInputLayer(precision, p.in2)
+ .convolutionLayer(precision, {{p.in1}, {convOutShape1}}, p.conv1)
+ .convolutionLayer(precision, {{p.in2}, {convOutShape2}}, p.conv2)
+ .addLayer("Eltwise", precision, &elt_params, {{convOutShape1, convOutShape2}, {convOutShape1}}, 0, 0, "data", "")
+ .addLayer("ReLU", precision, &relu_params, {{convOutShape1}, {convOutShape1}}, 0, 0, "data", "")
.finish(&edges);
}
getConvOutShape(convOutShape1, p.conv3, convOutShape3);
getPoolOutShape(convOutShape1, p.pool, poolOutShape);
- CommonTestUtils::Statistic in1_stat, in2_stat, conv1_stat, conv2_stat, conv3_stat, pool_stat;
- fillStatistic(in1_stat, p.in1[1], -2.f, 2.f);
- fillStatistic(in2_stat, p.in2[1], -2.f, 2.f);
- fillStatistic(conv1_stat, p.conv1.out_c, -2.f, 2.f);
- fillStatistic(conv2_stat, p.conv2.out_c, -2.f, 2.f);
- fillStatistic(conv3_stat, p.conv3.out_c, -2.f, 2.f);
- fillStatistic(pool_stat, poolOutShape[1], 0.f, 3.f);
-
std::map<std::string, std::string> elt_params = {
{"operation", "sum"}
};
{"5,10", "7,13"},
{"4,8", "6,11"} };
return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
- "Fusion_conv_sum", p.in1, precision, in1_stat)
- .addInputLayer(precision, p.in2, in2_stat)
- .convolutionLayer(precision, {{p.in1}, {convOutShape1}}, p.conv1, conv1_stat)
- .convolutionLayer(precision, {{p.in2}, {convOutShape2}}, p.conv2, conv2_stat)
- .addLayer("Eltwise", precision, &elt_params, {{convOutShape1, convOutShape2}, {convOutShape1}}, 0, 0, "data", "", conv1_stat)
- .addLayer("ReLU", precision, &relu_params, {{convOutShape1}, {convOutShape1}}, 0, 0, "data", "", pool_stat)
- .convolutionLayer(precision, {{convOutShape1}, {convOutShape3}}, p.conv3, conv3_stat)
- .addLayer("Pooling", precision, &relu_params, {{convOutShape1}, {poolOutShape}}, 0, 0, "data", "", pool_stat)
+ "Fusion_conv_sum", p.in1, precision)
+ .addInputLayer(precision, p.in2)
+ .convolutionLayer(precision, {{p.in1}, {convOutShape1}}, p.conv1)
+ .convolutionLayer(precision, {{p.in2}, {convOutShape2}}, p.conv2)
+ .addLayer("Eltwise", precision, &elt_params, {{convOutShape1, convOutShape2}, {convOutShape1}}, 0, 0, "data", "")
+ .addLayer("ReLU", precision, &relu_params, {{convOutShape1}, {convOutShape1}}, 0, 0, "data", "")
+ .convolutionLayer(precision, {{convOutShape1}, {convOutShape3}}, p.conv3)
+ .addLayer("Pooling", precision, &relu_params, {{convOutShape1}, {poolOutShape}}, 0, 0, "data", "")
.finish(&edges);
}
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include <ie_core.hpp>
-
-#include <cnn_network_int8_normalizer.hpp>
-#include "tests_common.hpp"
-#include "ir_gen_helper.hpp"
-
-#include "common_test_utils/common_layers_params.hpp"
-
-using namespace ::testing;
-using namespace single_layer_tests;
-
-struct conv_conv_eltwise_params {
- // Formats: NCHW, NCDHW
- std::vector<size_t> in;
-
- CommonTestUtils::conv_common_params conv;
- CommonTestUtils::eltwise_common_params eltwise;
-};
-
-class NormalizationConvConvEltwiseTests: public TestsCommon,
- public WithParamInterface<conv_conv_eltwise_params> {
- std::string layers_t = R"V0G0N(
- <layer id="1" name="conv_1" precision="FP32" type="Convolution">
- <data group="_GC_" kernel="_K_" output="_OC_" pads_begin="_PB_" pads_end="_PE_" strides="_KS_"/>
- <input>
- <port id="0">
- __INP_DIMS__
- </port>
- </input>
- <output>
- <port id="1">
- __CONV_OUT_DIMS__
- </port>
- </output>
- <blobs>
- <weights offset="0" size="1"/>
- <biases offset="1" size="2"/>
- </blobs>
- </layer>
- <layer id="2" name="conv_2" precision="FP32" type="Convolution">
- <data group="_GC_" kernel="_K_" output="_OC_" pads_begin="_PB_" pads_end="_PE_" strides="_KS_"/>
- <input>
- <port id="0">
- __INP_DIMS__
- </port>
- </input>
- <output>
- <port id="1">
- __CONV_OUT_DIMS__
- </port>
- </output>
- <blobs>
- <weights offset="3" size="4"/>
- <biases offset="4" size="5"/>
- </blobs>
- </layer>
- <layer id="3" name="eltwise_block" precision="FP32" type="Eltwise">
- <data coeff="" operation="sum"/>
- <input>
- <port id="0">
- __CONV_OUT_DIMS__
- </port>
- <port id="1">
- __CONV_OUT_DIMS__
- </port>
- </input>
- <output>
- <port id="2">
- __CONV_OUT_DIMS__
- </port>
- </output>
- </layer>
-)V0G0N";
-
- std::string edges_t = R"V0G0N(
- <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
- <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
- <edge from-layer="1" from-port="1" to-layer="3" to-port="0"/>
- <edge from-layer="2" from-port="1" to-layer="3" to-port="1"/>
-)V0G0N";
-
- std::string getModel(conv_conv_eltwise_params p) {
- std::string model = layers_t;
-
- std::string s_dims;
- for (auto& dim : p.in) {
- s_dims += "\n <dim>";
- s_dims += std::to_string(dim) + "</dim>";
- }
- REPLACE_WITH_STR(model, "__INP_DIMS__", s_dims);
-
- s_dims = "\n <dim>";
- s_dims += std::to_string(p.in[0]) + "</dim>";
- s_dims += "\n <dim>";
- s_dims += std::to_string(p.conv.out_c) + "</dim>";
- int k_len = p.conv.kernel.size();
- for (size_t i = 2; i < p.in.size(); i++) {
- size_t inx = k_len - i + 1;
- size_t dim = (p.in[i] + 2lu * p.conv.pads_begin[inx] - p.conv.kernel[inx]) / p.conv.stride[inx] + 1lu;
- s_dims += "\n <dim>";
- s_dims += std::to_string(dim) + "</dim>";
- }
- REPLACE_WITH_STR(model, "__CONV_OUT_DIMS__", s_dims);
-
- REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_K_", p.conv.kernel);
- REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_KS_", p.conv.stride);
- REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PB_", p.conv.pads_begin);
- REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PE_", p.conv.pads_end);
- REPLACE_WITH_NUM(model, "_GC_", p.conv.group);
- REPLACE_WITH_NUM(model, "_OC_", p.conv.out_c);
-
- model = IRTemplateGenerator::getIRTemplate("Deconvolution_Concat", p.in, "FP32", model, edges_t);
-
- return model;
- }
-
-protected:
- virtual void TearDown() {
- }
-
- virtual void SetUp() {
- try {
- TestsCommon::SetUp();
- conv_conv_eltwise_params p = ::testing::WithParamInterface<conv_conv_eltwise_params>::GetParam();
- std::string model = getModel(p);
-
- InferenceEngine::Core ie;
- InferenceEngine::CNNNetwork network;
- auto blob = InferenceEngine::make_shared_blob<uint8_t>(InferenceEngine::TensorDesc(InferenceEngine::Precision::U8,
- {9}, InferenceEngine::Layout::C));
- blob->allocate();
- ASSERT_NO_THROW(network = ie.ReadNetwork(model, blob));
-
- int maxSign = 0x7F;
- int maxUnsign = 0xFF;
-
- InferenceEngine::details::CNNStatisticHelper statHelper(network, {}, maxSign, maxUnsign);
- auto conv_1 = network.getLayerByName("conv_1");
- auto conv_2 = network.getLayerByName("conv_2");
- auto eltwise = network.getLayerByName("eltwise_block");
-
- ASSERT_EQ(eltwise, statHelper.getLatestInFuse(conv_1));
- ASSERT_EQ(conv_2, statHelper.getLatestInFuse(conv_2));
- ASSERT_EQ(eltwise, statHelper.getLatestInFuse(eltwise));
- } catch (const InferenceEngine::details::InferenceEngineException &e) {
- FAIL() << e.what();
- }
- }
-};
-
-TEST_P(NormalizationConvConvEltwiseTests, TestsConvConvEltwise) {}
-
-INSTANTIATE_TEST_CASE_P(
- TestsConvConvEltwise, NormalizationConvConvEltwiseTests,
- ::testing::Values(
- conv_conv_eltwise_params{{1, 16, 4, 4},
- { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "", 1, 32, true },
- {"sum", {}} },
- conv_conv_eltwise_params{{1, 16, 4, 4, 4},
- { {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, "", 1, 32, true },
- {"sum", {}} }
- ));
{
auto layer = getLayer(net, "layer1");
- auto cloned = IE::cloneNet({layer}, nullptr);
+ auto cloned = IE::cloneNet({layer});
EXPECT_EQ(2, cloned->layerCount());
auto clonedLayer = getLayer(cloned, "layer1");
ASSERT_NE(nullptr, clonedLayer);
{
auto layer1 = getLayer(net, "layer1");
auto layer2 = getLayer(net, "layer2");
- auto cloned = IE::cloneNet({layer1,layer2}, nullptr);
+ auto cloned = IE::cloneNet({layer1,layer2});
EXPECT_EQ(4, cloned->layerCount());
auto clonedLayer1 = getLayer(cloned, "layer1");
auto clonedLayer2 = getLayer(cloned, "layer2");
{
auto layer4 = getLayer(net, "layer4");
auto layer5 = getLayer(net, "layer5");
- auto cloned = IE::cloneNet({layer4,layer5}, nullptr);
+ auto cloned = IE::cloneNet({layer4,layer5});
EXPECT_EQ(4, cloned->layerCount());
auto clonedLayer4 = getLayer(cloned, "layer4");
auto clonedLayer5 = getLayer(cloned, "layer5");
}
{
auto layer3 = getLayer(net, "layer3");
- auto cloned = IE::cloneNet({layer3}, nullptr);
+ auto cloned = IE::cloneNet({layer3});
EXPECT_EQ(2, cloned->layerCount());
auto clonedLayer3 = getLayer(cloned, "layer3");
ASSERT_NE(nullptr, clonedLayer3);
auto layer5 = getLayer(net, "layer5");
auto layer6 = getLayer(net, "layer6");
auto layer7 = getLayer(net, "layer7");
- auto cloned = IE::cloneNet({layer1,layer2,layer3,layer4,layer5,layer6,layer7}, nullptr);
+ auto cloned = IE::cloneNet({layer1,layer2,layer3,layer4,layer5,layer6,layer7});
EXPECT_EQ(9, cloned->layerCount());
auto clonedLayer1 = getLayer(cloned, "layer1");
auto clonedLayer2 = getLayer(cloned, "layer2");
auto cloned = IE::cloneNet({getLayer(net, "layer1"),
getLayer(net, "layer2"),
- getLayer(net, "layer3")}, nullptr);
+ getLayer(net, "layer3")});
ASSERT_EQ(6, cloned->layerCount());
ASSERT_NE(nullptr, getLayer(cloned, "input1"));
auto cloned = IE::cloneNet({getLayer(net, "layer1"),
getLayer(net, "layer2"),
- getLayer(net, "layer3")}, nullptr);
+ getLayer(net, "layer3")});
ASSERT_EQ(6, cloned->layerCount());
ASSERT_NE(nullptr, getLayer(cloned, "input1"));