// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <vector>
#include "gna_plugin_config.hpp"
#include "layer_transform.hpp"
#include "graph_tools.hpp"
#include "details/ie_cnn_network_tools.h"
#include "layer_quantizer.hpp"
#include "scale_factor_calc.hpp"

namespace GNAPluginNS {
/**
 * Quantizes an entire CNN network.
 * @tparam T - type trait describing the target weights and biases precision
 */
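// Usage sketch, not part of this header: the QuantI16 trait is assumed to be one of the
// quantisation descriptors declared in layer_quantizer.hpp.
//
//   ModelQuantizer<QuantI16> q;
//   auto quantizedNet = q.quantize(network, 1.0f);   // network: InferenceEngine::ICNNNetwork&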
template<class T>
class ModelQuantizer {
 public:
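    // Convenience overload: quantises the whole network with a no-op pre-quantisation callback.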
    CNNNetworkPtr quantize(InferenceEngine::ICNNNetwork &model, float scaleFactor) const {
        return quantize(model, [](InferenceEngine::CNNNetPtr &){}, scaleFactor);
    }
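
    // Full quantisation pipeline: copy the network with per-layer QuantizedLayerParams attached,
    // let the caller post-process the copy via cb, propagate scale factors in topological order,
    // then quantise each layer's weights and biases.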
    template <class PreQuantisationCb>
    CNNNetworkPtr quantize(InferenceEngine::ICNNNetwork &model, const PreQuantisationCb &cb, float scaleFactor) const {
        auto visitor = [&](InferenceEngine::CNNLayerPtr lp) {
            return InferenceEngine::injectData<QuantizedLayerParams>(lp);
        };
        auto copiedNet = InferenceEngine::CNNNetCopy(model, visitor);

        // TODO: dynamic_cast is not the best way to change the network precision here;
        // a cleaner solution would be a CNNNetCopy overload taking two functors, one for
        // layer copy and one for network copy.
        auto rawNet = dynamic_cast<InferenceEngine::details::CNNNetworkImpl *>(copiedNet.get());
        rawNet->setPrecision(T::mandatory().getNetPrecision());

        // allow client code to access the copied topology, so that quantisation can be chained
        // with further preprocessing without an extra copy
        cb(copiedNet);
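
        // Prepare the per-layer quantiser and a topologically sorted view of the copied network.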
        LayersQuantizer<T> lc(scaleFactor);
        auto sortedNewNet = InferenceEngine::details::CNNNetSortTopologically(*copiedNet.get());
        gnalog() << "Sorted layers: " << std::endl;
        for (auto &&layer : sortedNewNet) {
            gnalog() << layer->name << std::endl;
        }

        // the weights scale factor is only a hint: not every weightable layer preserves it in every precision
        propagateScaleFactor(sortedNewNet, T::mandatory().getWeightsPrecision().size(), scaleFactor);

        // the sorted order makes it possible to propagate quantisation along dependent layers
        for (auto &&layer : sortedNewNet) {
            transformLayer(layer, lc);
        }

        return copiedNet;
    }

 private:
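    // Iterates ScaleFactorCalculator over the sorted layers until every layer has a stable
    // scale factor; processing restarts whenever a downstream layer forces an already
    // visited layer to update its output scale.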
    void propagateScaleFactor(std::vector<InferenceEngine::CNNLayerPtr> & net, int weightsBytesSize, float scaleFactor) const {
        ScaleFactorCalculator sf(net, weightsBytesSize, scaleFactor);

        while (!sf.allLayersProcessed()) {
            for (auto &&layer : sf.getStartLayers()) {
                transformLayer(layer, sf);
                // keep transforming until a downstream layer causes an already computed output scale to change
                if (sf.needToRestart()) {
                    break;
                }
            }
        }
    }
};
}  // namespace GNAPluginNS