src/armnn/NetworkQuantizer.cpp

   1 //
   2 // Copyright © 2017 Arm Ltd. All rights reserved.
   3 // SPDX-License-Identifier: MIT
   4 //
   5
   6 #include "NetworkQuantizer.hpp"
   7 #include "NetworkQuantizerUtils.hpp"
   8 #include "Graph.hpp"
   9 #include "Layer.hpp"
  10 #include "Network.hpp"
  11 #include "DynamicQuantizationVisitor.hpp"
  12 #include "StaticRangeVisitor.hpp"
  13 #include "QuantizerVisitor.hpp"
  14 #include "OverrideInputRangeVisitor.hpp"
  15
  16 #include <TensorIOUtils.hpp>
  17
  18 #include <armnn/ILayerVisitor.hpp>
  19 #include <armnn/INetwork.hpp>
  20 #include <armnn/Tensor.hpp>
  21 #include <armnn/Types.hpp>
  22
  23 #include <armnnUtils/TensorUtils.hpp>
  24
  25 #include <boost/variant.hpp>
  26
  27 #include <vector>
  28 #include <cmath>
  29
  30 namespace armnn
  31 {
  32
  // Element type of the output buffers that NetworkQuantizer::Refine() builds and hands to
  // armnnUtils::MakeOutputTensors. Each buffer is a vector of float, int or unsigned char.
  33 using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
  34
  35 INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options)
  36 {
  37     return new NetworkQuantizer(inputNetwork, options);
  38 }
  39
  40 INetworkQuantizerPtr INetworkQuantizer::Create(INetwork* inputNetwork, const QuantizerOptions& options)
  41 {
  42     return INetworkQuantizerPtr(CreateRaw(inputNetwork, options), &INetworkQuantizer::Destroy);
  43 }
  44
  45 void INetworkQuantizer::Destroy(INetworkQuantizer *quantizer)
  46 {
  47     delete boost::polymorphic_downcast<NetworkQuantizer*>(quantizer);
  48 }
  49
  50 void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, float max)
  51 {
  52     const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph();
  53     auto inputLayers = graph.GetInputLayers();
  54
  55     // Walk the input layers of the graph and override the quantization parameters of the one with the given id
  56     OverrideInputRangeVisitor overrideInputRangeVisitor(m_Ranges, layerId, RangeTracker::MinMaxRange{min, max});
  57     VisitLayers(inputLayers, overrideInputRangeVisitor);
  58 }
  59
  60 void NetworkQuantizer::Refine(const InputTensors& inputTensors)
  61 {
  62     // The first time Refine is called the m_Runtime and the DynamicQuantizationVisitor
  63     // will not have been created. Need to get the environment set up, Runtime loaded,
  64     // DynamicQuantizationVisitor created and run over the network to initialise itself
  65     // and the RangeTracker the Debug callback registered and an initial inference
  66     // done to set up the first min/max values
  67     if (!m_Runtime)
  68     {
  69         m_RefineCount = 0;
  70         m_Ranges.SetDynamicMode(true);
  71         const Graph& cGraph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
  72
  73         // need to insert Debug layers in the DynamicQuantizationVisitor
  74         Graph& graph = const_cast<Graph&>(cGraph);
  75
  76         // Initialize RangeTracker to the default values for each layer.
  77         // The default values are overwritten by the min/max that is
  78         // recorded during the first dataset min/max calibration. This
  79         // initialisation is only required for the first call of Refine().
  80         m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph);
  81         VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
  82
  83         IRuntime::CreationOptions options;
  84         m_Runtime = IRuntime::Create(options);
  85
  86         // Optimize network - debug already enabled for layers that require quantization
  87         OptimizerOptions optimizerOptions(false, false);
  88         std::vector<BackendId> backends = {"CpuRef"};
  89         IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
  90                                                      backends,
  91                                                      m_Runtime->GetDeviceSpec(),
  92                                                      optimizerOptions);
  93
  94         m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
  95
  96         // Debug callback function to refine min/max in RangeTracker
  97         auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) {
  98             // Get min/max pair from tensor data
  99             std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);
 100
 101             // For first calibration dataset, set min/max range in RangeTracker to
 102             // min/max ranges gathered during inference
 103             if (m_RefineCount == 0)
 104             {
 105                 m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
 106             }
 107             else
 108             {
 109                 // For every other calibration dataset, only set min/max range if the
 110                 // values gathered are less than / greater than originally recorded.
 111                 m_Ranges.RefineMin(guid, slotIndex, minMax.first);
 112                 m_Ranges.RefineMax(guid, slotIndex, minMax.second);
 113             }
 114         };
 115
 116         m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
 117     }
 118
 119     // Create output tensor for EnqueueWorkload
 120     std::vector<armnn::BindingPointInfo> outputBindings;
 121     auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
 122     std::vector<TContainer> outputVectors;
 123     for (auto outputLayerBindingId : outputLayers)
 124     {
 125         auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
 126         outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
 127         outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
 128     }
 129     OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
 130
 131     // Execute EnqueueWorkload with calibration image
 132     m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
 133     ++m_RefineCount;
 134 }
 135
 136 INetworkPtr NetworkQuantizer::ExportNetwork()
 137 {
 138     const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
 139
 140     // Step 1) Walk the graph and populate default min/max values for
 141     // intermediate tensors, only if Runtime does not exist (created
 142     // if Refine has been called)
 143     if (!m_Runtime)
 144     {
 145         m_Ranges.SetDynamicMode(false);
 146         StaticRangeVisitor rangeVisitor(m_Ranges);
 147         VisitLayers(graph, rangeVisitor);
 148     }
 149     else
 150     {
 151         // Set min/max range of non-calibrated layers to parent layer's range
 152         m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
 153         // now tear down the runtime and the dynamic visitor.
 154         m_Runtime.reset(nullptr);
 155         m_DynamicQuantizationVisitor = EmptyOptional();
 156         m_RefineCount = 0;
 157     }
 158
 159     // Step 2) Convert input InputNetwork to Quantized InputNetwork
 160     std::unique_ptr<IQuantizationScheme> quantizationScheme;
 161     switch (m_Options.m_ActivationFormat)
 162     {
 163         case DataType::QuantisedAsymm8:
 164             quantizationScheme = std::make_unique<QAsymm8QuantizationScheme>();
 165             break;
 166         case DataType::QSymmS8:
 167             quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
 168             break;
 169         case DataType::QuantisedSymm16:
 170             quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
 171             break;
 172         default:
 173             throw InvalidArgumentException("Unsupported quantization target");
 174     }
 175
 176     QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType);
 177     VisitLayers(graph, quantizerVisitor);
 178
 179     // clear the ranges
 180     m_Ranges.Reset();
 181
 182     return quantizerVisitor.RetrieveFinalNetwork();
 183 }
 184
 185 } // namespace armnn