//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include "armnn/ArmNN.hpp"

#if defined(ARMNN_TF_LITE_PARSER)
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#endif

#include <HeapProfiling.hpp>
#if defined(ARMNN_ONNX_PARSER)
#include "armnnOnnxParser/IOnnxParser.hpp"
#endif

#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/log/trivial.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>

#include <map>
#include <string>
#include <fstream>
#include <type_traits>

namespace InferenceModelInternal
{
// This needs to go away once the armnnCaffeParser, armnnTfParser and armnnTfLiteParser
// definitions of BindingPointInfo are consolidated.
using BindingPointInfo = std::pair<armnn::LayerBindingId, armnn::TensorInfo>;

using QuantizationParams = std::pair<float, int32_t>;

struct Params
{
    std::string m_ModelPath;
    std::string m_InputBinding;
    std::string m_OutputBinding;
    const armnn::TensorShape* m_InputTensorShape;
    std::vector<armnn::Compute> m_ComputeDevice;
    bool m_EnableProfiling;
    size_t m_SubgraphId;
    bool m_IsModelBinary;
    bool m_VisualizePostOptimizationModel;
    bool m_EnableFp16TurboMode;

    Params()
        : m_InputTensorShape(nullptr)
        , m_ComputeDevice{armnn::Compute::CpuRef}
        , m_EnableProfiling(false)
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
    {}
};

} // namespace InferenceModelInternal
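
// A minimal usage sketch (not part of the original source): how a caller might populate
// InferenceModelInternal::Params before handing it to CreateNetworkImpl or InferenceModel
// below. The model path and binding names are placeholders.
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath     = "model.tflite";              // hypothetical model file
//     params.m_InputBinding  = "input";                     // hypothetical input tensor name
//     params.m_OutputBinding = "output";                    // hypothetical output tensor name
//     params.m_ComputeDevice = { armnn::Compute::CpuAcc };  // overrides the CpuRef default
//     params.m_IsModelBinary = true;                        // text formats (e.g. Caffe .prototxt) would use false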

template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (params.m_InputTensorShape)
        {
            inputShapes[params.m_InputBinding] = *params.m_InputTensorShape;
        }
        std::vector<std::string> requestedOutputs{ params.m_OutputBinding };
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_OutputBinding);
        return network;
    }
};

#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_SubgraphId, params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, params.m_OutputBinding);
        return network;
    }
};
#endif
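
// Usage sketch (illustrative only), assuming the TfLite parser build (ARMNN_TF_LITE_PARSER)
// and the hypothetical params shown earlier: CreateNetworkImpl returns the parsed network
// and fills in the input and output binding info.
//
//     InferenceModelInternal::BindingPointInfo inputBinding;
//     InferenceModelInternal::BindingPointInfo outputBinding;
//     armnn::INetworkPtr network =
//         CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>::Create(params, inputBinding, outputBinding);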

#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_OutputBinding);
        return network;
    }
};
#endif

template<typename TContainer>
inline armnn::InputTensors MakeInputTensors(const InferenceModelInternal::BindingPointInfo& input,
    const TContainer& inputTensorData)
{
    if (inputTensorData.size() != input.second.GetNumElements())
    {
        try
        {
            throw armnn::Exception(boost::str(boost::format("Input tensor has incorrect size. Expected %1% elements "
                "but got %2%.") % input.second.GetNumElements() % inputTensorData.size()));
        }
        catch (const boost::exception& e)
        {
            // Coverity fix: it should not be possible to get here, but boost::str and boost::format can both
            // throw uncaught exceptions; convert them to armnn exceptions and rethrow.
            throw armnn::Exception(diagnostic_information(e));
        }
    }
    return { { input.first, armnn::ConstTensor(input.second, inputTensorData.data()) } };
}

template<typename TContainer>
inline armnn::OutputTensors MakeOutputTensors(const InferenceModelInternal::BindingPointInfo& output,
    TContainer& outputTensorData)
{
    if (outputTensorData.size() != output.second.GetNumElements())
    {
        throw armnn::Exception("Output tensor has incorrect size");
    }
    return { { output.first, armnn::Tensor(output.second, outputTensorData.data()) } };
}
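
// Usage sketch (illustrative only): wrapping caller-owned buffers as ArmNN input/output
// tensors via the helpers above. The buffer sizes must match the bindings' element counts,
// otherwise the helpers throw. inputBinding/outputBinding are the hypothetical bindings
// from the earlier sketch.
//
//     std::vector<float> inputData(inputBinding.second.GetNumElements());
//     std::vector<float> outputData(outputBinding.second.GetNumElements());
//     armnn::InputTensors  inputTensors  = MakeInputTensors(inputBinding, inputData);
//     armnn::OutputTensors outputTensors = MakeOutputTensors(outputBinding, outputData);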

template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType = TDataType;
    using Params = InferenceModelInternal::Params;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<armnn::Compute> m_ComputeDevice;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
    };

    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
                "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<armnn::Compute>>(&options.m_ComputeDevice)->default_value
                ({armnn::Compute::CpuAcc, armnn::Compute::CpuRef}),
                "Which device to run layers on by default. Possible choices: CpuAcc, CpuRef, GpuAcc")
            ("visualize-optimized-model,v",
                po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
                "Produce a dot file useful for visualizing the graph post optimization. "
                "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                "If this option is enabled, FP32 layers, weights and biases will be converted "
                "to FP16 where the backend supports it.");
    }
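
    // Usage sketch (illustrative only): wiring CommandLineOptions into a boost::program_options
    // parse, e.g. inside a test's main(). The argc/argv names and the float TfLite instantiation
    // are assumptions for the example.
    //
    //     namespace po = boost::program_options;
    //     using TfLiteModel = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
    //
    //     po::options_description desc("Options");
    //     TfLiteModel::CommandLineOptions cliOptions;
    //     TfLiteModel::AddCommandLineOptions(desc, cliOptions);
    //
    //     po::variables_map vm;
    //     po::store(po::parse_command_line(argc, argv, desc), vm);
    //     po::notify(vm);  // throws if required options such as --model-dir are missing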

    InferenceModel(const Params& params, const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(params.m_EnableProfiling)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            m_Runtime = std::move(armnn::IRuntime::Create(options));
        }

        armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindingInfo,
            m_OutputBindingInfo);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork *){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;

            optNet = armnn::Optimize(*network, params.m_ComputeDevice, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), file.out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    unsigned int GetOutputSize() const
    {
        return m_OutputBindingInfo.second.GetNumElements();
    }

    void Run(const std::vector<TDataType>& input, std::vector<TDataType>& output)
    {
        BOOST_ASSERT(output.size() == GetOutputSize());

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(input),
                                                       MakeOutputTensors(output));
        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
    }

    const InferenceModelInternal::BindingPointInfo& GetInputBindingInfo() const
    {
        return m_InputBindingInfo;
    }

    const InferenceModelInternal::BindingPointInfo& GetOutputBindingInfo() const
    {
        return m_OutputBindingInfo;
    }

    InferenceModelInternal::QuantizationParams GetQuantizationParams() const
    {
        return std::make_pair(m_OutputBindingInfo.second.GetQuantizationScale(),
                              m_OutputBindingInfo.second.GetQuantizationOffset());
    }
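
    // The pair returned above is (quantization scale, quantization offset) of the output
    // binding. Assuming the usual affine quantization scheme, a quantized output value q
    // can be converted back to a real value as:
    //
    //     float real = scale * (static_cast<int32_t>(q) - offset);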

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    InferenceModelInternal::BindingPointInfo m_InputBindingInfo;
    InferenceModelInternal::BindingPointInfo m_OutputBindingInfo;
    bool m_EnableProfiling;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const TContainer& inputTensorData)
    {
        return ::MakeInputTensors(m_InputBindingInfo, inputTensorData);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(TContainer& outputTensorData)
    {
        return ::MakeOutputTensors(m_OutputBindingInfo, outputTensorData);
    }
};
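
// End-to-end usage sketch (illustrative, not part of the original source), assuming the
// TfLite parser build and the hypothetical params from the earlier sketches:
//
//     using TfLiteModel = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
//     TfLiteModel model(params);  // parses, optimizes and loads the network on a default-created runtime
//
//     std::vector<float> input(model.GetInputBindingInfo().second.GetNumElements());  // preprocessed input data
//     std::vector<float> output(model.GetOutputSize());
//     model.Run(input, output);   // runs inference via IRuntime::EnqueueWorkload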