IVGCVSW-2529 DeepSpeech v1 test
tests/InferenceModel.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include <armnn/ArmNN.hpp>

#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif

#include <HeapProfiling.hpp>
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <backendsCommon/BackendRegistry.hpp>

#include <boost/algorithm/string/join.hpp>
#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/log/trivial.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/numeric/conversion/cast.hpp> // for boost::numeric_cast, used in Run()
#include <boost/variant.hpp>

#include <algorithm>
#include <iterator>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace
{

inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}

} // anonymous namespace
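
// Usage sketch (illustrative only, not part of the original header): the helper above can be
// used to validate a user-supplied backend list before network creation. The backend names
// below are examples; availability depends on how ArmNN was built.
//
//     std::string invalidBackends;
//     std::vector<armnn::BackendId> requested = { "CpuAcc", "GpuAcc", "CpuRef" };
//     if (!CheckRequestedBackendsAreValid(requested, armnn::Optional<std::string&>(invalidBackends)))
//     {
//         throw armnn::Exception("Invalid backend IDs: " + invalidBackends);
//     }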

namespace InferenceModelInternal
{
// This needs to go when the armnnCaffeParser, armnnTfParser and armnnTfLiteParser
// definitions of BindingPointInfo get consolidated.
using BindingPointInfo = std::pair<armnn::LayerBindingId, armnn::TensorInfo>;

using QuantizationParams = std::pair<float,int32_t>;

struct Params
{
    std::string                     m_ModelPath;
    std::vector<std::string>        m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string>        m_OutputBindings;
    std::vector<armnn::BackendId>   m_ComputeDevices;
    bool                            m_EnableProfiling;
    size_t                          m_SubgraphId;
    bool                            m_IsModelBinary;
    bool                            m_VisualizePostOptimizationModel;
    bool                            m_EnableFp16TurboMode;

    Params()
        : m_ComputeDevices{"CpuRef"}
        , m_EnableProfiling(false)
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
    {}
};

} // namespace InferenceModelInternal
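
// Illustrative example (not part of the original header): a Params instance for a binary
// TensorFlow Lite model with one input and one output. The file name and tensor names below
// are hypothetical placeholders.
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath      = "model.tflite";
//     params.m_InputBindings  = { "input_node" };
//     params.m_OutputBindings = { "output_node" };
//     params.m_ComputeDevices = { "CpuAcc", "CpuRef" };
//     params.m_IsModelBinary  = true;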

template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(boost::str(boost::format(
                    "Not every input has its tensor shape specified: expected=%1%, got=%2%")
                    % numInputBindings % numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};
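
// Note (added for clarity, hedged): this primary template assumes a parser whose Create*File
// functions take input shapes and requested outputs, and whose binding lookups are keyed by
// layer name alone, e.g. the Caffe and TensorFlow parsers used elsewhere in these tests.
// A hypothetical instantiation, assuming the Caffe parser was built in:
//
//     using CaffeNetwork = CreateNetworkImpl<armnnCaffeParser::ICaffeParser>;
//     armnn::INetworkPtr network = CaffeNetwork::Create(params, inputBindings, outputBindings);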

#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

template<typename TContainer>
inline armnn::InputTensors MakeInputTensors(
    const std::vector<InferenceModelInternal::BindingPointInfo>& inputBindings,
    const std::vector<TContainer>& inputDataContainers)
{
    armnn::InputTensors inputTensors;

    const size_t numInputs = inputBindings.size();
    if (numInputs != inputDataContainers.size())
    {
        throw armnn::Exception(boost::str(boost::format("Number of inputs does not match number of "
            "tensor data containers: %1% != %2%") % numInputs % inputDataContainers.size()));
    }

    for (size_t i = 0; i < numInputs; i++)
    {
        const InferenceModelInternal::BindingPointInfo& inputBinding = inputBindings[i];
        const TContainer& inputData = inputDataContainers[i];

        boost::apply_visitor([&](auto&& value)
                             {
                                 if (value.size() != inputBinding.second.GetNumElements())
                                 {
                                     throw armnn::Exception("Input tensor has incorrect size");
                                 }

                                 armnn::ConstTensor inputTensor(inputBinding.second, value.data());
                                 inputTensors.push_back(std::make_pair(inputBinding.first, inputTensor));
                             },
                             inputData);
    }

    return inputTensors;
}

template<typename TContainer>
inline armnn::OutputTensors MakeOutputTensors(
    const std::vector<InferenceModelInternal::BindingPointInfo>& outputBindings,
    std::vector<TContainer>& outputDataContainers)
{
    armnn::OutputTensors outputTensors;

    const size_t numOutputs = outputBindings.size();
    if (numOutputs != outputDataContainers.size())
    {
        throw armnn::Exception(boost::str(boost::format("Number of outputs does not match number of "
            "tensor data containers: %1% != %2%") % numOutputs % outputDataContainers.size()));
    }

    for (size_t i = 0; i < numOutputs; i++)
    {
        const InferenceModelInternal::BindingPointInfo& outputBinding = outputBindings[i];
        TContainer& outputData = outputDataContainers[i];

        boost::apply_visitor([&](auto&& value)
                             {
                                 if (value.size() != outputBinding.second.GetNumElements())
                                 {
                                     throw armnn::Exception("Output tensor has incorrect size");
                                 }

                                 armnn::Tensor outputTensor(outputBinding.second, value.data());
                                 outputTensors.push_back(std::make_pair(outputBinding.first, outputTensor));
                             },
                             outputData);
    }

    return outputTensors;
}
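
// Usage sketch (illustrative, not from the original file): packaging one float input and one
// float output container for EnqueueWorkload. The choice of the std::vector<float> variant
// alternative is an assumption; any of the TContainer alternatives works the same way.
//
//     using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
//     std::vector<TContainer> inputs  = { std::vector<float>(inputBindings[0].second.GetNumElements()) };
//     std::vector<TContainer> outputs = { std::vector<float>(outputBindings[0].second.GetNumElements()) };
//     armnn::InputTensors  inputTensors  = MakeInputTensors(inputBindings, inputs);
//     armnn::OutputTensors outputTensors = MakeOutputTensors(outputBindings, outputs);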

template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using BindingPointInfo   = InferenceModelInternal::BindingPointInfo;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;
    using TContainer         = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
                "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<std::string>>(&options.m_ComputeDevices)->
                default_value(defaultComputes, boost::algorithm::join(defaultComputes, ", "))->
                multitoken(), backendsMessage.c_str())
            ("visualize-optimized-model,v",
                po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
                "Produce a dot file useful for visualizing the graph post optimization. "
                "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                "If this option is enabled, FP32 layers, weights and biases will be converted "
                "to FP16 where the backend supports it.");
    }
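
    // Illustrative only (not in the original header): how a test harness might wire these options
    // up with boost::program_options. The concrete parser/data type and the argc/argv names are
    // placeholders.
    //
    //     using Model = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
    //     boost::program_options::options_description desc("Options");
    //     Model::CommandLineOptions cmdLine;
    //     Model::AddCommandLineOptions(desc, cmdLine);
    //     boost::program_options::variables_map vm;
    //     boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
    //     boost::program_options::notify(vm); // throws if the required --model-dir is missing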

    InferenceModel(const Params& params, const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(params.m_EnableProfiling)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            m_Runtime = std::move(armnn::IRuntime::Create(options));
        }

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::INetworkPtr network =
            CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork *){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;

            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::fstream::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Input index out of range: %1%") % inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Output index out of range: %1%") % outputIndex));
        }
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

    void Run(const std::vector<TContainer>& inputContainers, std::vector<TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            boost::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = boost::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = boost::numeric_cast<unsigned int>(i);
                    throw armnn::Exception(
                            boost::str(boost::format("Not enough data for output #%1%: expected "
                            "%2% elements, got %3%") % outputIndex % expectedOutputDataSize % actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(inputContainers),
                                                       MakeOutputTensors(outputContainers));

        // If profiling is enabled, print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
    }

    const BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    std::vector<InferenceModelInternal::BindingPointInfo> m_InputBindings;
    std::vector<InferenceModelInternal::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return ::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return ::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }
};
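
// End-to-end usage sketch (illustrative only; the model path, tensor names and backend list are
// placeholders, and the snippet assumes the TfLite parser was built in):
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath      = "deepspeech_v1.tflite";
//     params.m_InputBindings  = { "input_node" };
//     params.m_OutputBindings = { "logits" };
//     params.m_ComputeDevices = { "CpuAcc", "CpuRef" };
//
//     InferenceModel<armnnTfLiteParser::ITfLiteParser, float> model(params);
//
//     using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
//     std::vector<TContainer> inputs  = { std::vector<float>(model.GetInputBindingInfo().second.GetNumElements()) };
//     std::vector<TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };
//     model.Run(inputs, outputs);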