IVGCVSW-3182 Corrected expected model predictions
tests/InferenceModel.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include <armnn/ArmNN.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <HeapProfiling.hpp>
#include <TensorIOUtils.hpp>

#include <backendsCommon/BackendRegistry.hpp>

#include <boost/algorithm/string/join.hpp>
#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/log/trivial.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/variant.hpp>

#include <algorithm>
#include <chrono>
#include <iterator>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace
{

inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}

} // anonymous namespace
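
// Minimal usage sketch (illustrative only, not part of this header's API): collect the invalid
// backend IDs so they can be reported to the user, mirroring how the InferenceModel constructor
// below uses this helper.
//
//     std::string invalidBackends;
//     std::vector<armnn::BackendId> requested = { "CpuAcc", "GpuAcc" };
//     if (!CheckRequestedBackendsAreValid(requested, armnn::Optional<std::string&>(invalidBackends)))
//     {
//         std::cerr << "Unknown backend IDs: " << invalidBackends << std::endl;
//     }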

namespace InferenceModelInternal
{
using BindingPointInfo = armnn::BindingPointInfo;

using QuantizationParams = std::pair<float,int32_t>;

struct Params
{
    std::string                     m_ModelPath;
    std::vector<std::string>        m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string>        m_OutputBindings;
    std::vector<armnn::BackendId>   m_ComputeDevices;
    size_t                          m_SubgraphId;
    bool                            m_IsModelBinary;
    bool                            m_VisualizePostOptimizationModel;
    bool                            m_EnableFp16TurboMode;

    Params()
        : m_ComputeDevices{"CpuRef"}
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
    {}
};

} // namespace InferenceModelInternal
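
// Illustrative sketch (the file name and layer names are hypothetical): populating a Params
// instance. m_InputShapes, when set, pairs up with m_InputBindings by position (see the generic
// CreateNetworkImpl below); the remaining fields keep the defaults set in the constructor above.
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath      = "model.tflite";
//     params.m_InputBindings  = { "input" };
//     params.m_OutputBindings = { "output" };
//     params.m_ComputeDevices = { "CpuAcc", "CpuRef" };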

template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(boost::str(boost::format(
                    "Not every input has its tensor shape specified: expected=%1%, got=%2%")
                    % numInputBindings % numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};

#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser          = armnnDeserializer::IDeserializer;
    using Params           = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        BOOST_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            boost::system::error_code errorCode;
            boost::filesystem::path pathToFile(params.m_ModelPath);
            if (!boost::filesystem::exists(pathToFile, errorCode))
            {
                throw armnn::FileNotFoundException(boost::str(
                                                   boost::format("Cannot find the file (%1%) errorCode: %2% %3%") %
                                                   params.m_ModelPath %
                                                   errorCode %
                                                   CHECK_LOCATION().AsString()));
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);

            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subgraphId = boost::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif

#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;
    using TContainer         = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
                "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<std::string>>(&options.m_ComputeDevices)->
                default_value(defaultComputes, boost::algorithm::join(defaultComputes, ", "))->
                multitoken(), backendsMessage.c_str())
            ("labels,l", po::value<std::string>(&options.m_Labels),
                "Text file containing one image filename - correct label pair per line, "
                "used to test the accuracy of the network.")
            ("visualize-optimized-model,v",
                po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
                "Produce a dot file useful for visualizing the graph post optimization. "
                "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                "If this option is enabled, FP32 layers, weights and biases will be converted "
                "to FP16 where the backend supports it.");
    }
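
    // Illustrative sketch of how these options might be wired into a command-line parser; the
    // enclosing main(), the Parser template argument and the error handling are hypothetical:
    //
    //     namespace po = boost::program_options;
    //     po::options_description desc("Options");
    //     InferenceModel<Parser, float>::CommandLineOptions options;
    //     InferenceModel<Parser, float>::AddCommandLineOptions(desc, options);
    //     po::variables_map vm;
    //     po::store(po::parse_command_line(argc, argv, desc), vm);
    //     po::notify(vm); // throws if required options such as --model-dir are missing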

    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            m_Runtime = std::move(armnn::IRuntime::Create(options));
        }

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::INetworkPtr network =
            CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork *){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;

            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), file.out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Input index out of range: %1%") % inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Output index out of range: %1%") % outputIndex));
        }
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

    std::chrono::duration<double, std::milli> Run(
            const std::vector<TContainer>& inputContainers,
            std::vector<TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            boost::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize   = boost::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = boost::numeric_cast<unsigned int>(i);
                    throw armnn::Exception(
                            boost::str(boost::format("Not enough data for output #%1%: expected "
                            "%2% elements, got %3%") % outputIndex % expectedOutputDataSize % actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
        const auto start_time = GetCurrentTime();

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(inputContainers),
                                                       MakeOutputTensors(outputContainers));

        const auto end_time = GetCurrentTime();

        // If profiling is enabled, print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        else
        {
            return std::chrono::duration<double, std::milli>(end_time - start_time);
        }
    }

    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

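    // Illustrative sketch (assumes ArmNN's usual affine quantization, real = scale * (quantized - offset);
    // "model" and "inputValue" are hypothetical): using the pair returned above to quantize a float
    // value for a quantized input tensor.
    //
    //     InferenceModelInternal::QuantizationParams qParams = model.GetInputQuantizationParams();
    //     float   scale  = qParams.first;
    //     int32_t offset = qParams.second;
    //     uint8_t quantized = static_cast<uint8_t>(std::round(inputValue / scale) + offset);
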
    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }

    std::chrono::high_resolution_clock::time_point GetCurrentTime()
    {
        return std::chrono::high_resolution_clock::now();
    }

    std::chrono::duration<double, std::milli> GetTimeDuration(
            std::chrono::high_resolution_clock::time_point& start_time,
            std::chrono::high_resolution_clock::time_point& end_time)
    {
        return std::chrono::duration<double, std::milli>(end_time - start_time);
    }

};
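
// End-to-end usage sketch (illustrative only; the parser type, file name, layer names and element
// counts are hypothetical, and error handling is omitted):
//
//     using Model = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
//
//     Model::Params params;
//     params.m_ModelPath      = "model.tflite";
//     params.m_InputBindings  = { "input" };
//     params.m_OutputBindings = { "output" };
//     params.m_ComputeDevices = { "CpuAcc", "CpuRef" };
//
//     Model model(params, /*enableProfiling=*/false);
//
//     std::vector<Model::TContainer> inputs  = { std::vector<float>(224 * 224 * 3) };
//     std::vector<Model::TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };
//
//     std::chrono::duration<double, std::milli> inferenceTime = model.Run(inputs, outputs);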