tests/InferenceModel.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <armnn/ArmNN.hpp>
#include <armnn/BackendRegistry.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <HeapProfiling.hpp>
#include <TensorIOUtils.hpp>

#include <boost/algorithm/string/join.hpp>
#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/variant.hpp>

#include <algorithm>
#include <chrono>
#include <iterator>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace
{

inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}

} // anonymous namespace
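
// Example (illustrative sketch, not part of the original file): how a caller in this
// translation unit might validate user-supplied backend names before building a model.
// The backend IDs "CpuAcc" and "GpuAcc" are chosen purely for illustration; any ID
// registered with the BackendRegistry works, and std::cerr assumes <iostream>.
//
//     std::vector<armnn::BackendId> requested = { "CpuAcc", "GpuAcc" };
//     std::string invalid;
//     if (!CheckRequestedBackendsAreValid(requested, armnn::Optional<std::string&>(invalid)))
//     {
//         std::cerr << "Unknown backend(s): " << invalid << std::endl;
//     }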

namespace InferenceModelInternal
{
using BindingPointInfo = armnn::BindingPointInfo;

using QuantizationParams = std::pair<float,int32_t>;

struct Params
{
    std::string                     m_ModelPath;
    std::vector<std::string>        m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string>        m_OutputBindings;
    std::vector<armnn::BackendId>   m_ComputeDevices;
    std::string                     m_DynamicBackendsPath;
    size_t                          m_SubgraphId;
    bool                            m_IsModelBinary;
    bool                            m_VisualizePostOptimizationModel;
    bool                            m_EnableFp16TurboMode;
    bool                            m_PrintIntermediateLayers;
    bool                            m_ParseUnsupported;

    Params()
        : m_ComputeDevices{}
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
        , m_PrintIntermediateLayers(false)
        , m_ParseUnsupported(false)
    {}
};

} // namespace InferenceModelInternal
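
// Example (illustrative sketch, not part of the original file): populating Params for a
// binary TfLite model. The model path, binding names and backend list below are
// assumptions chosen only to show which fields a caller typically sets; the remaining
// fields keep the defaults from the constructor above.
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath      = "model.tflite";         // hypothetical path
//     params.m_IsModelBinary  = true;
//     params.m_InputBindings  = { "input" };            // hypothetical binding name
//     params.m_OutputBindings = { "output" };           // hypothetical binding name
//     params.m_ComputeDevices = { "CpuAcc", "CpuRef" };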

template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(boost::str(boost::format(
                    "Not every input has its tensor shape specified: expected=%1%, got=%2%")
                    % numInputBindings % numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};

#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser          = armnnDeserializer::IDeserializer;
    using Params           = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        BOOST_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            boost::system::error_code errorCode;
            boost::filesystem::path pathToFile(params.m_ModelPath);
            if (!boost::filesystem::exists(pathToFile, errorCode))
            {
                throw armnn::FileNotFoundException(boost::str(
                                                   boost::format("Cannot find the file (%1%) errorCode: %2% %3%") %
                                                   params.m_ModelPath %
                                                   errorCode %
                                                   CHECK_LOCATION().AsString()));
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);

            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subgraphId = boost::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif

#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        IParser::TfLiteParserOptions options;
        options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
        auto parser(IParser::Create(options));

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;
    using TContainer         = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        std::string m_DynamicBackendsPath;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
                "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<std::string>>(&options.m_ComputeDevices)->
                default_value(defaultComputes, boost::algorithm::join(defaultComputes, ", "))->
                multitoken(), backendsMessage.c_str())
            ("dynamic-backends-path,b", po::value(&options.m_DynamicBackendsPath),
                "Path from which to load any available dynamic backends. "
                "If left empty (the default), dynamic backends will not be used.")
            ("labels,l", po::value<std::string>(&options.m_Labels),
                "Text file containing one image filename and its correct label per line, "
                "used to test the accuracy of the network.")
            ("visualize-optimized-model,v",
                po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
                "Produce a dot file useful for visualizing the graph post optimization. "
                "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                "If this option is enabled, FP32 layers, weights and biases will be converted "
                "to FP16 where the backend supports it.");
    }
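
    // Example (illustrative sketch, not part of the original file): wiring these options
    // into a boost::program_options parser. It assumes the TfLite parser is built in
    // (ARMNN_TF_LITE_PARSER) and that argc/argv come from main(); everything else uses
    // only what this header declares.
    //
    //     using TfLiteModel = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
    //
    //     TfLiteModel::CommandLineOptions cmdOptions;
    //     boost::program_options::options_description desc("Options");
    //     TfLiteModel::AddCommandLineOptions(desc, cmdOptions);
    //
    //     boost::program_options::variables_map vm;
    //     boost::program_options::store(
    //         boost::program_options::parse_command_line(argc, argv, desc), vm);
    //     boost::program_options::notify(vm);   // throws if required options are missing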

    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::string& dynamicBackendsPath,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling)
        , m_DynamicBackendsPath(dynamicBackendsPath)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            options.m_DynamicBackendsPath = m_DynamicBackendsPath;
            m_Runtime = std::move(armnn::IRuntime::Create(options));
        }

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
            options.m_Debug = params.m_PrintIntermediateLayers;

            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::ios_base::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Input index out of range: %1%") % inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Output index out of range: %1%") % outputIndex));
        }
    }

    unsigned int GetInputSize(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex].second.GetNumElements();
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

    std::chrono::duration<double, std::milli> Run(
            const std::vector<TContainer>& inputContainers,
            std::vector<TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            boost::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = boost::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = boost::numeric_cast<unsigned int>(i);
                    throw armnn::Exception(
                            boost::str(boost::format("Not enough data for output #%1%: expected "
                            "%2% elements, got %3%") % outputIndex % expectedOutputDataSize % actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
        const auto start_time = GetCurrentTime();

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(inputContainers),
                                                       MakeOutputTensors(outputContainers));

        const auto end_time = GetCurrentTime();

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        else
        {
            return std::chrono::duration<double, std::milli>(end_time - start_time);
        }
    }

    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }
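
    // Example (illustrative sketch, not part of the original file): using the input
    // quantization parameters to quantize float data for an 8-bit quantized model.
    // The affine mapping q = round(x / scale) + offset is the standard ArmNN/TfLite
    // scheme; the lambda name quantizeToU8 is hypothetical and <cmath> is assumed.
    //
    //     QuantizationParams qParams = model.GetInputQuantizationParams();
    //     float   scale  = qParams.first;
    //     int32_t offset = qParams.second;
    //     auto quantizeToU8 = [scale, offset](float x)
    //     {
    //         int q = static_cast<int>(std::round(x / scale)) + offset;
    //         return static_cast<unsigned char>(std::min(std::max(q, 0), 255));
    //     };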

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;
    std::string m_DynamicBackendsPath;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }

    std::chrono::high_resolution_clock::time_point GetCurrentTime()
    {
        return std::chrono::high_resolution_clock::now();
    }

    std::chrono::duration<double, std::milli> GetTimeDuration(
            std::chrono::high_resolution_clock::time_point& start_time,
            std::chrono::high_resolution_clock::time_point& end_time)
    {
        return std::chrono::duration<double, std::milli>(end_time - start_time);
    }

};
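
// Example (illustrative sketch, not part of the original file): end-to-end use of
// InferenceModel with the TfLite parser, assuming ARMNN_TF_LITE_PARSER is defined.
// The model path, binding names and zero-filled input are assumptions chosen only to
// show the call sequence; error handling and <iostream> are omitted for brevity.
//
//     using TfLiteModel = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
//
//     TfLiteModel::Params params;
//     params.m_ModelPath      = "mobilenet_v1.tflite";   // hypothetical model
//     params.m_InputBindings  = { "input" };
//     params.m_OutputBindings = { "output" };
//     params.m_ComputeDevices = { "CpuAcc", "CpuRef" };
//
//     TfLiteModel model(params, /*enableProfiling=*/false, /*dynamicBackendsPath=*/"");
//
//     std::vector<TfLiteModel::TContainer> inputs  = { std::vector<float>(model.GetInputSize()) };
//     std::vector<TfLiteModel::TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };
//
//     auto inferenceTime = model.Run(inputs, outputs);
//     std::cout << "Inference took " << inferenceTime.count() << " ms" << std::endl;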