NNXSW-1853 Change SubgraphViewSelector algorithm
[platform/upstream/armnn.git] / tests / InferenceModel.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <armnn/ArmNN.hpp>
#include <armnn/BackendRegistry.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <HeapProfiling.hpp>
#include <TensorIOUtils.hpp>

#include <boost/algorithm/string/join.hpp>
#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/log/trivial.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/numeric/conversion/cast.hpp>
#include <boost/variant.hpp>

#include <algorithm>
#include <chrono>
#include <iterator>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace
{

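// Returns true only if every requested backend ID is registered with the BackendRegistry.
// When the optional invalidBackendIds reference is supplied, the names of any unknown
// backends are appended to it as a comma-separated list for error reporting.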
inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}

} // anonymous namespace

namespace InferenceModelInternal
{
using BindingPointInfo = armnn::BindingPointInfo;

using QuantizationParams = std::pair<float,int32_t>;

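// Groups everything needed to build an InferenceModel: the model file, the input/output
// binding names (and optional input shapes), the backends to run on, and optimization flags.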
struct Params
{
    std::string                     m_ModelPath;
    std::vector<std::string>        m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string>        m_OutputBindings;
    std::vector<armnn::BackendId>   m_ComputeDevices;
    std::string                     m_DynamicBackendsPath;
    size_t                          m_SubgraphId;
    bool                            m_IsModelBinary;
    bool                            m_VisualizePostOptimizationModel;
    bool                            m_EnableFp16TurboMode;
    bool                            m_PrintIntermediateLayers;

    Params()
        : m_ComputeDevices{}
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
        , m_PrintIntermediateLayers(false)
    {}
};

} // namespace InferenceModelInternal

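// Generic parser path: used for parsers whose CreateNetworkFrom*File overloads take a map of
// input shapes and a list of requested outputs (the Caffe and TensorFlow parsers follow this
// pattern). Parsers with different factory signatures (deserializer, TF Lite, ONNX) are
// handled by the specializations below.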
template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(boost::str(boost::format(
                    "Not every input has its tensor shape specified: expected=%1%, got=%2%")
                    % numInputBindings % numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};

#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser          = armnnDeserializer::IDeserializer;
    using Params           = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        BOOST_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            boost::system::error_code errorCode;
            boost::filesystem::path pathToFile(params.m_ModelPath);
            if (!boost::filesystem::exists(pathToFile, errorCode))
            {
                throw armnn::FileNotFoundException(boost::str(
                                                   boost::format("Cannot find the file (%1%) errorCode: %2% %3%") %
                                                   params.m_ModelPath %
                                                   errorCode %
                                                   CHECK_LOCATION().AsString()));
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);

            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subgraphId = boost::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif

#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

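// InferenceModel ties the pieces above together: it parses a model with IParser, optimizes it
// for the requested backends, loads it into an armnn::IRuntime and exposes a Run() call that
// feeds input containers and collects outputs, returning the wall-clock time of EnqueueWorkload.
//
// Minimal usage sketch (illustrative only; assumes the TF Lite parser is built in, and the
// model path and binding names below are placeholders):
//
//     using Model = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
//     Model::Params params;
//     params.m_ModelPath      = "model.tflite";
//     params.m_InputBindings  = { "input" };
//     params.m_OutputBindings = { "output" };
//     params.m_ComputeDevices = { "CpuAcc", "CpuRef" };
//
//     Model model(params, /*enableProfiling=*/false, /*dynamicBackendsPath=*/"");
//     std::vector<Model::TContainer> inputs  = { std::vector<float>(model.GetInputSize()) };
//     std::vector<Model::TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };
//     auto durationMs = model.Run(inputs, outputs);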
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;
    using TContainer         = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        std::string m_DynamicBackendsPath;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

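    // Registers the common command line options (model directory, compute devices, dynamic
    // backend path, labels file, post-optimization visualization and FP16 turbo mode) with a
    // boost::program_options description, writing the parsed values into 'options'.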
    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
                "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<std::string>>(&options.m_ComputeDevices)->
                default_value(defaultComputes, boost::algorithm::join(defaultComputes, ", "))->
                multitoken(), backendsMessage.c_str())
            ("dynamic-backends-path,b", po::value(&options.m_DynamicBackendsPath),
                "Path where to load any available dynamic backend from. "
                "If left empty (the default), dynamic backends will not be used.")
            ("labels,l", po::value<std::string>(&options.m_Labels),
                "Text file containing one image filename - correct label pair per line, "
                "used to test the accuracy of the network.")
            ("visualize-optimized-model,v",
                po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
                "Produce a dot file useful for visualizing the graph post optimization. "
                "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                "If this option is enabled FP32 layers, weights and biases will be converted "
                "to FP16 where the backend supports it.");
    }

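    // Creates (or reuses) an IRuntime, validates the requested backends, parses the model via
    // CreateNetworkImpl<IParser>, optimizes it (optionally reducing FP32 to FP16 and dumping a
    // .dot file of the optimized graph) and loads it into the runtime. Throws armnn::Exception
    // on any failure.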
    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::string& dynamicBackendsPath,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling)
        , m_DynamicBackendsPath(dynamicBackendsPath)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            options.m_DynamicBackendsPath = m_DynamicBackendsPath;
            m_Runtime = std::move(armnn::IRuntime::Create(options));
        }

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
            options.m_Debug = params.m_PrintIntermediateLayers;

            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::ios_base::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Input index out of range: %1%") % inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Output index out of range: %1%") % outputIndex));
        }
    }

    unsigned int GetInputSize(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex].second.GetNumElements();
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

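    // Runs one inference. Each output container must already be large enough for its bound
    // tensor; the call throws otherwise. Returns the time spent in EnqueueWorkload, in
    // milliseconds, and prints the profiler output to std::cout when profiling is enabled.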
    std::chrono::duration<double, std::milli> Run(
            const std::vector<TContainer>& inputContainers,
            std::vector<TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            boost::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize   = boost::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = boost::numeric_cast<unsigned int>(i);
                    throw armnn::Exception(
                            boost::str(boost::format("Not enough data for output #%1%: expected "
                            "%2% elements, got %3%") % outputIndex % expectedOutputDataSize % actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
        const auto start_time = GetCurrentTime();

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(inputContainers),
                                                       MakeOutputTensors(outputContainers));

        const auto end_time = GetCurrentTime();

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        else
        {
            return std::chrono::duration<double, std::milli>(end_time - start_time);
        }
    }

    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;
    std::string m_DynamicBackendsPath;

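    // Thin wrappers around armnnUtils that pair each binding with its user-supplied container
    // to build the InputTensors/OutputTensors passed to EnqueueWorkload.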
    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }

    std::chrono::high_resolution_clock::time_point GetCurrentTime()
    {
        return std::chrono::high_resolution_clock::now();
    }

    std::chrono::duration<double, std::milli> GetTimeDuration(
            std::chrono::high_resolution_clock::time_point& start_time,
            std::chrono::high_resolution_clock::time_point& end_time)
    {
        return std::chrono::duration<double, std::milli>(end_time - start_time);
    }

};