// tests/InferenceModel.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <armnn/ArmNN.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/utility/Assert.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <HeapProfiling.hpp>
#include <TensorIOUtils.hpp>

#include <boost/algorithm/string/join.hpp>
#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <boost/variant.hpp>

#include <algorithm>
#include <chrono>
#include <iterator>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace
{

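// Returns true if every requested backend id is registered with the BackendRegistry.
// When an invalidBackendIds string is supplied, the names of any unknown backends are
// appended to it as a comma-separated list.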
inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}

} // anonymous namespace

namespace InferenceModelInternal
{
using BindingPointInfo = armnn::BindingPointInfo;

using QuantizationParams = std::pair<float,int32_t>;

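// Parameters describing how a model should be parsed, optimized and loaded:
// model location, input/output binding names, target backends and optimization flags.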
struct Params
{
    std::string                     m_ModelPath;
    std::vector<std::string>        m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string>        m_OutputBindings;
    std::vector<armnn::BackendId>   m_ComputeDevices;
    std::string                     m_DynamicBackendsPath;
    size_t                          m_SubgraphId;
    bool                            m_IsModelBinary;
    bool                            m_VisualizePostOptimizationModel;
    bool                            m_EnableFp16TurboMode;
    bool                            m_EnableBf16TurboMode;
    bool                            m_PrintIntermediateLayers;
    bool                            m_ParseUnsupported;

    Params()
        : m_ComputeDevices{}
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
        , m_EnableBf16TurboMode(false)
        , m_PrintIntermediateLayers(false)
        , m_ParseUnsupported(false)
    {}
};

} // namespace InferenceModelInternal

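// Generic parser adaptor: creates an armnn::INetwork from a model file and resolves the
// binding information for the requested input and output layer names. The specializations
// below adapt parsers whose interfaces differ from this default.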
template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(boost::str(boost::format(
                    "Not every input has its tensor shape specified: expected=%1%, got=%2%")
                    % numInputBindings % numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};

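// Specialization for networks stored in the ArmNN serialized format: the deserializer reads
// a binary stream and resolves bindings per subgraph.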
#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser          = armnnDeserializer::IDeserializer;
    using Params           = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        ARMNN_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            boost::system::error_code errorCode;
            boost::filesystem::path pathToFile(params.m_ModelPath);
            if (!boost::filesystem::exists(pathToFile, errorCode))
            {
                throw armnn::FileNotFoundException(boost::str(
                                                   boost::format("Cannot find the file (%1%) errorCode: %2% %3%") %
                                                   params.m_ModelPath %
                                                   errorCode %
                                                   CHECK_LOCATION().AsString()));
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);

            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subgraphId = boost::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif

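// Specialization for TensorFlow Lite models: the parser only accepts binary flatbuffer
// files and resolves bindings per subgraph.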
#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        IParser::TfLiteParserOptions options;
        options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
        auto parser(IParser::Create(options));

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

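// Specialization for ONNX models: bindings are resolved by layer name alone, with no
// subgraph id.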
#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

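// Wraps the parse -> optimize -> load -> run workflow for a single network, templated on
// the parser type and on the element type of the input/output data.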
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;
    using TContainer         = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        std::string m_DynamicBackendsPath;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        bool m_EnableBf16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

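    // Registers the common model options (model directory, compute devices, labels,
    // turbo modes, etc.) on a boost::program_options description, binding them to the
    // supplied CommandLineOptions instance.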
    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
                "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<std::string>>(&options.m_ComputeDevices)->
                default_value(defaultComputes, boost::algorithm::join(defaultComputes, ", "))->
                multitoken(), backendsMessage.c_str())
            ("dynamic-backends-path,b", po::value(&options.m_DynamicBackendsPath),
                "Path from which to load any available dynamic backends. "
                "If left empty (the default), dynamic backends will not be used.")
            ("labels,l", po::value<std::string>(&options.m_Labels),
                "Text file containing one image filename - correct label pair per line, "
                "used to test the accuracy of the network.")
            ("visualize-optimized-model,v",
                po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
                "Produce a dot file useful for visualizing the graph post optimization. "
                "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                "If this option is enabled, FP32 layers, weights and biases will be converted "
                "to FP16 where the backend supports it.")
            ("bf16-turbo-mode", po::value<bool>(&options.m_EnableBf16TurboMode)->default_value(false),
                "If this option is enabled, FP32 layers, weights and biases will be converted "
                "to BF16 where the backend supports it.");
    }

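    // Parses the model described by params, optimizes it for the requested backends and
    // loads it into the runtime (creating a runtime if none is supplied). Throws
    // armnn::Exception if a backend id is invalid or if optimization or loading fails.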
    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::string& dynamicBackendsPath,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling)
        , m_DynamicBackendsPath(dynamicBackendsPath)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            options.m_DynamicBackendsPath = m_DynamicBackendsPath;
            m_Runtime = armnn::IRuntime::Create(options);
        }

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
            options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
            options.m_Debug = params.m_PrintIntermediateLayers;

            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::ios_base::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

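    // Bounds checks for binding indices; both throw armnn::Exception on an out-of-range index.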
    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Input index out of range: %1%") % inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Output index out of range: %1%") % outputIndex));
        }
    }

    unsigned int GetInputSize(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex].second.GetNumElements();
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

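    // Runs a single inference: checks that every output container is large enough,
    // enqueues the workload and returns the wall-clock duration of EnqueueWorkload
    // in milliseconds.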
    std::chrono::duration<double, std::milli> Run(
            const std::vector<TContainer>& inputContainers,
            std::vector<TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            boost::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize   = boost::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = boost::numeric_cast<unsigned int>(i);
                    throw armnn::Exception(
                            boost::str(boost::format("Not enough data for output #%1%: expected "
                            "%2% elements, got %3%") % outputIndex % expectedOutputDataSize % actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
        const auto start_time = GetCurrentTime();

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(inputContainers),
                                                       MakeOutputTensors(outputContainers));

        const auto end_time = GetCurrentTime();

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        else
        {
            return std::chrono::duration<double, std::milli>(end_time - start_time);
        }
    }

    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;
    std::string m_DynamicBackendsPath;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }

    std::chrono::high_resolution_clock::time_point GetCurrentTime()
    {
        return std::chrono::high_resolution_clock::now();
    }

    std::chrono::duration<double, std::milli> GetTimeDuration(
            std::chrono::high_resolution_clock::time_point& start_time,
            std::chrono::high_resolution_clock::time_point& end_time)
    {
        return std::chrono::duration<double, std::milli>(end_time - start_time);
    }

};
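
// Example usage (a minimal sketch): loading and running a TfLite model with this wrapper.
// The file path and binding names below are hypothetical placeholders; real values depend
// on the model being run.
//
//     using Model = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
//
//     Model::Params params;
//     params.m_ModelPath      = "model.tflite";   // hypothetical model file
//     params.m_InputBindings  = { "input" };      // hypothetical input layer name
//     params.m_OutputBindings = { "output" };     // hypothetical output layer name
//     params.m_ComputeDevices = { armnn::Compute::CpuRef };
//
//     Model model(params, /*enableProfiling=*/false, /*dynamicBackendsPath=*/"");
//
//     // One container per binding, sized from the loaded network.
//     std::vector<Model::TContainer> inputs  = { std::vector<float>(model.GetInputSize()) };
//     std::vector<Model::TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };
//
//     auto inferenceTimeMs = model.Run(inputs, outputs);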