IVGCVSW-3039 Unify BindingPointInfo declarations
platform/upstream/armnn.git: tests/InferenceModel.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include <armnn/ArmNN.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <HeapProfiling.hpp>

#include <backendsCommon/BackendRegistry.hpp>

#include <boost/algorithm/string/join.hpp>
#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/log/trivial.hpp>
#include <boost/format.hpp>
#include <boost/numeric/conversion/cast.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/variant.hpp>

#include <algorithm>
#include <chrono>
#include <iterator>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace
{

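// Returns true when every requested backend id is registered with the BackendRegistry;
// an empty list is treated as invalid. When the optional invalidBackendIds string is
// supplied, the names of any unknown backends are appended to it as a comma-separated
// list for error reporting.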
inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}

} // anonymous namespace

namespace InferenceModelInternal
{
using BindingPointInfo = armnn::BindingPointInfo;

using QuantizationParams = std::pair<float,int32_t>;

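// Parser-independent parameters describing the model to load: where to find it, which
// input/output tensors to bind, which backends to run on, and a couple of optimization
// flags. The defaults select the CpuRef backend with a binary model and no FP16 turbo mode.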
struct Params
{
    std::string                     m_ModelPath;
    std::vector<std::string>        m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string>        m_OutputBindings;
    std::vector<armnn::BackendId>   m_ComputeDevices;
    size_t                          m_SubgraphId;
    bool                            m_IsModelBinary;
    bool                            m_VisualizePostOptimizationModel;
    bool                            m_EnableFp16TurboMode;

    Params()
        : m_ComputeDevices{"CpuRef"}
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
    {}
};

} // namespace InferenceModelInternal

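// CreateNetworkImpl builds an armnn::INetwork from a model file and fills in the input and
// output binding information reported by the parser. The primary template covers parsers
// whose CreateNetworkFrom*File functions take a map of input shapes and a list of requested
// outputs (for example the Caffe and TensorFlow parsers); the specializations below handle
// the deserializer, TfLite and ONNX front ends, whose factory functions differ.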
template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(boost::str(boost::format(
                    "Not every input has its tensor shape specified: expected=%1%, got=%2%")
                    % numInputBindings % numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};

#if defined(ARMNN_SERIALIZER)
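// Specialization for the ArmNN deserializer: reads a serialized ArmNN network from disk and
// converts the deserializer's BindingPointInfo structs into the (binding id, TensorInfo)
// pairs used throughout this file.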
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser          = armnnDeserializer::IDeserializer;
    using Params           = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        BOOST_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            boost::system::error_code errorCode;
            boost::filesystem::path pathToFile(params.m_ModelPath);
            if (!boost::filesystem::exists(pathToFile, errorCode))
            {
                throw armnn::FileNotFoundException(boost::str(
                                                   boost::format("Cannot find the file (%1%) errorCode: %2% %3%") %
                                                   params.m_ModelPath %
                                                   errorCode %
                                                   CHECK_LOCATION().AsString()));
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);

            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subgraphId = boost::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif

#if defined(ARMNN_TF_LITE_PARSER)
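// Specialization for the TfLite parser: .tflite models are always binary, so only
// CreateNetworkFromBinaryFile is called and m_IsModelBinary is ignored. Bindings are looked
// up by subgraph id and tensor name and already come back as armnn::BindingPointInfo.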
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
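// Specialization for the ONNX parser: supports both text and binary models and binds inputs
// and outputs by tensor name alone, since no subgraph id is involved.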
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

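// Pairs each input binding with the matching user-supplied data container, producing the
// armnn::InputTensors expected by IRuntime::EnqueueWorkload. Containers are visited through
// boost::apply_visitor so the same code handles float, int and unsigned char data, and each
// container's element count is checked against the bound TensorInfo.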
template<typename TContainer>
inline armnn::InputTensors MakeInputTensors(
    const std::vector<armnn::BindingPointInfo>& inputBindings,
    const std::vector<TContainer>& inputDataContainers)
{
    armnn::InputTensors inputTensors;

    const size_t numInputs = inputBindings.size();
    if (numInputs != inputDataContainers.size())
    {
        throw armnn::Exception(boost::str(boost::format("Number of inputs does not match number of "
            "tensor data containers: %1% != %2%") % numInputs % inputDataContainers.size()));
    }

    for (size_t i = 0; i < numInputs; i++)
    {
        const armnn::BindingPointInfo& inputBinding = inputBindings[i];
        const TContainer& inputData = inputDataContainers[i];

        boost::apply_visitor([&](auto&& value)
                             {
                                 if (value.size() != inputBinding.second.GetNumElements())
                                 {
                                     throw armnn::Exception("Input tensor has incorrect size");
                                 }

                                 armnn::ConstTensor inputTensor(inputBinding.second, value.data());
                                 inputTensors.push_back(std::make_pair(inputBinding.first, inputTensor));
                             },
                             inputData);
    }

    return inputTensors;
}

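// Same idea as MakeInputTensors, but builds writable armnn::Tensor objects over the output
// containers so that EnqueueWorkload can fill them in place.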
template<typename TContainer>
inline armnn::OutputTensors MakeOutputTensors(
    const std::vector<armnn::BindingPointInfo>& outputBindings,
    std::vector<TContainer>& outputDataContainers)
{
    armnn::OutputTensors outputTensors;

    const size_t numOutputs = outputBindings.size();
    if (numOutputs != outputDataContainers.size())
    {
        throw armnn::Exception(boost::str(boost::format("Number of outputs does not match number of "
            "tensor data containers: %1% != %2%") % numOutputs % outputDataContainers.size()));
    }

    for (size_t i = 0; i < numOutputs; i++)
    {
        const armnn::BindingPointInfo& outputBinding = outputBindings[i];
        TContainer& outputData = outputDataContainers[i];

        boost::apply_visitor([&](auto&& value)
                             {
                                 if (value.size() != outputBinding.second.GetNumElements())
                                 {
                                     throw armnn::Exception("Output tensor has incorrect size");
                                 }

                                 armnn::Tensor outputTensor(outputBinding.second, value.data());
                                 outputTensors.push_back(std::make_pair(outputBinding.first, outputTensor));
                             },
                             outputData);
    }

    return outputTensors;
}

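// InferenceModel ties the pieces above together: it parses a model with the given IParser,
// optimizes it for the requested backends, loads it into an IRuntime and exposes a simple
// Run() call that measures the wall-clock time spent in EnqueueWorkload.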
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;
    using TContainer         = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

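    // Registers the common command line options (model directory, compute devices, label
    // file, post-optimization visualization and FP16 turbo mode) on a
    // boost::program_options description, writing the parsed values into the supplied
    // CommandLineOptions.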
    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
                "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<std::string>>(&options.m_ComputeDevices)->
                default_value(defaultComputes, boost::algorithm::join(defaultComputes, ", "))->
                multitoken(), backendsMessage.c_str())
            ("labels,l", po::value<std::string>(&options.m_Labels),
                "Text file containing one image filename and its correct label per line, "
                "used to test the accuracy of the network.")
            ("visualize-optimized-model,v",
                po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
                "Produce a dot file useful for visualizing the graph post optimization. "
                "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                "If this option is enabled FP32 layers, weights and biases will be converted "
                "to FP16 where the backend supports it.");
    }

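    // Builds a ready-to-run network: creates (or reuses) an IRuntime, validates the
    // requested backends, parses the model, optimizes it (optionally reducing FP32 to FP16
    // and dumping a .dot file of the optimized graph) and finally loads it into the runtime.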
    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            m_Runtime = armnn::IRuntime::Create(options);
        }

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::INetworkPtr network =
            CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork *){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;

            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::fstream::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Input index out of range: %1%") % inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Output index out of range: %1%") % outputIndex));
        }
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

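    // Runs a single inference. The output containers are checked up front so that a
    // container that is too small fails with a clear message before any work is enqueued.
    // Returns the wall-clock time spent in EnqueueWorkload, in milliseconds.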
    std::chrono::duration<double, std::milli> Run(
            const std::vector<TContainer>& inputContainers,
            std::vector<TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            boost::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize   = boost::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = boost::numeric_cast<unsigned int>(i);
                    throw armnn::Exception(
                            boost::str(boost::format("Not enough data for output #%1%: expected "
                            "%2% elements, got %3%") % outputIndex % expectedOutputDataSize % actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
        const auto start_time = GetCurrentTime();

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(inputContainers),
                                                       MakeOutputTensors(outputContainers));

        const auto end_time = GetCurrentTime();

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        else
        {
            return std::chrono::duration<double, std::milli>(end_time - start_time);
        }
    }

    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return ::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return ::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }

    std::chrono::high_resolution_clock::time_point GetCurrentTime()
    {
        return std::chrono::high_resolution_clock::now();
    }

    std::chrono::duration<double, std::milli> GetTimeDuration(
            std::chrono::high_resolution_clock::time_point& start_time,
            std::chrono::high_resolution_clock::time_point& end_time)
    {
        return std::chrono::duration<double, std::milli>(end_time - start_time);
    }

};
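
// A minimal usage sketch (not compiled as part of this header), assuming ArmNN was built
// with ARMNN_TF_LITE_PARSER and that a hypothetical "model.tflite" has an input tensor
// named "input" and an output tensor named "output". Error handling is omitted.
//
//     #include "InferenceModel.hpp"
//     #include <iostream>
//
//     int main()
//     {
//         InferenceModelInternal::Params params;
//         params.m_ModelPath      = "model.tflite";   // hypothetical model file
//         params.m_InputBindings  = { "input" };      // hypothetical tensor names
//         params.m_OutputBindings = { "output" };
//         params.m_ComputeDevices = { "CpuAcc", "CpuRef" };
//
//         using Model = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
//         Model model(params, /*enableProfiling=*/false);
//
//         // One container per binding; element counts must match the bound TensorInfo.
//         std::vector<Model::TContainer> inputs =
//             { std::vector<float>(model.GetInputBindingInfo().second.GetNumElements()) };
//         std::vector<Model::TContainer> outputs =
//             { std::vector<float>(model.GetOutputSize()) };
//
//         std::cout << "Inference took " << model.Run(inputs, outputs).count() << " ms\n";
//         return 0;
//     }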