IVGCVSW-2543 Add timing for ExecuteNetwork inference
tests/InferenceModel.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include <armnn/ArmNN.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <HeapProfiling.hpp>

#include <backendsCommon/BackendRegistry.hpp>

#include <boost/algorithm/string/join.hpp>
#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/log/trivial.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/variant.hpp>

#include <algorithm>
#include <chrono>
#include <iterator>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace
{

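// Returns true if every requested backend ID is registered with the BackendRegistry.
// An empty list is rejected; if an optional string is supplied, the invalid IDs are
// appended to it, comma separated.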
inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}

} // anonymous namespace

namespace InferenceModelInternal
{
// This needs to go when the armnnCaffeParser, armnnTfParser and armnnTfLiteParser
// definitions of BindingPointInfo get consolidated.
using BindingPointInfo = std::pair<armnn::LayerBindingId, armnn::TensorInfo>;

using QuantizationParams = std::pair<float,int32_t>;

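// Parameters shared by every parser-specific model loading path. The defaults describe
// a binary model run on CpuRef with profiling, graph visualization and FP16 turbo mode
// disabled.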
struct Params
{
    std::string                     m_ModelPath;
    std::vector<std::string>        m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string>        m_OutputBindings;
    std::vector<armnn::BackendId>   m_ComputeDevices;
    bool                            m_EnableProfiling;
    size_t                          m_SubgraphId;
    bool                            m_IsModelBinary;
    bool                            m_VisualizePostOptimizationModel;
    bool                            m_EnableFp16TurboMode;

    Params()
        : m_ComputeDevices{"CpuRef"}
        , m_EnableProfiling(false)
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
    {}
};

} // namespace InferenceModelInternal

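// Generic implementation: parses a text or binary model file from disk and resolves the
// requested input and output binding points.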
template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(boost::str(boost::format(
                    "Not every input has its tensor shape specified: expected=%1%, got=%2%")
                    % numInputBindings % numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};

#if defined(ARMNN_SERIALIZER)
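// Specialization for the Arm NN deserializer: checks that the model file exists, then
// loads a previously serialized network from the binary stream.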
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser          = armnnDeserializer::IDeserializer;
    using Params           = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        BOOST_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            boost::system::error_code errorCode;
            boost::filesystem::path pathToFile(params.m_ModelPath);
            if (!boost::filesystem::exists(pathToFile, errorCode))
            {
                throw armnn::FileNotFoundException(boost::str(
                                                   boost::format("Cannot find the file (%1%) errorCode: %2% %3%") %
                                                   params.m_ModelPath %
                                                   errorCode %
                                                   CHECK_LOCATION().AsString()));
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);

            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subGraphId = boost::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subGraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subGraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif

#if defined(ARMNN_TF_LITE_PARSER)
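// Specialization for the TensorFlow Lite parser: the model is always a binary flatbuffer
// and binding points are resolved on the requested subgraph.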
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
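// Specialization for the ONNX parser: handles both text and binary protobuf models.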
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

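// Pairs each input binding with its data container to build armnn::InputTensors, throwing
// if the number of containers or the element counts do not match the bindings.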
template<typename TContainer>
inline armnn::InputTensors MakeInputTensors(
    const std::vector<InferenceModelInternal::BindingPointInfo>& inputBindings,
    const std::vector<TContainer>& inputDataContainers)
{
    armnn::InputTensors inputTensors;

    const size_t numInputs = inputBindings.size();
    if (numInputs != inputDataContainers.size())
    {
        throw armnn::Exception(boost::str(boost::format("Number of inputs does not match number of "
            "tensor data containers: %1% != %2%") % numInputs % inputDataContainers.size()));
    }

    for (size_t i = 0; i < numInputs; i++)
    {
        const InferenceModelInternal::BindingPointInfo& inputBinding = inputBindings[i];
        const TContainer& inputData = inputDataContainers[i];

        boost::apply_visitor([&](auto&& value)
                             {
                                 if (value.size() != inputBinding.second.GetNumElements())
                                 {
                                     throw armnn::Exception("Input tensor has incorrect size");
                                 }

                                 armnn::ConstTensor inputTensor(inputBinding.second, value.data());
                                 inputTensors.push_back(std::make_pair(inputBinding.first, inputTensor));
                             },
                             inputData);
    }

    return inputTensors;
}

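// Pairs each output binding with its data container to build armnn::OutputTensors, throwing
// if the number of containers or the element counts do not match the bindings.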
template<typename TContainer>
inline armnn::OutputTensors MakeOutputTensors(
    const std::vector<InferenceModelInternal::BindingPointInfo>& outputBindings,
    std::vector<TContainer>& outputDataContainers)
{
    armnn::OutputTensors outputTensors;

    const size_t numOutputs = outputBindings.size();
    if (numOutputs != outputDataContainers.size())
    {
        throw armnn::Exception(boost::str(boost::format("Number of outputs does not match number of "
            "tensor data containers: %1% != %2%") % numOutputs % outputDataContainers.size()));
    }

    for (size_t i = 0; i < numOutputs; i++)
    {
        const InferenceModelInternal::BindingPointInfo& outputBinding = outputBindings[i];
        TContainer& outputData = outputDataContainers[i];

        boost::apply_visitor([&](auto&& value)
                             {
                                 if (value.size() != outputBinding.second.GetNumElements())
                                 {
                                     throw armnn::Exception("Output tensor has incorrect size");
                                 }

                                 armnn::Tensor outputTensor(outputBinding.second, value.data());
                                 outputTensors.push_back(std::make_pair(outputBinding.first, outputTensor));
                             },
                             outputData);
    }

    return outputTensors;
}

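// Parses, optimizes and loads a network at construction time, then exposes the input/output
// bindings, their quantization parameters and Run(), which executes a single inference and
// returns the time spent in EnqueueWorkload.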
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using BindingPointInfo   = InferenceModelInternal::BindingPointInfo;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;
    using TContainer         = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

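    // Registers the common command line options (model directory, compute devices,
    // post-optimization visualization and FP16 turbo mode) on the given description.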
    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
                "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<std::string>>(&options.m_ComputeDevices)->
                default_value(defaultComputes, boost::algorithm::join(defaultComputes, ", "))->
                multitoken(), backendsMessage.c_str())
            ("visualize-optimized-model,v",
                po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
                "Produce a dot file useful for visualizing the graph post optimization. "
                "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                "If this option is enabled, FP32 layers, weights and biases will be converted "
                "to FP16 where the backend supports it.");
    }

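    // Creates or adopts a runtime, validates the requested backends, then parses, optimizes
    // and loads the network, optionally dumping the optimized graph to a .dot file.
    // Throws armnn::Exception if any stage fails.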
    InferenceModel(const Params& params, const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(params.m_EnableProfiling)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            m_Runtime = std::move(armnn::IRuntime::Create(options));
        }

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::INetworkPtr network =
            CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork *){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;

            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::fstream::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Input index out of range: %1%") % inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Output index out of range: %1%") % outputIndex));
        }
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

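    // Executes a single inference. Checks that every output container is large enough,
    // then runs EnqueueWorkload and returns its wall-clock duration in milliseconds.
    // Throws armnn::Exception if the workload fails.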
    std::chrono::duration<double, std::milli> Run(
            const std::vector<TContainer>& inputContainers,
            std::vector<TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            boost::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = boost::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = boost::numeric_cast<unsigned int>(i);
                    throw armnn::Exception(
                            boost::str(boost::format("Not enough data for output #%1%: expected "
                            "%2% elements, got %3%") % outputIndex % expectedOutputDataSize % actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        // Record the start time so the wall-clock duration of EnqueueWorkload can be reported (in milliseconds).
        const auto start_time = GetCurrentTime();

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(inputContainers),
                                                       MakeOutputTensors(outputContainers));

        const auto end_time = GetCurrentTime();

        // If profiling is enabled, print out the results.
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        else
        {
            return std::chrono::duration<double, std::milli>(end_time - start_time);
        }
    }

    const BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    std::vector<InferenceModelInternal::BindingPointInfo> m_InputBindings;
    std::vector<InferenceModelInternal::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return ::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return ::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }

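    // Helpers for capturing time points and converting them into millisecond durations.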
    std::chrono::high_resolution_clock::time_point GetCurrentTime()
    {
        return std::chrono::high_resolution_clock::now();
    }

    std::chrono::duration<double, std::milli> GetTimeDuration(
            const std::chrono::high_resolution_clock::time_point& start_time,
            const std::chrono::high_resolution_clock::time_point& end_time)
    {
        return std::chrono::duration<double, std::milli>(end_time - start_time);
    }

};
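
// Illustrative usage (not part of this header): a caller such as ExecuteNetwork can time a
// single inference with the duration returned by Run(). The parser type, parameters and
// container contents below are placeholders chosen for the example.
//
//     using TfLiteModel = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
//     TfLiteModel model(params);
//     std::vector<TfLiteModel::TContainer> inputs  = ...;
//     std::vector<TfLiteModel::TContainer> outputs = ...;
//     auto inferenceDuration = model.Run(inputs, outputs);
//     std::cout << "Inference time: " << std::fixed << std::setprecision(2)
//               << inferenceDuration.count() << " ms" << std::endl;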