IVGCVSW-2564 Add support for multiple input and output bindings in InferenceModel
tests/InferenceModel.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include <armnn/ArmNN.hpp>

#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif

#include <HeapProfiling.hpp>
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <backendsCommon/BackendRegistry.hpp>

#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/log/trivial.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/numeric/conversion/cast.hpp>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace
{

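// Returns true if every requested backend id is registered with the backend registry.
// If invalidBackendIds is provided, any unknown ids are appended to it as a
// comma-separated list. An empty request is treated as invalid.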
inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}

} // anonymous namespace

namespace InferenceModelInternal
{
// This should go away once the armnnCaffeParser, armnnTfParser and armnnTfLiteParser
// definitions of BindingPointInfo are consolidated.
using BindingPointInfo = std::pair<armnn::LayerBindingId, armnn::TensorInfo>;

using QuantizationParams = std::pair<float, int32_t>;

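// Parameters shared by every parser front-end: the model file to load, the names of the
// input and output bindings to use (and optionally an explicit shape per input), plus the
// backends to run on and the optimization/profiling options.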
struct Params
{
    std::string                     m_ModelPath;
    std::vector<std::string>        m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string>        m_OutputBindings;
    std::vector<armnn::BackendId>   m_ComputeDevice;
    bool                            m_EnableProfiling;
    size_t                          m_SubgraphId;
    bool                            m_IsModelBinary;
    bool                            m_VisualizePostOptimizationModel;
    bool                            m_EnableFp16TurboMode;

    Params()
        : m_ComputeDevice{armnn::Compute::CpuRef}
        , m_EnableProfiling(false)
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
    {}
};

} // namespace InferenceModelInternal

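// Generic network factory: parses the model file with the given IParser (text or binary,
// honouring any user-supplied input shapes) and fills inputBindings/outputBindings with
// the binding info for each requested input and output layer name.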
template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(boost::str(boost::format(
                    "Not every input has its tensor shape specified: expected=%1%, got=%2%")
                    % numInputBindings % numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};

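// Specialization for the TfLite parser: .tflite models are always binary flatbuffers, and
// input/output bindings are looked up by tensor name within the requested subgraph.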
#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

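// Specialization for the ONNX parser: the parser takes no input shapes or requested
// outputs, so bindings are resolved purely by tensor name after parsing.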
#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

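// Wraps the given data containers as armnn::InputTensors for the corresponding bindings.
// Throws if the number of containers does not match the number of bindings, or if a
// container does not hold exactly the number of elements its input tensor expects.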
template<typename TContainer>
inline armnn::InputTensors MakeInputTensors(
    const std::vector<InferenceModelInternal::BindingPointInfo>& inputBindings,
    const std::vector<TContainer>& inputDataContainers)
{
    armnn::InputTensors inputTensors;

    const size_t numInputs = inputBindings.size();
    if (numInputs != inputDataContainers.size())
    {
        throw armnn::Exception(boost::str(boost::format("Number of inputs does not match number of "
            "tensor data containers: %1% != %2%") % numInputs % inputDataContainers.size()));
    }

    for (size_t i = 0; i < numInputs; i++)
    {
        const InferenceModelInternal::BindingPointInfo& inputBinding = inputBindings[i];
        const TContainer& inputData = inputDataContainers[i];

        if (inputData.size() != inputBinding.second.GetNumElements())
        {
            throw armnn::Exception("Input tensor has incorrect size");
        }

        armnn::ConstTensor inputTensor(inputBinding.second, inputData.data());
        inputTensors.push_back(std::make_pair(inputBinding.first, inputTensor));
    }

    return inputTensors;
}

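// Counterpart of MakeInputTensors for outputs: wraps each (mutable) output container in an
// armnn::Tensor, applying the same count and size validation.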
template<typename TContainer>
inline armnn::OutputTensors MakeOutputTensors(
    const std::vector<InferenceModelInternal::BindingPointInfo>& outputBindings,
    std::vector<TContainer>& outputDataContainers)
{
    armnn::OutputTensors outputTensors;

    const size_t numOutputs = outputBindings.size();
    if (numOutputs != outputDataContainers.size())
    {
        throw armnn::Exception(boost::str(boost::format("Number of outputs does not match number of "
            "tensor data containers: %1% != %2%") % numOutputs % outputDataContainers.size()));
    }

    for (size_t i = 0; i < numOutputs; i++)
    {
        const InferenceModelInternal::BindingPointInfo& outputBinding = outputBindings[i];
        TContainer& outputData = outputDataContainers[i];

        if (outputData.size() != outputBinding.second.GetNumElements())
        {
            throw armnn::Exception("Output tensor has incorrect size");
        }

        armnn::Tensor outputTensor(outputBinding.second, outputData.data());
        outputTensors.push_back(std::make_pair(outputBinding.first, outputTensor));
    }

    return outputTensors;
}

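// Convenience wrapper that parses a model, optimizes it for the requested backends, loads it
// into an armnn::IRuntime and exposes a Run() call that takes one data container per input
// and output binding.
//
// Minimal usage sketch (illustrative only; assumes an ARMNN_TF_LITE_PARSER build and a model
// with hypothetical binding names "input" and "output"):
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath      = "model.tflite";
//     params.m_InputBindings  = { "input" };
//     params.m_OutputBindings = { "output" };
//     params.m_ComputeDevice  = { armnn::Compute::CpuRef };
//
//     InferenceModel<armnnTfLiteParser::ITfLiteParser, float> model(params);
//     std::vector<std::vector<float>> inputs =
//         { std::vector<float>(model.GetInputBindingInfo().second.GetNumElements()) };
//     std::vector<std::vector<float>> outputs = { std::vector<float>(model.GetOutputSize()) };
//     model.Run(inputs, outputs);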
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using BindingPointInfo   = InferenceModelInternal::BindingPointInfo;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;
    using TContainer         = std::vector<TDataType>;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<armnn::BackendId> m_ComputeDevice;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
    };

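    // Registers the common command line options (model directory, compute devices, optimized
    // model visualization and FP16 turbo mode) on the given program_options description.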
    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        std::vector<armnn::BackendId> defaultBackends = {armnn::Compute::CpuAcc, armnn::Compute::CpuRef};

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
                "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<armnn::BackendId>>(&options.m_ComputeDevice)->default_value
                (defaultBackends), backendsMessage.c_str())
            ("visualize-optimized-model,v",
                po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
             "Produce a dot file useful for visualizing the graph post optimization. "
                "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                "If this option is enabled, FP32 layers, weights and biases will be converted "
                "to FP16 where the backend supports it.");
    }

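    // Parses the model described by params, optimizes it for the requested backends and loads
    // it into the runtime (creating a new runtime if none is supplied). Throws if any backend
    // id is invalid, if optimization fails, or if the network cannot be loaded.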
    InferenceModel(const Params& params, const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(params.m_EnableProfiling)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            m_Runtime = armnn::IRuntime::Create(options);
        }

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevice, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::INetworkPtr network =
            CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork *){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;

            optNet = armnn::Optimize(*network, params.m_ComputeDevice, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::fstream::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Input index out of range: %1%") % inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Output index out of range: %1%") % outputIndex));
        }
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

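    // Runs a single inference: checks that every output container is large enough for its
    // binding, wraps the containers as input/output tensors and enqueues the workload,
    // printing profiler output when profiling is enabled.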
    void Run(const std::vector<TContainer>& inputContainers, std::vector<TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); i++)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);
            const unsigned int actualOutputDataSize   = boost::numeric_cast<unsigned int>(outputContainers[i].size());
            if (actualOutputDataSize < expectedOutputDataSize)
            {
                unsigned int outputIndex = boost::numeric_cast<unsigned int>(i);
                throw armnn::Exception(boost::str(boost::format("Not enough data for output #%1%: expected "
                    "%2% elements, got %3%") % outputIndex % expectedOutputDataSize % actualOutputDataSize));
            }
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(inputContainers),
                                                       MakeOutputTensors(outputContainers));

        // If profiling is enabled, print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
    }

    const BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

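    // Returns the (scale, offset) quantization parameters of the given output binding, which
    // callers can use to dequantize quantized output data.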
    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    std::vector<InferenceModelInternal::BindingPointInfo> m_InputBindings;
    std::vector<InferenceModelInternal::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return ::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return ::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }
};