//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include <armnn/ArmNN.hpp>

#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif

#include <HeapProfiling.hpp>
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <backendsCommon/BackendRegistry.hpp>

#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/log/trivial.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/numeric/conversion/cast.hpp>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <type_traits>

namespace
{

inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}

} // anonymous namespace
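
// Usage sketch for CheckRequestedBackendsAreValid: it validates the requested
// IDs against the backend registry and accumulates the invalid ones into the
// optional string. "NotARealBackend" below is deliberately unregistered.
//
//     std::string invalidBackends;
//     std::vector<armnn::BackendId> requested = { armnn::Compute::CpuRef, "NotARealBackend" };
//     if (!CheckRequestedBackendsAreValid(requested, armnn::Optional<std::string&>(invalidBackends)))
//     {
//         std::cerr << "Invalid backend IDs: " << invalidBackends << std::endl;
//     }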

namespace InferenceModelInternal
{

// This needs to go when the armnnCaffeParser, armnnTfParser and armnnTfLiteParser
// definitions of BindingPointInfo get consolidated.
using BindingPointInfo = std::pair<armnn::LayerBindingId, armnn::TensorInfo>;

using QuantizationParams = std::pair<float, int32_t>;

struct Params
{
    std::string m_ModelPath;
    std::vector<std::string> m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string> m_OutputBindings;
    std::vector<armnn::BackendId> m_ComputeDevice;
    bool m_EnableProfiling;
    size_t m_SubgraphId;
    bool m_IsModelBinary;
    bool m_VisualizePostOptimizationModel;
    bool m_EnableFp16TurboMode;

    Params()
        : m_ComputeDevice{armnn::Compute::CpuRef}
        , m_EnableProfiling(false)
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
    {}
};

} // namespace InferenceModelInternal
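
// A minimal sketch of filling in Params for a binary model; the path and
// layer names are placeholders, not values callers are required to use.
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath      = "model.tflite";
//     params.m_InputBindings  = { "input" };
//     params.m_OutputBindings = { "output" };
//     params.m_ComputeDevice  = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };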

template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(boost::str(boost::format(
                    "Not every input has its tensor shape specified: expected=%1%, got=%2%")
                    % numInputBindings % numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};
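
// This primary template matches parsers whose Create* functions take explicit
// input shapes and requested outputs. A hedged call sketch, assuming a parser
// with that interface (e.g. armnnCaffeParser::ICaffeParser) is in the build:
//
//     std::vector<InferenceModelInternal::BindingPointInfo> inputBindings, outputBindings;
//     armnn::INetworkPtr net = CreateNetworkImpl<armnnCaffeParser::ICaffeParser>::Create(
//         params, inputBindings, outputBindings);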

#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

template<typename TContainer>
inline armnn::InputTensors MakeInputTensors(
    const std::vector<InferenceModelInternal::BindingPointInfo>& inputBindings,
    const std::vector<TContainer>& inputDataContainers)
{
    armnn::InputTensors inputTensors;

    const size_t numInputs = inputBindings.size();
    if (numInputs != inputDataContainers.size())
    {
        throw armnn::Exception(boost::str(boost::format("Number of inputs does not match number of "
            "tensor data containers: %1% != %2%") % numInputs % inputDataContainers.size()));
    }

    for (size_t i = 0; i < numInputs; i++)
    {
        const InferenceModelInternal::BindingPointInfo& inputBinding = inputBindings[i];
        const TContainer& inputData = inputDataContainers[i];

        if (inputData.size() != inputBinding.second.GetNumElements())
        {
            throw armnn::Exception("Input tensor has incorrect size");
        }

        armnn::ConstTensor inputTensor(inputBinding.second, inputData.data());
        inputTensors.push_back(std::make_pair(inputBinding.first, inputTensor));
    }

    return inputTensors;
}

template<typename TContainer>
inline armnn::OutputTensors MakeOutputTensors(
    const std::vector<InferenceModelInternal::BindingPointInfo>& outputBindings,
    std::vector<TContainer>& outputDataContainers)
{
    armnn::OutputTensors outputTensors;

    const size_t numOutputs = outputBindings.size();
    if (numOutputs != outputDataContainers.size())
    {
        throw armnn::Exception(boost::str(boost::format("Number of outputs does not match number of "
            "tensor data containers: %1% != %2%") % numOutputs % outputDataContainers.size()));
    }

    for (size_t i = 0; i < numOutputs; i++)
    {
        const InferenceModelInternal::BindingPointInfo& outputBinding = outputBindings[i];
        TContainer& outputData = outputDataContainers[i];

        if (outputData.size() != outputBinding.second.GetNumElements())
        {
            throw armnn::Exception("Output tensor has incorrect size");
        }

        armnn::Tensor outputTensor(outputBinding.second, outputData.data());
        outputTensors.push_back(std::make_pair(outputBinding.first, outputTensor));
    }

    return outputTensors;
}
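
// A sketch of how the two helpers pair bindings with data buffers; container
// sizes must match the bound TensorInfo exactly. The binding vectors would
// normally come from CreateNetworkImpl<...>::Create above; the element counts
// here are placeholders.
//
//     std::vector<std::vector<float>> inputs  = { std::vector<float>(inputElementCount) };
//     std::vector<std::vector<float>> outputs = { std::vector<float>(outputElementCount) };
//     armnn::InputTensors  inputTensors  = MakeInputTensors(inputBindings, inputs);
//     armnn::OutputTensors outputTensors = MakeOutputTensors(outputBindings, outputs);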

template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType = TDataType;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;
    using TContainer = std::vector<TDataType>;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<armnn::BackendId> m_ComputeDevice;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
    };

    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        std::vector<armnn::BackendId> defaultBackends = {armnn::Compute::CpuAcc, armnn::Compute::CpuRef};

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
             "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<armnn::BackendId>>(&options.m_ComputeDevice)->default_value(defaultBackends),
             backendsMessage.c_str())
            ("visualize-optimized-model,v",
             po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
             "Produce a dot file useful for visualizing the graph post optimization. "
             "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
             "If this option is enabled, FP32 layers, weights and biases will be converted "
             "to FP16 where the backend supports it.");
    }

    InferenceModel(const Params& params, const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(params.m_EnableProfiling)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            m_Runtime = std::move(armnn::IRuntime::Create(options));
        }

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevice, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::INetworkPtr network =
            CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork *){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;

            optNet = armnn::Optimize(*network, params.m_ComputeDevice, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), file.out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Input index out of range: %1%") % inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Output index out of range: %1%") % outputIndex));
        }
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

    void Run(const std::vector<TContainer>& inputContainers, std::vector<TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); i++)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);
            const unsigned int actualOutputDataSize = boost::numeric_cast<unsigned int>(outputContainers[i].size());
            if (actualOutputDataSize < expectedOutputDataSize)
            {
                unsigned int outputIndex = boost::numeric_cast<unsigned int>(i);
                throw armnn::Exception(boost::str(boost::format("Not enough data for output #%1%: expected "
                    "%2% elements, got %3%") % outputIndex % expectedOutputDataSize % actualOutputDataSize));
            }
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(inputContainers),
                                                       MakeOutputTensors(outputContainers));

        // If profiling is enabled, print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
    }
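
    // Sketch of a single inference call, assuming one input and one output
    // binding; 'model' is an already-constructed InferenceModel instance.
    //
    //     std::vector<TContainer> inputs  = { TContainer(model.GetInputBindingInfo().second.GetNumElements()) };
    //     std::vector<TContainer> outputs = { TContainer(model.GetOutputSize()) };
    //     model.Run(inputs, outputs); // results are written into outputs[0]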

    const BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }
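
    // Dequantization sketch: for a quantized output, the real value is
    // recovered as scale * (quantizedValue - offset), using the {scale, offset}
    // pair returned by GetQuantizationParams(); 'quantizedValue' is a placeholder.
    //
    //     QuantizationParams qParams = model.GetQuantizationParams();
    //     float realValue = qParams.first *
    //         (static_cast<float>(quantizedValue) - static_cast<float>(qParams.second));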

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    std::vector<InferenceModelInternal::BindingPointInfo> m_InputBindings;
    std::vector<InferenceModelInternal::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return ::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return ::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }
};
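
// End-to-end usage sketch, assuming the TfLite parser is built in
// (ARMNN_TF_LITE_PARSER); the model path and layer names are placeholders.
//
//     using Model = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
//     Model::Params params;
//     params.m_ModelPath      = "model.tflite";
//     params.m_InputBindings  = { "input" };
//     params.m_OutputBindings = { "output" };
//     params.m_ComputeDevice  = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
//     Model model(params); // parses, optimizes and loads the network
//
//     std::vector<Model::TContainer> inputs  = { Model::TContainer(model.GetInputBindingInfo().second.GetNumElements()) };
//     std::vector<Model::TContainer> outputs = { Model::TContainer(model.GetOutputSize()) };
//     model.Run(inputs, outputs);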