//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include <armnn/ArmNN.hpp>

#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif

#include <HeapProfiling.hpp>
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <backends/backendsCommon/BackendRegistry.hpp>

#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/log/trivial.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <type_traits>
#include <vector>
namespace
{

inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        // Flag any requested backend that is not in the registry, and accumulate
        // its name into the optional diagnostic string.
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}

} // anonymous namespace
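// A minimal sketch of how the validation helper above might be called; the
// "FooBackend" ID is a deliberately invalid placeholder, not a real backend:
//
//     std::string invalid;
//     std::vector<armnn::BackendId> requested = { armnn::Compute::CpuRef, "FooBackend" };
//     if (!CheckRequestedBackendsAreValid(requested, armnn::Optional<std::string&>(invalid)))
//     {
//         std::cerr << "Invalid backend IDs: " << invalid << "\n";
//     }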
namespace InferenceModelInternal
{

// This needs to go when the armnnCaffeParser, armnnTfParser and armnnTfLiteParser
// definitions of BindingPointInfo get consolidated.
using BindingPointInfo = std::pair<armnn::LayerBindingId, armnn::TensorInfo>;

using QuantizationParams = std::pair<float, int32_t>;

struct Params
{
    std::string m_ModelPath;
    std::string m_InputBinding;
    std::string m_OutputBinding;
    const armnn::TensorShape* m_InputTensorShape;
    std::vector<armnn::BackendId> m_ComputeDevice;
    bool m_EnableProfiling;
    size_t m_SubgraphId;
    bool m_IsModelBinary;
    bool m_VisualizePostOptimizationModel;
    bool m_EnableFp16TurboMode;

    Params()
        : m_InputTensorShape(nullptr)
        , m_ComputeDevice{armnn::Compute::CpuRef}
        , m_EnableProfiling(false)
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
    {}
};

} // namespace InferenceModelInternal
template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk.
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (params.m_InputTensorShape)
        {
            inputShapes[params.m_InputBinding] = *params.m_InputTensorShape;
        }
        std::vector<std::string> requestedOutputs{ params.m_OutputBinding };
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function.
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_OutputBinding);

        return network;
    }
};
#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk. TfLite models are always binary.
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        // TfLite files can contain multiple subgraphs, so the bindings are looked up per subgraph.
        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_SubgraphId, params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, params.m_OutputBinding);

        return network;
    }
};
#endif
#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk.
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_OutputBinding);

        return network;
    }
};
#endif
template<typename TContainer>
inline armnn::InputTensors MakeInputTensors(const InferenceModelInternal::BindingPointInfo& input,
    const TContainer& inputTensorData)
{
    if (inputTensorData.size() != input.second.GetNumElements())
    {
        try
        {
            throw armnn::Exception(boost::str(boost::format("Input tensor has incorrect size. Expected %1% elements "
                "but got %2%.") % input.second.GetNumElements() % inputTensorData.size()));
        }
        catch (const boost::exception& e)
        {
            // Coverity fix: it should not be possible to get here, but boost::str and boost::format can both
            // throw uncaught exceptions; convert them to armnn exceptions and rethrow.
            throw armnn::Exception(diagnostic_information(e));
        }
    }
    return { { input.first, armnn::ConstTensor(input.second, inputTensorData.data()) } };
}
template<typename TContainer>
inline armnn::OutputTensors MakeOutputTensors(const InferenceModelInternal::BindingPointInfo& output,
    TContainer& outputTensorData)
{
    if (outputTensorData.size() != output.second.GetNumElements())
    {
        throw armnn::Exception("Output tensor has incorrect size");
    }
    return { { output.first, armnn::Tensor(output.second, outputTensorData.data()) } };
}
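// A minimal sketch of how the two helpers above wrap user-owned buffers; the
// element counts (1*3*224*224 and 1000) are illustrative and must match the
// bound TensorInfo of the model in question:
//
//     std::vector<float> inputData(1 * 3 * 224 * 224);
//     std::vector<float> outputData(1000);
//     armnn::InputTensors  inputs  = MakeInputTensors(inputBindingInfo, inputData);
//     armnn::OutputTensors outputs = MakeOutputTensors(outputBindingInfo, outputData);
//
// Neither helper copies the data: the tensors only reference the containers,
// so the containers must outlive the EnqueueWorkload() call.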
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType = TDataType;
    using Params = InferenceModelInternal::Params;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<armnn::BackendId> m_ComputeDevice;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
    };

    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        std::vector<armnn::BackendId> defaultBackends = {armnn::Compute::CpuAcc, armnn::Compute::CpuRef};

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
             "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<armnn::BackendId>>(&options.m_ComputeDevice)->default_value(defaultBackends),
             backendsMessage.c_str())
            ("visualize-optimized-model,v",
             po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
             "Produce a dot file useful for visualizing the graph post optimization. "
             "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
             "If this option is enabled, FP32 layers, weights and biases will be converted "
             "to FP16 where the backend supports it.");
    }
    InferenceModel(const Params& params, const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(params.m_EnableProfiling)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            m_Runtime = std::move(armnn::IRuntime::Create(options));
        }

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevice, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindingInfo,
                                                                        m_OutputBindingInfo);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;

            optNet = armnn::Optimize(*network, params.m_ComputeDevice, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::fstream::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }
    unsigned int GetOutputSize() const
    {
        return m_OutputBindingInfo.second.GetNumElements();
    }

    void Run(const std::vector<TDataType>& input, std::vector<TDataType>& output)
    {
        BOOST_ASSERT(output.size() == GetOutputSize());

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(input),
                                                       MakeOutputTensors(output));

        // If profiling is enabled, print out the results.
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
    }

    const InferenceModelInternal::BindingPointInfo& GetInputBindingInfo() const
    {
        return m_InputBindingInfo;
    }

    const InferenceModelInternal::BindingPointInfo& GetOutputBindingInfo() const
    {
        return m_OutputBindingInfo;
    }

    InferenceModelInternal::QuantizationParams GetQuantizationParams() const
    {
        return std::make_pair(m_OutputBindingInfo.second.GetQuantizationScale(),
                              m_OutputBindingInfo.second.GetQuantizationOffset());
    }
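    // A minimal sketch of dequantizing a raw output value with the parameters
    // returned above, given an InferenceModel instance `model`; `quantizedValue`
    // is a hypothetical raw value, and the usual real = scale * (quantized - offset)
    // convention is assumed:
    //
    //     InferenceModelInternal::QuantizationParams qp = model.GetQuantizationParams();
    //     float real = qp.first * (static_cast<int32_t>(quantizedValue) - qp.second);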
private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    InferenceModelInternal::BindingPointInfo m_InputBindingInfo;
    InferenceModelInternal::BindingPointInfo m_OutputBindingInfo;
    bool m_EnableProfiling;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const TContainer& inputTensorData)
    {
        return ::MakeInputTensors(m_InputBindingInfo, inputTensorData);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(TContainer& outputTensorData)
    {
        return ::MakeOutputTensors(m_OutputBindingInfo, outputTensorData);
    }
};
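// A minimal end-to-end sketch, assuming a build with ARMNN_TF_LITE_PARSER
// defined; the model file and binding names are hypothetical and depend on
// the model being loaded:
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath     = "mobilenet_v1.tflite"; // hypothetical model file
//     params.m_InputBinding  = "input";               // hypothetical tensor name
//     params.m_OutputBinding = "output";              // hypothetical tensor name
//     params.m_ComputeDevice = { armnn::Compute::CpuAcc };
//
//     InferenceModel<armnnTfLiteParser::ITfLiteParser, float> model(params);
//
//     std::vector<float> input(/* element count of the input tensor */);
//     std::vector<float> output(model.GetOutputSize());
//     model.Run(input, output);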