// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
#pragma once

#include "armnn/ArmNN.hpp"

#if defined(ARMNN_TF_LITE_PARSER)
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#endif

#include <HeapProfiling.hpp>
#if defined(ARMNN_ONNX_PARSER)
#include "armnnOnnxParser/IOnnxParser.hpp"
#endif

#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/log/trivial.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>

#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
namespace InferenceModelInternal
{

// This needs to go away when the armnnCaffeParser, armnnTfParser and armnnTfLiteParser
// definitions of BindingPointInfo get consolidated.
using BindingPointInfo = std::pair<armnn::LayerBindingId, armnn::TensorInfo>;

using QuantizationParams = std::pair<float, int32_t>;

struct Params
{
    std::string m_ModelPath;
    std::string m_InputBinding;
    std::string m_OutputBinding;
    const armnn::TensorShape* m_InputTensorShape;
    std::vector<armnn::Compute> m_ComputeDevice;
    bool m_EnableProfiling;
    size_t m_SubgraphId;
    bool m_IsModelBinary;
    bool m_VisualizePostOptimizationModel;
    bool m_EnableFp16TurboMode;

    Params()
        : m_InputTensorShape(nullptr)
        , m_ComputeDevice{armnn::Compute::CpuRef}
        , m_EnableProfiling(false)
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
    {}
};

} // namespace InferenceModelInternal
template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (params.m_InputTensorShape)
        {
            inputShapes[params.m_InputBinding] = *params.m_InputTensorShape;
        }
        std::vector<std::string> requestedOutputs{ params.m_OutputBinding };
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_OutputBinding);
        return network;
    }
};
#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_SubgraphId, params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, params.m_OutputBinding);
        return network;
    }
};
#endif
#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_OutputBinding);
        return network;
    }
};
#endif
template<typename TContainer>
inline armnn::InputTensors MakeInputTensors(const InferenceModelInternal::BindingPointInfo& input,
                                            const TContainer& inputTensorData)
{
    if (inputTensorData.size() != input.second.GetNumElements())
    {
        try
        {
            throw armnn::Exception(boost::str(boost::format("Input tensor has incorrect size. Expected %1% elements "
                "but got %2%.") % input.second.GetNumElements() % inputTensorData.size()));
        }
        catch (const boost::exception& e)
        {
            // Coverity fix: it should not be possible to get here, but boost::str and boost::format can both
            // throw uncaught exceptions; convert them to armnn exceptions and rethrow.
            throw armnn::Exception(diagnostic_information(e));
        }
    }
    return { { input.first, armnn::ConstTensor(input.second, inputTensorData.data()) } };
}
template<typename TContainer>
inline armnn::OutputTensors MakeOutputTensors(const InferenceModelInternal::BindingPointInfo& output,
                                              TContainer& outputTensorData)
{
    if (outputTensorData.size() != output.second.GetNumElements())
    {
        throw armnn::Exception("Output tensor has incorrect size");
    }
    return { { output.first, armnn::Tensor(output.second, outputTensorData.data()) } };
}
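
// Minimal usage sketch for the helpers above (illustrative only; the binding info would
// normally come from CreateNetworkImpl<IParser>::Create or from the InferenceModel class
// below). The containers are wrapped by reference, not copied, so they must stay alive
// until the workload has been enqueued and run.
//
//     InferenceModelInternal::BindingPointInfo inputBinding;  // filled in by a parser
//     std::vector<float> inputData(inputBinding.second.GetNumElements());
//     armnn::InputTensors inputTensors = MakeInputTensors(inputBinding, inputData);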
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType = TDataType;
    using Params = InferenceModelInternal::Params;
    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<armnn::Compute> m_ComputeDevice;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
    };
    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
             "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<armnn::Compute>>(&options.m_ComputeDevice)->default_value
             ({armnn::Compute::CpuAcc, armnn::Compute::CpuRef}),
             "Which device to run layers on by default. Possible choices: CpuAcc, CpuRef, GpuAcc")
            ("visualize-optimized-model,v",
             po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
             "Produce a dot file useful for visualizing the graph post optimization. "
             "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
             "If this option is enabled, FP32 layers, weights and biases will be converted "
             "to FP16 where the backend supports it.");
    }
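
    // Sketch of how these options would typically be consumed by a driver program
    // (illustrative only; the surrounding main() is an assumption, not part of this header):
    //
    //     namespace po = boost::program_options;
    //     po::options_description desc("Options");
    //     CommandLineOptions options;
    //     AddCommandLineOptions(desc, options);
    //     po::variables_map vm;
    //     po::store(po::parse_command_line(argc, argv, desc), vm);
    //     po::notify(vm);   // throws if the required --model-dir option is missing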
    InferenceModel(const Params& params, const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(params.m_EnableProfiling)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            m_Runtime = std::move(armnn::IRuntime::Create(options));
        }

        armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindingInfo,
                                                                        m_OutputBindingInfo);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork *){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");
            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
            optNet = armnn::Optimize(*network, params.m_ComputeDevice, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), file.out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }
    unsigned int GetOutputSize() const
    {
        return m_OutputBindingInfo.second.GetNumElements();
    }
    void Run(const std::vector<TDataType>& input, std::vector<TDataType>& output)
    {
        BOOST_ASSERT(output.size() == GetOutputSize());

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(input),
                                                       MakeOutputTensors(output));

        // If profiling is enabled, print out the results.
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
    }
    const InferenceModelInternal::BindingPointInfo& GetInputBindingInfo() const
    {
        return m_InputBindingInfo;
    }

    const InferenceModelInternal::BindingPointInfo& GetOutputBindingInfo() const
    {
        return m_OutputBindingInfo;
    }

    InferenceModelInternal::QuantizationParams GetQuantizationParams() const
    {
        return std::make_pair(m_OutputBindingInfo.second.GetQuantizationScale(),
                              m_OutputBindingInfo.second.GetQuantizationOffset());
    }
private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    InferenceModelInternal::BindingPointInfo m_InputBindingInfo;
    InferenceModelInternal::BindingPointInfo m_OutputBindingInfo;
    bool m_EnableProfiling;
    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const TContainer& inputTensorData)
    {
        return ::MakeInputTensors(m_InputBindingInfo, inputTensorData);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(TContainer& outputTensorData)
    {
        return ::MakeOutputTensors(m_OutputBindingInfo, outputTensorData);
    }
};
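
// End-to-end usage sketch (illustrative only; it assumes an ARMNN_TF_LITE_PARSER build,
// and the model file name and tensor binding names below are hypothetical, not defined
// by this header):
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath     = "model.tflite";
//     params.m_InputBinding  = "input";
//     params.m_OutputBinding = "output";
//     params.m_ComputeDevice = { armnn::Compute::CpuAcc };
//
//     InferenceModel<armnnTfLiteParser::ITfLiteParser, float> model(params);
//     std::vector<float> input(model.GetInputBindingInfo().second.GetNumElements());
//     std::vector<float> output(model.GetOutputSize());
//     model.Run(input, output);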