// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

/**
* @brief The entry point for the Inference Engine sample application
* @file classification_sample_async/main.cpp
* @example classification_sample_async/main.cpp
*/
#include <chrono>
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <vector>

#include <inference_engine.hpp>

#include <format_reader_ptr.h>

#include <samples/common.hpp>
#include <samples/slog.hpp>
#include <samples/args_helper.hpp>
#include <samples/classification_results.h>

#include <ext_list.hpp>

#include "classification_sample_async.h"

using namespace InferenceEngine;

ConsoleErrorListener error_listener;
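// error_listener prints plugin log messages to the console; it is attached via SetLogCallback() in main()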

bool ParseAndCheckCommandLine(int argc, char *argv[]) {
    // --------------------------- Parsing and validation of input args ------------------------------------
    slog::info << "Parsing input parameters" << slog::endl;

    gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
    if (FLAGS_h) {
        showUsage();
        return false;
    }
49 throw std::logic_error("Parameter -ni must be more than 0 ! (default 1)");

    if (FLAGS_nireq < 1) {
        throw std::logic_error("Parameter -nireq must be greater than 0 (default: 1)");
    }

    if (FLAGS_i.empty()) {
        throw std::logic_error("Parameter -i is not set");
    }

    if (FLAGS_m.empty()) {
        throw std::logic_error("Parameter -m is not set");
    }

    if (FLAGS_ni < FLAGS_nireq) {
        throw std::logic_error("Number of iterations (-ni) cannot be less than the number of infer requests (-nireq)");
    }

    return true;
}

int main(int argc, char *argv[]) {
    try {
        slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl;

        // ------------------------------ Parsing and validation of input args ---------------------------------
        if (!ParseAndCheckCommandLine(argc, argv)) {
            return 0;
        }

        /** This vector stores paths to the processed images **/
        std::vector<std::string> imageNames;
        parseInputFilesArguments(imageNames);
        if (imageNames.empty()) throw std::logic_error("No suitable images were found");
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 1. Load Plugin for inference engine -------------------------------------
        slog::info << "Loading plugin" << slog::endl;
        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d);
        if (FLAGS_p_msg) {
            static_cast<InferenceEngine::InferenceEnginePluginPtr>(plugin)->SetLogCallback(error_listener);
        }

        /** Loading default extensions **/
        if (FLAGS_d.find("CPU") != std::string::npos) {
            /**
             * The cpu_extensions library is compiled from the "extension" folder, which contains
             * custom MKLDNNPlugin layer implementations. These layers are not supported
             * by mkldnn but can be useful for inferring custom topologies.
             **/
            plugin.AddExtension(std::make_shared<Extensions::Cpu::CpuExtensions>());
        }

        if (!FLAGS_l.empty()) {
            // CPU (MKLDNN) extensions are loaded as a shared library and passed as a pointer to the base extension
            IExtensionPtr extension_ptr = make_so_pointer<IExtension>(FLAGS_l);
            plugin.AddExtension(extension_ptr);
            slog::info << "CPU Extension loaded: " << FLAGS_l << slog::endl;
        }
        if (!FLAGS_c.empty()) {
            // clDNN extensions are loaded from an .xml description and OpenCL kernel files
            plugin.SetConfig({{PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}});
            slog::info << "GPU Extension loaded: " << FLAGS_c << slog::endl;
        }

        /** Printing plugin version **/
        printPluginVersion(plugin, std::cout);
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 2. Read IR generated by the Model Optimizer (.xml and .bin files) -------
        slog::info << "Loading network files" << slog::endl;

        CNNNetReader networkReader;
        /** Read network model **/
        networkReader.ReadNetwork(FLAGS_m);

        /** Extract model name and load weights **/
        std::string binFileName = fileNameNoExt(FLAGS_m) + ".bin";
        networkReader.ReadWeights(binFileName);

        CNNNetwork network = networkReader.getNetwork();
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 3. Configure input & output ---------------------------------------------

        // --------------------------- Prepare input blobs -----------------------------------------------------
        slog::info << "Preparing input blobs" << slog::endl;

        /** Taking information about all topology inputs **/
        InputsDataMap inputInfo(network.getInputsInfo());
        if (inputInfo.size() != 1) throw std::logic_error("Sample supports topologies only with 1 input");

        auto inputInfoItem = *inputInfo.begin();

        /** Specifying the precision and layout of input data provided by the user.
         * This should be called before loading the network to the plugin **/
        inputInfoItem.second->setPrecision(Precision::U8);
        inputInfoItem.second->setLayout(Layout::NCHW);
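        // U8 and NCHW match the raw image bytes filled in during step 6; the plugin converts
        // the data to the network's native precision as needed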

        std::vector<std::shared_ptr<unsigned char>> imagesData;
        for (auto & i : imageNames) {
            FormatReader::ReaderPtr reader(i.c_str());
            if (reader.get() == nullptr) {
                slog::warn << "Image " + i + " cannot be read!" << slog::endl;
                continue;
            }
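            // getDims() is in NCHW order: [3] is the width and [2] the height expected by getData()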
            /** Store image data **/
            std::shared_ptr<unsigned char> data(
                    reader->getData(inputInfoItem.second->getTensorDesc().getDims()[3],
                                    inputInfoItem.second->getTensorDesc().getDims()[2]));
            if (data.get() != nullptr) {
                imagesData.push_back(data);
            }
        }
        if (imagesData.empty()) throw std::logic_error("Valid input images were not found!");

        /** Setting batch size using image count **/
        network.setBatchSize(imagesData.size());
        size_t batchSize = network.getBatchSize();
        slog::info << "Batch size is " << std::to_string(batchSize) << slog::endl;
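        // Sizing the batch to the image count lets a single inference pass process every input image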

        // ------------------------------ Prepare output blobs -------------------------------------------------
        slog::info << "Preparing output blobs" << slog::endl;

        OutputsDataMap outputInfo(network.getOutputsInfo());
        std::vector<Blob::Ptr> outputBlobs;
        for (size_t i = 0; i < FLAGS_nireq; i++) {
            auto outputBlob = make_shared_blob<PrecisionTrait<Precision::FP32>::value_type>(outputInfo.begin()->second->getTensorDesc());
            outputBlob->allocate();
            outputBlobs.push_back(outputBlob);
        }
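        // A separate output blob per infer request keeps concurrently running requests
        // from overwriting each other's results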
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 4. Loading model to the plugin ------------------------------------------
        slog::info << "Loading model to the plugin" << slog::endl;

        std::map<std::string, std::string> config;
        if (FLAGS_pc)
            config[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES;
        if (FLAGS_d.find("CPU") != std::string::npos) {  // the CPU plugin supports a few special performance-oriented keys
            // limit threading for the CPU portion of inference
            config[PluginConfigParams::KEY_CPU_THREADS_NUM] = std::to_string(FLAGS_nthreads);
            // pin threads for the CPU portion of inference
            config[PluginConfigParams::KEY_CPU_BIND_THREAD] = FLAGS_pin;
            // for pure CPU execution, use streams for a more throughput-oriented execution mode
            if (FLAGS_d == "CPU")
                config[PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS] = std::to_string(FLAGS_nireq);
        }
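        // One throughput stream per infer request lets the CPU plugin execute the requests in parallel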
        ExecutableNetwork executable_network = plugin.LoadNetwork(network, config);
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 5. Create infer request -------------------------------------------------
        std::vector<InferRequest> inferRequests;
        for (size_t i = 0; i < FLAGS_nireq; i++) {
            InferRequest inferRequest = executable_network.CreateInferRequest();
            inferRequests.push_back(inferRequest);
        }
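        // Several requests (-nireq) allow inferences to overlap in the pipelined loop of step 7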
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 6. Prepare input --------------------------------------------------------
        BlobMap inputBlobs;
        for (auto & item : inputInfo) {
            auto input = make_shared_blob<PrecisionTrait<Precision::U8>::value_type>(item.second->getTensorDesc());
            input->allocate();
            inputBlobs[item.first] = input;

            auto dims = input->getTensorDesc().getDims();
            /** Fill input tensor with images. First b channel, then g and r channels **/
            size_t num_channels = dims[1];
            size_t image_size = dims[3] * dims[2];
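            // NCHW memory layout: each image occupies num_channels * image_size consecutive
            // bytes, and each channel is a contiguous plane of image_size bytes inside that block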

            /** Iterate over all input images **/
            for (size_t image_id = 0; image_id < imagesData.size(); ++image_id) {
                /** Iterate over all pixels in the image (b,g,r) **/
                for (size_t pid = 0; pid < image_size; pid++) {
                    /** Iterate over all channels **/
                    for (size_t ch = 0; ch < num_channels; ++ch) {
                        /** [images stride + channels stride + pixel id] all in bytes **/
                        input->data()[image_id * image_size * num_channels + ch * image_size + pid] =
                                imagesData.at(image_id).get()[pid * num_channels + ch];
                    }
                }
            }
        }

        for (size_t i = 0; i < FLAGS_nireq; i++) {
            inferRequests[i].SetBlob(inputBlobs.begin()->first, inputBlobs.begin()->second);
            inferRequests[i].SetBlob(outputInfo.begin()->first, outputBlobs[i]);
        }
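        // All requests share the one (constant) input blob, while each request owns its output blob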
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 7. Do inference ---------------------------------------------------------
        slog::info << "Start inference (" << FLAGS_ni << " iterations)" << slog::endl;

        typedef std::chrono::high_resolution_clock Time;
        typedef std::chrono::duration<double, std::ratio<1, 1000>> ms;
        typedef std::chrono::duration<float> fsec;
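
        /** Warm up: run one request to completion so one-time initialization costs
         * are not included in the timed loop below **/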
        inferRequests[0].StartAsync();
        inferRequests[0].Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);

        /** Start inference & calc performance **/
        auto t0 = Time::now();

        size_t currentInfer = 0;
        size_t prevInfer = (FLAGS_nireq > 1) ? 1 : 0;
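
        // Pipelined execution: each iteration starts request `currentInfer` and waits for request
        // `prevInfer` (started on an earlier iteration); both indices advance modulo FLAGS_nireq,
        // keeping up to FLAGS_nireq requests in flight. The final FLAGS_nireq iterations start no
        // new work and simply drain the requests that are still running.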
        for (size_t iter = 0; iter < FLAGS_ni + FLAGS_nireq; ++iter) {
            if (iter < FLAGS_ni) {
                inferRequests[currentInfer].StartAsync();
            }
            inferRequests[prevInfer].Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);

            currentInfer++;
            if (currentInfer >= FLAGS_nireq) {
                currentInfer = 0;
            }
            prevInfer++;
            if (prevInfer >= FLAGS_nireq) {
                prevInfer = 0;
            }
        }
        auto t1 = Time::now();
        fsec fs = t1 - t0;
        ms d = std::chrono::duration_cast<ms>(fs);
        double total = d.count();
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 8. Process output -------------------------------------------------------
        slog::info << "Processing output blobs" << slog::endl;

        for (size_t i = 0; i < FLAGS_nireq; i++) {
            /** Validating -nt value **/
            const size_t resultsCnt = outputBlobs[i]->size() / batchSize;
            if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) {
                slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt must be between 1 and "
                           << resultsCnt << "); the maximal value " << resultsCnt << " will be used" << slog::endl;
                FLAGS_nt = resultsCnt;
            }

            /** Read labels from a file (e.g. AlexNet.labels) **/
            std::string labelFileName = fileNameNoExt(FLAGS_m) + ".labels";
            std::vector<std::string> labels;

            std::ifstream inputFile;
            inputFile.open(labelFileName, std::ios::in);
            if (inputFile.is_open()) {
                std::string strLine;
                while (std::getline(inputFile, strLine)) {
                    trim(strLine);
                    labels.push_back(strLine);
                }
            }

            ClassificationResult classificationResult(outputBlobs[i], imageNames,
                                                      batchSize, FLAGS_nt,
                                                      labels);
            classificationResult.print();
        }
        // -----------------------------------------------------------------------------------------------------
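
        // `total` is measured in milliseconds, so the throughput below multiplies by 1000
        // and counts FLAGS_ni iterations of batchSize images each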
        std::cout << std::endl << "total inference time: " << total << std::endl;
        std::cout << std::endl << "Throughput: " << 1000 * static_cast<double>(FLAGS_ni) * batchSize / total << " FPS" << std::endl;
        std::cout << std::endl;

        /** Show performance results **/
        if (FLAGS_pc) {
            for (size_t nireq = 0; nireq < FLAGS_nireq; nireq++) {
                printPerformanceCounts(inferRequests[nireq], std::cout);
            }
        }
    }
    catch (const std::exception& error) {
        slog::err << error.what() << slog::endl;
        return 1;
    }
    catch (...) {
        slog::err << "Unknown/internal exception happened." << slog::endl;
        return 1;
    }

    slog::info << "Execution successful" << slog::endl;
    return 0;
}