inference-engine/samples/classification_sample_async/main.cpp

   1 // Copyright (C) 2018-2019 Intel Corporation
   2 // SPDX-License-Identifier: Apache-2.0
   3 //
   4
   5 /**
   6 * @brief The entry point the Inference Engine sample application
   7 * @file classification_sample/main.cpp
   8 * @example classification_sample/main.cpp
   9 */
  10
  11 #include <fstream>
  12 #include <iomanip>
  13 #include <vector>
  14 #include <chrono>
  15 #include <memory>
  16 #include <string>
  17 #include <map>
  18
  19 #include <inference_engine.hpp>
  20
  21 #include <format_reader_ptr.h>
  22
  23 #include <samples/common.hpp>
  24 #include <samples/slog.hpp>
  25 #include <samples/args_helper.hpp>
  26 #include <samples/classification_results.h>
  27
  28 #include <sys/stat.h>
  29 #include <ext_list.hpp>
  30
  31 #include "classification_sample_async.h"
  32
  33 using namespace InferenceEngine;
  34
  35 ConsoleErrorListener error_listener;
  36
  37 bool ParseAndCheckCommandLine(int argc, char *argv[]) {
  38     // ---------------------------Parsing and validation of input args--------------------------------------
  39     slog::info << "Parsing input parameters" << slog::endl;
  40
  41     gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
  42     if (FLAGS_h) {
  43         showUsage();
  44         return false;
  45     }
  46     slog::info << "Parsing input parameters" << slog::endl;
  47
  48     if (FLAGS_ni < 1) {
  49         throw std::logic_error("Parameter -ni must be more than 0 ! (default 1)");
  50     }
  51
  52     if (FLAGS_nireq < 1) {
  53         throw std::logic_error("Parameter -nireq must be more than 0 ! (default 1)");
  54     }
  55
  56     if (FLAGS_i.empty()) {
  57         throw std::logic_error("Parameter -i is not set");
  58     }
  59
  60     if (FLAGS_m.empty()) {
  61         throw std::logic_error("Parameter -m is not set");
  62     }
  63
  64     if (FLAGS_ni < FLAGS_nireq) {
  65         throw std::logic_error("Number of iterations could not be less than requests quantity");
  66     }
  67
  68     return true;
  69 }
  70
  71 int main(int argc, char *argv[]) {
  72     try {
  73         slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl;
  74
  75         // ------------------------------ Parsing and validation of input args ---------------------------------
  76         if (!ParseAndCheckCommandLine(argc, argv)) {
  77             return 0;
  78         }
  79
  80         /** This vector stores paths to the processed images **/
  81         std::vector<std::string> imageNames;
  82         parseInputFilesArguments(imageNames);
  83         if (imageNames.empty()) throw std::logic_error("No suitable images were found");
  84         // -----------------------------------------------------------------------------------------------------
  85
  86         // --------------------------- 1. Load Plugin for inference engine -------------------------------------
  87         slog::info << "Loading plugin" << slog::endl;
  88         InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d);
  89         if (FLAGS_p_msg) {
  90             static_cast<InferenceEngine::InferenceEnginePluginPtr>(plugin)->SetLogCallback(error_listener);
  91         }
  92
  93         /** Loading default extensions **/
  94         if (FLAGS_d.find("CPU") != std::string::npos) {
  95             /**
  96              * cpu_extensions library is compiled from "extension" folder containing
  97              * custom MKLDNNPlugin layer implementations. These layers are not supported
  98              * by mkldnn, but they can be useful for inferring custom topologies.
  99             **/
 100             plugin.AddExtension(std::make_shared<Extensions::Cpu::CpuExtensions>());
 101         }
 102
 103         if (!FLAGS_l.empty()) {
 104             // CPU(MKLDNN) extensions are loaded as a shared library and passed as a pointer to base extension
 105             IExtensionPtr extension_ptr = make_so_pointer<IExtension>(FLAGS_l);
 106             plugin.AddExtension(extension_ptr);
 107             slog::info << "CPU Extension loaded: " << FLAGS_l << slog::endl;
 108         }
 109         if (!FLAGS_c.empty()) {
 110             // clDNN Extensions are loaded from an .xml description and OpenCL kernel files
 111             plugin.SetConfig({{PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}});
 112             slog::info << "GPU Extension loaded: " << FLAGS_c << slog::endl;
 113         }
 114
 115         ResponseDesc resp;
 116         /** Printing plugin version **/
 117         printPluginVersion(plugin, std::cout);
 118         // -----------------------------------------------------------------------------------------------------
 119
 120         // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
 121         slog::info << "Loading network files" << slog::endl;
 122
 123         CNNNetReader networkReader;
 124         /** Read network model **/
 125         networkReader.ReadNetwork(FLAGS_m);
 126
 127         /** Extract model name and load weights **/
 128         std::string binFileName = fileNameNoExt(FLAGS_m) + ".bin";
 129         networkReader.ReadWeights(binFileName);
 130
 131         CNNNetwork network = networkReader.getNetwork();
 132         // -----------------------------------------------------------------------------------------------------
 133
 134         // --------------------------- 3. Configure input & output ---------------------------------------------
 135
 136         // --------------------------- Prepare input blobs -----------------------------------------------------
 137         slog::info << "Preparing input blobs" << slog::endl;
 138
 139         /** Taking information about all topology inputs **/
 140         InputsDataMap inputInfo(network.getInputsInfo());
 141         if (inputInfo.size() != 1) throw std::logic_error("Sample supports topologies only with 1 input");
 142
 143         auto inputInfoItem = *inputInfo.begin();
 144
 145         /** Specifying the precision and layout of input data provided by the user.
 146          * This should be called before load of the network to the plugin **/
 147         inputInfoItem.second->setPrecision(Precision::U8);
 148         inputInfoItem.second->setLayout(Layout::NCHW);
 149
 150         std::vector<std::shared_ptr<unsigned char>> imagesData;
 151         for (auto & i : imageNames) {
 152             FormatReader::ReaderPtr reader(i.c_str());
 153             if (reader.get() == nullptr) {
 154                 slog::warn << "Image " + i + " cannot be read!" << slog::endl;
 155                 continue;
 156             }
 157             /** Store image data **/
 158             std::shared_ptr<unsigned char> data(
 159                     reader->getData(inputInfoItem.second->getTensorDesc().getDims()[3],
 160                                     inputInfoItem.second->getTensorDesc().getDims()[2]));
 161             if (data.get() != nullptr) {
 162                 imagesData.push_back(data);
 163             }
 164         }
 165         if (imagesData.empty()) throw std::logic_error("Valid input images were not found!");
 166
 167         /** Setting batch size using image count **/
 168         network.setBatchSize(imagesData.size());
 169         size_t batchSize = network.getBatchSize();
 170         slog::info << "Batch size is " << std::to_string(batchSize) << slog::endl;
 171
 172         // ------------------------------ Prepare output blobs -------------------------------------------------
 173         slog::info << "Preparing output blobs" << slog::endl;
 174
 175         OutputsDataMap outputInfo(network.getOutputsInfo());
 176         std::vector <Blob::Ptr> outputBlobs;
 177         for (size_t i = 0; i < FLAGS_nireq; i++) {
 178             auto outputBlob = make_shared_blob<PrecisionTrait<Precision::FP32>::value_type>(outputInfo.begin()->second->getTensorDesc());
 179             outputBlob->allocate();
 180             outputBlobs.push_back(outputBlob);
 181         }
 182         // -----------------------------------------------------------------------------------------------------
 183
 184         // --------------------------- 4. Loading model to the plugin ------------------------------------------
 185         slog::info << "Loading model to the plugin" << slog::endl;
 186
 187         std::map<std::string, std::string> config;
 188         if (FLAGS_pc)
 189             config[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES;
 190         if (FLAGS_d.find("CPU") != std::string::npos) {  // CPU supports few special performance-oriented keys
 191             // limit threading for CPU portion of inference
 192             config[PluginConfigParams::KEY_CPU_THREADS_NUM] = std::to_string(FLAGS_nthreads);
 193             // pin threads for CPU portion of inference
 194             config[PluginConfigParams::KEY_CPU_BIND_THREAD] = FLAGS_pin;
 195             // for pure CPU execution, more throughput-oriented execution via streams
 196             if (FLAGS_d == "CPU")
 197                 config[PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS] = std::to_string(FLAGS_nireq);
 198         }
 199         ExecutableNetwork executable_network = plugin.LoadNetwork(network, config);
 200         // -----------------------------------------------------------------------------------------------------
 201
 202         // --------------------------- 5. Create infer request -------------------------------------------------
 203         std::vector<InferRequest> inferRequests;
 204         for (size_t i = 0; i < FLAGS_nireq; i++) {
 205             InferRequest inferRequest = executable_network.CreateInferRequest();
 206             inferRequests.push_back(inferRequest);
 207         }
 208         // -----------------------------------------------------------------------------------------------------
 209
 210         // --------------------------- 6. Prepare input --------------------------------------------------------
 211         BlobMap inputBlobs;
 212         for (auto & item : inputInfo) {
 213             auto input = make_shared_blob<PrecisionTrait<Precision::U8>::value_type>(item.second->getTensorDesc());
 214             input->allocate();
 215             inputBlobs[item.first] = input;
 216
 217             auto dims = input->getTensorDesc().getDims();
 218             /** Fill input tensor with images. First b channel, then g and r channels **/
 219             size_t num_channels = dims[1];
 220             size_t image_size = dims[3] * dims[2];
 221
 222             /** Iterate over all input images **/
 223             for (size_t image_id = 0; image_id < imagesData.size(); ++image_id) {
 224                 /** Iterate over all pixel in image (b,g,r) **/
 225                 for (size_t pid = 0; pid < image_size; pid++) {
 226                     /** Iterate over all channels **/
 227                     for (size_t ch = 0; ch < num_channels; ++ch) {
 228                         /**          [images stride + channels stride + pixel id ] all in bytes            **/
 229                         input->data()[image_id * image_size * num_channels + ch * image_size + pid] = imagesData.at(image_id).get()[pid*num_channels + ch];
 230                     }
 231                 }
 232             }
 233         }
 234
 235         for (size_t i = 0; i < FLAGS_nireq; i++) {
 236             inferRequests[i].SetBlob(inputBlobs.begin()->first, inputBlobs.begin()->second);
 237             inferRequests[i].SetBlob(outputInfo.begin()->first, outputBlobs[i]);
 238         }
 239         // -----------------------------------------------------------------------------------------------------
 240
 241         // --------------------------- 7. Do inference ---------------------------------------------------------
 242         slog::info << "Start inference (" << FLAGS_ni << " iterations)" << slog::endl;
 243
 244         typedef std::chrono::high_resolution_clock Time;
 245         typedef std::chrono::duration<double, std::ratio<1, 1000>> ms;
 246         typedef std::chrono::duration<float> fsec;
 247
 248         // warming up
 249         inferRequests[0].StartAsync();
 250         inferRequests[0].Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
 251         double total = 0.0;
 252         /** Start inference & calc performance **/
 253         auto t0 = Time::now();
 254
 255         size_t currentInfer = 0;
 256         size_t prevInfer = (FLAGS_nireq > 1) ? 1 : 0;
 257
 258         for (size_t iter = 0; iter < FLAGS_ni + FLAGS_nireq; ++iter) {
 259             if (iter < FLAGS_ni) {
 260                 inferRequests[currentInfer].StartAsync();
 261             }
 262             inferRequests[prevInfer].Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
 263
 264             currentInfer++;
 265             if (currentInfer >= FLAGS_nireq) {
 266                 currentInfer = 0;
 267             }
 268             prevInfer++;
 269             if (prevInfer >= FLAGS_nireq) {
 270                 prevInfer = 0;
 271             }
 272         }
 273         auto t1 = Time::now();
 274         fsec fs = t1 - t0;
 275         ms d = std::chrono::duration_cast<ms>(fs);
 276         total = d.count();
 277         // -----------------------------------------------------------------------------------------------------
 278
 279         // --------------------------- 8. Process output -------------------------------------------------------
 280         slog::info << "Processing output blobs" << slog::endl;
 281
 282         for (size_t i = 0; i < FLAGS_nireq; i++) {
 283             /** Validating -nt value **/
 284             const size_t resultsCnt = outputBlobs[i]->size() / batchSize;
 285             if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) {
 286                 slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " \
 287                           << resultsCnt+1 << " and more than 0)\n            will be used maximal value : " << resultsCnt << slog::endl;
 288                 FLAGS_nt = resultsCnt;
 289             }
 290
 291             /** Read labels from file (e.x. AlexNet.labels) **/
 292             std::string labelFileName = fileNameNoExt(FLAGS_m) + ".labels";
 293             std::vector<std::string> labels;
 294
 295             std::ifstream inputFile;
 296             inputFile.open(labelFileName, std::ios::in);
 297             if (inputFile.is_open()) {
 298                 std::string strLine;
 299                 while (std::getline(inputFile, strLine)) {
 300                     trim(strLine);
 301                     labels.push_back(strLine);
 302                 }
 303             }
 304
 305             ClassificationResult classificationResult(outputBlobs[i], imageNames,
 306                                                       batchSize, FLAGS_nt,
 307                                                       labels);
 308             classificationResult.print();
 309         }
 310         // -----------------------------------------------------------------------------------------------------
 311         std::cout << std::endl << "total inference time: " << total << std::endl;
 312         std::cout << std::endl << "Throughput: " << 1000 * static_cast<double>(FLAGS_ni) * batchSize / total << " FPS" << std::endl;
 313         std::cout << std::endl;
 314
 315         /** Show performance results **/
 316         std::map<std::string, InferenceEngineProfileInfo> performanceMap;
 317         if (FLAGS_pc) {
 318             for (size_t nireq = 0; nireq < FLAGS_nireq; nireq++) {
 319                 printPerformanceCounts(inferRequests[nireq], std::cout);
 320             }
 321         }
 322     }
 323     catch (const std::exception& error) {
 324         slog::err << error.what() << slog::endl;
 325         return 1;
 326     }
 327     catch (...) {
 328         slog::err << "Unknown/internal exception happened." << slog::endl;
 329         return 1;
 330     }
 331
 332     slog::info << "Execution successful" << slog::endl;
 333     return 0;
 334 }