inference-engine/samples/object_detection_sample_ssd/main.cpp

   1 // Copyright (C) 2018-2019 Intel Corporation
   2 // SPDX-License-Identifier: Apache-2.0
   3 //
   4
   5 #include <gflags/gflags.h>
   6 #include <functional>
   7 #include <iostream>
   8 #include <fstream>
   9 #include <random>
  10 #include <string>
  11 #include <memory>
  12 #include <vector>
  13 #include <time.h>
  14 #include <limits>
  15 #include <chrono>
  16 #include <algorithm>
  17
  18 #include <format_reader_ptr.h>
  19 #include <inference_engine.hpp>
  20 #include <ext_list.hpp>
  21
  22 #include <samples/common.hpp>
  23 #include <samples/slog.hpp>
  24 #include <samples/args_helper.hpp>
  25 #include "object_detection_sample_ssd.h"
  26
  27 using namespace InferenceEngine;
  28
  29 ConsoleErrorListener error_listener;  // handed to the plugin via SetLogCallback() in main() when FLAGS_p_msg is set
  30
  31 bool ParseAndCheckCommandLine(int argc, char *argv[]) {
  32     // ---------------------------Parsing and validation of input args--------------------------------------
  33     gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
  34     if (FLAGS_h) {
  35         showUsage();
  36         return false;
  37     }
  38
  39     slog::info << "Parsing input parameters" << slog::endl;
  40
  41     if (FLAGS_ni < 1) {
  42         throw std::logic_error("Parameter -ni should be greater than 0 (default: 1)");
  43     }
  44
  45     if (FLAGS_i.empty()) {
  46         throw std::logic_error("Parameter -i is not set");
  47     }
  48
  49     if (FLAGS_m.empty()) {
  50         throw std::logic_error("Parameter -m is not set");
  51     }
  52
  53     return true;
  54 }
  55
  56 /**
  57 * \brief The entry point for the Inference Engine object_detection sample application
  58 * \file object_detection_sample_ssd/main.cpp
  59 * \example object_detection_sample_ssd/main.cpp
  60 */
  61 int main(int argc, char *argv[]) {
  62     try {
  63         /** This sample covers certain topology and cannot be generalized for any object detection one **/
  64         slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << "\n";
  65
  66         // --------------------------- 1. Parsing and validation of input args ---------------------------------
  67         if (!ParseAndCheckCommandLine(argc, argv)) {
  68             return 0;
  69         }
  70         // -----------------------------------------------------------------------------------------------------
  71
  72         // --------------------------- 2. Read input -----------------------------------------------------------
  73         /** This vector stores paths to the processed images **/
  74         std::vector<std::string> images;
  75         parseInputFilesArguments(images);
  76         if (images.empty()) throw std::logic_error("No suitable images were found");
  77         // -----------------------------------------------------------------------------------------------------
  78
  79         // --------------------------- 3. Load Plugin for inference engine -------------------------------------
  80         slog::info << "Loading plugin" << slog::endl;
  81         InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d);
  82         if (FLAGS_p_msg) {
  83             static_cast<InferenceEngine::InferenceEnginePluginPtr>(plugin)->SetLogCallback(error_listener);
  84         }
  85
  86         /*If CPU device, load default library with extensions that comes with the product*/
  87         if (FLAGS_d.find("CPU") != std::string::npos) {
  88             /**
  89             * cpu_extensions library is compiled from "extension" folder containing
  90             * custom MKLDNNPlugin layer implementations. These layers are not supported
  91             * by mkldnn, but they can be useful for inferring custom topologies.
  92             **/
  93             plugin.AddExtension(std::make_shared<Extensions::Cpu::CpuExtensions>());
  94         }
  95
  96         if (!FLAGS_l.empty()) {
  97             // CPU(MKLDNN) extensions are loaded as a shared library and passed as a pointer to base extension
  98             IExtensionPtr extension_ptr = make_so_pointer<IExtension>(FLAGS_l);
  99             plugin.AddExtension(extension_ptr);
 100             slog::info << "CPU Extension loaded: " << FLAGS_l << slog::endl;
 101         }
 102
 103         if (!FLAGS_c.empty()) {
 104             // clDNN Extensions are loaded from an .xml description and OpenCL kernel files
 105             plugin.SetConfig({ { PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c } });
 106             slog::info << "GPU Extension loaded: " << FLAGS_c << slog::endl;
 107         }
 108
 109         /** Setting plugin parameter for per layer metrics **/
 110         if (FLAGS_pc) {
 111             plugin.SetConfig({ { PluginConfigParams::KEY_PERF_COUNT, PluginConfigParams::YES } });
 112         }
 113
 114         /** Printing plugin version **/
 115         printPluginVersion(plugin, std::cout);
 116         // -----------------------------------------------------------------------------------------------------
 117
 118         // --------------------------- 4. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
 119         std::string binFileName = fileNameNoExt(FLAGS_m) + ".bin";
 120         slog::info << "Loading network files:"
 121             "\n\t" << FLAGS_m <<
 122             "\n\t" << binFileName <<
 123             slog::endl;
 124
 125         CNNNetReader networkReader;
 126         /** Read network model **/
 127         networkReader.ReadNetwork(FLAGS_m);
 128
 129         /** Extract model name and load weights **/
 130         networkReader.ReadWeights(binFileName);
 131         CNNNetwork network = networkReader.getNetwork();
 132         // -----------------------------------------------------------------------------------------------------
 133
 134         // --------------------------- 5. Prepare input blobs --------------------------------------------------
 135         slog::info << "Preparing input blobs" << slog::endl;
 136
 137         /** Taking information about all topology inputs **/
 138         InputsDataMap inputsInfo(network.getInputsInfo());
 139
 140         /** SSD network has one input and one output **/
 141         if (inputsInfo.size() != 1 && inputsInfo.size() != 2) throw std::logic_error("Sample supports topologies only with 1 or 2 inputs");
 142
 143         /**
 144          * Some networks have SSD-like output format (ending with DetectionOutput layer), but
 145          * having 2 inputs as Faster-RCNN: one for image and one for "image info".
 146          *
 147          * Although object_datection_sample_ssd's main task is to support clean SSD, it could score
 148          * the networks with two inputs as well. For such networks imInfoInputName will contain the "second" input name.
 149          */
 150         std::string imageInputName, imInfoInputName;
 151
 152         InputInfo::Ptr inputInfo = nullptr;
 153
 154         SizeVector inputImageDims;
 155         /** Stores input image **/
 156
 157         /** Iterating over all input blobs **/
 158         for (auto & item : inputsInfo) {
 159             /** Working with first input tensor that stores image **/
 160             if (item.second->getInputData()->getTensorDesc().getDims().size() == 4) {
 161                 imageInputName = item.first;
 162
 163                 inputInfo = item.second;
 164
 165                 slog::info << "Batch size is " << std::to_string(networkReader.getNetwork().getBatchSize()) << slog::endl;
 166
 167                 /** Creating first input blob **/
 168                 Precision inputPrecision = Precision::U8;
 169                 item.second->setPrecision(inputPrecision);
 170             } else if (item.second->getInputData()->getTensorDesc().getDims().size() == 2) {
 171                 imInfoInputName = item.first;
 172
 173                 Precision inputPrecision = Precision::FP32;
 174                 item.second->setPrecision(inputPrecision);
 175                 if ((item.second->getTensorDesc().getDims()[1] != 3 && item.second->getTensorDesc().getDims()[1] != 6)) {
 176                     throw std::logic_error("Invalid input info. Should be 3 or 6 values length");
 177                 }
 178             }
 179         }
 180
 181         if (inputInfo == nullptr) {
 182             inputInfo = inputsInfo.begin()->second;
 183         }
 184         // -----------------------------------------------------------------------------------------------------
 185
 186         // --------------------------- 6. Prepare output blobs -------------------------------------------------
 187         slog::info << "Preparing output blobs" << slog::endl;
 188
 189         OutputsDataMap outputsInfo(network.getOutputsInfo());
 190
 191         std::string outputName;
 192         DataPtr outputInfo;
 193         for (const auto& out : outputsInfo) {
 194             if (out.second->creatorLayer.lock()->type == "DetectionOutput") {
 195                 outputName = out.first;
 196                 outputInfo = out.second;
 197             }
 198         }
 199
 200         if (outputInfo == nullptr) {
 201             throw std::logic_error("Can't find a DetectionOutput layer in the topology");
 202         }
 203
 204         const SizeVector outputDims = outputInfo->getTensorDesc().getDims();
 205
 206         const int maxProposalCount = outputDims[2];
 207         const int objectSize = outputDims[3];
 208
 209         if (objectSize != 7) {
 210             throw std::logic_error("Output item should have 7 as a last dimension");
 211         }
 212
 213         if (outputDims.size() != 4) {
 214             throw std::logic_error("Incorrect output dimensions for SSD model");
 215         }
 216
 217         /** Set the precision of output data provided by the user, should be called before load of the network to the plugin **/
 218         outputInfo->setPrecision(Precision::FP32);
 219         // -----------------------------------------------------------------------------------------------------
 220
 221         // --------------------------- 7. Loading model to the plugin ------------------------------------------
 222         slog::info << "Loading model to the plugin" << slog::endl;
 223
 224         ExecutableNetwork executable_network = plugin.LoadNetwork(network, {});
 225         // -----------------------------------------------------------------------------------------------------
 226
 227         // --------------------------- 8. Create infer request -------------------------------------------------
 228         InferRequest infer_request = executable_network.CreateInferRequest();
 229         // -----------------------------------------------------------------------------------------------------
 230
 231         // --------------------------- 9. Prepare input --------------------------------------------------------
 232         /** Collect images data ptrs **/
 233         std::vector<std::shared_ptr<unsigned char>> imagesData, originalImagesData;
 234         std::vector<size_t> imageWidths, imageHeights;
 235         for (auto & i : images) {
 236             FormatReader::ReaderPtr reader(i.c_str());
 237             if (reader.get() == nullptr) {
 238                 slog::warn << "Image " + i + " cannot be read!" << slog::endl;
 239                 continue;
 240             }
 241             /** Store image data **/
 242             std::shared_ptr<unsigned char> originalData(reader->getData());
 243             std::shared_ptr<unsigned char> data(reader->getData(inputInfo->getTensorDesc().getDims()[3], inputInfo->getTensorDesc().getDims()[2]));
 244             if (data.get() != nullptr) {
 245                 originalImagesData.push_back(originalData);
 246                 imagesData.push_back(data);
 247                 imageWidths.push_back(reader->width());
 248                 imageHeights.push_back(reader->height());
 249             }
 250         }
 251         if (imagesData.empty()) throw std::logic_error("Valid input images were not found!");
 252
 253         size_t batchSize = network.getBatchSize();
 254         slog::info << "Batch size is " << std::to_string(batchSize) << slog::endl;
 255         if (batchSize != imagesData.size()) {
 256             slog::warn << "Number of images " + std::to_string(imagesData.size()) + \
 257                 " doesn't match batch size " + std::to_string(batchSize) << slog::endl;
 258             batchSize = std::min(batchSize, imagesData.size());
 259             slog::warn << "Number of images to be processed is "<< std::to_string(batchSize) << slog::endl;
 260         }
 261
 262         /** Creating input blob **/
 263         Blob::Ptr imageInput = infer_request.GetBlob(imageInputName);
 264
 265         /** Filling input tensor with images. First b channel, then g and r channels **/
 266         size_t num_channels = imageInput->getTensorDesc().getDims()[1];
 267         size_t image_size = imageInput->getTensorDesc().getDims()[3] * imageInput->getTensorDesc().getDims()[2];
 268
 269         unsigned char* data = static_cast<unsigned char*>(imageInput->buffer());
 270
 271         /** Iterate over all input images **/
 272         for (size_t image_id = 0; image_id < std::min(imagesData.size(), batchSize); ++image_id) {
 273             /** Iterate over all pixel in image (b,g,r) **/
 274             for (size_t pid = 0; pid < image_size; pid++) {
 275                 /** Iterate over all channels **/
 276                 for (size_t ch = 0; ch < num_channels; ++ch) {
 277                     /**          [images stride + channels stride + pixel id ] all in bytes            **/
 278                     data[image_id * image_size * num_channels + ch * image_size + pid] = imagesData.at(image_id).get()[pid*num_channels + ch];
 279                 }
 280             }
 281         }
 282
 283         if (imInfoInputName != "") {
 284             Blob::Ptr input2 = infer_request.GetBlob(imInfoInputName);
 285             auto imInfoDim = inputsInfo.find(imInfoInputName)->second->getTensorDesc().getDims()[1];
 286
 287             /** Fill input tensor with values **/
 288             float *p = input2->buffer().as<PrecisionTrait<Precision::FP32>::value_type*>();
 289
 290             for (size_t image_id = 0; image_id < std::min(imagesData.size(), batchSize); ++image_id) {
 291                 p[image_id * imInfoDim + 0] = static_cast<float>(inputsInfo[imageInputName]->getTensorDesc().getDims()[2]);
 292                 p[image_id * imInfoDim + 1] = static_cast<float>(inputsInfo[imageInputName]->getTensorDesc().getDims()[3]);
 293                 for (size_t k = 2; k < imInfoDim; k++) {
 294                     p[image_id * imInfoDim + k] = 1.0f;  // all scale factors are set to 1.0
 295                 }
 296             }
 297         }
 298         // -----------------------------------------------------------------------------------------------------
 299
 300         // --------------------------- 10. Do inference ---------------------------------------------------------
 301         slog::info << "Start inference (" << FLAGS_ni << " iterations)" << slog::endl;
 302
 303         typedef std::chrono::high_resolution_clock Time;
 304         typedef std::chrono::duration<double, std::ratio<1, 1000>> ms;
 305         typedef std::chrono::duration<float> fsec;
 306
 307         double total = 0.0;
 308         /** Start inference & calc performance **/
 309         for (size_t iter = 0; iter < FLAGS_ni; ++iter) {
 310             auto t0 = Time::now();
 311             infer_request.Infer();
 312             auto t1 = Time::now();
 313             fsec fs = t1 - t0;
 314             ms d = std::chrono::duration_cast<ms>(fs);
 315             total += d.count();
 316         }
 317         // -----------------------------------------------------------------------------------------------------
 318
 319         // --------------------------- 11. Process output -------------------------------------------------------
 320         slog::info << "Processing output blobs" << slog::endl;
 321
 322         const Blob::Ptr output_blob = infer_request.GetBlob(outputName);
 323         const float* detection = static_cast<PrecisionTrait<Precision::FP32>::value_type*>(output_blob->buffer());
 324
 325         std::vector<std::vector<int> > boxes(batchSize);
 326         std::vector<std::vector<int> > classes(batchSize);
 327
 328         /* Each detection has image_id that denotes processed image */
 329         for (int curProposal = 0; curProposal < maxProposalCount; curProposal++) {
 330             auto image_id = static_cast<int>(detection[curProposal * objectSize + 0]);
 331             if (image_id < 0) {
 332                 break;
 333             }
 334
 335             float confidence = detection[curProposal * objectSize + 2];
 336             auto label = static_cast<int>(detection[curProposal * objectSize + 1]);
 337             auto xmin = static_cast<int>(detection[curProposal * objectSize + 3] * imageWidths[image_id]);
 338             auto ymin = static_cast<int>(detection[curProposal * objectSize + 4] * imageHeights[image_id]);
 339             auto xmax = static_cast<int>(detection[curProposal * objectSize + 5] * imageWidths[image_id]);
 340             auto ymax = static_cast<int>(detection[curProposal * objectSize + 6] * imageHeights[image_id]);
 341
 342             std::cout << "[" << curProposal << "," << label << "] element, prob = " << confidence <<
 343                 "    (" << xmin << "," << ymin << ")-(" << xmax << "," << ymax << ")" << " batch id : " << image_id;
 344
 345             if (confidence > 0.5) {
 346                 /** Drawing only objects with >50% probability **/
 347                 classes[image_id].push_back(label);
 348                 boxes[image_id].push_back(xmin);
 349                 boxes[image_id].push_back(ymin);
 350                 boxes[image_id].push_back(xmax - xmin);
 351                 boxes[image_id].push_back(ymax - ymin);
 352                 std::cout << " WILL BE PRINTED!";
 353             }
 354             std::cout << std::endl;
 355         }
 356
 357         for (size_t batch_id = 0; batch_id < batchSize; ++batch_id) {
 358             addRectangles(originalImagesData[batch_id].get(), imageHeights[batch_id], imageWidths[batch_id], boxes[batch_id], classes[batch_id],
 359                           BBOX_THICKNESS);
 360             const std::string image_path = "out_" + std::to_string(batch_id) + ".bmp";
 361             if (writeOutputBmp(image_path, originalImagesData[batch_id].get(), imageHeights[batch_id], imageWidths[batch_id])) {
 362                 slog::info << "Image " + image_path + " created!" << slog::endl;
 363             } else {
 364                 throw std::logic_error(std::string("Can't create a file: ") + image_path);
 365             }
 366         }
 367         // -----------------------------------------------------------------------------------------------------
 368         std::cout << std::endl << "total inference time: " << total << std::endl;
 369         std::cout << "Average running time of one iteration: " << total / static_cast<double>(FLAGS_ni) << " ms" << std::endl;
 370         std::cout << std::endl << "Throughput: " << 1000 * static_cast<double>(FLAGS_ni) * batchSize / total << " FPS" << std::endl;
 371         std::cout << std::endl;
 372
 373         /** Show performance results **/
 374         if (FLAGS_pc) {
 375             printPerformanceCounts(infer_request, std::cout);
 376         }
 377     }
 378     catch (const std::exception& error) {
 379         slog::err << error.what() << slog::endl;
 380         return 1;
 381     }
 382     catch (...) {
 383         slog::err << "Unknown/internal exception happened." << slog::endl;
 384         return 1;
 385     }
 386
 387     slog::info << "Execution successful" << slog::endl;
 388     return 0;
 389 }