// Publishing 2019 R1 content
// [platform/upstream/dldt.git] / inference-engine / samples / lenet_network_graph_builder / main.cpp
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #include <fstream>
6 #include <vector>
7 #include <string>
8 #include <memory>
9 #include <limits>
10
11 #include <inference_engine.hpp>
12 #include <ie_builders.hpp>
13 #include <ie_utils.hpp>
14 #include <format_reader_ptr.h>
15
16 #include <samples/common.hpp>
17 #include <samples/slog.hpp>
18 #include <samples/args_helper.hpp>
19
20 #include <gflags/gflags.h>
21 #include "lenet_network_graph_builder.hpp"
22
23 using namespace InferenceEngine;
24
25 bool ParseAndCheckCommandLine(int argc, char *argv[]) {
26     slog::info << "Parsing input parameters" << slog::endl;
27
28     gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
29     if (FLAGS_h) {
30         showUsage();
31         return false;
32     }
33
34     if (FLAGS_ni <= 0) {
35         throw std::logic_error("Incorrect value for ni argument. It should be more than 0");
36     }
37
38     if (FLAGS_nt <= 0 || FLAGS_nt > 10) {
39         throw std::logic_error("Incorrect value for nt argument. It should be more than 0 and less than 10");
40     }
41
42     return true;
43 }
44
/**
 * @brief Reads exactly @p maxSize bytes from a binary file into a caller-provided buffer.
 * @param file_name path to the file to read
 * @param buffer destination buffer; must hold at least @p maxSize bytes
 * @param maxSize number of bytes to read
 * @throws std::logic_error if the file cannot be opened or the read falls short
 */
void readFile(const std::string &file_name, void *buffer, size_t maxSize) {
    // RAII: the stream closes itself on every exit path (including throws),
    // so no manual close() calls are needed.
    std::ifstream inputFile(file_name, std::ios::binary | std::ios::in);
    if (!inputFile.is_open()) {
        throw std::logic_error("cannot open weight file");
    }
    // read() takes a signed std::streamsize; make the conversion explicit.
    if (!inputFile.read(reinterpret_cast<char *>(buffer), static_cast<std::streamsize>(maxSize))) {
        throw std::logic_error("cannot read bytes from weight file");
    }
}
59
60 TBlob<uint8_t>::CPtr ReadWeights(std::string filepath) {
61     std::ifstream weightFile(filepath, std::ifstream::ate | std::ifstream::binary);
62     int64_t fileSize = weightFile.tellg();
63
64     if (fileSize < 0) {
65         throw std::logic_error("Incorrect weight file");
66     }
67
68     size_t ulFileSize = static_cast<size_t>(fileSize);
69
70     TBlob<uint8_t>::Ptr weightsPtr(new TBlob<uint8_t>(Precision::FP32, C, {ulFileSize}));
71     weightsPtr->allocate();
72     readFile(filepath, weightsPtr->buffer(), ulFileSize);
73
74     return weightsPtr;
75 }
76
/**
 * @brief The entry point for the Inference Engine LeNet network graph builder sample
 * @file lenet_network_graph_builder/main.cpp
 * @example lenet_network_graph_builder/main.cpp
 *
 * Builds a LeNet-5-style topology at runtime via the Builder API, loads raw
 * weights from the file given by -m, runs inference on the input images -ni
 * times, and prints the top -nt classification results per image.
 * @return 0 on success (or after printing help), 3 on any error
 */
int main(int argc, char *argv[]) {
    try {
        slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl;

        // Returns false when -h was given; usage has already been printed.
        if (!ParseAndCheckCommandLine(argc, argv)) {
            return 0;
        }

        /** This vector stores paths to the processed images **/
        std::vector<std::string> images;
        parseInputFilesArguments(images);
        if (images.empty()) {
            throw std::logic_error("No suitable images were found");
        }

        // --------------------------- 1. Load Plugin for inference engine -------------------------------------
        slog::info << "Loading plugin" << slog::endl;
        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d);
        printPluginVersion(plugin, std::cout);

        /** Per layer metrics (enabled by -pc; reported at the end of main) **/
        if (FLAGS_pc) {
            plugin.SetConfig({ { PluginConfigParams::KEY_PERF_COUNT, PluginConfigParams::YES } });
        }
        // -----------------------------------------------------------------------------------------------------

        //--------------------------- 2. Create network using graph builder ------------------------------------
        // The whole weight file is read once; the typed blobs below are
        // non-owning float views at fixed offsets into this single buffer.
        TBlob<uint8_t>::CPtr weightsPtr = ReadWeights(FLAGS_m);

        Builder::Network builder("LeNet");
        // Input port: batch 1, 1 channel, 28x28 pixels (the batch dimension is
        // overridden later via network.setBatchSize()).
        idx_t layerId = builder.addLayer(Builder::InputLayer("data").setPort(Port({1, 1, 28, 28})));
        // conv1 weights: 20 filters x 1 channel x 5x5 kernel = 500 floats at float offset 0.
        auto ptrWeights = make_shared_blob(TensorDesc(Precision::FP32, {500}, Layout::C),
                weightsPtr->cbuffer().as<float *>());
        // conv1 biases: 20 floats immediately after the conv1 weights.
        auto ptrBiases = make_shared_blob(TensorDesc(Precision::FP32, {20}, Layout::C),
                weightsPtr->cbuffer().as<float *>() + 500);
        idx_t weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(ptrWeights));
        idx_t biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(ptrBiases));
        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::ConvolutionLayer("conv1")
                  .setKernel({5, 5}).setDilation({1, 1}).setGroup(1).setStrides({1, 1}).setOutDepth(20)
                  .setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0}));
        layerId = builder.addLayer({{layerId}}, Builder::PoolingLayer("pool1").setExcludePad(true).setKernel({2, 2})
                  .setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0})
                  .setPoolingType(Builder::PoolingLayer::PoolingType::MAX)
                  .setRoundingType(Builder::PoolingLayer::RoundingType::CEIL).setStrides({2, 2}));
        // conv2 weights: 50 filters x 20 channels x 5x5 = 25000 floats at float
        // offset 520 (= 500 conv1 weights + 20 conv1 biases).
        ptrWeights = make_shared_blob(TensorDesc(Precision::FP32, {25000}, Layout::C),
                weightsPtr->cbuffer().as<float *>() + 520);
        // conv2 biases: 50 floats at float offset 25520 (= 520 + 25000).
        ptrBiases = make_shared_blob(TensorDesc(Precision::FP32, {50}, Layout::C),
                weightsPtr->cbuffer().as<float *>() + 25520);
        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(ptrWeights));
        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(ptrBiases));
        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::ConvolutionLayer("conv2")
                  .setDilation({1, 1}).setGroup(1).setKernel({5, 5}).setOutDepth(50).setPaddingsBegin({0, 0})
                  .setPaddingsEnd({0, 0}).setStrides({1, 1}));
        layerId = builder.addLayer({{layerId}}, Builder::PoolingLayer("pool2").setExcludePad(true).setKernel({2, 2})
                  .setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0}).setPoolingType(Builder::PoolingLayer::PoolingType::MAX)
                  .setRoundingType(Builder::PoolingLayer::RoundingType::CEIL).setStrides({2, 2}));
        // From here on offsets are written as "byte offset / 4" to index floats:
        // 102280 = (520 + 25000 + 50) * 4, i.e. the byte right after conv2's biases.
        // ip1 weights: 500 outputs x 800 inputs = 400000 floats.
        ptrWeights = make_shared_blob(TensorDesc(Precision::FP32, {400000}, Layout::C),
                weightsPtr->cbuffer().as<float *>() + 102280 / 4);
        // ip1 biases: 500 floats (1702280 = 102280 + 400000 * 4).
        ptrBiases = make_shared_blob(TensorDesc(Precision::FP32, {500}, Layout::C),
                weightsPtr->cbuffer().as<float *>() + 1702280 / 4);
        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(ptrWeights));
        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(ptrBiases));
        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::FullyConnectedLayer("ip1")
                .setOutputNum(500));
        layerId = builder.addLayer({{layerId}}, Builder::ReLULayer("relu1").setNegativeSlope(0.0f));
        // ip2 weights: 10 outputs x 500 inputs = 5000 floats (1704280 = 1702280 + 500 * 4);
        // ip2 biases: 10 floats (1724280 = 1704280 + 5000 * 4).
        ptrWeights = make_shared_blob(TensorDesc(Precision::FP32, {5000}, Layout::C),
                weightsPtr->cbuffer().as<float *>() + 1704280 / 4);
        ptrBiases = make_shared_blob(TensorDesc(Precision::FP32, {10}, Layout::C),
                weightsPtr->cbuffer().as<float *>() + 1724280 / 4);
        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(ptrWeights));
        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(ptrBiases));
        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::FullyConnectedLayer("ip2")
                  .setOutputNum(10));
        // Softmax over axis 1 (the class axis) followed by the network output.
        layerId = builder.addLayer({{layerId}}, Builder::SoftMaxLayer("prob").setAxis(1));
        builder.addLayer({PortInfo(layerId)}, Builder::OutputLayer("sf_out"));

        // Materialize the builder graph into a runnable CNNNetwork.
        CNNNetwork network{Builder::convertToICNNNetwork(builder.build())};
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 3. Configure input & output ---------------------------------------------
        // --------------------------- Prepare input blobs -----------------------------------------------------
        slog::info << "Preparing input blobs" << slog::endl;

        InputsDataMap inputInfo = network.getInputsInfo();
        if (inputInfo.size() != 1) {
            throw std::logic_error("Sample supports topologies only with 1 input");
        }

        auto inputInfoItem = *inputInfo.begin();

        /** Specifying the precision and layout of input data provided by the user.
         * This should be called before load of the network to the plugin **/
        inputInfoItem.second->setPrecision(Precision::FP32);
        inputInfoItem.second->setLayout(Layout::NCHW);

        // Read every image, resized by the reader to the network's spatial
        // dims: dims[3] is width, dims[2] is height in NCHW order.
        std::vector<std::shared_ptr<unsigned char>> imagesData;
        for (auto & i : images) {
            FormatReader::ReaderPtr reader(i.c_str());
            if (reader.get() == nullptr) {
                // Unreadable images are skipped, not fatal.
                slog::warn << "Image " + i + " cannot be read!" << slog::endl;
                continue;
            }
            /** Store image data **/
            std::shared_ptr<unsigned char> data(
                    reader->getData(inputInfoItem.second->getTensorDesc().getDims()[3],
                                    inputInfoItem.second->getTensorDesc().getDims()[2]));
            if (data.get() != nullptr) {
                imagesData.push_back(data);
            }
        }

        if (imagesData.empty()) {
            throw std::logic_error("Valid input images were not found!");
        }

        /** Setting batch size using image count **/
        network.setBatchSize(imagesData.size());
        size_t batchSize = network.getBatchSize();
        slog::info << "Batch size is " << std::to_string(batchSize) << slog::endl;

        // --------------------------- Prepare output blobs -----------------------------------------------------
        slog::info << "Checking that the outputs are as the demo expects" << slog::endl;
        OutputsDataMap outputInfo(network.getOutputsInfo());
        std::string firstOutputName;

        // Remember the first output's name (used when fetching results) and
        // force FP32 precision on every output.
        for (auto & item : outputInfo) {
            if (firstOutputName.empty()) {
                firstOutputName = item.first;
            }
            DataPtr outputData = item.second;
            if (!outputData) {
                throw std::logic_error("output data pointer is not valid");
            }

            item.second->setPrecision(Precision::FP32);
        }

        if (outputInfo.size() != 1) {
            throw std::logic_error("This demo accepts networks having only one output");
        }

        DataPtr& output = outputInfo.begin()->second;
        auto outputName = outputInfo.begin()->first;

        const SizeVector outputDims = output->getTensorDesc().getDims();
        // NOTE(review): outputDims[1] is read before the size() == 2 check
        // below; for a non-2D output this indexes past the vector's end —
        // consider reordering the two checks.
        const int classCount = outputDims[1];

        if (classCount > 10) {
            throw std::logic_error("Incorrect number of output classes for LeNet network");
        }

        if (outputDims.size() != 2) {
            throw std::logic_error("Incorrect output dimensions for LeNet");
        }
        output->setPrecision(Precision::FP32);
        output->setLayout(Layout::NC);

        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 4. Loading model to the plugin ------------------------------------------
        slog::info << "Loading model to the plugin" << slog::endl;
        ExecutableNetwork exeNetwork = plugin.LoadNetwork(network, {});
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 5. Create infer request -------------------------------------------------
        InferRequest infer_request = exeNetwork.CreateInferRequest();
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 6. Prepare input --------------------------------------------------------
        /** Iterate over all the input blobs **/
        for (const auto & item : inputInfo) {
            /** Creating input blob **/
            Blob::Ptr input = infer_request.GetBlob(item.first);

            /** Filling input tensor with images: converts the reader's
             * interleaved (HWC) byte data to the blob's planar (CHW) floats **/
            size_t num_channels = input->getTensorDesc().getDims()[1];
            size_t image_size = input->getTensorDesc().getDims()[2] * input->getTensorDesc().getDims()[3];

            auto data = input->buffer().as<PrecisionTrait<Precision::FP32>::value_type*>();

            /** Iterate over all input images **/
            for (size_t image_id = 0; image_id < imagesData.size(); ++image_id) {
                /** Iterate over all pixels in the image **/
                for (size_t pid = 0; pid < image_size; pid++) {
                    /** Iterate over all channels **/
                    for (size_t ch = 0; ch < num_channels; ++ch) {
                        /** [image stride + channel stride + pixel id] — all offsets in elements **/
                        data[image_id * image_size * num_channels + ch * image_size + pid ] = imagesData.at(image_id).get()[pid*num_channels + ch];
                    }
                }
            }
        }
        // Release the input-info map; it is no longer needed past this point.
        inputInfo = {};
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 7. Do inference ---------------------------------------------------------
        typedef std::chrono::high_resolution_clock Time;
        typedef std::chrono::duration<double, std::ratio<1, 1000>> ms;
        typedef std::chrono::duration<float> fsec;

        double total = 0.0;
        /** Start inference & calc performance: total accumulates wall time in milliseconds **/
        for (size_t iter = 0; iter < FLAGS_ni; ++iter) {
            auto t0 = Time::now();
            infer_request.Infer();
            auto t1 = Time::now();
            fsec fs = t1 - t0;
            ms d = std::chrono::duration_cast<ms>(fs);
            total += d.count();
        }
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 8. Process output -------------------------------------------------------
        slog::info << "Processing output blobs" << slog::endl;

        const Blob::Ptr outputBlob = infer_request.GetBlob(firstOutputName);
        auto outputData = outputBlob->buffer().as<PrecisionTrait<Precision::FP32>::value_type*>();

        /** Validating -nt value: clamp it to the per-image class count **/
        const size_t resultsCnt = outputBlob->size() / batchSize;
        if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) {
            slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " \
                      << resultsCnt+1 << " and more than 0)\n            will be used maximal value : " << resultsCnt;
            FLAGS_nt = resultsCnt;
        }

        /** This vector stores id's of top N results (FLAGS_nt entries per batch item) **/
        std::vector<unsigned> results;
        TopResults(FLAGS_nt, *outputBlob, results);

        std::cout << std::endl << "Top " << FLAGS_nt << " results:" << std::endl << std::endl;

        /** Print the result iterating over each batch **/
        // NOTE(review): images[image_id] indexes the original path list, while
        // batchSize counts only successfully read images; if any image was
        // skipped above, the printed file names can be misaligned — verify.
        for (size_t image_id = 0; image_id < batchSize; ++image_id) {
            std::cout << "Image " << images[image_id] << std::endl << std::endl;
            for (size_t id = image_id * FLAGS_nt, cnt = 0; cnt < FLAGS_nt; ++cnt, ++id) {
                std::cout.precision(7);
                /** Getting probability for resulting class: results[id] is the
                 * class index, offset by this image's stride into the blob **/
                const auto result = outputData[results[id] + image_id*(outputBlob->size() / batchSize)];
                std::cout << std::left << std::fixed << "Number: " << results[id] << "; Probability: " << result << std::endl;
            }
            std::cout << std::endl;
        }
        // Guard the divisions below against a zero total time.
        if (std::fabs(total) < std::numeric_limits<double>::epsilon()) {
            throw std::logic_error("total can't be equal to zero");
        }
        // -----------------------------------------------------------------------------------------------------
        std::cout << std::endl << "total inference time: " << total << std::endl;
        std::cout << "Average running time of one iteration: " << total / static_cast<double>(FLAGS_ni) << " ms" << std::endl;
        std::cout << std::endl << "Throughput: " << 1000 * static_cast<double>(FLAGS_ni) * batchSize / total << " FPS" << std::endl;
        std::cout << std::endl;
        // -----------------------------------------------------------------------------------------------------

        /** Show performance results (only when -pc enabled perf counters above) **/
        if (FLAGS_pc) {
            printPerformanceCounts(infer_request, std::cout);
        }
    } catch  (const std::exception &ex) {
        slog::err << ex.what() << slog::endl;
        return 3;
    }
    return 0;
}