Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / samples / calibration_tool / calibrator_processors.cpp
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
#include "calibrator_processors.h"
#include <algorithm>
#include <cmath>        // std::sqrt, std::fabs
#include <iomanip>
#include <iostream>     // std::cout
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <sstream>      // std::stringstream
#include <string>       // std::string
#include <utility>
#include "details/ie_cnn_network_tools.h"
#include "details/caseless.hpp"
18
19 using namespace InferenceEngine;
20 using namespace InferenceEngine::details;
21
22 using InferenceEngine::details::InferenceEngineException;
23
24 CNNLayerPtr Int8Calibrator::addScaleShiftBeforeLayer(std::string name, CNNLayer::Ptr beforeLayer, size_t port, std::vector<float> scale) {
25     if (beforeLayer->insData.size() < port) {
26         THROW_IE_EXCEPTION << "cannot find appropraite port for addScaleShiftBeforeLayer";
27     }
28
29     DataPtr pData = beforeLayer->insData[port].lock();
30     LayerParams params;
31     params.name = name;
32     params.precision = Precision::FP32;
33     params.type = "ScaleShift";
34     CNNLayerPtr lptr = std::make_shared<ScaleShiftLayer>(params);
35     ScaleShiftLayer *pScaleShift = dynamic_cast<ScaleShiftLayer *>(lptr.get());
36
37     SizeVector wdims({ pData->dims[2] });
38
39     if (scale.size() == 1) {
40         scale.resize(wdims[0]);
41         for (size_t i = 1; i < wdims[0]; i++) {
42             scale[i] = scale[0];
43         }
44     }
45
46     if (scale.size() != pData->dims[2]) {
47         THROW_IE_EXCEPTION << "Failed to add scaleshift before " << beforeLayer->name << " due to scales and layer output dims incossitency";
48     }
49
50     Blob::Ptr weights = nullptr;
51     weights = make_shared_blob<float>(Precision::FP32, Layout::C, wdims);
52     weights->allocate();
53     float *buffer = weights->buffer().as<float *>();
54     if (buffer == nullptr) {
55         THROW_IE_EXCEPTION << "Could not allocate weights buffer";
56     }
57     for (size_t i = 0; i < pData->dims[2]; i++) {
58         buffer[i] = scale[i];
59     }
60     pScaleShift->_weights = weights;
61
62
63     SizeVector bdims({ pData->dims[2] });
64     Blob::Ptr biases = nullptr;
65     biases = make_shared_blob<float>(Precision::FP32, Layout::C, bdims);
66     biases->allocate();
67     buffer = biases->buffer().as<float *>();
68     for (size_t i = 0; i < pData->dims[2]; i++) {
69         buffer[i] = 0.f;
70     }
71     pScaleShift->_biases = biases;
72
73     Data *edge2 = new Data(*pData.get());
74     DataPtr newEdge(edge2);
75     lptr->insData.push_back(pData);
76     lptr->outData.push_back(newEdge);
77     newEdge->name = /*"EdgeAfter_" +*/ params.name;
78     newEdge->creatorLayer = lptr;
79     newEdge->inputTo.clear();
80     newEdge->inputTo[beforeLayer->name] = beforeLayer;
81
82     pData->inputTo.erase(beforeLayer->name);
83     pData->inputTo[params.name] = lptr;
84
85     for (size_t i = 0; i < beforeLayer->insData.size(); i++) {
86         DataPtr d = beforeLayer->insData[i].lock();
87         if (d == pData) {
88             beforeLayer->insData[i] = newEdge;
89             break;
90         }
91     }
92     return lptr;
93 }
94
95
96 float Int8Calibrator::compare_NRMSD(InferenceEngine::Blob::Ptr res, InferenceEngine::Blob::Ptr ref) {
97     float *res_ptr = res->buffer().as<float *>();
98
99     float *ref_ptr = ref->buffer().as<float *>();
100     size_t ref_size = ref->size();
101
102     float sum = 0;
103
104     float mmin = ref_ptr[0], mmax = ref_ptr[0];
105
106     for (size_t i = 0; i < ref_size; i++) {
107         float sqr = (ref_ptr[i] - res_ptr[i]);
108         sqr *= sqr;
109         sum += sqr;
110
111         mmin = std::min(mmin, ref_ptr[i]);
112         mmax = std::max(mmax, ref_ptr[i]);
113     }
114     if (std::fabs(ref_size) < std::numeric_limits<double>::epsilon()) {
115         throw std::logic_error("ref_size can't be equal to zero");
116     }
117     sum /= ref_size;
118
119     sum = pow(sum, 0.5f);
120
121     sum /= mmax - mmin;
122
123     return sum;
124 }
125
126
127 InferenceEngine::NetworkStatsMap Int8Calibrator::getStatistic(float threshold) {
128     InferenceEngine::NetworkStatsMap netNodesStats;
129     // go over all outputs and get aggregated statistics
130     for (auto l : _statData.registeredLayers()) {
131         NetworkNodeStatsPtr nodeStats;
132         size_t channels = _statData.getNumberChannels(l);
133         if (netNodesStats.find(l) == netNodesStats.end()) {
134             nodeStats = NetworkNodeStatsPtr(new NetworkNodeStats(channels));
135
136             netNodesStats[l] = nodeStats;
137         } else {
138             nodeStats = netNodesStats[l];
139         }
140         for (size_t c = 0; c < channels; c++) {
141             _statData.getDataMinMax(l, c, nodeStats->_minOutputs[c], nodeStats->_maxOutputs[c], threshold);
142         }
143     }
144     return netNodesStats;
145 }
146
147
void Int8Calibrator::collectFP32Statistic() {
    // Prepares the FP32 statistics-collection pass: loads the model, inserts
    // an identity ScaleShift after every network input (so input data shows up
    // in the statistics like any other layer output), exposes all layers as
    // network outputs and creates the inference request used later by
    // collectCalibrationStatistic().
    _collectByLayer = false;
    _collectStatistic = true;

    networkReaderC = InferenceEngine::CNNNetReader();
    networkReaderC.ReadNetwork(_modelFileNameI8C);
    if (!networkReaderC.isParseSuccess()) THROW_IE_EXCEPTION << "cannot load a failed Model";
    /** Extract model name and load weights **/
    std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin";
    networkReaderC.ReadWeights(binFileName.c_str());
    if (_cBatch == 0) {
        // Zero means "take batch value from the IR"
        _cBatch = networkReaderC.getNetwork().getBatchSize();
    } else {
        // Not zero means "use the specified value"
        auto input_shapes = networkReaderC.getNetwork().getInputShapes();
        std::string input_name;
        SizeVector input_shape;
        std::tie(input_name, input_shape) = *input_shapes.begin();
        input_shape[0] = _cBatch;
        input_shapes[input_name] = input_shape;
        networkReaderC.getNetwork().reshape(input_shapes);
    }

    auto network = networkReaderC.getNetwork();


    std::vector<CNNLayerPtr> layersAfterInputs;

    std::string hackPrefix = "scaleshifted_input:";

    // Find every layer directly connected to a network input; each gets an
    // identity ScaleShift inserted before it (loop below), and the synthetic
    // name is recorded so statistics can be mapped back to the real input.
    for (auto &&layer : network) {
        if (layer->insData.size() > 0) {
            std::string inName = layer->input()->getName();
            for (auto &&input : network.getInputsInfo()) {
                if (inName == input.first) {
                    layersAfterInputs.push_back(layer);
                    _inputsFromLayers[hackPrefix + layer->name] = inName;
                }
            }
        }
    }

    for (auto &&layer : layersAfterInputs) {
        std::string firstInputName = hackPrefix + layer->name;
        // A single scale of 1.f makes the inserted ScaleShift an identity.
        auto scaleShiftLayer = addScaleShiftBeforeLayer(firstInputName, layer, 0, { 1.f });
        ((ICNNNetwork&)network).addLayer(scaleShiftLayer);
    }


    // 1. add all layers as output one
    for (auto &&layer : network) {
        std::string layerType = network.getLayerByName(layer->name.c_str())->type;
        if (layerType != "Const") {
            if (/*layerType != "Split" &&*/layerType != "Input") {
                network.addOutput(layer->name);
            }
            _statData.registerLayer(layer->name);
        }
    }

    // EXCLUSIVE_ASYNC_REQUESTS presumably serializes this request with the
    // per-layer requests created in collectByLayerStatistic() -- confirm.
    ExecutableNetwork executable_network = _pluginI8C.LoadNetwork(network, { { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES) } });
    _inferRequestI8C = executable_network.CreateInferRequest();
}
212
void Int8Calibrator::validateInt8Config(const InferenceEngine::NetworkStatsMap &stat,
                                        const std::map<std::string, bool> &layersToInt8,
                                        bool convertFullyConnected) {
    // Prepares a candidate int8 configuration for accuracy validation:
    // re-loads the model, attaches the collected statistics, tags layers with
    // a "quantization_level" param (I8 or FP32) and compiles the network so
    // the caller can measure the configuration's accuracy.
    // `layersToInt8` holds explicit per-layer overrides (true = I8);
    // `convertFullyConnected` controls whether FC layers are quantized at all.
    _collectByLayer = false;
    _collectStatistic = false;
    networkReaderC = InferenceEngine::CNNNetReader();
    networkReaderC.ReadNetwork(_modelFileNameI8C);
    if (!networkReaderC.isParseSuccess()) THROW_IE_EXCEPTION << "cannot load a failed Model";
    /** Extract model name and load weights **/
    std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin";
    networkReaderC.ReadWeights(binFileName.c_str());
    if (_cBatch == 0) {
        // Zero means "take batch value from the IR"
        _cBatch = networkReaderC.getNetwork().getBatchSize();
    } else {
        // Not zero means "use the specified value"
        auto input_shapes = networkReaderC.getNetwork().getInputShapes();
        std::string input_name;
        SizeVector input_shape;
        std::tie(input_name, input_shape) = *input_shapes.begin();
        input_shape[0] = _cBatch;
        input_shapes[input_name] = input_shape;
        networkReaderC.getNetwork().reshape(input_shapes);
    }

    // Initialize statistic
    ICNNNetworkStats *pstats = nullptr;
    StatusCode s = ((ICNNNetwork&)networkReaderC.getNetwork()).getStats(&pstats, nullptr);
    if (s == StatusCode::OK && pstats) {
        pstats->setNodesStats(stat);
    }

    auto network = networkReaderC.getNetwork();

    // FullyConnected layers are forced to FP32 unless explicitly allowed.
    for (auto l : network) {
        if (l->type == "FullyConnected") {
            l->params["quantization_level"] = (convertFullyConnected == false) ? "FP32" : "I8";
        }
    }

    // Apply the caller's explicit per-layer precision choices.
    for (auto l : layersToInt8) {
        network.getLayerByName(l.first.c_str())->
            params["quantization_level"] = (l.second == false) ? "FP32" : "I8";
    }

    ExecutableNetwork executable_network = _pluginI8C.LoadNetwork(network, { { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES) } });
    _inferRequestI8C = executable_network.CreateInferRequest();
}
261
262 CNNNetwork Int8Calibrator::createICNNNetworkForLayer(CNNLayer::Ptr layerToClone, bool hasReLU) {
263     CNNLayer::Ptr layerRelU = layerToClone->outData[0]->inputTo.begin()->second;
264
265     InferenceEngine::CNNNetReader reader1;
266     DataPtr inputData = layerToClone->insData[0].lock();
267     std::string inputName = inputData->name;
268
269     size_t inputBatch = inputData->getTensorDesc().getDims()[0];
270     size_t inputChannels = inputData->getTensorDesc().getDims()[1];
271     size_t inputHeight = inputData->getTensorDesc().getDims()[2];
272     size_t inputWidth = inputData->getTensorDesc().getDims()[3];
273
274     DataPtr outputData = layerToClone->outData[0];
275     size_t outputBatch = outputData->getTensorDesc().getDims()[0];
276     size_t outputChannels = outputData->getTensorDesc().getDims()[1];
277     size_t outputHeight = outputData->getTensorDesc().getDims()[2];
278     size_t outputWidth = outputData->getTensorDesc().getDims()[3];
279
280     ConvolutionLayer *pConvS = dynamic_cast<ConvolutionLayer *>(layerToClone.get());
281
282     std::string model = "<net name=\"L\" version=\"2\" batch=\"1\"><layers> "\
283         "<layer name=\"" +
284         inputName +
285         "\" type=\"Input\" precision=\"FP32\" id=\"0\"> "\
286         "<output>"\
287         "<port id=\"0\">"\
288         "<dim>" + std::to_string(inputBatch) + "</dim>"\
289         "<dim>" + std::to_string(inputChannels) + "</dim>"\
290         "<dim>" + std::to_string(inputHeight) + "</dim>"\
291         "<dim>" + std::to_string(inputWidth) + "</dim>"\
292         "</port>"\
293         "</output>"\
294         "</layer>"\
295         "<layer name=\"" +
296         layerToClone->name +
297         "\" type=\"Convolution\" precision=\"FP32\" id=\"1\">"\
298         "<convolution_data stride-x=\"" + std::to_string(pConvS->_stride_x) +
299         "\" stride-y=\"" + std::to_string(pConvS->_stride_y) +
300         "\" pad-x=\"" + std::to_string(pConvS->_padding_x) +
301         "\" pad-y=\"" + std::to_string(pConvS->_padding_y) +
302         "\" kernel-x=\"" + std::to_string(pConvS->_kernel_x) +
303         "\" kernel-y=\"" + std::to_string(pConvS->_kernel_y) +
304         "\" dilation-x=\"" + std::to_string(pConvS->_dilation_x) +
305         "\" dilation-y=\"" + std::to_string(pConvS->_dilation_y) +
306         "\" output=\"" + std::to_string(pConvS->_out_depth) +
307         "\" group=\"" + std::to_string(pConvS->_group) + "\" />"\
308         "<input>"\
309         "<port id=\"1\">"\
310         "<dim>" + std::to_string(inputBatch) + "</dim>"\
311         "<dim>" + std::to_string(inputChannels) + "</dim>"\
312         "<dim>" + std::to_string(inputHeight) + "</dim>"\
313         "<dim>" + std::to_string(inputWidth) + "</dim>"\
314         "</port>"\
315         "</input>"\
316         "<output>"\
317         "<port id=\"2\">"\
318         "<dim>" + std::to_string(outputBatch) + "</dim>"\
319         "<dim>" + std::to_string(outputChannels) + "</dim>"\
320         "<dim>" + std::to_string(outputHeight) + "</dim>"\
321         "<dim>" + std::to_string(outputWidth) + "</dim>"\
322         "</port>"\
323         "</output>"\
324         "</layer>";
325     if (hasReLU) {
326         model += "<layer name=\"" +
327             layerRelU->name +
328             "\" type=\"ReLU\" precision=\"FP32\" id=\"2\">"\
329             "<input>"
330             "<port id=\"3\">"\
331             "<dim>" + std::to_string(outputBatch) + "</dim>"\
332             "<dim>" + std::to_string(outputChannels) + "</dim>"\
333             "<dim>" + std::to_string(outputHeight) + "</dim>"\
334             "<dim>" + std::to_string(outputWidth) + "</dim>"\
335             "</port>"\
336             "</input>"\
337             "<output>"\
338             "<port id=\"4\">"\
339             "<dim>" + std::to_string(outputBatch) + "</dim>"\
340             "<dim>" + std::to_string(outputChannels) + "</dim>"\
341             "<dim>" + std::to_string(outputHeight) + "</dim>"\
342             "<dim>" + std::to_string(outputWidth) + "</dim>"\
343             "</port>"\
344             "</output>"\
345             "</layer>";
346     }
347     model += "</layers> <edges>"\
348         "<edge from-layer=\"0\" from-port=\"0\" to-layer=\"1\" to-port=\"1\"/> ";
349     if (hasReLU) {
350         model += "<edge from-layer=\"1\" from-port=\"2\" to-layer=\"2\" to-port=\"3\"/> ";
351     }
352     model += "</edges></net>";
353
354     reader1.ReadNetwork(model.c_str(), model.length());
355     ICNNNetwork &n = reader1.getNetwork();
356
357     InferenceEngine::InputsDataMap inputs;
358     n.getInputsInfo(inputs);
359     CNNLayerPtr inputLayer = inputs.begin()->second->getInputData()->creatorLayer.lock();
360
361     CNNLayerPtr convLayer;
362     n.getLayerByName(layerToClone->name.c_str(), convLayer, nullptr);
363     ConvolutionLayer *pConvT = dynamic_cast<ConvolutionLayer *>(convLayer.get());
364     pConvT->_weights = pConvS->_weights;
365     pConvT->_biases = pConvS->_biases;
366     pConvT->blobs = pConvS->blobs;
367
368     return reader1.getNetwork();
369 }
370
void Int8Calibrator::collectByLayerStatistic(const InferenceEngine::NetworkStatsMap &stat) {
    // Prepares the per-layer accuracy measurement pass: loads the full model
    // with every layer output exposed, then, for each Convolution layer,
    // builds a separate single-layer network with the collected statistics
    // attached and an input blob shared with the full-precision request.
    // collectCalibrationStatistic() later compares the two outputs per image.
    _collectByLayer = true;
    _collectStatistic = false;
    networkReaderC = InferenceEngine::CNNNetReader();
    networkReaderC.ReadNetwork(_modelFileNameI8C);
    if (!networkReaderC.isParseSuccess()) THROW_IE_EXCEPTION << "cannot load a failed Model";
    /** Extract model name and load weights **/
    std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin";
    networkReaderC.ReadWeights(binFileName.c_str());
    if (_cBatch != 0) {
        // Non-zero batch overrides the batch taken from the IR.
        auto input_shapes = networkReaderC.getNetwork().getInputShapes();
        std::string input_name;
        SizeVector input_shape;
        std::tie(input_name, input_shape) = *input_shapes.begin();
        input_shape[0] = _cBatch;
        input_shapes[input_name] = input_shape;
        networkReaderC.getNetwork().reshape(input_shapes);
    }

    auto network = networkReaderC.getNetwork();
    // 1. add all layers as output one
    for (auto &&layer : network) {
        std::string layerType = network.getLayerByName(layer->name.c_str())->type;
        if (/*layerType != "Split" &&*/layerType != "Input" && layerType != "Const") {
            network.addOutput(layer->name);
        }

        // Only Convolution layers are candidates for accuracy-drop analysis.
        if (layerType == "Convolution") {
            _layersAccuracyDrop[layer->name] = 0.f;
        }
    }

    ExecutableNetwork executable_network = _pluginI8C.LoadNetwork(network, { { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES) } });
    _inferRequestI8C = executable_network.CreateInferRequest();

    // 2. go over all layers which affect accuracy and create network basing on it
    for (auto l : _layersAccuracyDrop) {
        CNNLayerPtr layerToClone = network.getLayerByName(l.first.c_str());
        CNNLayerPtr layerRelU = nullptr;
        // verification if there is Conv-RELU patern
        // currently it is only supported

        // if only one output from conv and if it is an output to relu
        // NOTE(review): this compares the consumer's *name* to "relu" -- looks
        // like the layer *type* was intended; confirm against upstream.
        if (layerToClone->outData.size() == 1
            && layerToClone->outData[0]->inputTo.size() == 1
            && CaselessEq<std::string>()(layerToClone->outData[0]->inputTo.begin()->second->name, "relu")) {
            layerRelU = layerToClone->outData[0]->inputTo.begin()->second;
        }

        CNNNetwork n = createICNNNetworkForLayer(layerToClone, layerRelU ? true : false);
        if (_cBatch != 0) {
            // Keep the single-layer network's batch in sync with the full one.
            auto input_shapes = n.getInputShapes();
            std::string input_name;
            SizeVector input_shape;
            std::tie(input_name, input_shape) = *input_shapes.begin();
            input_shape[0] = _cBatch;
            input_shapes[input_name] = input_shape;
            n.reshape(input_shapes);
        }

        // Initialize statistic
        ICNNNetworkStats *pstats = nullptr;
        ICNNNetwork &in = n;
        StatusCode s = in.getStats(&pstats, nullptr);
        if (s == StatusCode::OK && pstats) {
            pstats->setNodesStats(stat);
        }

        InferenceEngine::InputsDataMap inputs = n.getInputsInfo();
        DataPtr q = inputs.begin()->second->getInputData();

        ExecutableNetwork enetwork = _pluginI8C.LoadNetwork(n, { { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES) } });
        _singleLayerNetworks.push_back(enetwork);
        InferenceEngine::InferRequest request = enetwork.CreateInferRequest();
        std::string inputName = layerToClone->insData[0].lock()->name;
        // Share the input blob with the full-network request so both runs see
        // the same data without copying.
        request.SetBlob(inputName, _inferRequestI8C.GetBlob(inputName));
        // { request, name of the output to compare (ReLU's if fused), conv name }
        _singleLayerRequests[layerToClone->name] = { request, layerRelU ? layerRelU->name : layerToClone->name, layerToClone->name };
    }
}
450
451
void Int8Calibrator::collectCalibrationStatistic(size_t pics) {
    // Called once per processed batch; `pics` is the number of valid images
    // in that batch.  Behavior depends on the prepared mode:
    //  - by-layer mode: run each single-layer int8 network and record its
    //    NRMSD against the matching full-precision output;
    //  - statistics mode: accumulate per-channel data for every registered
    //    layer output into _statData.
    if (_collectByLayer) {
        std::map<std::string, SingleLayerData>::iterator it = _singleLayerRequests.begin();
        while (it != _singleLayerRequests.end()) {
            it->second._request.Infer();
            Blob::Ptr expected = _inferRequestI8C.GetBlob(it->second._outputName);
            Blob::Ptr result = it->second._request.GetBlob(it->second._outputName);
            float diff = compare_NRMSD(result, expected);
            it->second._int8Accuracy.push_back(diff);
            it++;
        }
    }
    if (_collectStatistic) {
        for (auto l : _statData.registeredLayers()) {
            auto outBlob = _inferRequestI8C.GetBlob(l);

            // Inputs were registered under a synthetic "scaleshifted_input:"
            // name; map it back to the real input name for the statistics.
            std::string outName = l;
            if (_inputsFromLayers.find(l) != _inputsFromLayers.end()) {
                outName = _inputsFromLayers[l];
            }

            size_t N, C;
            // Legacy dims() order is reversed, hence index 2 for the channel
            // count of an NCHW blob below -- confirm against Blob::dims().
            if (outBlob->dims().size() == 4 && outBlob->layout() == Layout::NCHW) {
                // TODO(amalyshe) change to using of tensor desc
                // Use the number of valid pictures, not the full batch.
                N = pics;
                C = outBlob->dims()[2];
            } else if (outBlob->dims().size() == 2 && outBlob->layout() == Layout::NC) {
                N = pics;
                C = outBlob->dims()[0];
            } else {
                // Unsupported layout: skip this output.
                continue;
            }

            // Counting min/max outputs per channel
            for (size_t n = 0; n < N; n++) {
                if (outBlob->dims().size() == 4) {
                    // Reversed dims: [0]*[1] is the per-channel spatial size.
                    size_t _HW = outBlob->dims()[0] * outBlob->dims()[1];
                    for (size_t c = 0; c < C; c++) {
                        if (outBlob->getTensorDesc().getPrecision() == Precision::FP32) {
                            float *ptr = &outBlob->buffer().as<float *>()[(n * C + c) * _HW];
                            _statData.addTensorStatistics(outName, c, ptr, _HW);
                        } else if (outBlob->getTensorDesc().getPrecision() == Precision::U8) {
                            uint8_t *ptr = &outBlob->buffer().as<uint8_t *>()[(n * C + c) * _HW];
                            _statData.addTensorStatistics(outName, c, ptr, _HW);
                        } else {
                            throw std::logic_error(std::string("Unsupported precision: ") + outBlob->getTensorDesc().getPrecision().name());
                        }
                    }
                } else if (outBlob->dims().size() == 2) {
                    if (outBlob->getTensorDesc().getPrecision() == Precision::FP32) {
                        float *ptr = &outBlob->buffer().as<float *>()[n * C];
                        _statData.addTensorStatistics(outName, 0, ptr, C);
                    } else if (outBlob->getTensorDesc().getPrecision() == Precision::U8) {
                        uint8_t *ptr = &outBlob->buffer().as<uint8_t *>()[n * C];
                        _statData.addTensorStatistics(outName, 0, ptr, C);
                    } else {
                        throw std::logic_error(std::string("Unsupported precision: ") + outBlob->getTensorDesc().getPrecision().name());
                    }
                }
            }
        }
    }
}
515
516 void Int8Calibrator::calculateLayersAccuracyDrop() {
517     _layersAccuracyDrop.clear();
518
519     std::map<std::string, SingleLayerData>::iterator it = _singleLayerRequests.begin();
520     while (it != _singleLayerRequests.end()) {
521         // calculate average metric per layer over all images and sort in desc order
522         float mo = 0.f;
523         for (auto d : it->second._int8Accuracy) {
524             mo += d;
525         }
526         mo = mo / it->second._int8Accuracy.size();
527         _layersAccuracyDrop[it->first] = mo;
528         it++;
529     }
530
531     // correction of accuracy drop to have sorted values for cases when accuracy drop is equal
532     // correction is added according to topological order
533     // this will prioritize returning of layers to FP32 starting from layers closer to the end of network
534     std::vector<CNNLayerPtr> ordered = InferenceEngine::details::CNNNetSortTopologically(networkReaderC.getNetwork());
535     float c = 0.00001f;
536     for (auto l : ordered) {
537         auto it = _layersAccuracyDrop.find(l->name);
538         if (it != _layersAccuracyDrop.end()) {
539             it->second += c;
540         }
541         c += 0.00001f;
542     }
543     _singleLayerRequests.clear();
544 }
545
546 std::map<std::string, float> Int8Calibrator::layersAccuracyDrop() {
547     return _layersAccuracyDrop;
548 }
549
550
551
552 //--------------------------------------------------------------------------------------------------
553
554 ClassificationCalibrator::ClassificationCalibrator(int nPictures, const std::string &flags_m,
555                                                    const std::string &flags_d, const std::string &flags_i,
556                                                    int flags_b, InferenceEngine::InferencePlugin plugin,
557                                                    CsvDumper &dumper, const std::string &flags_l,
558                                                      PreprocessingOptions preprocessingOptions, bool zeroBackground) :
559     ClassificationProcessor(flags_m, flags_d, flags_i, flags_b,
560                             plugin, dumper, flags_l,
561                             preprocessingOptions, zeroBackground) {
562     _modelFileNameI8C = modelFileName;
563     _pluginI8C = plugin;
564     _nPictures = nPictures;
565     _cBatch = flags_b;
566 }
567
shared_ptr<Processor::InferenceMetrics> ClassificationCalibrator::Process(bool stream_output) {
    // Runs the calibration dataset through the network prepared by one of the
    // collect*/validate* methods, feeding collectCalibrationStatistic() after
    // every batch, and computes the top-1 classification accuracy of the run.
    // Returns CalibrationMetrics with that accuracy.
    inferRequest = _inferRequestI8C;
    int top1Result = 0, total = 0;

    ClassificationSetGenerator generator;

    try {
        generator.readLabels(labelFileName);
    } catch (InferenceEngine::details::InferenceEngineException& ex) {
        // Missing labels file is non-fatal; continue with the raw class ids.
        slog::warn << "Can't read labels file " << labelFileName << slog::endl;
        slog::warn << "Error: " << ex.what() << slog::endl;
    }
    auto validationMap = generator.getValidationMap(imagesPath);

    if (validationMap.empty()) {
        THROW_IE_EXCEPTION << "The validation dataset in " << imagesPath << "is empty. Check the dataset file or folder and the labels file";
    }

    ImageDecoder decoder;

    // ----------------------------Do inference-------------------------------------------------------------
    std::vector<int> expected(batch);
    std::vector<std::string> files(batch);

    // Zero pictures requested means "use the whole validation set".
    if (!_nPictures) {
        _nPictures = validationMap.size();
    }


    ConsoleProgress progress(_nPictures, stream_output);

    CalibrationMetrics im;

    std::string firstInputName = this->inputInfo.begin()->first;
    std::string firstOutputName = this->outInfo.begin()->first;
    auto firstInputBlob = inferRequest.GetBlob(firstInputName);
    auto firstOutputBlob = inferRequest.GetBlob(firstOutputName);

    size_t ipics = 0;
    auto iter = validationMap.begin();
    while (iter != validationMap.end() && ipics < _nPictures) {
        size_t b = 0;
        int filesWatched = 0;
        // Fill up to `batch` slots of the input blob; unreadable files are
        // skipped (b-- compensates the loop increment for that slot).
        for (; b < batch && iter != validationMap.end() && ipics + b < _nPictures ; b++, iter++, filesWatched++) {
            expected[b] = iter->first;
            try {
                decoder.insertIntoBlob(iter->second, b, *firstInputBlob, preprocessingOptions);
                files[b] = iter->second;
            } catch (const InferenceEngineException &iex) {
                slog::warn << "Can't read file " << iter->second << slog::endl;
                slog::warn << "Error: " << iex.what() << slog::endl;
                // Could be some non-image file in directory
                b--;
                continue;
            }
        }
        // NOTE(review): advances by the full batch even when fewer than
        // `batch` images were actually loaded -- verify this is intended.
        ipics += batch;

        Infer(progress, filesWatched, im);
        // Only the `b` valid images of this batch feed the statistics.
        collectCalibrationStatistic(b);

        std::vector<unsigned> results;
        InferenceEngine::TopResults(1, *firstOutputBlob, results);
        for (size_t i = 0; i < b; i++) {
            int expc = expected[i];
            // Some models reserve class 0 for background; shift expected ids.
            if (zeroBackground) expc++;
            bool top1Scored = (static_cast<int>(results[i]) == expc);
            if (top1Scored) top1Result++;
            total++;
        }
    }
    progress.finish();

    calculateLayersAccuracyDrop();

    if (total == 0) {
        throw std::logic_error("total can't be equal to zero");
    }

    im.AccuracyResult = static_cast<float>(top1Result) / static_cast<float>(total);

    return std::shared_ptr<Processor::InferenceMetrics>(new CalibrationMetrics(im));
}
651
652 //--------------------------------------------------------------------------------------------------
653 SSDObjectDetectionCalibrator::SSDObjectDetectionCalibrator(int nPictures, const std::string &flags_m,
654                                                            const std::string &flags_d, const std::string &flags_i,
655                                                            const std::string &subdir, int flags_b,
656                                                              double threshold,
657                                                              InferencePlugin plugin, CsvDumper &dumper,
658                                                              const std::string &flags_a, const std::string &classes_list_file) :
659     SSDObjectDetectionProcessor(flags_m, flags_d, flags_i, subdir, flags_b,
660                                   threshold,
661                                   plugin, dumper,
662                                   flags_a, classes_list_file) {
663     _modelFileNameI8C = modelFileName;
664     _pluginI8C = plugin;
665     _nPictures = nPictures;
666     _cBatch = flags_b;
667 }
668
shared_ptr<Processor::InferenceMetrics> SSDObjectDetectionCalibrator::Process(bool stream_output) {
    // Runs the VOC-style detection dataset through the prepared network,
    // feeding collectCalibrationStatistic() after every batch, and computes
    // mAP over the processed images as the calibration accuracy metric.
    inferRequest = _inferRequestI8C;

    // Parsing PASCAL VOC2012 format
    VOCAnnotationParser vocAnnParser;
    VOCAnnotationCollector annCollector(annotationsPath);


    // No annotations found: return empty metrics rather than failing.
    if (annCollector.annotations().size() == 0) {
        ObjectDetectionInferenceMetrics emptyIM(this->threshold);

        return std::shared_ptr<InferenceMetrics>(new ObjectDetectionInferenceMetrics(emptyIM));
    }

    // Getting desired results from annotations
    std::map<std::string, ImageDescription> desiredForFiles;

    // Convert every annotation into a ground-truth ImageDescription keyed by
    // the image's relative path.
    for (auto &ann : annCollector.annotations()) {
        std::list<DetectedObject> dobList;
        for (auto &obj : ann.objects) {
            DetectedObject dob(classes[obj.name], static_cast<float>(obj.bndbox.xmin), static_cast<float>(obj.bndbox.ymin),
                               static_cast<float>(obj.bndbox.xmax), static_cast<float>(obj.bndbox.ymax), 1.0f, obj.difficult != 0);
            dobList.push_back(dob);
        }
        ImageDescription id(dobList);
        desiredForFiles.insert(std::pair<std::string, ImageDescription>(ann.folder + "/" + (!subdir.empty() ? subdir + "/" : "") + ann.filename, id));
    }

    // Sanity check: every declared network output must have valid data.
    for (auto &item : outInfo) {
        DataPtr outputData = item.second;
        if (!outputData) {
            throw std::logic_error("output data pointer is not valid");
        }
    }
    // -----------------------------------------------------------------------------------------------------

    // ----------------------------Do inference-------------------------------------------------------------

    std::vector<VOCAnnotation> expected(batch);

    // Zero pictures requested means "use the whole annotated set".
    if (!_nPictures) {
        _nPictures = annCollector.annotations().size();
    }

    ConsoleProgress progress(_nPictures, stream_output);

    ObjectDetectionInferenceMetrics im(threshold);

    vector<VOCAnnotation>::const_iterator iter = annCollector.annotations().begin();

    std::map<std::string, ImageDescription> scaledDesiredForFiles;

    std::string firstInputName = this->inputInfo.begin()->first;
    auto firstInputBlob = inferRequest.GetBlob(firstInputName);
    size_t ipics = 0;

    while (iter != annCollector.annotations().end() && ipics < _nPictures) {
        std::vector<std::string> files;
        size_t b = 0;

        int filesWatched = 0;
        // Fill up to `batch` slots; unreadable entries are skipped (b--
        // compensates the loop increment for that slot).
        for (; b < batch && iter != annCollector.annotations().end(); b++, iter++, filesWatched++) {
            expected[b] = *iter;
            string filename = iter->folder + "/" + (!subdir.empty() ? subdir + "/" : "") + iter->filename;
            try {
                float scale_x, scale_y;

                scale_x = 1.0f / iter->size.width;  // orig_size.width;
                scale_y = 1.0f / iter->size.height;  // orig_size.height;

                if (scaleProposalToInputSize) {
                    // NOTE(review): legacy dims() order is reversed, so [0]/[1]
                    // are presumably the input width/height here -- confirm.
                    scale_x *= firstInputBlob->dims()[0];
                    scale_y *= firstInputBlob->dims()[1];
                }

                // Scaling the desired result (taken from the annotation) to the network size
                scaledDesiredForFiles.insert(std::pair<std::string, ImageDescription>(filename, desiredForFiles.at(filename).scale(scale_x, scale_y)));

                files.push_back(filename);
            } catch (const InferenceEngineException &iex) {
                slog::warn << "Can't read file " << this->imagesPath + "/" + filename << slog::endl;
                slog::warn << "Error: " << iex.what() << slog::endl;
                // Could be some non-image file in directory
                b--;
                continue;
            }
            ipics++;
        }

        // Infer model
        Infer(progress, filesWatched, im);
        // Only the `b` valid images of this batch feed the statistics.
        collectCalibrationStatistic(b);

        // Processing the inference result
        std::map<std::string, std::list<DetectedObject>> detectedObjects = processResult(files);

        // Calculating similarity
        //
        for (size_t j = 0; j < files.size(); j++) {
            ImageDescription result(detectedObjects[files[j]]);
            im.apc.consumeImage(result, scaledDesiredForFiles.at(files[j]));
        }
    }
    progress.finish();

    calculateLayersAccuracyDrop();

    CalibrationMetrics imCalibration;
    const ObjectDetectionInferenceMetrics &odim = dynamic_cast<const ObjectDetectionInferenceMetrics&>(im);
    if (im.nRuns > 0) {
        std::map<int, double> appc = odim.apc.calculateAveragePrecisionPerClass();

        // mAP = mean of the per-class average precisions.
        double mAP = 0;
        for (auto i : appc) {
            mAP += i.second;
        }
        imCalibration.AccuracyResult = static_cast<float>(mAP / appc.size());
    }
    return std::shared_ptr<Processor::InferenceMetrics>(new CalibrationMetrics(imCalibration));
}
789
790