// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include <algorithm>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "statistics_report.hpp"
void StatisticsReport::add(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &pmStat, const double &latency) {
    if (_config.niter > 0 && _config.niter == _performanceCounters.size()) {
        // do not add elements for the additionally executed requests
        return;
    }

    _latencies.push_back(latency);
    if (_config.report_type == medianCntReport || _config.report_type == detailedCntReport) {
        // collect per-iteration statistics only when median/detailed statistics collection is enabled
        _performanceCounters.push_back(pmStat);
    }
}
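
// A minimal usage sketch (illustrative, not part of the original file): the benchmark
// loop is expected to feed every completed request into the report. `requests`,
// `config`, and `latencyMs` below are hypothetical placeholders; the real call sites
// live in benchmark_app's main loop.
//
//     StatisticsReport report(config);
//     for (auto &request : requests) {
//         report.add(request.GetPerformanceCounts(), latencyMs);
//     }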

void StatisticsReport::dump(const double &fps, const size_t &numProcessedReq, const double &totalExecTime) {
    if (_config.report_type.empty()) {
        slog::info << "Statistics collecting was not requested. No reports are dumped." << slog::endl;
        return;
    }

    size_t numMeasuredReq = numProcessedReq;
    if (_config.api == "async" && _config.niter > 0) {
        // in this case the number of processed requests is higher than the value of the -niter option,
        // but statistics should be handled for the first -niter requests only
        numMeasuredReq = _config.niter;
    }

    std::string separator =
#if defined _WIN32 || defined __CYGWIN__
        "\\";
#else
        "/";
#endif
    if (_config.report_folder.empty())
        separator = "";

    CsvDumper dumper(true, _config.report_folder + separator + "benchmark_" + _config.report_type + "_report.csv");

    // The resulting number of columns in the CSV file depends on the report_type. If it's noCntReport,
    // no PM data is collected and there are only 3 columns in the file (in the configuration section). If it's
    // medianCntReport, median PM values are collected per layer and the number of columns is 6.
    // Example from GPU:
    //
    // layer name;exec status;layer type;exec type;real time;cpu time;
    // conv1;EXECUTED;Convolution;convolution_gpu_bfyx_gemm_like;615;3;
    // Here, all the data are taken from InferenceEngine::InferenceEngineProfileInfo.
    //
    // In case of detailedCntReport the number of columns is 4 + numMeasuredReq * 2, because the first 4 parameters
    // are the same but realTime and cpuTime can differ on each iteration (example from 5 GPU requests):
    // conv1;EXECUTED;Convolution;convolution_gpu_bfyx_gemm_like;630,3;617,3;616,3;615,3;617,3;
    size_t numOfColumns = 0;
    if (_config.report_type == noCntReport) {
        numOfColumns = 3;
    } else if (_config.report_type == medianCntReport) {
        numOfColumns = 6;
    } else {
        // for detailedCntReport
        numOfColumns = 4 + numMeasuredReq * 2;
    }
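    // Sanity check of the arithmetic above: for detailedCntReport with numMeasuredReq == 5
    // (as in the 5-request GPU example), numOfColumns == 4 + 5 * 2 == 14.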

    auto completeCsvRow = [](CsvDumper &dumper, size_t numOfColumns, size_t filled) {
        // pad the current row with empty cells up to numOfColumns, then end it
        for (size_t i = 0; i < numOfColumns - filled; i++)
            dumper << "";
        dumper.endLine();
    };
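    // For example, with numOfColumns == 6 and filled == 3 the lambda above appends
    // three empty cells before ending the row, so every row in the report spans
    // exactly numOfColumns cells regardless of how many were actually written.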

    // dump execution configuration
    dumper << "Configuration setup";
    completeCsvRow(dumper, numOfColumns, 1);
    dumper << "config option" << "CLI parameter" << "value";
    completeCsvRow(dumper, numOfColumns, 3);

    dumper << "target device" << " -d" << _config.device;
    completeCsvRow(dumper, numOfColumns, 3);
    dumper << "execution mode" << " -api" << _config.api;
    completeCsvRow(dumper, numOfColumns, 3);
    dumper << "batch size" << " -b" << _config.batch;
    completeCsvRow(dumper, numOfColumns, 3);
    dumper << "number of iterations" << " -niter" << _config.niter;
    completeCsvRow(dumper, numOfColumns, 3);
    dumper << "number of parallel infer requests" << " -nireq" << _config.nireq;
    completeCsvRow(dumper, numOfColumns, 3);
    dumper << "number of CPU threads" << " -nthreads" << _config.cpu_nthreads;
    completeCsvRow(dumper, numOfColumns, 3);
    dumper << "CPU pinning enabled" << " -pin" << _config.cpu_pin;
    completeCsvRow(dumper, numOfColumns, 3);
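
    // The configuration section written above should come out roughly as follows
    // (values are illustrative and depend on the actual run):
    //
    //     Configuration setup;;;...
    //     config option;CLI parameter;value;...
    //     target device; -d;CPU;...
    //     execution mode; -api;async;...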

    // write PM data from each iteration
    if (!_performanceCounters.empty()) {
        if (_config.report_type != medianCntReport && _config.report_type != detailedCntReport) {
            throw std::logic_error("PM data should only be collected for median or detailed report types");
        }

        // this vector is sorted according to the execution order of the network layers
        auto performanceMapSorted = preparePmStatistics();

        dumper << "Performance counters";
        completeCsvRow(dumper, numOfColumns, 1);
        dumper << "layer name" << "exec status" << "layer type" << "exec type";

        if (_config.report_type == medianCntReport) {
            dumper << "median real time" << "median cpu time";
            completeCsvRow(dumper, numOfColumns, 6);
        } else {
            // detailedCntReport case
            for (size_t i = 0; i < _performanceCounters.size(); i++) {
                dumper << "realTime_iter" + std::to_string(i) << "cpuTime_iter" + std::to_string(i);
            }
            completeCsvRow(dumper, numOfColumns, 4 + _performanceCounters.size() * 2);
        }
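
        // For detailedCntReport the header row written above comes out as:
        //     layer name;exec status;layer type;exec type;realTime_iter0;cpuTime_iter0;realTime_iter1;...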
        for (const auto &layer : performanceMapSorted) {
            dumper << layer.first;  // layer name
            switch (layer.second.status) {
                case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
                    dumper << "EXECUTED";
                    break;
                case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
                    dumper << "NOT_RUN";
                    break;
                case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
                    dumper << "OPTIMIZED_OUT";
                    break;
            }
            dumper << layer.second.layer_type << layer.second.exec_type;

            if (_config.report_type == medianCntReport) {
                // write the median realTime and cpuTime (converted from microseconds to milliseconds)
                // over all processed requests for the current layer
                dumper <<
                    std::to_string(getMedianValue<long long>(_perLayerRealTime[layer.first]) / 1000.0) <<
                    std::to_string(getMedianValue<long long>(_perLayerCpuTime[layer.first]) / 1000.0);
            } else {
                // write realTime and cpuTime of every processed request for the current layer
                for (size_t i = 0; i < numMeasuredReq; i++) {
                    dumper << std::to_string(_perLayerRealTime[layer.first][i] / 1000.0) << std::to_string(_perLayerCpuTime[layer.first][i] / 1000.0);
                }
            }
            dumper.endLine();  // the row is already fully filled at this point
        }
    }

    if (_config.report_type == detailedCntReport) {
        dumper << "Statistics";
        completeCsvRow(dumper, numOfColumns, 1);

        for (size_t i = 0; i < _latencies.size(); i++) {
            // detailedCntReport case
            dumper << "iter" + std::to_string(i);
        }
        completeCsvRow(dumper, numOfColumns, 4 + _latencies.size());
        dumper << "latencies";
        for (const auto &lat : _latencies) {
            dumper << lat;
        }
        completeCsvRow(dumper, numOfColumns, _latencies.size());
    }

    dumper << "Execution results";
    completeCsvRow(dumper, numOfColumns, 1);
    dumper << "number of measured infer requests" << numMeasuredReq;
    completeCsvRow(dumper, numOfColumns, 2);
    dumper << "latency" << getMedianValue<double>(_latencies);
    completeCsvRow(dumper, numOfColumns, 2);
    dumper << "throughput" << fps;
    completeCsvRow(dumper, numOfColumns, 2);
    dumper << "total execution time" << totalExecTime;
    completeCsvRow(dumper, numOfColumns, 2);

    slog::info << "Statistics report is stored to " << dumper.getFilename() << slog::endl;
}

double StatisticsReport::getMedianLatency() {
    return getMedianValue<double>(_latencies);
}

std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>> StatisticsReport::preparePmStatistics() {
    if (_performanceCounters.empty()) {
        throw std::logic_error("preparePmStatistics() was called when no PM data was collected");
    }

    // sort the PM data of the first processed request according to the layers' execution order
    auto performanceMapSorted = perfCountersSorted(_performanceCounters[0]);

    // iterate over each processed infer request and handle its PM data
    for (auto &pm : _performanceCounters) {
        // iterate over each layer from the sorted vector and add the required PM data to the per-layer maps
        for (const auto &it : performanceMapSorted) {
            _perLayerRealTime[it.first].push_back(pm[it.first].realTime_uSec);
            _perLayerCpuTime[it.first].push_back(pm[it.first].cpu_uSec);
        }
    }
    return performanceMapSorted;
}
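
// Illustrative data shape (borrowing the GPU numbers from the comment in dump()):
// after preparePmStatistics() over, say, two measured requests, a layer "conv1" with
// realTime_uSec readings of 615 and 617 (cpu_uSec of 3 in both) would leave
//     _perLayerRealTime["conv1"] == {615, 617}
//     _perLayerCpuTime["conv1"]  == {3, 3}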

template <typename T>
T StatisticsReport::getMedianValue(const std::vector<T> &vec) {
    std::vector<T> sortedVec(vec);
    std::sort(sortedVec.begin(), sortedVec.end());
    return (sortedVec.size() % 2 != 0) ?
           sortedVec[sortedVec.size() / 2ULL] :
           (sortedVec[sortedVec.size() / 2ULL] + sortedVec[sortedVec.size() / 2ULL - 1ULL]) / static_cast<T>(2.0);
}
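
// Worked example of the median logic above: for an odd-sized vector {1, 3, 5} the middle
// element 3 is returned; for an even-sized {1, 2, 3, 4} the two middle elements are
// averaged, (2 + 3) / 2 = 2.5 for T = double. Note that for an integral T such as
// long long the division truncates, so the same input would yield 2.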