1 // Copyright (C) 2018 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "speech_sample.hpp"
7 #include <gflags/gflags.h>
22 #include <inference_engine.hpp>
23 #include <gna/gna_config.hpp>
25 #include <samples/common.hpp>
26 #include <samples/slog.hpp>
27 #include <samples/args_helper.hpp>
// Round memSize up to the next multiple of pad (integer arithmetic; pad > 0).
#define ALIGN(memSize, pad) ((static_cast<int>((memSize) + pad - 1) / pad) * pad)

// Largest absolute score deviation tolerated before an element counts as an error.
#define MAX_SCORE_DIFFERENCE 0.0001f
// Target peak magnitude used when deriving the 16-bit feature quantization scale (2^14).
#define MAX_VAL_2B_FEAT 16384

using namespace InferenceEngine;

// Timing aliases used for model-load and inference measurements.
typedef std::chrono::high_resolution_clock Time;
typedef std::chrono::duration<double, std::ratio<1, 1000>> ms;  // milliseconds, double
typedef std::chrono::duration<float> fsec;                      // seconds, float
    float sumSquaredError;     // running sum of squared absolute errors (feeds RMS / stddev)
    float sumSquaredRelError;  // running sum of squared relative errors (feeds relative stddev)
/**
 * @brief Scans a Kaldi ARK file, counting its arrays and measuring the byte
 *        size of the array at index numArrayToFindSize.
 *
 * Record layout read here: NUL-terminated name, "BFM " marker ending in '\4',
 * uint32 row count, '\4', uint32 column count, then rows*cols float32 values.
 *
 * @param fileName           path of the ARK file to inspect
 * @param numArrayToFindSize zero-based index of the array whose byte size is wanted
 * @param ptrNumArrays       [out, optional] total number of arrays found
 * @param ptrNumMemoryBytes  [out, optional] byte size of the requested array
 *
 * On open failure an error is printed to stderr.
 */
void GetKaldiArkInfo(const char *fileName,
                     uint32_t numArrayToFindSize,
                     uint32_t *ptrNumArrays,
                     uint32_t *ptrNumMemoryBytes) {
    uint32_t numArrays = 0;
    uint32_t numMemoryBytes = 0;

    std::ifstream in_file(fileName, std::ios::binary);
    // NOTE(review): 'line' must be declared in this scope (std::string) -- confirm.
    while (!in_file.eof()) {
        uint32_t numRows = 0u, numCols = 0u, num_bytes = 0u;
        std::getline(in_file, line, '\0');  // read variable length name followed by space and NUL
        std::getline(in_file, line, '\4');  // read "BFM " marker followed by control-D
        if (line.compare("BFM ") != 0) {    // marker missing or malformed: stop scanning
        in_file.read(reinterpret_cast<char *>(&numRows), sizeof(uint32_t));  // read number of rows
        std::getline(in_file, line, '\4');                                   // read control-D
        in_file.read(reinterpret_cast<char *>(&numCols), sizeof(uint32_t));  // read number of columns
        num_bytes = numRows * numCols * sizeof(float);
        in_file.seekg(num_bytes, in_file.cur);  // skip the float payload without copying it
        if (numArrays == numArrayToFindSize) {
            numMemoryBytes += num_bytes;  // record size of the requested array
    fprintf(stderr, "Failed to open %s for reading in GetKaldiArkInfo()!\n", fileName);
    if (ptrNumArrays != NULL) *ptrNumArrays = numArrays;
    if (ptrNumMemoryBytes != NULL) *ptrNumMemoryBytes = numMemoryBytes;
/**
 * @brief Reads array number arrayIndex from a Kaldi ARK file.
 *
 * Record layout: NUL-terminated name, "BFM " marker ending in '\4',
 * uint32 row count, '\4', uint32 column count, then rows*cols float32 values.
 *
 * @param fileName              path of the ARK file
 * @param arrayIndex            zero-based index of the record to load
 * @param ptrName               [out] record (utterance) name
 * @param memory                [out] destination buffer; must already be sized
 *                              to hold the record's float payload
 * @param ptrNumRows            [out] number of rows (frames)
 * @param ptrNumColumns         [out] number of columns (elements per frame)
 * @param ptrNumBytesPerElement [out] always sizeof(float)
 *
 * Terminates the process (exit(-1)) on open failure, a malformed record
 * marker, or a destination buffer that is too small for the payload.
 */
void LoadKaldiArkArray(const char *fileName, uint32_t arrayIndex, std::string &ptrName, std::vector<uint8_t> &memory,
                       uint32_t *ptrNumRows, uint32_t *ptrNumColumns, uint32_t *ptrNumBytesPerElement) {
    std::ifstream in_file(fileName, std::ios::binary);
    if (in_file.good()) {
        std::string line;
        uint32_t i = 0;
        // Skip the first arrayIndex records without copying their payloads.
        while (i < arrayIndex) {
            uint32_t numRows = 0u, numCols = 0u;
            std::getline(in_file, line, '\0');  // read variable length name followed by space and NUL
            std::getline(in_file, line, '\4');  // read "BFM " marker followed by control-D
            if (line.compare("BFM ") != 0) {
                break;  // malformed record: stop skipping; the eof/marker checks below report it
            }
            in_file.read(reinterpret_cast<char *>(&numRows), sizeof(uint32_t));  // read number of rows
            std::getline(in_file, line, '\4');                                   // read control-D
            in_file.read(reinterpret_cast<char *>(&numCols), sizeof(uint32_t));  // read number of columns
            in_file.seekg(numRows * numCols * sizeof(float), in_file.cur);       // skip the data payload
            i++;
        }
        if (!in_file.eof()) {
            std::getline(in_file, ptrName, '\0');  // read variable length name followed by space and NUL
            std::getline(in_file, line, '\4');     // read "BFM " marker followed by control-D
            if (line.compare("BFM ") != 0) {
                fprintf(stderr, "Cannot find array specifier in file %s in LoadKaldiArkArray()!\n", fileName);
                exit(-1);
            }
            in_file.read(reinterpret_cast<char *>(ptrNumRows), sizeof(uint32_t));     // read number of rows
            std::getline(in_file, line, '\4');                                        // read control-D
            in_file.read(reinterpret_cast<char *>(ptrNumColumns), sizeof(uint32_t));  // read number of columns
            // widen before multiplying so rows*cols*4 cannot overflow 32 bits
            size_t willWrite = static_cast<size_t>(*ptrNumRows) * *ptrNumColumns * sizeof(float);
            if (memory.size() < willWrite) {
                // guard against overrunning the caller's buffer (willWrite was previously unused)
                fprintf(stderr, "Buffer too small (%zu < %zu bytes) in LoadKaldiArkArray()!\n",
                        memory.size(), willWrite);
                exit(-1);
            }
            in_file.read(reinterpret_cast<char *>(&memory.front()), willWrite);  // read array data
        }
    } else {
        // bug fix: the message previously blamed GetKaldiArkInfo()
        fprintf(stderr, "Failed to open %s for reading in LoadKaldiArkArray()!\n", fileName);
        exit(-1);
    }
    *ptrNumBytesPerElement = sizeof(float);
}
// Writes one named float32 matrix to fileName in Kaldi ARK "BFM " binary form:
// name, NUL, "BFM ", '\4', uint32 rows, '\4', uint32 columns, raw float payload.
// NOTE(review): the append flag, record name, data pointer and row count appear
// to be parameters declared in the portion of the signature not visible here -- confirm.
void SaveKaldiArkArray(const char *fileName,
                       uint32_t numColumns) {
    std::ios_base::openmode mode = std::ios::binary;
    mode |= std::ios::app;  // presumably guarded by an append flag so the first utterance truncates -- confirm
    std::ofstream out_file(fileName, mode);
    if (out_file.good()) {
        out_file.write(name.c_str(), name.length());  // write name
        out_file.write("\0", 1);                      // NUL terminator after the name
        out_file.write("BFM ", 4);                    // binary-float-matrix marker
        out_file.write("\4", 1);                      // control-D separator
        out_file.write(reinterpret_cast<char *>(&numRows), sizeof(uint32_t));     // row count
        out_file.write("\4", 1);                      // control-D separator
        out_file.write(reinterpret_cast<char *>(&numColumns), sizeof(uint32_t));  // column count
        out_file.write(reinterpret_cast<char *>(ptrMemory), numRows * numColumns * sizeof(float));
        // NOTE(review): "%s" below is never substituted (this is string concatenation,
        // not printf) -- the message will literally contain "%s" followed by fileName.
        throw std::runtime_error(std::string("Failed to open %s for writing in SaveKaldiArkArray()!\n") + fileName);
/**
 * @brief Computes a quantization scale factor so that the largest-magnitude
 *        float in the buffer maps to targetMax.
 * @param ptrFloatMemory buffer holding numElements float32 values
 * @param targetMax      desired maximum quantized magnitude (e.g. MAX_VAL_2B_FEAT)
 * @param numElements    number of floats in the buffer
 * @return targetMax / max(|x|)
 * NOTE(review): 'max' and 'scaleFactor' are declared/initialized on lines not
 * visible here; confirm how an all-zero buffer (max == 0) is handled.
 */
float ScaleFactorForQuantization(void *ptrFloatMemory, float targetMax, uint32_t numElements) {
    float *ptrFloatFeat = reinterpret_cast<float *>(ptrFloatMemory);
    // scan for the largest absolute value in the buffer
    for (uint32_t i = 0; i < numElements; i++) {
        if (fabs(ptrFloatFeat[i]) > max) {
            max = fabs(ptrFloatFeat[i]);
    scaleFactor = targetMax / max;
    return (scaleFactor);
181 void ClearScoreError(score_error_t *error) {
182 error->numScores = 0;
183 error->numErrors = 0;
184 error->maxError = 0.0;
185 error->rmsError = 0.0;
186 error->sumError = 0.0;
187 error->sumRmsError = 0.0;
188 error->sumSquaredError = 0.0;
189 error->maxRelError = 0.0;
190 error->sumRelError = 0.0;
191 error->sumSquaredRelError = 0.0;
194 void UpdateScoreError(score_error_t *error, score_error_t *totalError) {
195 totalError->numErrors += error->numErrors;
196 totalError->numScores += error->numScores;
197 totalError->sumRmsError += error->rmsError;
198 totalError->sumError += error->sumError;
199 totalError->sumSquaredError += error->sumSquaredError;
200 if (error->maxError > totalError->maxError) {
201 totalError->maxError = error->maxError;
203 totalError->sumRelError += error->sumRelError;
204 totalError->sumSquaredRelError += error->sumSquaredRelError;
205 if (error->maxRelError > totalError->maxRelError) {
206 totalError->maxRelError = error->maxRelError;
/**
 * @brief Compares an inferred score matrix against reference scores and fills
 *        *scoreError with per-utterance absolute/relative error statistics.
 * @param ptrScoreArray    inferred scores, row-major layout
 * @param ptrRefScoreArray reference scores with the same layout
 * @param scoreError       [out] statistics accumulator (cleared on entry)
 * @param numColumns       number of scores per row (frame)
 * @return presumably the count of elements whose absolute error exceeded
 *         scoreError->threshold (the return statement is not visible here)
 * NOTE(review): the row-count parameter (used as numRows below) is declared in
 * a part of the signature not visible here -- confirm.
 */
uint32_t CompareScores(float *ptrScoreArray,
                       void *ptrRefScoreArray,
                       score_error_t *scoreError,
                       uint32_t numColumns) {
    uint32_t numErrors = 0;
    ClearScoreError(scoreError);
    float *A = ptrScoreArray;                                // inferred scores
    float *B = reinterpret_cast<float *>(ptrRefScoreArray);  // reference scores
    for (uint32_t i = 0; i < numRows; i++) {
        for (uint32_t j = 0; j < numColumns; j++) {
            float score = A[i * numColumns + j];
            float refscore = B[i * numColumns + j];
            float error = fabs(refscore - score);
            // epsilon keeps the division defined when refscore == 0
            float rel_error = error / (static_cast<float>(fabs(refscore)) + 1e-20f);
            float squared_error = error * error;
            float squared_rel_error = rel_error * rel_error;
            scoreError->numScores++;
            scoreError->sumError += error;
            scoreError->sumSquaredError += squared_error;
            if (error > scoreError->maxError) {
                scoreError->maxError = error;
            scoreError->sumRelError += rel_error;
            scoreError->sumSquaredRelError += squared_rel_error;
            if (rel_error > scoreError->maxRelError) {
                scoreError->maxRelError = rel_error;
            if (error > scoreError->threshold) {
    // RMS over the whole matrix; the per-utterance value also feeds sumRmsError
    scoreError->rmsError = sqrt(scoreError->sumSquaredError / (numRows * numColumns));
    scoreError->sumRmsError += scoreError->rmsError;
    scoreError->numErrors = numErrors;
252 float StdDevError(score_error_t error) {
253 return (sqrt(error.sumSquaredError / error.numScores
254 - (error.sumError / error.numScores) * (error.sumError / error.numScores)));
257 float StdDevRelError(score_error_t error) {
258 return (sqrt(error.sumSquaredRelError / error.numScores
259 - (error.sumRelError / error.numScores) * (error.sumRelError / error.numScores)));
262 #if !defined(__arm__) && !defined(_M_ARM)
263 #if defined(_WIN32) || defined(WIN32)
// Portable CPUID wrapper: queries one CPUID leaf and returns the four raw
// registers through the pointer arguments.
// NOTE(review): 'level' (the leaf to query) is declared on a line not visible
// here, presumably taken from *eax -- confirm.
inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
                         unsigned int *ecx, unsigned int *edx) {
#if defined(_WIN32) || defined(WIN32)
    // MSVC path: __cpuid operates on an int[4] holding {EAX, EBX, ECX, EDX}
    int regs[4] = {static_cast<int>(*eax), static_cast<int>(*ebx), static_cast<int>(*ecx), static_cast<int>(*edx)};
    __cpuid(regs, level);
    *eax = static_cast<uint32_t>(regs[0]);
    *ebx = static_cast<uint32_t>(regs[1]);
    *ecx = static_cast<uint32_t>(regs[2]);
    *edx = static_cast<uint32_t>(regs[3]);
    // GCC/Clang path: __get_cpuid fills the registers directly
    __get_cpuid(level, eax, ebx, ecx, edx);
// return GNA module frequency in MHz
// Identifies the CPU via CPUID family/model and maps known SKUs (Cannon Lake,
// Gemini Lake) to their GNA clock; other CPUs get a default value.
// NOTE(review): the register variables, the extended-model shift, and the
// per-branch return values are on lines not visible here -- confirm.
float getGnaFrequencyMHz() {
    const uint8_t sixth_family = 6;
    const uint8_t cannon_lake_model = 102;
    const uint8_t gemini_lake_model = 122;
    native_cpuid(&eax, &ebx, &ecx, &edx);
    family = (eax >> 8) & 0xF;  // base family field of CPUID leaf 1 EAX
    // model is the concatenation of two fields
    // | extended model | model |
    // copy extended model data
    model = (eax >> 16) & 0xF;
    model += (eax >> 4) & 0xF;  // add the base model field
    if (family == sixth_family && model == cannon_lake_model) {
    } else if (family == sixth_family &&
               model == gemini_lake_model) {
    // counters not supported, so just return the default value
323 #endif // !defined(__arm__) && !defined(_M_ARM)
// Prints the accumulated reference-comparison statistics (max / avg / avg rms /
// stddev of the absolute error) to the given stream.
// NOTE(review): framesNum (the divisor for the avg rms line) is a parameter on
// a line not visible here -- confirm.
void printReferenceCompareResults(score_error_t const &totalError,
                                  std::ostream &stream) {
    stream << " max error: " <<
           totalError.maxError << std::endl;
    stream << " avg error: " <<
           totalError.sumError / totalError.numScores << std::endl;
    stream << " avg rms error: " <<
           totalError.sumRmsError / framesNum << std::endl;
    stream << " stdev error: " <<
           StdDevError(totalError) << std::endl << std::endl;
// Prints per-layer performance counters as a table: counter name, total
// utterance scoring time (ms), and average time per inference call (us).
// NOTE(review): callsNum (the per-call divisor), 'freq', and the check that
// selects GNA hardware counters are on lines not visible here -- confirm.
void printPerformanceCounters(std::map<std::string,
        InferenceEngine::InferenceEngineProfileInfo> const &utterancePerfMap,
                              std::ostream &stream) {
#if !defined(__arm__) && !defined(_M_ARM)
    stream << std::endl << "Performance counts:" << std::endl;
    stream << std::setw(10) << std::right << "" << "Counter descriptions";
    stream << std::setw(22) << "Utt scoring time";
    stream << std::setw(18) << "Avg infer time";
    stream << std::setw(46) << "(ms)";
    stream << std::setw(24) << "(us per call)";
    for (const auto &it : utterancePerfMap) {
        std::string const &counter_name = it.first;
        float current_units = it.second.realTime_uSec;
        float call_units = current_units / callsNum;
        // if GNA HW counters
        // get frequency of GNA module
        freq = getGnaFrequencyMHz();
        current_units /= freq * 1000;  // convert raw HW cycle counts to ms using the module clock
        // drop the key's prefix (first 4 chars) when printing the counter name
        stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1);
        stream << std::setw(16) << std::right << current_units;
        stream << std::setw(21) << std::right << call_units;
373 void getPerformanceCounters(InferenceEngine::InferRequest &request,
374 std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfCounters) {
375 auto retPerfCounters = request.GetPerformanceCounts();
377 for (const auto &pair : retPerfCounters) {
378 perfCounters[pair.first] = pair.second;
382 void sumPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const &perfCounters,
383 std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &totalPerfCounters) {
384 for (const auto &pair : perfCounters) {
385 totalPerfCounters[pair.first].realTime_uSec += pair.second.realTime_uSec;
/**
 * @brief Parses command-line flags and validates their combinations.
 * @throws std::logic_error on any invalid flag value or combination.
 * @return presumably true when execution should continue (the return statement
 *         and help handling are on lines not visible here -- confirm).
 */
bool ParseAndCheckCommandLine(int argc, char *argv[]) {
    // ---------------------------Parsing and validation of input args--------------------------------------
    slog::info << "Parsing input parameters" << slog::endl;
    gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
    // dump mode: the loaded/imported model is written out via -wg or -we
    bool isDumpMode = !FLAGS_wg.empty() || !FLAGS_we.empty();
    // input not required only in dump mode and if external scale factor provided
    if (FLAGS_i.empty() && (!isDumpMode || FLAGS_q.compare("user") != 0)) {
        throw std::logic_error("In model dump mode either static quantization is used (-i) or user scale"
                               " factor need to be provided. See -q user option");
        throw std::logic_error("Input file not set. Please use -i.");
    // exactly one model source: IR (-m) or exported GNA model (-rg)
    if (FLAGS_m.empty() && FLAGS_rg.empty()) {
        throw std::logic_error("Either IR file (-m) or GNAModel file (-rg) need to be set.");
    if ((!FLAGS_m.empty() && !FLAGS_rg.empty())) {
        throw std::logic_error("Only one of -m and -rg is allowed.");
    // -d must name one of the supported devices
    if ((FLAGS_d.compare("GPU") != 0) && (FLAGS_d.compare("CPU") != 0) && (FLAGS_d.compare("GNA_AUTO") != 0) &&
        (FLAGS_d.compare("GNA_HW") != 0)
        && (FLAGS_d.compare("GNA_SW") != 0) && (FLAGS_d.compare("GNA_SW_EXACT") != 0)) {
        throw std::logic_error("Specified device is not supported.");
    // NOTE(review): the message says "non-negative" but zero is rejected too --
    // the check actually requires a strictly positive scale factor.
    float scaleFactorInput = static_cast<float>(FLAGS_sf);
    if (scaleFactorInput <= 0.0f) {
        throw std::logic_error("Scale factor out of range (must be non-negative).");
    uint32_t batchSize = (uint32_t) FLAGS_bs;
    if ((batchSize < 1) || (batchSize > 8)) {
        throw std::logic_error("Batch size out of range (1..8).");
    /** default is a static quantisation **/
    if ((FLAGS_q.compare("static") != 0) && (FLAGS_q.compare("dynamic") != 0) && (FLAGS_q.compare("user") != 0)) {
        throw std::logic_error("Quantization mode not supported (static, dynamic, user).");
    if (FLAGS_q.compare("dynamic") == 0) {
        throw std::logic_error("Dynamic quantization not yet supported.");
    // quantization bit depth for GNA weights
    if (FLAGS_qb != 16 && FLAGS_qb != 8) {
        throw std::logic_error("Only 8 or 16 bits supported.");
    // number of parallel infer requests / GNA library threads
    if (FLAGS_nthreads <= 0) {
        throw std::logic_error("Not valid value for 'nthreads' argument. It should be > 0 ");
454 * @brief The entry point for inference engine automatic speech recognition sample
455 * @file speech_sample/main.cpp
456 * @example speech_sample/main.cpp
458 int main(int argc, char *argv[]) {
460 slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl;
462 // ------------------------------ Parsing and validation of input args ---------------------------------
463 if (!ParseAndCheckCommandLine(argc, argv)) {
467 if (FLAGS_l.empty()) {
468 slog::info << "No extensions provided" << slog::endl;
471 bool useGna = (FLAGS_d.find("GNA") != std::string::npos);
472 auto deviceStr = FLAGS_d.substr(0, (FLAGS_d.find("_")));
473 float scaleFactorInput = static_cast<float>(FLAGS_sf);
474 uint32_t batchSize = (uint32_t) FLAGS_bs;
475 /** Extract input ark file name **/
476 std::string inputArkName = fileNameNoExt(FLAGS_i) + ".ark";
478 uint32_t numUtterances(0), numBytesThisUtterance(0);
479 if (!FLAGS_i.empty()) {
480 GetKaldiArkInfo(inputArkName.c_str(), 0, &numUtterances, &numBytesThisUtterance);
482 // -----------------------------------------------------------------------------------------------------
484 // --------------------------- 1. Load Plugin for inference engine -------------------------------------
485 slog::info << "Loading plugin" << slog::endl;
486 /** Loading plugin for device **/
487 InferencePlugin plugin = PluginDispatcher({FLAGS_pp, "../../../lib/intel64", ""}).getPluginByDevice(deviceStr);
489 /** Printing plugin version **/
490 std::cout << plugin.GetVersion() << std::endl << std::endl;
491 // -----------------------------------------------------------------------------------------------------
493 // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
494 slog::info << "Loading network files" << slog::endl;
496 CNNNetReader netBuilder;
497 if (!FLAGS_m.empty()) {
498 /** Read network model **/
499 netBuilder.ReadNetwork(FLAGS_m);
501 /** Extract model name and load weights **/
502 std::string binFileName = fileNameNoExt(FLAGS_m) + ".bin";
503 netBuilder.ReadWeights(binFileName);
505 // -------------------------------------------------------------------------------------------------
507 // --------------------------- 3. Set batch size ---------------------------------------------------
508 /** Set batch size. Unlike in imaging, batching in time (rather than space) is done for speech recognition. **/
509 netBuilder.getNetwork().setBatchSize(batchSize);
510 slog::info << "Batch size is " << std::to_string(netBuilder.getNetwork().getBatchSize())
514 /** Setting plugin parameter for per layer metrics **/
515 std::map<std::string, std::string> gnaPluginConfig;
516 std::map<std::string, std::string> genericPluginConfig;
517 if (FLAGS_d.compare("CPU") != 0) {
518 gnaPluginConfig[GNAConfigParams::KEY_GNA_DEVICE_MODE] = FLAGS_d;
521 genericPluginConfig[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES;
524 if (FLAGS_q.compare("user") == 0) {
525 std::cout << "[ INFO ] Using scale factor of " << FLAGS_sf << std::endl;
526 gnaPluginConfig[GNA_CONFIG_KEY(SCALE_FACTOR)] = std::to_string(FLAGS_sf);
527 } else { // "static" quantization with calculated scale factor
529 std::vector<uint8_t> ptrFeatures;
530 uint32_t numArrays(0), numBytes(0), numFrames(0), numFrameElements(0), numBytesPerElement(0);
531 GetKaldiArkInfo(inputArkName.c_str(), 0, &numArrays, &numBytes);
532 ptrFeatures.resize(numBytes);
533 LoadKaldiArkArray(inputArkName.c_str(),
539 &numBytesPerElement);
541 ScaleFactorForQuantization(ptrFeatures.data(), MAX_VAL_2B_FEAT, numFrames * numFrameElements);
542 slog::info << "Using scale factor of " << scaleFactorInput << " calculated from first utterance."
544 gnaPluginConfig[GNA_CONFIG_KEY(SCALE_FACTOR)] = std::to_string(scaleFactorInput);
548 gnaPluginConfig[GNAConfigParams::KEY_GNA_PRECISION] = "I8";
550 gnaPluginConfig[GNAConfigParams::KEY_GNA_PRECISION] = "I16";
553 gnaPluginConfig[GNAConfigParams::KEY_GNA_LIB_N_THREADS] = std::to_string(FLAGS_nthreads);
554 gnaPluginConfig[GNA_CONFIG_KEY(COMPACT_MODE)] = CONFIG_VALUE(NO);
555 // -----------------------------------------------------------------------------------------------------
557 // --------------------------- 4. Write model to file --------------------------------------------------
558 // Embedded GNA model dumping (for Intel(R) Speech Enabling Developer Kit)
559 if (!FLAGS_we.empty()) {
560 gnaPluginConfig[GNAConfigParams::KEY_GNA_FIRMWARE_MODEL_IMAGE] = FLAGS_we;
562 // -----------------------------------------------------------------------------------------------------
564 // --------------------------- 5. Loading model to the plugin ------------------------------------------
567 genericPluginConfig.insert(std::begin(gnaPluginConfig), std::end(gnaPluginConfig));
569 auto t0 = Time::now();
570 ExecutableNetwork executableNet;
571 if (!FLAGS_m.empty()) {
572 slog::info << "Loading model to the plugin" << slog::endl;
573 executableNet = plugin.LoadNetwork(netBuilder.getNetwork(), genericPluginConfig);
575 slog::info << "Importing model to the plugin" << slog::endl;
576 executableNet = plugin.ImportNetwork(FLAGS_rg.c_str(), genericPluginConfig);
580 ms loadTime = std::chrono::duration_cast<ms>(Time::now() - t0);
581 slog::info << "Model loading time " << loadTime.count() << " ms" << slog::endl;
583 // --------------------------- 6. Exporting gna model using InferenceEngine AOT API---------------------
584 if (!FLAGS_wg.empty()) {
585 slog::info << "Writing GNA Model to file " << FLAGS_wg << slog::endl;
587 executableNet.Export(FLAGS_wg);
588 ms exportTime = std::chrono::duration_cast<ms>(Time::now() - t0);
589 slog::info << "Exporting time " << exportTime.count() << " ms" << slog::endl;
593 if (!FLAGS_we.empty()) {
594 slog::info << "Exported GNA embedded model to file " << FLAGS_we << slog::endl;
598 std::vector<std::pair<InferRequest, size_t>> inferRequests(FLAGS_nthreads);
599 for (auto& inferRequest : inferRequests) {
600 inferRequest = {executableNet.CreateInferRequest(), -1};
602 // -----------------------------------------------------------------------------------------------------
604 // --------------------------- 7. Prepare input blobs --------------------------------------------------
605 /** Taking information about all topology inputs **/
606 ConstInputsDataMap cInputInfo = executableNet.GetInputsInfo();
607 InputsDataMap inputInfo;
608 if (!FLAGS_m.empty()) {
609 inputInfo = netBuilder.getNetwork().getInputsInfo();
612 /** Stores all input blobs data **/
613 if (cInputInfo.size() != 1) {
614 throw std::logic_error("Sample supports only topologies with 1 input");
617 Blob::Ptr ptrInputBlob = inferRequests[0].first.GetBlob(cInputInfo.begin()->first);
619 /** configure input precision if model loaded from IR **/
620 for (auto &item : inputInfo) {
621 Precision inputPrecision = Precision::FP32; // specify Precision::I16 to provide quantized inputs
622 item.second->setPrecision(inputPrecision);
623 item.second->getInputData()->layout = NC; // row major layout
626 // -----------------------------------------------------------------------------------------------------
628 // --------------------------- 8. Prepare output blobs -------------------------------------------------
629 ConstOutputsDataMap cOutputInfo(executableNet.GetOutputsInfo());
630 OutputsDataMap outputInfo;
631 if (!FLAGS_m.empty()) {
632 outputInfo = netBuilder.getNetwork().getOutputsInfo();
635 Blob::Ptr ptrOutputBlob = inferRequests[0].first.GetBlob(cOutputInfo.begin()->first);
637 for (auto &item : outputInfo) {
638 DataPtr outData = item.second;
640 throw std::logic_error("output data pointer is not valid");
643 Precision outputPrecision = Precision::FP32; // specify Precision::I32 to retrieve quantized outputs
644 outData->setPrecision(outputPrecision);
645 outData->layout = NC; // row major layout
647 // -----------------------------------------------------------------------------------------------------
649 // --------------------------- 9. Do inference ---------------------------------------------------------
650 std::vector<uint8_t> ptrUtterance;
651 std::vector<uint8_t> ptrScores;
652 std::vector<uint8_t> ptrReferenceScores;
653 score_error_t frameError, totalError;
655 for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) {
656 std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> utterancePerfMap;
658 uint32_t numFrames(0), numFrameElementsInput(0), numBytesPerElementInput(0), n(0);
659 uint32_t numFramesReference(0), numFrameElementsReference(0), numBytesPerElementReference(0),
660 numBytesReferenceScoreThisUtterance(0);
661 const uint32_t numScoresPerFrame = ptrOutputBlob->size() / batchSize;
662 GetKaldiArkInfo(inputArkName.c_str(), utteranceIndex, &n, &numBytesThisUtterance);
663 ptrUtterance.resize(numBytesThisUtterance);
664 LoadKaldiArkArray(inputArkName.c_str(),
669 &numFrameElementsInput,
670 &numBytesPerElementInput);
672 uint32_t numFrameElementsInputPadded = numFrameElementsInput;
674 if (ptrInputBlob->size() != numFrameElementsInputPadded * batchSize) {
675 throw std::logic_error("network input size(" + std::to_string(ptrInputBlob->size()) +
676 ") mismatch to ark file size (" +
677 std::to_string(numFrameElementsInputPadded * batchSize) + ")");
679 ptrScores.resize(numFrames * numScoresPerFrame * sizeof(float));
680 if (!FLAGS_r.empty()) {
681 std::string refUtteranceName;
682 GetKaldiArkInfo(FLAGS_r.c_str(), utteranceIndex, &n, &numBytesReferenceScoreThisUtterance);
683 ptrReferenceScores.resize(numBytesReferenceScoreThisUtterance);
684 LoadKaldiArkArray(FLAGS_r.c_str(),
689 &numFrameElementsReference,
690 &numBytesPerElementReference);
693 double totalTime = 0.0;
695 std::cout << "Utterance " << utteranceIndex << ": " << std::endl;
697 ClearScoreError(&totalError);
698 totalError.threshold = frameError.threshold = MAX_SCORE_DIFFERENCE;
699 auto inputFrame = &ptrUtterance.front();
700 auto outputFrame = &ptrScores.front();
702 size_t frameIndex{0};
703 uint32_t numFramesThisBatch{batchSize};
705 auto t0 = Time::now();
709 while (frameIndex <= numFrames) {
710 if (frameIndex == numFrames) {
711 bool hasRequests = false;
712 for (auto &inferRequest : inferRequests) {
713 if (inferRequest.second != -1) {
722 bool inferRequestFetched = false;
723 for (auto &inferRequest : inferRequests) {
724 if (frameIndex == numFrames) {
725 numFramesThisBatch = 1;
727 numFramesThisBatch = (numFrames - frameIndex < batchSize) ? (numFrames - frameIndex) : batchSize;
730 if (inferRequest.second != -1) {
731 StatusCode code = inferRequest.first.Wait(
732 InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
734 if (code != StatusCode::OK) {
738 if (!FLAGS_o.empty()) {
739 Blob::Ptr outputBlob = inferRequest.first.GetBlob(cOutputInfo.begin()->first);
740 std::memcpy(outputFrame,
741 outputBlob->buffer(),
742 outputBlob->byteSize());
743 outputFrame += numScoresPerFrame * sizeof(float);
746 if (!FLAGS_r.empty()) {
747 Blob::Ptr outputBlob = inferRequest.first.GetBlob(cOutputInfo.begin()->first);
748 CompareScores(outputBlob->buffer().as<float *>(),
749 &ptrReferenceScores[inferRequest.second *
750 numFrameElementsReference *
751 numBytesPerElementReference],
754 numFrameElementsReference);
755 UpdateScoreError(&frameError, &totalError);
759 inferRequest.second = -1;
761 if (frameIndex == numFrames) {
765 Blob::Ptr inputBlob = inferRequest.first.GetBlob(cInputInfo.begin()->first);
766 std::memcpy(inputBlob->buffer(),
768 inputBlob->byteSize());
770 inferRequest.first.StartAsync();
772 inferRequest.second = frameIndex;
773 frameIndex += numFramesThisBatch;
774 inputFrame += sizeof(float) * numFrameElementsInput * numFramesThisBatch;
775 inferRequestFetched |= true;
778 if (!inferRequestFetched) {
779 std::this_thread::sleep_for(std::chrono::milliseconds(1));
784 std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> callPerfMap;
785 // retrive new counters
786 for (auto inferRequest : inferRequests) {
787 getPerformanceCounters(inferRequest.first, callPerfMap);
788 // summarize retrived counters with all previous
789 sumPerformanceCounters(callPerfMap, utterancePerfMap);
796 ms d = std::chrono::duration_cast<ms>(fs);
797 totalTime += d.count();
799 // resetting state between utterances
800 for (auto &&state : executableNet.QueryState()) {
804 if (!FLAGS_o.empty()) {
805 bool shouldAppend = (utteranceIndex == 0) ? false : true;
806 SaveKaldiArkArray(FLAGS_o.c_str(), shouldAppend, uttName, &ptrScores.front(),
807 numFrames, numScoresPerFrame);
810 /** Show performance results **/
811 std::cout << "Total time in Infer (HW and SW):\t" << totalTime << " ms"
813 std::cout << "Frames in utterance:\t\t\t" << numFrames << " frames"
815 std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast<double>(numFrames) << " ms"
819 printPerformanceCounters(utterancePerfMap, frameIndex, std::cout);
821 if (!FLAGS_r.empty()) {
822 printReferenceCompareResults(totalError, numFrames, std::cout);
824 std::cout << "End of Utterance " << utteranceIndex << std::endl << std::endl;
826 // -----------------------------------------------------------------------------------------------------
828 catch (const std::exception &error) {
829 slog::err << error.what() << slog::endl;
833 slog::err << "Unknown/internal exception happened" << slog::endl;
837 slog::info << "Execution successful" << slog::endl;