From 40c8039250df22881efae58a962834d6b459a562 Mon Sep 17 00:00:00 2001 From: Alexey Suhov Date: Mon, 14 Oct 2019 20:33:41 +0300 Subject: [PATCH] cleanup --- inference-engine/cmake/dependencies.cmake | 20 - inference-engine/cmake/features_ie.cmake | 1 - .../speech_recognition_offline_demo/CMakeLists.txt | 13 - .../speech_recognition_offline_demo/README.md | 153 --- .../speech_recognition_offline_demo/main.cpp | 1199 -------------------- .../speech_sample.hpp | 251 ---- 6 files changed, 1637 deletions(-) delete mode 100644 inference-engine/samples/speech_recognition_offline_demo/CMakeLists.txt delete mode 100644 inference-engine/samples/speech_recognition_offline_demo/README.md delete mode 100644 inference-engine/samples/speech_recognition_offline_demo/main.cpp delete mode 100644 inference-engine/samples/speech_recognition_offline_demo/speech_sample.hpp diff --git a/inference-engine/cmake/dependencies.cmake b/inference-engine/cmake/dependencies.cmake index c16b267..583a8c1 100644 --- a/inference-engine/cmake/dependencies.cmake +++ b/inference-engine/cmake/dependencies.cmake @@ -159,26 +159,6 @@ if (ENABLE_GNA) debug_message(STATUS "gna=" ${GNA}) endif() -if (ENABLE_ROCKHOPER) - set(rh_decoder_version "Rockhopper_1.0.0.682") - - set(INCLUDE_RH_DECODER "include(\"\$\{IE_ROOT_DIR\}/share/ie_rh_decoder.cmake\")") - - RESOLVE_DEPENDENCY(RH_Decoder - ARCHIVE_UNIFIED "${rh_decoder_version}.zip" - TARGET_PATH "${TEMP}/${rh_decoder_version}" - VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*") - - configure_file( - "${IE_MAIN_SOURCE_DIR}/cmake/InitRHDecoder.cmake.in" - "${CMAKE_BINARY_DIR}/share/ie_rh_decoder.cmake" - @ONLY) - - list(APPEND CMAKE_MODULE_PATH ${CMAKE_BINARY_DIR}/share) - # for inference engine in tree build - lets include this finder - include(ie_rh_decoder) -endif() - configure_file( "${IE_MAIN_SOURCE_DIR}/cmake/share/InferenceEngineConfig.cmake.in" "${CMAKE_BINARY_DIR}/share/InferenceEngineConfig.cmake" diff --git a/inference-engine/cmake/features_ie.cmake b/inference-engine/cmake/features_ie.cmake index a0f225b..fe9a2e9 100644 --- a/inference-engine/cmake/features_ie.cmake +++ b/inference-engine/cmake/features_ie.cmake @@ -8,7 +8,6 @@ include (options) #these options are aimed to optimize build time on development system ie_option (ENABLE_GNA "GNA support for inference engine" ON) -ie_option (ENABLE_ROCKHOPER "use Rockhopper decoder for converting / output scores" ON) ie_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ON) diff --git a/inference-engine/samples/speech_recognition_offline_demo/CMakeLists.txt b/inference-engine/samples/speech_recognition_offline_demo/CMakeLists.txt deleted file mode 100644 index ea47315..0000000 --- a/inference-engine/samples/speech_recognition_offline_demo/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2018-2019 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -if(COMMAND init_rh_decoder) - init_rh_decoder() - - ie_add_sample(NAME speech_recognition_offline_demo - SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp" - HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/speech_sample.hpp" - INCLUDE_DIRECTORIES "${libRH_Decoder_INCLUDE_DIRS}" - DEPENDENCIES "${libRH_Decoder_LIBRARIES}") -endif() \ No newline at end of file diff --git a/inference-engine/samples/speech_recognition_offline_demo/README.md b/inference-engine/samples/speech_recognition_offline_demo/README.md deleted file mode 100644 index ace1719..0000000 --- a/inference-engine/samples/speech_recognition_offline_demo/README.md +++ /dev/null @@ -1,153 +0,0 @@ -# Offline 
Automatic Speech Recognition C++ Demo
-
-This topic shows how to run speech recognition. The demo performs acoustic model inference and Weighted Finite State Transducer (WFST) language model decoding based on Kaldi\* acoustic neural models, Intel® Rockhopper Trail language models, and speech feature vectors.
-
-## How It Works
-
-The workflow is as follows:
-1. The application reads command-line parameters and loads a Kaldi-trained neural network and a Kaldi `.ark` speech feature vector file into the Inference Engine plugin.
-2. The application performs inference and passes the acoustic score vectors to the decoding stage, where the Intel® Rockhopper Trail decoder translates them into a text transcription.
-3. The application prints the recognized text on the screen.
-
-### Acoustic and Language Model Setup
-
-Pretrained models are available at the [Intel® Open Source Technology Center](https://download.01.org/openvinotoolkit/models_contrib/speech/kaldi) and via the [Intel® OpenVINO™ Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader). For this sample, we use models from the `librispeech\s5_ext` folder.
-
-To train models from scratch, refer to the Kaldi training recipe shell script `lspeech_s5_ext_run.sh` and the corresponding documentation `lspeech_s5_ext.md`.
-
-To convert a Kaldi acoustic model into an Intermediate Representation (IR) format accepted by this sample, use the following Model Optimizer command:
-
-```sh
-$ python3 mo.py --framework kaldi --input_model lspeech_s5_ext.nnet --counts lspeech_s5_ext.counts --remove_output_softmax
-```
-
-The command produces an IR network consisting of `lspeech_s5_ext.xml` and `lspeech_s5_ext.bin`.
-
-> **NOTE**: Model Optimizer (`mo.py`), the Kaldi-trained neural network (`lspeech_s5_ext.nnet`), and the Kaldi class counts file (`lspeech_s5_ext.counts`) must be in your working directory.
-
-### Speech Recognition
-
-Once the IR is created or downloaded, you can use the following command to run speech recognition on Intel® processors with a GNA coprocessor (or emulation library) and the Rockhopper Trail decoder library:
-
-```sh
-$ ./speech_recognition_offline_demo -d GNA_AUTO -bs 1 -i test_feat_1_10.ark -m lspeech_s5_ext.xml -hmm rht_language_model/rh.hmm -cl rht_language_model/cl.fst -g rht_language_model/g.fst -labels rht_language_model/labels.bin -amsf 0.08
-```
-
-## Sample Output
-
-```
-[ INFO ] InferenceEngine:
-        API version ............ 1.6
-        Build .................. R3
-        Description ....... API
-[ INFO ] Parsing input parameters
-[ INFO ] No extensions provided
-[ INFO ] Loading Inference Engine
-[ INFO ] Device info:
-        GNA
-        GNAPlugin version ......... 1.6
-        Build ........... GNAPlugin
-
-[ INFO ] Loading network files
-[ INFO ] Batch size is 1
-[ INFO ] Using scale factor of 4079.14 calculated from first utterance.
-[ INFO ] Loading model to the device -[ INFO ] Model loading time 301.864 ms -Utterance 0: -1272-128104-0012 ONLY UNFORTUNATELY HIS OWN WORK NEVER DOES GET GOOD - -Total time in Infer (HW and SW): 1522.28 ms -Frames in utterance: 536 frames -Average Infer time per frame: 2.84008 ms -End of Utterance 0 - -Utterance 1: -174-84280-0011 BUT NOW IT DOESN'T SEEM TO MATTER VERY MUCH - -Total time in Infer (HW and SW): 957.779 ms -Frames in utterance: 334 frames -Average Infer time per frame: 2.8676 ms -End of Utterance 1 - -Utterance 2: -1988-147956-0010 I REMEMBERED WHAT THE CONDUCTOR HAD SAID ABOUT HER EYES - -Total time in Infer (HW and SW): 1082.91 ms -Frames in utterance: 384 frames -Average Infer time per frame: 2.82008 ms -End of Utterance 2 - -Utterance 3: -1988-147956-0026 WE WERE SO DEEP IN THE GRASS THAT WE COULD SEE NOTHING BUT THE BLUE SKY OVER US AND THE GOLD TREE IN FRONT OF US - -Total time in Infer (HW and SW): 1963.4 ms -Frames in utterance: 690 frames -Average Infer time per frame: 2.84551 ms -End of Utterance 3 - -Utterance 4: -2086-149220-0045 FEWER WORDS THAN BEFORE BUT WITH THE SAME MYSTERIOUS MUSIC IN - -Total time in Infer (HW and SW): 1283.32 ms -Frames in utterance: 453 frames -Average Infer time per frame: 2.83293 ms -End of Utterance 4 - -Utterance 5: -2277-149874-0011 HE SEEMED TO BE THINKING OF SOMETHING ELSE - -Total time in Infer (HW and SW): 690.602 ms -Frames in utterance: 245 frames -Average Infer time per frame: 2.81878 ms -End of Utterance 5 - -Utterance 6: -2277-149896-0034 HE RANG AGAIN THIS TIME HARDER STILL NO ANSWER - -Total time in Infer (HW and SW): 1128.91 ms -Frames in utterance: 399 frames -Average Infer time per frame: 2.82934 ms -End of Utterance 6 - -Utterance 7: -2277-149897-0015 IN ABOUT AN HOUR AND THREE QUARTERS THE BOY RETURNED - -Total time in Infer (HW and SW): 857.916 ms -Frames in utterance: 302 frames -Average Infer time per frame: 2.84078 ms -End of Utterance 7 - -Utterance 8: -2412-153948-0005 I WAS DELIGHTED WITH THE COUNTRY AND THE MANNER OF LIFE - -Total time in Infer (HW and SW): 897.309 ms -Frames in utterance: 312 frames -Average Infer time per frame: 2.87599 ms -End of Utterance 8 - -Utterance 9: -3081-166546-0044 HE WAS THE PLAIN FACE DETECTIVE WHO HAD SPOKEN TO GEORGE - -Total time in Infer (HW and SW): 1280.3 ms -Frames in utterance: 448 frames -Average Infer time per frame: 2.8578 ms -End of Utterance 9 - -[ INFO ] Execution successful -``` - -## Input Preparation - -Speech Recognition Offline Demo application accepts Kaldi binary `.ark` files holding stacked feature frames. -To prepare such files, please follow steps described in `lspeech_s5_ext.md` from folder `librispeech\s5_ext` of Model Zoo. 
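-
-For orientation, the `.ark` record layout that the demo parses can be walked with a few lines of standalone C++. The sketch below is illustrative only (the `ark_info` tool is hypothetical, not part of the demo) and assumes the single-precision `BFM` matrix records handled by the demo's `GetKaldiArkInfo()`:
-
-```cpp
-#include <cstdint>
-#include <fstream>
-#include <iostream>
-#include <string>
-
-// Prints each utterance name and matrix shape in a Kaldi binary .ark file.
-// Record layout: <name> ' ' '\0'  "BFM " '\4'  <int32 rows> '\4'  <int32 cols>  <rows*cols float32 values>
-int main(int argc, char* argv[]) {
-    if (argc < 2) { std::cerr << "usage: ark_info <file.ark>" << std::endl; return 1; }
-    std::ifstream in(argv[1], std::ios::binary);
-    std::string name, tag;
-    while (std::getline(in, name, '\0')) {                // utterance name (keeps its trailing space)
-        if (!std::getline(in, tag, '\4') || tag != "BFM ") break;
-        uint32_t rows = 0, cols = 0;
-        in.read(reinterpret_cast<char*>(&rows), sizeof(rows));
-        std::getline(in, tag, '\4');                      // consume the control-D separator
-        in.read(reinterpret_cast<char*>(&cols), sizeof(cols));
-        std::cout << name << rows << " x " << cols << std::endl;
-        in.seekg(static_cast<std::streamoff>(rows) * cols * sizeof(float), std::ios::cur);
-    }
-    return 0;
-}
-```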
- -## See Also -* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md) -* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) -* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) diff --git a/inference-engine/samples/speech_recognition_offline_demo/main.cpp b/inference-engine/samples/speech_recognition_offline_demo/main.cpp deleted file mode 100644 index f112f01..0000000 --- a/inference-engine/samples/speech_recognition_offline_demo/main.cpp +++ /dev/null @@ -1,1199 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "speech_sample.hpp" -#include "rockhopper_decoder.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#ifndef ALIGN -#define ALIGN(memSize, pad) ((static_cast((memSize) + pad - 1) / pad) * pad) -#endif -#define MAX_SCORE_DIFFERENCE 0.0001f -#define MAX_VAL_2B_FEAT 16384 - -using namespace InferenceEngine; - -typedef std::chrono::high_resolution_clock Time; -typedef std::chrono::duration> ms; -typedef std::chrono::duration fsec; -typedef struct { - uint32_t numScores; - uint32_t numErrors; - float threshold; - float maxError; - float rmsError; - float sumError; - float sumRmsError; - float sumSquaredError; - float maxRelError; - float sumRelError; - float sumSquaredRelError; -} score_error_t; - -struct InferRequestStruct { - InferRequest inferRequest; - int frameIndex; - uint32_t numFramesThisBatch; -}; - -struct RhDecoderInstanceParams { - RhDecoderInstanceHandle handle; - uint8_t* hmm_data; - uint8_t* cl_data; - uint8_t* g_data; - uint8_t* label_data; -}; - -void GetKaldiArkInfo(const char *fileName, - uint32_t numArrayToFindSize, - uint32_t *ptrNumArrays, - uint32_t *ptrNumMemoryBytes) { - uint32_t numArrays = 0; - uint32_t numMemoryBytes = 0; - - std::ifstream in_file(fileName, std::ios::binary); - if (in_file.good()) { - while (!in_file.eof()) { - std::string line; - uint32_t numRows = 0u, numCols = 0u, num_bytes = 0u; - std::getline(in_file, line, '\0'); // read variable length name followed by space and NUL - std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D - if (line.compare("BFM ") != 0) { - break; - } - in_file.read(reinterpret_cast(&numRows), sizeof(uint32_t)); // read number of rows - std::getline(in_file, line, '\4'); // read control-D - in_file.read(reinterpret_cast(&numCols), sizeof(uint32_t)); // read number of columns - num_bytes = numRows * numCols * sizeof(float); - in_file.seekg(num_bytes, in_file.cur); // read data - - if (numArrays == numArrayToFindSize) { - numMemoryBytes += num_bytes; - } - numArrays++; - } - in_file.close(); - } else { - fprintf(stderr, "Failed to open %s for reading in GetKaldiArkInfo()!\n", fileName); - exit(-1); - } - - if (ptrNumArrays != NULL) *ptrNumArrays = numArrays; - if (ptrNumMemoryBytes != NULL) *ptrNumMemoryBytes = numMemoryBytes; -} - -void LoadKaldiArkArray(const char *fileName, uint32_t arrayIndex, std::string &ptrName, std::vector &memory, - uint32_t *ptrNumRows, uint32_t *ptrNumColumns, uint32_t *ptrNumBytesPerElement) { - std::ifstream in_file(fileName, std::ios::binary); - if (in_file.good()) { - uint32_t i = 0; - while (i < arrayIndex) { - std::string line; - uint32_t numRows = 0u, numCols = 0u; - std::getline(in_file, line, '\0'); // read variable length name followed by space and NUL - 
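-            // Binary ark record layout, as also parsed by GetKaldiArkInfo():
-            //   <name> ' ' '\0'  "BFM " '\4'  <int32 numRows> '\4'  <int32 numCols>  <numRows*numCols float32 values>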
std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D - if (line.compare("BFM ") != 0) { - break; - } - in_file.read(reinterpret_cast(&numRows), sizeof(uint32_t)); // read number of rows - std::getline(in_file, line, '\4'); // read control-D - in_file.read(reinterpret_cast(&numCols), sizeof(uint32_t)); // read number of columns - in_file.seekg(numRows * numCols * sizeof(float), in_file.cur); // read data - i++; - } - if (!in_file.eof()) { - std::string line; - std::getline(in_file, ptrName, '\0'); // read variable length name followed by space and NUL - std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D - if (line.compare("BFM ") != 0) { - fprintf(stderr, "Cannot find array specifier in file %s in LoadKaldiArkArray()!\n", fileName); - exit(-1); - } - in_file.read(reinterpret_cast(ptrNumRows), sizeof(uint32_t)); // read number of rows - std::getline(in_file, line, '\4'); // read control-D - in_file.read(reinterpret_cast(ptrNumColumns), sizeof(uint32_t)); // read number of columns - in_file.read(reinterpret_cast(&memory.front()), - *ptrNumRows * *ptrNumColumns * sizeof(float)); // read array data - } - in_file.close(); - } else { - fprintf(stderr, "Failed to open %s for reading in GetKaldiArkInfo()!\n", fileName); - exit(-1); - } - - *ptrNumBytesPerElement = sizeof(float); -} - -void SaveKaldiArkArray(const char *fileName, - bool shouldAppend, - std::string name, - void *ptrMemory, - uint32_t numRows, - uint32_t numColumns) { - std::ios_base::openmode mode = std::ios::binary; - if (shouldAppend) { - mode |= std::ios::app; - } - std::ofstream out_file(fileName, mode); - if (out_file.good()) { - out_file.write(name.c_str(), name.length()); // write name - out_file.write("\0", 1); - out_file.write("BFM ", 4); - out_file.write("\4", 1); - out_file.write(reinterpret_cast(&numRows), sizeof(uint32_t)); - out_file.write("\4", 1); - out_file.write(reinterpret_cast(&numColumns), sizeof(uint32_t)); - out_file.write(reinterpret_cast(ptrMemory), numRows * numColumns * sizeof(float)); - out_file.close(); - } else { - throw std::runtime_error(std::string("Failed to open %s for writing in SaveKaldiArkArray()!\n") + fileName); - } -} - -float ScaleFactorForQuantization(void *ptrFloatMemory, float targetMax, uint32_t numElements) { - float *ptrFloatFeat = reinterpret_cast(ptrFloatMemory); - float max = 0.0; - float scaleFactor; - - for (uint32_t i = 0; i < numElements; i++) { - if (fabs(ptrFloatFeat[i]) > max) { - max = fabs(ptrFloatFeat[i]); - } - } - - if (max == 0) { - scaleFactor = 1.0; - } else { - scaleFactor = targetMax / max; - } - - return (scaleFactor); -} - -void ClearScoreError(score_error_t *error) { - error->numScores = 0; - error->numErrors = 0; - error->maxError = 0.0; - error->rmsError = 0.0; - error->sumError = 0.0; - error->sumRmsError = 0.0; - error->sumSquaredError = 0.0; - error->maxRelError = 0.0; - error->sumRelError = 0.0; - error->sumSquaredRelError = 0.0; -} - -void UpdateScoreError(score_error_t *error, score_error_t *totalError) { - totalError->numErrors += error->numErrors; - totalError->numScores += error->numScores; - totalError->sumRmsError += error->rmsError; - totalError->sumError += error->sumError; - totalError->sumSquaredError += error->sumSquaredError; - if (error->maxError > totalError->maxError) { - totalError->maxError = error->maxError; - } - totalError->sumRelError += error->sumRelError; - totalError->sumSquaredRelError += error->sumSquaredRelError; - if (error->maxRelError > totalError->maxRelError) { - 
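-        // keep the worst-case relative error seen across all utterances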
totalError->maxRelError = error->maxRelError; - } -} - -uint32_t CompareScores(float *ptrScoreArray, - void *ptrRefScoreArray, - score_error_t *scoreError, - uint32_t numRows, - uint32_t numColumns) { - uint32_t numErrors = 0; - - ClearScoreError(scoreError); - - float *A = ptrScoreArray; - float *B = reinterpret_cast(ptrRefScoreArray); - for (uint32_t i = 0; i < numRows; i++) { - for (uint32_t j = 0; j < numColumns; j++) { - float score = A[i * numColumns + j]; - float refscore = B[i * numColumns + j]; - float error = fabs(refscore - score); - float rel_error = error / (static_cast(fabs(refscore)) + 1e-20f); - float squared_error = error * error; - float squared_rel_error = rel_error * rel_error; - scoreError->numScores++; - scoreError->sumError += error; - scoreError->sumSquaredError += squared_error; - if (error > scoreError->maxError) { - scoreError->maxError = error; - } - scoreError->sumRelError += rel_error; - scoreError->sumSquaredRelError += squared_rel_error; - if (rel_error > scoreError->maxRelError) { - scoreError->maxRelError = rel_error; - } - if (error > scoreError->threshold) { - numErrors++; - } - } - } - scoreError->rmsError = sqrt(scoreError->sumSquaredError / (numRows * numColumns)); - scoreError->sumRmsError += scoreError->rmsError; - scoreError->numErrors = numErrors; - - return (numErrors); -} - -float StdDevError(score_error_t error) { - return (sqrt(error.sumSquaredError / error.numScores - - (error.sumError / error.numScores) * (error.sumError / error.numScores))); -} - -float StdDevRelError(score_error_t error) { - return (sqrt(error.sumSquaredRelError / error.numScores - - (error.sumRelError / error.numScores) * (error.sumRelError / error.numScores))); -} - -#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) -#if defined(_WIN32) || defined(WIN32) -#include -#include -#else - -#include - -#endif - -inline void native_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) { - size_t level = *eax; -#if defined(_WIN32) || defined(WIN32) - int regs[4] = {static_cast(*eax), static_cast(*ebx), static_cast(*ecx), static_cast(*edx)}; - __cpuid(regs, level); - *eax = static_cast(regs[0]); - *ebx = static_cast(regs[1]); - *ecx = static_cast(regs[2]); - *edx = static_cast(regs[3]); -#else - __get_cpuid(level, eax, ebx, ecx, edx); -#endif -} - -// return GNA module frequency in MHz -float getGnaFrequencyMHz() { - uint32_t eax = 1; - uint32_t ebx = 0; - uint32_t ecx = 0; - uint32_t edx = 0; - uint32_t family = 0; - uint32_t model = 0; - const uint8_t sixth_family = 6; - const uint8_t cannon_lake_model = 102; - const uint8_t gemini_lake_model = 122; - - native_cpuid(&eax, &ebx, &ecx, &edx); - family = (eax >> 8) & 0xF; - - // model is the concatenation of two fields - // | extended model | model | - // copy extended model data - model = (eax >> 16) & 0xF; - // shift - model <<= 4; - // copy model data - model += (eax >> 4) & 0xF; - - if (family == sixth_family && model == cannon_lake_model) { - return 400; - } else if (family == sixth_family && - model == gemini_lake_model) { - return 200; - } else { - // counters not supported and we retrns just default value - return 1; - } -} - -#endif // if not ARM - -void printReferenceCompareResults(score_error_t const &totalError, - size_t framesNum, - std::ostream &stream) { - stream << " max error: " << - totalError.maxError << std::endl; - stream << " avg error: " << - totalError.sumError / totalError.numScores << std::endl; - stream << " avg rms error: " << - 
totalError.sumRmsError / framesNum << std::endl; - stream << " stdev error: " << - StdDevError(totalError) << std::endl << std::endl; - stream << std::endl; -} - -void printPerformanceCounters(std::map const &utterancePerfMap, - size_t callsNum, - std::ostream &stream, std::string fullDeviceName) { -#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) - std::ios_base::fmtflags fmt_flags(stream.flags() ); - stream << std::endl << "Performance counts:" << std::endl; - stream << std::setw(10) << std::right << "" << "Counter descriptions"; - stream << std::setw(22) << "Utt scoring time"; - stream << std::setw(18) << "Avg infer time"; - stream << std::endl; - - stream << std::setw(46) << "(ms)"; - stream << std::setw(24) << "(us per call)"; - stream << std::endl; - - for (const auto &it : utterancePerfMap) { - std::string const &counter_name = it.first; - float current_units = static_cast(it.second.realTime_uSec); - float call_units = current_units / callsNum; - // if GNA HW counters - // get frequency of GNA module - float freq = getGnaFrequencyMHz(); - current_units /= freq * 1000; - call_units /= freq; - stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1); - stream << std::setw(16) << std::right << current_units; - stream << std::setw(21) << std::right << call_units; - stream << std::endl; - } - stream << std::endl; - std::cout << std::endl; - std::cout << "Full device name: " << fullDeviceName << std::endl; - std::cout << std::endl; - stream.flags(fmt_flags); -#endif -} - -void getPerformanceCounters(InferenceEngine::InferRequest &request, - std::map &perfCounters) { - auto retPerfCounters = request.GetPerformanceCounts(); - - for (const auto &pair : retPerfCounters) { - perfCounters[pair.first] = pair.second; - } -} - -void sumPerformanceCounters(std::map const &perfCounters, - std::map &totalPerfCounters) { - for (const auto &pair : perfCounters) { - totalPerfCounters[pair.first].realTime_uSec += pair.second.realTime_uSec; - } -} - -bool ParseAndCheckCommandLine(int argc, char *argv[]) { - // ---------------------------Parsing and validation of input args-------------------------------------- - slog::info << "Parsing input parameters" << slog::endl; - - gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); - if (FLAGS_h) { - showUsage(); - showAvailableDevices(); - return false; - } - bool isDumpMode = !FLAGS_wg.empty() || !FLAGS_we.empty(); - - // input not required only in dump mode and if external scale factor provided - if (FLAGS_i.empty() && (!isDumpMode || FLAGS_q.compare("user") != 0)) { - if (isDumpMode) { - throw std::logic_error("In model dump mode either static quantization is used (-i) or user scale" - " factor need to be provided. See -q user option"); - } - throw std::logic_error("Input file not set. 
Please use -i."); - } - - if (FLAGS_m.empty() && FLAGS_rg.empty()) { - throw std::logic_error("Either IR file (-m) or GNAModel file (-rg) need to be set."); - } - - if ((!FLAGS_m.empty() && !FLAGS_rg.empty())) { - throw std::logic_error("Only one of -m and -rg is allowed."); - } - - std::vector supportedDevices = { - "CPU", - "GPU", - "GNA_AUTO", - "GNA_HW", - "GNA_SW_EXACT", - "GNA_SW", - "GNA_SW_FP32", - "HETERO:GNA,CPU", - "HETERO:GNA_HW,CPU", - "HETERO:GNA_SW_EXACT,CPU", - "HETERO:GNA_SW,CPU", - "HETERO:GNA_SW_FP32,CPU", - "MYRIAD" - }; - - if (std::find(supportedDevices.begin(), supportedDevices.end(), FLAGS_d) == supportedDevices.end()) { - throw std::logic_error("Specified device is not supported."); - } - - float scaleFactorInput = static_cast(FLAGS_sf); - if (scaleFactorInput <= 0.0f) { - throw std::logic_error("Scale factor out of range (must be non-negative)."); - } - - uint32_t batchSize = (uint32_t) FLAGS_bs; - if ((batchSize < 1) || (batchSize > 8)) { - throw std::logic_error("Batch size out of range (1..8)."); - } - - /** default is a static quantisation **/ - if ((FLAGS_q.compare("static") != 0) && (FLAGS_q.compare("dynamic") != 0) && (FLAGS_q.compare("user") != 0)) { - throw std::logic_error("Quantization mode not supported (static, dynamic, user)."); - } - - if (FLAGS_q.compare("dynamic") == 0) { - throw std::logic_error("Dynamic quantization not yet supported."); - } - - if (FLAGS_qb != 16 && FLAGS_qb != 8) { - throw std::logic_error("Only 8 or 16 bits supported."); - } - - if (FLAGS_nthreads <= 0) { - throw std::logic_error("Invalid value for 'nthreads' argument. It must be greater that or equal to 0"); - } - - if (FLAGS_cw_r < 0) { - throw std::logic_error("Invalid value for 'cw_r' argument. It must be greater than or equal to 0"); - } - - if (FLAGS_cw_l < 0) { - throw std::logic_error("Invalid value for 'cw_l' argument. It must be greater than or equal to 0"); - } - - // RH decoder parameters - if (FLAGS_hmm.empty()) { - throw std::logic_error("RH HMM model file not set. Please use -hmm."); - } - if (FLAGS_labels.empty()) { - throw std::logic_error("RH labels file not set. Please use -labels."); - } - if (FLAGS_g.empty()) { - throw std::logic_error("RH LM: G.fst model file not set. Please use -g."); - } - if (FLAGS_cl.empty()) { - throw std::logic_error("RH LM: CL.fst model file not set. 
Please use -cl."); - } - - return true; -} - -uint8_t* ReadBinaryFile(const char* filename, uint32_t* size) { - if (nullptr == size) { - throw std::logic_error("Size parameter is null"); - } - - FILE * f = fopen(filename, "rb"); - if (!f) { - throw std::runtime_error("Failed to open binary file " + std::string(filename)); - } - - int32_t res = fseek(f, 0, SEEK_END); - if (res != 0) { - fclose(f); - throw std::runtime_error("Error occured while loading (fseek) file " + std::string(filename)); - } - - auto fileSize = ftell(f); - if (fileSize < 0) { - fclose(f); - throw std::runtime_error("Error occured while loading (ftell) file " + std::string(filename)); - return nullptr; - } - - res = fseek(f, 0, SEEK_SET); - uint8_t* data = new (std::nothrow) uint8_t[fileSize]; - if (!data) { - fclose(f); - throw std::runtime_error("Not enough memory to load file " + std::string(filename)); - } - - *size = fread(data, 1, fileSize, f); - fclose(f); - - if (*size != fileSize) { - delete[] data; - throw std::runtime_error("Could not read all the data from file " + std::string(filename)); - } - - return data; -} - -void InitializeRhDecoder(RhDecoderInstanceParams& instanceParams, int32_t scoreVectorSize) { - uint32_t hmm_size = 0; - uint32_t cl_size = 0; - uint32_t g_size = 0; - uint32_t label_size = 0; - - instanceParams.hmm_data = ReadBinaryFile(FLAGS_hmm.c_str(), &hmm_size); - instanceParams.cl_data = ReadBinaryFile(FLAGS_cl.c_str(), &cl_size); - instanceParams.g_data = ReadBinaryFile(FLAGS_g.c_str(), &g_size); - instanceParams.label_data = ReadBinaryFile(FLAGS_labels.c_str(), &label_size); - - if (instanceParams.hmm_data && instanceParams.cl_data && - instanceParams.g_data && instanceParams.label_data) { - RhDecoderStatus status = RhDecoderCreateInstance(&instanceParams.handle); - - do { - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to create decoder"); - } - - status = RhDecoderSetDefaultParameterValues(instanceParams.handle, - RhAcousticModelType::RH_ACOUSTIC_MODEL_TYPE_GENERIC_CHAIN); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to set default decoder values"); - } - - // now overwrite some of the parameters - float acoustic_scale_factor = static_cast(FLAGS_amsf); - status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_ACOUSTIC_SCALE_FACTOR, - &acoustic_scale_factor, sizeof(float)); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to set parameter acoustic_scale_factor value"); - } - - status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_ACOUSTIC_SCORE_VECTOR_SIZE, - &scoreVectorSize, sizeof(int)); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to set parameter score_vector_size value"); - } - - float beam_width = static_cast(FLAGS_beam_width); - status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_BEAM_WIDTH, - &beam_width, sizeof(float)); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to set parameter beam_width value"); - } - - status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_NBEST, - &FLAGS_nbest, sizeof(int)); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to set parameter nbest value"); - } - - status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_G_CACHE_LOG_SIZE, - &FLAGS_gcls, sizeof(int)); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw 
std::logic_error("Failed to set parameter g_cache_log_size value"); - } - - status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_TRACE_BACK_LOG_SIZE, - &FLAGS_tbls, sizeof(int)); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to set parameter trace_back_log_size value"); - } - - status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_MIN_STABLE_FRAMES, - &FLAGS_msf, sizeof(int)); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to set parameter min_stable_frames value"); - } - - status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_TOKEN_BUFFER_SIZE, - &FLAGS_tbs, sizeof(int)); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to set parameter token_buffer_size value"); - } - - status = RhDecoderSetupResource(instanceParams.handle, - RhResourceType::HMM, instanceParams.hmm_data, hmm_size); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to load HMM model"); - } - - status = RhDecoderSetupResource(instanceParams.handle, - RhResourceType::PRONUNCIATION_MODEL, instanceParams.cl_data, cl_size); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to load pronunciation model"); - } - - status = RhDecoderSetupResource(instanceParams.handle, - RhResourceType::LANGUAGE_MODEL, instanceParams.g_data, g_size); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to load language model"); - } - - status = RhDecoderSetupResource(instanceParams.handle, - RhResourceType::LABELS, instanceParams.label_data, label_size); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to load labels"); - } - - status = RhDecoderInitInstance(instanceParams.handle); - if (RhDecoderStatus::RH_DECODER_SUCCESS != status) { - throw std::logic_error("Failed to initialize decoder"); - } - } while (0); - } else { - throw std::logic_error("Failed to read one of the resources"); - } -} - -void FreeRhDecoder(RhDecoderInstanceParams& instanceParams) { - if (instanceParams.handle) { - RhDecoderStatus status = RhDecoderFreeInstance(instanceParams.handle); - if (status != RH_DECODER_SUCCESS) { - slog::err << "Failed to free decoder. Status: " << status << slog::endl; - throw std::logic_error("Failed to free decoder. 
Status: " + std::to_string(status)); - } - } - - if (instanceParams.hmm_data) { - delete[] instanceParams.hmm_data; - instanceParams.hmm_data = nullptr; - } - - if (instanceParams.cl_data) { - delete[] instanceParams.cl_data; - instanceParams.cl_data = nullptr; - } - - if (instanceParams.g_data) { - delete[] instanceParams.g_data; - instanceParams.g_data = nullptr; - } - - if (instanceParams.label_data) { - delete[] instanceParams.label_data; - instanceParams.label_data = nullptr; - } -} - -/** - * @brief The entry point for inference engine automatic speech recognition sample - * @file speech_sample/main.cpp - * @example speech_sample/main.cpp - */ -int main(int argc, char *argv[]) { - try { - slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl; - - // ------------------------------ Parsing and validation of input args --------------------------------- - if (!ParseAndCheckCommandLine(argc, argv)) { - return 0; - } - - if (FLAGS_l.empty()) { - slog::info << "No extensions provided" << slog::endl; - } - - auto isFeature = [&](const std::string xFeature) { return FLAGS_d.find(xFeature) != std::string::npos; }; - - bool useGna = isFeature("GNA"); - bool useHetero = isFeature("HETERO"); - std::string deviceStr = - useHetero && useGna ? "HETERO:GNA,CPU" : FLAGS_d.substr(0, (FLAGS_d.find("_"))); - float scaleFactorInput = static_cast(FLAGS_sf); - uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : (uint32_t) FLAGS_bs; - - std::vector inputArkFiles; - std::vector numBytesThisUtterance; - uint32_t numUtterances(0); - if (!FLAGS_i.empty()) { - std::string outStr; - std::istringstream stream(FLAGS_i); - - uint32_t currentNumUtterances(0), currentNumBytesThisUtterance(0); - while (getline(stream, outStr, ',')) { - std::string filename(fileNameNoExt(outStr) + ".ark"); - inputArkFiles.push_back(filename); - - GetKaldiArkInfo(filename.c_str(), 0, ¤tNumUtterances, ¤tNumBytesThisUtterance); - if (numUtterances == 0) { - numUtterances = currentNumUtterances; - } else if (currentNumUtterances != numUtterances) { - throw std::logic_error("Incorrect input files. Number of utterance must be the same for all ark files"); - } - numBytesThisUtterance.push_back(currentNumBytesThisUtterance); - } - } - size_t numInputArkFiles(inputArkFiles.size()); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 1. Load inference engine ------------------------------------- - slog::info << "Loading Inference Engine" << slog::endl; - Core ie; - - /** Printing device version **/ - slog::info << "Device info: " << slog::endl; - std::cout << ie.GetVersions(deviceStr) << std::endl; - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------ - slog::info << "Loading network files" << slog::endl; - - CNNNetReader netBuilder; - if (!FLAGS_m.empty()) { - /** Read network model **/ - netBuilder.ReadNetwork(FLAGS_m); - - /** Extract model name and load weights **/ - std::string binFileName = fileNameNoExt(FLAGS_m) + ".bin"; - netBuilder.ReadWeights(binFileName); - - // ------------------------------------------------------------------------------------------------- - - // --------------------------- 3. Set batch size --------------------------------------------------- - /** Set batch size. 
Unlike in imaging, batching in time (rather than space) is done for speech recognition. **/ - netBuilder.getNetwork().setBatchSize(batchSize); - slog::info << "Batch size is " << std::to_string(netBuilder.getNetwork().getBatchSize()) - << slog::endl; - } - - /** Setting parameter for per layer metrics **/ - std::map gnaPluginConfig; - std::map genericPluginConfig; - if (useGna) { - std::string gnaDevice = - useHetero ? FLAGS_d.substr(FLAGS_d.find("GNA"), FLAGS_d.find(",") - FLAGS_d.find("GNA")) : FLAGS_d; - gnaPluginConfig[GNAConfigParams::KEY_GNA_DEVICE_MODE] = - gnaDevice.find("_") == std::string::npos ? "GNA_AUTO" : gnaDevice; - } - - if (FLAGS_pc) { - genericPluginConfig[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES; - } - - if (FLAGS_q.compare("user") == 0) { - if (numInputArkFiles > 1) { - std::string errMessage("Incorrect use case for multiple input ark files. Please don't use -q 'user' for this case."); - throw std::logic_error(errMessage); - } - slog::info << "Using scale factor of " << FLAGS_sf << slog::endl; - gnaPluginConfig[GNA_CONFIG_KEY(SCALE_FACTOR)] = std::to_string(FLAGS_sf); - } else { - // "static" quantization with calculated scale factor - for (size_t i = 0; i < numInputArkFiles; i++) { - auto inputArkName = inputArkFiles[i].c_str(); - std::string name; - std::vector ptrFeatures; - uint32_t numArrays(0), numBytes(0), numFrames(0), numFrameElements(0), numBytesPerElement(0); - GetKaldiArkInfo(inputArkName, 0, &numArrays, &numBytes); - ptrFeatures.resize(numBytes); - LoadKaldiArkArray(inputArkName, - 0, - name, - ptrFeatures, - &numFrames, - &numFrameElements, - &numBytesPerElement); - scaleFactorInput = - ScaleFactorForQuantization(ptrFeatures.data(), MAX_VAL_2B_FEAT, numFrames * numFrameElements); - slog::info << "Using scale factor of " << scaleFactorInput << " calculated from first utterance." - << slog::endl; - std::string scaleFactorConfigKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_") + std::to_string(i); - gnaPluginConfig[scaleFactorConfigKey] = std::to_string(scaleFactorInput); - } - } - - if (FLAGS_qb == 8) { - gnaPluginConfig[GNAConfigParams::KEY_GNA_PRECISION] = "I8"; - } else { - gnaPluginConfig[GNAConfigParams::KEY_GNA_PRECISION] = "I16"; - } - - gnaPluginConfig[GNAConfigParams::KEY_GNA_LIB_N_THREADS] = std::to_string((FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : FLAGS_nthreads); - gnaPluginConfig[GNA_CONFIG_KEY(COMPACT_MODE)] = CONFIG_VALUE(NO); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 4. Write model to file -------------------------------------------------- - // Embedded GNA model dumping (for Intel(R) Speech Enabling Developer Kit) - if (!FLAGS_we.empty()) { - gnaPluginConfig[GNAConfigParams::KEY_GNA_FIRMWARE_MODEL_IMAGE] = FLAGS_we; - } - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 5. 
Loading model to the device ------------------------------------------ - - if (useGna) { - genericPluginConfig.insert(std::begin(gnaPluginConfig), std::end(gnaPluginConfig)); - } - auto t0 = Time::now(); - ExecutableNetwork executableNet; - - if (!FLAGS_m.empty()) { - slog::info << "Loading model to the device" << slog::endl; - executableNet = ie.LoadNetwork(netBuilder.getNetwork(), deviceStr, genericPluginConfig); - } else { - slog::info << "Importing model to the device" << slog::endl; - executableNet = ie.ImportNetwork(FLAGS_rg.c_str(), deviceStr, genericPluginConfig); - } - - ms loadTime = std::chrono::duration_cast(Time::now() - t0); - slog::info << "Model loading time " << loadTime.count() << " ms" << slog::endl; - - // --------------------------- 6. Exporting gna model using InferenceEngine AOT API--------------------- - if (!FLAGS_wg.empty()) { - slog::info << "Writing GNA Model to file " << FLAGS_wg << slog::endl; - t0 = Time::now(); - executableNet.Export(FLAGS_wg); - ms exportTime = std::chrono::duration_cast(Time::now() - t0); - slog::info << "Exporting time " << exportTime.count() << " ms" << slog::endl; - return 0; - } - - if (!FLAGS_we.empty()) { - slog::info << "Exported GNA embedded model to file " << FLAGS_we << slog::endl; - return 0; - } - - std::vector inferRequests((FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : FLAGS_nthreads); - for (auto& inferRequest : inferRequests) { - inferRequest = {executableNet.CreateInferRequest(), -1, batchSize}; - } - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 7. Prepare input blobs -------------------------------------------------- - /** Taking information about all topology inputs **/ - ConstInputsDataMap cInputInfo = executableNet.GetInputsInfo(); - /** Stores all input blobs data **/ - if (cInputInfo.size() != numInputArkFiles) { - throw std::logic_error("Number of network inputs(" - + std::to_string(cInputInfo.size()) + ") is not equal to number of ark files(" - + std::to_string(numInputArkFiles) + ")"); - } - - std::vector ptrInputBlobs; - for (auto& input : cInputInfo) { - ptrInputBlobs.push_back(inferRequests.begin()->inferRequest.GetBlob(input.first)); - } - - InputsDataMap inputInfo; - if (!FLAGS_m.empty()) { - inputInfo = netBuilder.getNetwork().getInputsInfo(); - } - /** configure input precision if model loaded from IR **/ - for (auto &item : inputInfo) { - Precision inputPrecision = Precision::FP32; // specify Precision::I16 to provide quantized inputs - item.second->setPrecision(inputPrecision); - item.second->getInputData()->setLayout(Layout::NC); // row major layout - } - - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 8. 
Prepare output blobs ------------------------------------------------- - ConstOutputsDataMap cOutputInfo(executableNet.GetOutputsInfo()); - OutputsDataMap outputInfo; - if (!FLAGS_m.empty()) { - outputInfo = netBuilder.getNetwork().getOutputsInfo(); - } - - Blob::Ptr ptrOutputBlob = inferRequests.begin()->inferRequest.GetBlob(cOutputInfo.rbegin()->first); - - for (auto &item : outputInfo) { - DataPtr outData = item.second; - if (!outData) { - throw std::logic_error("output data pointer is not valid"); - } - - Precision outputPrecision = Precision::FP32; // specify Precision::I32 to retrieve quantized outputs - outData->setPrecision(outputPrecision); - outData->setLayout(Layout::NC); // row major layout - } - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 9. Initialize RH decoder ------------------------------------------------ - - RhDecoderInstanceParams rhDecoderInstanceParams{ nullptr }; - auto lastLayerOutputCount = outputInfo.begin()->second->getDims()[1]; - InitializeRhDecoder(rhDecoderInstanceParams, lastLayerOutputCount); - - // allocate 1MB for result - std::vector rh_utterance_transcription(1024 * 1024); - - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 10. Do inference -------------------------------------------------------- - - std::vector> ptrUtterances; - std::vector ptrScores; - std::vector ptrReferenceScores; - score_error_t frameError, totalError; - - ptrUtterances.resize(inputArkFiles.size()); - for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) { - std::map utterancePerfMap; - std::string uttName; - uint32_t numFrames(0), n(0); - std::vector numFrameElementsInput; - - uint32_t numFramesReference(0), numFrameElementsReference(0), numBytesPerElementReference(0), - numBytesReferenceScoreThisUtterance(0); - const uint32_t numScoresPerFrame = ptrOutputBlob->size() / batchSize; - - numFrameElementsInput.resize(numInputArkFiles); - for (size_t i = 0; i < inputArkFiles.size(); i++) { - std::vector ptrUtterance; - auto inputArkFilename = inputArkFiles[i].c_str(); - uint32_t currentNumFrames(0), currentNumFrameElementsInput(0), currentNumBytesPerElementInput(0); - GetKaldiArkInfo(inputArkFilename, utteranceIndex, &n, &numBytesThisUtterance[i]); - ptrUtterance.resize(numBytesThisUtterance[i]); - LoadKaldiArkArray(inputArkFilename, - utteranceIndex, - uttName, - ptrUtterance, - ¤tNumFrames, - ¤tNumFrameElementsInput, - ¤tNumBytesPerElementInput); - if (numFrames == 0) { - numFrames = currentNumFrames; - } else if (numFrames != currentNumFrames) { - std::string errMessage("Number of frames in ark files is different: " + std::to_string(numFrames) + - " and " + std::to_string(currentNumFrames)); - throw std::logic_error(errMessage); - } - - ptrUtterances[i] = ptrUtterance; - numFrameElementsInput[i] = currentNumFrameElementsInput; - } - - int i = 0; - for (auto& ptrInputBlob : ptrInputBlobs) { - if (ptrInputBlob->size() != numFrameElementsInput[i++] * batchSize) { - throw std::logic_error("network input size(" + std::to_string(ptrInputBlob->size()) + - ") mismatch to ark file size (" + - std::to_string(numFrameElementsInput[i-1] * batchSize) + ")"); - } - } - - ptrScores.resize(numFrames * numScoresPerFrame * sizeof(float)); - if (!FLAGS_r.empty()) { - std::string refUtteranceName; - GetKaldiArkInfo(FLAGS_r.c_str(), utteranceIndex, &n, 
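                            // out: size in bytes of this utterance's reference scores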
&numBytesReferenceScoreThisUtterance); - ptrReferenceScores.resize(numBytesReferenceScoreThisUtterance); - LoadKaldiArkArray(FLAGS_r.c_str(), - utteranceIndex, - refUtteranceName, - ptrReferenceScores, - &numFramesReference, - &numFrameElementsReference, - &numBytesPerElementReference); - } - - double totalTime = 0.0; - - std::cout << "Utterance " << utteranceIndex << ": " << std::endl; - - ClearScoreError(&totalError); - totalError.threshold = frameError.threshold = MAX_SCORE_DIFFERENCE; - auto outputFrame = &ptrScores.front(); - std::vector inputFrame; - for (auto& ut : ptrUtterances) { - inputFrame.push_back(&ut.front()); - } - - std::map callPerfMap; - - size_t frameIndex = 0; - uint32_t numFramesArkFile = numFrames; - numFrames += FLAGS_cw_l + FLAGS_cw_r; - uint32_t numFramesThisBatch{batchSize}; - - auto t0 = Time::now(); - auto t1 = t0; - - while (frameIndex <= numFrames) { - if (frameIndex == numFrames) { - if (std::find_if(inferRequests.begin(), - inferRequests.end(), - [&](InferRequestStruct x) { return (x.frameIndex != -1); } ) == inferRequests.end()) { - break; - } - } - - bool inferRequestFetched = false; - for (auto &inferRequest : inferRequests) { - if (frameIndex == numFrames) { - numFramesThisBatch = 1; - } else { - numFramesThisBatch = (numFrames - frameIndex < batchSize) ? (numFrames - frameIndex) - : batchSize; - } - - if (inferRequest.frameIndex != -1) { - StatusCode code = inferRequest.inferRequest.Wait( - InferenceEngine::IInferRequest::WaitMode::RESULT_READY); - - if (code != StatusCode::OK) { - if (!useHetero) continue; - if (code != StatusCode::INFER_NOT_STARTED) continue; - } - - if (inferRequest.frameIndex >= 0) { - Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.rbegin()->first); - RhDecoderInfo info; - const float* acoustic_score_vector_index = outputBlob->buffer(); - - for (uint32_t f = 0; f < inferRequest.numFramesThisBatch; ++f) { - RhDecoderStatus rh_status = RhDecoderProcessFrame(rhDecoderInstanceParams.handle, - acoustic_score_vector_index, numScoresPerFrame, &info); - if (RhDecoderStatus::RH_DECODER_SUCCESS != rh_status) { - throw std::logic_error( - "Decoder failed to process frame: " + std::to_string(inferRequest.frameIndex)); - } - if (info.is_result_stable || inferRequest.frameIndex + f == numFrames - 1) { - RhDecoderGetResult(rhDecoderInstanceParams.handle, - RhDecoderResultType::RH_DECODER_FINAL_RESULT, - rh_utterance_transcription.data(), - rh_utterance_transcription.size()); - if (RhDecoderStatus::RH_DECODER_SUCCESS != rh_status) { - throw std::logic_error("Failed to retrieve speech recognition result"); - } - - std::cout << uttName << "\t" << rh_utterance_transcription.data() << std::endl; - } - - acoustic_score_vector_index += lastLayerOutputCount; - } - - if (!FLAGS_o.empty()) { - outputFrame = - &ptrScores.front() + numScoresPerFrame * sizeof(float) * (inferRequest.frameIndex); - Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.rbegin()->first); - auto byteSize = inferRequest.numFramesThisBatch * numScoresPerFrame * sizeof(float); - std::memcpy(outputFrame, - outputBlob->buffer(), - byteSize); - } - - if (!FLAGS_r.empty()) { - Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.begin()->first); - CompareScores(outputBlob->buffer().as(), - &ptrReferenceScores[inferRequest.frameIndex * - numFrameElementsReference * - numBytesPerElementReference], - &frameError, - inferRequest.numFramesThisBatch, - numFrameElementsReference); - UpdateScoreError(&frameError, &totalError); - } - if 
(FLAGS_pc) { - // retrive new counters - getPerformanceCounters(inferRequest.inferRequest, callPerfMap); - // summarize retrived counters with all previous - sumPerformanceCounters(callPerfMap, utterancePerfMap); - } - } - } - - if (frameIndex == numFrames) { - inferRequest.frameIndex = -1; - continue; - } - - ptrInputBlobs.clear(); - for (auto& input : cInputInfo) { - ptrInputBlobs.push_back(inferRequest.inferRequest.GetBlob(input.first)); - } - - for (size_t i = 0; i < numInputArkFiles; ++i) { - std::memcpy(ptrInputBlobs[i]->buffer(), - inputFrame[i], - ptrInputBlobs[i]->byteSize()); - } - - int index = static_cast(frameIndex) - (FLAGS_cw_l + FLAGS_cw_r); - inferRequest.inferRequest.StartAsync(); - inferRequest.frameIndex = index < 0 ? -2 : index; - inferRequest.numFramesThisBatch = numFramesThisBatch; - - frameIndex += numFramesThisBatch; - for (size_t j = 0; j < inputArkFiles.size(); j++) { - if (FLAGS_cw_l > 0 || FLAGS_cw_r > 0) { - int i = frameIndex - FLAGS_cw_l; - if (i > 0 && i < static_cast(numFramesArkFile)) { - inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch; - } else if (i >= static_cast(numFramesArkFile)) { - inputFrame[j] = &ptrUtterances[0].front() + - (numFramesArkFile - 1) * sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch; - } else if (i < 0) { - inputFrame[j] = &ptrUtterances[0].front(); - } - } else { - inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch; - } - } - inferRequestFetched |= true; - } - - if (!inferRequestFetched) { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - continue; - } - } - t1 = Time::now(); - - fsec fs = t1 - t0; - ms d = std::chrono::duration_cast(fs); - totalTime += d.count(); - - // resetting state between utterances - for (auto &&state : executableNet.QueryState()) { - state.Reset(); - } - - if (!FLAGS_o.empty()) { - bool shouldAppend = (utteranceIndex == 0) ? false : true; - SaveKaldiArkArray(FLAGS_o.c_str(), shouldAppend, uttName, &ptrScores.front(), - numFrames, numScoresPerFrame); - } - - /** Show performance results **/ - std::cout << "Total time in Infer (HW and SW):\t" << totalTime << " ms" - << std::endl; - std::cout << "Frames in utterance:\t\t\t" << numFrames << " frames" - << std::endl; - std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast(numFrames) << " ms" - << std::endl; - if (FLAGS_pc) { - // print - printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d)); - } - if (!FLAGS_r.empty()) { - printReferenceCompareResults(totalError, numFrames, std::cout); - } - std::cout << "End of Utterance " << utteranceIndex << std::endl << std::endl; - } - - FreeRhDecoder(rhDecoderInstanceParams); - // ----------------------------------------------------------------------------------------------------- - } - catch (const std::exception &error) { - slog::err << error.what() << slog::endl; - return 1; - } - catch (...) 
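    // catch-all for exceptions not derived from std::exception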
{ - slog::err << "Unknown/internal exception happened" << slog::endl; - return 1; - } - - slog::info << "Execution successful" << slog::endl; - return 0; -} diff --git a/inference-engine/samples/speech_recognition_offline_demo/speech_sample.hpp b/inference-engine/samples/speech_recognition_offline_demo/speech_sample.hpp deleted file mode 100644 index cc3a4e2..0000000 --- a/inference-engine/samples/speech_recognition_offline_demo/speech_sample.hpp +++ /dev/null @@ -1,251 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include - - -/// @brief message for help argument -static const char help_message[] = "Print a usage message."; - -/// @brief message for images argument -static const char input_message[] = "Required. Paths to an .ark files. Example of usage: or ."; -/// @brief message for model argument -static const char model_message[] = "Required. Path to an .xml file with a trained model (required if -rg is missing)."; - -/// @brief message for plugin argument -static const char plugin_message[] = "Plugin name. For example MKLDNNPlugin. If this parameter is pointed, " \ - "the sample will look for this plugin only"; - -/// @brief message for assigning cnn calculation to device -static const char target_device_message[] = "Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_FP32 " - "GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU" - " as a secondary (e.g. HETERO:GNA,CPU) are supported. The sample will look " - "for a suitable plugin for device specified."; - -/// @brief message for performance counters -static const char performance_counter_message[] = "Enables per-layer performance report"; - -/// @brief message for user library argument -static const char custom_cpu_library_message[] = "Required for MKLDNN (CPU)-targeted custom layers." \ -"Absolute path to a shared library with the kernels impl."; - -/// @brief message for score output argument -static const char output_message[] = "Output file name (default name is scores.ark)."; - -/// @brief message for reference score file argument -static const char reference_score_message[] = "Read reference score .ark file and compare scores."; - -/// @brief message for read GNA model argument -static const char read_gna_model_message[] = "Read GNA model from file using path/filename provided (required if -m is missing)."; - -/// @brief message for write GNA model argument -static const char write_gna_model_message[] = "Write GNA model to file using path/filename provided."; - -/// @brief message for write GNA embedded model argument -static const char write_embedded_model_message[] = "Write GNA embedded model to file using path/filename provided."; - -/// @brief message for quantization argument -static const char quantization_message[] = "Input quantization mode: static (default), dynamic, or user (use with -sf)."; - -/// @brief message for quantization bits argument -static const char quantization_bits_message[] = "Weight bits for quantization: 8 or 16 (default)"; - -/// @brief message for scale factor argument -static const char scale_factor_message[] = "Optional user-specified input scale factor for quantization (use with -q user)."; - -/// @brief message for batch size argument -static const char batch_size_message[] = "Batch size 1-8 (default 1)"; - -/// @brief message for #threads for CPU inference -static const char infer_num_threads_message[] = "Optional. 
Number of threads to use for concurrent async" \ -" inference requests on the GNA."; - -/// @brief message for context window argument -static const char context_window_message_l[] = "Optional. Number of frames for left context windows (default is 0). " \ - "Works only with context window networks." - " If you use the cw_l or cw_r flag, then batch size and nthreads arguments are ignored."; - -/// @brief message for right context window argument -static const char context_window_message_r[] = "Optional. Number of frames for right context windows (default is 0). " \ - "Works only with context window networks." - " If you use the cw_r or cw_l flag, then batch size and nthreads arguments are ignored."; - -/// @brief message for RH HMM model argument -static const char rh_hmm_model_message[] = "Required. Path to RH .hmm file."; - -/// @brief message for RH model argument -static const char rh_labels_message[] = "Required. Path to RH labels file."; - -/// @brief message for RH LM: G model argument -static const char rh_g_model_message[] = "Required. Path to RH LM: G .fst model file."; - -/// @brief message for RH LM: CL model argument -static const char rh_cl_model_message[] = "Required. Path to RH LM: CL .fst model file."; - -/// @brief message for RH acoustic model scale factor argument -static const char rh_am_scale_factor_message[] = "Optional. RH acoustic model scale factor."; - -/// @brief message for RH beam width argument -static const char rh_beam_width_message[] = "Optional. RH beam width."; - -/// @brief message for RH N-best result argument -static const char rh_nbest_message[] = "Optional. RH N-best results."; - -/// @brief message for RH G-cache log size argument -static const char rh_g_cache_log_size_message[] = "Optional. RH G-cache log size."; - -/// @brief message for RH trace back log size argument -static const char rh_trace_back_log_size_message[] = "Optional. RH trace back log size."; - -/// @brief message for RH minimum number of stable frames to attribute result as final -static const char rh_min_stable_frames_message[] = "Optional. Minimum number of stable frames to attribute result as final."; - -/// @brief message for RH token buffer size argument -static const char rh_token_buffer_size_message[] = "Optional. RH token buffer size."; - - -/// @brief Define flag for showing help message
-DEFINE_bool(h, false, help_message);
-
-/// @brief Define parameter for setting input .ark file(s)
-/// It is a required parameter
-DEFINE_string(i, "", input_message);
-
-/// @brief Define parameter for setting the model file
-/// It is a required parameter
-DEFINE_string(m, "", model_message);
-
-/// @brief Define parameter for setting the plugin name
-/// It is a required parameter
-DEFINE_string(p, "", plugin_message);
-
-/// @brief Define parameter for setting the target device to infer on
-DEFINE_string(d, "GNA_AUTO", target_device_message); - -/// @brief Enable per-layer performance report -DEFINE_bool(pc, false, performance_counter_message); - -/// @brief Absolute path to CPU library with user layers
-/// It is a optional parameter -DEFINE_string(l, "", custom_cpu_library_message); - -/// @brief Write model to file (model.bin) -DEFINE_string(o, "", output_message); - -/// @brief Read reference score file -DEFINE_string(r, "", reference_score_message); - -/// @brief Read GNA model from file (model.bin) -DEFINE_string(rg, "", read_gna_model_message); - -/// @brief Write GNA model to file (model.bin) -DEFINE_string(wg, "", write_gna_model_message); - -/// @brief Write GNA embedded model to file (model.bin) -DEFINE_string(we, "", write_embedded_model_message); - -/// @brief Input quantization mode (default static) -DEFINE_string(q, "static", quantization_message); - -/// @brief Input quantization bits (default 16) -DEFINE_int32(qb, 16, quantization_bits_message); - -/// @brief Scale factor for quantization (default 1.0) -DEFINE_double(sf, 1.0, scale_factor_message); - -/// @brief Batch size (default 1) -DEFINE_int32(bs, 1, batch_size_message); - -/// @brief Number of threads to use for inference on the CPU (also affects Hetero cases) -DEFINE_int32(nthreads, 1, infer_num_threads_message); - -/// @brief Right context window size (default 0) -DEFINE_int32(cw_r, 0, context_window_message_r); - -/// @brief Left context window size (default 0) -DEFINE_int32(cw_l, 0, context_window_message_l); - -/// @brief Define parameter for set RH HMM model file -/// It is a required parameter -DEFINE_string(hmm, "rh.hmm", rh_hmm_model_message); - -/// @brief Define parameter for set RH labels file -/// It is a required parameter -DEFINE_string(labels, "labels.bin", rh_labels_message); - -/// @brief Define parameter for set RH LM: G model file -/// It is a required parameter -DEFINE_string(g, "g.fst", rh_g_model_message); - -/// @brief Define parameter for set RH LM: CL model file -/// It is a required parameter -DEFINE_string(cl, "cl.fst", rh_cl_model_message); - -/// @brief RH Acoustic model scale factor (default 1.0) -DEFINE_double(amsf, 1.0, rh_am_scale_factor_message); - -/// @brief RH beam width (default 14.0) -DEFINE_double(beam_width, 14.0, rh_beam_width_message); - -/// @brief RH N-best (default 1) -DEFINE_int32(nbest, 1, rh_nbest_message); - -/// @brief RH G cache log size (default 19) -DEFINE_int32(gcls, 19, rh_g_cache_log_size_message); - -/// @brief RH trace back log size (default 19) -DEFINE_int32(tbls, 19, rh_trace_back_log_size_message); - -/// @brief RH minimum stable frames (default -1) -DEFINE_int32(msf, -1, rh_min_stable_frames_message); - -/// @brief RH token buffer size (default 150000) -DEFINE_int32(tbs, 150000, rh_token_buffer_size_message); - - -/** - * @brief This function show a help message - */ -static void showUsage() { - std::cout << std::endl; - std::cout << "speech_recognition_offline_demo [OPTION]" << std::endl; - std::cout << "Options:" << std::endl; - std::cout << std::endl; - std::cout << " -h " << help_message << std::endl; - std::cout << " -i \"\" " << input_message << std::endl; - std::cout << " -m \"\" " << model_message << std::endl; - std::cout << " -o \"\" " << output_message << std::endl; - std::cout << " -l \"\" " << custom_cpu_library_message << std::endl; - std::cout << " -d \"\" " << target_device_message << std::endl; - std::cout << " -p " << plugin_message << std::endl; - std::cout << " -pc " << performance_counter_message << std::endl; - std::cout << " -q \"\" " << quantization_message << std::endl; - std::cout << " -qb \"\" " << quantization_bits_message << std::endl; - std::cout << " -sf \"\" " << scale_factor_message << std::endl; - std::cout << " -bs 
\"\" " << batch_size_message << std::endl; - std::cout << " -r \"\" " << reference_score_message << std::endl; - std::cout << " -rg \"\" " << read_gna_model_message << std::endl; - std::cout << " -wg \"\" " << write_gna_model_message << std::endl; - std::cout << " -we \"\" " << write_embedded_model_message << std::endl; - std::cout << " -nthreads \"\" " << infer_num_threads_message << std::endl; - std::cout << " -cw_l \"\" " << context_window_message_l << std::endl; - std::cout << " -cw_r \"\" " << context_window_message_r << std::endl; - std::cout << " -hmm \"\" " << rh_hmm_model_message << std::endl; - std::cout << " -labels \"\" " << rh_labels_message << std::endl; - std::cout << " -g \"\" " << rh_g_model_message << std::endl; - std::cout << " -cl \"\" " << rh_cl_model_message << std::endl; - std::cout << " -amsf \"\" " << rh_am_scale_factor_message << std::endl; - std::cout << " -beam_width \"\" " << rh_beam_width_message << std::endl; - std::cout << " -nbest \"\" " << rh_nbest_message << std::endl; - std::cout << " -gcls \"\" " << rh_g_cache_log_size_message << std::endl; - std::cout << " -tbls \"\" " << rh_trace_back_log_size_message << std::endl; - std::cout << " -msf \"\" " << rh_min_stable_frames_message << std::endl; - std::cout << " -tbs \"\" " << rh_token_buffer_size_message << std::endl; -} - -- 2.7.4