Send PCM data from the file the app wants recognized, paced in real time 72/285272/3
authorwn.jang <wn.jang@samsung.com>
Thu, 8 Dec 2022 08:36:21 +0000 (17:36 +0900)
committerwn.jang <wn.jang@samsung.com>
Fri, 9 Dec 2022 02:25:08 +0000 (11:25 +0900)
Change-Id: I941d16223458020a546575156a6f5f8917f1a626

client/stt.c
client/stt_dbus.c
client/stt_dbus.h
include/stt_internal.h
server/sttd_dbus_server.c
server/sttd_engine_agent.c
server/sttd_engine_agent.h
server/sttd_recorder.c
server/sttd_recorder.h
server/sttd_server.c
server/sttd_server.h

index e8dee79..dda250f 100644 (file)
@@ -2066,7 +2066,7 @@ int stt_unset_speech_status_cb(stt_h stt)
        return 0;
 }
 
-int stt_start_file(stt_h stt, const char* language, const char* type, const char* filepath, stt_audio_type_e audio_type, int sample_rate)
+int stt_start_file(stt_h stt, const char* language, const char* type, const char* filepath, stt_audio_type_e audio_type, int channels, int sample_rate)
 {
        stt_client_s* client = NULL;
        int tmp = __stt_check_precondition(stt, &client);
@@ -2110,7 +2110,7 @@ int stt_start_file(stt_h stt, const char* language, const char* type, const char
        }
 
        client->internal_state = STT_INTERNAL_STATE_STARTING;
-       ret = stt_dbus_request_start_file(client->uid, temp, type, client->silence, appid, client->credential, filepath, audio_type, sample_rate);
+       ret = stt_dbus_request_start_file(client->uid, temp, type, client->silence, appid, client->credential, filepath, audio_type, channels, sample_rate);
        if (0 != ret) {
                SLOG(LOG_ERROR, TAG_STTC, "[ERROR] Fail to start file : %s", __stt_get_error_code(ret));
                client->internal_state = STT_INTERNAL_STATE_NONE;
index 1c022a0..dea99fb 100644 (file)
@@ -1661,7 +1661,7 @@ int stt_dbus_request_cancel(unsigned int uid)
 }
 
 //LCOV_EXCL_START
-int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char* type, int silence, const char* appid, const char* credential, const char* filepath, stt_audio_type_e audio_type, int sample_rate)
+int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char* type, int silence, const char* appid, const char* credential, const char* filepath, stt_audio_type_e audio_type, int channels, int sample_rate)
 {
        if (NULL == lang || NULL == type || NULL == appid) {
                SLOG(LOG_ERROR, TAG_STTC, "Input parameter is NULL");
@@ -1681,7 +1681,7 @@ int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char*
                SLOG(LOG_ERROR, TAG_STTC, ">>>> stt start file : Fail to make message");
                return STT_ERROR_OPERATION_FAILED;
        } else {
-               SLOG(LOG_DEBUG, TAG_STTC, ">>>> stt start file : uid(%u), language(%s), type(%s), appid(%s), filepath(%s), audio_type(%d), sample_rate(%d)", uid, lang, type, appid, filepath, audio_type, sample_rate);
+               SLOG(LOG_DEBUG, TAG_STTC, ">>>> stt start file : uid(%u), language(%s), type(%s), appid(%s), filepath(%s), audio_type(%d), channels(%d), sample_rate(%d)", uid, lang, type, appid, filepath, audio_type, channels, sample_rate);
        }
 
        char *temp = NULL;
@@ -1700,6 +1700,7 @@ int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char*
                DBUS_TYPE_STRING, &temp,
                DBUS_TYPE_STRING, &filepath,
                DBUS_TYPE_INT32, &audio_type,
+               DBUS_TYPE_INT32, &channels,
                DBUS_TYPE_INT32, &sample_rate,
                DBUS_TYPE_INVALID);
 
index 980e301..65c4f15 100644 (file)
@@ -63,7 +63,7 @@ int stt_dbus_request_stop(unsigned int uid);
 
 int stt_dbus_request_cancel(unsigned int uid);
 
-int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char* type, int silence, const char* appid, const char* credential, const char* filepath, stt_audio_type_e audio_type, int sample_rate);
+int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char* type, int silence, const char* appid, const char* credential, const char* filepath, stt_audio_type_e audio_type, int channels, int sample_rate);
 
 int stt_dbus_request_cancel_file(unsigned int uid);
 
index 01337a1..0104b1e 100644 (file)
@@ -98,6 +98,7 @@ int stt_set_server_stt(stt_h stt, const char* key, char* user_data);
  * @param[in] type The type for recognition (e.g. #STT_RECOGNITION_TYPE_FREE, #STT_RECOGNITION_TYPE_FREE_PARTIAL)
  * @param[in] filepath PCM filepath for recognition
  * @param[in] audio_type audio type of file
+ * @param[in] channels Number of channels in the file (e.g. 1 for mono, 2 for stereo)
  * @param[in] sample_rate sample rate of file
  * @return @c 0 on success,
  *         otherwise a negative error value
@@ -115,7 +116,7 @@ int stt_set_server_stt(stt_h stt, const char* key, char* user_data);
  * @see stt_cancel_file()
  * @see stt_state_changed_cb()
 */
-int stt_start_file(stt_h stt, const char* language, const char* type, const char* filepath, stt_audio_type_e audio_type, int sample_rate);
+int stt_start_file(stt_h stt, const char* language, const char* type, const char* filepath, stt_audio_type_e audio_type, int channels, int sample_rate);
 
 /**
  * @brief Cancels processing file recognition asynchronously.
index f90eee6..f13f1e8 100644 (file)
@@ -906,6 +906,7 @@ int sttd_dbus_server_start_file(DBusConnection* conn, DBusMessage* msg)
        char* credential;
        char* filepath;
        stte_audio_type_e audio_type;
+       int channels;
        int sample_rate;
 
        int ret = STTD_ERROR_OPERATION_FAILED;
@@ -919,6 +920,7 @@ int sttd_dbus_server_start_file(DBusConnection* conn, DBusMessage* msg)
                DBUS_TYPE_STRING, &credential,
                DBUS_TYPE_STRING, &filepath,
                DBUS_TYPE_INT32, &audio_type,
+               DBUS_TYPE_INT32, &channels,
                DBUS_TYPE_INT32, &sample_rate,
                DBUS_TYPE_INVALID);
 
@@ -929,9 +931,9 @@ int sttd_dbus_server_start_file(DBusConnection* conn, DBusMessage* msg)
                dbus_error_free(&err);
                ret = STTD_ERROR_OPERATION_FAILED;
        } else {
-               SLOG(LOG_DEBUG, TAG_STTD, "[IN] stt start file : uid(%u), lang(%s), type(%s), silence(%d) appid(%s) filepath(%s), audio_type(%d), sample_rate(%d)"
-                       , uid, lang, type, silence, appid, filepath, audio_type, sample_rate);
-               ret = sttd_server_start_file(uid, lang, type, silence, appid, credential, filepath, audio_type, sample_rate);
+               SLOG(LOG_DEBUG, TAG_STTD, "[IN] stt start file : uid(%u), lang(%s), type(%s), silence(%d) appid(%s) filepath(%s), audio_type(%d), channels(%d), sample_rate(%d)"
+                       , uid, lang, type, silence, appid, filepath, audio_type, channels, sample_rate);
+               ret = sttd_server_start_file(uid, lang, type, silence, appid, credential, filepath, audio_type, channels, sample_rate);
        }
 
        if (0 <= ret) {
index 790602d..ef72d12 100644 (file)
@@ -698,7 +698,7 @@ int sttd_engine_agent_recognize_start_recorder(unsigned int uid, const char* app
        return 0;
 }
 
-int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepath)
+int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepath, stte_audio_type_e audio_type, int channels, int sample_rate)
 {
        int tmp = __sttd_engine_agent_check_precondition();
        if (STTD_ERROR_NONE != tmp)
@@ -707,7 +707,7 @@ int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepat
        SLOG(LOG_INFO, TAG_STTD, "[Engine Agent] Start recorder");
 
        int ret;
-       ret = sttd_recorder_start_file(uid, filepath);
+       ret = sttd_recorder_start_file(uid, filepath, audio_type, channels, sample_rate);
        if (0 != ret) {
                SLOG(LOG_ERROR, TAG_STTD, "[Engine Agent ERROR] Fail to start recorder : result(%d)", ret);
                stt_engine_recognize_cancel();
index d5992a0..673f3a8 100644 (file)
@@ -98,7 +98,7 @@ int sttd_engine_agent_recognize_start_engine(unsigned int uid, const char* lang,
 
 int sttd_engine_agent_recognize_start_recorder(unsigned int uid, const char* appid);
 
-int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepath);
+int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepath, stte_audio_type_e audio_type, int channels, int sample_rate);
 
 int sttd_engine_agent_set_recording_data(const void* data, unsigned int length);
 
index 4b141b7..1c29476 100644 (file)
@@ -850,7 +850,37 @@ int sttd_recorder_stop()
        return 0;
 }
 
-int sttd_recorder_start_file(unsigned int uid, const char *filepath)
+/* Returns the playback time in ms of @bytes of PCM data; 0 if any argument is not positive or the byte rate truncates to 0. */
+static int __calculate_time_of_pcm_data(int bytes, int sample_rate, int channels, int bit_per_sample)
+{
+       if (bytes <= 0 || sample_rate <= 0 || channels <= 0 || bit_per_sample <= 0) {
+               SLOG(LOG_ERROR, TAG_STTD, "[Recorder ERROR] Invalid parameter");
+               return 0;
+       }
+
+       /* 64-bit intermediates: bytes * 1000 no longer overflows int, and a zero byte rate is caught before dividing. */
+       long long bytes_per_sec = (long long)sample_rate * channels * bit_per_sample / 8;
+       return (0 == bytes_per_sec) ? 0 : (int)((long long)bytes * 1000 / bytes_per_sec);
+}
+
+/*
+ * Gives the number of bits per sample for the audio types handled here
+ * (16 for PCM_S16_LE, 8 for PCM_U8).
+ */
+static int __calculate_bit_per_sample(stte_audio_type_e audio_type)
+{
+       if (STTE_AUDIO_TYPE_PCM_S16_LE == audio_type)
+               return 16;
+
+       if (STTE_AUDIO_TYPE_PCM_U8 == audio_type)
+               return 8;
+
+       /* Any other audio type is reported and yields 0. */
+       SLOG(LOG_ERROR, TAG_STTD, "[Recorder ERROR] Invalid audio type");
+       return 0;
+}
+
+int sttd_recorder_start_file(unsigned int uid, const char *filepath, stte_audio_type_e audio_type, int channels, int sample_rate)
 {
        if (STTD_RECORDER_STATE_RECORDING == g_recorder_state)
                return 0;
@@ -873,6 +903,11 @@ int sttd_recorder_start_file(unsigned int uid, const char *filepath)
                while (!feof(infile)) {
                        static char pcm_buff[BUFFER_LENGTH];
                        int read_byte = fread(pcm_buff, 1, BUFFER_LENGTH, infile);
+
+                       // sleep for real time of pcm data
+                       int time = __calculate_time_of_pcm_data(read_byte, sample_rate, channels, __calculate_bit_per_sample(audio_type));
+                       usleep(time * 1000);
+
                        totalReadBytes += read_byte;
                        if (0 != read_byte) {
                                if (0 != g_audio_cb(pcm_buff, read_byte)) {
index ab53dd1..13000da 100644 (file)
@@ -42,7 +42,7 @@ int sttd_recorder_start(unsigned int uid, const char* appid);
 
 int sttd_recorder_stop();
 
-int sttd_recorder_start_file(unsigned int uid, const char *filepath);
+int sttd_recorder_start_file(unsigned int uid, const char *filepath, stte_audio_type_e audio_type, int channels, int sample_rate);
 
 int sttd_recorder_stop_file();
 
index 580b080..70261ec 100644 (file)
@@ -1594,7 +1594,7 @@ int sttd_server_cancel(unsigned int uid)
 }
 
 int sttd_server_start_file(unsigned int uid, const char* lang, const char* recognition_type, int silence, const char* appid, const char* credential,
-                                                       const char* filepath, stte_audio_type_e audio_type, int sample_rate)
+                                                       const char* filepath, stte_audio_type_e audio_type, int channels, int sample_rate)
 {
        if (NULL == lang || NULL == recognition_type || NULL == filepath) {
                SLOG(LOG_ERROR, TAG_STTD, "[Server ERROR] Input parameter is NULL");
@@ -1613,7 +1613,7 @@ int sttd_server_start_file(unsigned int uid, const char* lang, const char* recog
        }
 
        /* engine start recognition */
-       SLOG(LOG_DEBUG, TAG_STTD, "[Server] start : uid(%u), lang(%s), recog_type(%s), appid(%s), file(%s), audio_type(%d), sample_rate(%d)", uid, lang, recognition_type, appid, filepath, audio_type, sample_rate);
+       SLOG(LOG_DEBUG, TAG_STTD, "[Server] start : uid(%u), lang(%s), recog_type(%s), appid(%s), file(%s), audio_type(%d), channels(%d), sample_rate(%d)", uid, lang, recognition_type, appid, filepath, audio_type, channels, sample_rate);
 
        /* 1. Set audio session */
        ret = sttd_recorder_set_audio_session();
@@ -1636,7 +1636,7 @@ int sttd_server_start_file(unsigned int uid, const char* lang, const char* recog
        sttdc_send_set_state(uid, APP_STATE_RECORDING);
 
        /* 3. Start to send pcm from file to engine */
-       ret = sttd_engine_agent_recognize_start_file(uid, filepath);
+       ret = sttd_engine_agent_recognize_start_file(uid, filepath, audio_type, channels, sample_rate);
        if (0 != ret) {
                stt_client_unset_current_recognition();
                sttd_recorder_unset_audio_session();
index 114e4b7..96ac484 100644 (file)
@@ -76,7 +76,7 @@ int sttd_server_stop(unsigned int uid);
 
 int sttd_server_cancel(unsigned int uid);
 
-int sttd_server_start_file(unsigned int uid, const char* lang, const char* recognition_type, int silence, const char* appid, const char* credential, const char* filepath, stte_audio_type_e audio_type, int sample_rate);
+int sttd_server_start_file(unsigned int uid, const char* lang, const char* recognition_type, int silence, const char* appid, const char* credential, const char* filepath, stte_audio_type_e audio_type, int channels, int sample_rate);
 
 int sttd_server_cancel_file(unsigned int uid);