From 6564fe6a30aefa0f4659cbef9aa70b1a8e6ce250 Mon Sep 17 00:00:00 2001 From: Suyeon Hwang Date: Wed, 28 Feb 2024 17:48:53 +0900 Subject: [PATCH] Add new structure for saving synthesis parameter - Requirements: The app needs to play each utterance with a different configuration, such as pitch, speed, volume, or background volume. - Contents: This patch adds a new structure type for saving synthesis parameters. The new type is used as a member of speak_data_s. With this change, each speak_data_s stores its own synthesis parameters, so each utterance can be played back differently according to those parameters. Change-Id: I58ea401a14d807a1087275dee0dbac7d029a2c91 Signed-off-by: Suyeon Hwang --- server/ttsd_data.cpp | 28 ++++++++++++++++++++-------- server/ttsd_data.h | 12 +++++++++--- server/ttsd_main.h | 1 + server/ttsd_server.c | 16 +++++++++------- 4 files changed, 39 insertions(+), 18 deletions(-) diff --git a/server/ttsd_data.cpp b/server/ttsd_data.cpp index 25d5208..1026589 100644 --- a/server/ttsd_data.cpp +++ b/server/ttsd_data.cpp @@ -197,10 +197,17 @@ int ttsd_data_new_client(int pid, unsigned int uid, ttsd_mode_e mode, ttsd_playi static inline void destroy_speak_data(speak_data_s* speak_data) { SLOG(LOG_DEBUG, tts_tag(), "[DEBUG] utt(%d), text(%s), lang(%s), vctype(%d) speed(%d)", - speak_data->utt_id, speak_data->text, speak_data->lang, speak_data->vctype, speak_data->speed); + speak_data->utt_id, speak_data->text, speak_data->voice.language, speak_data->voice.type, + speak_data->parameter.speed); free(speak_data->text); - free(speak_data->lang); + speak_data->text = nullptr; + + free(speak_data->voice.language); + speak_data->voice.language = nullptr; + + free(speak_data->voice.ptts_id); + speak_data->voice.ptts_id = nullptr; delete speak_data; } @@ -486,10 +493,14 @@ speak_data_s* ttsd_data_create_speak_data(const char* text, const char* language } speak_data->text = strdup(text); - speak_data->lang = strdup(language); - - speak_data->vctype = voice_type; - 
speak_data->speed = speed; + speak_data->voice.language = strdup(language); + speak_data->voice.ptts_id = nullptr; + speak_data->voice.type = voice_type; + + speak_data->parameter.speed = speed; + speak_data->parameter.pitch = 0; + speak_data->parameter.volume = -1.0; + speak_data->parameter.bg_volume = -1.0; speak_data->utt_id = utt_id; return speak_data; @@ -525,7 +536,7 @@ int ttsd_data_add_speak_data(unsigned int uid, speak_data_s* data) } SLOG(LOG_ERROR, tts_tag(), "[DATA][%p] utt_id(%d), text(%s), lang(%s), vctype(%d), speed(%d)", - data, data->utt_id, data->text, data->lang, data->vctype, data->speed); + data, data->utt_id, data->text, data->voice.language, data->voice.type, data->parameter.speed); if (1 == data->utt_id) app_data->utt_id_stopped = 0; @@ -983,7 +994,8 @@ int ttsd_data_save_error_log(unsigned int uid, FILE* fp) i = 0; for (auto& speakData : app_data->m_speak_data) { SLOG(LOG_DEBUG, tts_tag(), "[%dth][%p] lang(%s), vctype(%d), speed(%d), uttid(%d), text(%s)", - i, speakData, speakData->lang, speakData->vctype, speakData->speed, speakData->utt_id, speakData->text); + i, speakData, speakData->voice.language, speakData->voice.type, speakData->parameter.speed, + speakData->utt_id, speakData->text); i++; } fprintf(fp, "---------------------"); diff --git a/server/ttsd_data.h b/server/ttsd_data.h index 9d22b00..80dade9 100644 --- a/server/ttsd_data.h +++ b/server/ttsd_data.h @@ -44,11 +44,17 @@ typedef enum { } ttsd_synthesis_control_e; typedef struct { + int speed; + int pitch; + double volume; + double bg_volume; +} synthesis_parameter_s; + +typedef struct { int utt_id; char* text; - char* lang; - int vctype; - int speed; + voice_s voice; + synthesis_parameter_s parameter; } speak_data_s; typedef struct { diff --git a/server/ttsd_main.h b/server/ttsd_main.h index c39559a..b834494 100644 --- a/server/ttsd_main.h +++ b/server/ttsd_main.h @@ -85,6 +85,7 @@ typedef struct { typedef struct { char* language; + char* ptts_id; int type; } voice_s; diff 
--git a/server/ttsd_server.c b/server/ttsd_server.c index 19e263f..1bdefeb 100644 --- a/server/ttsd_server.c +++ b/server/ttsd_server.c @@ -122,13 +122,13 @@ static void write_debugger_information(speak_data_s *speak_data, const char *app char* temp_lang = NULL; int temp_type; - if (true != ttsd_engine_select_valid_voice(speak_data->lang, speak_data->vctype, &temp_lang, &temp_type)) { + if (true != ttsd_engine_select_valid_voice(speak_data->voice.language, speak_data->voice.type, &temp_lang, + &temp_type)) { SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to select default voice"); } else { char value_buffer[2048] = {0, }; - snprintf(value_buffer, 2048, - "client=%s|type=%d|speed=%d|language=%s|text=%s|error=%d", - appid, temp_type, speak_data->speed, temp_lang, speak_data->text, ret); + snprintf(value_buffer, 2048, "client=%s|type=%d|speed=%d|language=%s|text=%s|error=%d", appid, temp_type, + speak_data->parameter.speed, temp_lang, speak_data->text, ret); SLOG(LOG_INFO, tts_tag(), "keyVal : %s", value_buffer); @@ -166,7 +166,7 @@ static void __synthesis(unsigned int uid) SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to get app id"); } - if (NULL == speak_data->lang || NULL == speak_data->text) { + if (NULL == speak_data->voice.language || NULL == speak_data->text) { SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Current data is NOT valid"); __stop_and_send_ready_state(uid); ttsd_data_destroy_speak_data(speak_data); @@ -181,13 +181,15 @@ static void __synthesis(unsigned int uid) SLOG(LOG_INFO, tts_tag(), "-----------------------------------------------------------"); SLOG(LOG_INFO, tts_tag(), "ID : uid (%u), uttid(%d) ", g_utt.uid, g_utt.uttid); - SLOG(LOG_INFO, tts_tag(), "Voice : langauge(%s), type(%d), speed(%d)", speak_data->lang, speak_data->vctype, speak_data->speed); + SLOG(LOG_INFO, tts_tag(), "Voice : language(%s), type(%d), speed(%d)", speak_data->voice.language, + speak_data->voice.type, speak_data->parameter.speed); SLOG(LOG_INFO, tts_tag(), "Text : %s", 
speak_data->text); SLOG(LOG_INFO, tts_tag(), "Credential : %s", credential); SLOG(LOG_INFO, tts_tag(), "-----------------------------------------------------------"); ttsd_data_set_synth_control(TTSD_SYNTHESIS_CONTROL_DOING); - ret = ttsd_engine_start_synthesis(speak_data->lang, speak_data->vctype, speak_data->text, speak_data->speed, appid, credential, NULL); + ret = ttsd_engine_start_synthesis(speak_data->voice.language, speak_data->voice.type, speak_data->text, + speak_data->parameter.speed, appid, credential, NULL); if (TTSD_ERROR_NONE != ret) { SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] * FAIL to start SYNTHESIS !!!! * "); -- 2.7.4