From 25cdea85adf69fef515464ee7ba5884a427a9d1b Mon Sep 17 00:00:00 2001 From: "sungwook79.park" Date: Tue, 30 Apr 2024 14:01:24 +0900 Subject: [PATCH] Implement IPC in server side for synthesis parameter APIs Change-Id: Ie5994f9abc6166547b7765da4ab2f43a61ebb072 Signed-off-by: sungwook79.park --- include/ttse.h | 4 +- server/ttsd_data.cpp | 4 +- server/ttsd_data.h | 2 +- server/ttsd_engine_agent.c | 16 ++++++-- server/ttsd_engine_agent.h | 2 +- server/ttsd_server.c | 99 ++++++++++++++++++++++++++++++++++++++++++++-- server/ttsd_server.h | 2 + server/ttsd_tidl.c | 16 ++++++++ 8 files changed, 132 insertions(+), 13 deletions(-) diff --git a/include/ttse.h b/include/ttse.h index e14a596..62ee3df 100755 --- a/include/ttse.h +++ b/include/ttse.h @@ -291,7 +291,9 @@ typedef bool (*ttse_need_app_credential_cb)(void); * For example, "ko_KR" for Korean, "en_US" for American English * @param[in] type The voice type * @param[in] text Texts +* @param[in] ptts_id The id of personal TTS * @param[in] speed The speed of speaking +* @param[in] pitch The pitch of speaking * @param[in] appid The Application ID * @param[in] credential The credential granted to the application * @param[in] user_data The user data which must be passed to ttse_send_result() @@ -309,7 +311,7 @@ typedef bool (*ttse_need_app_credential_cb)(void); * @see ttse_cancel_synthesis_cb() * @see ttse_need_app_credential_cb() */ -typedef int (*ttse_start_synthesis_cb)(const char* language, int type, const char* text, int speed, const char* appid, const char* credential, void* user_data); +typedef int (*ttse_start_synthesis_cb)(const char* language, int type, const char* text, const char* ptts_id, int speed, int pitch, const char* appid, const char* credential, void* user_data); /** diff --git a/server/ttsd_data.cpp b/server/ttsd_data.cpp index aa9985f..b12c314 100644 --- a/server/ttsd_data.cpp +++ b/server/ttsd_data.cpp @@ -483,7 +483,7 @@ int ttsd_data_reset_used_voice(unsigned int uid, ttsd_used_voice_cb callback) return TTSD_ERROR_NONE; } -speak_data_s* ttsd_data_create_speak_data(const char* text, const char* language, int voice_type, int speed, int utt_id, bool is_silent, unsigned int duration_in_msec) +speak_data_s* ttsd_data_create_speak_data(const char* text, const char* language, int voice_type, int speed, int pitch, double volume, double background_volume_ratio, int utt_id, bool is_silent, unsigned int duration_in_msec) { speak_data_s* speak_data = new speak_data_s(); if (nullptr == speak_data) { @@ -509,7 +509,7 @@ speak_data_s* ttsd_data_create_speak_data(const char* text, const char* language speak_data->voice.type = voice_type; speak_data->synth_parameter.speed = speed; - speak_data->synth_parameter.pitch = 0; + speak_data->synth_parameter.pitch = pitch; speak_data->synth_parameter.volume = -1.0; speak_data->synth_parameter.background_volume_ratio = -1.0; diff --git a/server/ttsd_data.h b/server/ttsd_data.h index 38209e3..73fc7eb 100644 --- a/server/ttsd_data.h +++ b/server/ttsd_data.h @@ -99,7 +99,7 @@ int ttsd_data_set_credential(unsigned int uid, const char* credential); char* ttsd_data_get_credential(unsigned int uid); /* speak data */ -speak_data_s* ttsd_data_create_speak_data(const char* text, const char* language, int voice_type, int speed, int utt_id, bool is_silent, unsigned int duration_in_msec); +speak_data_s* ttsd_data_create_speak_data(const char* text, const char* language, int voice_type, int speed, int pitch, double volume, double bg_volume, int utt_id, bool is_silent, unsigned int duration_in_msec); void ttsd_data_destroy_speak_data(speak_data_s* speak_data); diff --git a/server/ttsd_engine_agent.c b/server/ttsd_engine_agent.c index 1e3dd5f..a1999ae 100644 --- a/server/ttsd_engine_agent.c +++ b/server/ttsd_engine_agent.c @@ -932,7 +932,7 @@ int ttsd_engine_unload_voice(const char* lang, const int vctype) return TTSD_ERROR_NONE; } -int ttsd_engine_start_synthesis(const char* lang, int vctype, const char* text, int speed, const char* appid, const char* credential, void* user_param) +int ttsd_engine_start_synthesis(const char* lang, int vctype, const char* text, const char* ptts_id, int speed, int pitch, const char* appid, const char* credential, void* user_param) { if (NULL == lang || NULL == text) { SLOG(LOG_ERROR, tts_tag(), "[Engine Agent ERROR] Invalid parameter"); @@ -956,8 +956,8 @@ int ttsd_engine_start_synthesis(const char* lang, int vctype, const char* text, } return TTSD_ERROR_INVALID_VOICE; } else { - SECURE_SLOG(LOG_DEBUG, tts_tag(), "[Engine Agent] Start synthesis : language(%s), type(%d), speed(%d), text(%s), credential(%s)", - (NULL == temp_lang) ? "NULL" : temp_lang, temp_type, speed, (NULL == text) ? "NULL" : text, (NULL == credential) ? "NULL" : credential); + SECURE_SLOG(LOG_DEBUG, tts_tag(), "[Engine Agent] Start synthesis : language(%s), type(%d), ptts_id(%s), speed(%d), pitch(%d), text(%s), credential(%s)", + (NULL == temp_lang) ? "NULL" : temp_lang, temp_type, (NULL == ptts_id) ? "NULL" : ptts_id, speed, pitch, (NULL == text) ? "NULL" : text, (NULL == credential) ? "NULL" : credential); } int temp_speed; @@ -968,8 +968,16 @@ int ttsd_engine_start_synthesis(const char* lang, int vctype, const char* text, temp_speed = speed; } + int temp_pitch; + + if (0 == pitch) { + temp_pitch = g_engine_info->default_pitch; + } else { + temp_pitch = pitch; + } + /* synthesize text */ - ret = g_engine_info->callbacks->start_synth(temp_lang, temp_type, text, temp_speed, appid, credential, user_param); + ret = g_engine_info->callbacks->start_synth(temp_lang, temp_type, text, ptts_id, temp_speed, temp_pitch, appid, credential, user_param); if (0 != ret) { SLOG(LOG_ERROR, tts_tag(), "[Engine Agent ERROR] ***************************************"); SLOG(LOG_ERROR, tts_tag(), "[Engine Agent ERROR] * synthesize error : %s *", __ttsd_get_engine_error_code(ret)); diff --git a/server/ttsd_engine_agent.h b/server/ttsd_engine_agent.h index 29a4fe8..f435c5f 100644 --- a/server/ttsd_engine_agent.h +++ b/server/ttsd_engine_agent.h @@ -78,7 +78,7 @@ int ttsd_engine_load_voice(const char* lang, int vctype); int ttsd_engine_unload_voice(const char* lang, int vctype); -int ttsd_engine_start_synthesis(const char* lang, int vctype, const char* text, int speed, const char* appid, const char* credential, void* user_param); +int ttsd_engine_start_synthesis(const char* lang, int vctype, const char* text, const char* ptts_id, int speed, int pitch, const char* appid, const char* credential, void* user_param); int ttsd_engine_cancel_synthesis(); diff --git a/server/ttsd_server.c b/server/ttsd_server.c index f158175..6640634 100644 --- a/server/ttsd_server.c +++ b/server/ttsd_server.c @@ -200,8 +200,8 @@ static void __synthesis(unsigned int uid) SLOG(LOG_INFO, tts_tag(), "-----------------------------------------------------------"); ttsd_data_set_synth_control(TTSD_SYNTHESIS_CONTROL_DOING); - ret = ttsd_engine_start_synthesis(speak_data->voice.language, speak_data->voice.type, speak_data->text, - speak_data->synth_parameter.speed, appid, credential, NULL); + ret = ttsd_engine_start_synthesis(speak_data->voice.language, speak_data->voice.type, speak_data->text, speak_data->voice.ptts_id, + speak_data->synth_parameter.speed, speak_data->synth_parameter.pitch, appid, credential, NULL); if (TTSD_ERROR_NONE != ret) { SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] * FAIL to start SYNTHESIS !!!! * "); @@ -1090,7 +1090,98 @@ int ttsd_server_add_text(unsigned int uid, const char* text, const char* lang, i return TTSD_ERROR_INVALID_VOICE; } - speak_data_s* speak_data = ttsd_data_create_speak_data(text, lang, voice_type, speed, utt_id, false, 0); + speak_data_s* speak_data = ttsd_data_create_speak_data(text, lang, voice_type, speed, 0, -1.0, -1.0, utt_id, false, 0); + if (NULL == speak_data) { + SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to allocate memory"); + if (NULL != temp_lang) { + free(temp_lang); + temp_lang = NULL; + } + + return TTSD_ERROR_OPERATION_FAILED; + } + + SLOG(LOG_INFO, tts_tag(), "[Server] Add queue, lang(%s), vctype(%d), speed(%d), uttid(%d), credential(%s)", lang, voice_type, speed, utt_id, credential); + + /* if state is APP_STATE_READY , APP_STATE_PAUSED , only need to add speak data to queue*/ + int ret = -1; + ret = ttsd_data_add_speak_data(uid, speak_data); + if (0 != ret) { + SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to add speak data"); + if (NULL != temp_lang) { + free(temp_lang); + temp_lang = NULL; + } + + ttsd_data_destroy_speak_data(speak_data); + speak_data = NULL; + + return ret; + } + + if (0 != ttsd_data_set_used_voice(uid, temp_lang, temp_type)) { + /* Request load voice */ + SLOG(LOG_DEBUG, tts_tag(), "[Server] Request to load voice"); + if (0 != ttsd_engine_load_voice(temp_lang, temp_type)) { + SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to load voice"); + } + } + + if (NULL != temp_lang) { + free(temp_lang); + temp_lang = NULL; + } + + if (APP_STATE_PLAYING == state) { + /* check if engine use network */ + if (ttsd_engine_agent_need_network()) { + if (false == __is_connected_to_network()) { + SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Disconnect network. Current engine needs network."); + return TTSD_ERROR_OPERATION_FAILED; + } + } + + ttsd_data_set_credential(uid, credential); + + /* Check whether tts-engine is running or not */ + ttsd_synthesis_control_e synth_control = ttsd_data_get_synth_control(); + SLOG(LOG_INFO, tts_tag(), "[Server INFO] synth_control(%d)", synth_control); + if (TTSD_SYNTHESIS_CONTROL_DOING == synth_control) { + SLOG(LOG_WARN, tts_tag(), "[Server WARNING] Engine has already been running."); + } else { + __synthesis(uid); + } + } + + return TTSD_ERROR_NONE; +} + +int ttsd_server_add_text_with_synthesis_parameter(unsigned int uid, const char* text, const char* lang, const char* ptts_id, int voice_type, int speed, int pitch, double volume, double background_volume_ratio, int utt_id, const char* credential) +{ + app_tts_state_e state = ttsd_data_get_client_state(uid); + if (APP_STATE_NONE == state) { + SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] ttsd_server_add_text : uid is not valid"); + return TTSD_ERROR_INVALID_PARAMETER; + } + + /* check valid voice */ + char* temp_lang = NULL; + int temp_type; + if (true != ttsd_engine_select_valid_voice((const char*)lang, voice_type, &temp_lang, &temp_type)) { + SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to select valid voice"); + if (NULL != temp_lang) { + free(temp_lang); + temp_lang = NULL; + } + return TTSD_ERROR_INVALID_VOICE; + } + + if (NULL == temp_lang) { + SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to select valid voice : result lang is NULL"); + return TTSD_ERROR_INVALID_VOICE; + } + + speak_data_s* speak_data = ttsd_data_create_speak_data(text, lang, voice_type, speed, pitch, volume, background_volume_ratio, utt_id, false, 0); if (NULL == speak_data) { SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to allocate memory"); if (NULL != temp_lang) { @@ -1164,7 +1255,7 @@ int ttsd_server_add_silent_utterance(unsigned int uid, unsigned int duration_in_ return TTSD_ERROR_INVALID_PARAMETER; } - speak_data_s* speak_data = ttsd_data_create_speak_data(NULL, NULL, 0, 0, utt_id, true, duration_in_msec); + speak_data_s* speak_data = ttsd_data_create_speak_data(NULL, NULL, 0, 0, 0, -1.0, -1.0, utt_id, true, duration_in_msec); if (NULL == speak_data) { SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to allocate memory"); return TTSD_ERROR_OPERATION_FAILED; diff --git a/server/ttsd_server.h b/server/ttsd_server.h index fa24958..f04998c 100644 --- a/server/ttsd_server.h +++ b/server/ttsd_server.h @@ -63,6 +63,8 @@ int ttsd_server_add_text(unsigned int uid, const char* text, const char* lang, i int ttsd_server_add_silent_utterance(unsigned int uid, unsigned int duration_in_msec, int utt_id, const char* credential); +int ttsd_server_add_text_with_synthesis_parameter(unsigned int uid, const char* text, const char* lang, const char* ptts_id, int voice_type, int speed, int pitch, double volume, double background_volume_ratio, int utt_id, const char* credential); + int ttsd_server_play(unsigned int uid, const char* credential); int ttsd_server_stop(unsigned int uid); diff --git a/server/ttsd_tidl.c b/server/ttsd_tidl.c index 9a84a9a..fcc2d5f 100644 --- a/server/ttsd_tidl.c +++ b/server/ttsd_tidl.c @@ -351,6 +351,21 @@ static int __add_silent_utterance_cb(rpc_port_stub_tts_context_h context, int ui return TTSD_ERROR_NONE; } +static int __add_text_with_synthesis_parameter_cb(rpc_port_stub_tts_context_h context, int uid, const char *text, const char* language, const char* ptts_id, int voice_type, int speed, int pitch, double volume, double background_volume_ratio, int uttid, const char *credential, void *user_data) +{ + unsigned int u_uid = (unsigned int)uid; + SLOG(LOG_DEBUG, tts_tag(), ">>>>> TTS ADD TEXT WITH SYNTHESIS PARAMETER(%u)", u_uid); + + int ret = ttsd_server_add_text_with_synthesis_parameter(u_uid, text, language, ptts_id, voice_type, speed, pitch, volume, background_volume_ratio, uttid, credential); + if (TTSD_ERROR_NONE != ret) { + SLOG(LOG_ERROR, tts_tag(), "[ERROR] TTS ADD TEXT WITH SYNTHESIS PARAMETER (%u) fail (%d/%s) <<<<<", u_uid, ret, get_error_message(ret)); + return ret; + } + + SLOG(LOG_DEBUG, tts_tag(), "<<<<<"); + return TTSD_ERROR_NONE; +} + static int __stop_cb(rpc_port_stub_tts_context_h context, int uid, void *user_data) { unsigned int u_uid = (unsigned int)uid; @@ -509,6 +524,7 @@ int ttsd_tidl_open_connection() g_callback.finalize = __finalize_cb; g_callback.add_text = __add_text_cb; g_callback.add_silent_utterance = __add_silent_utterance_cb; + g_callback.add_text_with_synthesis_parameter = __add_text_with_synthesis_parameter_cb; g_callback.stop = __stop_cb; g_callback.pause = __pause_cb; g_callback.play_pcm = __play_pcm_cb; -- 2.7.4