Implement IPC in server side for synthesis parameter APIs 02/310502/3
author sungwook79.park <sungwook79.park@samsung.com>
Tue, 30 Apr 2024 05:01:24 +0000 (14:01 +0900)
committer sungwook79.park <sungwook79.park@samsung.com>
Mon, 20 May 2024 01:08:14 +0000 (10:08 +0900)
Change-Id: Ie5994f9abc6166547b7765da4ab2f43a61ebb072
Signed-off-by: sungwook79.park <sungwook79.park@samsung.com>
include/ttse.h
server/ttsd_data.cpp
server/ttsd_data.h
server/ttsd_engine_agent.c
server/ttsd_engine_agent.h
server/ttsd_server.c
server/ttsd_server.h
server/ttsd_tidl.c

index e14a596..62ee3df 100755 (executable)
@@ -291,7 +291,9 @@ typedef bool (*ttse_need_app_credential_cb)(void);
 *                     For example, "ko_KR" for Korean, "en_US" for American English
 * @param[in] type The voice type
 * @param[in] text Texts
+* @param[in] ptts_id The id of personal TTS
 * @param[in] speed The speed of speaking
+* @param[in] pitch The pitch of speaking
 * @param[in] appid The Application ID
 * @param[in] credential The credential granted to the application
 * @param[in] user_data The user data which must be passed to ttse_send_result()
@@ -309,7 +311,7 @@ typedef bool (*ttse_need_app_credential_cb)(void);
 * @see ttse_cancel_synthesis_cb()
 * @see ttse_need_app_credential_cb()
 */
-typedef int (*ttse_start_synthesis_cb)(const char* language, int type, const char* text, int speed, const char* appid, const char* credential, void* user_data);
+typedef int (*ttse_start_synthesis_cb)(const char* language, int type, const char* text, const char* ptts_id, int speed, int pitch, const char* appid, const char* credential, void* user_data);
 
 
 /**
index aa9985f..b12c314 100644 (file)
@@ -483,7 +483,7 @@ int ttsd_data_reset_used_voice(unsigned int uid, ttsd_used_voice_cb callback)
        return TTSD_ERROR_NONE;
 }
 
-speak_data_s* ttsd_data_create_speak_data(const char* text, const char* language, int voice_type, int speed, int utt_id, bool is_silent, unsigned int duration_in_msec)
+speak_data_s* ttsd_data_create_speak_data(const char* text, const char* language, int voice_type, int speed, int pitch, double volume, double background_volume_ratio, int utt_id, bool is_silent, unsigned int duration_in_msec)
 {
        speak_data_s* speak_data = new speak_data_s();
        if (nullptr == speak_data) {
@@ -509,7 +509,7 @@ speak_data_s* ttsd_data_create_speak_data(const char* text, const char* language
        speak_data->voice.type = voice_type;
 
        speak_data->synth_parameter.speed = speed;
-       speak_data->synth_parameter.pitch = 0;
+       speak_data->synth_parameter.pitch = pitch;
        speak_data->synth_parameter.volume = -1.0;
        speak_data->synth_parameter.background_volume_ratio = -1.0;
 
index 38209e3..73fc7eb 100644 (file)
@@ -99,7 +99,7 @@ int ttsd_data_set_credential(unsigned int uid, const char* credential);
 char* ttsd_data_get_credential(unsigned int uid);
 
 /* speak data */
-speak_data_s* ttsd_data_create_speak_data(const char* text, const char* language, int voice_type, int speed, int utt_id, bool is_silent, unsigned int duration_in_msec);
+speak_data_s* ttsd_data_create_speak_data(const char* text, const char* language, int voice_type, int speed, int pitch, double volume, double bg_volume, int utt_id, bool is_silent, unsigned int duration_in_msec);
 
 void ttsd_data_destroy_speak_data(speak_data_s* speak_data);
 
index 1e3dd5f..a1999ae 100644 (file)
@@ -932,7 +932,7 @@ int ttsd_engine_unload_voice(const char* lang, const int vctype)
        return TTSD_ERROR_NONE;
 }
 
-int ttsd_engine_start_synthesis(const char* lang, int vctype, const char* text, int speed, const char* appid, const char* credential, void* user_param)
+int ttsd_engine_start_synthesis(const char* lang, int vctype, const char* text, const char* ptts_id, int speed, int pitch, const char* appid, const char* credential, void* user_param)
 {
        if (NULL == lang || NULL == text) {
                SLOG(LOG_ERROR, tts_tag(), "[Engine Agent ERROR] Invalid parameter");
@@ -956,8 +956,8 @@ int ttsd_engine_start_synthesis(const char* lang, int vctype, const char* text,
                }
                return TTSD_ERROR_INVALID_VOICE;
        } else {
-               SECURE_SLOG(LOG_DEBUG, tts_tag(), "[Engine Agent] Start synthesis : language(%s), type(%d), speed(%d), text(%s), credential(%s)",
-                       (NULL == temp_lang) ? "NULL" : temp_lang, temp_type, speed, (NULL == text) ? "NULL" : text, (NULL == credential) ? "NULL" : credential);
+               SECURE_SLOG(LOG_DEBUG, tts_tag(), "[Engine Agent] Start synthesis : language(%s), type(%d), ptts_id(%s), speed(%d), pitch(%d), text(%s), credential(%s)",
+                       (NULL == temp_lang) ? "NULL" : temp_lang, temp_type, (NULL == ptts_id) ? "NULL" : ptts_id, speed, pitch, (NULL == text) ? "NULL" : text, (NULL == credential) ? "NULL" : credential);
        }
 
        int temp_speed;
@@ -968,8 +968,16 @@ int ttsd_engine_start_synthesis(const char* lang, int vctype, const char* text,
                temp_speed = speed;
        }
 
+       int temp_pitch;
+
+       if (0 == pitch) {
+               temp_pitch = g_engine_info->default_pitch;
+       } else {
+               temp_pitch = pitch;
+       }
+
        /* synthesize text */
-       ret = g_engine_info->callbacks->start_synth(temp_lang, temp_type, text, temp_speed, appid, credential, user_param);
+       ret = g_engine_info->callbacks->start_synth(temp_lang, temp_type, text, ptts_id, temp_speed, temp_pitch, appid, credential, user_param);
        if (0 != ret) {
                SLOG(LOG_ERROR, tts_tag(), "[Engine Agent ERROR] ***************************************");
                SLOG(LOG_ERROR, tts_tag(), "[Engine Agent ERROR] * synthesize error : %s *", __ttsd_get_engine_error_code(ret));
index 29a4fe8..f435c5f 100644 (file)
@@ -78,7 +78,7 @@ int ttsd_engine_load_voice(const char* lang, int vctype);
 
 int ttsd_engine_unload_voice(const char* lang, int vctype);
 
-int ttsd_engine_start_synthesis(const char* lang, int vctype, const char* text, int speed, const char* appid, const char* credential, void* user_param);
+int ttsd_engine_start_synthesis(const char* lang, int vctype, const char* text, const char* ptts_id, int speed, int pitch, const char* appid, const char* credential, void* user_param);
 
 int ttsd_engine_cancel_synthesis();
 
index f158175..6640634 100644 (file)
@@ -200,8 +200,8 @@ static void __synthesis(unsigned int uid)
                SLOG(LOG_INFO, tts_tag(), "-----------------------------------------------------------");
 
                ttsd_data_set_synth_control(TTSD_SYNTHESIS_CONTROL_DOING);
-               ret = ttsd_engine_start_synthesis(speak_data->voice.language, speak_data->voice.type, speak_data->text,
-                                                                               speak_data->synth_parameter.speed, appid, credential, NULL);
+               ret = ttsd_engine_start_synthesis(speak_data->voice.language, speak_data->voice.type, speak_data->text, speak_data->voice.ptts_id,
+                                                                               speak_data->synth_parameter.speed, speak_data->synth_parameter.pitch, appid, credential, NULL);
                if (TTSD_ERROR_NONE != ret) {
                        SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] * FAIL to start SYNTHESIS !!!! * ");
 
@@ -1090,7 +1090,98 @@ int ttsd_server_add_text(unsigned int uid, const char* text, const char* lang, i
                return TTSD_ERROR_INVALID_VOICE;
        }
 
-       speak_data_s* speak_data = ttsd_data_create_speak_data(text, lang, voice_type, speed, utt_id, false, 0);
+       speak_data_s* speak_data = ttsd_data_create_speak_data(text, lang, voice_type, speed, 0, -1.0, -1.0, utt_id, false, 0);
+       if (NULL == speak_data) {
+               SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to allocate memory");
+               if (NULL != temp_lang) {
+                       free(temp_lang);
+                       temp_lang = NULL;
+               }
+
+               return TTSD_ERROR_OPERATION_FAILED;
+       }
+
+       SLOG(LOG_INFO, tts_tag(), "[Server] Add queue, lang(%s), vctype(%d), speed(%d), uttid(%d), credential(%s)", lang, voice_type, speed, utt_id, credential);
+
+       /* if state is APP_STATE_READY , APP_STATE_PAUSED , only need to add speak data to queue*/
+       int ret = -1;
+       ret = ttsd_data_add_speak_data(uid, speak_data);
+       if (0 != ret) {
+               SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to add speak data");
+               if (NULL != temp_lang) {
+                       free(temp_lang);
+                       temp_lang = NULL;
+               }
+
+               ttsd_data_destroy_speak_data(speak_data);
+               speak_data = NULL;
+
+               return ret;
+       }
+
+       if (0 != ttsd_data_set_used_voice(uid, temp_lang, temp_type)) {
+               /* Request load voice */
+               SLOG(LOG_DEBUG, tts_tag(), "[Server] Request to load voice");
+               if (0 != ttsd_engine_load_voice(temp_lang, temp_type)) {
+                       SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to load voice");
+               }
+       }
+
+       if (NULL != temp_lang) {
+               free(temp_lang);
+               temp_lang = NULL;
+       }
+
+       if (APP_STATE_PLAYING == state) {
+               /* check if engine use network */
+               if (ttsd_engine_agent_need_network()) {
+                       if (false == __is_connected_to_network()) {
+                               SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Disconnect network. Current engine needs network.");
+                               return TTSD_ERROR_OPERATION_FAILED;
+                       }
+               }
+
+               ttsd_data_set_credential(uid, credential);
+
+               /* Check whether tts-engine is running or not */
+               ttsd_synthesis_control_e synth_control = ttsd_data_get_synth_control();
+               SLOG(LOG_INFO, tts_tag(), "[Server INFO] synth_control(%d)", synth_control);
+               if (TTSD_SYNTHESIS_CONTROL_DOING == synth_control) {
+                       SLOG(LOG_WARN, tts_tag(), "[Server WARNING] Engine has already been running.");
+               } else {
+                       __synthesis(uid);
+               }
+       }
+
+       return TTSD_ERROR_NONE;
+}
+
+int ttsd_server_add_text_with_synthesis_parameter(unsigned int uid, const char* text, const char* lang, const char* ptts_id, int voice_type, int speed, int pitch, double volume, double background_volume_ratio, int utt_id, const char* credential)
+{
+       app_tts_state_e state = ttsd_data_get_client_state(uid);
+       if (APP_STATE_NONE == state) {
+               SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] ttsd_server_add_text : uid is not valid");
+               return TTSD_ERROR_INVALID_PARAMETER;
+       }
+
+       /* check valid voice */
+       char* temp_lang = NULL;
+       int temp_type;
+       if (true != ttsd_engine_select_valid_voice((const char*)lang, voice_type, &temp_lang, &temp_type)) {
+               SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to select valid voice");
+               if (NULL != temp_lang) {
+                       free(temp_lang);
+                       temp_lang = NULL;
+               }
+               return TTSD_ERROR_INVALID_VOICE;
+       }
+
+       if (NULL == temp_lang) {
+               SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to select valid voice : result lang is NULL");
+               return TTSD_ERROR_INVALID_VOICE;
+       }
+
+       speak_data_s* speak_data = ttsd_data_create_speak_data(text, lang, voice_type, speed, pitch, volume, background_volume_ratio, utt_id, false, 0);
        if (NULL == speak_data) {
                SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to allocate memory");
                if (NULL != temp_lang) {
@@ -1164,7 +1255,7 @@ int ttsd_server_add_silent_utterance(unsigned int uid, unsigned int duration_in_
                return TTSD_ERROR_INVALID_PARAMETER;
        }
 
-       speak_data_s* speak_data = ttsd_data_create_speak_data(NULL, NULL, 0, 0, utt_id, true, duration_in_msec);
+       speak_data_s* speak_data = ttsd_data_create_speak_data(NULL, NULL, 0, 0, 0, -1.0, -1.0, utt_id, true, duration_in_msec);
        if (NULL == speak_data) {
                SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to allocate memory");
                return TTSD_ERROR_OPERATION_FAILED;
index fa24958..f04998c 100644 (file)
@@ -63,6 +63,8 @@ int ttsd_server_add_text(unsigned int uid, const char* text, const char* lang, i
 
 int ttsd_server_add_silent_utterance(unsigned int uid, unsigned int duration_in_msec, int utt_id, const char* credential);
 
+int ttsd_server_add_text_with_synthesis_parameter(unsigned int uid, const char* text, const char* lang, const char* ptts_id, int voice_type, int speed, int pitch, double volume, double background_volume_ratio, int utt_id, const char* credential);
+
 int ttsd_server_play(unsigned int uid, const char* credential);
 
 int ttsd_server_stop(unsigned int uid);
index 9a84a9a..fcc2d5f 100644 (file)
@@ -351,6 +351,21 @@ static int __add_silent_utterance_cb(rpc_port_stub_tts_context_h context, int ui
        return TTSD_ERROR_NONE;
 }
 
+static int __add_text_with_synthesis_parameter_cb(rpc_port_stub_tts_context_h context, int uid, const char *text, const char* language, const char* ptts_id, int voice_type, int speed, int pitch, double volume, double background_volume_ratio, int uttid, const char *credential, void *user_data) /* Stub callback registered as g_callback.add_text_with_synthesis_parameter: forwards the request to ttsd_server_add_text_with_synthesis_parameter() and returns its result; context and user_data are not used here. */
+{
+       unsigned int u_uid = (unsigned int)uid; /* the server API takes the uid as unsigned int */
+       SLOG(LOG_DEBUG, tts_tag(), ">>>>> TTS ADD TEXT WITH SYNTHESIS PARAMETER(%u)", u_uid);
+
+       int ret = ttsd_server_add_text_with_synthesis_parameter(u_uid, text, language, ptts_id, voice_type, speed, pitch, volume, background_volume_ratio, uttid, credential); /* every synthesis parameter is passed through unchanged */
+       if (TTSD_ERROR_NONE != ret) {
+               SLOG(LOG_ERROR, tts_tag(), "[ERROR] TTS ADD TEXT WITH SYNTHESIS PARAMETER (%u) fail (%d/%s) <<<<<", u_uid, ret, get_error_message(ret));
+               return ret; /* hand the server's error code back to the caller */
+       }
+
+       SLOG(LOG_DEBUG, tts_tag(), "<<<<<");
+       return TTSD_ERROR_NONE;
+}
+
 static int __stop_cb(rpc_port_stub_tts_context_h context, int uid, void *user_data)
 {
        unsigned int u_uid = (unsigned int)uid;
@@ -509,6 +524,7 @@ int ttsd_tidl_open_connection()
        g_callback.finalize = __finalize_cb;
        g_callback.add_text = __add_text_cb;
        g_callback.add_silent_utterance = __add_silent_utterance_cb;
+       g_callback.add_text_with_synthesis_parameter = __add_text_with_synthesis_parameter_cb;
        g_callback.stop = __stop_cb;
        g_callback.pause = __pause_cb;
        g_callback.play_pcm = __play_pcm_cb;