Add new structure for saving synthesis parameter 56/306956/5
author Suyeon Hwang <stom.hwang@samsung.com>
Wed, 28 Feb 2024 08:48:53 +0000 (17:48 +0900)
committer sungwook park <sungwook79.park@samsung.com>
Thu, 25 Apr 2024 03:03:39 +0000 (03:03 +0000)
- Requirements:
The app needs to play each utterance with a different configuration, such
as pitch, speed, volume, or background volume.

- Contents:
This patch adds a new structure type for saving synthesis parameters. The
new type is used as a member of speak_data_s. With this change, each
speak_data_s can store its own synthesis parameters, so each utterance can
be played back differently according to those parameters.

Change-Id: I58ea401a14d807a1087275dee0dbac7d029a2c91
Signed-off-by: Suyeon Hwang <stom.hwang@samsung.com>
server/ttsd_data.cpp
server/ttsd_data.h
server/ttsd_main.h
server/ttsd_server.c

index 25d5208e655cef1b18d6a6662338736b96dd929b..1026589f939cb353e503f93c5c0f059caea48511 100644 (file)
@@ -197,10 +197,17 @@ int ttsd_data_new_client(int pid, unsigned int uid, ttsd_mode_e mode, ttsd_playi
 static inline void destroy_speak_data(speak_data_s* speak_data)
 {
        SLOG(LOG_DEBUG, tts_tag(), "[DEBUG] utt(%d), text(%s), lang(%s), vctype(%d) speed(%d)",
-                       speak_data->utt_id, speak_data->text, speak_data->lang, speak_data->vctype, speak_data->speed);
+                speak_data->utt_id, speak_data->text, speak_data->voice.language, speak_data->voice.type,
+                speak_data->parameter.speed);
 
        free(speak_data->text);
-       free(speak_data->lang);
+       speak_data->text = nullptr;
+
+       free(speak_data->voice.language);
+       speak_data->voice.language = nullptr;
+
+       free(speak_data->voice.ptts_id);
+       speak_data->voice.ptts_id = nullptr;
 
        delete speak_data;
 }
@@ -486,10 +493,14 @@ speak_data_s* ttsd_data_create_speak_data(const char* text, const char* language
        }
 
        speak_data->text = strdup(text);
-       speak_data->lang = strdup(language);
-
-       speak_data->vctype = voice_type;
-       speak_data->speed = speed;
+       speak_data->voice.language = strdup(language);
+       speak_data->voice.ptts_id = nullptr;
+       speak_data->voice.type = voice_type;
+
+       speak_data->parameter.speed = speed;
+       speak_data->parameter.pitch = 0;
+       speak_data->parameter.volume = -1.0;
+       speak_data->parameter.bg_volume = -1.0;
        speak_data->utt_id = utt_id;
 
        return speak_data;
@@ -525,7 +536,7 @@ int ttsd_data_add_speak_data(unsigned int uid, speak_data_s* data)
        }
 
        SLOG(LOG_ERROR, tts_tag(), "[DATA][%p] utt_id(%d), text(%s), lang(%s), vctype(%d), speed(%d)",
-                       data, data->utt_id, data->text, data->lang, data->vctype, data->speed);
+                data, data->utt_id, data->text, data->voice.language, data->voice.type, data->parameter.speed);
 
        if (1 == data->utt_id)
                app_data->utt_id_stopped = 0;
@@ -983,7 +994,8 @@ int ttsd_data_save_error_log(unsigned int uid, FILE* fp)
        i = 0;
        for (auto& speakData : app_data->m_speak_data) {
                SLOG(LOG_DEBUG, tts_tag(), "[%dth][%p] lang(%s), vctype(%d), speed(%d), uttid(%d), text(%s)",
-                               i, speakData, speakData->lang, speakData->vctype, speakData->speed, speakData->utt_id, speakData->text);
+                        i, speakData, speakData->voice.language, speakData->voice.type, speakData->parameter.speed,
+                        speakData->utt_id, speakData->text);
                i++;
        }
        fprintf(fp, "---------------------");
index 9d22b00cb78bfd8185030fc60df72bd0e0ec53d8..80dade90356653dee9711ce93b7a607525967ad8 100644 (file)
@@ -43,12 +43,18 @@ typedef enum {
        TTSD_SYNTHESIS_CONTROL_EXPIRED  = 2
 } ttsd_synthesis_control_e;
 
+typedef struct {
+       int speed;
+       int pitch;
+       double volume;
+       double bg_volume;
+} synthesis_parameter_s;
+
 typedef struct {
        int utt_id;
        char* text;
-       char* lang;
-       int vctype;
-       int speed;
+       voice_s voice;
+       synthesis_parameter_s parameter;
 } speak_data_s;
 
 typedef struct {
index c39559ab5968e0fdb5f771efdb3450be7a9e3da4..b834494fa784684ffb8c8e1f1f54d3e093470b00 100644 (file)
@@ -85,6 +85,7 @@ typedef struct {
 
 typedef struct {
        char* language;
+       char* ptts_id;
        int type;
 } voice_s;
 
index 19e263f06fda8d871b6f7afa18735fffec601f3c..1bdefeb1b3de724ce8fd4abad04e26b5cc30b677 100644 (file)
@@ -122,13 +122,13 @@ static void write_debugger_information(speak_data_s *speak_data, const char *app
 
        char* temp_lang = NULL;
        int temp_type;
-       if (true != ttsd_engine_select_valid_voice(speak_data->lang, speak_data->vctype, &temp_lang, &temp_type)) {
+       if (true != ttsd_engine_select_valid_voice(speak_data->voice.language, speak_data->voice.type, &temp_lang,
+                                                                                          &temp_type)) {
                SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to select default voice");
        } else {
                char value_buffer[2048] = {0, };
-               snprintf(value_buffer, 2048,
-                               "client=%s|type=%d|speed=%d|language=%s|text=%s|error=%d",
-                               appid, temp_type, speak_data->speed, temp_lang, speak_data->text, ret);
+               snprintf(value_buffer, 2048, "client=%s|type=%d|speed=%d|language=%s|text=%s|error=%d", appid, temp_type,
+                                speak_data->parameter.speed, temp_lang, speak_data->text, ret);
 
                SLOG(LOG_INFO, tts_tag(), "keyVal : %s", value_buffer);
 
@@ -166,7 +166,7 @@ static void __synthesis(unsigned int uid)
                SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Fail to get app id");
        }
 
-       if (NULL == speak_data->lang || NULL == speak_data->text) {
+       if (NULL == speak_data->voice.language || NULL == speak_data->text) {
                SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] Current data is NOT valid");
                __stop_and_send_ready_state(uid);
                ttsd_data_destroy_speak_data(speak_data);
@@ -181,13 +181,15 @@ static void __synthesis(unsigned int uid)
 
        SLOG(LOG_INFO, tts_tag(), "-----------------------------------------------------------");
        SLOG(LOG_INFO, tts_tag(), "ID : uid (%u), uttid(%d) ", g_utt.uid, g_utt.uttid);
-       SLOG(LOG_INFO, tts_tag(), "Voice : langauge(%s), type(%d), speed(%d)", speak_data->lang, speak_data->vctype, speak_data->speed);
+       SLOG(LOG_INFO, tts_tag(), "Voice : language(%s), type(%d), speed(%d)", speak_data->voice.language,
+                speak_data->voice.type, speak_data->parameter.speed);
        SLOG(LOG_INFO, tts_tag(), "Text : %s", speak_data->text);
        SLOG(LOG_INFO, tts_tag(), "Credential : %s", credential);
        SLOG(LOG_INFO, tts_tag(), "-----------------------------------------------------------");
 
        ttsd_data_set_synth_control(TTSD_SYNTHESIS_CONTROL_DOING);
-       ret = ttsd_engine_start_synthesis(speak_data->lang, speak_data->vctype, speak_data->text, speak_data->speed, appid, credential, NULL);
+       ret = ttsd_engine_start_synthesis(speak_data->voice.language, speak_data->voice.type, speak_data->text,
+                                                                         speak_data->parameter.speed, appid, credential, NULL);
        if (TTSD_ERROR_NONE != ret) {
                SLOG(LOG_ERROR, tts_tag(), "[Server ERROR] * FAIL to start SYNTHESIS !!!! * ");