Add logs to measure xRT for synthesizing from tts engine 84/291284/5
author wn.jang <wn.jang@samsung.com>
Thu, 13 Apr 2023 01:30:17 +0000 (10:30 +0900)
committer wn.jang <wn.jang@samsung.com>
Wed, 19 Apr 2023 07:50:34 +0000 (16:50 +0900)
Change-Id: I9a43b12403df907db02d6309a5cdd4c5957a3be8

server/ttsd_server.c

index 3b7db8d2a60378466360fbae0abd6d172275d1ad..3ddcfd905639cca691faa22a83b886156d58b0d1 100644 (file)
 #define TIME_DIFF(start, end) \
        ((uint64_t)((end).tv_sec - (start).tv_sec) * 1000000 + (((end).tv_nsec - (start).tv_nsec) / 1000)) / 1000 /* elapsed time from start to end: microseconds are summed first, then divided by 1000 to give milliseconds; NOTE(review): the expansion is not wrapped in outer parentheses */
 
-static struct timespec g_request_playing, g_start_playing;
+static struct timespec g_request_playing, g_start_playing, g_finish_playing;
 static double g_avg_latency;
 static double g_min_latency;
 static double g_max_latency;
 static int g_file_num;
+static int g_total_data_size;
 
 typedef struct {
        unsigned int uid;
@@ -210,6 +211,23 @@ int ttsd_send_error(ttse_error_e error, const char* msg)
        return 0;
 }
 
+static int ttsd_convert_audio_type_to_bytes(ttse_audio_type_e audio_type)
+{ /* Maps an audio type to a byte count: 2 for 16-bit, 1 for 8-bit, 2 for any other value. */
+       int ret = 0; /* overwritten by every branch of the switch below */
+       switch (audio_type) {
+       case TTSE_AUDIO_TYPE_RAW_S16:
+               ret = 2; // 16-bit audio type: 2 bytes
+               break;
+       case TTSE_AUDIO_TYPE_RAW_U8:
+               ret = 1; // 8-bit audio type: 1 byte
+               break;
+       default:
+               ret = 2; // any other audio type falls back to the 16-bit value (2 bytes)
+               break;
+       }
+       return ret;
+}
+
 int ttsd_send_result(ttse_result_event_e event, const void* data, unsigned int data_size, ttse_audio_type_e audio_type, int rate, void* user_data)
 {
        SLOG(LOG_DEBUG, tts_tag(), "@@@ SEND SYNTHESIS RESULT START");
@@ -266,13 +284,26 @@ int ttsd_send_result(ttse_result_event_e event, const void* data, unsigned int d
                g_min_latency = (d_latency < g_min_latency) ? d_latency : g_min_latency;
                g_max_latency = (d_latency > g_max_latency) ? d_latency : g_max_latency;
                g_file_num++;
+               g_total_data_size = 0;
+               g_total_data_size += data_size;
                SLOG(LOG_INFO, tts_tag(), "[Server] File num(%d), Avg Latency(%lf), Min(%lf), Max(%lf)", g_file_num, (g_avg_latency / (double)g_file_num), g_min_latency, g_max_latency);
        } else if (TTSE_RESULT_EVENT_FINISH == event) {
                SLOG(LOG_INFO, tts_tag(), "[SERVER] Event : TTSE_RESULT_EVENT_FINISH");
                SECURE_SLOG(LOG_DEBUG, tts_tag(), "[SERVER] Result Info : uid(%u), utt(%d), data(%p), data size(%d) audiotype(%d) rate(%d)",
                        uid, uttid, data, data_size, audio_type, rate);
+
+               g_total_data_size += data_size;
+               int audio_type_bytes = ttsd_convert_audio_type_to_bytes(audio_type);
+               /* Calculate xRT */
+               clock_gettime(CLOCK_MONOTONIC_RAW, &g_finish_playing);
+               long long int time_processing = (uint64_t)TIME_DIFF(g_request_playing, g_finish_playing);
+               long long int time_speech = (long long int)(1000 * g_total_data_size * sizeof(short) / (rate * audio_type_bytes));
+               double xRT = (double)(time_processing) / (double)time_speech;
+               SLOG(LOG_INFO, tts_tag(), "[SERVER] time_processing : %lld, time_speech : %lld, total data size: %d", time_processing, time_speech, g_total_data_size);
+               SLOG(LOG_INFO, tts_tag(), "[SERVER] xRT : %lf", xRT);
        } else {
                /*if (TTSE_RESULT_EVENT_CONTINUE == event)  SLOG(LOG_DEBUG, tts_tag(), "[SERVER] Event : TTSE_RESULT_EVENT_CONTINUE");*/
+               g_total_data_size += data_size;
        }
 
        /* add wav data */