Remove unnecessary parameter for stt_start_file function 12/285312/1
author wn.jang <wn.jang@samsung.com>
Fri, 9 Dec 2022 02:50:00 +0000 (11:50 +0900)
committer wn.jang <wn.jang@samsung.com>
Fri, 9 Dec 2022 02:50:04 +0000 (11:50 +0900)
The audio format types are not changeable in the current STT engine.
Therefore, the sample rate, channel count, and audio type are not needed to transmit the PCM data to the cloud.
The application should send PCM data consisting of 1 channel, a sample rate of 16000, and signed 16-bit samples.

Change-Id: I4f9ad034419bf088d6de9d2a1b4a54adfc6e718a

client/stt.c
client/stt_dbus.c
client/stt_dbus.h
include/stt_internal.h
server/sttd_dbus_server.c
server/sttd_engine_agent.c
server/sttd_engine_agent.h
server/sttd_recorder.c
server/sttd_recorder.h
server/sttd_server.c
server/sttd_server.h

index dda250f..7eaa071 100644 (file)
@@ -2066,7 +2066,7 @@ int stt_unset_speech_status_cb(stt_h stt)
        return 0;
 }
 
-int stt_start_file(stt_h stt, const char* language, const char* type, const char* filepath, stt_audio_type_e audio_type, int channels, int sample_rate)
+int stt_start_file(stt_h stt, const char* language, const char* type, const char* filepath)
 {
        stt_client_s* client = NULL;
        int tmp = __stt_check_precondition(stt, &client);
@@ -2110,7 +2110,7 @@ int stt_start_file(stt_h stt, const char* language, const char* type, const char
        }
 
        client->internal_state = STT_INTERNAL_STATE_STARTING;
-       ret = stt_dbus_request_start_file(client->uid, temp, type, client->silence, appid, client->credential, filepath, audio_type, channels, sample_rate);
+       ret = stt_dbus_request_start_file(client->uid, temp, type, client->silence, appid, client->credential, filepath);
        if (0 != ret) {
                SLOG(LOG_ERROR, TAG_STTC, "[ERROR] Fail to start file : %s", __stt_get_error_code(ret));
                client->internal_state = STT_INTERNAL_STATE_NONE;
index dea99fb..4bc695f 100644 (file)
@@ -1661,7 +1661,7 @@ int stt_dbus_request_cancel(unsigned int uid)
 }
 
 //LCOV_EXCL_START
-int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char* type, int silence, const char* appid, const char* credential, const char* filepath, stt_audio_type_e audio_type, int channels, int sample_rate)
+int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char* type, int silence, const char* appid, const char* credential, const char* filepath)
 {
        if (NULL == lang || NULL == type || NULL == appid) {
                SLOG(LOG_ERROR, TAG_STTC, "Input parameter is NULL");
@@ -1681,7 +1681,7 @@ int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char*
                SLOG(LOG_ERROR, TAG_STTC, ">>>> stt start file : Fail to make message");
                return STT_ERROR_OPERATION_FAILED;
        } else {
-               SLOG(LOG_DEBUG, TAG_STTC, ">>>> stt start file : uid(%u), language(%s), type(%s), appid(%s), filepath(%s), audio_type(%d), channels(%d), sample_rate(%d)", uid, lang, type, appid, filepath, audio_type, channels, sample_rate);
+               SLOG(LOG_DEBUG, TAG_STTC, ">>>> stt start file : uid(%u), language(%s), type(%s), appid(%s), filepath(%s)", uid, lang, type, appid, filepath);
        }
 
        char *temp = NULL;
@@ -1699,9 +1699,6 @@ int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char*
                DBUS_TYPE_STRING, &appid,
                DBUS_TYPE_STRING, &temp,
                DBUS_TYPE_STRING, &filepath,
-               DBUS_TYPE_INT32, &audio_type,
-               DBUS_TYPE_INT32, &channels,
-               DBUS_TYPE_INT32, &sample_rate,
                DBUS_TYPE_INVALID);
 
        int ret = __stt_dbus_send_message_without_reply(msg, STT_METHOD_START_FILE);
index 65c4f15..24a41ef 100644 (file)
@@ -63,7 +63,7 @@ int stt_dbus_request_stop(unsigned int uid);
 
 int stt_dbus_request_cancel(unsigned int uid);
 
-int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char* type, int silence, const char* appid, const char* credential, const char* filepath, stt_audio_type_e audio_type, int channels, int sample_rate);
+int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char* type, int silence, const char* appid, const char* credential, const char* filepath);
 
 int stt_dbus_request_cancel_file(unsigned int uid);
 
index 0104b1e..9e94ab9 100644 (file)
@@ -93,13 +93,14 @@ int stt_set_server_stt(stt_h stt, const char* key, char* user_data);
  * @privlevel public
  * @privilege %http://tizen.org/privilege/recorder
  * @remarks This function starts sending recorded data from file to engine.
+ *             The pcm file should be in raw format as following
+ *                     - 16000 for sample rate
+ *                     - 1 channel for mono
+ *                     - Signed 16 bits per sample
  * @param[in] stt The STT handle
  * @param[in] language The language selected from stt_foreach_supported_languages()
  * @param[in] type The type for recognition (e.g. #STT_RECOGNITION_TYPE_FREE, #STT_RECOGNITION_TYPE_FREE_PARTIAL)
  * @param[in] filepath PCM filepath for recognition
- * @param[in] audio_type audio type of file
- * @param[in] channel channel of file (e.g. 1 for mono, 2 for stereo)
- * @param[in] sample_rate sample rate of file
  * @return @c 0 on success,
  *         otherwise a negative error value
  * @retval #STT_ERROR_NONE Successful
@@ -116,7 +117,7 @@ int stt_set_server_stt(stt_h stt, const char* key, char* user_data);
  * @see stt_cancel_file()
  * @see stt_state_changed_cb()
 */
-int stt_start_file(stt_h stt, const char* language, const char* type, const char* filepath, stt_audio_type_e audio_type, int channels, int sample_rate);
+int stt_start_file(stt_h stt, const char* language, const char* type, const char* filepath);
 
 /**
  * @brief Cancels processing file recognition asynchronously.
index f13f1e8..37ab634 100644 (file)
@@ -905,9 +905,6 @@ int sttd_dbus_server_start_file(DBusConnection* conn, DBusMessage* msg)
        int silence;
        char* credential;
        char* filepath;
-       stte_audio_type_e audio_type;
-       int channels;
-       int sample_rate;
 
        int ret = STTD_ERROR_OPERATION_FAILED;
 
@@ -919,9 +916,6 @@ int sttd_dbus_server_start_file(DBusConnection* conn, DBusMessage* msg)
                DBUS_TYPE_STRING, &appid,
                DBUS_TYPE_STRING, &credential,
                DBUS_TYPE_STRING, &filepath,
-               DBUS_TYPE_INT32, &audio_type,
-               DBUS_TYPE_INT32, &channels,
-               DBUS_TYPE_INT32, &sample_rate,
                DBUS_TYPE_INVALID);
 
        SLOG(LOG_DEBUG, TAG_STTD, ">>>>> STT Start File");
@@ -931,9 +925,8 @@ int sttd_dbus_server_start_file(DBusConnection* conn, DBusMessage* msg)
                dbus_error_free(&err);
                ret = STTD_ERROR_OPERATION_FAILED;
        } else {
-               SLOG(LOG_DEBUG, TAG_STTD, "[IN] stt start file : uid(%u), lang(%s), type(%s), silence(%d) appid(%s) filepath(%s), audio_type(%d), channels(%d), sample_rate(%d)"
-                       , uid, lang, type, silence, appid, filepath, audio_type, channels, sample_rate);
-               ret = sttd_server_start_file(uid, lang, type, silence, appid, credential, filepath, audio_type, channels, sample_rate);
+               SLOG(LOG_DEBUG, TAG_STTD, "[IN] stt start file : uid(%u), lang(%s), type(%s), silence(%d) appid(%s) filepath(%s)", uid, lang, type, silence, appid, filepath);
+               ret = sttd_server_start_file(uid, lang, type, silence, appid, credential, filepath);
        }
 
        if (0 <= ret) {
index ef72d12..790602d 100644 (file)
@@ -698,7 +698,7 @@ int sttd_engine_agent_recognize_start_recorder(unsigned int uid, const char* app
        return 0;
 }
 
-int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepath, stte_audio_type_e audio_type, int channels, int sample_rate)
+int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepath)
 {
        int tmp = __sttd_engine_agent_check_precondition();
        if (STTD_ERROR_NONE != tmp)
@@ -707,7 +707,7 @@ int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepat
        SLOG(LOG_INFO, TAG_STTD, "[Engine Agent] Start recorder");
 
        int ret;
-       ret = sttd_recorder_start_file(uid, filepath, audio_type, channels, sample_rate);
+       ret = sttd_recorder_start_file(uid, filepath);
        if (0 != ret) {
                SLOG(LOG_ERROR, TAG_STTD, "[Engine Agent ERROR] Fail to start recorder : result(%d)", ret);
                stt_engine_recognize_cancel();
index 673f3a8..d5992a0 100644 (file)
@@ -98,7 +98,7 @@ int sttd_engine_agent_recognize_start_engine(unsigned int uid, const char* lang,
 
 int sttd_engine_agent_recognize_start_recorder(unsigned int uid, const char* appid);
 
-int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepath, stte_audio_type_e audio_type, int channels, int sample_rate);
+int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepath);
 
 int sttd_engine_agent_set_recording_data(const void* data, unsigned int length);
 
index 1c29476..157d9dc 100644 (file)
@@ -850,37 +850,18 @@ int sttd_recorder_stop()
        return 0;
 }
 
-static int __calculate_time_of_pcm_data(int bytes, int sample_rate, int channels, int bit_per_sample)
+static int __calculate_time_of_pcm_data(int bytes)
 {
-       int time = 0;
-       if (0 == bytes || 0 == sample_rate || 0 == channels || 0 == bit_per_sample) {
-               SLOG(LOG_ERROR, TAG_STTD, "[Recorder ERROR] Invalid parameter");
-               return 0;
-       }
+       const static int SAMPLE_RATE = 16000;
+       const static int CHANNELS = 1;
+       const static int BIT_PER_SAMPLE = 16;
 
-       int sample_per_sec = sample_rate * channels * bit_per_sample / 8;
-       time = bytes * 1000 / sample_per_sec;
+       int sample_per_sec = SAMPLE_RATE * CHANNELS * BIT_PER_SAMPLE / 8;
+       int time = bytes * 1000 / sample_per_sec;
        return time;
 }
 
-static int __calculate_bit_per_sample(stte_audio_type_e audio_type)
-{
-       int bit_per_sample = 0;
-       switch (audio_type) {
-       case STTE_AUDIO_TYPE_PCM_S16_LE:
-               bit_per_sample = 16;
-               break;
-       case STTE_AUDIO_TYPE_PCM_U8:
-               bit_per_sample = 8;
-               break;
-       default:
-               SLOG(LOG_ERROR, TAG_STTD, "[Recorder ERROR] Invalid audio type");
-               break;
-       }
-       return bit_per_sample;
-}
-
-int sttd_recorder_start_file(unsigned int uid, const char *filepath, stte_audio_type_e audio_type, int channels, int sample_rate)
+int sttd_recorder_start_file(unsigned int uid, const char *filepath)
 {
        if (STTD_RECORDER_STATE_RECORDING == g_recorder_state)
                return 0;
@@ -905,7 +886,7 @@ int sttd_recorder_start_file(unsigned int uid, const char *filepath, stte_audio_
                        int read_byte = fread(pcm_buff, 1, BUFFER_LENGTH, infile);
 
                        // sleep for real time of pcm data
-                       int time = __calculate_time_of_pcm_data(read_byte, sample_rate, channels, __calculate_bit_per_sample(audio_type));
+                       int time = __calculate_time_of_pcm_data(read_byte);
                        usleep(time * 1000);
 
                        totalReadBytes += read_byte;
index 13000da..ab53dd1 100644 (file)
@@ -42,7 +42,7 @@ int sttd_recorder_start(unsigned int uid, const char* appid);
 
 int sttd_recorder_stop();
 
-int sttd_recorder_start_file(unsigned int uid, const char *filepath, stte_audio_type_e audio_type, int channels, int sample_rate);
+int sttd_recorder_start_file(unsigned int uid, const char *filepath);
 
 int sttd_recorder_stop_file();
 
index 70261ec..3c4b3e8 100644 (file)
@@ -1593,8 +1593,7 @@ int sttd_server_cancel(unsigned int uid)
        return ret;
 }
 
-int sttd_server_start_file(unsigned int uid, const char* lang, const char* recognition_type, int silence, const char* appid, const char* credential,
-                                                       const char* filepath, stte_audio_type_e audio_type, int channels, int sample_rate)
+int sttd_server_start_file(unsigned int uid, const char* lang, const char* recognition_type, int silence, const char* appid, const char* credential, const char* filepath)
 {
        if (NULL == lang || NULL == recognition_type || NULL == filepath) {
                SLOG(LOG_ERROR, TAG_STTD, "[Server ERROR] Input parameter is NULL");
@@ -1613,7 +1612,7 @@ int sttd_server_start_file(unsigned int uid, const char* lang, const char* recog
        }
 
        /* engine start recognition */
-       SLOG(LOG_DEBUG, TAG_STTD, "[Server] start : uid(%u), lang(%s), recog_type(%s), appid(%s), file(%s), audio_type(%d), channels(%d), sample_rate(%d)", uid, lang, recognition_type, appid, filepath, audio_type, channels, sample_rate);
+       SLOG(LOG_DEBUG, TAG_STTD, "[Server] start : uid(%u), lang(%s), recog_type(%s), appid(%s), file(%s)", uid, lang, recognition_type, appid, filepath);
 
        /* 1. Set audio session */
        ret = sttd_recorder_set_audio_session();
@@ -1636,7 +1635,7 @@ int sttd_server_start_file(unsigned int uid, const char* lang, const char* recog
        sttdc_send_set_state(uid, APP_STATE_RECORDING);
 
        /* 3. Start to send pcm from file to engine */
-       ret = sttd_engine_agent_recognize_start_file(uid, filepath, audio_type, channels, sample_rate);
+       ret = sttd_engine_agent_recognize_start_file(uid, filepath);
        if (0 != ret) {
                stt_client_unset_current_recognition();
                sttd_recorder_unset_audio_session();
index 96ac484..5ac8f50 100644 (file)
@@ -76,7 +76,7 @@ int sttd_server_stop(unsigned int uid);
 
 int sttd_server_cancel(unsigned int uid);
 
-int sttd_server_start_file(unsigned int uid, const char* lang, const char* recognition_type, int silence, const char* appid, const char* credential, const char* filepath, stte_audio_type_e audio_type, int channels, int sample_rate);
+int sttd_server_start_file(unsigned int uid, const char* lang, const char* recognition_type, int silence, const char* appid, const char* credential, const char* filepath);
 
 int sttd_server_cancel_file(unsigned int uid);