Remove unnecessary parameter for stt_start_file function 12/285312/1
author wn.jang <wn.jang@samsung.com>
Fri, 9 Dec 2022 02:50:00 +0000 (11:50 +0900)
committer wn.jang <wn.jang@samsung.com>
Fri, 9 Dec 2022 02:50:04 +0000 (11:50 +0900)
The audio format types are not changeable in the current STT engine.
Therefore, the sample rate, channel count and audio type are not needed to transmit the PCM data to the cloud.
The application should send PCM data that consists of 1 channel, a 16000 sample rate and signed 16-bit samples.

Change-Id: I4f9ad034419bf088d6de9d2a1b4a54adfc6e718a

client/stt.c
client/stt_dbus.c
client/stt_dbus.h
include/stt_internal.h
server/sttd_dbus_server.c
server/sttd_engine_agent.c
server/sttd_engine_agent.h
server/sttd_recorder.c
server/sttd_recorder.h
server/sttd_server.c
server/sttd_server.h

index dda250f3074caa7717735127af1aaaf62fd6db99..7eaa0715eada5e76818eef01e6045beb6ae6b296 100644 (file)
@@ -2066,7 +2066,7 @@ int stt_unset_speech_status_cb(stt_h stt)
        return 0;
 }
 
-int stt_start_file(stt_h stt, const char* language, const char* type, const char* filepath, stt_audio_type_e audio_type, int channels, int sample_rate)
+int stt_start_file(stt_h stt, const char* language, const char* type, const char* filepath)
 {
        stt_client_s* client = NULL;
        int tmp = __stt_check_precondition(stt, &client);
@@ -2110,7 +2110,7 @@ int stt_start_file(stt_h stt, const char* language, const char* type, const char
        }
 
        client->internal_state = STT_INTERNAL_STATE_STARTING;
-       ret = stt_dbus_request_start_file(client->uid, temp, type, client->silence, appid, client->credential, filepath, audio_type, channels, sample_rate);
+       ret = stt_dbus_request_start_file(client->uid, temp, type, client->silence, appid, client->credential, filepath);
        if (0 != ret) {
                SLOG(LOG_ERROR, TAG_STTC, "[ERROR] Fail to start file : %s", __stt_get_error_code(ret));
                client->internal_state = STT_INTERNAL_STATE_NONE;
index dea99fbab0310924d4578d734f85292dc01c9121..4bc695f9caf432ea008f156fa7cb367c1f7411f5 100644 (file)
@@ -1661,7 +1661,7 @@ int stt_dbus_request_cancel(unsigned int uid)
 }
 
 //LCOV_EXCL_START
-int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char* type, int silence, const char* appid, const char* credential, const char* filepath, stt_audio_type_e audio_type, int channels, int sample_rate)
+int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char* type, int silence, const char* appid, const char* credential, const char* filepath)
 {
        if (NULL == lang || NULL == type || NULL == appid) {
                SLOG(LOG_ERROR, TAG_STTC, "Input parameter is NULL");
@@ -1681,7 +1681,7 @@ int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char*
                SLOG(LOG_ERROR, TAG_STTC, ">>>> stt start file : Fail to make message");
                return STT_ERROR_OPERATION_FAILED;
        } else {
-               SLOG(LOG_DEBUG, TAG_STTC, ">>>> stt start file : uid(%u), language(%s), type(%s), appid(%s), filepath(%s), audio_type(%d), channels(%d), sample_rate(%d)", uid, lang, type, appid, filepath, audio_type, channels, sample_rate);
+               SLOG(LOG_DEBUG, TAG_STTC, ">>>> stt start file : uid(%u), language(%s), type(%s), appid(%s), filepath(%s)", uid, lang, type, appid, filepath);
        }
 
        char *temp = NULL;
@@ -1699,9 +1699,6 @@ int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char*
                DBUS_TYPE_STRING, &appid,
                DBUS_TYPE_STRING, &temp,
                DBUS_TYPE_STRING, &filepath,
-               DBUS_TYPE_INT32, &audio_type,
-               DBUS_TYPE_INT32, &channels,
-               DBUS_TYPE_INT32, &sample_rate,
                DBUS_TYPE_INVALID);
 
        int ret = __stt_dbus_send_message_without_reply(msg, STT_METHOD_START_FILE);
index 65c4f15ddbb89e1dddf7255ab7d9e0672662ddd5..24a41ef788de77881343efd7c84574b7494fd9c9 100644 (file)
@@ -63,7 +63,7 @@ int stt_dbus_request_stop(unsigned int uid);
 
 int stt_dbus_request_cancel(unsigned int uid);
 
-int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char* type, int silence, const char* appid, const char* credential, const char* filepath, stt_audio_type_e audio_type, int channels, int sample_rate);
+int stt_dbus_request_start_file(unsigned int uid, const char* lang, const char* type, int silence, const char* appid, const char* credential, const char* filepath);
 
 int stt_dbus_request_cancel_file(unsigned int uid);
 
index 0104b1e47ac4e150b268b49c4cd32d92dbedc270..9e94ab957128d4199856ff250685eacd44e23f54 100644 (file)
@@ -93,13 +93,14 @@ int stt_set_server_stt(stt_h stt, const char* key, char* user_data);
  * @privlevel public
  * @privilege %http://tizen.org/privilege/recorder
  * @remarks This function starts sending recorded data from file to engine.
+ *             The PCM file should be in raw format, as follows:
+ *                     - 16000 for sample rate
+ *                     - 1 channel for mono
+ *                     - Signed 16 bits per sample
  * @param[in] stt The STT handle
  * @param[in] language The language selected from stt_foreach_supported_languages()
  * @param[in] type The type for recognition (e.g. #STT_RECOGNITION_TYPE_FREE, #STT_RECOGNITION_TYPE_FREE_PARTIAL)
  * @param[in] filepath PCM filepath for recognition
- * @param[in] audio_type audio type of file
- * @param[in] channel channel of file (e.g. 1 for mono, 2 for stereo)
- * @param[in] sample_rate sample rate of file
  * @return @c 0 on success,
  *         otherwise a negative error value
  * @retval #STT_ERROR_NONE Successful
@@ -116,7 +117,7 @@ int stt_set_server_stt(stt_h stt, const char* key, char* user_data);
  * @see stt_cancel_file()
  * @see stt_state_changed_cb()
 */
-int stt_start_file(stt_h stt, const char* language, const char* type, const char* filepath, stt_audio_type_e audio_type, int channels, int sample_rate);
+int stt_start_file(stt_h stt, const char* language, const char* type, const char* filepath);
 
 /**
  * @brief Cancels processing file recognition asynchronously.
index f13f1e8734ab33451f51b9f5490f660a1b52b778..37ab634b1fd0fa5919200db4c73a4a767147d365 100644 (file)
@@ -905,9 +905,6 @@ int sttd_dbus_server_start_file(DBusConnection* conn, DBusMessage* msg)
        int silence;
        char* credential;
        char* filepath;
-       stte_audio_type_e audio_type;
-       int channels;
-       int sample_rate;
 
        int ret = STTD_ERROR_OPERATION_FAILED;
 
@@ -919,9 +916,6 @@ int sttd_dbus_server_start_file(DBusConnection* conn, DBusMessage* msg)
                DBUS_TYPE_STRING, &appid,
                DBUS_TYPE_STRING, &credential,
                DBUS_TYPE_STRING, &filepath,
-               DBUS_TYPE_INT32, &audio_type,
-               DBUS_TYPE_INT32, &channels,
-               DBUS_TYPE_INT32, &sample_rate,
                DBUS_TYPE_INVALID);
 
        SLOG(LOG_DEBUG, TAG_STTD, ">>>>> STT Start File");
@@ -931,9 +925,8 @@ int sttd_dbus_server_start_file(DBusConnection* conn, DBusMessage* msg)
                dbus_error_free(&err);
                ret = STTD_ERROR_OPERATION_FAILED;
        } else {
-               SLOG(LOG_DEBUG, TAG_STTD, "[IN] stt start file : uid(%u), lang(%s), type(%s), silence(%d) appid(%s) filepath(%s), audio_type(%d), channels(%d), sample_rate(%d)"
-                       , uid, lang, type, silence, appid, filepath, audio_type, channels, sample_rate);
-               ret = sttd_server_start_file(uid, lang, type, silence, appid, credential, filepath, audio_type, channels, sample_rate);
+               SLOG(LOG_DEBUG, TAG_STTD, "[IN] stt start file : uid(%u), lang(%s), type(%s), silence(%d) appid(%s) filepath(%s)", uid, lang, type, silence, appid, filepath);
+               ret = sttd_server_start_file(uid, lang, type, silence, appid, credential, filepath);
        }
 
        if (0 <= ret) {
index ef72d123496775c85a1a849c293c591de225aa24..790602dca9940b19e9a6dc083888d160aa8d1711 100644 (file)
@@ -698,7 +698,7 @@ int sttd_engine_agent_recognize_start_recorder(unsigned int uid, const char* app
        return 0;
 }
 
-int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepath, stte_audio_type_e audio_type, int channels, int sample_rate)
+int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepath)
 {
        int tmp = __sttd_engine_agent_check_precondition();
        if (STTD_ERROR_NONE != tmp)
@@ -707,7 +707,7 @@ int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepat
        SLOG(LOG_INFO, TAG_STTD, "[Engine Agent] Start recorder");
 
        int ret;
-       ret = sttd_recorder_start_file(uid, filepath, audio_type, channels, sample_rate);
+       ret = sttd_recorder_start_file(uid, filepath);
        if (0 != ret) {
                SLOG(LOG_ERROR, TAG_STTD, "[Engine Agent ERROR] Fail to start recorder : result(%d)", ret);
                stt_engine_recognize_cancel();
index 673f3a86db833b52677dd9a5d7576a6373236be1..d5992a0b0369387a44de7819c36e2b550206c578 100644 (file)
@@ -98,7 +98,7 @@ int sttd_engine_agent_recognize_start_engine(unsigned int uid, const char* lang,
 
 int sttd_engine_agent_recognize_start_recorder(unsigned int uid, const char* appid);
 
-int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepath, stte_audio_type_e audio_type, int channels, int sample_rate);
+int sttd_engine_agent_recognize_start_file(unsigned int uid, const char* filepath);
 
 int sttd_engine_agent_set_recording_data(const void* data, unsigned int length);
 
index 1c294760bead6276c3ee3b607b8f3604c4b1f228..157d9dc884a4ae341902d3ca42492da1ab25e54a 100644 (file)
@@ -850,37 +850,18 @@ int sttd_recorder_stop()
        return 0;
 }
 
-static int __calculate_time_of_pcm_data(int bytes, int sample_rate, int channels, int bit_per_sample)
+static int __calculate_time_of_pcm_data(int bytes)
 {
-       int time = 0;
-       if (0 == bytes || 0 == sample_rate || 0 == channels || 0 == bit_per_sample) {
-               SLOG(LOG_ERROR, TAG_STTD, "[Recorder ERROR] Invalid parameter");
-               return 0;
-       }
+       const static int SAMPLE_RATE = 16000;
+       const static int CHANNELS = 1;
+       const static int BIT_PER_SAMPLE = 16;
 
-       int sample_per_sec = sample_rate * channels * bit_per_sample / 8;
-       time = bytes * 1000 / sample_per_sec;
+       int sample_per_sec = SAMPLE_RATE * CHANNELS * BIT_PER_SAMPLE / 8;
+       int time = bytes * 1000 / sample_per_sec;
        return time;
 }
 
-static int __calculate_bit_per_sample(stte_audio_type_e audio_type)
-{
-       int bit_per_sample = 0;
-       switch (audio_type) {
-       case STTE_AUDIO_TYPE_PCM_S16_LE:
-               bit_per_sample = 16;
-               break;
-       case STTE_AUDIO_TYPE_PCM_U8:
-               bit_per_sample = 8;
-               break;
-       default:
-               SLOG(LOG_ERROR, TAG_STTD, "[Recorder ERROR] Invalid audio type");
-               break;
-       }
-       return bit_per_sample;
-}
-
-int sttd_recorder_start_file(unsigned int uid, const char *filepath, stte_audio_type_e audio_type, int channels, int sample_rate)
+int sttd_recorder_start_file(unsigned int uid, const char *filepath)
 {
        if (STTD_RECORDER_STATE_RECORDING == g_recorder_state)
                return 0;
@@ -905,7 +886,7 @@ int sttd_recorder_start_file(unsigned int uid, const char *filepath, stte_audio_
                        int read_byte = fread(pcm_buff, 1, BUFFER_LENGTH, infile);
 
                        // sleep for real time of pcm data
-                       int time = __calculate_time_of_pcm_data(read_byte, sample_rate, channels, __calculate_bit_per_sample(audio_type));
+                       int time = __calculate_time_of_pcm_data(read_byte);
                        usleep(time * 1000);
 
                        totalReadBytes += read_byte;
index 13000da13e9f8aa5fd2043f5c5bc44c8a294a8cf..ab53dd191d6d7990b4c7719d6d72d786e625e61a 100644 (file)
@@ -42,7 +42,7 @@ int sttd_recorder_start(unsigned int uid, const char* appid);
 
 int sttd_recorder_stop();
 
-int sttd_recorder_start_file(unsigned int uid, const char *filepath, stte_audio_type_e audio_type, int channels, int sample_rate);
+int sttd_recorder_start_file(unsigned int uid, const char *filepath);
 
 int sttd_recorder_stop_file();
 
index 70261ec41bb5e8f8e4ff9a9615cda10f56996954..3c4b3e8736d3053995f8ca7fe0b97ee257d50ca2 100644 (file)
@@ -1593,8 +1593,7 @@ int sttd_server_cancel(unsigned int uid)
        return ret;
 }
 
-int sttd_server_start_file(unsigned int uid, const char* lang, const char* recognition_type, int silence, const char* appid, const char* credential,
-                                                       const char* filepath, stte_audio_type_e audio_type, int channels, int sample_rate)
+int sttd_server_start_file(unsigned int uid, const char* lang, const char* recognition_type, int silence, const char* appid, const char* credential, const char* filepath)
 {
        if (NULL == lang || NULL == recognition_type || NULL == filepath) {
                SLOG(LOG_ERROR, TAG_STTD, "[Server ERROR] Input parameter is NULL");
@@ -1613,7 +1612,7 @@ int sttd_server_start_file(unsigned int uid, const char* lang, const char* recog
        }
 
        /* engine start recognition */
-       SLOG(LOG_DEBUG, TAG_STTD, "[Server] start : uid(%u), lang(%s), recog_type(%s), appid(%s), file(%s), audio_type(%d), channels(%d), sample_rate(%d)", uid, lang, recognition_type, appid, filepath, audio_type, channels, sample_rate);
+       SLOG(LOG_DEBUG, TAG_STTD, "[Server] start : uid(%u), lang(%s), recog_type(%s), appid(%s), file(%s)", uid, lang, recognition_type, appid, filepath);
 
        /* 1. Set audio session */
        ret = sttd_recorder_set_audio_session();
@@ -1636,7 +1635,7 @@ int sttd_server_start_file(unsigned int uid, const char* lang, const char* recog
        sttdc_send_set_state(uid, APP_STATE_RECORDING);
 
        /* 3. Start to send pcm from file to engine */
-       ret = sttd_engine_agent_recognize_start_file(uid, filepath, audio_type, channels, sample_rate);
+       ret = sttd_engine_agent_recognize_start_file(uid, filepath);
        if (0 != ret) {
                stt_client_unset_current_recognition();
                sttd_recorder_unset_audio_session();
index 96ac484c04db67a3aa4bf0a64df34b6eebd903f2..5ac8f509127efcf6ff3977e4e0c5dd586752c6a6 100644 (file)
@@ -76,7 +76,7 @@ int sttd_server_stop(unsigned int uid);
 
 int sttd_server_cancel(unsigned int uid);
 
-int sttd_server_start_file(unsigned int uid, const char* lang, const char* recognition_type, int silence, const char* appid, const char* credential, const char* filepath, stte_audio_type_e audio_type, int channels, int sample_rate);
+int sttd_server_start_file(unsigned int uid, const char* lang, const char* recognition_type, int silence, const char* appid, const char* credential, const char* filepath);
 
 int sttd_server_cancel_file(unsigned int uid);