add SpeakerRecognitionEngine for handling speaker recognizer modality

author ulgal-park <ulgal.park@samsung.com>

Thu, 22 Dec 2022 01:15:51 +0000 (10:15 +0900)

committer Tizen AI <ai.tzn.sec@samsung.com>

Wed, 31 May 2023 01:19:37 +0000 (10:19 +0900)
author ulgal-park <ulgal.park@samsung.com>
Thu, 22 Dec 2022 01:15:51 +0000 (10:15 +0900)
committer Tizen AI <ai.tzn.sec@samsung.com>
Wed, 31 May 2023 01:19:37 +0000 (10:19 +0900)
diff --git a/src/mmimgr/iu/SpeakerRecognitionEngine.cpp b/src/mmimgr/iu/SpeakerRecognitionEngine.cpp

new file mode 100644 (file)

index 0000000..9fa0295
--- /dev/null
+++ b/src/mmimgr/iu/SpeakerRecognitionEngine.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *               http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+
+
+#include <chrono>
+#include <Ecore.h>
+#include <map>
+
+#include "mmi_iu_log.h"
+#include "json_provider.h"
+
+#include "SpeakerRecognitionEngine.h"
+
+// Creates an engine with no output-result callback registered yet.
+SpeakerRecognitionEngine::SpeakerRecognitionEngine()
+    : outputResultCallback(nullptr),
+      outputResultUserData(nullptr)
+{
+}
+
+// No explicit cleanup is performed; members are destroyed automatically.
+SpeakerRecognitionEngine::~SpeakerRecognitionEngine() = default;
+
+// Returns the current std::chrono::steady_clock reading in milliseconds.
+// The value counts from the clock's own epoch, so it is a monotonic tick
+// count rather than wall-clock time.
+long SpeakerRecognitionEngine::getCurrentTimestamp()
+{
+    // Chrono names are qualified explicitly so this file does not depend on a
+    // using-directive leaking in from an included header.
+    const auto sinceEpoch = std::chrono::steady_clock::now().time_since_epoch();
+    const long timestamp = static_cast<long>(
+        std::chrono::duration_cast<std::chrono::milliseconds>(sinceEpoch).count());
+    _D("[SpeakerRecognitionEngine] current timestamp(%ld)", timestamp);
+
+    return timestamp;
+}
+
+// Per-element callback handed to g_list_foreach(): appends one speaker_result
+// to the engine's speakerList.
+//   data     - list element, expected to point at a speaker_result
+//   userData - the SpeakerRecognitionEngine that collects the results
+void SpeakerRecognitionEngine::iterateSpeakerCallback(gpointer data, gpointer userData)
+{
+    // gpointer is an untyped pointer, so static_cast is the appropriate cast.
+    speaker_result* info = static_cast<speaker_result*>(data);
+    SpeakerRecognitionEngine* engine = static_cast<SpeakerRecognitionEngine*>(userData);
+
+    // Skip malformed list entries instead of dereferencing a null pointer.
+    if (info == nullptr || engine == nullptr) {
+        _E("[SpeakerRecognitionEngine] iterateSpeakerCallback: invalid argument");
+        return;
+    }
+
+    // The element is copied by value into speakerList.
+    // NOTE(review): if name/id are plain pointers this is a shallow copy that is
+    // only valid while the source event is alive - confirm against speaker_result.
+    engine->speakerList.push_back(*info);
+    _D("[SpeakerRecognitionEngine] iterateSpeakerCallback: name(%s), id(%s), score(%f)", info->name, info->id, info->score);
+}
+
+// Converts one speaker-recognizer provider event into a JSON message and
+// passes it to the registered output-result callback.
+//   modalityType - modality identifier supplied by the caller (not read here)
+//   data         - pointer to a mmi_provider_event_speaker_recognizer
+// Returns false when data is NULL or the operation type is not handled;
+// returns true after the JSON has been handed to invokeOutputResultCallback().
+bool SpeakerRecognitionEngine::setInputModalityData(int modalityType, void *data)
+{
+    mmi_provider_event_speaker_recognizer *event = static_cast<mmi_provider_event_speaker_recognizer *>(data);
+    if (event == nullptr) {
+        _E("event is NULL");
+        return false;
+    }
+
+    // JsonProvider::setSpeakerRecognitionEvent() takes an int timestamp, so the
+    // narrowing from long is made explicit here.
+    const int timestamp = static_cast<int>(getCurrentTimestamp());
+
+    JsonProvider provider;
+    provider.setInputEvent(MMI_KEY_SPEAKER_RECOGNITION);
+
+    // One case per mmi_speaker_recognizer_operation value handled by this engine.
+    switch (event->type) {
+    case MMI_SPEAKER_RECOGNIZER_RECORDING_START:
+        // recording started
+        provider.setOutputEvent(MMI_KEY_RECORDING_START);
+        provider.setSpeakerRecognitionEvent(MMI_KEY_RECORDING_START, timestamp);
+        break;
+    case MMI_SPEAKER_RECOGNIZER_RECORDING_STOP:
+        // recording stopped
+        provider.setOutputEvent(MMI_KEY_RECORDING_STOP);
+        provider.setSpeakerRecognitionEvent(MMI_KEY_RECORDING_STOP, timestamp);
+        break;
+    case MMI_SPEAKER_RECOGNIZER_ADD_USER:
+        // add user
+        provider.setOutputEvent(MMI_KEY_ADD_SPEAKER);
+        provider.setSpeakerRecognitionEvent(MMI_KEY_ADD_SPEAKER, timestamp);
+        break;
+    case MMI_SPEAKER_RECOGNIZER_DELETE_USER:
+        // delete user
+        provider.setOutputEvent(MMI_KEY_DELETE_SPEAKER);
+        provider.setSpeakerRecognitionEvent(MMI_KEY_DELETE_SPEAKER, timestamp);
+        break;
+    case MMI_SPEAKER_RECOGNIZER_RECOGNIZE_SPEAKER:
+        _D("[SpeakerRecognitionEngine] recognized speaker(%s), confidence(%.6lf)", event->speaker, event->confidence);
+        // recognition result: best match plus the list of candidates
+        provider.setOutputEvent(MMI_KEY_RECOGNIZED_RESULT);
+        provider.setSpeakerRecognitionEvent(MMI_KEY_RECOGNIZED_RESULT, timestamp);
+        provider.setRecognizedSpeakerResult(event->speaker);
+
+        // Rebuild speakerList from the event's GList before serializing it.
+        speakerList.clear();
+        g_list_foreach(event->speaker_list, iterateSpeakerCallback, static_cast<gpointer>(this));
+        // size() returns size_t, which needs %zu rather than %d.
+        _D("[SpeakerRecognitionEngine] speaker list size(%zu)", speakerList.size());
+        for (const auto &speakerInfo : speakerList) {
+            _D("[SpeakerRecognitionEngine] speakerInfo: name(%s), id(%s), score(%f)", speakerInfo.name, speakerInfo.id, speakerInfo.score);
+            provider.addRecognizedSpeakerCandidate(speakerInfo.id, speakerInfo.name, speakerInfo.score);
+        }
+        break;
+
+    default:
+        // Unknown operation: do not emit a JSON message that carries no output
+        // event, and report the failure so the caller can return its
+        // "not supported type" error.
+        _W("[SpeakerRecognitionEngine] unknown event type(%d)", event->type);
+        return false;
+    }
+
+    // Deliver the serialized JSON through the registered callback.
+    invokeOutputResultCallback(provider.jsonToString());
+    return true;
+}
+
+// Stores the callback, and the opaque pointer passed back to it, that
+// invokeOutputResultCallback() will use to deliver results.
+void SpeakerRecognitionEngine::setOutputResultCallback(speaker_recognition_engine_output_result_cb callback, void *userData)
+{
+    this->outputResultUserData = userData;
+    this->outputResultCallback = callback;
+}
+
+// Delivers outputResult to the registered callback together with the stored
+// user data; only logs a warning when no callback has been registered.
+void SpeakerRecognitionEngine::invokeOutputResultCallback(std::string outputResult)
+{
+    if (outputResultCallback != nullptr) {
+        _I("[SpeakerRecognitionEngine] invokeOutputResultCallback");
+        outputResultCallback(outputResult.c_str(), outputResultUserData);
+        return;
+    }
+
+    _W("[SpeakerRecognitionEngine] output result callback is not set. Please check it");
+}
+\ No newline at end of file
diff --git a/src/mmimgr/iu/SpeakerRecognitionEngine.h b/src/mmimgr/iu/SpeakerRecognitionEngine.h

new file mode 100644 (file)

index 0000000..91b6e6c
--- /dev/null
+++ b/src/mmimgr/iu/SpeakerRecognitionEngine.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *               http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+
+
+#ifndef __SPEAKER_RECOGNITION_ENGINE_H__
+#define __SPEAKER_RECOGNITION_ENGINE_H__
+
+
+#include <string>
+#include <vector>
+#include <list>
+
+#include <Ecore.h>
+
+#include "json_provider.h"
+#include "mmi-common.h"
+
+// Callback type that receives the engine's output string and the user data
+// supplied at registration time.
+using speaker_recognition_engine_output_result_cb = void (*)(const char *output_result, void *user_data);
+
+// Turns speaker-recognizer provider events into JSON result strings and
+// forwards them to a registered callback.
+class SpeakerRecognitionEngine {
+public:
+    SpeakerRecognitionEngine();
+    ~SpeakerRecognitionEngine();
+
+    // Processes one provider event; returns false when it cannot be processed.
+    bool setInputModalityData(int modalityType, void *data);
+    // Registers the callback (and its user data) that receives the results.
+    void setOutputResultCallback(speaker_recognition_engine_output_result_cb callback, void *userData);
+
+private:
+    // Internal helpers, implemented in SpeakerRecognitionEngine.cpp.
+    void invokeOutputResultCallback(std::string result);
+    long getCurrentTimestamp();
+    static void iterateSpeakerCallback(gpointer data, gpointer userData);
+
+    // Speaker entries appended by iterateSpeakerCallback().
+    std::vector<speaker_result> speakerList;
+    // Result callback and the opaque pointer handed back to it.
+    speaker_recognition_engine_output_result_cb outputResultCallback;
+    void *outputResultUserData;
+};
+
+
+#endif /* __SPEAKER_RECOGNITION_ENGINE_H__ */
+\ No newline at end of file
diff --git a/src/mmimgr/iu/json_provider.cpp b/src/mmimgr/iu/json_provider.cpp

index 5b5e8c3..7a646f0 100644 (file)
--- a/src/mmimgr/iu/json_provider.cpp
+++ b/src/mmimgr/iu/json_provider.cpp
@@ -54,8 +54,62 @@ JsonProvider::~JsonProvider()
          json_object_unref(mRootObj);
          mRootObj = nullptr;
      }
+
+
+    if (mSpeakerRecognitionObj) {
+        json_object_unref(mSpeakerRecognitionObj);
+        mSpeakerRecognitionObj = nullptr;
+    }
+
+    if (mRecognizedSpeakerCandidateListArr) {
+        json_array_unref(mRecognizedSpeakerCandidateListArr);
+        mRecognizedSpeakerCandidateListArr = nullptr;
+    }
+
+    if (mRecognizedSpeakerResultObj) {
+        json_object_unref(mRecognizedSpeakerResultObj);
+        mRecognizedSpeakerResultObj = nullptr;
+    }
+
+}
+
+
+// Attaches the speaker-recognition object to the root object and records the
+// given result name and timestamp in it.
+//   result    - value stored under MMI_KEY_RESULT
+//   timeStamp - value stored under MMI_KEY_TIMESTAMP
+void JsonProvider::setSpeakerRecognitionEvent(const char* result, int timeStamp)
+{
+    if (!mSpeakerRecognitionObj)
+        mSpeakerRecognitionObj = json_object_new();
+
+    // json_object_set_object_member() takes over the reference it is given,
+    // while the destructor also unrefs mSpeakerRecognitionObj. Pass an extra
+    // reference so the root object and this provider each release their own;
+    // otherwise the destructor would unref an already freed object.
+    json_object_set_object_member(mRootObj, MMI_KEY_SPEAKER_RECOGNITION, json_object_ref(mSpeakerRecognitionObj));
+    json_object_set_string_member(mSpeakerRecognitionObj, MMI_KEY_RESULT, result);
+    json_object_set_int_member(mSpeakerRecognitionObj, MMI_KEY_TIMESTAMP, timeStamp);
  }
  
+// Stores the recognized speaker under MMI_KEY_RECOGNIZED_SPEAKER_RESULT inside
+// the speaker-recognition object.
+//   result - value stored under MMI_KEY_RESULT of the result object
+void JsonProvider::setRecognizedSpeakerResult(const char* result)
+{
+    if (!mRecognizedSpeakerResultObj) {
+        mRecognizedSpeakerResultObj = json_object_new();
+    }
+
+    // Attach only when the parent exists; setSpeakerRecognitionEvent() creates it.
+    // json_object_set_object_member() takes over the reference it is given and
+    // the destructor unrefs mRecognizedSpeakerResultObj as well, so hand over an
+    // extra reference to keep both releases balanced.
+    if (mSpeakerRecognitionObj) {
+        json_object_set_object_member(mSpeakerRecognitionObj, MMI_KEY_RECOGNIZED_SPEAKER_RESULT, json_object_ref(mRecognizedSpeakerResultObj));
+    }
+    json_object_set_string_member(mRecognizedSpeakerResultObj, MMI_KEY_RESULT, result);
+}
+
+// Appends one candidate {id, name, score} to the candidate-list array of the
+// speaker-recognition object, creating the array on first use.
+//   id    - value stored under MMI_KEY_SPEAKER_ID
+//   name  - value stored under MMI_KEY_SPEAKER_NAME
+//   score - value stored under MMI_KEY_SPEAKER_SCORE (widened to double)
+void JsonProvider::addRecognizedSpeakerCandidate(const char* id, const char* name, float score)
+{
+    if (!mRecognizedSpeakerCandidateListArr) {
+        mRecognizedSpeakerCandidateListArr = json_array_new();
+        // json_object_set_array_member() takes over the reference it is given
+        // and the destructor unrefs the array too, so pass an extra reference.
+        // Attach only when the parent object exists.
+        if (mSpeakerRecognitionObj) {
+            json_object_set_array_member(mSpeakerRecognitionObj, MMI_KEY_RECOGNIZED_SPEAKER_CANDIDATE_LIST, json_array_ref(mRecognizedSpeakerCandidateListArr));
+        }
+    }
+
+    // The array takes ownership of the new element; obj stays valid as a
+    // borrowed pointer while the members are filled in.
+    JsonObject* obj = json_object_new();
+    json_array_add_object_element(mRecognizedSpeakerCandidateListArr, obj);
+    json_object_set_string_member(obj, MMI_KEY_SPEAKER_ID, id);
+    json_object_set_string_member(obj, MMI_KEY_SPEAKER_NAME, name);
+    json_object_set_double_member(obj, MMI_KEY_SPEAKER_SCORE, static_cast<double>(score));
+}
+
+
+
  void JsonProvider::setInputEvent(const char* value)
  {
         json_object_set_string_member(mRootObj, MMI_KEY_INPUT_EVENT, value);
diff --git a/src/mmimgr/iu/json_provider.h b/src/mmimgr/iu/json_provider.h

index 550c6eb..6d83053 100644 (file)
--- a/src/mmimgr/iu/json_provider.h
+++ b/src/mmimgr/iu/json_provider.h
@@ -9,7 +9,6 @@ using namespace std;
  
  #define MMI_KEY_INPUT_EVENT                    "inputEvent"
  #define MMI_KEY_VOICE_TOUCH                    "voiceTouch"
-#define MMI_KEY_VOICE_RECOGNITION      "voiceRecognition"
  #define MMI_KEY_SCREEN_KEY                     "screenKey"
  #define MMI_KEY_OUTPUT_EVENT           "outputEvent"        // changed result
  #define MMI_KEY_ASR_PARTIAL_RESULT     "asrPartialResult"
@@ -40,6 +39,24 @@ using namespace std;
  #define MMI_KEY_GRID_DEPTH                     "gridDepth"
  #define MMI_KEY_TURN_OFF_REQUEST       "turnOffRequest"
  
+
+// Input-event name and root-level member key for speaker recognition
+#define MMI_KEY_SPEAKER_RECOGNITION                    "speakerRecognition"
+// Output-event / result names, one per speaker-recognizer operation
+#define MMI_KEY_RECORDING_START                     "recordingStart"
+#define MMI_KEY_RECORDING_STOP                      "recordingStop"
+#define MMI_KEY_ADD_SPEAKER                         "addSpeaker"
+#define MMI_KEY_DELETE_SPEAKER                      "deleteSpeaker"
+#define MMI_KEY_RECOGNIZED_RESULT                     "recognizedResult"
+// Member keys nested inside the speaker-recognition object
+#define MMI_KEY_RECOGNIZED_SPEAKER_CANDIDATE_LIST     "recognizedSpeakerCandidateList"
+#define MMI_KEY_RECOGNIZED_SPEAKER_RESULT             "recognizedSpeakerResult"
+// Member keys of each entry in the candidate list
+#define MMI_KEY_SPEAKER_ID                        "speakerId"
+#define MMI_KEY_SPEAKER_NAME                        "speakerName"
+#define MMI_KEY_SPEAKER_SCORE                        "speakerScore"
+
+
+
  #define MMI_REASON_NO_MATCHED_COMMANDS         "noMatchedCommands"
  #define MMI_REASON_NO_CLICKABLE_OBJECTS                "noClickableObjects"
  
@@ -77,6 +94,12 @@ public:
      void setErrorEvent(const char* reason, int timeStamp);
      void setTurnOffRequestEvent(int timeStamp);
  
+
+    void setSpeakerRecognitionEvent(const char* result, int timeStamp);
+    void addRecognizedSpeakerCandidate(const char* id, const char* name, float score);
+    void setRecognizedSpeakerResult(const char* result);
+
+
      std::string jsonToString(void);
  
  private:
@@ -93,6 +116,12 @@ private:
      JsonObject* mTurnOffRequestObj = nullptr;
  
      int mNumClickableObj;
+
+
+    JsonObject* mSpeakerRecognitionObj = nullptr;
+    JsonArray* mRecognizedSpeakerCandidateListArr = nullptr;
+    JsonObject* mRecognizedSpeakerResultObj = nullptr;
+
  };
  
  
diff --git a/src/mmimgr/iu/mmi_iu.cpp b/src/mmimgr/iu/mmi_iu.cpp

index fd4fb6d..2427a96 100644 (file)
--- a/src/mmimgr/iu/mmi_iu.cpp
+++ b/src/mmimgr/iu/mmi_iu.cpp
@@ -23,6 +23,7 @@
  #include "mmi_iu.h"
  #include "mmi_iu_log.h"
  #include "VoiceTouchEngine.h"
+#include "SpeakerRecognitionEngine.h"
  #include "mmi-common.h"
  
  using namespace std;
@@ -39,6 +40,8 @@ static bool initialized = false;
  
  static VoiceTouchEngine *g_VoiceTouchEngine = nullptr;
  
+static SpeakerRecognitionEngine *g_SpeakerRecognitionEngine = nullptr;
+
  
  static void init()
  {
@@ -73,6 +76,18 @@ static void mmi_iu_voice_touch_engine_output_modality_cb(void *output_modality,
      iu_output_modality_received_callback(MMI_PROVIDER_EVENT_VOICE_TOUCH, output_modality, iu_output_modality_user_data);
  }
  
+
+// Forwards a result string from the SpeakerRecognitionEngine to
+// iu_output_result_received_callback, tagged as a speaker-recognition event.
+// File-local, like the voice-touch engine callback in this file.
+//   output_result - result string produced by the engine
+//   user_data     - user data registered with the engine (not used here)
+// NOTE(review): identifiers beginning with a double underscore are reserved
+// for the implementation; consider renaming together with the call site.
+static void __mmi_iu_speaker_recognition_engine_output_result_cb(const char *output_result, void *user_data)
+{
+    if (iu_output_result_received_callback == nullptr) {
+        _E("[MMI IU] Output result callback is not set");
+        return;
+    }
+
+    iu_output_result_received_callback(MMI_INPUT_EVENT_TYPE_SPEAKER_RECOGNITION, output_result, iu_output_result_user_data);
+}
+
+
  EXPORT_API int mmi_iu_init()
  {
      _I("[MMI IU] Initialize IU module.");
@@ -90,6 +105,19 @@ EXPORT_API int mmi_iu_init()
          g_VoiceTouchEngine->setOutputModalityCallback(mmi_iu_voice_touch_engine_output_modality_cb, nullptr);
      }
  
+
+    try {
+        g_SpeakerRecognitionEngine = new SpeakerRecognitionEngine();
+    } catch (exception &e) {
+        _E("[MMI IU] Fail to allocate memory. (%s)", e.what());
+        return MMI_IU_ERROR_OUT_OF_MEMORY;
+    }
+
+    if (g_SpeakerRecognitionEngine) {
+        g_SpeakerRecognitionEngine->setOutputResultCallback(__mmi_iu_speaker_recognition_engine_output_result_cb, nullptr);
+    }
+
+
      return MMI_IU_ERROR_NONE;
  }
  
@@ -101,6 +129,11 @@ EXPORT_API int mmi_iu_shutdown()
      delete g_VoiceTouchEngine;
      g_VoiceTouchEngine = nullptr;
  
+
+    delete g_SpeakerRecognitionEngine;
+    g_SpeakerRecognitionEngine = nullptr;
+
+
      if (!initialized)
          return MMI_IU_ERROR_NOT_INITIALIZED;
  
@@ -121,7 +154,18 @@ EXPORT_API int mmi_iu_feed_input_modality(int type, void *event)
          return MMI_IU_ERROR_NOT_INITIALIZED;
      }
  
-    string json_output_result;
+
+    if (type == MMI_PROVIDER_EVENT_SPEAKER_RECOGNIZER) {
+        if (g_SpeakerRecognitionEngine) {
+            int res = g_SpeakerRecognitionEngine->setInputModalityData(type, event);
+            if (!res) {
+                _W("[ERROR] Not Supported Event Type");
+                return MMI_IU_ERROR_NOT_SUPPORTED_TYPE;
+            }
+            return MMI_IU_ERROR_NONE;
+        }
+    }
+
      if (g_VoiceTouchEngine) {
          g_VoiceTouchEngine->setInputModalityData(type, event);
      }
diff --git a/src/mmimgr/meson.build b/src/mmimgr/meson.build

index 1b20f6f..3e51599 100644 (file)
--- a/src/mmimgr/meson.build
+++ b/src/mmimgr/meson.build
@@ -29,6 +29,8 @@ mmimgr_srcs = [
         'iu/json_provider.h',
         'iu/PreDefinedCommands.cpp',
         'iu/PreDefinedCommands.h',
+       'iu/SpeakerRecognitionEngine.cpp',
+       'iu/SpeakerRecognitionEngine.h',
         'output_modality/mmi_output_modality.cpp',
         'output_modality/mmi_output_modality.h',
         'output_modality/TouchModule.cpp',
author	ulgal-park <ulgal.park@samsung.com>
	Thu, 22 Dec 2022 01:15:51 +0000 (10:15 +0900)
committer	Tizen AI <ai.tzn.sec@samsung.com>
	Wed, 31 May 2023 01:19:37 +0000 (10:19 +0900)
src/mmimgr/iu/SpeakerRecognitionEngine.cpp	[new file with mode: 0644]	patch \| blob
src/mmimgr/iu/SpeakerRecognitionEngine.h	[new file with mode: 0644]	patch \| blob
src/mmimgr/iu/json_provider.cpp		patch \| blob \| history
src/mmimgr/iu/json_provider.h		patch \| blob \| history
src/mmimgr/iu/mmi_iu.cpp		patch \| blob \| history
src/mmimgr/meson.build		patch \| blob \| history