mv_machine_learning: add landmark detection task group
author Inki Dae <inki.dae@samsung.com>
Thu, 13 Apr 2023 08:08:37 +0000 (17:08 +0900)
committer Kwanghoon Son <k.son@samsung.com>
Wed, 14 Jun 2023 02:14:40 +0000 (11:14 +0900)
[Issue type] : new feature

Add landmark detection task group support.

The landmark detection task group internally consists of the facial landmark
and pose estimation task groups, and provides a native API set for each of
them.

As an initial implementation, this patch adds the facial landmark detection
task group API, which uses the FLD TweakCNN model.
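
The following is a rough usage sketch of the new internal C API added by this
patch (illustrative only, not part of the patch; error handling and source
setup are omitted):

    #include <mv_facial_landmark_internal.h>

    mv_facial_landmark_h handle;
    mv_source_h source; /* filled with an image, e.g. via mv_create_source() */
    unsigned int count = 0;
    unsigned int *pos_x = NULL, *pos_y = NULL;

    /* Create the task handle, load the default (or user-given) model and prepare it. */
    mv_facial_landmark_create(&handle);
    mv_facial_landmark_configure(handle);
    mv_facial_landmark_prepare(handle);

    /* Run inference on the source and read back the detected landmark coordinates. */
    mv_facial_landmark_inference(handle, source);
    mv_facial_landmark_get_positions(handle, &count, &pos_x, &pos_y);

    mv_facial_landmark_destroy(handle);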

Change-Id: I021bb3e2a3b23edbc7da2805178872ba60c1d0e4
Signed-off-by: Inki Dae <inki.dae@samsung.com>
26 files changed:
CMakeLists.txt
include/mv_facial_landmark_internal.h [new file with mode: 0644]
include/mv_facial_landmark_type.h [new file with mode: 0644]
mv_machine_learning/CMakeLists.txt
mv_machine_learning/landmark_detection/CMakeLists.txt [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/LandmarkDetectionParser.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/facial_landmark_adapter.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/fld_tweak_cnn.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/landmark_detection.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/landmark_detection_type.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/mv_facial_landmark_open.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/mv_landmark_detection_config.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/meta/facial_landmark.json [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/LandmarkDetectionParser.cpp [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/fld_tweak_cnn.cpp [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/landmark_detection.cpp [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/mv_facial_landmark.c [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/mv_facial_landmark_open.cpp [new file with mode: 0644]
mv_machine_learning/meta/include/PostprocessParser.h
mv_machine_learning/meta/include/types.h
mv_machine_learning/meta/src/PostprocessParser.cpp
packaging/capi-media-vision.spec
test/testsuites/machine_learning/CMakeLists.txt
test/testsuites/machine_learning/landmark_detection/CMakeLists.txt [new file with mode: 0644]
test/testsuites/machine_learning/landmark_detection/test_landmark_detection.cpp [new file with mode: 0644]

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4e0e9971c854cf5361856802680616293740e43a..45329d9c1b96be9e9de2ba665458359d4714055e 100644 (file)
@@ -39,6 +39,8 @@ set(MV_OBJECT_DETECTION_LIB_NAME "mv_object_detection" CACHE STRING
        "Name of the library will be built for object detection module (without extension).")
 set(MV_OBJECT_DETECTION_3D_LIB_NAME "mv_object_detection_3d" CACHE STRING
        "Name of the library will be built for object detection 3d module (without extension).")
+set(MV_LANDMARK_DETECTION_LIB_NAME "mv_landmark_detection" CACHE STRING
+       "Name of the library will be built for landmark detection module (without extension).")
 set(MV_IMAGE_CLASSIFICATION_LIB_NAME "mv_image_classification" CACHE STRING
        "Name of the library will be built for image classification module (without extension).")
 
@@ -222,6 +224,20 @@ if (${ENABLE_ML_OBJECT_DETECTION_3D})
        list(APPEND TOTAL_LDFLAGS ${PC_LDFLAGS})
 endif()
 
+if (${ENABLE_ML_LANDMARK_DETECTION})
+    set(PC_NAME ${fw_name}-landmark-detection)
+    set(PC_LDFLAGS "-l${MV_LANDMARK_DETECTION_LIB_NAME} -l${MV_COMMON_LIB_NAME}")
+    configure_file(
+           ${fw_name}.pc.in
+           ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-landmark-detection.pc
+           @ONLY
+    )
+    install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-landmark-detection.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig)
+    install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/landmark_detection/meta/facial_landmark.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name})
+       list(APPEND TOTAL_REQUIRED ${PC_NAME})
+       list(APPEND TOTAL_LDFLAGS ${PC_LDFLAGS})
+endif()
+
 string(REPLACE ";" " " TOTAL_LDFLAGS "${TOTAL_LDFLAGS}")
 string(REPLACE " " ";" TOTAL_LDFLAGS_LIST "${TOTAL_LDFLAGS}")
 list(REMOVE_DUPLICATES TOTAL_LDFLAGS_LIST)
diff --git a/include/mv_facial_landmark_internal.h b/include/mv_facial_landmark_internal.h
new file mode 100644 (file)
index 0000000..b405f1e
--- /dev/null
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_FACIAL_LANDMARK_INTERNAL_H__
+#define __TIZEN_MEDIAVISION_FACIAL_LANDMARK_INTERNAL_H__
+
+#include <mv_common.h>
+#include <mv_facial_landmark_type.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file   mv_facial_landmark_internal.h
+ * @internal
+ * @brief  This file contains the Inference based Media Vision API.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_INFERENCE_MODULE
+ * @{
+ */
+
+/**
+ * @internal
+ * @brief Creates an inference handle for the facial landmark object.
+ * @details Use this function to create an inference handle. After the creation,
+ *          the facial landmark task has to be prepared with the
+ *          mv_facial_landmark_prepare() function, which prepares a network
+ *          for the inference.
+ *
+ * @since_tizen 7.5
+ *
+ * @remarks The @a handle should be released using mv_facial_landmark_destroy().
+ *
+ * @param[out] handle    The handle to the inference to be created.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ *
+ * @see mv_facial_landmark_destroy()
+ * @see mv_facial_landmark_prepare()
+ */
+int mv_facial_landmark_create(mv_facial_landmark_h *handle);
+
+/**
+ * @internal
+ * @brief Destroys the inference handle and releases all its resources.
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle    The handle to the inference to be destroyed.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create an inference handle by using mv_facial_landmark_create()
+ *
+ * @see mv_facial_landmark_create()
+ */
+int mv_facial_landmark_destroy(mv_facial_landmark_h handle);
+
+/**
+ * @internal
+ * @brief Sets user-given model information.
+ * @details Use this function to change the model information instead of the default one after calling @ref mv_facial_landmark_create().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle        The handle to the facial landmark object.
+ * @param[in] model_name    Model name.
+ * @param[in] model_file    Model file name.
+ * @param[in] meta_file     Model meta file name.
+ * @param[in] label_file    Label file name.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create()
+ */
+int mv_facial_landmark_set_model(mv_facial_landmark_h handle, const char *model_name, const char *model_file,
+                                                                const char *meta_file, const char *label_file);
+
+/**
+ * @internal
+ * @brief Configures the backend for the facial landmark inference.
+ *
+ * @since_tizen 7.5
+ *
+ * @param [in] handle         The handle to the inference
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ */
+int mv_facial_landmark_configure(mv_facial_landmark_h handle);
+
+/**
+ * @internal
+ * @brief Prepares the facial landmark inference
+ * @details Use this function to prepare the facial landmark inference based on
+ *          the configured network.
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the inference.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_PERMISSION_DENIED Permission denied
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Not supported format
+ */
+int mv_facial_landmark_prepare(mv_facial_landmark_h handle);
+
+/**
+ * @internal
+ * @brief Performs the facial landmark inference on the @a source.
+ *
+ * @since_tizen 7.5
+ * @remarks This function is synchronous and may take considerable time to run.
+ *
+ * @param[in] handle          The handle to the inference
+ * @param[in] source         The handle to the source of the media
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ *                                                  isn't supported
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_facial_landmark_create()
+ * @pre Configure an inference by calling mv_facial_landmark_configure()
+ * @pre Prepare an inference by calling mv_facial_landmark_prepare()
+ */
+int mv_facial_landmark_inference(mv_facial_landmark_h handle, mv_source_h source);
+
+/**
+ * @internal
+ * @brief Gets the facial landmark positions on the @a source.
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle               The handle to the inference
+ * @param[out] number_of_landmarks The number of landmarks detected.
+ * @param[out] pos_x               An array containing x-coordinate values.
+ * @param[out] pos_y               An array containing y-coordinate values.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_facial_landmark_create()
+ * @pre Configure an inference by calling mv_facial_landmark_configure()
+ * @pre Prepare an inference by calling mv_facial_landmark_prepare()
+ * @pre Perform an inference by calling mv_facial_landmark_inference()
+ */
+int mv_facial_landmark_get_positions(mv_facial_landmark_h handle, unsigned int *number_of_landmarks,
+                                                                        unsigned int **pos_x, unsigned int **pos_y);
+
+/**
+ * @internal
+ * @brief Sets user-given inference engine and device types for inference.
+ * @details Use this function to change the inference engine and device types for inference instead of default ones after calling @ref mv_facial_landmark_create().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle        The handle to the facial landmark object.
+ * @param[in] engine_type  A string of inference engine type.
+ * @param[in] device_type   A string of device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create()
+ */
+int mv_facial_landmark_set_engine(mv_facial_landmark_h handle, const char *engine_type, const char *device_type);
+
+/**
+ * @internal
+ * @brief Gets the number of inference engines available for the facial landmark task API.
+ * @details Use this function to get how many inference engines are supported for facial landmark detection after calling @ref mv_facial_landmark_create().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the facial landmark object.
+ * @param[out] engine_count  The number of inference engines available for the facial landmark API.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create()
+ */
+int mv_facial_landmark_get_engine_count(mv_facial_landmark_h handle, unsigned int *engine_count);
+
+/**
+ * @internal
+ * @brief Gets the engine type for a given inference engine index.
+ * @details Use this function to get the inference engine type for a given engine index after calling @ref mv_facial_landmark_get_engine_count().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle        The handle to the facial landmark object.
+ * @param[in] engine_index  An inference engine index for getting the inference engine type.
+ * @param[out] engine_type  A string indicating the inference engine type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Get a number of inference engines available for facial landmark task API by calling @ref mv_facial_landmark_get_engine_count()
+ */
+int mv_facial_landmark_get_engine_type(mv_facial_landmark_h handle, const unsigned int engine_index,
+                                                                          char **engine_type);
+
+/**
+ * @internal
+ * @brief Gets the number of device types available for a given inference engine.
+ * @details Use this function to get how many device types are supported for a given inference engine after calling @ref mv_facial_landmark_create().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the facial landmark object.
+ * @param[in] engine_type    An inference engine string.
+ * @param[out] device_count  The number of device types available for a given inference engine.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create()
+ */
+int mv_facial_landmark_get_device_count(mv_facial_landmark_h handle, const char *engine_type,
+                                                                               unsigned int *device_count);
+
+/**
+ * @internal
+ * @brief Gets the list of available device types.
+ * @details Use this function to get which device types are supported for the current inference engine type after calling @ref mv_facial_landmark_configure().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the facial landmark object.
+ * @param[in] engine_type    An inference engine string.
+ * @param[in] device_index   A device index for getting the device type.
+ * @param[out] device_type   A string indicating the device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create()
+ * @pre Configure facial landmark task by calling @ref mv_facial_landmark_configure()
+ */
+int mv_facial_landmark_get_device_type(mv_facial_landmark_h handle, const char *engine_type,
+                                                                          const unsigned int device_index, char **device_type);
+/**
+ * @}
+ */
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_FACIAL_LANDMARK_INTERNAL_H__ */
diff --git a/include/mv_facial_landmark_type.h b/include/mv_facial_landmark_type.h
new file mode 100644 (file)
index 0000000..68a467b
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_MV_FACIAL_LANDMARK_TYPE_H__
+#define __TIZEN_MEDIAVISION_MV_FACIAL_LANDMARK_TYPE_H__
+
+#include <mv_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file   mv_facial_landmark_type.h
+ * @brief  This file contains the facial landmark handle for Mediavision.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_FACE_LANDMARK_MODULE
+ * @{
+ */
+
+/**
+ * @brief The facial landmark object handle.
+ *
+ * @since_tizen 7.5
+ */
+typedef void *mv_facial_landmark_h;
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_MV_FACIAL_LANDMARK_TYPE_H__ */
diff --git a/mv_machine_learning/CMakeLists.txt b/mv_machine_learning/CMakeLists.txt
index c41e4f0c0de7d6f84bf8e52ea5d4d93b37850865..604acf9981f1c2fcf97ea1ad9f80ef8c5ce27bcc 100644 (file)
@@ -12,6 +12,10 @@ if (${ENABLE_ML_OBJECT_DETECTION_3D})
     message("Enabled machine learning object detection 3d task group.")
     add_subdirectory(object_detection_3d)
 endif()
+if (${ENABLE_ML_LANDMARK_DETECTION})
+    message("Enabled machine learning landmark detection task group.")
+    add_subdirectory(landmark_detection)
+endif()
 
 if (${ENABLE_ML_FACE_RECOGNITION})
     message("Enabled machine learning face recognition feature.")
diff --git a/mv_machine_learning/landmark_detection/CMakeLists.txt b/mv_machine_learning/landmark_detection/CMakeLists.txt
new file mode 100644 (file)
index 0000000..516fa4a
--- /dev/null
@@ -0,0 +1,27 @@
+project(${MV_LANDMARK_DETECTION_LIB_NAME})
+cmake_minimum_required(VERSION 2.6...3.13)
+
+pkg_check_modules(${PROJECT_NAME}_DEP REQUIRED inference-engine-interface-common iniparser json-glib-1.0)
+file(GLOB MV_LANDMARK_DETECTION_SOURCE_LIST  "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/../meta/src/*.cpp")
+
+find_package(OpenCV REQUIRED dnn imgproc)
+if(NOT OpenCV_FOUND)
+       message(SEND_ERROR "OpenCV NOT FOUND")
+       return()
+endif()
+
+if(FORCED_STATIC_BUILD)
+       add_library(${PROJECT_NAME} STATIC ${MV_LANDMARK_DETECTION_SOURCE_LIST})
+else()
+       add_library(${PROJECT_NAME} SHARED ${MV_LANDMARK_DETECTION_SOURCE_LIST})
+endif()
+
+target_link_libraries(${PROJECT_NAME} ${MV_COMMON_LIB_NAME} ${OpenCV_LIBS} ${${PROJECT_NAME}_DEP_LIBRARIES} mv_inference)
+target_include_directories(${PROJECT_NAME} PRIVATE include ../inference/include ../common/include ../meta/include)
+install(TARGETS ${PROJECT_NAME} DESTINATION ${LIB_INSTALL_DIR})
+install(
+       DIRECTORY ${PROJECT_SOURCE_DIR}/../../include/ DESTINATION include/media
+       FILES_MATCHING
+       PATTERN "mv_facial_landmark_internal.h"
+       PATTERN "mv_facial_landmark_type.h"
+       )
diff --git a/mv_machine_learning/landmark_detection/include/LandmarkDetectionParser.h b/mv_machine_learning/landmark_detection/include/LandmarkDetectionParser.h
new file mode 100644 (file)
index 0000000..f536e7b
--- /dev/null
@@ -0,0 +1,45 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LANDMARK_DETECTION_PARSER_H__
+#define __LANDMARK_DETECTION_PARSER_H__
+
+#include "MetaParser.h"
+#include "PostprocessParser.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+class LandmarkDetectionParser : public MetaParser
+{
+private:
+       PostprocessParser _postprocessParser;
+
+protected:
+       void parsePostprocess(std::shared_ptr<MetaInfo> meta_info, JsonObject *in_obj) override;
+
+public:
+       LandmarkDetectionParser();
+       ~LandmarkDetectionParser();
+
+       void setTaskType(int type) override;
+};
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/include/facial_landmark_adapter.h b/mv_machine_learning/landmark_detection/include/facial_landmark_adapter.h
new file mode 100644 (file)
index 0000000..dbc2391
--- /dev/null
@@ -0,0 +1,63 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FACE_LANDMARK_ADAPTER_H__
+#define __FACE_LANDMARK_ADAPTER_H__
+
+#include <dlog.h>
+
+#include "EngineConfig.h"
+#include "itask.h"
+#include "fld_tweak_cnn.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+template<typename T, typename V> class FacialLandmarkAdapter : public mediavision::common::ITask<T, V>
+{
+private:
+       std::unique_ptr<LandmarkDetection> _landmark_detection;
+       T _source;
+       std::string _model_name;
+       std::string _model_file;
+       std::string _meta_file;
+       std::string _label_file;
+
+public:
+       FacialLandmarkAdapter();
+       ~FacialLandmarkAdapter();
+
+       void create(int type) override;
+
+       void setModelInfo(const char *model_file, const char *meta_file, const char *label_file,
+                                         const char *model_name) override;
+       void setEngineInfo(const char *engine_type, const char *device_type) override;
+       void configure() override;
+       void getNumberOfEngines(unsigned int *number_of_engines) override;
+       void getEngineType(unsigned int engine_index, char **engine_type) override;
+       void getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices) override;
+       void getDeviceType(const char *engine_type, unsigned int device_index, char **device_type) override;
+       void prepare() override;
+       void setInput(T &t) override;
+       void perform() override;
+       V &getOutput() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/include/fld_tweak_cnn.h b/mv_machine_learning/landmark_detection/include/fld_tweak_cnn.h
new file mode 100644 (file)
index 0000000..13e87c1
--- /dev/null
@@ -0,0 +1,47 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FLD_TWEAK_CNN_H__
+#define __FLD_TWEAK_CNN_H__
+
+#include <string>
+#include <memory>
+#include <mv_common.h>
+#include "mv_private.h"
+
+#include "landmark_detection.h"
+#include <mv_inference_type.h>
+
+namespace mediavision
+{
+namespace machine_learning
+{
+class FldTweakCnn : public LandmarkDetection
+{
+private:
+       FacialLandmarkResult _result;
+
+public:
+       FldTweakCnn(LandmarkDetectionTaskType task_type);
+       ~FldTweakCnn();
+
+       FacialLandmarkResult &result() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/include/landmark_detection.h b/mv_machine_learning/landmark_detection/include/landmark_detection.h
new file mode 100644 (file)
index 0000000..0b378bb
--- /dev/null
@@ -0,0 +1,83 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LANDMARK_DETECTION_H__
+#define __LANDMARK_DETECTION_H__
+
+#include <mv_common.h>
+#include <mv_inference_type.h>
+#include "mv_private.h"
+
+#include "EngineConfig.h"
+#include "inference_engine_common_impl.h"
+#include "Inference.h"
+#include "landmark_detection_type.h"
+#include "LandmarkDetectionParser.h"
+#include "Preprocess.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+class LandmarkDetection
+{
+private:
+       void loadLabel();
+       void getEngineList();
+       void getDeviceList(const char *engine_type);
+
+       LandmarkDetectionTaskType _task_type;
+
+protected:
+       std::unique_ptr<mediavision::inference::Inference> _inference;
+       std::unique_ptr<MediaVision::Common::EngineConfig> _config;
+       std::unique_ptr<MetaParser> _parser;
+       std::vector<std::string> _labels;
+       std::vector<std::string> _valid_backends;
+       std::vector<std::string> _valid_devices;
+       Preprocess _preprocess;
+       std::string _modelFilePath;
+       std::string _modelMetaFilePath;
+       std::string _modelDefaultPath;
+       std::string _modelLabelFilePath;
+       int _backendType;
+       int _targetDeviceType;
+
+       void getOutputNames(std::vector<std::string> &names);
+       void getOutputTensor(std::string target_name, std::vector<float> &tensor);
+
+public:
+       LandmarkDetection(LandmarkDetectionTaskType task_type);
+       virtual ~LandmarkDetection() = default;
+       LandmarkDetectionTaskType getTaskType();
+       void setUserModel(std::string model_file, std::string meta_file, std::string label_file);
+       void setEngineInfo(std::string engine_type, std::string device_type);
+       void getNumberOfEngines(unsigned int *number_of_engines);
+       void getEngineType(unsigned int engine_index, char **engine_type);
+       void getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices);
+       void getDeviceType(const char *engine_type, const unsigned int device_index, char **device_type);
+       void parseMetaFile(const char *meta_file_name);
+       void configure();
+       void prepare();
+       void preprocess(mv_source_h &mv_src);
+       void inference(mv_source_h source);
+       virtual FacialLandmarkResult &result() = 0;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/include/landmark_detection_type.h b/mv_machine_learning/landmark_detection/include/landmark_detection_type.h
new file mode 100644 (file)
index 0000000..2b3d3ea
--- /dev/null
@@ -0,0 +1,49 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LANDMARK_DETECTION_TYPE_H__
+#define __LANDMARK_DETECTION_TYPE_H__
+
+#include <vector>
+
+#include <mv_common.h>
+#include <mv_inference_type.h>
+
+namespace mediavision
+{
+namespace machine_learning
+{
+struct LandmarkDetectionInput {
+       mv_source_h inference_src;
+       // TODO.
+};
+
+struct FacialLandmarkResult {
+       unsigned int number_of_landmarks;
+       std::vector<unsigned int> x_pos;
+       std::vector<unsigned int> y_pos;
+};
+
+enum class LandmarkDetectionTaskType {
+       LANDMARK_DETECTION_TASK_NONE = 0,
+       FLD_TWEAK_CNN,
+       // TODO
+};
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/include/mv_facial_landmark_open.h b/mv_machine_learning/landmark_detection/include/mv_facial_landmark_open.h
new file mode 100644 (file)
index 0000000..82274d7
--- /dev/null
@@ -0,0 +1,274 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_FACIAL_LANDMARK_OPEN_H__
+#define __MEDIA_VISION_FACIAL_LANDMARK_OPEN_H__
+
+#include <mv_common.h>
+#include <mv_private.h>
+#include <mv_facial_landmark_type.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+        * @brief Create facial landmark object handle.
+        * @details Use this function to create a facial landmark object handle.
+        *          After creation the handle has to be prepared with the
+        *          @ref mv_facial_landmark_prepare_open() function to prepare
+        *               a facial landmark object.
+        *
+        * @since_tizen 7.5
+        *
+        * @param[out] out_handle    The handle to the facial landmark object to be created
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+        *
+        * @post Release @a out_handle by using the
+        *       @ref mv_facial_landmark_destroy_open() function when it is not needed
+        *       anymore
+        *
+        * @see mv_facial_landmark_destroy_open()
+        */
+int mv_facial_landmark_create_open(mv_facial_landmark_h *out_handle);
+
+/**
+        * @brief Destroy the facial landmark handle and release all its resources.
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle    The handle to the facial landmark object to be destroyed.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        *
+        * @pre Create a facial landmark handle by using @ref mv_facial_landmark_create_open()
+        *
+        * @see mv_facial_landmark_create_open()
+        */
+int mv_facial_landmark_destroy_open(mv_facial_landmark_h handle);
+
+/**
+        * @brief Set user-given model information.
+        * @details Use this function to change the model information instead of the default one after calling @ref mv_facial_landmark_create_open().
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle        The handle to the facial landmark object.
+        * @param[in] model_name    Model name.
+        * @param[in] model_file    Model file name.
+        * @param[in] meta_file     Model meta file name.
+        * @param[in] label_file    Label file name.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create_open()
+        */
+int mv_facial_landmark_set_model_open(mv_facial_landmark_h handle, const char *model_name, const char *model_file,
+                                                                         const char *meta_file, const char *label_file);
+
+/**
+        * @brief Configure the backend for the inference handle.
+        *
+        * @since_tizen 7.5
+        *
+        * @param [in] handle         The handle to the inference
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+        * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+        */
+int mv_facial_landmark_configure_open(mv_facial_landmark_h handle);
+
+/**
+        * @brief Prepare inference.
+        * @details Use this function to prepare inference based on
+        *          the configured network.
+        *
+        * @since_tizen 7.5
+        *
+        * @param [in] handle         The handle to the inference
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data
+        * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+        */
+int mv_facial_landmark_prepare_open(mv_facial_landmark_h handle);
+
+/**
+        *
+        * @brief Perform facial landmark inference on the @a source.
+        * @details Use this function to run inference on a given source.
+        *
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle         The handle to the facial landmark object.
+        * @param[in] source         The handle to the source of the media.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+        *                                                  isn't supported
+        * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+        *
+        * @pre Create a source handle by calling @ref mv_create_source()
+        * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create_open()
+        * @pre Configure an inference by calling @ref mv_facial_landmark_configure_open()
+        * @pre Prepare a facial landmark by calling @ref mv_facial_landmark_prepare_open()
+        */
+int mv_facial_landmark_inference_open(mv_facial_landmark_h handle, mv_source_h source);
+
+/**
+ * @brief Gets the facial landmark positions on the @a source.
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle               The handle to the inference
+ * @param[out] number_of_landmarks The number of landmarks detected.
+ * @param[out] pos_x               An array containing x-coordinate values.
+ * @param[out] pos_y               An array containing y-coordinate values.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_facial_landmark_create_open()
+ * @pre Configure an inference by calling mv_facial_landmark_configure_open()
+ * @pre Prepare an inference by calling mv_facial_landmark_prepare_open()
+ * @pre Perform an inference by calling mv_facial_landmark_inference_open()
+ */
+int mv_facial_landmark_get_positions_open(mv_facial_landmark_h handle, unsigned int *number_of_landmarks,
+                                                                                 unsigned int **pos_x, unsigned int **pos_y);
+
+/**
+        * @brief Set user-given backend and device types for inference.
+        * @details Use this function to change the backend and device types for inference instead of default ones after calling @ref mv_facial_landmark_create_open().
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle        The handle to the facial landmark object.
+        * @param[in] backend_type  A string of backend type.
+        * @param[in] device_type   A string of device type.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create_open()
+        */
+int mv_facial_landmark_set_engine_open(mv_facial_landmark_h handle, const char *backend_type, const char *device_type);
+
+/**
+        * @brief Get the number of inference engines available for the facial landmark task API.
+        * @details Use this function to get how many inference engines are supported for facial landmark detection after calling @ref mv_facial_landmark_create_open().
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle         The handle to the facial landmark object.
+        * @param[out] engine_count  The number of inference engines available for the facial landmark API.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create_open()
+        */
+int mv_facial_landmark_get_engine_count_open(mv_facial_landmark_h handle, unsigned int *engine_count);
+
+/**
+        * @brief Get the engine type for a given inference engine index.
+        * @details Use this function to get the inference engine type for a given engine index after calling @ref mv_facial_landmark_get_engine_count_open().
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle        The handle to the facial landmark object.
+        * @param[in] engine_index  An inference engine index for getting the inference engine type.
+        * @param[out] engine_type  A string indicating the inference engine type.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Get the number of inference engines available for the facial landmark task API by calling @ref mv_facial_landmark_get_engine_count_open()
+        */
+int mv_facial_landmark_get_engine_type_open(mv_facial_landmark_h handle, const unsigned int engine_index,
+                                                                                       char **engine_type);
+
+/**
+        * @brief Get the number of device types available for a given inference engine.
+        * @details Use this function to get how many device types are supported for a given inference engine after calling @ref mv_facial_landmark_create_open().
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle         The handle to the facial landmark object.
+        * @param[in] engine_type    An inference engine string.
+        * @param[out] device_count  The number of device types available for a given inference engine.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create_open()
+        */
+int mv_facial_landmark_get_device_count_open(mv_facial_landmark_h handle, const char *engine_type,
+                                                                                        unsigned int *device_count);
+
+/**
+        * @brief Get the list of available device types.
+        * @details Use this function to get which device types are supported for the current inference engine type after calling @ref mv_facial_landmark_configure_open().
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle         The handle to the facial landmark object.
+        * @param[in] engine_type    An inference engine string.
+        * @param[in] device_index   A device index for getting the device type.
+        * @param[out] device_type   A string indicating the device type.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create_open()
+        * @pre Configure the facial landmark task by calling @ref mv_facial_landmark_configure_open()
+        */
+int mv_facial_landmark_get_device_type_open(mv_facial_landmark_h handle, const char *engine_type,
+                                                                                       const unsigned int device_index, char **device_type);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __MEDIA_VISION_FACIAL_LANDMARK_OPEN_H__ */
diff --git a/mv_machine_learning/landmark_detection/include/mv_landmark_detection_config.h b/mv_machine_learning/landmark_detection/include/mv_landmark_detection_config.h
new file mode 100644 (file)
index 0000000..8957553
--- /dev/null
@@ -0,0 +1,65 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_LANDMARK_DETECTION_CONFIG_H__
+#define __MEDIA_VISION_LANDMARK_DETECTION_CONFIG_H__
+
+/**
+ * @brief Defines #MV_LANDMARK_DETECTION_MODEL_DEFAULT_PATH
+ *        to set the landmark detection default path.
+ *
+ * @since_tizen 7.5
+ */
+#define MV_LANDMARK_DETECTION_MODEL_DEFAULT_PATH "MODEL_DEFAULT_PATH"
+
+/**
+ * @brief Defines #MV_LANDMARK_DETECTION_MODEL_FILE_PATH
+ *        to set the landmark detection model file path.
+ *
+ * @since_tizen 7.5
+ */
+#define MV_LANDMARK_DETECTION_MODEL_FILE_PATH "MODEL_FILE_NAME"
+
+/**
+ * @brief Defines #MV_LANDMARK_DETECTION_MODEL_META_FILE_PATH to set inference
+ *        models's metadata file attribute of the engine configuration.
+ * @details The file includes inference model's metadata such as input and output
+ *          node names, input tensor's width and height,
+ *          mean and standard deviation values for pre-processing.
+ *
+ * @since_tizen 7.5
+ */
+#define MV_LANDMARK_DETECTION_MODEL_META_FILE_PATH "META_FILE_NAME"
+
+#define MV_LANDMARK_DETECTION_LABEL_FILE_NAME "LABEL_FILE_NAME"
+
+/**
+ * @brief Defines #MV_LANDMARK_DETECTION_BACKEND_TYPE
+ *        to set inference backend engine type. In default, tensorflow lite is used.
+ *
+ * @since_tizen 7.5
+ */
+#define MV_LANDMARK_DETECTION_BACKEND_TYPE "BACKEND_TYPE"
+
+/**
+ * @brief Defines #MV_LANDMARK_DETECTION_TARGET_DEVICE_TYPE
+ *        to set inference target device type. In default, CPU device is used.
+ *
+ * @since_tizen 7.5
+ */
+#define MV_LANDMARK_DETECTION_TARGET_DEVICE_TYPE "TARGET_DEVICE_TYPE"
+
+#endif /* __MEDIA_VISION_LANDMARK_DETECTION_CONFIG_H__ */
diff --git a/mv_machine_learning/landmark_detection/meta/facial_landmark.json b/mv_machine_learning/landmark_detection/meta/facial_landmark.json
new file mode 100644 (file)
index 0000000..2c5cc69
--- /dev/null
@@ -0,0 +1,35 @@
+{
+    "attributes":
+    [
+        {
+            "name" : "MODEL_DEFAULT_PATH",
+            "type" : "string",
+            "value" : "/opt/usr/globalapps/mediavision.landmark.detection/models/tflite/"
+        },
+        {
+            "name"  : "MODEL_FILE_NAME",
+            "type"  : "string",
+            "value" : "fld_tweakcnn_128x128.tflite"
+        },
+        {
+            "name"  : "META_FILE_NAME",
+            "type"  : "string",
+            "value" : "fld_tweakcnn_128x128.json"
+        },
+        {
+            "name"  : "LABEL_FILE_NAME",
+            "type"  : "string",
+            "value" : ""
+        },
+        {
+            "name"  : "BACKEND_TYPE",
+            "type"  : "integer",
+            "value" : 1
+        },
+        {
+            "name"  : "TARGET_DEVICE_TYPE",
+            "type"  : "integer",
+            "value" : 1
+        }
+    ]
+}
diff --git a/mv_machine_learning/landmark_detection/src/LandmarkDetectionParser.cpp b/mv_machine_learning/landmark_detection/src/LandmarkDetectionParser.cpp
new file mode 100644 (file)
index 0000000..0fc38ed
--- /dev/null
@@ -0,0 +1,60 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <memory>
+
+#include "machine_learning_exception.h"
+#include "LandmarkDetectionParser.h"
+#include "landmark_detection_type.h"
+
+using namespace std;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+LandmarkDetectionParser::LandmarkDetectionParser()
+{
+       LOGI("ENTER");
+       LOGI("LEAVE");
+}
+
+LandmarkDetectionParser::~LandmarkDetectionParser()
+{}
+
+void LandmarkDetectionParser::setTaskType(int type)
+{
+       LandmarkDetectionTaskType task_type = static_cast<LandmarkDetectionTaskType>(type);
+       // TODO.
+}
+
+void LandmarkDetectionParser::parsePostprocess(shared_ptr<MetaInfo> meta_info, JsonObject *in_obj)
+{
+       LOGI("ENTER");
+
+       LOGI("tensor name : %s", meta_info->name.c_str());
+
+       if (json_object_has_member(in_obj, "score"))
+               _postprocessParser.parseScore(meta_info, in_obj);
+
+       if (json_object_has_member(in_obj, "landmark"))
+               _postprocessParser.parseLandmark(meta_info, in_obj);
+
+       LOGI("LEAVE");
+}
+
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp b/mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp
new file mode 100644 (file)
index 0000000..5d8b413
--- /dev/null
@@ -0,0 +1,145 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "machine_learning_exception.h"
+#include "facial_landmark_adapter.h"
+
+using namespace std;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+template<typename T, typename V> FacialLandmarkAdapter<T, V>::FacialLandmarkAdapter() : _source()
+{
+       // By default, the FLD TweakCNN model is used.
+       // If another model is set by the user then the strategy pattern is used
+       // to create its corresponding concrete class by calling create().
+       _landmark_detection = make_unique<FldTweakCnn>(LandmarkDetectionTaskType::FLD_TWEAK_CNN);
+}
+
+template<typename T, typename V> FacialLandmarkAdapter<T, V>::~FacialLandmarkAdapter()
+{}
+
+template<typename T, typename V> void FacialLandmarkAdapter<T, V>::create(int type)
+{
+       LandmarkDetectionTaskType task_type = static_cast<LandmarkDetectionTaskType>(type);
+
+       // If the default task type is the same as the given one then skip.
+       if (_landmark_detection->getTaskType() == task_type)
+               return;
+
+       _landmark_detection.reset();
+
+       if (task_type == LandmarkDetectionTaskType::FLD_TWEAK_CNN)
+               _landmark_detection = make_unique<FldTweakCnn>(task_type);
+       // TODO.
+}
+
+template<typename T, typename V>
+void FacialLandmarkAdapter<T, V>::setModelInfo(const char *model_file, const char *meta_file, const char *label_file,
+                                                                                          const char *model_name)
+{
+       string model_name_str(model_name);
+
+       if (!model_name_str.empty()) {
+               transform(model_name_str.begin(), model_name_str.end(), model_name_str.begin(), ::toupper);
+
+               int model_type = 0;
+
+               if (model_name_str == string("FLD_TWEAK_CNN"))
+                       model_type = static_cast<int>(LandmarkDetectionTaskType::FLD_TWEAK_CNN);
+               // TODO.
+               else
+                       throw InvalidParameter("Invalid landmark detection model name.");
+
+               create(static_cast<int>(model_type));
+       }
+
+       _model_file = string(model_file);
+       _meta_file = string(meta_file);
+       _label_file = string(label_file);
+
+       if (_model_file.empty() && _meta_file.empty()) {
+               LOGW("Given model info is invalid so default model info will be used instead.");
+               return;
+       }
+
+       _landmark_detection->setUserModel(_model_file, _meta_file, _label_file);
+}
+
+template<typename T, typename V>
+void FacialLandmarkAdapter<T, V>::setEngineInfo(const char *engine_type, const char *device_type)
+{
+       _landmark_detection->setEngineInfo(string(engine_type), string(device_type));
+}
+
+template<typename T, typename V> void FacialLandmarkAdapter<T, V>::configure()
+{
+       _landmark_detection->parseMetaFile("facial_landmark.json");
+       _landmark_detection->configure();
+}
+
+template<typename T, typename V> void FacialLandmarkAdapter<T, V>::getNumberOfEngines(unsigned int *number_of_engines)
+{
+       _landmark_detection->getNumberOfEngines(number_of_engines);
+}
+
+template<typename T, typename V>
+void FacialLandmarkAdapter<T, V>::getEngineType(unsigned int engine_index, char **engine_type)
+{
+       _landmark_detection->getEngineType(engine_index, engine_type);
+}
+
+template<typename T, typename V>
+void FacialLandmarkAdapter<T, V>::getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices)
+{
+       _landmark_detection->getNumberOfDevices(engine_type, number_of_devices);
+}
+
+template<typename T, typename V>
+void FacialLandmarkAdapter<T, V>::getDeviceType(const char *engine_type, unsigned int device_index, char **device_type)
+{
+       _landmark_detection->getDeviceType(engine_type, device_index, device_type);
+}
+
+template<typename T, typename V> void FacialLandmarkAdapter<T, V>::prepare()
+{
+       _landmark_detection->prepare();
+}
+
+template<typename T, typename V> void FacialLandmarkAdapter<T, V>::setInput(T &t)
+{
+       _source = t;
+}
+
+template<typename T, typename V> void FacialLandmarkAdapter<T, V>::perform()
+{
+       _landmark_detection->preprocess(_source.inference_src);
+       _landmark_detection->inference(_source.inference_src);
+}
+
+template<typename T, typename V> V &FacialLandmarkAdapter<T, V>::getOutput()
+{
+       return _landmark_detection->result();
+}
+
+template class FacialLandmarkAdapter<LandmarkDetectionInput, FacialLandmarkResult>;
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/src/fld_tweak_cnn.cpp b/mv_machine_learning/landmark_detection/src/fld_tweak_cnn.cpp
new file mode 100644 (file)
index 0000000..ae90e6d
--- /dev/null
@@ -0,0 +1,97 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <map>
+#include <algorithm>
+#include <iostream>
+
+#include "machine_learning_exception.h"
+#include "mv_landmark_detection_config.h"
+#include "fld_tweak_cnn.h"
+#include "Postprocess.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+FldTweakCnn::FldTweakCnn(LandmarkDetectionTaskType task_type) : LandmarkDetection(task_type), _result()
+{}
+
+FldTweakCnn::~FldTweakCnn()
+{}
+
+FacialLandmarkResult &FldTweakCnn::result()
+{
+       constexpr static unsigned int numberOfLandmarks = 5;
+
+       // Clear the previous result because result() can be called multiple times by the user,
+       // so the existing result data must be dropped before it is populated again.
+       // Note that _result contains std::vector members, so it must not be cleared with memset().
+       _result = FacialLandmarkResult();
+
+       vector<string> names;
+
+       LandmarkDetection::getOutputNames(names);
+
+       auto scoreMetaInfo = _parser->getOutputMetaMap().at(names[0]);
+       auto decodingLandmark =
+                       static_pointer_cast<DecodingLandmark>(scoreMetaInfo->decodingTypeMap[DecodingType::LANDMARK]);
+
+       if (decodingLandmark->decoding_type != LandmarkDecodingType::BYPASS)
+               throw InvalidOperation("decoding type not supported.");
+
+       if (decodingLandmark->coordinate_type != LandmarkCoordinateType::RATIO)
+               throw InvalidOperation("coordinate type not supported.");
+
+       if (decodingLandmark->landmark_type != LandmarkType::SINGLE_2D)
+               throw InvalidOperation("landmark type not supported.");
+
+       auto ori_src_width = static_cast<double>(_preprocess.getImageWidth()[0]);
+       auto ori_src_height = static_cast<double>(_preprocess.getImageHeight()[0]);
+       auto input_tensor_width = static_cast<double>(_inference->getInputWidth());
+       auto input_tensor_height = static_cast<double>(_inference->getInputHeight());
+
+       _result.number_of_landmarks = numberOfLandmarks;
+
+       vector<float> score_tensor;
+
+       LandmarkDetection::getOutputTensor(names[0], score_tensor);
+
+       // Calculate the ratio [A] between the original image size and the input tensor size.
+       double width_ratio = ori_src_width / input_tensor_width;
+       double height_ratio = ori_src_height / input_tensor_height;
+
+       // If the landmark coordinate type is RATIO, the output tensor buffer contains ratio values
+       // indicating the position of each landmark relative to the input tensor size, interleaved
+       // as (x0, y0, x1, y1, ...). Therefore, each landmark position in the original image is
+       //    x = [width A] * width of input tensor * x ratio value from the output tensor,
+       //    y = [height A] * height of input tensor * y ratio value from the output tensor.
+       for (unsigned int idx = 0; idx < numberOfLandmarks; ++idx) {
+               _result.x_pos.push_back(
+                               static_cast<unsigned int>(width_ratio * input_tensor_width * score_tensor[idx * 2]));
+               _result.y_pos.push_back(
+                               static_cast<unsigned int>(height_ratio * input_tensor_height * score_tensor[idx * 2 + 1]));
+       }
+
+       return _result;
+}
+
+}
+}
\ No newline at end of file
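
Note: to make the RATIO decoding in FldTweakCnn::result() above concrete, the sketch below runs the same arithmetic with made-up numbers (a 640x480 source image, a 128x128 input tensor and one landmark ratio pair of (0.3, 0.5); none of these values come from this patch):

#include <iostream>

int main()
{
	// Hypothetical values for illustration only.
	double ori_src_width = 640.0, ori_src_height = 480.0;           // original image size
	double input_tensor_width = 128.0, input_tensor_height = 128.0; // model input size
	double ratio_x = 0.3, ratio_y = 0.5;                            // values read from the output tensor

	// Ratio [A] between the original image size and the input tensor size.
	double width_ratio = ori_src_width / input_tensor_width;    // 5.0
	double height_ratio = ori_src_height / input_tensor_height; // 3.75

	// Same formula as in FldTweakCnn::result(); it reduces to original image size * ratio value.
	unsigned int x = static_cast<unsigned int>(width_ratio * input_tensor_width * ratio_x);   // 192
	unsigned int y = static_cast<unsigned int>(height_ratio * input_tensor_height * ratio_y); // 240

	std::cout << "landmark at (" << x << ", " << y << ")" << std::endl;
}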
diff --git a/mv_machine_learning/landmark_detection/src/landmark_detection.cpp b/mv_machine_learning/landmark_detection/src/landmark_detection.cpp
new file mode 100644 (file)
index 0000000..58c5f73
--- /dev/null
@@ -0,0 +1,313 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <fstream>
+#include <map>
+#include <memory>
+#include <algorithm>
+
+#include "machine_learning_exception.h"
+#include "mv_machine_learning_common.h"
+#include "mv_landmark_detection_config.h"
+#include "landmark_detection.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace MediaVision::Common;
+using namespace mediavision::common;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+LandmarkDetection::LandmarkDetection(LandmarkDetectionTaskType task_type)
+               : _task_type(task_type), _backendType(), _targetDeviceType()
+{
+       _inference = make_unique<Inference>();
+       _parser = make_unique<LandmarkDetectionParser>();
+}
+
+LandmarkDetectionTaskType LandmarkDetection::getTaskType()
+{
+       return _task_type;
+}
+
+void LandmarkDetection::getEngineList()
+{
+       for (auto idx = MV_INFERENCE_BACKEND_NONE + 1; idx < MV_INFERENCE_BACKEND_MAX; ++idx) {
+               auto backend = _inference->getSupportedInferenceBackend(idx);
+               // TODO. We need to describe which inference engines are supported by each task API,
+               //       and based on that, check whether a given engine type is supported by this
+               //       task API. As of now, only tflite is supported.
+               if (backend.second == true && backend.first.compare("tflite") == 0)
+                       _valid_backends.push_back(backend.first);
+       }
+}
+
+void LandmarkDetection::getDeviceList(const char *engine_type)
+{
+       // TODO. Add device types available for a given engine type later.
+       //       By default, cpu and gpu only.
+       _valid_devices.push_back("cpu");
+       _valid_devices.push_back("gpu");
+}
+
+void LandmarkDetection::setEngineInfo(std::string engine_type, std::string device_type)
+{
+       if (engine_type.empty() || device_type.empty())
+               throw InvalidParameter("Invalid engine info.");
+
+       transform(engine_type.begin(), engine_type.end(), engine_type.begin(), ::toupper);
+       transform(device_type.begin(), device_type.end(), device_type.begin(), ::toupper);
+
+       _backendType = GetBackendType(engine_type);
+       _targetDeviceType = GetDeviceType(device_type);
+
+       LOGI("Engine type : %s => %d, Device type : %s => %d", engine_type.c_str(), GetBackendType(engine_type),
+                device_type.c_str(), GetDeviceType(device_type));
+
+       if (_backendType == MEDIA_VISION_ERROR_INVALID_PARAMETER ||
+               _targetDeviceType == MEDIA_VISION_ERROR_INVALID_PARAMETER)
+               throw InvalidParameter("backend or target device type not found.");
+}
+
+void LandmarkDetection::getNumberOfEngines(unsigned int *number_of_engines)
+{
+       if (!_valid_backends.empty()) {
+               *number_of_engines = _valid_backends.size();
+               return;
+       }
+
+       getEngineList();
+       *number_of_engines = _valid_backends.size();
+}
+
+void LandmarkDetection::getEngineType(unsigned int engine_index, char **engine_type)
+{
+       if (!_valid_backends.empty()) {
+               if (_valid_backends.size() <= engine_index)
+                       throw InvalidParameter("Invalid engine index.");
+
+               *engine_type = const_cast<char *>(_valid_backends[engine_index].data());
+               return;
+       }
+
+       getEngineList();
+
+       if (_valid_backends.size() <= engine_index)
+               throw InvalidParameter("Invalid engine index.");
+
+       *engine_type = const_cast<char *>(_valid_backends[engine_index].data());
+}
+
+void LandmarkDetection::getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices)
+{
+       if (!_valid_devices.empty()) {
+               *number_of_devices = _valid_devices.size();
+               return;
+       }
+
+       getDeviceList(engine_type);
+       *number_of_devices = _valid_devices.size();
+}
+
+void LandmarkDetection::getDeviceType(const char *engine_type, const unsigned int device_index, char **device_type)
+{
+       if (!_valid_devices.empty()) {
+               if (_valid_devices.size() <= device_index)
+                       throw InvalidParameter("Invalid device index.");
+
+               *device_type = const_cast<char *>(_valid_devices[device_index].data());
+               return;
+       }
+
+       getDeviceList(engine_type);
+
+       if (_valid_devices.size() <= device_index)
+               throw InvalidParameter("Invalid device index.");
+
+       *device_type = const_cast<char *>(_valid_devices[device_index].data());
+}
+
+void LandmarkDetection::setUserModel(string model_file, string meta_file, string label_file)
+{
+       _modelFilePath = model_file;
+       _modelMetaFilePath = meta_file;
+       _modelLabelFilePath = label_file;
+}
+
+static bool IsJsonFile(const string &fileName)
+{
+       return (!fileName.substr(fileName.find_last_of(".") + 1).compare("json"));
+}
+
+void LandmarkDetection::loadLabel()
+{
+       ifstream readFile;
+
+       _labels.clear();
+       readFile.open(_modelLabelFilePath.c_str());
+
+       if (readFile.fail())
+               throw InvalidOperation("Fail to open " + _modelLabelFilePath + " file.");
+
+       string line;
+
+       while (getline(readFile, line))
+               _labels.push_back(line);
+
+       readFile.close();
+}
+
+void LandmarkDetection::parseMetaFile(const char *meta_file_name)
+{
+       _config = make_unique<EngineConfig>(string(MV_CONFIG_PATH) + string(meta_file_name));
+
+       int ret = _config->getIntegerAttribute(string(MV_LANDMARK_DETECTION_BACKEND_TYPE), &_backendType);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get backend engine type.");
+
+       ret = _config->getIntegerAttribute(string(MV_LANDMARK_DETECTION_TARGET_DEVICE_TYPE), &_targetDeviceType);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get target device type.");
+
+       ret = _config->getStringAttribute(MV_LANDMARK_DETECTION_MODEL_DEFAULT_PATH, &_modelDefaultPath);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get model default path");
+
+       if (_modelFilePath.empty()) {
+               ret = _config->getStringAttribute(MV_LANDMARK_DETECTION_MODEL_FILE_PATH, &_modelFilePath);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw InvalidOperation("Fail to get model file path");
+       }
+
+       _modelFilePath = _modelDefaultPath + _modelFilePath;
+       LOGI("model file path = %s", _modelFilePath.c_str());
+
+       if (_modelMetaFilePath.empty()) {
+               ret = _config->getStringAttribute(MV_LANDMARK_DETECTION_MODEL_META_FILE_PATH, &_modelMetaFilePath);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw InvalidOperation("Fail to get model meta file path");
+
+               if (_modelMetaFilePath.empty())
+                       throw InvalidOperation("Model meta file doesn't exist.");
+
+               if (!IsJsonFile(_modelMetaFilePath))
+                       throw InvalidOperation("Model meta file should be json");
+       }
+
+       _modelMetaFilePath = _modelDefaultPath + _modelMetaFilePath;
+       LOGI("meta file path = %s", _modelMetaFilePath.c_str());
+
+       _parser->setTaskType(static_cast<int>(_task_type));
+       _parser->load(_modelMetaFilePath);
+
+       if (_modelLabelFilePath.empty()) {
+               ret = _config->getStringAttribute(MV_LANDMARK_DETECTION_LABEL_FILE_NAME, &_modelLabelFilePath);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw InvalidOperation("Fail to get label file path");
+
+               if (_modelLabelFilePath.empty()) {
+                       LOGW("Label doesn't exist.");
+                       return;
+               }
+       }
+
+       _modelLabelFilePath = _modelDefaultPath + _modelLabelFilePath;
+       LOGI("label file path = %s", _modelLabelFilePath.c_str());
+
+       loadLabel();
+}
+
+void LandmarkDetection::configure()
+{
+       int ret = _inference->bind(_backendType, _targetDeviceType);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to bind a backend engine.");
+}
+
+void LandmarkDetection::prepare()
+{
+       int ret = _inference->configureInputMetaInfo(_parser->getInputMetaMap());
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to configure input tensor info from meta file.");
+
+       ret = _inference->configureOutputMetaInfo(_parser->getOutputMetaMap());
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to configure output tensor info from meta file.");
+
+       _inference->configureModelFiles("", _modelFilePath, "");
+
+       // Request to load model files to a backend engine.
+       ret = _inference->load();
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to load model files.");
+}
+
+void LandmarkDetection::preprocess(mv_source_h &mv_src)
+{
+       LOGI("ENTER");
+
+       TensorBuffer &tensor_buffer_obj = _inference->getInputTensorBuffer();
+       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
+       vector<mv_source_h> mv_srcs = { mv_src };
+
+       _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer);
+
+       LOGI("LEAVE");
+}
+
+void LandmarkDetection::inference(mv_source_h source)
+{
+       LOGI("ENTER");
+
+       vector<mv_source_h> sources;
+
+       sources.push_back(source);
+
+       int ret = _inference->run();
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to run inference");
+
+       LOGI("LEAVE");
+}
+
+void LandmarkDetection::getOutputNames(vector<string> &names)
+{
+       TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
+       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
+
+       for (IETensorBuffer::iterator it = ie_tensor_buffer.begin(); it != ie_tensor_buffer.end(); it++)
+               names.push_back(it->first);
+}
+
+void LandmarkDetection::getOutputTensor(string target_name, vector<float> &tensor)
+{
+       TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
+
+       inference_engine_tensor_buffer *tensor_buffer = tensor_buffer_obj.getTensorBuffer(target_name);
+       if (!tensor_buffer)
+               throw InvalidOperation("Fail to get tensor buffer.");
+
+       auto raw_buffer = static_cast<float *>(tensor_buffer->buffer);
+
+       copy(&raw_buffer[0], &raw_buffer[tensor_buffer->size / sizeof(float)], back_inserter(tensor));
+}
+
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/src/mv_facial_landmark.c b/mv_machine_learning/landmark_detection/src/mv_facial_landmark.c
new file mode 100644 (file)
index 0000000..1936fcd
--- /dev/null
@@ -0,0 +1,213 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "mv_facial_landmark_internal.h"
+#include "mv_facial_landmark_open.h"
+
+/**
+ * @file  mv_facial_landmark.c
+ * @brief This file contains the Media Vision facial landmark detection module.
+ */
+
+int mv_facial_landmark_create(mv_facial_landmark_h *handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_NULL_ARG_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_create_open(handle);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+       return ret;
+}
+
+int mv_facial_landmark_destroy(mv_facial_landmark_h handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_destroy_open(handle);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+       return ret;
+}
+
+int mv_facial_landmark_set_model(mv_facial_landmark_h handle, const char *model_name, const char *model_file,
+                                                                const char *meta_file, const char *label_file)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(model_name);
+       MEDIA_VISION_NULL_ARG_CHECK(model_file);
+       MEDIA_VISION_NULL_ARG_CHECK(meta_file);
+       MEDIA_VISION_NULL_ARG_CHECK(label_file);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_set_model_open(handle, model_name, model_file, meta_file, label_file);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_set_engine(mv_facial_landmark_h handle, const char *backend_type, const char *device_type)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(backend_type);
+       MEDIA_VISION_NULL_ARG_CHECK(device_type);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_set_engine_open(handle, backend_type, device_type);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_get_engine_count(mv_facial_landmark_h handle, unsigned int *engine_count)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(engine_count);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_get_engine_count_open(handle, engine_count);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_get_engine_type(mv_facial_landmark_h handle, const unsigned int engine_index, char **engine_type)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(engine_type);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_get_engine_type_open(handle, engine_index, engine_type);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_get_device_count(mv_facial_landmark_h handle, const char *engine_type,
+                                                                               unsigned int *device_count)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(device_count);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_get_device_count_open(handle, engine_type, device_count);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_get_device_type(mv_facial_landmark_h handle, const char *engine_type,
+                                                                          const unsigned int device_index, char **device_type)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(engine_type);
+       MEDIA_VISION_NULL_ARG_CHECK(device_type);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_get_device_type_open(handle, engine_type, device_index, device_type);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_configure(mv_facial_landmark_h handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_configure_open(handle);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+       return ret;
+}
+
+int mv_facial_landmark_prepare(mv_facial_landmark_h handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_prepare_open(handle);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+       return ret;
+}
+
+int mv_facial_landmark_inference(mv_facial_landmark_h handle, mv_source_h source)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_image_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_INSTANCE_CHECK(source);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_inference_open(handle, source);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_get_positions(mv_facial_landmark_h handle, unsigned int *number_of_landmarks,
+                                                                        unsigned int **pos_x, unsigned int **pos_y)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_image_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(number_of_landmarks);
+       MEDIA_VISION_NULL_ARG_CHECK(pos_x);
+       MEDIA_VISION_NULL_ARG_CHECK(pos_y);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_get_positions_open(handle, number_of_landmarks, pos_x, pos_y);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
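
For context, the end-to-end call sequence of the native API above matches what the test suite at the bottom of this patch exercises. A minimal sketch, assuming the default model described by facial_landmark.json, the internal header used by the test, and an mv_source_h already filled with a face image (error handling omitted):

#include "mv_facial_landmark_internal.h"

void facial_landmark_sample(mv_source_h source)
{
	mv_facial_landmark_h handle;
	unsigned int number_of_landmarks = 0;
	unsigned int *pos_x = NULL, *pos_y = NULL;

	mv_facial_landmark_create(&handle);
	// Empty strings select the default model described by facial_landmark.json.
	mv_facial_landmark_set_model(handle, "", "", "", "");
	mv_facial_landmark_set_engine(handle, "tflite", "cpu");
	mv_facial_landmark_configure(handle);
	mv_facial_landmark_prepare(handle);
	mv_facial_landmark_inference(handle, source);
	mv_facial_landmark_get_positions(handle, &number_of_landmarks, &pos_x, &pos_y);

	// pos_x/pos_y point at buffers owned by the handle (see the result vectors returned by
	// mv_facial_landmark_get_positions_open()), so copy them out if they are needed after
	// the next inference or after the handle is destroyed.
	mv_facial_landmark_destroy(handle);
}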
diff --git a/mv_machine_learning/landmark_detection/src/mv_facial_landmark_open.cpp b/mv_machine_learning/landmark_detection/src/mv_facial_landmark_open.cpp
new file mode 100644 (file)
index 0000000..a4c8d75
--- /dev/null
@@ -0,0 +1,350 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "itask.h"
+#include "mv_facial_landmark_open.h"
+#include "facial_landmark_adapter.h"
+#include "machine_learning_exception.h"
+#include "landmark_detection_type.h"
+#include "context.h"
+
+#include <new>
+#include <unistd.h>
+#include <string>
+#include <algorithm>
+#include <mutex>
+#include <iostream>
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::common;
+using namespace mediavision::machine_learning;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning::exception;
+using LandmarkDetectionTask = ITask<LandmarkDetectionInput, FacialLandmarkResult>;
+
+static mutex g_facial_landmark_mutex;
+
+int mv_facial_landmark_create_open(mv_facial_landmark_h *handle)
+{
+       if (!handle) {
+               LOGE("Handle can't be created because handle pointer is NULL");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       Context *context = nullptr;
+       LandmarkDetectionTask *task = nullptr;
+
+       try {
+               context = new Context();
+               task = new FacialLandmarkAdapter<LandmarkDetectionInput, FacialLandmarkResult>();
+               context->__tasks.insert(make_pair("facial_landmark", task));
+               *handle = static_cast<mv_facial_landmark_h>(context);
+       } catch (const BaseException &e) {
+               delete task;
+               delete context;
+               return e.getError();
+       }
+
+       LOGD("facial landmark handle [%p] has been created", *handle);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_destroy_open(mv_facial_landmark_h handle)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       auto context = static_cast<Context *>(handle);
+
+       for (auto &m : context->__tasks)
+               delete static_cast<LandmarkDetectionTask *>(m.second);
+
+       delete context;
+
+       LOGD("facial landmark handle has been destroyed.");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_set_model_open(mv_facial_landmark_h handle, const char *model_name, const char *model_file,
+                                                                         const char *meta_file, const char *label_file)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->setModelInfo(model_file, meta_file, label_file, model_name);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_set_engine_open(mv_facial_landmark_h handle, const char *backend_type, const char *device_type)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->setEngineInfo(backend_type, device_type);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_get_engine_count_open(mv_facial_landmark_h handle, unsigned int *engine_count)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->getNumberOfEngines(engine_count);
+               // TODO.
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_get_engine_type_open(mv_facial_landmark_h handle, const unsigned int engine_index,
+                                                                                       char **engine_type)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->getEngineType(engine_index, engine_type);
+               // TODO.
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_get_device_count_open(mv_facial_landmark_h handle, const char *engine_type,
+                                                                                        unsigned int *device_count)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->getNumberOfDevices(engine_type, device_count);
+               // TODO.
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_get_device_type_open(mv_facial_landmark_h handle, const char *engine_type,
+                                                                                       const unsigned int device_index, char **device_type)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->getDeviceType(engine_type, device_index, device_type);
+               // TODO.
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_configure_open(mv_facial_landmark_h handle)
+{
+       LOGD("ENTER");
+
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->configure();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_prepare_open(mv_facial_landmark_h handle)
+{
+       LOGD("ENTER");
+
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->prepare();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_inference_open(mv_facial_landmark_h handle, mv_source_h source)
+{
+       LOGD("ENTER");
+
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               LandmarkDetectionInput input = { source };
+
+               task->setInput(input);
+               task->perform();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_get_positions_open(mv_facial_landmark_h handle, unsigned int *number_of_landmarks,
+                                                                                 unsigned int **pos_x, unsigned int **pos_y)
+{
+       LOGD("ENTER");
+
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               FacialLandmarkResult &result = task->getOutput();
+               *number_of_landmarks = result.number_of_landmarks;
+               *pos_x = result.x_pos.data();
+               *pos_y = result.y_pos.data();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
index 4d71a7931ecdab912c7abad4569dfa3d7c66336a..f0b578121adc2951f0c9bf36ec9ec30c45a411f2 100644 (file)
@@ -46,6 +46,7 @@ public:
        void parseBox(std::shared_ptr<MetaInfo> metaInfo, JsonObject *root);
        void parseScore(std::shared_ptr<MetaInfo> metaInfo, JsonObject *root);
        void parseNumber(std::shared_ptr<MetaInfo> metaInfo, JsonObject *root);
+       void parseLandmark(std::shared_ptr<MetaInfo> metaInfo, JsonObject *root);
 
        void SetTaskAnchorParser(std::shared_ptr<AnchorParser> anchorParser)
        {
index 47704360e5899fa89dd6d7dec5be487345accfb1..e8112bab91a28ee46d1e58a7a4ac95bd52811a41 100644 (file)
@@ -29,7 +29,7 @@ namespace mediavision
 {
 namespace machine_learning
 {
-enum class DecodingType { NORMAL, QUANTIZATION, DEQUANTIZATION, BOX, SCORE, LABEL, NUMBER };
+enum class DecodingType { NORMAL, QUANTIZATION, DEQUANTIZATION, BOX, SCORE, LABEL, NUMBER, LANDMARK };
 
 enum class ScoreType { NORMAL, SIGMOID };
 
@@ -39,8 +39,14 @@ enum class BoxCoordinateType { RATIO, PIXEL };
 
 enum class BoxType { LEFTTOP, CENTER };
 
+enum class LandmarkType { SINGLE_2D, MULTI_2D };
+
 enum class BoxNmsMode { NONE = -1, STANDARD };
 
+enum class LandmarkCoordinateType { RATIO, PIXEL };
+
+enum class LandmarkDecodingType { BYPASS, BYPASS_MULTICHANNEL, HEATMAP, HEATMAP_REFINE };
+
 struct InputSizeInfo {
        size_t imageWidth;
        size_t imageHeight;
@@ -141,6 +147,13 @@ struct DecodingDeQuantization {
        double zeropoint;
 };
 
+struct DecodingLandmark {
+       LandmarkType landmark_type;
+       LandmarkCoordinateType coordinate_type;
+       LandmarkDecodingType decoding_type;
+       unsigned int offset;
+};
+
 struct DecodingInfoAnchor {};
 
 struct DecodingInfoNms {};
index 0d5d830d43ee4a6a60e81f858ae8b62a7076cde0..fb4effac3945d6e34a359a67c38135cae69935de 100644 (file)
@@ -41,6 +41,18 @@ std::map<std::string, BoxDecodingType> gSupportedBoxDecodingTypes = { { "BYPASS"
                                                                                                                                          { "3D", BoxDecodingType::BBOX_3D } };
 std::map<std::string, ScoreType> gSupportedScoreTypes = { { "NORMAL", ScoreType::NORMAL },
                                                                                                                  { "SIGMOID", ScoreType::SIGMOID } };
+std::map<std::string, LandmarkType> gSupportedLandmarkTypes = { { "2D_SINGLE", LandmarkType::SINGLE_2D },
+                                                                                                                               { "2D_MULTI", LandmarkType::MULTI_2D } };
+std::map<std::string, LandmarkCoordinateType> gSupportedLandmarkCoordinateTypes = {
+       { "RATIO", LandmarkCoordinateType::RATIO },
+       { "PIXEL", LandmarkCoordinateType::PIXEL }
+};
+std::map<std::string, LandmarkDecodingType> gSupportedLandmarkDecodingTypes = {
+       { "BYPASS", LandmarkDecodingType::BYPASS },
+       { "BYPASS_MULTICHANNEL", LandmarkDecodingType::BYPASS_MULTICHANNEL },
+       { "HEATMAP", LandmarkDecodingType::HEATMAP },
+       { "HEATMAP_REFINE", LandmarkDecodingType::HEATMAP_REFINE }
+};
 
 /**
  * Function template.
@@ -100,7 +112,7 @@ void PostprocessParser::parseBox(shared_ptr<MetaInfo> metaInfo, JsonObject *root
 
        metaInfo->decodingTypeMap[DecodingType::BOX] = decodingBox;
 
-       // In case of bypss, we don't need to parse decoding_info.
+       // In case of bypass, we don't need to parse decoding_info.
        if (decodingBox->decodingType == BoxDecodingType::BYPASS)
                return;
 
@@ -155,7 +167,7 @@ void PostprocessParser::parseScore(shared_ptr<MetaInfo> metaInfo, JsonObject *ro
 
                if (json_object_has_member(object, "threshold")) {
                        decodingScore->threshold = static_cast<float>(json_object_get_double_member(object, "threshold"));
-                       LOGI("tthreshold : %f", decodingScore->threshold);
+                       LOGI("threshold : %f", decodingScore->threshold);
                }
 
                if (json_object_has_member(object, "score_type")) {
@@ -189,5 +201,42 @@ void PostprocessParser::parseNumber(shared_ptr<MetaInfo> metaInfo, JsonObject *r
        LOGI("LEAVE");
 }
 
+void PostprocessParser::parseLandmark(std::shared_ptr<MetaInfo> metaInfo, JsonObject *root)
+{
+       LOGI("ENTER");
+
+       if (!json_object_has_member(root, "landmark"))
+               throw InvalidOperation("landmark node doesn't exist");
+
+       shared_ptr<DecodingLandmark> decodingLandmark = make_shared<DecodingLandmark>();
+       JsonObject *object = json_object_get_object_member(root, "landmark");
+
+       try {
+               if (json_object_has_member(object, "landmark_type"))
+                       decodingLandmark->landmark_type = GetSupportedType<LandmarkType, map<string, LandmarkType> >(
+                                       object, "landmark_type", gSupportedLandmarkTypes);
+
+               if (json_object_has_member(object, "landmark_coordinate"))
+                       decodingLandmark->coordinate_type =
+                                       GetSupportedType<LandmarkCoordinateType, map<string, LandmarkCoordinateType> >(
+                                                       object, "landmark_coordinate", gSupportedLandmarkCoordinateTypes);
+
+               if (json_object_has_member(object, "decoding_type"))
+                       decodingLandmark->decoding_type =
+                                       GetSupportedType<LandmarkDecodingType, map<string, LandmarkDecodingType> >(
+                                                       object, "decoding_type", gSupportedLandmarkDecodingTypes);
+
+               if (json_object_has_member(object, "landmark_offset"))
+                       decodingLandmark->offset = static_cast<unsigned int>(json_object_get_int_member(object, "landmark_offset"));
+       } catch (const std::exception &e) {
+               LOGE("%s", e.what());
+               throw InvalidOperation("Invalid landmark meta information.");
+       }
+
+       metaInfo->decodingTypeMap[DecodingType::LANDMARK] = decodingLandmark;
+
+       LOGI("LEAVE");
+}
+
 } /* machine_learning */
 } /* mediavision */
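
The parseLandmark() handler above also documents the expected shape of the landmark section in a model meta file. A hypothetical fragment it would accept (key names come straight from the parser, values from the supported-type maps; the actual facial_landmark.json added by this patch is not shown in this hunk, and the landmark_offset value is illustrative):

    "landmark" : {
        "landmark_type" : "2D_SINGLE",
        "landmark_coordinate" : "RATIO",
        "decoding_type" : "BYPASS",
        "landmark_offset" : 2
    }

This combination (BYPASS decoding, RATIO coordinates, 2D_SINGLE landmarks) is exactly what FldTweakCnn::result() accepts, so it is the one the fld tweak cnn meta file is expected to use.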
index 005aef14c646c524205299d00a02dc0f6f273f75..8ba393b18874720862dd8f82d13b4d1aed2ca15f 100644 (file)
@@ -54,6 +54,7 @@ Requires:      training-engine-interface-common
 %define enable_ml_image_classification 1
 %define enable_ml_object_detection 1
 %define enable_ml_object_detection_3d 1
+%define enable_ml_landmark_detection 1
 
 %define build_depth_stream_testsuite 1
 BuildRequires: pkgconfig(vision-source)
@@ -63,7 +64,7 @@ BuildRequires: Open3D-devel
 %endif
 %endif
 %define build_options -DENABLE_INFERENCE_PROFILER=0 -DBUILD_DEPTH_STREAM_TESTSUITE=%{build_depth_stream_testsuite} -DMV_3D_POINTCLOUD_IS_AVAILABLE=%{enable_mv3d_pointcloud}
-%define task_group_options -DENABLE_ML_FACE_RECOGNITION=%{enable_ml_face_recognition} -DENABLE_ML_IMAGE_CLASSIFICATION=%{enable_ml_image_classification} -DENABLE_ML_OBJECT_DETECTION=%{enable_ml_object_detection} -DENABLE_ML_OBJECT_DETECTION_3D=%{enable_ml_object_detection_3d}
+%define task_group_options -DENABLE_ML_FACE_RECOGNITION=%{enable_ml_face_recognition} -DENABLE_ML_IMAGE_CLASSIFICATION=%{enable_ml_image_classification} -DENABLE_ML_OBJECT_DETECTION=%{enable_ml_object_detection} -DENABLE_ML_OBJECT_DETECTION_3D=%{enable_ml_object_detection_3d} -DENABLE_ML_LANDMARK_DETECTION=%{enable_ml_landmark_detection}
 Requires:   %{name}-machine_learning
 Requires:   inference-engine-interface-common
 %if !0%{?ml_only:1}
@@ -325,6 +326,9 @@ test_main() {
 %if "%{enable_ml_object_detection_3d}" == "1"
     /usr/bin/test_object_detection_3d
 %endif
+%if "%{enable_ml_landmark_detection}" == "1"
+    /usr/bin/test_landmark_detection
+%endif
 }
 
 teardown() {
@@ -447,6 +451,10 @@ find . -name '*.gcno' -not -path "./test/*" -exec cp --parents '{}' "$gcno_obj_d
 %{_datadir}/%{name}/object_detection_3d.json
 %{_libdir}/libmv_object_detection_3d.so
 %endif
+%if "%{enable_ml_landmark_detection}" == "1"
+%{_datadir}/%{name}/facial_landmark.json
+%{_libdir}/libmv_landmark_detection.so
+%endif
 
 %files machine_learning-devel
 %{_includedir}/media/mv_infer*.h
@@ -473,6 +481,11 @@ find . -name '*.gcno' -not -path "./test/*" -exec cp --parents '{}' "$gcno_obj_d
 %{_includedir}/media/mv_object_detection_3d_type.h
 %{_libdir}/pkgconfig/*object-detection-3d.pc
 %endif
+%if "%{enable_ml_landmark_detection}" == "1"
+%{_includedir}/media/mv_facial_landmark_internal.h
+%{_includedir}/media/mv_facial_landmark_type.h
+%{_libdir}/pkgconfig/*landmark-detection.pc
+%endif
 
 %files roi_tracker
 %manifest %{name}.manifest
@@ -508,6 +521,9 @@ find . -name '*.gcno' -not -path "./test/*" -exec cp --parents '{}' "$gcno_obj_d
 %if "%{enable_ml_object_detection_3d}" == "1"
 %{_bindir}/test_object_detection_3d
 %endif
+%if "%{enable_ml_landmark_detection}" == "1"
+%{_bindir}/test_landmark_detection
+%endif
 %{_bindir}/tizen-unittests/%{name}/run-unittest.sh
 %endif
 
index 9b8e668ffa53f076a59107313a1412dbdd219c19..f80d36936e990a295685462845f1fba1f66c60fe 100644 (file)
@@ -15,6 +15,10 @@ if (${ENABLE_ML_OBJECT_DETECTION_3D})
     message("Enabled object detection 3d test case.")
     add_subdirectory(${PROJECT_SOURCE_DIR}/object_detection_3d)
 endif()
+if (${ENABLE_ML_LANDMARK_DETECTION})
+    message("Enabled landmark detection test case.")
+    add_subdirectory(${PROJECT_SOURCE_DIR}/landmark_detection)
+endif()
 if (${ENABLE_ML_FACE_RECOGNITION})
     message("Enabled machine learning face recognition test case.")
     add_subdirectory(${PROJECT_SOURCE_DIR}/face_recognition)
diff --git a/test/testsuites/machine_learning/landmark_detection/CMakeLists.txt b/test/testsuites/machine_learning/landmark_detection/CMakeLists.txt
new file mode 100644 (file)
index 0000000..9023e57
--- /dev/null
@@ -0,0 +1,14 @@
+project(mv_landmark_detection_suite)
+cmake_minimum_required(VERSION 2.6...3.13)
+
+set(TEST_LANDMARK_DETECTION test_landmark_detection)
+
+add_executable(${TEST_LANDMARK_DETECTION} test_landmark_detection.cpp)
+
+target_link_libraries(${TEST_LANDMARK_DETECTION} gtest gtest_main
+                      mv_inference
+                      mv_landmark_detection
+                      mv_image_helper
+)
+
+install(TARGETS ${TEST_LANDMARK_DETECTION} DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/test/testsuites/machine_learning/landmark_detection/test_landmark_detection.cpp b/test/testsuites/machine_learning/landmark_detection/test_landmark_detection.cpp
new file mode 100644 (file)
index 0000000..d0b01d5
--- /dev/null
@@ -0,0 +1,148 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <algorithm>
+#include <string.h>
+
+#include "gtest/gtest.h"
+
+#include "ImageHelper.h"
+#include "mv_facial_landmark_internal.h"
+
+#define IMG_FACE MV_CONFIG_PATH "res/inference/images/faceLandmark.jpg"
+
+using namespace testing;
+using namespace std;
+
+using namespace MediaVision::Common;
+
+struct model_info {
+       string model_name;
+       string model_file;
+       string meta_file;
+       string label_file;
+};
+
+TEST(FacialLandmarkTest, GettingAvailableInferenceEnginesInfoShouldBeOk)
+{
+       mv_facial_landmark_h handle;
+
+       int ret = mv_facial_landmark_create(&handle);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       unsigned int engine_count = 0;
+
+       ret = mv_facial_landmark_get_engine_count(handle, &engine_count);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       cout << "Engine count = " << engine_count << endl;
+       ASSERT_GE(engine_count, 1);
+
+       for (unsigned int engine_idx = 0; engine_idx < engine_count; ++engine_idx) {
+               char *engine_type = nullptr;
+
+               ret = mv_facial_landmark_get_engine_type(handle, engine_idx, &engine_type);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               cout << "Engine type : " << engine_type << endl;
+
+               unsigned int device_count = 0;
+
+               ret = mv_facial_landmark_get_device_count(handle, engine_type, &device_count);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               cout << "Device count = " << device_count << endl;
+
+               ASSERT_GE(device_count, 1);
+
+               for (unsigned int device_idx = 0; device_idx < device_count; ++device_idx) {
+                       char *device_type = nullptr;
+
+                       ret = mv_facial_landmark_get_device_type(handle, engine_type, device_idx, &device_type);
+                       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+                       cout << "Device type : " << device_type << endl;
+               }
+       }
+
+       ret = mv_facial_landmark_destroy(handle);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+}
+
+TEST(FacialLandmarkTest, InferenceShouldBeOk)
+{
+       mv_facial_landmark_h handle;
+       vector<model_info> test_models {
+               { "", "", "", "" }, // If empty, the default model will be used.
+               { "FLD_TWEAK_CNN", "fld_tweakcnn_128x128.tflite", "fld_tweakcnn_128x128.json", "" }
+               // TODO.
+       };
+
+       const unsigned int answer[2][5] = { { 42, 87, 63, 48, 83 }, { 32, 31, 53, 75, 76 } };
+
+       mv_source_h mv_source = NULL;
+       int ret = mv_create_source(&mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       ret = ImageHelper::loadImageToSource(IMG_FACE, mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       for (auto model : test_models) {
+               cout << "model name : " << model.model_file << endl;
+
+               ret = mv_facial_landmark_create(&handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_facial_landmark_set_model(handle, model.model_name.c_str(), model.model_file.c_str(),
+                                                                                  model.meta_file.c_str(), model.label_file.c_str());
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_facial_landmark_set_engine(handle, "tflite", "cpu");
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_facial_landmark_configure(handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_facial_landmark_prepare(handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_facial_landmark_inference(handle, mv_source);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               unsigned int number_of_landmarks;
+               unsigned int *x_pos = nullptr, *y_pos = nullptr;
+
+               ret = mv_facial_landmark_get_positions(handle, &number_of_landmarks, &x_pos, &y_pos);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               for (unsigned int idx = 0; idx < number_of_landmarks; ++idx) {
+                       // Compute the signed distance in int to avoid unsigned wrap-around when the
+                       // detected position is smaller than the expected one.
+                       int distance_x = static_cast<int>(x_pos[idx]) - static_cast<int>(answer[0][idx]);
+                       int distance_y = static_cast<int>(y_pos[idx]) - static_cast<int>(answer[1][idx]);
+
+                       distance_x = distance_x < 0 ? -distance_x : distance_x;
+                       distance_y = distance_y < 0 ? -distance_y : distance_y;
+
+                       ASSERT_TRUE(distance_x <= 2 && distance_y <= 2);
+               }
+
+               ret = mv_facial_landmark_destroy(handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+       }
+
+       ret = mv_destroy_source(mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+}
\ No newline at end of file