mv_machine_learning: add landmark detection task group
author Inki Dae <inki.dae@samsung.com>
Thu, 13 Apr 2023 08:08:37 +0000 (17:08 +0900)
committer Kwanghoon Son <k.son@samsung.com>
Wed, 14 Jun 2023 02:14:40 +0000 (11:14 +0900)
[Issue type] : new feature

Add landmark detection task group support.

The landmark detection task group internally consists of the facial landmark
and pose estimation task groups, and provides a native API set for each of
them.

As an initial implementation, this patch adds the facial landmark detection
task group API, which uses the FLD TweakCNN model.
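
The following is a rough usage sketch of the new internal C API added by this
patch (illustrative only, not part of the patch; error handling and source
setup are omitted):

    #include <mv_facial_landmark_internal.h>

    mv_facial_landmark_h handle;
    mv_source_h source; /* filled with an image, e.g. via mv_create_source() */
    unsigned int count = 0;
    unsigned int *pos_x = NULL, *pos_y = NULL;

    /* Create the task handle, load the default (or user-given) model and prepare it. */
    mv_facial_landmark_create(&handle);
    mv_facial_landmark_configure(handle);
    mv_facial_landmark_prepare(handle);

    /* Run inference on the source and read back the detected landmark coordinates. */
    mv_facial_landmark_inference(handle, source);
    mv_facial_landmark_get_positions(handle, &count, &pos_x, &pos_y);

    mv_facial_landmark_destroy(handle);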

Change-Id: I021bb3e2a3b23edbc7da2805178872ba60c1d0e4
Signed-off-by: Inki Dae <inki.dae@samsung.com>
26 files changed:
CMakeLists.txt
include/mv_facial_landmark_internal.h [new file with mode: 0644]
include/mv_facial_landmark_type.h [new file with mode: 0644]
mv_machine_learning/CMakeLists.txt
mv_machine_learning/landmark_detection/CMakeLists.txt [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/LandmarkDetectionParser.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/facial_landmark_adapter.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/fld_tweak_cnn.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/landmark_detection.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/landmark_detection_type.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/mv_facial_landmark_open.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/mv_landmark_detection_config.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/meta/facial_landmark.json [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/LandmarkDetectionParser.cpp [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/fld_tweak_cnn.cpp [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/landmark_detection.cpp [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/mv_facial_landmark.c [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/mv_facial_landmark_open.cpp [new file with mode: 0644]
mv_machine_learning/meta/include/PostprocessParser.h
mv_machine_learning/meta/include/types.h
mv_machine_learning/meta/src/PostprocessParser.cpp
packaging/capi-media-vision.spec
test/testsuites/machine_learning/CMakeLists.txt
test/testsuites/machine_learning/landmark_detection/CMakeLists.txt [new file with mode: 0644]
test/testsuites/machine_learning/landmark_detection/test_landmark_detection.cpp [new file with mode: 0644]

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4e0e9971c854cf5361856802680616293740e43a..45329d9c1b96be9e9de2ba665458359d4714055e 100644 (file)
@@ -39,6 +39,8 @@ set(MV_OBJECT_DETECTION_LIB_NAME "mv_object_detection" CACHE STRING
        "Name of the library will be built for object detection module (without extension).")
 set(MV_OBJECT_DETECTION_3D_LIB_NAME "mv_object_detection_3d" CACHE STRING
        "Name of the library will be built for object detection 3d module (without extension).")
+set(MV_LANDMARK_DETECTION_LIB_NAME "mv_landmark_detection" CACHE STRING
+       "Name of the library will be built for landmark detection module (without extension).")
 set(MV_IMAGE_CLASSIFICATION_LIB_NAME "mv_image_classification" CACHE STRING
        "Name of the library will be built for image classification module (without extension).")
 
@@ -222,6 +224,20 @@ if (${ENABLE_ML_OBJECT_DETECTION_3D})
        list(APPEND TOTAL_LDFLAGS ${PC_LDFLAGS})
 endif()
 
+if (${ENABLE_ML_LANDMARK_DETECTION})
+    set(PC_NAME ${fw_name}-landmark-detection)
+    set(PC_LDFLAGS "-l${MV_LANDMARK_DETECTION_LIB_NAME} -l${MV_COMMON_LIB_NAME}")
+    configure_file(
+           ${fw_name}.pc.in
+           ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-landmark-detection.pc
+           @ONLY
+    )
+    install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-landmark-detection.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig)
+    install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/landmark_detection/meta/facial_landmark.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name})
+       list(APPEND TOTAL_REQUIRED ${PC_NAME})
+       list(APPEND TOTAL_LDFLAGS ${PC_LDFLAGS})
+endif()
+
 string(REPLACE ";" " " TOTAL_LDFLAGS "${TOTAL_LDFLAGS}")
 string(REPLACE " " ";" TOTAL_LDFLAGS_LIST "${TOTAL_LDFLAGS}")
 list(REMOVE_DUPLICATES TOTAL_LDFLAGS_LIST)
diff --git a/include/mv_facial_landmark_internal.h b/include/mv_facial_landmark_internal.h
new file mode 100644 (file)
index 0000000..b405f1e
--- /dev/null
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_FACIAL_LANDMARK_INTERNAL_H__
+#define __TIZEN_MEDIAVISION_FACIAL_LANDMARK_INTERNAL_H__
+
+#include <mv_common.h>
+#include <mv_facial_landmark_type.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file   mv_facial_landmark_internal.h
+ * @internal
+ * @brief  This file contains the Inference based Media Vision API.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_INFERENCE_MODULE
+ * @{
+ */
+
+/**
+ * @internal
+ * @brief Creates an inference handle for the facial landmark object.
+ * @details Use this function to create an inference handle. After the creation,
+ *          the facial landmark task has to be prepared with the
+ *          mv_facial_landmark_prepare() function, which prepares a network
+ *          for the inference.
+ *
+ * @since_tizen 7.5
+ *
+ * @remarks The @a handle should be released using mv_facial_landmark_destroy().
+ *
+ * @param[out] handle    The handle to the inference to be created.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ *
+ * @see mv_facial_landmark_destroy()
+ * @see mv_facial_landmark_prepare()
+ */
+int mv_facial_landmark_create(mv_facial_landmark_h *handle);
+
+/**
+ * @internal
+ * @brief Destroys the inference handle and releases all its resources.
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle    The handle to the inference to be destroyed.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create an inference handle by using mv_facial_landmark_create()
+ *
+ * @see mv_facial_landmark_create()
+ */
+int mv_facial_landmark_destroy(mv_facial_landmark_h handle);
+
+/**
+ * @internal
+ * @brief Sets user-given model information.
+ * @details Use this function to change the model information instead of the default one after calling @ref mv_facial_landmark_create().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle        The handle to the facial landmark object.
+ * @param[in] model_name    Model name.
+ * @param[in] model_file    Model file name.
+ * @param[in] meta_file     Model meta file name.
+ * @param[in] label_file    Label file name.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create()
+ */
+int mv_facial_landmark_set_model(mv_facial_landmark_h handle, const char *model_name, const char *model_file,
+                                                                const char *meta_file, const char *label_file);
+
+/**
+ * @internal
+ * @brief Configures the backend for the facial landmark inference.
+ *
+ * @since_tizen 7.5
+ *
+ * @param [in] handle         The handle to the inference
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ */
+int mv_facial_landmark_configure(mv_facial_landmark_h handle);
+
+/**
+ * @internal
+ * @brief Prepares the facial landmark inference
+ * @details Use this function to prepare the facial landmark inference based on
+ *          the configured network.
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the inference.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_PERMISSION_DENIED Permission denied
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Not supported format
+ */
+int mv_facial_landmark_prepare(mv_facial_landmark_h handle);
+
+/**
+ * @internal
+ * @brief Performs the facial landmark inference on the @a source.
+ *
+ * @since_tizen 7.5
+ * @remarks This function is synchronous and may take considerable time to run.
+ *
+ * @param[in] handle          The handle to the inference
+ * @param[in] source         The handle to the source of the media
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ *                                                  isn't supported
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_facial_landmark_create()
+ * @pre Configure an inference by calling mv_facial_landmark_configure()
+ * @pre Prepare an inference by calling mv_facial_landmark_prepare()
+ */
+int mv_facial_landmark_inference(mv_facial_landmark_h handle, mv_source_h source);
+
+/**
+ * @internal
+ * @brief Gets the facial landmark positions on the @a source.
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle               The handle to the inference
+ * @param[out] number_of_landmarks The number of landmarks detected.
+ * @param[out] pos_x               An array containing x-coordinate values.
+ * @param[out] pos_y               An array containing y-coordinate values.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_facial_landmark_create()
+ * @pre Configure an inference by calling mv_facial_landmark_configure()
+ * @pre Prepare an inference by calling mv_facial_landmark_prepare()
+ * @pre Perform an inference by calling mv_facial_landmark_inference()
+ */
+int mv_facial_landmark_get_positions(mv_facial_landmark_h handle, unsigned int *number_of_landmarks,
+                                                                        unsigned int **pos_x, unsigned int **pos_y);
+
+/**
+ * @internal
+ * @brief Sets user-given inference engine and device types for inference.
+ * @details Use this function to change the inference engine and device types for inference instead of default ones after calling @ref mv_facial_landmark_create().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle        The handle to the facial landmark object.
+ * @param[in] engine_type  A string of inference engine type.
+ * @param[in] device_type   A string of device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create()
+ */
+int mv_facial_landmark_set_engine(mv_facial_landmark_h handle, const char *engine_type, const char *device_type);
+
+/**
+ * @internal
+ * @brief Gets the number of inference engines available for the facial landmark task API.
+ * @details Use this function to get how many inference engines are supported for facial landmark detection after calling @ref mv_facial_landmark_create().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the facial landmark object.
+ * @param[out] engine_count  The number of inference engines available for the facial landmark API.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create()
+ */
+int mv_facial_landmark_get_engine_count(mv_facial_landmark_h handle, unsigned int *engine_count);
+
+/**
+ * @internal
+ * @brief Gets the engine type for a given inference engine index.
+ * @details Use this function to get the inference engine type for a given engine index after calling @ref mv_facial_landmark_get_engine_count().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle        The handle to the facial landmark object.
+ * @param[in] engine_index  An inference engine index for getting the inference engine type.
+ * @param[out] engine_type  A string indicating the inference engine type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Get a number of inference engines available for facial landmark task API by calling @ref mv_facial_landmark_get_engine_count()
+ */
+int mv_facial_landmark_get_engine_type(mv_facial_landmark_h handle, const unsigned int engine_index,
+                                                                          char **engine_type);
+
+/**
+ * @internal
+ * @brief Gets the number of device types available for a given inference engine.
+ * @details Use this function to get how many device types are supported for a given inference engine after calling @ref mv_facial_landmark_create().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the facial landmark object.
+ * @param[in] engine_type    An inference engine string.
+ * @param[out] device_count  The number of device types available for a given inference engine.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create()
+ */
+int mv_facial_landmark_get_device_count(mv_facial_landmark_h handle, const char *engine_type,
+                                                                               unsigned int *device_count);
+
+/**
+ * @internal
+ * @brief Gets the list of available device types.
+ * @details Use this function to get which device types are supported for the current inference engine type after calling @ref mv_facial_landmark_configure().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the facial landmark object.
+ * @param[in] engine_type    An inference engine string.
+ * @param[in] device_index   A device index for getting the device type.
+ * @param[out] device_type   A string indicating the device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create()
+ * @pre Configure facial landmark task by calling @ref mv_facial_landmark_configure()
+ */
+int mv_facial_landmark_get_device_type(mv_facial_landmark_h handle, const char *engine_type,
+                                                                          const unsigned int device_index, char **device_type);
+/**
+ * @}
+ */
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_FACIAL_LANDMARK_INTERNAL_H__ */
diff --git a/include/mv_facial_landmark_type.h b/include/mv_facial_landmark_type.h
new file mode 100644 (file)
index 0000000..68a467b
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_MV_FACIAL_LANDMARK_TYPE_H__
+#define __TIZEN_MEDIAVISION_MV_FACIAL_LANDMARK_TYPE_H__
+
+#include <mv_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file   mv_facial_landmark_type.h
+ * @brief  This file contains the facial landmark handle for Mediavision.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_FACE_LANDMARK_MODULE
+ * @{
+ */
+
+/**
+ * @brief The facial landmark object handle.
+ *
+ * @since_tizen 7.5
+ */
+typedef void *mv_facial_landmark_h;
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_MV_FACIAL_LANDMARK_TYPE_H__ */
diff --git a/mv_machine_learning/CMakeLists.txt b/mv_machine_learning/CMakeLists.txt
index c41e4f0c0de7d6f84bf8e52ea5d4d93b37850865..604acf9981f1c2fcf97ea1ad9f80ef8c5ce27bcc 100644 (file)
@@ -12,6 +12,10 @@ if (${ENABLE_ML_OBJECT_DETECTION_3D})
     message("Enabled machine learning object detection 3d task group.")
     add_subdirectory(object_detection_3d)
 endif()
+if (${ENABLE_ML_LANDMARK_DETECTION})
+    message("Enabled machine learning landmark detection task group.")
+    add_subdirectory(landmark_detection)
+endif()
 
 if (${ENABLE_ML_FACE_RECOGNITION})
     message("Enabled machine learning face recognition feature.")
diff --git a/mv_machine_learning/landmark_detection/CMakeLists.txt b/mv_machine_learning/landmark_detection/CMakeLists.txt
new file mode 100644 (file)
index 0000000..516fa4a
--- /dev/null
@@ -0,0 +1,27 @@
+project(${MV_LANDMARK_DETECTION_LIB_NAME})
+cmake_minimum_required(VERSION 2.6...3.13)
+
+pkg_check_modules(${PROJECT_NAME}_DEP REQUIRED inference-engine-interface-common iniparser json-glib-1.0)
+file(GLOB MV_LANDMARK_DETECTION_SOURCE_LIST  "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/../meta/src/*.cpp")
+
+find_package(OpenCV REQUIRED dnn imgproc)
+if(NOT OpenCV_FOUND)
+       message(SEND_ERROR "OpenCV NOT FOUND")
+       return()
+endif()
+
+if(FORCED_STATIC_BUILD)
+       add_library(${PROJECT_NAME} STATIC ${MV_LANDMARK_DETECTION_SOURCE_LIST})
+else()
+       add_library(${PROJECT_NAME} SHARED ${MV_LANDMARK_DETECTION_SOURCE_LIST})
+endif()
+
+target_link_libraries(${PROJECT_NAME} ${MV_COMMON_LIB_NAME} ${OpenCV_LIBS} ${${PROJECT_NAME}_DEP_LIBRARIES} mv_inference)
+target_include_directories(${PROJECT_NAME} PRIVATE include ../inference/include ../common/include ../meta/include)
+install(TARGETS ${PROJECT_NAME} DESTINATION ${LIB_INSTALL_DIR})
+install(
+       DIRECTORY ${PROJECT_SOURCE_DIR}/../../include/ DESTINATION include/media
+       FILES_MATCHING
+       PATTERN "mv_facial_landmark_internal.h"
+       PATTERN "mv_facial_landmark_type.h"
+       )
diff --git a/mv_machine_learning/landmark_detection/include/LandmarkDetectionParser.h b/mv_machine_learning/landmark_detection/include/LandmarkDetectionParser.h
new file mode 100644 (file)
index 0000000..f536e7b
--- /dev/null
@@ -0,0 +1,45 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LANDMARK_DETECTION_PARSER_H__
+#define __LANDMARK_DETECTION_PARSER_H__
+
+#include "MetaParser.h"
+#include "PostprocessParser.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+class LandmarkDetectionParser : public MetaParser
+{
+private:
+       PostprocessParser _postprocessParser;
+
+protected:
+       void parsePostprocess(std::shared_ptr<MetaInfo> meta_info, JsonObject *in_obj) override;
+
+public:
+       LandmarkDetectionParser();
+       ~LandmarkDetectionParser();
+
+       void setTaskType(int type) override;
+};
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/include/facial_landmark_adapter.h b/mv_machine_learning/landmark_detection/include/facial_landmark_adapter.h
new file mode 100644 (file)
index 0000000..dbc2391
--- /dev/null
@@ -0,0 +1,63 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FACE_LANDMARK_ADAPTER_H__
+#define __FACE_LANDMARK_ADAPTER_H__
+
+#include <dlog.h>
+
+#include "EngineConfig.h"
+#include "itask.h"
+#include "fld_tweak_cnn.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+template<typename T, typename V> class FacialLandmarkAdapter : public mediavision::common::ITask<T, V>
+{
+private:
+       std::unique_ptr<LandmarkDetection> _landmark_detection;
+       T _source;
+       std::string _model_name;
+       std::string _model_file;
+       std::string _meta_file;
+       std::string _label_file;
+
+public:
+       FacialLandmarkAdapter();
+       ~FacialLandmarkAdapter();
+
+       void create(int type) override;
+
+       void setModelInfo(const char *model_file, const char *meta_file, const char *label_file,
+                                         const char *model_name) override;
+       void setEngineInfo(const char *engine_type, const char *device_type) override;
+       void configure() override;
+       void getNumberOfEngines(unsigned int *number_of_engines) override;
+       void getEngineType(unsigned int engine_index, char **engine_type) override;
+       void getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices) override;
+       void getDeviceType(const char *engine_type, unsigned int device_index, char **device_type) override;
+       void prepare() override;
+       void setInput(T &t) override;
+       void perform() override;
+       V &getOutput() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/include/fld_tweak_cnn.h b/mv_machine_learning/landmark_detection/include/fld_tweak_cnn.h
new file mode 100644 (file)
index 0000000..13e87c1
--- /dev/null
@@ -0,0 +1,47 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FLD_TWEAK_CNN_H__
+#define __FLD_TWEAK_CNN_H__
+
+#include <string>
+#include <memory>
+#include <mv_common.h>
+#include "mv_private.h"
+
+#include "landmark_detection.h"
+#include <mv_inference_type.h>
+
+namespace mediavision
+{
+namespace machine_learning
+{
+class FldTweakCnn : public LandmarkDetection
+{
+private:
+       FacialLandmarkResult _result;
+
+public:
+       FldTweakCnn(LandmarkDetectionTaskType task_type);
+       ~FldTweakCnn();
+
+       FacialLandmarkResult &result() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/include/landmark_detection.h b/mv_machine_learning/landmark_detection/include/landmark_detection.h
new file mode 100644 (file)
index 0000000..0b378bb
--- /dev/null
@@ -0,0 +1,83 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LANDMARK_DETECTION_H__
+#define __LANDMARK_DETECTION_H__
+
+#include <mv_common.h>
+#include <mv_inference_type.h>
+#include "mv_private.h"
+
+#include "EngineConfig.h"
+#include "inference_engine_common_impl.h"
+#include "Inference.h"
+#include "landmark_detection_type.h"
+#include "LandmarkDetectionParser.h"
+#include "Preprocess.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+class LandmarkDetection
+{
+private:
+       void loadLabel();
+       void getEngineList();
+       void getDeviceList(const char *engine_type);
+
+       LandmarkDetectionTaskType _task_type;
+
+protected:
+       std::unique_ptr<mediavision::inference::Inference> _inference;
+       std::unique_ptr<MediaVision::Common::EngineConfig> _config;
+       std::unique_ptr<MetaParser> _parser;
+       std::vector<std::string> _labels;
+       std::vector<std::string> _valid_backends;
+       std::vector<std::string> _valid_devices;
+       Preprocess _preprocess;
+       std::string _modelFilePath;
+       std::string _modelMetaFilePath;
+       std::string _modelDefaultPath;
+       std::string _modelLabelFilePath;
+       int _backendType;
+       int _targetDeviceType;
+
+       void getOutputNames(std::vector<std::string> &names);
+       void getOutputTensor(std::string target_name, std::vector<float> &tensor);
+
+public:
+       LandmarkDetection(LandmarkDetectionTaskType task_type);
+       virtual ~LandmarkDetection() = default;
+       LandmarkDetectionTaskType getTaskType();
+       void setUserModel(std::string model_file, std::string meta_file, std::string label_file);
+       void setEngineInfo(std::string engine_type, std::string device_type);
+       void getNumberOfEngines(unsigned int *number_of_engines);
+       void getEngineType(unsigned int engine_index, char **engine_type);
+       void getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices);
+       void getDeviceType(const char *engine_type, const unsigned int device_index, char **device_type);
+       void parseMetaFile(const char *meta_file_name);
+       void configure();
+       void prepare();
+       void preprocess(mv_source_h &mv_src);
+       void inference(mv_source_h source);
+       virtual FacialLandmarkResult &result() = 0;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/include/landmark_detection_type.h b/mv_machine_learning/landmark_detection/include/landmark_detection_type.h
new file mode 100644 (file)
index 0000000..2b3d3ea
--- /dev/null
@@ -0,0 +1,49 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LANDMARK_DETECTION_TYPE_H__
+#define __LANDMARK_DETECTION_TYPE_H__
+
+#include <vector>
+
+#include <mv_common.h>
+#include <mv_inference_type.h>
+
+namespace mediavision
+{
+namespace machine_learning
+{
+struct LandmarkDetectionInput {
+       mv_source_h inference_src;
+       // TODO.
+};
+
+struct FacialLandmarkResult {
+       unsigned int number_of_landmarks;
+       std::vector<unsigned int> x_pos;
+       std::vector<unsigned int> y_pos;
+};
+
+enum class LandmarkDetectionTaskType {
+       LANDMARK_DETECTION_TASK_NONE = 0,
+       FLD_TWEAK_CNN,
+       // TODO
+};
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/include/mv_facial_landmark_open.h b/mv_machine_learning/landmark_detection/include/mv_facial_landmark_open.h
new file mode 100644 (file)
index 0000000..82274d7
--- /dev/null
@@ -0,0 +1,274 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_FACIAL_LANDMARK_OPEN_H__
+#define __MEDIA_VISION_FACIAL_LANDMARK_OPEN_H__
+
+#include <mv_common.h>
+#include <mv_private.h>
+#include <mv_facial_landmark_type.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+        * @brief Create facial landmark object handle.
+        * @details Use this function to create a facial landmark object handle.
+        *          After creation the handle has to be prepared with the
+        *          @ref mv_facial_landmark_prepare_open() function to prepare
+        *               a facial landmark object.
+        *
+        * @since_tizen 7.5
+        *
+        * @param[out] out_handle    The handle to the facial landmark object to be created
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+        *
+        * @post Release @a out_handle by using the
+        *       @ref mv_facial_landmark_destroy_open() function when it is not needed
+        *       anymore
+        *
+        * @see mv_facial_landmark_destroy_open()
+        */
+int mv_facial_landmark_create_open(mv_facial_landmark_h *out_handle);
+
+/**
+        * @brief Destroy the facial landmark handle and release all its resources.
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle    The handle to the facial landmark object to be destroyed.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        *
+        * @pre Create a facial landmark handle by using @ref mv_facial_landmark_create_open()
+        *
+        * @see mv_facial_landmark_create_open()
+        */
+int mv_facial_landmark_destroy_open(mv_facial_landmark_h handle);
+
+/**
+        * @brief Set user-given model information.
+        * @details Use this function to change the model information instead of the default one after calling @ref mv_facial_landmark_create_open().
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle        The handle to the facial landmark object.
+        * @param[in] model_name    Model name.
+        * @param[in] model_file    Model file name.
+        * @param[in] meta_file     Model meta file name.
+        * @param[in] label_file    Label file name.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create_open()
+        */
+int mv_facial_landmark_set_model_open(mv_facial_landmark_h handle, const char *model_name, const char *model_file,
+                                                                         const char *meta_file, const char *label_file);
+
+/**
+        * @brief Configure the backend for the inference handle.
+        *
+        * @since_tizen 7.5
+        *
+        * @param [in] handle         The handle to the inference
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+        * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+        */
+int mv_facial_landmark_configure_open(mv_facial_landmark_h handle);
+
+/**
+        * @brief Prepare inference.
+        * @details Use this function to prepare inference based on
+        *          the configured network.
+        *
+        * @since_tizen 7.5
+        *
+        * @param [in] handle         The handle to the inference
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data
+        * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+        */
+int mv_facial_landmark_prepare_open(mv_facial_landmark_h handle);
+
+/**
+        *
+        * @brief Perform facial landmark inference on the @a source.
+        * @details Use this function to run inference on a given source.
+        *
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle         The handle to the facial landmark object.
+        * @param[in] source         The handle to the source of the media.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+        *                                                  isn't supported
+        * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+        *
+        * @pre Create a source handle by calling @ref mv_create_source()
+        * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create_open()
+        * @pre Configure an inference by calling @ref mv_facial_landmark_configure_open()
+        * @pre Prepare a facial landmark by calling @ref mv_facial_landmark_prepare_open()
+        */
+int mv_facial_landmark_inference_open(mv_facial_landmark_h handle, mv_source_h source);
+
+/**
+ * @brief Gets the facial landmark positions on the @a source.
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle               The handle to the inference
+ * @param[out] number_of_landmarks The number of landmarks detected.
+ * @param[out] pos_x               An array containing x-coordinate values.
+ * @param[out] pos_y               An array containing y-coordinate values.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_facial_landmark_create_open()
+ * @pre Configure an inference by calling mv_facial_landmark_configure_open()
+ * @pre Prepare an inference by calling mv_facial_landmark_prepare_open()
+ * @pre Perform an inference by calling mv_facial_landmark_inference_open()
+ */
+int mv_facial_landmark_get_positions_open(mv_facial_landmark_h handle, unsigned int *number_of_landmarks,
+                                                                                 unsigned int **pos_x, unsigned int **pos_y);
+
+/**
+        * @brief Set user-given backend and device types for inference.
+        * @details Use this function to change the backend and device types for inference instead of default ones after calling @ref mv_facial_landmark_create_open().
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle        The handle to the facial landmark object.
+        * @param[in] backend_type  A string of backend type.
+        * @param[in] device_type   A string of device type.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create_open()
+        */
+int mv_facial_landmark_set_engine_open(mv_facial_landmark_h handle, const char *backend_type, const char *device_type);
+
+/**
+        * @brief Get the number of inference engines available for the facial landmark task API.
+        * @details Use this function to get how many inference engines are supported for facial landmark detection after calling @ref mv_facial_landmark_create_open().
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle         The handle to the facial landmark object.
+        * @param[out] engine_count  The number of inference engines available for the facial landmark API.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create_open()
+        */
+int mv_facial_landmark_get_engine_count_open(mv_facial_landmark_h handle, unsigned int *engine_count);
+
+/**
+        * @brief Get the engine type for a given inference engine index.
+        * @details Use this function to get the inference engine type for a given engine index after calling @ref mv_facial_landmark_get_engine_count_open().
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle        The handle to the facial landmark object.
+        * @param[in] engine_index  An inference engine index for getting the inference engine type.
+        * @param[out] engine_type  A string indicating the inference engine type.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Get the number of inference engines available for the facial landmark task API by calling @ref mv_facial_landmark_get_engine_count_open()
+        */
+int mv_facial_landmark_get_engine_type_open(mv_facial_landmark_h handle, const unsigned int engine_index,
+                                                                                       char **engine_type);
+
+/**
+        * @brief Get the number of device types available for a given inference engine.
+        * @details Use this function to get how many device types are supported for a given inference engine after calling @ref mv_facial_landmark_create_open().
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle         The handle to the facial landmark object.
+        * @param[in] engine_type    An inference engine string.
+        * @param[out] device_count  The number of device types available for a given inference engine.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create_open()
+        */
+int mv_facial_landmark_get_device_count_open(mv_facial_landmark_h handle, const char *engine_type,
+                                                                                        unsigned int *device_count);
+
+/**
+        * @brief Get the list of available device types.
+        * @details Use this function to get which device types are supported for the current inference engine type after calling @ref mv_facial_landmark_configure_open().
+        *
+        * @since_tizen 7.5
+        *
+        * @param[in] handle         The handle to the facial landmark object.
+        * @param[in] engine_type    An inference engine string.
+        * @param[in] device_index   A device index for getting the device type.
+        * @param[out] device_type   A string indicating the device type.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Create a facial landmark handle by calling @ref mv_facial_landmark_create_open()
+        * @pre Configure the facial landmark task by calling @ref mv_facial_landmark_configure_open()
+        */
+int mv_facial_landmark_get_device_type_open(mv_facial_landmark_h handle, const char *engine_type,
+                                                                                       const unsigned int device_index, char **device_type);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __MEDIA_VISION_FACIAL_LANDMARK_OPEN_H__ */
diff --git a/mv_machine_learning/landmark_detection/include/mv_landmark_detection_config.h b/mv_machine_learning/landmark_detection/include/mv_landmark_detection_config.h
new file mode 100644 (file)
index 0000000..8957553
--- /dev/null
@@ -0,0 +1,65 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_LANDMARK_DETECTION_CONFIG_H__
+#define __MEDIA_VISION_LANDMARK_DETECTION_CONFIG_H__
+
+/**
+ * @brief Defines #MV_LANDMARK_DETECTION_MODEL_DEFAULT_PATH
+ *        to set the landmark detection default path.
+ *
+ * @since_tizen 7.5
+ */
+#define MV_LANDMARK_DETECTION_MODEL_DEFAULT_PATH "MODEL_DEFAULT_PATH"
+
+/**
+ * @brief Defines #MV_LANDMARK_DETECTION_MODEL_FILE_PATH
+ *        to set the landmark detection model file path.
+ *
+ * @since_tizen 7.5
+ */
+#define MV_LANDMARK_DETECTION_MODEL_FILE_PATH "MODEL_FILE_NAME"
+
+/**
+ * @brief Defines #MV_LANDMARK_DETECTION_MODEL_META_FILE_PATH to set inference
+ *        models's metadata file attribute of the engine configuration.
+ * @details The file includes inference model's metadata such as input and output
+ *          node names, input tensor's width and height,
+ *          mean and standard deviation values for pre-processing.
+ *
+ * @since_tizen 7.5
+ */
+#define MV_LANDMARK_DETECTION_MODEL_META_FILE_PATH "META_FILE_NAME"
+
+#define MV_LANDMARK_DETECTION_LABEL_FILE_NAME "LABEL_FILE_NAME"
+
+/**
+ * @brief Defines #MV_LANDMARK_DETECTION_BACKEND_TYPE
+ *        to set inference backend engine type. In default, tensorflow lite is used.
+ *
+ * @since_tizen 7.5
+ */
+#define MV_LANDMARK_DETECTION_BACKEND_TYPE "BACKEND_TYPE"
+
+/**
+ * @brief Defines #MV_LANDMARK_DETECTION_TARGET_DEVICE_TYPE
+ *        to set inference target device type. In default, CPU device is used.
+ *
+ * @since_tizen 7.5
+ */
+#define MV_LANDMARK_DETECTION_TARGET_DEVICE_TYPE "TARGET_DEVICE_TYPE"
+
+#endif /* __MEDIA_VISION_LANDMARK_DETECTION_CONFIG_H__ */
diff --git a/mv_machine_learning/landmark_detection/meta/facial_landmark.json b/mv_machine_learning/landmark_detection/meta/facial_landmark.json
new file mode 100644 (file)
index 0000000..2c5cc69
--- /dev/null
@@ -0,0 +1,35 @@
+{
+    "attributes":
+    [
+        {
+            "name" : "MODEL_DEFAULT_PATH",
+            "type" : "string",
+            "value" : "/opt/usr/globalapps/mediavision.landmark.detection/models/tflite/"
+        },
+        {
+            "name"  : "MODEL_FILE_NAME",
+            "type"  : "string",
+            "value" : "fld_tweakcnn_128x128.tflite"
+        },
+        {
+            "name"  : "META_FILE_NAME",
+            "type"  : "string",
+            "value" : "fld_tweakcnn_128x128.json"
+        },
+        {
+            "name"  : "LABEL_FILE_NAME",
+            "type"  : "string",
+            "value" : ""
+        },
+        {
+            "name"  : "BACKEND_TYPE",
+            "type"  : "integer",
+            "value" : 1
+        },
+        {
+            "name"  : "TARGET_DEVICE_TYPE",
+            "type"  : "integer",
+            "value" : 1
+        }
+    ]
+}
diff --git a/mv_machine_learning/landmark_detection/src/LandmarkDetectionParser.cpp b/mv_machine_learning/landmark_detection/src/LandmarkDetectionParser.cpp
new file mode 100644 (file)
index 0000000..0fc38ed
--- /dev/null
@@ -0,0 +1,60 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <memory>
+
+#include "machine_learning_exception.h"
+#include "LandmarkDetectionParser.h"
+#include "landmark_detection_type.h"
+
+using namespace std;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+LandmarkDetectionParser::LandmarkDetectionParser()
+{
+       LOGI("ENTER");
+       LOGI("LEAVE");
+}
+
+LandmarkDetectionParser::~LandmarkDetectionParser()
+{}
+
+void LandmarkDetectionParser::setTaskType(int type)
+{
+       LandmarkDetectionTaskType task_type = static_cast<LandmarkDetectionTaskType>(type);
+       // TODO.
+}
+
+void LandmarkDetectionParser::parsePostprocess(shared_ptr<MetaInfo> meta_info, JsonObject *in_obj)
+{
+       LOGI("ENTER");
+
+       LOGI("tensor name : %s", meta_info->name.c_str());
+
+       if (json_object_has_member(in_obj, "score"))
+               _postprocessParser.parseScore(meta_info, in_obj);
+
+       if (json_object_has_member(in_obj, "landmark"))
+               _postprocessParser.parseLandmark(meta_info, in_obj);
+
+       LOGI("LEAVE");
+}
+
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp b/mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp
new file mode 100644 (file)
index 0000000..5d8b413
--- /dev/null
@@ -0,0 +1,145 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "machine_learning_exception.h"
+#include "facial_landmark_adapter.h"
+
+using namespace std;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+template<typename T, typename V> FacialLandmarkAdapter<T, V>::FacialLandmarkAdapter() : _source()
+{
+       // By default, the FLD TweakCNN model is used.
+       // If another model is set by the user then the strategy pattern is used
+       // to create its corresponding concrete class by calling create().
+       _landmark_detection = make_unique<FldTweakCnn>(LandmarkDetectionTaskType::FLD_TWEAK_CNN);
+}
+
+template<typename T, typename V> FacialLandmarkAdapter<T, V>::~FacialLandmarkAdapter()
+{}
+
+template<typename T, typename V> void FacialLandmarkAdapter<T, V>::create(int type)
+{
+       LandmarkDetectionTaskType task_type = static_cast<LandmarkDetectionTaskType>(type);
+
+       // If the default task type is the same as the given one then skip.
+       if (_landmark_detection->getTaskType() == task_type)
+               return;
+
+       _landmark_detection.reset();
+
+       if (task_type == LandmarkDetectionTaskType::FLD_TWEAK_CNN)
+               _landmark_detection = make_unique<FldTweakCnn>(task_type);
+       // TODO.
+}
+
+template<typename T, typename V>
+void FacialLandmarkAdapter<T, V>::setModelInfo(const char *model_file, const char *meta_file, const char *label_file,
+                                                                                          const char *model_name)
+{
+       string model_name_str(model_name);
+
+       if (!model_name_str.empty()) {
+               transform(model_name_str.begin(), model_name_str.end(), model_name_str.begin(), ::toupper);
+
+               int model_type = 0;
+
+               if (model_name_str == string("FLD_TWEAK_CNN"))
+                       model_type = static_cast<int>(LandmarkDetectionTaskType::FLD_TWEAK_CNN);
+               // TODO.
+               else
+                       throw InvalidParameter("Invalid landmark detection model name.");
+
+               create(static_cast<int>(model_type));
+       }
+
+       _model_file = string(model_file);
+       _meta_file = string(meta_file);
+       _label_file = string(label_file);
+
+       if (_model_file.empty() && _meta_file.empty()) {
+               LOGW("Given model info is invalid so default model info will be used instead.");
+               return;
+       }
+
+       _landmark_detection->setUserModel(_model_file, _meta_file, _label_file);
+}
+
+template<typename T, typename V>
+void FacialLandmarkAdapter<T, V>::setEngineInfo(const char *engine_type, const char *device_type)
+{
+       _landmark_detection->setEngineInfo(string(engine_type), string(device_type));
+}
+
+template<typename T, typename V> void FacialLandmarkAdapter<T, V>::configure()
+{
+       _landmark_detection->parseMetaFile("facial_landmark.json");
+       _landmark_detection->configure();
+}
+
+template<typename T, typename V> void FacialLandmarkAdapter<T, V>::getNumberOfEngines(unsigned int *number_of_engines)
+{
+       _landmark_detection->getNumberOfEngines(number_of_engines);
+}
+
+template<typename T, typename V>
+void FacialLandmarkAdapter<T, V>::getEngineType(unsigned int engine_index, char **engine_type)
+{
+       _landmark_detection->getEngineType(engine_index, engine_type);
+}
+
+template<typename T, typename V>
+void FacialLandmarkAdapter<T, V>::getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices)
+{
+       _landmark_detection->getNumberOfDevices(engine_type, number_of_devices);
+}
+
+template<typename T, typename V>
+void FacialLandmarkAdapter<T, V>::getDeviceType(const char *engine_type, unsigned int device_index, char **device_type)
+{
+       _landmark_detection->getDeviceType(engine_type, device_index, device_type);
+}
+
+template<typename T, typename V> void FacialLandmarkAdapter<T, V>::prepare()
+{
+       _landmark_detection->prepare();
+}
+
+template<typename T, typename V> void FacialLandmarkAdapter<T, V>::setInput(T &t)
+{
+       _source = t;
+}
+
+template<typename T, typename V> void FacialLandmarkAdapter<T, V>::perform()
+{
+       _landmark_detection->preprocess(_source.inference_src);
+       _landmark_detection->inference(_source.inference_src);
+}
+
+template<typename T, typename V> V &FacialLandmarkAdapter<T, V>::getOutput()
+{
+       return _landmark_detection->result();
+}
+
+template class FacialLandmarkAdapter<LandmarkDetectionInput, FacialLandmarkResult>;
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/src/fld_tweak_cnn.cpp b/mv_machine_learning/landmark_detection/src/fld_tweak_cnn.cpp
new file mode 100644 (file)
index 0000000..ae90e6d
--- /dev/null
@@ -0,0 +1,97 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <map>
+#include <algorithm>
+#include <iostream>
+
+#include "machine_learning_exception.h"
+#include "mv_landmark_detection_config.h"
+#include "fld_tweak_cnn.h"
+#include "Postprocess.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+FldTweakCnn::FldTweakCnn(LandmarkDetectionTaskType task_type) : LandmarkDetection(task_type), _result()
+{}
+
+FldTweakCnn::~FldTweakCnn()
+{}
+
+FacialLandmarkResult &FldTweakCnn::result()
+{
+       constexpr static unsigned int numberOfLandmarks = 5;
+
+       // Clear the previous result because result() can be called multiple times by the user,
+       // so the existing result data must be dropped before it is populated again.
+       // Note that _result contains std::vector members, so it must not be cleared with memset().
+       _result = FacialLandmarkResult();
+
+       vector<string> names;
+
+       LandmarkDetection::getOutputNames(names);
+
+       auto scoreMetaInfo = _parser->getOutputMetaMap().at(names[0]);
+       auto decodingLandmark =
+                       static_pointer_cast<DecodingLandmark>(scoreMetaInfo->decodingTypeMap[DecodingType::LANDMARK]);
+
+       if (decodingLandmark->decoding_type != LandmarkDecodingType::BYPASS)
+               throw InvalidOperation("decoding type not supported.");
+
+       if (decodingLandmark->coordinate_type != LandmarkCoordinateType::RATIO)
+               throw InvalidOperation("coordinate type not supported.");
+
+       if (decodingLandmark->landmark_type != LandmarkType::SINGLE_2D)
+               throw InvalidOperation("landmark type not supported.");
+
+       auto ori_src_width = static_cast<double>(_preprocess.getImageWidth()[0]);
+       auto ori_src_height = static_cast<double>(_preprocess.getImageHeight()[0]);
+       auto input_tensor_width = static_cast<double>(_inference->getInputWidth());
+       auto input_tensor_height = static_cast<double>(_inference->getInputHeight());
+
+       _result.number_of_landmarks = numberOfLandmarks;
+
+       vector<float> score_tensor;
+
+       LandmarkDetection::getOutputTensor(names[0], score_tensor);
+
+       // Calculate the ratio [A] between the original image size and the input tensor size.
+       double width_ratio = ori_src_width / input_tensor_width;
+       double height_ratio = ori_src_height / input_tensor_height;
+
+       // If the landmark coordinate type is RATIO, the output tensor buffer contains ratio values
+       // indicating the position of each landmark relative to the input tensor size, interleaved
+       // as (x0, y0, x1, y1, ...). Therefore, each landmark position in the original image is
+       //    x = [width A] * width of input tensor * x ratio value from the output tensor,
+       //    y = [height A] * height of input tensor * y ratio value from the output tensor.
+       for (unsigned int idx = 0; idx < numberOfLandmarks; ++idx) {
+               _result.x_pos.push_back(
+                               static_cast<unsigned int>(width_ratio * input_tensor_width * score_tensor[idx * 2]));
+               _result.y_pos.push_back(
+                               static_cast<unsigned int>(height_ratio * input_tensor_height * score_tensor[idx * 2 + 1]));
+       }
+
+       return _result;
+}
+
+}
+}
\ No newline at end of file
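
Note: to make the RATIO decoding in FldTweakCnn::result() above concrete, the sketch below runs the same arithmetic with made-up numbers (a 640x480 source image, a 128x128 input tensor and one landmark ratio pair of (0.3, 0.5); none of these values come from this patch):

#include <iostream>

int main()
{
	// Hypothetical values for illustration only.
	double ori_src_width = 640.0, ori_src_height = 480.0;           // original image size
	double input_tensor_width = 128.0, input_tensor_height = 128.0; // model input size
	double ratio_x = 0.3, ratio_y = 0.5;                            // values read from the output tensor

	// Ratio [A] between the original image size and the input tensor size.
	double width_ratio = ori_src_width / input_tensor_width;    // 5.0
	double height_ratio = ori_src_height / input_tensor_height; // 3.75

	// Same formula as in FldTweakCnn::result(); it reduces to original image size * ratio value.
	unsigned int x = static_cast<unsigned int>(width_ratio * input_tensor_width * ratio_x);   // 192
	unsigned int y = static_cast<unsigned int>(height_ratio * input_tensor_height * ratio_y); // 240

	std::cout << "landmark at (" << x << ", " << y << ")" << std::endl;
}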
diff --git a/mv_machine_learning/landmark_detection/src/landmark_detection.cpp b/mv_machine_learning/landmark_detection/src/landmark_detection.cpp
new file mode 100644 (file)
index 0000000..58c5f73
--- /dev/null
@@ -0,0 +1,313 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <fstream>
+#include <map>
+#include <memory>
+#include <algorithm>
+
+#include "machine_learning_exception.h"
+#include "mv_machine_learning_common.h"
+#include "mv_landmark_detection_config.h"
+#include "landmark_detection.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace MediaVision::Common;
+using namespace mediavision::common;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+LandmarkDetection::LandmarkDetection(LandmarkDetectionTaskType task_type)
+               : _task_type(task_type), _backendType(), _targetDeviceType()
+{
+       _inference = make_unique<Inference>();
+       _parser = make_unique<LandmarkDetectionParser>();
+}
+
+LandmarkDetectionTaskType LandmarkDetection::getTaskType()
+{
+       return _task_type;
+}
+
+void LandmarkDetection::getEngineList()
+{
+       for (auto idx = MV_INFERENCE_BACKEND_NONE + 1; idx < MV_INFERENCE_BACKEND_MAX; ++idx) {
+               auto backend = _inference->getSupportedInferenceBackend(idx);
+               // TODO. We need to describe which inference engines are supported by each task API,
+               //       and based on that, check whether a given engine type is supported by this
+               //       task API. As of now, only tflite is supported.
+               if (backend.second == true && backend.first.compare("tflite") == 0)
+                       _valid_backends.push_back(backend.first);
+       }
+}
+
+void LandmarkDetection::getDeviceList(const char *engine_type)
+{
+       // TODO. Add device types available for a given engine type later.
+       //       By default, cpu and gpu only.
+       _valid_devices.push_back("cpu");
+       _valid_devices.push_back("gpu");
+}
+
+void LandmarkDetection::setEngineInfo(std::string engine_type, std::string device_type)
+{
+       if (engine_type.empty() || device_type.empty())
+               throw InvalidParameter("Invalid engine info.");
+
+       transform(engine_type.begin(), engine_type.end(), engine_type.begin(), ::toupper);
+       transform(device_type.begin(), device_type.end(), device_type.begin(), ::toupper);
+
+       _backendType = GetBackendType(engine_type);
+       _targetDeviceType = GetDeviceType(device_type);
+
+       LOGI("Engine type : %s => %d, Device type : %s => %d", engine_type.c_str(), GetBackendType(engine_type),
+                device_type.c_str(), GetDeviceType(device_type));
+
+       if (_backendType == MEDIA_VISION_ERROR_INVALID_PARAMETER ||
+               _targetDeviceType == MEDIA_VISION_ERROR_INVALID_PARAMETER)
+               throw InvalidParameter("backend or target device type not found.");
+}
+
+void LandmarkDetection::getNumberOfEngines(unsigned int *number_of_engines)
+{
+       if (!_valid_backends.empty()) {
+               *number_of_engines = _valid_backends.size();
+               return;
+       }
+
+       getEngineList();
+       *number_of_engines = _valid_backends.size();
+}
+
+void LandmarkDetection::getEngineType(unsigned int engine_index, char **engine_type)
+{
+       if (!_valid_backends.empty()) {
+               if (_valid_backends.size() <= engine_index)
+                       throw InvalidParameter("Invalid engine index.");
+
+               *engine_type = const_cast<char *>(_valid_backends[engine_index].data());
+               return;
+       }
+
+       getEngineList();
+
+       if (_valid_backends.size() <= engine_index)
+               throw InvalidParameter("Invalid engine index.");
+
+       *engine_type = const_cast<char *>(_valid_backends[engine_index].data());
+}
+
+void LandmarkDetection::getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices)
+{
+       if (!_valid_devices.empty()) {
+               *number_of_devices = _valid_devices.size();
+               return;
+       }
+
+       getDeviceList(engine_type);
+       *number_of_devices = _valid_devices.size();
+}
+
+void LandmarkDetection::getDeviceType(const char *engine_type, const unsigned int device_index, char **device_type)
+{
+       if (!_valid_devices.empty()) {
+               if (_valid_devices.size() <= device_index)
+                       throw InvalidParameter("Invalid device index.");
+
+               *device_type = const_cast<char *>(_valid_devices[device_index].data());
+               return;
+       }
+
+       getDeviceList(engine_type);
+
+       if (_valid_devices.size() <= device_index)
+               throw InvalidParameter("Invalid device index.");
+
+       *device_type = const_cast<char *>(_valid_devices[device_index].data());
+}
+
+void LandmarkDetection::setUserModel(string model_file, string meta_file, string label_file)
+{
+       _modelFilePath = model_file;
+       _modelMetaFilePath = meta_file;
+       _modelLabelFilePath = label_file;
+}
+
+static bool IsJsonFile(const string &fileName)
+{
+       return (!fileName.substr(fileName.find_last_of(".") + 1).compare("json"));
+}
+
+void LandmarkDetection::loadLabel()
+{
+       ifstream readFile;
+
+       _labels.clear();
+       readFile.open(_modelLabelFilePath.c_str());
+
+       if (readFile.fail())
+               throw InvalidOperation("Fail to open " + _modelLabelFilePath + " file.");
+
+       string line;
+
+       while (getline(readFile, line))
+               _labels.push_back(line);
+
+       readFile.close();
+}
+
+void LandmarkDetection::parseMetaFile(const char *meta_file_name)
+{
+       _config = make_unique<EngineConfig>(string(MV_CONFIG_PATH) + string(meta_file_name));
+
+       int ret = _config->getIntegerAttribute(string(MV_LANDMARK_DETECTION_BACKEND_TYPE), &_backendType);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get backend engine type.");
+
+       ret = _config->getIntegerAttribute(string(MV_LANDMARK_DETECTION_TARGET_DEVICE_TYPE), &_targetDeviceType);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get target device type.");
+
+       ret = _config->getStringAttribute(MV_LANDMARK_DETECTION_MODEL_DEFAULT_PATH, &_modelDefaultPath);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get model default path");
+
+       if (_modelFilePath.empty()) {
+               ret = _config->getStringAttribute(MV_LANDMARK_DETECTION_MODEL_FILE_PATH, &_modelFilePath);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw InvalidOperation("Fail to get model file path");
+       }
+
+       _modelFilePath = _modelDefaultPath + _modelFilePath;
+       LOGI("model file path = %s", _modelFilePath.c_str());
+
+       if (_modelMetaFilePath.empty()) {
+               ret = _config->getStringAttribute(MV_LANDMARK_DETECTION_MODEL_META_FILE_PATH, &_modelMetaFilePath);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw InvalidOperation("Fail to get model meta file path");
+
+               if (_modelMetaFilePath.empty())
+                       throw InvalidOperation("Model meta file doesn't exist.");
+
+               if (!IsJsonFile(_modelMetaFilePath))
+                       throw InvalidOperation("Model meta file should be json");
+       }
+
+       _modelMetaFilePath = _modelDefaultPath + _modelMetaFilePath;
+       LOGI("meta file path = %s", _modelMetaFilePath.c_str());
+
+       _parser->setTaskType(static_cast<int>(_task_type));
+       _parser->load(_modelMetaFilePath);
+
+       if (_modelLabelFilePath.empty()) {
+               ret = _config->getStringAttribute(MV_LANDMARK_DETECTION_LABEL_FILE_NAME, &_modelLabelFilePath);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw InvalidOperation("Fail to get label file path");
+
+               if (_modelLabelFilePath.empty()) {
+                       LOGW("Label doesn't exist.");
+                       return;
+               }
+       }
+
+       _modelLabelFilePath = _modelDefaultPath + _modelLabelFilePath;
+       LOGI("label file path = %s", _modelLabelFilePath.c_str());
+
+       loadLabel();
+}
+
+void LandmarkDetection::configure()
+{
+       int ret = _inference->bind(_backendType, _targetDeviceType);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to bind a backend engine.");
+}
+
+void LandmarkDetection::prepare()
+{
+       int ret = _inference->configureInputMetaInfo(_parser->getInputMetaMap());
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to configure input tensor info from meta file.");
+
+       ret = _inference->configureOutputMetaInfo(_parser->getOutputMetaMap());
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to configure output tensor info from meta file.");
+
+       _inference->configureModelFiles("", _modelFilePath, "");
+
+       // Request to load model files to a backend engine.
+       ret = _inference->load();
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to load model files.");
+}
+
+void LandmarkDetection::preprocess(mv_source_h &mv_src)
+{
+       LOGI("ENTER");
+
+       TensorBuffer &tensor_buffer_obj = _inference->getInputTensorBuffer();
+       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
+       vector<mv_source_h> mv_srcs = { mv_src };
+
+       _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer);
+
+       LOGI("LEAVE");
+}
+
+void LandmarkDetection::inference(mv_source_h source)
+{
+       LOGI("ENTER");
+
+       vector<mv_source_h> sources;
+
+       sources.push_back(source);
+
+       int ret = _inference->run();
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to run inference");
+
+       LOGI("LEAVE");
+}
+
+void LandmarkDetection::getOutputNames(vector<string> &names)
+{
+       TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
+       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
+
+       for (IETensorBuffer::iterator it = ie_tensor_buffer.begin(); it != ie_tensor_buffer.end(); it++)
+               names.push_back(it->first);
+}
+
+void LandmarkDetection::getOutputTensor(string target_name, vector<float> &tensor)
+{
+       TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
+
+       inference_engine_tensor_buffer *tensor_buffer = tensor_buffer_obj.getTensorBuffer(target_name);
+       if (!tensor_buffer)
+               throw InvalidOperation("Fail to get tensor buffer.");
+
+       auto raw_buffer = static_cast<float *>(tensor_buffer->buffer);
+
+       copy(&raw_buffer[0], &raw_buffer[tensor_buffer->size / sizeof(float)], back_inserter(tensor));
+}
+
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/src/mv_facial_landmark.c b/mv_machine_learning/landmark_detection/src/mv_facial_landmark.c
new file mode 100644 (file)
index 0000000..1936fcd
--- /dev/null
@@ -0,0 +1,213 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "mv_facial_landmark_internal.h"
+#include "mv_facial_landmark_open.h"
+
+/**
+ * @file  mv_facial_landmark.c
+ * @brief This file contains the Media Vision facial landmark detection module.
+ */
+
+int mv_facial_landmark_create(mv_facial_landmark_h *handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_NULL_ARG_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_create_open(handle);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+       return ret;
+}
+
+int mv_facial_landmark_destroy(mv_facial_landmark_h handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_destroy_open(handle);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+       return ret;
+}
+
+int mv_facial_landmark_set_model(mv_facial_landmark_h handle, const char *model_name, const char *model_file,
+                                                                const char *meta_file, const char *label_file)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(model_name);
+       MEDIA_VISION_NULL_ARG_CHECK(model_file);
+       MEDIA_VISION_NULL_ARG_CHECK(meta_file);
+       MEDIA_VISION_NULL_ARG_CHECK(label_file);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_set_model_open(handle, model_name, model_file, meta_file, label_file);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_set_engine(mv_facial_landmark_h handle, const char *backend_type, const char *device_type)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(backend_type);
+       MEDIA_VISION_NULL_ARG_CHECK(device_type);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_set_engine_open(handle, backend_type, device_type);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_get_engine_count(mv_facial_landmark_h handle, unsigned int *engine_count)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(engine_count);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_get_engine_count_open(handle, engine_count);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_get_engine_type(mv_facial_landmark_h handle, const unsigned int engine_index, char **engine_type)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(engine_type);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_get_engine_type_open(handle, engine_index, engine_type);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_get_device_count(mv_facial_landmark_h handle, const char *engine_type,
+                                                                               unsigned int *device_count)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(device_count);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_get_device_count_open(handle, engine_type, device_count);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_get_device_type(mv_facial_landmark_h handle, const char *engine_type,
+                                                                          const unsigned int device_index, char **device_type)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(engine_type);
+       MEDIA_VISION_NULL_ARG_CHECK(device_type);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_get_device_type_open(handle, engine_type, device_index, device_type);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_configure(mv_facial_landmark_h handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_configure_open(handle);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+       return ret;
+}
+
+int mv_facial_landmark_prepare(mv_facial_landmark_h handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_prepare_open(handle);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+       return ret;
+}
+
+int mv_facial_landmark_inference(mv_facial_landmark_h handle, mv_source_h source)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_image_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_INSTANCE_CHECK(source);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_inference_open(handle, source);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_facial_landmark_get_positions(mv_facial_landmark_h handle, unsigned int *number_of_landmarks,
+                                                                        unsigned int **pos_x, unsigned int **pos_y)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_image_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(number_of_landmarks);
+       MEDIA_VISION_NULL_ARG_CHECK(pos_x);
+       MEDIA_VISION_NULL_ARG_CHECK(pos_y);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_facial_landmark_get_positions_open(handle, number_of_landmarks, pos_x, pos_y);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
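
For context, the end-to-end call sequence of the native API above matches what the test suite at the bottom of this patch exercises. A minimal sketch, assuming the default model described by facial_landmark.json, the internal header used by the test, and an mv_source_h already filled with a face image (error handling omitted):

#include "mv_facial_landmark_internal.h"

void facial_landmark_sample(mv_source_h source)
{
	mv_facial_landmark_h handle;
	unsigned int number_of_landmarks = 0;
	unsigned int *pos_x = NULL, *pos_y = NULL;

	mv_facial_landmark_create(&handle);
	// Empty strings select the default model described by facial_landmark.json.
	mv_facial_landmark_set_model(handle, "", "", "", "");
	mv_facial_landmark_set_engine(handle, "tflite", "cpu");
	mv_facial_landmark_configure(handle);
	mv_facial_landmark_prepare(handle);
	mv_facial_landmark_inference(handle, source);
	mv_facial_landmark_get_positions(handle, &number_of_landmarks, &pos_x, &pos_y);

	// pos_x/pos_y point at buffers owned by the handle (see the result vectors returned by
	// mv_facial_landmark_get_positions_open()), so copy them out if they are needed after
	// the next inference or after the handle is destroyed.
	mv_facial_landmark_destroy(handle);
}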
diff --git a/mv_machine_learning/landmark_detection/src/mv_facial_landmark_open.cpp b/mv_machine_learning/landmark_detection/src/mv_facial_landmark_open.cpp
new file mode 100644 (file)
index 0000000..a4c8d75
--- /dev/null
@@ -0,0 +1,350 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "itask.h"
+#include "mv_facial_landmark_open.h"
+#include "facial_landmark_adapter.h"
+#include "machine_learning_exception.h"
+#include "landmark_detection_type.h"
+#include "context.h"
+
+#include <new>
+#include <unistd.h>
+#include <string>
+#include <algorithm>
+#include <mutex>
+#include <iostream>
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::common;
+using namespace mediavision::machine_learning;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning::exception;
+using LandmarkDetectionTask = ITask<LandmarkDetectionInput, FacialLandmarkResult>;
+
+static mutex g_facial_landmark_mutex;
+
+int mv_facial_landmark_create_open(mv_facial_landmark_h *handle)
+{
+       if (!handle) {
+               LOGE("Handle can't be created because handle pointer is NULL");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       Context *context = nullptr;
+       LandmarkDetectionTask *task = nullptr;
+
+       try {
+               context = new Context();
+               task = new FacialLandmarkAdapter<LandmarkDetectionInput, FacialLandmarkResult>();
+               context->__tasks.insert(make_pair("facial_landmark", task));
+               *handle = static_cast<mv_facial_landmark_h>(context);
+       } catch (const BaseException &e) {
+               delete task;
+               delete context;
+               return e.getError();
+       }
+
+       LOGD("facial landmark handle [%p] has been created", *handle);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_destroy_open(mv_facial_landmark_h handle)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       auto context = static_cast<Context *>(handle);
+
+       for (auto &m : context->__tasks)
+               delete static_cast<LandmarkDetectionTask *>(m.second);
+
+       delete context;
+
+       LOGD("facial landmark handle has been destroyed.");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_set_model_open(mv_facial_landmark_h handle, const char *model_name, const char *model_file,
+                                                                         const char *meta_file, const char *label_file)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->setModelInfo(model_file, meta_file, label_file, model_name);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_set_engine_open(mv_facial_landmark_h handle, const char *backend_type, const char *device_type)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->setEngineInfo(backend_type, device_type);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_get_engine_count_open(mv_facial_landmark_h handle, unsigned int *engine_count)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->getNumberOfEngines(engine_count);
+               // TODO.
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_get_engine_type_open(mv_facial_landmark_h handle, const unsigned int engine_index,
+                                                                                       char **engine_type)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->getEngineType(engine_index, engine_type);
+               // TODO.
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_get_device_count_open(mv_facial_landmark_h handle, const char *engine_type,
+                                                                                        unsigned int *device_count)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->getNumberOfDevices(engine_type, device_count);
+               // TODO.
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_get_device_type_open(mv_facial_landmark_h handle, const char *engine_type,
+                                                                                       const unsigned int device_index, char **device_type)
+{
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->getDeviceType(engine_type, device_index, device_type);
+               // TODO.
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_configure_open(mv_facial_landmark_h handle)
+{
+       LOGD("ENTER");
+
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->configure();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_prepare_open(mv_facial_landmark_h handle)
+{
+       LOGD("ENTER");
+
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               task->prepare();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_inference_open(mv_facial_landmark_h handle, mv_source_h source)
+{
+       LOGD("ENTER");
+
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               LandmarkDetectionInput input = { source };
+
+               task->setInput(input);
+               task->perform();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_facial_landmark_get_positions_open(mv_facial_landmark_h handle, unsigned int *number_of_landmarks,
+                                                                                 unsigned int **pos_x, unsigned int **pos_y)
+{
+       LOGD("ENTER");
+
+       lock_guard<mutex> lock(g_facial_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
+
+               FacialLandmarkResult &result = task->getOutput();
+               *number_of_landmarks = result.number_of_landmarks;
+               *pos_x = result.x_pos.data();
+               *pos_y = result.y_pos.data();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
index 4d71a7931ecdab912c7abad4569dfa3d7c66336a..f0b578121adc2951f0c9bf36ec9ec30c45a411f2 100644 (file)
@@ -46,6 +46,7 @@ public:
        void parseBox(std::shared_ptr<MetaInfo> metaInfo, JsonObject *root);
        void parseScore(std::shared_ptr<MetaInfo> metaInfo, JsonObject *root);
        void parseNumber(std::shared_ptr<MetaInfo> metaInfo, JsonObject *root);
+       void parseLandmark(std::shared_ptr<MetaInfo> metaInfo, JsonObject *root);
 
        void SetTaskAnchorParser(std::shared_ptr<AnchorParser> anchorParser)
        {
index 47704360e5899fa89dd6d7dec5be487345accfb1..e8112bab91a28ee46d1e58a7a4ac95bd52811a41 100644 (file)
@@ -29,7 +29,7 @@ namespace mediavision
 {
 namespace machine_learning
 {
-enum class DecodingType { NORMAL, QUANTIZATION, DEQUANTIZATION, BOX, SCORE, LABEL, NUMBER };
+enum class DecodingType { NORMAL, QUANTIZATION, DEQUANTIZATION, BOX, SCORE, LABEL, NUMBER, LANDMARK };
 
 enum class ScoreType { NORMAL, SIGMOID };
 
@@ -39,8 +39,14 @@ enum class BoxCoordinateType { RATIO, PIXEL };
 
 enum class BoxType { LEFTTOP, CENTER };
 
+enum class LandmarkType { SINGLE_2D, MULTI_2D };
+
 enum class BoxNmsMode { NONE = -1, STANDARD };
 
+enum class LandmarkCoordinateType { RATIO, PIXEL };
+
+enum class LandmarkDecodingType { BYPASS, BYPASS_MULTICHANNEL, HEATMAP, HEATMAP_REFINE };
+
 struct InputSizeInfo {
        size_t imageWidth;
        size_t imageHeight;
@@ -141,6 +147,13 @@ struct DecodingDeQuantization {
        double zeropoint;
 };
 
+struct DecodingLandmark {
+       LandmarkType landmark_type;
+       LandmarkCoordinateType coordinate_type;
+       LandmarkDecodingType decoding_type;
+       unsigned int offset;
+};
+
 struct DecodingInfoAnchor {};
 
 struct DecodingInfoNms {};
index 0d5d830d43ee4a6a60e81f858ae8b62a7076cde0..fb4effac3945d6e34a359a67c38135cae69935de 100644 (file)
@@ -41,6 +41,18 @@ std::map<std::string, BoxDecodingType> gSupportedBoxDecodingTypes = { { "BYPASS"
                                                                                                                                          { "3D", BoxDecodingType::BBOX_3D } };
 std::map<std::string, ScoreType> gSupportedScoreTypes = { { "NORMAL", ScoreType::NORMAL },
                                                                                                                  { "SIGMOID", ScoreType::SIGMOID } };
+std::map<std::string, LandmarkType> gSupportedLandmarkTypes = { { "2D_SINGLE", LandmarkType::SINGLE_2D },
+                                                                                                                               { "2D_MULTI", LandmarkType::MULTI_2D } };
+std::map<std::string, LandmarkCoordinateType> gSupportedLandmarkCoordinateTypes = {
+       { "RATIO", LandmarkCoordinateType::RATIO },
+       { "PIXEL", LandmarkCoordinateType::PIXEL }
+};
+std::map<std::string, LandmarkDecodingType> gSupportedLandmarkDecodingTypes = {
+       { "BYPASS", LandmarkDecodingType::BYPASS },
+       { "BYPASS_MULTICHANNEL", LandmarkDecodingType::BYPASS_MULTICHANNEL },
+       { "HEATMAP", LandmarkDecodingType::HEATMAP },
+       { "HEATMAP_REFINE", LandmarkDecodingType::HEATMAP_REFINE }
+};
 
 /**
  * Function template.
@@ -100,7 +112,7 @@ void PostprocessParser::parseBox(shared_ptr<MetaInfo> metaInfo, JsonObject *root
 
        metaInfo->decodingTypeMap[DecodingType::BOX] = decodingBox;
 
-       // In case of bypss, we don't need to parse decoding_info.
+       // In case of bypass, we don't need to parse decoding_info.
        if (decodingBox->decodingType == BoxDecodingType::BYPASS)
                return;
 
@@ -155,7 +167,7 @@ void PostprocessParser::parseScore(shared_ptr<MetaInfo> metaInfo, JsonObject *ro
 
                if (json_object_has_member(object, "threshold")) {
                        decodingScore->threshold = static_cast<float>(json_object_get_double_member(object, "threshold"));
-                       LOGI("tthreshold : %f", decodingScore->threshold);
+                       LOGI("threshold : %f", decodingScore->threshold);
                }
 
                if (json_object_has_member(object, "score_type")) {
@@ -189,5 +201,42 @@ void PostprocessParser::parseNumber(shared_ptr<MetaInfo> metaInfo, JsonObject *r
        LOGI("LEAVE");
 }
 
+void PostprocessParser::parseLandmark(std::shared_ptr<MetaInfo> metaInfo, JsonObject *root)
+{
+       LOGI("ENTER");
+
+       if (!json_object_has_member(root, "landmark"))
+               throw InvalidOperation("landmark node doesn't exist");
+
+       shared_ptr<DecodingLandmark> decodingLandmark = make_shared<DecodingLandmark>();
+       JsonObject *object = json_object_get_object_member(root, "landmark");
+
+       try {
+               if (json_object_has_member(object, "landmark_type"))
+                       decodingLandmark->landmark_type = GetSupportedType<LandmarkType, map<string, LandmarkType> >(
+                                       object, "landmark_type", gSupportedLandmarkTypes);
+
+               if (json_object_has_member(object, "landmark_coordinate"))
+                       decodingLandmark->coordinate_type =
+                                       GetSupportedType<LandmarkCoordinateType, map<string, LandmarkCoordinateType> >(
+                                                       object, "landmark_coordinate", gSupportedLandmarkCoordinateTypes);
+
+               if (json_object_has_member(object, "decoding_type"))
+                       decodingLandmark->decoding_type =
+                                       GetSupportedType<LandmarkDecodingType, map<string, LandmarkDecodingType> >(
+                                                       object, "decoding_type", gSupportedLandmarkDecodingTypes);
+
+               if (json_object_has_member(object, "landmark_offset"))
+                       decodingLandmark->offset = static_cast<unsigned int>(json_object_get_int_member(object, "landmark_offset"));
+       } catch (const std::exception &e) {
+               LOGE("%s", e.what());
+               throw InvalidOperation("Invalid landmark meta information.");
+       }
+
+       metaInfo->decodingTypeMap[DecodingType::LANDMARK] = decodingLandmark;
+
+       LOGI("LEAVE");
+}
+
 } /* machine_learning */
 } /* mediavision */
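
The parseLandmark() handler above also documents the expected shape of the landmark section in a model meta file. A hypothetical fragment it would accept (key names come straight from the parser, values from the supported-type maps; the actual facial_landmark.json added by this patch is not shown in this hunk, and the landmark_offset value is illustrative):

    "landmark" : {
        "landmark_type" : "2D_SINGLE",
        "landmark_coordinate" : "RATIO",
        "decoding_type" : "BYPASS",
        "landmark_offset" : 2
    }

This combination (BYPASS decoding, RATIO coordinates, 2D_SINGLE landmarks) is exactly what FldTweakCnn::result() accepts, so it is the one the fld tweak cnn meta file is expected to use.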
index 005aef14c646c524205299d00a02dc0f6f273f75..8ba393b18874720862dd8f82d13b4d1aed2ca15f 100644 (file)
@@ -54,6 +54,7 @@ Requires:      training-engine-interface-common
 %define enable_ml_image_classification 1
 %define enable_ml_object_detection 1
 %define enable_ml_object_detection_3d 1
+%define enable_ml_landmark_detection 1
 
 %define build_depth_stream_testsuite 1
 BuildRequires: pkgconfig(vision-source)
@@ -63,7 +64,7 @@ BuildRequires: Open3D-devel
 %endif
 %endif
 %define build_options -DENABLE_INFERENCE_PROFILER=0 -DBUILD_DEPTH_STREAM_TESTSUITE=%{build_depth_stream_testsuite} -DMV_3D_POINTCLOUD_IS_AVAILABLE=%{enable_mv3d_pointcloud}
-%define task_group_options -DENABLE_ML_FACE_RECOGNITION=%{enable_ml_face_recognition} -DENABLE_ML_IMAGE_CLASSIFICATION=%{enable_ml_image_classification} -DENABLE_ML_OBJECT_DETECTION=%{enable_ml_object_detection} -DENABLE_ML_OBJECT_DETECTION_3D=%{enable_ml_object_detection_3d}
+%define task_group_options -DENABLE_ML_FACE_RECOGNITION=%{enable_ml_face_recognition} -DENABLE_ML_IMAGE_CLASSIFICATION=%{enable_ml_image_classification} -DENABLE_ML_OBJECT_DETECTION=%{enable_ml_object_detection} -DENABLE_ML_OBJECT_DETECTION_3D=%{enable_ml_object_detection_3d} -DENABLE_ML_LANDMARK_DETECTION=%{enable_ml_landmark_detection}
 Requires:   %{name}-machine_learning
 Requires:   inference-engine-interface-common
 %if !0%{?ml_only:1}
@@ -325,6 +326,9 @@ test_main() {
 %if "%{enable_ml_object_detection_3d}" == "1"
     /usr/bin/test_object_detection_3d
 %endif
+%if "%{enable_ml_landmark_detection}" == "1"
+    /usr/bin/test_landmark_detection
+%endif
 }
 
 teardown() {
@@ -447,6 +451,10 @@ find . -name '*.gcno' -not -path "./test/*" -exec cp --parents '{}' "$gcno_obj_d
 %{_datadir}/%{name}/object_detection_3d.json
 %{_libdir}/libmv_object_detection_3d.so
 %endif
+%if "%{enable_ml_landmark_detection}" == "1"
+%{_datadir}/%{name}/facial_landmark.json
+%{_libdir}/libmv_landmark_detection.so
+%endif
 
 %files machine_learning-devel
 %{_includedir}/media/mv_infer*.h
@@ -473,6 +481,11 @@ find . -name '*.gcno' -not -path "./test/*" -exec cp --parents '{}' "$gcno_obj_d
 %{_includedir}/media/mv_object_detection_3d_type.h
 %{_libdir}/pkgconfig/*object-detection-3d.pc
 %endif
+%if "%{enable_ml_landmark_detection}" == "1"
+%{_includedir}/media/mv_facial_landmark_internal.h
+%{_includedir}/media/mv_facial_landmark_type.h
+%{_libdir}/pkgconfig/*landmark-detection.pc
+%endif
 
 %files roi_tracker
 %manifest %{name}.manifest
@@ -508,6 +521,9 @@ find . -name '*.gcno' -not -path "./test/*" -exec cp --parents '{}' "$gcno_obj_d
 %if "%{enable_ml_object_detection_3d}" == "1"
 %{_bindir}/test_object_detection_3d
 %endif
+%if "%{enable_ml_landmark_detection}" == "1"
+%{_bindir}/test_landmark_detection
+%endif
 %{_bindir}/tizen-unittests/%{name}/run-unittest.sh
 %endif
 
index 9b8e668ffa53f076a59107313a1412dbdd219c19..f80d36936e990a295685462845f1fba1f66c60fe 100644 (file)
@@ -15,6 +15,10 @@ if (${ENABLE_ML_OBJECT_DETECTION_3D})
     message("Enabled object detection 3d test case.")
     add_subdirectory(${PROJECT_SOURCE_DIR}/object_detection_3d)
 endif()
+if (${ENABLE_ML_LANDMARK_DETECTION})
+    message("Enabled landmark detection test case.")
+    add_subdirectory(${PROJECT_SOURCE_DIR}/landmark_detection)
+endif()
 if (${ENABLE_ML_FACE_RECOGNITION})
     message("Enabled machine learning face recognition test case.")
     add_subdirectory(${PROJECT_SOURCE_DIR}/face_recognition)
diff --git a/test/testsuites/machine_learning/landmark_detection/CMakeLists.txt b/test/testsuites/machine_learning/landmark_detection/CMakeLists.txt
new file mode 100644 (file)
index 0000000..9023e57
--- /dev/null
@@ -0,0 +1,14 @@
+project(mv_landmark_detection_suite)
+cmake_minimum_required(VERSION 2.6...3.13)
+
+set(TEST_LANDMARK_DETECTION test_landmark_detection)
+
+add_executable(${TEST_LANDMARK_DETECTION} test_landmark_detection.cpp)
+
+target_link_libraries(${TEST_LANDMARK_DETECTION} gtest gtest_main
+                      mv_inference
+                      mv_landmark_detection
+                      mv_image_helper
+)
+
+install(TARGETS ${TEST_LANDMARK_DETECTION} DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/test/testsuites/machine_learning/landmark_detection/test_landmark_detection.cpp b/test/testsuites/machine_learning/landmark_detection/test_landmark_detection.cpp
new file mode 100644 (file)
index 0000000..d0b01d5
--- /dev/null
@@ -0,0 +1,148 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <algorithm>
+#include <string.h>
+
+#include "gtest/gtest.h"
+
+#include "ImageHelper.h"
+#include "mv_facial_landmark_internal.h"
+
+#define IMG_FACE MV_CONFIG_PATH "res/inference/images/faceLandmark.jpg"
+
+using namespace testing;
+using namespace std;
+
+using namespace MediaVision::Common;
+
+struct model_info {
+       string model_name;
+       string model_file;
+       string meta_file;
+       string label_file;
+};
+
+TEST(FacialLandmarkTest, GettingAvailableInferenceEnginesInfoShouldBeOk)
+{
+       mv_facial_landmark_h handle;
+
+       int ret = mv_facial_landmark_create(&handle);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       unsigned int engine_count = 0;
+
+       ret = mv_facial_landmark_get_engine_count(handle, &engine_count);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       cout << "Engine count = " << engine_count << endl;
+       ASSERT_GE(engine_count, 1);
+
+       for (unsigned int engine_idx = 0; engine_idx < engine_count; ++engine_idx) {
+               char *engine_type = nullptr;
+
+               ret = mv_facial_landmark_get_engine_type(handle, engine_idx, &engine_type);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               cout << "Engine type : " << engine_type << endl;
+
+               unsigned int device_count = 0;
+
+               ret = mv_facial_landmark_get_device_count(handle, engine_type, &device_count);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               cout << "Device count = " << device_count << endl;
+
+               ASSERT_GE(device_count, 1);
+
+               for (unsigned int device_idx = 0; device_idx < device_count; ++device_idx) {
+                       char *device_type = nullptr;
+
+                       ret = mv_facial_landmark_get_device_type(handle, engine_type, device_idx, &device_type);
+                       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+                       cout << "Device type : " << device_type << endl;
+               }
+       }
+
+       ret = mv_facial_landmark_destroy(handle);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+}
+
+TEST(FacialLandmarkTest, InferenceShouldBeOk)
+{
+       mv_facial_landmark_h handle;
+       vector<model_info> test_models {
+               { "", "", "", "" }, // If empty, the default model will be used.
+               { "FLD_TWEAK_CNN", "fld_tweakcnn_128x128.tflite", "fld_tweakcnn_128x128.json", "" }
+               // TODO.
+       };
+
+       const unsigned int answer[2][5] = { { 42, 87, 63, 48, 83 }, { 32, 31, 53, 75, 76 } };
+
+       mv_source_h mv_source = NULL;
+       int ret = mv_create_source(&mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       ret = ImageHelper::loadImageToSource(IMG_FACE, mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       for (auto model : test_models) {
+               cout << "model name : " << model.model_file << endl;
+
+               ret = mv_facial_landmark_create(&handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_facial_landmark_set_model(handle, model.model_name.c_str(), model.model_file.c_str(),
+                                                                                  model.meta_file.c_str(), model.label_file.c_str());
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_facial_landmark_set_engine(handle, "tflite", "cpu");
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_facial_landmark_configure(handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_facial_landmark_prepare(handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_facial_landmark_inference(handle, mv_source);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               unsigned int number_of_landmarks;
+               unsigned int *x_pos = nullptr, *y_pos = nullptr;
+
+               ret = mv_facial_landmark_get_positions(handle, &number_of_landmarks, &x_pos, &y_pos);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               for (unsigned int idx = 0; idx < number_of_landmarks; ++idx) {
+                       // Compute the signed distance in int to avoid unsigned wrap-around when the
+                       // detected position is smaller than the expected one.
+                       int distance_x = static_cast<int>(x_pos[idx]) - static_cast<int>(answer[0][idx]);
+                       int distance_y = static_cast<int>(y_pos[idx]) - static_cast<int>(answer[1][idx]);
+
+                       distance_x = distance_x < 0 ? -distance_x : distance_x;
+                       distance_y = distance_y < 0 ? -distance_y : distance_y;
+
+                       ASSERT_TRUE(distance_x <= 2 && distance_y <= 2);
+               }
+
+               ret = mv_facial_landmark_destroy(handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+       }
+
+       ret = mv_destroy_source(mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+}
\ No newline at end of file