mv_machine_learning: add pose landmark task API
authorInki Dae <inki.dae@samsung.com>
Mon, 24 Apr 2023 04:06:23 +0000 (13:06 +0900)
committerKwanghoon Son <k.son@samsung.com>
Wed, 28 Jun 2023 09:51:32 +0000 (18:51 +0900)
[Issue type] : new feature

Add pose landmark task API.

Pose landmark is one of landmark task groups. Therefore,
pose landmark task API is implemented in landmark task group
directory but provides separate task API.

As an initial model support for pose landmark, this patch implements
the CPM(Convolutional Pose Machines) model.

Change-Id: Ic55b673619c04873abb496b6670d15ebc79a9f62
Signed-off-by: Inki Dae <inki.dae@samsung.com>
22 files changed:
CMakeLists.txt
include/mv_pose_landmark_internal.h [new file with mode: 0644]
include/mv_pose_landmark_type.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/CMakeLists.txt
mv_machine_learning/landmark_detection/include/fld_tweak_cnn.h
mv_machine_learning/landmark_detection/include/landmark_detection.h
mv_machine_learning/landmark_detection/include/landmark_detection_type.h
mv_machine_learning/landmark_detection/include/mv_landmark_detection_config.h
mv_machine_learning/landmark_detection/include/mv_pose_landmark_open.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/pld_cpm.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/include/pose_landmark_adapter.h [new file with mode: 0644]
mv_machine_learning/landmark_detection/meta/pose_landmark.json [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp
mv_machine_learning/landmark_detection/src/fld_tweak_cnn.cpp
mv_machine_learning/landmark_detection/src/landmark_detection.cpp
mv_machine_learning/landmark_detection/src/mv_facial_landmark_open.cpp
mv_machine_learning/landmark_detection/src/mv_pose_landmark.c [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/mv_pose_landmark_open.cpp [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/pld_cpm.cpp [new file with mode: 0644]
mv_machine_learning/landmark_detection/src/pose_landmark_adapter.cpp [new file with mode: 0644]
packaging/capi-media-vision.spec
test/testsuites/machine_learning/landmark_detection/test_landmark_detection.cpp

index 45329d9c1b96be9e9de2ba665458359d4714055e..a808f4894a90ac72b095447b88f725a54ba6e40d 100644 (file)
@@ -234,6 +234,7 @@ if (${ENABLE_ML_LANDMARK_DETECTION})
     )
     install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-landmark-detection.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig)
     install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/landmark_detection/meta/facial_landmark.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name})
+    install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/landmark_detection/meta/pose_landmark.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name})
        list(APPEND TOTAL_REQUIRED ${PC_NAME})
        list(APPEND TOTAL_LDFLAGS ${PC_LDFLAGS})
 endif()
diff --git a/include/mv_pose_landmark_internal.h b/include/mv_pose_landmark_internal.h
new file mode 100644 (file)
index 0000000..50d932d
--- /dev/null
@@ -0,0 +1,281 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_POSE_LANDMARK_INTERNAL_H__
+#define __MEDIA_VISION_POSE_LANDMARK_INTERNAL_H__
+
+#include <mv_common.h>
+#include <mv_private.h>
+#include <mv_pose_landmark_type.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @brief Creates pose landmark object handle.
+ * @details Use this function to create a pose landmark object handle.
+ *          After creation the handle has to be prepared with
+ *          @ref mv_pose_landmark_prepare() function to prepare
+ *               a pose landmark object.
+ *
+ * @since_tizen 7.5
+ *
+ * @param[out] out_handle    The handle to the pose landmark object to be created
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ *
+ * @post Release @a handle by using
+ *       @ref mv_pose_landmark_destroy() function when it is not needed
+ *       anymore
+ *
+ * @see mv_pose_landmark_destroy()
+ */
+int mv_pose_landmark_create(mv_pose_landmark_h *out_handle);
+
+/**
+ * @brief Destroys pose landmark handle and releases all its resources.
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle    The handle to the pose landmark object to be destroyed.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create a pose landmark handle by using @ref mv_pose_landmark_create()
+ *
+ * @see mv_pose_landmark_create()
+ */
+int mv_pose_landmark_destroy(mv_pose_landmark_h handle);
+
+/**
+ * @brief Set user-given model information.
+ * @details Use this function to change the model information instead of default one after calling @ref mv_pose_landmark_create().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle        The handle to the pose landmark object.
+ * @param[in] model_name    Model name.
+ * @param[in] model_file    Model file name.
+ * @param[in] meta_file     Model meta file name.
+ * @param[in] label_file    Label file name.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create()
+ */
+int mv_pose_landmark_set_model(mv_pose_landmark_h handle, const char *model_name, const char *model_file,
+                                                          const char *meta_file, const char *label_file);
+
+/**
+ * @brief Configures the backend to the inference handle
+ *
+ * @since_tizen 7.5
+ *
+ * @param [in] handle         The handle to the inference
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ */
+int mv_pose_landmark_configure(mv_pose_landmark_h handle);
+
+/**
+ * @brief Prepares inference.
+ * @details Use this function to prepare inference based on
+ *          the configured network.
+ *
+ * @since_tizen 7.5
+ *
+ * @param [in] handle         The handle to the inference
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ */
+int mv_pose_landmark_prepare(mv_pose_landmark_h handle);
+
+/**
+ *
+ * @brief Inferences with a given raw image on the @a source
+ * @details Use this function to inference with a given source.
+ *
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the pose landmark object.
+ * @param[in] source         The handle to the source of the media.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ *                                                  isn't supported
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ *
+ * @pre Create a source handle by calling @ref mv_create_source()
+ * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create()
+ * @pre Prepare an inference by calling mv_pose_landmark_configure()
+ * @pre Prepare a pose landmark by calling @ref mv_pose_landmark_prepare()
+ */
+int mv_pose_landmark_inference(mv_pose_landmark_h handle, mv_source_h source);
+
+/**
+ * @brief Gets the pose landmark positions on the @a source.
+ *
+ * @since_tizen 7.5
+ * @remarks pos_x and pos_y arrays are allocated internally by the framework and will remain valid
+ *          until the handle is returned.
+ *          Please do not deallocate them directly, and if you want to use them after the handle is returned,
+ *          please copy them to user memory and use the copy.
+ * @param[in] handle               The handle to the inference
+ * @param[out] number_of_landmarks A number of landmarks detected.
+ * @param[out] pos_x               An array containing x-coordinate values.
+ * @param[out] pos_y               An array containing y-coordinate values.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_pose_landmark_create()
+ * @pre Prepare an inference by calling mv_pose_landmark_configure()
+ * @pre Prepare an inference by calling mv_pose_landmark_prepare()
+ * @pre Prepare an inference by calling mv_pose_landmark_inference()
+ */
+int mv_pose_landmark_get_pos(mv_pose_landmark_h handle, unsigned int *number_of_landmarks, unsigned int **pos_x,
+                                                        unsigned int **pos_y);
+
+/**
+ * @brief Set user-given backend and device types for inference.
+ * @details Use this function to change the backend and device types for inference instead of default ones after calling @ref mv_pose_landmark_create().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle        The handle to the pose landmark object.
+ * @param[in] backend_type  A string of backend type.
+ * @param[in] device_type   A string of device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create()
+ */
+int mv_pose_landmark_set_engine(mv_pose_landmark_h handle, const char *backend_type, const char *device_type);
+
+/**
+ * @brief Get a number of inference engines available for pose landmark task API.
+ * @details Use this function to get how many inference engines are supported for pose landmark after calling @ref mv_pose_landmark_create().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the pose landmark object.
+ * @param[out] engine_count  A number of inference engines available for pose landmark API.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create()
+ */
+int mv_pose_landmark_get_engine_count(mv_pose_landmark_h handle, unsigned int *engine_count);
+
+/**
+ * @brief Gets engine type to a given inference engine index.
+ * @details Use this function to get inference engine type with a given engine index after calling @ref mv_pose_landmark_get_engine_count().
+ *
+ * @since_tizen 7.5
+ * @remarks engine_type array is allocated internally by the framework and will remain valid
+ *          until the handle is returned.
+ *          Please do not deallocate it directly, and if you want to use it after the handle is returned,
+ *          please copy it to user memory and use the copy.
+ * @param[in] handle        The handle to the pose landmark object.
+ * @param[in] engine_index  An inference engine index for getting the inference engine type.
+ * @param[out] engine_type  A string to inference engine.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Get a number of inference engines available for pose landmark task API by calling @ref mv_pose_landmark_get_engine_count()
+ */
+int mv_pose_landmark_get_engine_type(mv_pose_landmark_h handle, const unsigned int engine_index, char **engine_type);
+
+/**
+ * @brief Gets a number of device types available to a given inference engine.
+ * @details Use this function to get how many device types are supported for a given inference engine after calling @ref mv_pose_landmark_create().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the pose landmark object.
+ * @param[in] engine_type    An inference engine string.
+ * @param[out] device_count  A number of device types available for a given inference engine.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create()
+ */
+int mv_pose_landmark_get_device_count(mv_pose_landmark_h handle, const char *engine_type, unsigned int *device_count);
+
+/**
+ * @brief Gets device type list available.
+ * @details Use this function to get what device types are supported for current inference engine type after calling @ref mv_pose_landmark_configure().
+ *
+ * @since_tizen 7.5
+ * @remarks device_type array is allocated internally by the framework and will remain valid
+ *          until the handle is returned.
+ *          Please do not deallocate it directly, and if you want to use it after the handle is returned,
+ *          please copy it to user memory and use the copy.
+ * @param[in] handle         The handle to the pose landmark object.
+ * @param[in] engine_type    An inference engine string.
+ * @param[in] device_index   A device index for getting the device type.
+ * @param[out] device_type   A string to device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create()
+ * @pre Configure pose landmark task by calling @ref mv_pose_landmark_configure()
+ */
+int mv_pose_landmark_get_device_type(mv_pose_landmark_h handle, const char *engine_type,
+                                                                        const unsigned int device_index, char **device_type);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __MEDIA_VISION_POSE_LANDMARK_INTERNAL_H__ */
diff --git a/include/mv_pose_landmark_type.h b/include/mv_pose_landmark_type.h
new file mode 100644 (file)
index 0000000..038f787
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_MV_POSE_LANDMARK_TYPE_H__
+#define __TIZEN_MEDIAVISION_MV_POSE_LANDMARK_TYPE_H__
+
+#include <mv_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file   mv_pose_landmark_type.h
+ * @brief  This file contains the pose landmark handle for Mediavision.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_POSE_LANDMARK_MODULE
+ * @{
+ */
+
+/**
+ * @brief The pose landmark object handle.
+ *
+ * @since_tizen 7.5
+ */
+typedef void *mv_pose_landmark_h;
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_MV_POSE_LANDMARK_TYPE_H__ */
index 516fa4a091423811a09392158e7f2a050016952f..010b0533679a38e8e8747e3050b78a069d1ad8d9 100644 (file)
@@ -24,4 +24,6 @@ install(
        FILES_MATCHING
        PATTERN "mv_facial_landmark_internal.h"
        PATTERN "mv_facial_landmark_type.h"
+       PATTERN "mv_pose_landmark_internal.h"
+       PATTERN "mv_pose_landmark_type.h"
        )
index 13e87c180d3d4ea7e00e56dd67451b350693340d..6f60474a84a0b215501fc7d4da83ff0bf89a2aee 100644 (file)
@@ -32,13 +32,13 @@ namespace machine_learning
 class FldTweakCnn : public LandmarkDetection
 {
 private:
-       FacialLandmarkResult _result;
+       LandmarkDetectionResult _result;
 
 public:
        FldTweakCnn(LandmarkDetectionTaskType task_type);
        ~FldTweakCnn();
 
-       FacialLandmarkResult &result() override;
+       LandmarkDetectionResult &result() override;
 };
 
 } // machine_learning
index 0b378bbfc81a4d9dece6eea70c85995d2f627a21..89b718ba38d2735d7e67f9cc812c0fb86fe91c51 100644 (file)
@@ -55,6 +55,7 @@ protected:
        std::string _modelLabelFilePath;
        int _backendType;
        int _targetDeviceType;
+       double _confidence_threshold;
 
        void getOutputNames(std::vector<std::string> &names);
        void getOutputTensor(std::string target_name, std::vector<float> &tensor);
@@ -74,7 +75,7 @@ public:
        void prepare();
        void preprocess(mv_source_h &mv_src);
        void inference(mv_source_h source);
-       virtual FacialLandmarkResult &result() = 0;
+       virtual LandmarkDetectionResult &result() = 0;
 };
 
 } // machine_learning
index 2b3d3ea0ad9704175a7101ac5df72d983572794b..6a208095fbdf24cd12d5a44f40f374d09815677a 100644 (file)
@@ -31,15 +31,18 @@ struct LandmarkDetectionInput {
        // TODO.
 };
 
-struct FacialLandmarkResult {
+struct LandmarkDetectionResult {
        unsigned int number_of_landmarks;
        std::vector<unsigned int> x_pos;
        std::vector<unsigned int> y_pos;
+       std::vector<float> scores;
+       std::vector<std::string> labels;
 };
 
 enum class LandmarkDetectionTaskType {
        LANDMARK_DETECTION_TASK_NONE = 0,
        FLD_TWEAK_CNN,
+       PLD_CPM
        // TODO
 };
 
index 8957553e155db641ba6f51114b780e4f062d26b7..2266bcfa4e847a5c8f2901c3027fe81e7421c8a9 100644 (file)
@@ -62,4 +62,6 @@
  */
 #define MV_LANDMARK_DETECTION_TARGET_DEVICE_TYPE "TARGET_DEVICE_TYPE"
 
+#define MV_LANDMARK_DETECTION_CONFIDENCE_THRESHOLD "CONFIDENCE_THRESHOLD"
+
 #endif /* __MEDIA_VISION_LANDMARK_DETECTION_CONFIG_H__ */
diff --git a/mv_machine_learning/landmark_detection/include/mv_pose_landmark_open.h b/mv_machine_learning/landmark_detection/include/mv_pose_landmark_open.h
new file mode 100644 (file)
index 0000000..5158c9a
--- /dev/null
@@ -0,0 +1,283 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_POSE_LANDMARK_OPEN_H__
+#define __MEDIA_VISION_POSE_LANDMARK_OPEN_H__
+
+#include <mv_common.h>
+#include <mv_private.h>
+#include <mv_pose_landmark_type.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @brief Creates pose landmark object handle.
+ * @details Use this function to create a pose landmark object handle.
+ *          After creation the handle has to be prepared with
+ *          @ref mv_pose_landmark_prepare_open() function to prepare
+ *               a pose landmark object.
+ *
+ * @since_tizen 7.5
+ *
+ * @param[out] out_handle    The handle to the pose landmark object to be created
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ *
+ * @post Release @a handle by using
+ *       @ref mv_pose_landmark_destroy_open() function when it is not needed
+ *       anymore
+ *
+ * @see mv_pose_landmark_destroy_open()
+ */
+int mv_pose_landmark_create_open(mv_pose_landmark_h *out_handle);
+
+/**
+ * @brief Destroys pose landmark handle and releases all its resources.
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle    The handle to the pose landmark object to be destroyed.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create a pose landmark handle by using @ref mv_pose_landmark_create_open()
+ *
+ * @see mv_pose_landmark_create_open()
+ */
+int mv_pose_landmark_destroy_open(mv_pose_landmark_h handle);
+
+/**
+ * @brief Set user-given model information.
+ * @details Use this function to change the model information instead of default one after calling @ref mv_pose_landmark_create_open().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle        The handle to the pose landmark object.
+ * @param[in] model_name    Model name.
+ * @param[in] model_file    Model file name.
+ * @param[in] meta_file     Model meta file name.
+ * @param[in] label_file    Label file name.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create_open()
+ */
+int mv_pose_landmark_set_model_open(mv_pose_landmark_h handle, const char *model_name, const char *model_file,
+                                                                       const char *meta_file, const char *label_file);
+
+/**
+ * @brief Configures the backend to the inference handle
+ *
+ * @since_tizen 7.5
+ *
+ * @param [in] handle         The handle to the inference
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ */
+int mv_pose_landmark_configure_open(mv_pose_landmark_h handle);
+
+/**
+ * @brief Prepares inference.
+ * @details Use this function to prepare inference based on
+ *          the configured network.
+ *
+ * @since_tizen 7.5
+ *
+ * @param [in] handle         The handle to the inference
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ */
+int mv_pose_landmark_prepare_open(mv_pose_landmark_h handle);
+
+/**
+ *
+ * @brief Inferences with a given raw image on the @a source
+ * @details Use this function to inference with a given source.
+ *
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the pose landmark object.
+ * @param[in] source         The handle to the source of the media.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ *                                                  isn't supported
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ *
+ * @pre Create a source handle by calling @ref mv_create_source()
+ * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create_open()
+ * @pre Prepare an inference by calling mv_pose_landmark_configure_open()
+ * @pre Prepare a pose landmark by calling @ref mv_pose_landmark_prepare_open()
+ */
+int mv_pose_landmark_inference_open(mv_pose_landmark_h handle, mv_source_h source);
+
+/**
+ * @brief Gets the pose landmark positions on the @a source.
+ *
+ * @since_tizen 7.5
+ * @remarks pos_x and pos_y arrays are allocated internally by the framework and will remain valid
+ *          until the handle is returned.
+ *          Please do not deallocate them directly, and if you want to use them after the handle is returned,
+ *          please copy them to user memory and use the copy.
+ * @param[in] handle               The handle to the inference
+ * @param[out] number_of_landmarks A number of landmarks detected.
+ * @param[out] pos_x               An array containing x-coordinate values.
+ * @param[out] pos_y               An array containing y-coordinate values.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_pose_landmark_create_open()
+ * @pre Prepare an inference by calling mv_pose_landmark_configure_open()
+ * @pre Prepare an inference by calling mv_pose_landmark_prepare_open()
+ * @pre Prepare an inference by calling mv_pose_landmark_inference_open()
+ */
+int mv_pose_landmark_get_pos_open(mv_pose_landmark_h handle, unsigned int *number_of_landmarks, unsigned int **pos_x,
+                                                                 unsigned int **pos_y);
+
+/**
+ * @brief Set user-given backend and device types for inference.
+ * @details Use this function to change the backend and device types for inference instead of default ones after calling @ref mv_pose_landmark_create_open().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle        The handle to the pose landmark object.
+ * @param[in] backend_type  A string of backend type.
+ * @param[in] device_type   A string of device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create_open()
+ */
+int mv_pose_landmark_set_engine_open(mv_pose_landmark_h handle, const char *backend_type, const char *device_type);
+
+/**
+ * @brief Get a number of inference engines available for pose landmark task API.
+ * @details Use this function to get how many inference engines are supported for pose landmark after calling @ref mv_pose_landmark_create_open().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the pose landmark object.
+ * @param[out] engine_count  A number of inference engines available for pose landmark API.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create_open()
+ */
+int mv_pose_landmark_get_engine_count_open(mv_pose_landmark_h handle, unsigned int *engine_count);
+
+/**
+ * @brief Gets engine type to a given inference engine index.
+ * @details Use this function to get inference engine type with a given engine index after calling @ref mv_pose_landmark_get_engine_count_open().
+ *
+ * @since_tizen 7.5
+ * @remarks engine_type array is allocated internally by the framework and will remain valid
+ *          until the handle is returned.
+ *          Please do not deallocate it directly, and if you want to use it after the handle is returned,
+ *          please copy it to user memory and use the copy.
+ * @param[in] handle        The handle to the pose landmark object.
+ * @param[in] engine_index  An inference engine index for getting the inference engine type.
+ * @param[out] engine_type  A string to inference engine.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Get a number of inference engines available for pose landmark task API by calling @ref mv_pose_landmark_get_engine_count_open()
+ */
+int mv_pose_landmark_get_engine_type_open(mv_pose_landmark_h handle, const unsigned int engine_index,
+                                                                                 char **engine_type);
+
+/**
+ * @brief Gets a number of device types available to a given inference engine.
+ * @details Use this function to get how many device types are supported for a given inference engine after calling @ref mv_pose_landmark_create_open().
+ *
+ * @since_tizen 7.5
+ *
+ * @param[in] handle         The handle to the pose landmark object.
+ * @param[in] engine_type    An inference engine string.
+ * @param[out] device_count  A number of device types available for a given inference engine.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create_open()
+ */
+int mv_pose_landmark_get_device_count_open(mv_pose_landmark_h handle, const char *engine_type,
+                                                                                  unsigned int *device_count);
+
+/**
+ * @brief Gets device type list available.
+ * @details Use this function to get what device types are supported for current inference engine type after calling @ref mv_pose_landmark_configure_open().
+ *
+ * @since_tizen 7.5
+ * @remarks device_type array is allocated internally by the framework and will remain valid
+ *          until the handle is returned.
+ *          Please do not deallocate it directly, and if you want to use it after the handle is returned,
+ *          please copy it to user memory and use the copy.
+ * @param[in] handle         The handle to the pose landmark object.
+ * @param[in] engine_type    An inference engine string.
+ * @param[in] device_index   A device index for getting the device type.
+ * @param[out] device_type   A string to device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create_open()
+ * @pre Configure pose landmark task by calling @ref mv_pose_landmark_configure_open()
+ */
+int mv_pose_landmark_get_device_type_open(mv_pose_landmark_h handle, const char *engine_type,
+                                                                                 const unsigned int device_index, char **device_type);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __MEDIA_VISION_POSE_LANDMARK_OPEN_H__ */
diff --git a/mv_machine_learning/landmark_detection/include/pld_cpm.h b/mv_machine_learning/landmark_detection/include/pld_cpm.h
new file mode 100644 (file)
index 0000000..ba9defb
--- /dev/null
@@ -0,0 +1,47 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PLD_CPM_H__
+#define __PLD_CPM_H__
+
+#include <string>
+#include <memory>
+#include <mv_common.h>
+#include "mv_private.h"
+
+#include "landmark_detection.h"
+#include <mv_inference_type.h>
+
+namespace mediavision
+{
+namespace machine_learning
+{
+/**
+ * @brief CPM(Convolutional Pose Machines) based pose landmark detector.
+ *
+ * Concrete LandmarkDetection task which decodes a heatmap output tensor
+ * into 2D pixel coordinates, one landmark per heatmap channel (see result()).
+ */
+class PldCpm : public LandmarkDetection
+{
+private:
+       LandmarkDetectionResult _result; // decoded result of the last inference
+
+public:
+       PldCpm(LandmarkDetectionTaskType task_type);
+       ~PldCpm();
+
+       // Decodes the latest output tensor and returns a reference to _result.
+       LandmarkDetectionResult &result() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/include/pose_landmark_adapter.h b/mv_machine_learning/landmark_detection/include/pose_landmark_adapter.h
new file mode 100644 (file)
index 0000000..92613fe
--- /dev/null
@@ -0,0 +1,63 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __POSE_LANDMARK_ADAPTER_H__
+#define __POSE_LANDMARK_ADAPTER_H__
+
+#include <dlog.h>
+
+#include "EngineConfig.h"
+#include "itask.h"
+#include "pld_cpm.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+/**
+ * @brief ITask adapter exposing pose landmark detection to the C API layer.
+ *
+ * Owns the concrete LandmarkDetection implementation (PldCpm by default)
+ * and forwards every ITask operation to it.
+ */
+template<typename T, typename V> class PoseLandmarkAdapter : public mediavision::common::ITask<T, V>
+{
+private:
+       std::unique_ptr<LandmarkDetection> _landmark_detection; // concrete task (strategy)
+       T _source;
+       std::string _model_name;
+       std::string _model_file;
+       std::string _meta_file;
+       std::string _label_file;
+
+public:
+       PoseLandmarkAdapter();
+       ~PoseLandmarkAdapter();
+
+       // Replaces the concrete task according to the given task type.
+       void create(int type) override;
+
+       void setModelInfo(const char *model_file, const char *meta_file, const char *label_file,
+                                         const char *model_name) override;
+       void setEngineInfo(const char *engine_type, const char *device_type) override;
+       void configure() override;
+       void getNumberOfEngines(unsigned int *number_of_engines) override;
+       void getEngineType(unsigned int engine_index, char **engine_type) override;
+       void getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices) override;
+       void getDeviceType(const char *engine_type, unsigned int device_index, char **device_type) override;
+       void prepare() override;
+       void setInput(T &t) override;
+       void perform() override;
+       V &getOutput() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/meta/pose_landmark.json b/mv_machine_learning/landmark_detection/meta/pose_landmark.json
new file mode 100644 (file)
index 0000000..787b3a8
--- /dev/null
@@ -0,0 +1,40 @@
+{
+    "attributes":
+    [
+        {
+            "name" : "MODEL_DEFAULT_PATH",
+            "type" : "string",
+            "value" : "/opt/usr/globalapps/mediavision.landmark.detection/models/tflite/"
+        },
+        {
+            "name"  : "MODEL_FILE_NAME",
+            "type"  : "string",
+            "value" : "pld_cpm_192x192.tflite"
+        },
+        {
+            "name"  : "META_FILE_NAME",
+            "type"  : "string",
+            "value" : "pld_cpm_192x192.json"
+        },
+        {
+            "name"  : "LABEL_FILE_NAME",
+            "type"  : "string",
+            "value" : ""
+        },
+        {
+            "name"  : "BACKEND_TYPE",
+            "type"  : "integer",
+            "value" : 1
+        },
+        {
+            "name"  : "TARGET_DEVICE_TYPE",
+            "type"  : "integer",
+            "value" : 1
+        },
+        {
+            "name" : "CONFIDENCE_THRESHOLD",
+            "type"  : "double",
+            "value" : 0.6
+        }
+    ]
+}
index 5d8b413025af87bd86e529fe6d140fd2577ce1d3..b633370d81a0db43fe66fcc48eb11650edf519ee 100644 (file)
@@ -140,6 +140,6 @@ template<typename T, typename V> V &FacialLandmarkAdapter<T, V>::getOutput()
        return _landmark_detection->result();
 }
 
-template class FacialLandmarkAdapter<LandmarkDetectionInput, FacialLandmarkResult>;
+template class FacialLandmarkAdapter<LandmarkDetectionInput, LandmarkDetectionResult>;
 }
 }
\ No newline at end of file
index ae90e6dd0d194372c0fb88030ed1eb994b77f2b7..b391f265f3a141d128cf8a3bf6aaa4d82b5cef58 100644 (file)
@@ -17,7 +17,6 @@
 #include <string.h>
 #include <map>
 #include <algorithm>
-#include <iostream>
 
 #include "machine_learning_exception.h"
 #include "mv_landmark_detection_config.h"
@@ -38,7 +37,7 @@ FldTweakCnn::FldTweakCnn(LandmarkDetectionTaskType task_type) : LandmarkDetectio
 FldTweakCnn::~FldTweakCnn()
 {}
 
-FacialLandmarkResult &FldTweakCnn::result()
+LandmarkDetectionResult &FldTweakCnn::result()
 {
        constexpr static unsigned int numberOfLandmarks = 5;
 
index 58c5f73a0ce8ec692f860815f002a7d089eac5a2..f19c85760607bea8b7f38760e8accd4023f8c3bf 100644 (file)
@@ -231,6 +231,10 @@ void LandmarkDetection::parseMetaFile(const char *meta_file_name)
        _modelLabelFilePath = _modelDefaultPath + _modelLabelFilePath;
        LOGI("label file path = %s", _modelLabelFilePath.c_str());
 
+       ret = _config->getDoubleAttribute(MV_LANDMARK_DETECTION_CONFIDENCE_THRESHOLD, &_confidence_threshold);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               LOGW("threshold value doesn't exist.");
+
        loadLabel();
 }
 
index a4c8d752a532b7d0675b6736f54fd551e5fbc30b..79d88c6e4684feff0607a653c333f66830246f4b 100644 (file)
@@ -35,7 +35,7 @@ using namespace mediavision::common;
 using namespace mediavision::machine_learning;
 using namespace MediaVision::Common;
 using namespace mediavision::machine_learning::exception;
-using LandmarkDetectionTask = ITask<LandmarkDetectionInput, FacialLandmarkResult>;
+using LandmarkDetectionTask = ITask<LandmarkDetectionInput, LandmarkDetectionResult>;
 
 static mutex g_facial_landmark_mutex;
 
@@ -51,7 +51,7 @@ int mv_facial_landmark_create_open(mv_facial_landmark_h *handle)
 
        try {
                context = new Context();
-               task = new FacialLandmarkAdapter<LandmarkDetectionInput, FacialLandmarkResult>();
+               task = new FacialLandmarkAdapter<LandmarkDetectionInput, LandmarkDetectionResult>();
                context->__tasks.insert(make_pair("facial_landmark", task));
                *handle = static_cast<mv_facial_landmark_h>(context);
        } catch (const BaseException &e) {
@@ -335,7 +335,7 @@ int mv_facial_landmark_get_positions_open(mv_facial_landmark_h handle, unsigned
                auto context = static_cast<Context *>(handle);
                auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("facial_landmark"));
 
-               FacialLandmarkResult &result = task->getOutput();
+               LandmarkDetectionResult &result = task->getOutput();
                *number_of_landmarks = result.number_of_landmarks;
                *pos_x = result.x_pos.data();
                *pos_y = result.y_pos.data();
diff --git a/mv_machine_learning/landmark_detection/src/mv_pose_landmark.c b/mv_machine_learning/landmark_detection/src/mv_pose_landmark.c
new file mode 100644 (file)
index 0000000..ed52827
--- /dev/null
@@ -0,0 +1,212 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "mv_pose_landmark_internal.h"
+#include "mv_pose_landmark_open.h"
+
+/**
+ * @file  mv_pose_landmark.c
+ * @brief This file contains Media Vision inference module.
+ */
+
+/* Public wrapper: checks feature support and args, then delegates to the open layer. */
+int mv_pose_landmark_create(mv_pose_landmark_h *handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_NULL_ARG_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_pose_landmark_create_open(handle);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+       return ret;
+}
+
+/* Public wrapper: validates the handle, then destroys it via the open layer. */
+int mv_pose_landmark_destroy(mv_pose_landmark_h handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_pose_landmark_destroy_open(handle);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+       return ret;
+}
+
+/* Public wrapper: validates args and forwards model info to the open layer.
+ * Fix: model_name is a plain C string, not a media vision instance, so it is
+ * validated with NULL_ARG_CHECK like the other string arguments instead of
+ * INSTANCE_CHECK. */
+int mv_pose_landmark_set_model(mv_pose_landmark_h handle, const char *model_name, const char *model_file,
+                                                          const char *meta_file, const char *label_file)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(model_name);
+       MEDIA_VISION_NULL_ARG_CHECK(model_file);
+       MEDIA_VISION_NULL_ARG_CHECK(meta_file);
+       MEDIA_VISION_NULL_ARG_CHECK(label_file);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_pose_landmark_set_model_open(handle, model_name, model_file, meta_file, label_file);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+/* Public wrapper: validates args and forwards engine/device choice to the open layer. */
+int mv_pose_landmark_set_engine(mv_pose_landmark_h handle, const char *backend_type, const char *device_type)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(backend_type);
+       MEDIA_VISION_NULL_ARG_CHECK(device_type);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_pose_landmark_set_engine_open(handle, backend_type, device_type);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+/* Public wrapper: returns the number of supported inference engines via the open layer. */
+int mv_pose_landmark_get_engine_count(mv_pose_landmark_h handle, unsigned int *engine_count)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(engine_count);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_pose_landmark_get_engine_count_open(handle, engine_count);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+/* Public wrapper: resolves an engine index to its type string via the open layer. */
+int mv_pose_landmark_get_engine_type(mv_pose_landmark_h handle, const unsigned int engine_index, char **engine_type)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(engine_type);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_pose_landmark_get_engine_type_open(handle, engine_index, engine_type);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+/* Public wrapper: returns the device count for a given engine via the open layer. */
+int mv_pose_landmark_get_device_count(mv_pose_landmark_h handle, const char *engine_type, unsigned int *device_count)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(device_count);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_pose_landmark_get_device_count_open(handle, engine_type, device_count);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+/* Public wrapper: resolves a device index to its type string via the open layer. */
+int mv_pose_landmark_get_device_type(mv_pose_landmark_h handle, const char *engine_type,
+                                                                        const unsigned int device_index, char **device_type)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(engine_type);
+       MEDIA_VISION_NULL_ARG_CHECK(device_type);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_pose_landmark_get_device_type_open(handle, engine_type, device_index, device_type);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+/* Public wrapper: applies the configured model/engine info via the open layer. */
+int mv_pose_landmark_configure(mv_pose_landmark_h handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_pose_landmark_configure_open(handle);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+       return ret;
+}
+
+/* Public wrapper: loads/prepares the inference engine via the open layer. */
+int mv_pose_landmark_prepare(mv_pose_landmark_h handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_pose_landmark_prepare_open(handle);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+       return ret;
+}
+
+/* Public wrapper: runs pose landmark inference on the given source.
+ * NOTE(review): this uses _mv_inference_image_check_system_info_feature_supported()
+ * while the other wrappers use _mv_inference_check_... — confirm the asymmetry
+ * is intended. */
+int mv_pose_landmark_inference(mv_pose_landmark_h handle, mv_source_h source)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_image_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_INSTANCE_CHECK(source);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_pose_landmark_inference_open(handle, source);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+/* Public wrapper: fetches the decoded landmark positions from the open layer.
+ * Fix: number_of_landmarks/pos_x/pos_y are plain out-pointers, not media
+ * vision instances, so they are validated with NULL_ARG_CHECK (consistent
+ * with the other wrappers) instead of INSTANCE_CHECK. */
+int mv_pose_landmark_get_pos(mv_pose_landmark_h handle, unsigned int *number_of_landmarks, unsigned int **pos_x,
+                                                        unsigned int **pos_y)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_image_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(number_of_landmarks);
+       MEDIA_VISION_NULL_ARG_CHECK(pos_x);
+       MEDIA_VISION_NULL_ARG_CHECK(pos_y);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_pose_landmark_get_pos_open(handle, number_of_landmarks, pos_x, pos_y);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
diff --git a/mv_machine_learning/landmark_detection/src/mv_pose_landmark_open.cpp b/mv_machine_learning/landmark_detection/src/mv_pose_landmark_open.cpp
new file mode 100644 (file)
index 0000000..5fe40d9
--- /dev/null
@@ -0,0 +1,350 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "itask.h"
+#include "mv_pose_landmark_open.h"
+#include "pose_landmark_adapter.h"
+#include "machine_learning_exception.h"
+#include "landmark_detection_type.h"
+#include "context.h"
+
+#include <new>
+#include <unistd.h>
+#include <string>
+#include <algorithm>
+#include <mutex>
+#include <iostream>
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::common;
+using namespace mediavision::machine_learning;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning::exception;
+using LandmarkDetectionTask = ITask<LandmarkDetectionInput, LandmarkDetectionResult>;
+
+static mutex g_pose_landmark_mutex;
+
+// Allocates a Context owning a PoseLandmarkAdapter task registered under the
+// "pose_landmark" key; the opaque handle is the Context pointer.
+int mv_pose_landmark_create_open(mv_pose_landmark_h *handle)
+{
+       if (!handle) {
+               LOGE("Handle can't be created because handle pointer is NULL");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       Context *context = nullptr;
+       LandmarkDetectionTask *task = nullptr;
+
+       try {
+               context = new Context();
+               task = new PoseLandmarkAdapter<LandmarkDetectionInput, LandmarkDetectionResult>();
+               context->__tasks.insert(make_pair("pose_landmark", task));
+               *handle = static_cast<mv_pose_landmark_h>(context);
+       } catch (const BaseException &e) {
+               // Both deletes are safe: they are nullptr for anything not yet created.
+               delete task;
+               delete context;
+               return e.getError();
+       }
+
+       LOGD("pose landmark handle [%p] has been created", *handle);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+// Deletes every task registered in the context, then the context itself.
+int mv_pose_landmark_destroy_open(mv_pose_landmark_h handle)
+{
+       lock_guard<mutex> lock(g_pose_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       auto context = static_cast<Context *>(handle);
+
+       for (auto &m : context->__tasks)
+               delete static_cast<LandmarkDetectionTask *>(m.second);
+
+       delete context;
+
+       LOGD("pose landmark handle has been destroyed.");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+// Forwards user-given model file paths and model name to the task adapter.
+int mv_pose_landmark_set_model_open(mv_pose_landmark_h handle, const char *model_name, const char *model_file,
+                                                                       const char *meta_file, const char *label_file)
+{
+       lock_guard<mutex> lock(g_pose_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("pose_landmark"));
+
+               task->setModelInfo(model_file, meta_file, label_file, model_name);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+// Forwards the requested inference backend and target device to the task adapter.
+int mv_pose_landmark_set_engine_open(mv_pose_landmark_h handle, const char *backend_type, const char *device_type)
+{
+       lock_guard<mutex> lock(g_pose_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("pose_landmark"));
+
+               task->setEngineInfo(backend_type, device_type);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+// Queries the number of inference engines supported by the task adapter.
+int mv_pose_landmark_get_engine_count_open(mv_pose_landmark_h handle, unsigned int *engine_count)
+{
+       lock_guard<mutex> lock(g_pose_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("pose_landmark"));
+
+               task->getNumberOfEngines(engine_count);
+               // TODO.
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+// Resolves an engine index to its engine type string via the task adapter.
+int mv_pose_landmark_get_engine_type_open(mv_pose_landmark_h handle, const unsigned int engine_index,
+                                                                                 char **engine_type)
+{
+       lock_guard<mutex> lock(g_pose_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("pose_landmark"));
+
+               task->getEngineType(engine_index, engine_type);
+               // TODO.
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+// Queries how many devices the given engine supports via the task adapter.
+int mv_pose_landmark_get_device_count_open(mv_pose_landmark_h handle, const char *engine_type,
+                                                                                  unsigned int *device_count)
+{
+       lock_guard<mutex> lock(g_pose_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("pose_landmark"));
+
+               task->getNumberOfDevices(engine_type, device_count);
+               // TODO.
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+// Resolves a device index of the given engine to its device type string.
+int mv_pose_landmark_get_device_type_open(mv_pose_landmark_h handle, const char *engine_type,
+                                                                                 const unsigned int device_index, char **device_type)
+{
+       lock_guard<mutex> lock(g_pose_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("pose_landmark"));
+
+               task->getDeviceType(engine_type, device_index, device_type);
+               // TODO.
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+// Applies the model/engine configuration accumulated in the task adapter.
+int mv_pose_landmark_configure_open(mv_pose_landmark_h handle)
+{
+       LOGD("ENTER");
+
+       lock_guard<mutex> lock(g_pose_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("pose_landmark"));
+
+               task->configure();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+// Prepares the configured task for inference (e.g. loads the model).
+int mv_pose_landmark_prepare_open(mv_pose_landmark_h handle)
+{
+       LOGD("ENTER");
+
+       lock_guard<mutex> lock(g_pose_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("pose_landmark"));
+
+               task->prepare();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+// Runs one inference on the given media source through the task adapter.
+int mv_pose_landmark_inference_open(mv_pose_landmark_h handle, mv_source_h source)
+{
+       LOGD("ENTER");
+
+       lock_guard<mutex> lock(g_pose_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("pose_landmark"));
+
+               LandmarkDetectionInput input = { source };
+
+               task->setInput(input);
+               task->perform();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+// Copies out the decoded landmark count and exposes the position arrays.
+// NOTE(review): pos_x/pos_y point into vectors owned by the task's result
+// object, so they stay valid only until the next inference/result access or
+// handle destruction — confirm this lifetime is documented for API users.
+int mv_pose_landmark_get_pos_open(mv_pose_landmark_h handle, unsigned int *number_of_landmarks, unsigned int **pos_x,
+                                                                 unsigned int **pos_y)
+{
+       LOGD("ENTER");
+
+       lock_guard<mutex> lock(g_pose_landmark_mutex);
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<LandmarkDetectionTask *>(context->__tasks.at("pose_landmark"));
+
+               LandmarkDetectionResult &result = task->getOutput();
+               *number_of_landmarks = result.number_of_landmarks;
+               *pos_x = result.x_pos.data();
+               *pos_y = result.y_pos.data();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
diff --git a/mv_machine_learning/landmark_detection/src/pld_cpm.cpp b/mv_machine_learning/landmark_detection/src/pld_cpm.cpp
new file mode 100644 (file)
index 0000000..d16fe17
--- /dev/null
@@ -0,0 +1,112 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <map>
+#include <algorithm>
+
+#include "machine_learning_exception.h"
+#include "mv_landmark_detection_config.h"
+#include "pld_cpm.h"
+#include "Postprocess.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+// Forwards the task type to the base class; _result starts out empty.
+PldCpm::PldCpm(LandmarkDetectionTaskType task_type) : LandmarkDetection(task_type), _result()
+{}
+
+// Nothing to release explicitly.
+PldCpm::~PldCpm()
+{}
+
+LandmarkDetectionResult &PldCpm::result()
+{
+       // Clear _result object because result() function can be called every time user wants
+       // so make sure to clear existing result data before getting the data again.
+       memset(reinterpret_cast<void *>(&_result), 0, sizeof(_result));
+
+       vector<string> names;
+
+       LandmarkDetection::getOutputNames(names);
+
+       auto scoreMetaInfo = _parser->getOutputMetaMap().at(names[0]);
+       auto decodingLandmark =
+                       static_pointer_cast<DecodingLandmark>(scoreMetaInfo->decodingTypeMap[DecodingType::LANDMARK]);
+
+       if (decodingLandmark->decoding_type != LandmarkDecodingType::HEATMAP)
+               throw InvalidOperation("decoding type not support.");
+
+       if (decodingLandmark->coordinate_type != LandmarkCoordinateType::PIXEL)
+               throw InvalidOperation("coordinate type not support.");
+
+       if (decodingLandmark->landmark_type != LandmarkType::SINGLE_2D)
+               throw InvalidOperation("landmark type not support.");
+
+       auto heatMapWidth = scoreMetaInfo->dims[2];
+       auto heatMapHeight = scoreMetaInfo->dims[1];
+       auto heatMapChannel = scoreMetaInfo->dims[3];
+       vector<float> score_tensor;
+
+       _result.number_of_landmarks = heatMapChannel;
+
+       LandmarkDetection::getOutputTensor(names[0], score_tensor);
+
+       auto ori_src_width = static_cast<double>(_preprocess.getImageWidth()[0]);
+       auto ori_src_height = static_cast<double>(_preprocess.getImageHeight()[0]);
+       auto width_ratio = ori_src_width / static_cast<double>(heatMapWidth);
+       auto height_ratio = ori_src_height / static_cast<double>(heatMapHeight);
+
+       for (auto c = 0; c < heatMapChannel; ++c) {
+               float max_score = 0.0f;
+               int max_x = 0;
+               int max_y = 0;
+
+               for (auto y = 0; y < heatMapHeight; ++y) {
+                       for (auto x = 0; x < heatMapWidth; ++x) {
+                               auto score = score_tensor[y * heatMapWidth * heatMapChannel + x * heatMapChannel + c];
+                               if (score < _confidence_threshold)
+                                       continue;
+
+                               if (max_score < score) {
+                                       max_score = score;
+                                       max_x = x;
+                                       max_y = y;
+                               }
+                       }
+               }
+
+               if (max_score == 0.0f) {
+                       // If max_score is 0 then it means that all score values of current heatmap is 0 so
+                       // ignore the scores that do not meet the threshold.
+                       _result.number_of_landmarks--;
+                       continue;
+               }
+
+               _result.x_pos.push_back(static_cast<size_t>(static_cast<double>(max_x) * width_ratio));
+               _result.y_pos.push_back(static_cast<size_t>(static_cast<double>(max_y) * height_ratio));
+               _result.scores.push_back(max_score);
+       }
+
+       return _result;
+}
+
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/landmark_detection/src/pose_landmark_adapter.cpp b/mv_machine_learning/landmark_detection/src/pose_landmark_adapter.cpp
new file mode 100644 (file)
index 0000000..3dfce33
--- /dev/null
@@ -0,0 +1,145 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "machine_learning_exception.h"
+#include "pose_landmark_adapter.h"
+
+using namespace std;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+template<typename T, typename V> PoseLandmarkAdapter<T, V>::PoseLandmarkAdapter() : _source()
+{
+       // By default, the CPM(Convolutional Pose Machines) model will be used.
+       // If another model is set by the user then the strategy pattern will be used
+       // to create its corresponding concrete class by calling create().
+       _landmark_detection = make_unique<PldCpm>(LandmarkDetectionTaskType::PLD_CPM);
+}
+
+template<typename T, typename V> PoseLandmarkAdapter<T, V>::~PoseLandmarkAdapter()
+{}
+
+template<typename T, typename V> void PoseLandmarkAdapter<T, V>::create(int type)
+{
+       LandmarkDetectionTaskType task_type = static_cast<LandmarkDetectionTaskType>(type);
+
+       // If default task type is same as a given one then skip.
+       if (_landmark_detection->getTaskType() == task_type)
+               return;
+
+       _landmark_detection.reset();
+
+       if (task_type == LandmarkDetectionTaskType::PLD_CPM)
+               _landmark_detection = make_unique<PldCpm>(task_type);
+       // TODO.
+}
+
+template<typename T, typename V>
+void PoseLandmarkAdapter<T, V>::setModelInfo(const char *model_file, const char *meta_file, const char *label_file,
+                                                                                        const char *model_name)
+{
+       string model_name_str(model_name);
+
+       if (!model_name_str.empty()) {
+               transform(model_name_str.begin(), model_name_str.end(), model_name_str.begin(), ::toupper);
+
+               int model_type = 0;
+
+               if (model_name_str == string("PLD_CPM"))
+                       model_type = static_cast<int>(LandmarkDetectionTaskType::PLD_CPM);
+               // TODO.
+               else
+                       throw InvalidParameter("Invalid landmark detection model name.");
+
+               create(static_cast<int>(model_type));
+       }
+
+       _model_file = string(model_file);
+       _meta_file = string(meta_file);
+       _label_file = string(label_file);
+
+       if (_model_file.empty() && _meta_file.empty()) {
+               LOGW("Given model info is invalid so default model info will be used instead.");
+               return;
+       }
+
+       _landmark_detection->setUserModel(_model_file, _meta_file, _label_file);
+}
+
+template<typename T, typename V>
+void PoseLandmarkAdapter<T, V>::setEngineInfo(const char *engine_type, const char *device_type)
+{
+       _landmark_detection->setEngineInfo(string(engine_type), string(device_type));
+}
+
+template<typename T, typename V> void PoseLandmarkAdapter<T, V>::configure()
+{
+       _landmark_detection->parseMetaFile("pose_landmark.json");
+       _landmark_detection->configure();
+}
+
+template<typename T, typename V> void PoseLandmarkAdapter<T, V>::getNumberOfEngines(unsigned int *number_of_engines)
+{
+       _landmark_detection->getNumberOfEngines(number_of_engines);
+}
+
+template<typename T, typename V>
+void PoseLandmarkAdapter<T, V>::getEngineType(unsigned int engine_index, char **engine_type)
+{
+       _landmark_detection->getEngineType(engine_index, engine_type);
+}
+
+template<typename T, typename V>
+void PoseLandmarkAdapter<T, V>::getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices)
+{
+       _landmark_detection->getNumberOfDevices(engine_type, number_of_devices);
+}
+
+template<typename T, typename V>
+void PoseLandmarkAdapter<T, V>::getDeviceType(const char *engine_type, unsigned int device_index, char **device_type)
+{
+       _landmark_detection->getDeviceType(engine_type, device_index, device_type);
+}
+
+template<typename T, typename V> void PoseLandmarkAdapter<T, V>::prepare()
+{
+       _landmark_detection->prepare();
+}
+
+template<typename T, typename V> void PoseLandmarkAdapter<T, V>::setInput(T &t)
+{
+       _source = t;
+}
+
+template<typename T, typename V> void PoseLandmarkAdapter<T, V>::perform()
+{
+       _landmark_detection->preprocess(_source.inference_src);
+       _landmark_detection->inference(_source.inference_src);
+}
+
+template<typename T, typename V> V &PoseLandmarkAdapter<T, V>::getOutput()
+{
+       return _landmark_detection->result();
+}
+
+template class PoseLandmarkAdapter<LandmarkDetectionInput, LandmarkDetectionResult>;
+}
+}
\ No newline at end of file
index 39996841064f4d583c879e661ad3fa51097caa6e..623ed6d103715e8a4d8f7915f9a5187b3a72b6e6 100644 (file)
@@ -407,6 +407,7 @@ find . -name '*.gcno' -not -path "./test/*" -exec cp --parents '{}' "$gcno_obj_d
 %endif
 %if "%{enable_ml_landmark_detection}" == "1"
 %{_datadir}/%{name}/facial_landmark.json
+%{_datadir}/%{name}/pose_landmark.json
 %{_libdir}/libmv_landmark_detection.so
 %endif
 
@@ -438,6 +439,8 @@ find . -name '*.gcno' -not -path "./test/*" -exec cp --parents '{}' "$gcno_obj_d
 %if "%{enable_ml_landmark_detection}" == "1"
 %{_includedir}/media/mv_facial_landmark_internal.h
 %{_includedir}/media/mv_facial_landmark_type.h
+%{_includedir}/media/mv_pose_landmark_internal.h
+%{_includedir}/media/mv_pose_landmark_type.h
 %{_libdir}/pkgconfig/*landmark-detection.pc
 %endif
 
index d0b01d57e2927c6668b0a2046447ab1b69413bbb..12022f1185719f024f9caff0ed2627e6e4d628f9 100644 (file)
 
 #include "ImageHelper.h"
 #include "mv_facial_landmark_internal.h"
+#include "mv_pose_landmark_internal.h"
 
 #define IMG_FACE MV_CONFIG_PATH "res/inference/images/faceLandmark.jpg"
+#define IMG_POSE MV_CONFIG_PATH "res/inference/images/poseLandmark.jpg"
 
 using namespace testing;
 using namespace std;
@@ -92,7 +94,7 @@ TEST(FacialLandmarkTest, InferenceShouldBeOk)
                // TODO.
        };
 
-       const unsigned int answer[5][5] = { { 42, 87, 63, 48, 83 }, { 32, 31, 53, 75, 76 } };
+       const unsigned int answer[][5] = { { 42, 87, 63, 48, 83 }, { 32, 31, 53, 75, 76 } };
 
        mv_source_h mv_source = NULL;
        int ret = mv_create_source(&mv_source);
@@ -101,7 +103,7 @@ TEST(FacialLandmarkTest, InferenceShouldBeOk)
        ret = ImageHelper::loadImageToSource(IMG_FACE, mv_source);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
-       for (auto model : test_models) {
+       for (auto &model : test_models) {
                cout << "model name : " << model.model_file << endl;
 
                ret = mv_facial_landmark_create(&handle);
@@ -143,6 +145,118 @@ TEST(FacialLandmarkTest, InferenceShouldBeOk)
                ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
        }
 
+       ret = mv_destroy_source(mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+}
+
+TEST(PoseLandmarkTest, GettingAvailableInferenceEnginesInfoShouldBeOk)
+{
+       mv_pose_landmark_h handle;
+
+       int ret = mv_pose_landmark_create(&handle);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       unsigned int engine_count = 0;
+
+       ret = mv_pose_landmark_get_engine_count(handle, &engine_count);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       cout << "Engine count = " << engine_count << endl;
+       ASSERT_GE(engine_count, 1);
+
+       for (unsigned int engine_idx = 0; engine_idx < engine_count; ++engine_idx) {
+               char *engine_type = nullptr;
+
+               ret = mv_pose_landmark_get_engine_type(handle, engine_idx, &engine_type);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               cout << "Engine type : " << engine_type << endl;
+
+               unsigned int device_count = 0;
+
+               ret = mv_pose_landmark_get_device_count(handle, engine_type, &device_count);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               cout << "Device count = " << device_count << endl;
+
+               ASSERT_GE(engine_count, 1);
+
+               for (unsigned int device_idx = 0; device_idx < device_count; ++device_idx) {
+                       char *device_type = nullptr;
+
+                       ret = mv_pose_landmark_get_device_type(handle, engine_type, device_idx, &device_type);
+                       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+                       cout << "Device type : " << device_type << endl;
+               }
+       }
+
+       ret = mv_pose_landmark_destroy(handle);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+}
+
+TEST(PoseLandmarkTest, InferenceShouldBeOk)
+{
+       mv_pose_landmark_h handle;
+       vector<model_info> test_models {
+               { "", "", "", "" }, // If empty then default model will be used.
+               { "PLD_CPM", "pld_cpm_192x192.tflite", "pld_cpm_192x192.json", "" }
+               // TODO.
+       };
+       const unsigned int coordinate_answers[][14] = {
+               { 300, 300, 275, 250, 275, 325, 325, 325, 275, 225, 225, 325, 350, 375 },
+               { 50, 87, 100, 137, 162, 100, 137, 162, 162, 187, 250, 162, 187, 250 }
+       };
+
+       mv_source_h mv_source = NULL;
+       int ret = mv_create_source(&mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       ret = ImageHelper::loadImageToSource(IMG_POSE, mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       for (auto &model : test_models) {
+               cout << "model name : " << model.model_file << endl;
+
+               ret = mv_pose_landmark_create(&handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_pose_landmark_set_model(handle, model.model_name.c_str(), model.model_file.c_str(),
+                                                                                model.meta_file.c_str(), model.label_file.c_str());
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_pose_landmark_set_engine(handle, "tflite", "cpu");
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_pose_landmark_configure(handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_pose_landmark_prepare(handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_pose_landmark_inference(handle, mv_source);
+               ASSERT_EQ(ret, 0);
+
+               unsigned int number_of_landmarks;
+               unsigned int *x_pos = nullptr, *y_pos = nullptr;
+
+               ret = mv_pose_landmark_get_pos(handle, &number_of_landmarks, &x_pos, &y_pos);
+               ASSERT_EQ(ret, 0);
+
+               for (unsigned int idx = 0; idx < number_of_landmarks; ++idx) {
+                       int distance_x = x_pos[idx] - coordinate_answers[0][idx];
+                       int distance_y = y_pos[idx] - coordinate_answers[1][idx];
+
+                       distance_x = distance_x < 0 ? distance_x * -1 : distance_x;
+                       distance_y = distance_y < 0 ? distance_y * -1 : distance_y;
+
+                       ASSERT_TRUE(distance_x <= 3 && distance_y <= 3);
+               }
+
+               ret = mv_pose_landmark_destroy(handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+       }
+
        ret = mv_destroy_source(mv_source);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 }
\ No newline at end of file