From: Inki Dae Date: Mon, 24 Apr 2023 04:06:23 +0000 (+0900) Subject: mv_machine_learning: add pose landmark task API X-Git-Tag: accepted/tizen/unified/20230629.132048^2~5 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e46504ac0d7985bad61cef2a19c0bf97c5dc454c;p=platform%2Fcore%2Fapi%2Fmediavision.git mv_machine_learning: add pose landmark task API [Issue type] : new feature Add pose landmark task API. Pose landmark is one of landmark task groups. Therefore, pose landmark task API is implemented in landmark task group directory but provides separate task API. As a initial model support for pose landmark, this patch implements the CPM(Convolutional Pose Machines) model. Change-Id: Ic55b673619c04873abb496b6670d15ebc79a9f62 Signed-off-by: Inki Dae --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 45329d9c..a808f489 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -234,6 +234,7 @@ if (${ENABLE_ML_LANDMARK_DETECTION}) ) install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-landmark-detection.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig) install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/landmark_detection/meta/facial_landmark.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name}) + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/landmark_detection/meta/pose_landmark.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name}) list(APPEND TOTAL_REQUIRED ${PC_NAME}) list(APPEND TOTAL_LDFLAGS ${PC_LDFLAGS}) endif() diff --git a/include/mv_pose_landmark_internal.h b/include/mv_pose_landmark_internal.h new file mode 100644 index 00000000..50d932d1 --- /dev/null +++ b/include/mv_pose_landmark_internal.h @@ -0,0 +1,281 @@ +/** + * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_POSE_LANDMARK_INTERNAL_H__ +#define __MEDIA_VISION_POSE_LANDMARK_INTERNAL_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @brief Creates pose landmark object handle. + * @details Use this function to create an pose landmark object handle. + * After creation the handle has to be prepared with + * @ref mv_pose_landmark_prepare() function to prepare + * an pose landmark object. + * + * @since_tizen 7.5 + * + * @param[out] out_handle The handle to the pose landmark object to be created + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * + * @post Release @a handle by using + * @ref mv_pose_landmark_destroy() function when it is not needed + * anymore + * + * @see mv_pose_landmark_destroy() + */ +int mv_pose_landmark_create(mv_pose_landmark_h *out_handle); + +/** + * @brief Destroys pose landmark handle and releases all its resources. + * + * @since_tizen 7.5 + * + * @param[in] handle The handle to the pose landmark object to be destroyed. 
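+ *
+ * @remarks Destroying the handle releases every resource owned by it, including the
+ *          landmark coordinate arrays previously returned by @ref mv_pose_landmark_get_pos(),
+ *          so copy those results to user memory first if they are still needed.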
+ * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * + * @pre Create an pose landmark handle by using @ref mv_pose_landmark_create() + * + * @see mv_pose_landmark_create() + */ +int mv_pose_landmark_destroy(mv_pose_landmark_h handle); + +/** + * @brief Set user-given model information. + * @details Use this function to change the model information instead of default one after calling @ref mv_pose_landmark_create(). + * + * @since_tizen 7.5 + * + * @param[in] handle The handle to the pose landmark object. + * @param[in] model_name Model name. + * @param[in] model_file Model file name. + * @param[in] meta_type Model meta file name. + * @param[in] label_file Label file name. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create() + */ +int mv_pose_landmark_set_model(mv_pose_landmark_h handle, const char *model_name, const char *model_file, + const char *meta_file, const char *label_file); + +/** + * @brief Configures the backend to the inference handle + * + * @since_tizen 7.5 + * + * @param [in] handle The handle to the inference + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + */ +int mv_pose_landmark_configure(mv_pose_landmark_h handle); + +/** + * @brief Prepares inference. + * @details Use this function to prepare inference based on + * the configured network. + * + * @since_tizen 7.5 + * + * @param [in] handle The handle to the inference + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + */ +int mv_pose_landmark_prepare(mv_pose_landmark_h handle); + +/** + * + * @brief Inferences with a given facial on the @a source + * @details Use this function to inference with a given source. + * + * + * @since_tizen 7.5 + * + * @param[in] handle The handle to the pose landmark object. + * @param[in] source The handle to the source of the media. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace + * isn't supported + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * + * @pre Create a source handle by calling @ref mv_create_source() + * @pre Create an pose landmark handle by calling @ref mv_pose_landmark_create() + * @pre Prepare an inference by calling mv_object_detect_configure() + * @pre Prepare an pose landmark by calling @ref mv_pose_landmark_prepare() + */ +int mv_pose_landmark_inference(mv_pose_landmark_h handle, mv_source_h source); + +/** + * @brief Gets the pose landmark positions on the @a source. + * + * @since_tizen 7.5 + * @remarks pos_x and pos_y arrays are allocated internally by the framework and will remain valid + * until the handle is returned. 
+ * Please do not deallocate them directly, and if you want to use them after the handle is returned, + * please copy them to user memory and use the copy. + * @param[in] handle The handle to the inference + * @param[out] number_of_landmarks A number of landmarks detected. + * @param[out] pos_x An array containing x-coordinate values. + * @param[out] pos_y An array containing y-coordinate values. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error + * + * @pre Create a source handle by calling mv_create_source() + * @pre Create an inference handle by calling mv_pose_landmark_create() + * @pre Prepare an inference by calling mv_pose_landmark_configure() + * @pre Prepare an inference by calling mv_pose_landmark_prepare() + * @pre Prepare an inference by calling mv_pose_landmark_inference() + */ +int mv_pose_landmark_get_pos(mv_pose_landmark_h handle, unsigned int *number_of_landmarks, unsigned int **pos_x, + unsigned int **pos_y); + +/** + * @brief Set user-given backend and device types for inference. + * @details Use this function to change the backend and device types for inference instead of default ones after calling @ref mv_pose_landmark_create(). + * + * @since_tizen 7.5 + * + * @param[in] handle The handle to the image classification object. + * @param[in] backend_type A string of backend type. + * @param[in] device_type A string of device type. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Creates a image classification handle by calling @ref mv_pose_landmark_create() + */ +int mv_pose_landmark_set_engine(mv_pose_landmark_h handle, const char *backend_type, const char *device_type); + +/** + * @brief Get a number of inference engines available for image classification task API. + * @details Use this function to get how many inference engines are supported for image classification after calling @ref mv_pose_landmark_create(). + * + * @since_tizen 7.5 + * + * @param[in] handle The handle to the image classification object. + * @param[out] engine_count A number of inference engines available for image classification API. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Create a image classification handle by calling @ref mv_pose_landmark_create() + */ +int mv_pose_landmark_get_engine_count(mv_pose_landmark_h handle, unsigned int *engine_count); + +/** + * @brief Gets engine type to a given inference engine index. + * @details Use this function to get inference engine type with a given engine index after calling @ref mv_pose_landmark_get_engine_count(). + * + * @since_tizen 7.5 + * @remarks engine_type array is allocated internally by the framework and will remain valid + * until the handle is returned. + * Please do not deallocate it directly, and if you want to use it after the handle is returned, + * please copy it to user memory and use the copy. + * @param[in] handle The handle to the image classification object. 
+ * @param[in] engine_index A inference engine index for getting the inference engine type. + * @param[out] engine_type A string to inference engine. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Get a number of inference engines available for image classification task API by calling @ref mv_pose_landmark_get_engine_count() + */ +int mv_pose_landmark_get_engine_type(mv_pose_landmark_h handle, const unsigned int engine_index, char **engine_type); + +/** + * @brief Gets a number of device types available to a given inference engine. + * @details Use this function to get how many device types are supported for a given inference engine after calling @ref mv_pose_landmark_create(). + * + * @since_tizen 7.5 + * + * @param[in] handle The handle to the image classification object. + * @param[in] engine_type A inference engine string. + * @param[out] device_count A number of device types available for a given inference engine. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Create a image classification handle by calling @ref mv_pose_landmark_create() + */ +int mv_pose_landmark_get_device_count(mv_pose_landmark_h handle, const char *engine_type, unsigned int *device_count); + +/** + * @brief Gets device type list available. + * @details Use this function to get what device types are supported for current inference engine type after calling @ref mv_pose_landmark_configure(). + * + * @since_tizen 7.5 + * @remarks device_type array is allocated internally by the framework and will remain valid + * until the handle is returned. + * Please do not deallocate it directly, and if you want to use it after the handle is returned, + * please copy it to user memory and use the copy. + * @param[in] handle The handle to the image classification object. + * @param[in] engine_type A inference engine string. + * @param[in] device_index A device index for getting the device type. + * @param[out] device_type A string to device type. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Creates a image classification handle by calling @ref mv_pose_landmark_create() + * @pre Configure image classification task by calling @ref mv_pose_landmark_configure() + */ +int mv_pose_landmark_get_device_type(mv_pose_landmark_h handle, const char *engine_type, + const unsigned int device_index, char **device_type); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __MEDIA_VISION_POSE_LANDMARK_INTERNAL_H__ */ diff --git a/include/mv_pose_landmark_type.h b/include/mv_pose_landmark_type.h new file mode 100644 index 00000000..038f787a --- /dev/null +++ b/include/mv_pose_landmark_type.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __TIZEN_MEDIAVISION_MV_POSE_LANDMARK_TYPE_H__ +#define __TIZEN_MEDIAVISION_MV_POSE_LANDMARK_TYPE_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @file mv_pose_landmark_type.h + * @brief This file contains the pose landmark handle for Mediavision. + */ + +/** + * @addtogroup CAPI_MEDIA_VISION_POSE_LANDMARK_MODULE + * @{ + */ + +/** + * @brief The pose landmark object handle. + * + * @since_tizen 7.5 + */ +typedef void *mv_pose_landmark_h; + +/** + * @} + */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __TIZEN_MEDIAVISION_MV_POSE_LANDMARK_TYPE_H__ */ diff --git a/mv_machine_learning/landmark_detection/CMakeLists.txt b/mv_machine_learning/landmark_detection/CMakeLists.txt index 516fa4a0..010b0533 100644 --- a/mv_machine_learning/landmark_detection/CMakeLists.txt +++ b/mv_machine_learning/landmark_detection/CMakeLists.txt @@ -24,4 +24,6 @@ install( FILES_MATCHING PATTERN "mv_facial_landmark_internal.h" PATTERN "mv_facial_landmark_type.h" + PATTERN "mv_pose_landmark_internal.h" + PATTERN "mv_pose_landmark_type.h" ) diff --git a/mv_machine_learning/landmark_detection/include/fld_tweak_cnn.h b/mv_machine_learning/landmark_detection/include/fld_tweak_cnn.h index 13e87c18..6f60474a 100644 --- a/mv_machine_learning/landmark_detection/include/fld_tweak_cnn.h +++ b/mv_machine_learning/landmark_detection/include/fld_tweak_cnn.h @@ -32,13 +32,13 @@ namespace machine_learning class FldTweakCnn : public LandmarkDetection { private: - FacialLandmarkResult _result; + LandmarkDetectionResult _result; public: FldTweakCnn(LandmarkDetectionTaskType task_type); ~FldTweakCnn(); - FacialLandmarkResult &result() override; + LandmarkDetectionResult &result() override; }; } // machine_learning diff --git a/mv_machine_learning/landmark_detection/include/landmark_detection.h b/mv_machine_learning/landmark_detection/include/landmark_detection.h index 0b378bbf..89b718ba 100644 --- a/mv_machine_learning/landmark_detection/include/landmark_detection.h +++ b/mv_machine_learning/landmark_detection/include/landmark_detection.h @@ -55,6 +55,7 @@ protected: std::string _modelLabelFilePath; int _backendType; int _targetDeviceType; + double _confidence_threshold; void getOutputNames(std::vector &names); void getOutputTensor(std::string target_name, std::vector &tensor); @@ -74,7 +75,7 @@ public: void prepare(); void preprocess(mv_source_h &mv_src); void inference(mv_source_h source); - virtual FacialLandmarkResult &result() = 0; + virtual LandmarkDetectionResult &result() = 0; }; } // machine_learning diff --git a/mv_machine_learning/landmark_detection/include/landmark_detection_type.h b/mv_machine_learning/landmark_detection/include/landmark_detection_type.h index 2b3d3ea0..6a208095 100644 --- a/mv_machine_learning/landmark_detection/include/landmark_detection_type.h +++ b/mv_machine_learning/landmark_detection/include/landmark_detection_type.h @@ -31,15 +31,18 @@ struct LandmarkDetectionInput { // TODO. 
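 	// Carries the per-inference input handed to the task, e.g. the mv_source_h
 	// (inference_src) that the adapters forward to LandmarkDetection::preprocess()
 	// and LandmarkDetection::inference().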
}; -struct FacialLandmarkResult { +struct LandmarkDetectionResult { unsigned int number_of_landmarks; std::vector x_pos; std::vector y_pos; + std::vector scores; + std::vector labels; }; enum class LandmarkDetectionTaskType { LANDMARK_DETECTION_TASK_NONE = 0, FLD_TWEAK_CNN, + PLD_CPM // TODO }; diff --git a/mv_machine_learning/landmark_detection/include/mv_landmark_detection_config.h b/mv_machine_learning/landmark_detection/include/mv_landmark_detection_config.h index 8957553e..2266bcfa 100644 --- a/mv_machine_learning/landmark_detection/include/mv_landmark_detection_config.h +++ b/mv_machine_learning/landmark_detection/include/mv_landmark_detection_config.h @@ -62,4 +62,6 @@ */ #define MV_LANDMARK_DETECTION_TARGET_DEVICE_TYPE "TARGET_DEVICE_TYPE" +#define MV_LANDMARK_DETECTION_CONFIDENCE_THRESHOLD "CONFIDENCE_THRESHOLD" + #endif /* __MEDIA_VISION_LANDMARK_DETECTION_CONFIG_H__ */ diff --git a/mv_machine_learning/landmark_detection/include/mv_pose_landmark_open.h b/mv_machine_learning/landmark_detection/include/mv_pose_landmark_open.h new file mode 100644 index 00000000..5158c9af --- /dev/null +++ b/mv_machine_learning/landmark_detection/include/mv_pose_landmark_open.h @@ -0,0 +1,283 @@ +/** + * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_POSE_LANDMARK_OPEN_H__ +#define __MEDIA_VISION_POSE_LANDMARK_OPEN_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @brief Creates pose landmark object handle. + * @details Use this function to create an pose landmark object handle. + * After creation the handle has to be prepared with + * @ref mv_pose_landmark_prepare_open() function to prepare + * an pose landmark object. + * + * @since_tizen 7.5 + * + * @param[out] out_handle The handle to the pose landmark object to be created + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * + * @post Release @a handle by using + * @ref mv_pose_landmark_destroy_open() function when it is not needed + * anymore + * + * @see mv_pose_landmark_destroy_open() + */ +int mv_pose_landmark_create_open(mv_pose_landmark_h *out_handle); + +/** + * @brief Destroys pose landmark handle and releases all its resources. + * + * @since_tizen 7.5 + * + * @param[in] handle The handle to the pose landmark object to be destroyed. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * + * @pre Create an pose landmark handle by using @ref mv_pose_landmark_create_open() + * + * @see mv_pose_landmark_create_open() + */ +int mv_pose_landmark_destroy_open(mv_pose_landmark_h handle); + +/** + * @brief Set user-given model information. 
+ * @details Use this function to change the model information instead of default one after calling @ref mv_pose_landmark_create(). + * + * @since_tizen 7.5 + * + * @param[in] handle The handle to the pose landmark object. + * @param[in] model_name Model name. + * @param[in] model_file Model file name. + * @param[in] meta_type Model meta file name. + * @param[in] label_file Label file name. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Create a pose landmark handle by calling @ref mv_pose_landmark_create() + */ +int mv_pose_landmark_set_model_open(mv_pose_landmark_h handle, const char *model_name, const char *model_file, + const char *meta_file, const char *label_file); + +/** + * @brief Configures the backend to the inference handle + * + * @since_tizen 7.5 + * + * @param [in] handle The handle to the inference + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + */ +int mv_pose_landmark_configure_open(mv_pose_landmark_h handle); + +/** + * @brief Prepares inference. + * @details Use this function to prepare inference based on + * the configured network. + * + * @since_tizen 7.5 + * + * @param [in] handle The handle to the inference + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + */ +int mv_pose_landmark_prepare_open(mv_pose_landmark_h handle); + +/** + * + * @brief Inferences with a given raw image on the @a source + * @details Use this function to inference with a given source. + * + * + * @since_tizen 7.5 + * + * @param[in] handle The handle to the pose landmark object. + * @param[in] source The handle to the source of the media. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace + * isn't supported + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * + * @pre Create a source handle by calling @ref mv_create_source() + * @pre Create an pose landmark handle by calling @ref mv_pose_landmark_create_open() + * @pre Prepare an inference by calling mv_object_detect_configure_open() + * @pre Prepare an pose landmark by calling @ref mv_pose_landmark_prepare_open() + */ +int mv_pose_landmark_inference_open(mv_pose_landmark_h handle, mv_source_h source); + +/** + * @brief Gets the pose landmark positions on the @a source. + * + * @since_tizen 7.5 + * @remarks pos_x and pos_y arrays are allocated internally by the framework and will remain valid + * until the handle is returned. + * Please do not deallocate them directly, and if you want to use them after the handle is returned, + * please copy them to user memory and use the copy. + * @param[in] handle The handle to the inference + * @param[out] number_of_landmarks A number of landmarks detected. + * @param[out] pos_x An array containing x-coordinate values. + * @param[out] pos_y An array containing y-coordinate values. 
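+ *
+ * @remarks A minimal sketch of reading back the results (error handling omitted and
+ *          assuming the handle has already been configured, prepared and run on a source):
+ * @code
+ * unsigned int count = 0;
+ * unsigned int *x = NULL, *y = NULL;
+ *
+ * if (mv_pose_landmark_get_pos_open(handle, &count, &x, &y) == MEDIA_VISION_ERROR_NONE) {
+ *     for (unsigned int i = 0; i < count; ++i)
+ *         printf("landmark %u: (%u, %u)\n", i, x[i], y[i]);
+ * }
+ * @endcode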
+ * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error + * + * @pre Create a source handle by calling mv_create_source() + * @pre Create an inference handle by calling mv_pose_landmark_create() + * @pre Prepare an inference by calling mv_pose_landmark_configure() + * @pre Prepare an inference by calling mv_pose_landmark_prepare() + * @pre Prepare an inference by calling mv_pose_landmark_inference() + */ +int mv_pose_landmark_get_pos_open(mv_pose_landmark_h handle, unsigned int *number_of_landmarks, unsigned int **pos_x, + unsigned int **pos_y); + +/** + * @brief Set user-given backend and device types for inference. + * @details Use this function to change the backend and device types for inference instead of default ones after calling @ref mv_pose_landmark_create_open(). + * + * @since_tizen 7.5 + * + * @param[in] handle The handle to the image classification object. + * @param[in] backend_type A string of backend type. + * @param[in] device_type A string of device type. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Creates a image classification handle by calling @ref mv_pose_landmark_create_open() + */ +int mv_pose_landmark_set_engine_open(mv_pose_landmark_h handle, const char *backend_type, const char *device_type); + +/** + * @brief Get a number of inference engines available for image classification task API. + * @details Use this function to get how many inference engines are supported for image classification after calling @ref mv_pose_landmark_create_open(). + * + * @since_tizen 7.5 + * + * @param[in] handle The handle to the image classification object. + * @param[out] engine_count A number of inference engines available for image classification API. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Create a image classification handle by calling @ref mv_pose_landmark_create_open() + */ +int mv_pose_landmark_get_engine_count_open(mv_pose_landmark_h handle, unsigned int *engine_count); + +/** + * @brief Gets engine type to a given inference engine index. + * @details Use this function to get inference engine type with a given engine index after calling @ref mv_pose_landmark_get_engine_count(). + * + * @since_tizen 7.5 + * @remarks engine_type array is allocated internally by the framework and will remain valid + * until the handle is returned. + * Please do not deallocate it directly, and if you want to use it after the handle is returned, + * please copy it to user memory and use the copy. + * @param[in] handle The handle to the image classification object. + * @param[in] engine_index A inference engine index for getting the inference engine type. + * @param[out] engine_type A string to inference engine. 
+ * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Get a number of inference engines available for image classification task API by calling @ref mv_pose_landmark_get_engine_count() + */ +int mv_pose_landmark_get_engine_type_open(mv_pose_landmark_h handle, const unsigned int engine_index, + char **engine_type); + +/** + * @brief Gets a number of device types available to a given inference engine. + * @details Use this function to get how many device types are supported for a given inference engine after calling @ref mv_pose_landmark_create_open(). + * + * @since_tizen 7.5 + * + * @param[in] handle The handle to the image classification object. + * @param[in] engine_type A inference engine string. + * @param[out] device_count A number of device types available for a given inference engine. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Create a image classification handle by calling @ref mv_pose_landmark_create_open() + */ +int mv_pose_landmark_get_device_count_open(mv_pose_landmark_h handle, const char *engine_type, + unsigned int *device_count); + +/** + * @brief Gets device type list available. + * @details Use this function to get what device types are supported for current inference engine type after calling @ref mv_pose_landmark_configure(). + * + * @since_tizen 7.5 + * @remarks device_type array is allocated internally by the framework and will remain valid + * until the handle is returned. + * Please do not deallocate it directly, and if you want to use it after the handle is returned, + * please copy it to user memory and use the copy. + * @param[in] handle The handle to the image classification object. + * @param[in] engine_type A inference engine string. + * @param[in] device_index A device index for getting the device type. + * @param[out] device_type A string to device type. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Creates a image classification handle by calling @ref mv_pose_landmark_create_open() + * @pre Configure image classification task by calling @ref mv_pose_landmark_configure_open() + */ +int mv_pose_landmark_get_device_type_open(mv_pose_landmark_h handle, const char *engine_type, + const unsigned int device_index, char **device_type); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __MEDIA_VISION_INFERENCE_OPEN_H__ */ diff --git a/mv_machine_learning/landmark_detection/include/pld_cpm.h b/mv_machine_learning/landmark_detection/include/pld_cpm.h new file mode 100644 index 00000000..ba9defb7 --- /dev/null +++ b/mv_machine_learning/landmark_detection/include/pld_cpm.h @@ -0,0 +1,47 @@ +/** + * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __PLD_CPM_H__ +#define __PLD_CPM_H__ + +#include +#include +#include +#include "mv_private.h" + +#include "landmark_detection.h" +#include + +namespace mediavision +{ +namespace machine_learning +{ +class PldCpm : public LandmarkDetection +{ +private: + LandmarkDetectionResult _result; + +public: + PldCpm(LandmarkDetectionTaskType task_type); + ~PldCpm(); + + LandmarkDetectionResult &result() override; +}; + +} // machine_learning +} // mediavision + +#endif \ No newline at end of file diff --git a/mv_machine_learning/landmark_detection/include/pose_landmark_adapter.h b/mv_machine_learning/landmark_detection/include/pose_landmark_adapter.h new file mode 100644 index 00000000..92613fe4 --- /dev/null +++ b/mv_machine_learning/landmark_detection/include/pose_landmark_adapter.h @@ -0,0 +1,63 @@ +/** + * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __POSE_LANDMARK_ADAPTER_H__ +#define __POSE_LANDMARK_ADAPTER_H__ + +#include + +#include "EngineConfig.h" +#include "itask.h" +#include "pld_cpm.h" + +namespace mediavision +{ +namespace machine_learning +{ +template class PoseLandmarkAdapter : public mediavision::common::ITask +{ +private: + std::unique_ptr _landmark_detection; + T _source; + std::string _model_name; + std::string _model_file; + std::string _meta_file; + std::string _label_file; + +public: + PoseLandmarkAdapter(); + ~PoseLandmarkAdapter(); + + void create(int type) override; + + void setModelInfo(const char *model_file, const char *meta_file, const char *label_file, + const char *model_name) override; + void setEngineInfo(const char *engine_type, const char *device_type) override; + void configure() override; + void getNumberOfEngines(unsigned int *number_of_engines) override; + void getEngineType(unsigned int engine_index, char **engine_type) override; + void getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices) override; + void getDeviceType(const char *engine_type, unsigned int device_index, char **device_type) override; + void prepare() override; + void setInput(T &t) override; + void perform() override; + V &getOutput() override; +}; + +} // machine_learning +} // mediavision + +#endif \ No newline at end of file diff --git a/mv_machine_learning/landmark_detection/meta/pose_landmark.json b/mv_machine_learning/landmark_detection/meta/pose_landmark.json new file mode 100644 index 00000000..787b3a8c --- /dev/null +++ b/mv_machine_learning/landmark_detection/meta/pose_landmark.json @@ -0,0 +1,40 @@ +{ + "attributes": + [ + { + "name" : "MODEL_DEFAULT_PATH", + "type" : "string", + "value" : "/opt/usr/globalapps/mediavision.landmark.detection/models/tflite/" + }, + { + "name" : "MODEL_FILE_NAME", + "type" : "string", + "value" : "pld_cpm_192x192.tflite" + }, + { + "name" : "META_FILE_NAME", + "type" : "string", + "value" : "pld_cpm_192x192.json" + }, + { + "name" : "LABEL_FILE_NAME", + "type" : "string", + "value" : "" + }, + { + "name" : "BACKEND_TYPE", + "type" : "integer", + "value" : 1 + }, + { + "name" : "TARGET_DEVICE_TYPE", + "type" : "integer", + "value" : 1 + }, + { + "name" : "CONFIDENCE_THRESHOLD", + "type" : "double", + "value" : 0.6 + } + ] +} diff --git a/mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp b/mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp index 5d8b4130..b633370d 100644 --- a/mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp +++ b/mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp @@ -140,6 +140,6 @@ template V &FacialLandmarkAdapter::getOutput() return _landmark_detection->result(); } -template class FacialLandmarkAdapter; +template class FacialLandmarkAdapter; } } \ No newline at end of file diff --git a/mv_machine_learning/landmark_detection/src/fld_tweak_cnn.cpp b/mv_machine_learning/landmark_detection/src/fld_tweak_cnn.cpp index ae90e6dd..b391f265 100644 --- a/mv_machine_learning/landmark_detection/src/fld_tweak_cnn.cpp +++ b/mv_machine_learning/landmark_detection/src/fld_tweak_cnn.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include "machine_learning_exception.h" #include "mv_landmark_detection_config.h" @@ -38,7 +37,7 @@ FldTweakCnn::FldTweakCnn(LandmarkDetectionTaskType task_type) : LandmarkDetectio FldTweakCnn::~FldTweakCnn() {} -FacialLandmarkResult &FldTweakCnn::result() +LandmarkDetectionResult &FldTweakCnn::result() { constexpr static unsigned 
int numberOfLandmarks = 5; diff --git a/mv_machine_learning/landmark_detection/src/landmark_detection.cpp b/mv_machine_learning/landmark_detection/src/landmark_detection.cpp index 58c5f73a..f19c8576 100644 --- a/mv_machine_learning/landmark_detection/src/landmark_detection.cpp +++ b/mv_machine_learning/landmark_detection/src/landmark_detection.cpp @@ -231,6 +231,10 @@ void LandmarkDetection::parseMetaFile(const char *meta_file_name) _modelLabelFilePath = _modelDefaultPath + _modelLabelFilePath; LOGI("label file path = %s", _modelLabelFilePath.c_str()); + ret = _config->getDoubleAttribute(MV_LANDMARK_DETECTION_CONFIDENCE_THRESHOLD, &_confidence_threshold); + if (ret != MEDIA_VISION_ERROR_NONE) + LOGW("threshold value doesn't exist."); + loadLabel(); } diff --git a/mv_machine_learning/landmark_detection/src/mv_facial_landmark_open.cpp b/mv_machine_learning/landmark_detection/src/mv_facial_landmark_open.cpp index a4c8d752..79d88c6e 100644 --- a/mv_machine_learning/landmark_detection/src/mv_facial_landmark_open.cpp +++ b/mv_machine_learning/landmark_detection/src/mv_facial_landmark_open.cpp @@ -35,7 +35,7 @@ using namespace mediavision::common; using namespace mediavision::machine_learning; using namespace MediaVision::Common; using namespace mediavision::machine_learning::exception; -using LandmarkDetectionTask = ITask; +using LandmarkDetectionTask = ITask; static mutex g_facial_landmark_mutex; @@ -51,7 +51,7 @@ int mv_facial_landmark_create_open(mv_facial_landmark_h *handle) try { context = new Context(); - task = new FacialLandmarkAdapter(); + task = new FacialLandmarkAdapter(); context->__tasks.insert(make_pair("facial_landmark", task)); *handle = static_cast(context); } catch (const BaseException &e) { @@ -335,7 +335,7 @@ int mv_facial_landmark_get_positions_open(mv_facial_landmark_h handle, unsigned auto context = static_cast(handle); auto task = static_cast(context->__tasks.at("facial_landmark")); - FacialLandmarkResult &result = task->getOutput(); + LandmarkDetectionResult &result = task->getOutput(); *number_of_landmarks = result.number_of_landmarks; *pos_x = result.x_pos.data(); *pos_y = result.y_pos.data(); diff --git a/mv_machine_learning/landmark_detection/src/mv_pose_landmark.c b/mv_machine_learning/landmark_detection/src/mv_pose_landmark.c new file mode 100644 index 00000000..ed52827b --- /dev/null +++ b/mv_machine_learning/landmark_detection/src/mv_pose_landmark.c @@ -0,0 +1,212 @@ +/** + * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mv_private.h" +#include "mv_pose_landmark_internal.h" +#include "mv_pose_landmark_open.h" + +/** + * @file mv_pose_landmark.c + * @brief This file contains Media Vision inference module. 
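+ *
+ *        A typical call sequence for the pose landmark task API is sketched below
+ *        (error handling omitted; the optional mv_pose_landmark_set_model() and
+ *        mv_pose_landmark_set_engine() calls are skipped so the default CPM model
+ *        described by pose_landmark.json is used; @c source is an mv_source_h
+ *        filled via mv_create_source()):
+ * @code
+ * mv_pose_landmark_h handle;
+ * unsigned int count = 0;
+ * unsigned int *x = NULL, *y = NULL;
+ *
+ * mv_pose_landmark_create(&handle);
+ * mv_pose_landmark_configure(handle);
+ * mv_pose_landmark_prepare(handle);
+ * mv_pose_landmark_inference(handle, source);
+ * mv_pose_landmark_get_pos(handle, &count, &x, &y);
+ * mv_pose_landmark_destroy(handle);
+ * @endcode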
+ */ + +int mv_pose_landmark_create(mv_pose_landmark_h *handle) +{ + MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_NULL_ARG_CHECK(handle); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = mv_pose_landmark_create_open(handle); + + MEDIA_VISION_FUNCTION_LEAVE(); + return ret; +} + +int mv_pose_landmark_destroy(mv_pose_landmark_h handle) +{ + MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(handle); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = mv_pose_landmark_destroy_open(handle); + + MEDIA_VISION_FUNCTION_LEAVE(); + return ret; +} + +int mv_pose_landmark_set_model(mv_pose_landmark_h handle, const char *model_name, const char *model_file, + const char *meta_file, const char *label_file) +{ + MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported()); + + MEDIA_VISION_INSTANCE_CHECK(handle); + MEDIA_VISION_INSTANCE_CHECK(model_name); + MEDIA_VISION_NULL_ARG_CHECK(model_file); + MEDIA_VISION_NULL_ARG_CHECK(meta_file); + MEDIA_VISION_NULL_ARG_CHECK(label_file); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = mv_pose_landmark_set_model_open(handle, model_name, model_file, meta_file, label_file); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_pose_landmark_set_engine(mv_pose_landmark_h handle, const char *backend_type, const char *device_type) +{ + MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported()); + + MEDIA_VISION_INSTANCE_CHECK(handle); + MEDIA_VISION_NULL_ARG_CHECK(backend_type); + MEDIA_VISION_NULL_ARG_CHECK(device_type); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = mv_pose_landmark_set_engine_open(handle, backend_type, device_type); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_pose_landmark_get_engine_count(mv_pose_landmark_h handle, unsigned int *engine_count) +{ + MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported()); + + MEDIA_VISION_INSTANCE_CHECK(handle); + MEDIA_VISION_NULL_ARG_CHECK(engine_count); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = mv_pose_landmark_get_engine_count_open(handle, engine_count); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_pose_landmark_get_engine_type(mv_pose_landmark_h handle, const unsigned int engine_index, char **engine_type) +{ + MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported()); + + MEDIA_VISION_INSTANCE_CHECK(handle); + MEDIA_VISION_NULL_ARG_CHECK(engine_type); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = mv_pose_landmark_get_engine_type_open(handle, engine_index, engine_type); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_pose_landmark_get_device_count(mv_pose_landmark_h handle, const char *engine_type, unsigned int *device_count) +{ + MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported()); + + MEDIA_VISION_INSTANCE_CHECK(handle); + MEDIA_VISION_NULL_ARG_CHECK(device_count); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = mv_pose_landmark_get_device_count_open(handle, engine_type, device_count); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_pose_landmark_get_device_type(mv_pose_landmark_h handle, const char *engine_type, + const unsigned int device_index, char **device_type) +{ + MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported()); + + MEDIA_VISION_INSTANCE_CHECK(handle); + MEDIA_VISION_NULL_ARG_CHECK(engine_type); + MEDIA_VISION_NULL_ARG_CHECK(device_type); + + 
MEDIA_VISION_FUNCTION_ENTER(); + + int ret = mv_pose_landmark_get_device_type_open(handle, engine_type, device_index, device_type); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_pose_landmark_configure(mv_pose_landmark_h handle) +{ + MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(handle); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = mv_pose_landmark_configure_open(handle); + + MEDIA_VISION_FUNCTION_LEAVE(); + return ret; +} + +int mv_pose_landmark_prepare(mv_pose_landmark_h handle) +{ + MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(handle); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = mv_pose_landmark_prepare_open(handle); + + MEDIA_VISION_FUNCTION_LEAVE(); + return ret; +} + +int mv_pose_landmark_inference(mv_pose_landmark_h handle, mv_source_h source) +{ + MEDIA_VISION_SUPPORT_CHECK(_mv_inference_image_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(handle); + MEDIA_VISION_INSTANCE_CHECK(source); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = mv_pose_landmark_inference_open(handle, source); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_pose_landmark_get_pos(mv_pose_landmark_h handle, unsigned int *number_of_landmarks, unsigned int **pos_x, + unsigned int **pos_y) +{ + MEDIA_VISION_SUPPORT_CHECK(_mv_inference_image_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(handle); + MEDIA_VISION_INSTANCE_CHECK(number_of_landmarks); + MEDIA_VISION_INSTANCE_CHECK(pos_x); + MEDIA_VISION_INSTANCE_CHECK(pos_y); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = mv_pose_landmark_get_pos_open(handle, number_of_landmarks, pos_x, pos_y); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} diff --git a/mv_machine_learning/landmark_detection/src/mv_pose_landmark_open.cpp b/mv_machine_learning/landmark_detection/src/mv_pose_landmark_open.cpp new file mode 100644 index 00000000..5fe40d90 --- /dev/null +++ b/mv_machine_learning/landmark_detection/src/mv_pose_landmark_open.cpp @@ -0,0 +1,350 @@ +/** + * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mv_private.h" +#include "itask.h" +#include "mv_pose_landmark_open.h" +#include "pose_landmark_adapter.h" +#include "machine_learning_exception.h" +#include "landmark_detection_type.h" +#include "context.h" + +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace mediavision::inference; +using namespace mediavision::common; +using namespace mediavision::machine_learning; +using namespace MediaVision::Common; +using namespace mediavision::machine_learning::exception; +using LandmarkDetectionTask = ITask; + +static mutex g_pose_landmark_mutex; + +int mv_pose_landmark_create_open(mv_pose_landmark_h *handle) +{ + if (!handle) { + LOGE("Handle can't be created because handle pointer is NULL"); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + Context *context = nullptr; + LandmarkDetectionTask *task = nullptr; + + try { + context = new Context(); + task = new PoseLandmarkAdapter(); + context->__tasks.insert(make_pair("pose_landmark", task)); + *handle = static_cast(context); + } catch (const BaseException &e) { + delete task; + delete context; + return e.getError(); + } + + LOGD("pose landmark handle [%p] has been created", *handle); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_landmark_destroy_open(mv_pose_landmark_h handle) +{ + lock_guard lock(g_pose_landmark_mutex); + + if (!handle) { + LOGE("Handle is NULL."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + auto context = static_cast(handle); + + for (auto &m : context->__tasks) + delete static_cast(m.second); + + delete context; + + LOGD("pose landmark handle has been destroyed."); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_landmark_set_model_open(mv_pose_landmark_h handle, const char *model_name, const char *model_file, + const char *meta_file, const char *label_file) +{ + lock_guard lock(g_pose_landmark_mutex); + + if (!handle) { + LOGE("Handle is NULL."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + try { + auto context = static_cast(handle); + auto task = static_cast(context->__tasks.at("pose_landmark")); + + task->setModelInfo(model_file, meta_file, label_file, model_name); + } catch (const BaseException &e) { + LOGE("%s", e.what()); + return e.getError(); + } + + LOGD("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_landmark_set_engine_open(mv_pose_landmark_h handle, const char *backend_type, const char *device_type) +{ + lock_guard lock(g_pose_landmark_mutex); + + if (!handle) { + LOGE("Handle is NULL."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + try { + auto context = static_cast(handle); + auto task = static_cast(context->__tasks.at("pose_landmark")); + + task->setEngineInfo(backend_type, device_type); + } catch (const BaseException &e) { + LOGE("%s", e.what()); + return e.getError(); + } + + LOGD("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_landmark_get_engine_count_open(mv_pose_landmark_h handle, unsigned int *engine_count) +{ + lock_guard lock(g_pose_landmark_mutex); + + if (!handle) { + LOGE("Handle is NULL."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + try { + auto context = static_cast(handle); + auto task = static_cast(context->__tasks.at("pose_landmark")); + + task->getNumberOfEngines(engine_count); + // TODO. 
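+		// The actual engine enumeration is delegated to the underlying
+		// LandmarkDetection object owned by the adapter.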
+ } catch (const BaseException &e) { + LOGE("%s", e.what()); + return e.getError(); + } + + LOGD("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_landmark_get_engine_type_open(mv_pose_landmark_h handle, const unsigned int engine_index, + char **engine_type) +{ + lock_guard lock(g_pose_landmark_mutex); + + if (!handle) { + LOGE("Handle is NULL."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + try { + auto context = static_cast(handle); + auto task = static_cast(context->__tasks.at("pose_landmark")); + + task->getEngineType(engine_index, engine_type); + // TODO. + } catch (const BaseException &e) { + LOGE("%s", e.what()); + return e.getError(); + } + + LOGD("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_landmark_get_device_count_open(mv_pose_landmark_h handle, const char *engine_type, + unsigned int *device_count) +{ + lock_guard lock(g_pose_landmark_mutex); + + if (!handle) { + LOGE("Handle is NULL."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + try { + auto context = static_cast(handle); + auto task = static_cast(context->__tasks.at("pose_landmark")); + + task->getNumberOfDevices(engine_type, device_count); + // TODO. + } catch (const BaseException &e) { + LOGE("%s", e.what()); + return e.getError(); + } + + LOGD("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_landmark_get_device_type_open(mv_pose_landmark_h handle, const char *engine_type, + const unsigned int device_index, char **device_type) +{ + lock_guard lock(g_pose_landmark_mutex); + + if (!handle) { + LOGE("Handle is NULL."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + try { + auto context = static_cast(handle); + auto task = static_cast(context->__tasks.at("pose_landmark")); + + task->getDeviceType(engine_type, device_index, device_type); + // TODO. 
+ } catch (const BaseException &e) { + LOGE("%s", e.what()); + return e.getError(); + } + + LOGD("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_landmark_configure_open(mv_pose_landmark_h handle) +{ + LOGD("ENTER"); + + lock_guard lock(g_pose_landmark_mutex); + + if (!handle) { + LOGE("Handle is NULL."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + try { + auto context = static_cast(handle); + auto task = static_cast(context->__tasks.at("pose_landmark")); + + task->configure(); + } catch (const BaseException &e) { + LOGE("%s", e.what()); + return e.getError(); + } + + LOGD("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_landmark_prepare_open(mv_pose_landmark_h handle) +{ + LOGD("ENTER"); + + lock_guard lock(g_pose_landmark_mutex); + + if (!handle) { + LOGE("Handle is NULL."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + try { + auto context = static_cast(handle); + auto task = static_cast(context->__tasks.at("pose_landmark")); + + task->prepare(); + } catch (const BaseException &e) { + LOGE("%s", e.what()); + return e.getError(); + } + + LOGD("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_landmark_inference_open(mv_pose_landmark_h handle, mv_source_h source) +{ + LOGD("ENTER"); + + lock_guard lock(g_pose_landmark_mutex); + + if (!handle) { + LOGE("Handle is NULL."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + try { + auto context = static_cast(handle); + auto task = static_cast(context->__tasks.at("pose_landmark")); + + LandmarkDetectionInput input = { source }; + + task->setInput(input); + task->perform(); + } catch (const BaseException &e) { + LOGE("%s", e.what()); + return e.getError(); + } + + LOGD("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_landmark_get_pos_open(mv_pose_landmark_h handle, unsigned int *number_of_landmarks, unsigned int **pos_x, + unsigned int **pos_y) +{ + LOGD("ENTER"); + + lock_guard lock(g_pose_landmark_mutex); + + if (!handle) { + LOGE("Handle is NULL."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + try { + auto context = static_cast(handle); + auto task = static_cast(context->__tasks.at("pose_landmark")); + + LandmarkDetectionResult &result = task->getOutput(); + *number_of_landmarks = result.number_of_landmarks; + *pos_x = result.x_pos.data(); + *pos_y = result.y_pos.data(); + } catch (const BaseException &e) { + LOGE("%s", e.what()); + return e.getError(); + } + + LOGD("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; +} diff --git a/mv_machine_learning/landmark_detection/src/pld_cpm.cpp b/mv_machine_learning/landmark_detection/src/pld_cpm.cpp new file mode 100644 index 00000000..d16fe174 --- /dev/null +++ b/mv_machine_learning/landmark_detection/src/pld_cpm.cpp @@ -0,0 +1,112 @@ +/** + * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include "machine_learning_exception.h" +#include "mv_landmark_detection_config.h" +#include "pld_cpm.h" +#include "Postprocess.h" + +using namespace std; +using namespace mediavision::inference; +using namespace mediavision::machine_learning::exception; + +namespace mediavision +{ +namespace machine_learning +{ +PldCpm::PldCpm(LandmarkDetectionTaskType task_type) : LandmarkDetection(task_type), _result() +{} + +PldCpm::~PldCpm() +{} + +LandmarkDetectionResult &PldCpm::result() +{ + // Clear _result object because result() function can be called every time user wants + // so make sure to clear existing result data before getting the data again. + memset(reinterpret_cast(&_result), 0, sizeof(_result)); + + vector names; + + LandmarkDetection::getOutputNames(names); + + auto scoreMetaInfo = _parser->getOutputMetaMap().at(names[0]); + auto decodingLandmark = + static_pointer_cast(scoreMetaInfo->decodingTypeMap[DecodingType::LANDMARK]); + + if (decodingLandmark->decoding_type != LandmarkDecodingType::HEATMAP) + throw InvalidOperation("decoding type not support."); + + if (decodingLandmark->coordinate_type != LandmarkCoordinateType::PIXEL) + throw InvalidOperation("coordinate type not support."); + + if (decodingLandmark->landmark_type != LandmarkType::SINGLE_2D) + throw InvalidOperation("landmark type not support."); + + auto heatMapWidth = scoreMetaInfo->dims[2]; + auto heatMapHeight = scoreMetaInfo->dims[1]; + auto heatMapChannel = scoreMetaInfo->dims[3]; + vector score_tensor; + + _result.number_of_landmarks = heatMapChannel; + + LandmarkDetection::getOutputTensor(names[0], score_tensor); + + auto ori_src_width = static_cast(_preprocess.getImageWidth()[0]); + auto ori_src_height = static_cast(_preprocess.getImageHeight()[0]); + auto width_ratio = ori_src_width / static_cast(heatMapWidth); + auto height_ratio = ori_src_height / static_cast(heatMapHeight); + + for (auto c = 0; c < heatMapChannel; ++c) { + float max_score = 0.0f; + int max_x = 0; + int max_y = 0; + + for (auto y = 0; y < heatMapHeight; ++y) { + for (auto x = 0; x < heatMapWidth; ++x) { + auto score = score_tensor[y * heatMapWidth * heatMapChannel + x * heatMapChannel + c]; + if (score < _confidence_threshold) + continue; + + if (max_score < score) { + max_score = score; + max_x = x; + max_y = y; + } + } + } + + if (max_score == 0.0f) { + // If max_score is 0 then it means that all score values of current heatmap is 0 so + // ignore the scores that do not meet the threshold. + _result.number_of_landmarks--; + continue; + } + + _result.x_pos.push_back(static_cast(static_cast(max_x) * width_ratio)); + _result.y_pos.push_back(static_cast(static_cast(max_y) * height_ratio)); + _result.scores.push_back(max_score); + } + + return _result; +} + +} +} \ No newline at end of file diff --git a/mv_machine_learning/landmark_detection/src/pose_landmark_adapter.cpp b/mv_machine_learning/landmark_detection/src/pose_landmark_adapter.cpp new file mode 100644 index 00000000..3dfce334 --- /dev/null +++ b/mv_machine_learning/landmark_detection/src/pose_landmark_adapter.cpp @@ -0,0 +1,145 @@ +/** + * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
diff --git a/mv_machine_learning/landmark_detection/src/pose_landmark_adapter.cpp b/mv_machine_learning/landmark_detection/src/pose_landmark_adapter.cpp
new file mode 100644
index 00000000..3dfce334
--- /dev/null
+++ b/mv_machine_learning/landmark_detection/src/pose_landmark_adapter.cpp
@@ -0,0 +1,145 @@
+/**
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "machine_learning_exception.h"
+#include "pose_landmark_adapter.h"
+
+using namespace std;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+template PoseLandmarkAdapter::PoseLandmarkAdapter() : _source()
+{
+	// By default, the CPM pose landmark model is used. If another model is set
+	// by the user then the strategy pattern is used to create the corresponding
+	// concrete class by calling create().
+	_landmark_detection = make_unique(LandmarkDetectionTaskType::PLD_CPM);
+}
+
+template PoseLandmarkAdapter::~PoseLandmarkAdapter()
+{}
+
+template void PoseLandmarkAdapter::create(int type)
+{
+	LandmarkDetectionTaskType task_type = static_cast(type);
+
+	// If the given task type is the same as the current one then skip.
+	if (_landmark_detection->getTaskType() == task_type)
+		return;
+
+	_landmark_detection.reset();
+
+	if (task_type == LandmarkDetectionTaskType::PLD_CPM)
+		_landmark_detection = make_unique(task_type);
+	// TODO.
+}
+
+template
+void PoseLandmarkAdapter::setModelInfo(const char *model_file, const char *meta_file, const char *label_file,
+									   const char *model_name)
+{
+	string model_name_str(model_name);
+
+	if (!model_name_str.empty()) {
+		transform(model_name_str.begin(), model_name_str.end(), model_name_str.begin(), ::toupper);
+
+		int model_type = 0;
+
+		if (model_name_str == string("PLD_CPM"))
+			model_type = static_cast(LandmarkDetectionTaskType::PLD_CPM);
+		// TODO.
+		else
+			throw InvalidParameter("Invalid landmark detection model name.");
+
+		create(static_cast(model_type));
+	}
+
+	_model_file = string(model_file);
+	_meta_file = string(meta_file);
+	_label_file = string(label_file);
+
+	if (_model_file.empty() && _meta_file.empty()) {
+		LOGW("Given model info is invalid so default model info will be used instead.");
+		return;
+	}
+
+	_landmark_detection->setUserModel(_model_file, _meta_file, _label_file);
+}
+
+template
+void PoseLandmarkAdapter::setEngineInfo(const char *engine_type, const char *device_type)
+{
+	_landmark_detection->setEngineInfo(string(engine_type), string(device_type));
+}
+
+template void PoseLandmarkAdapter::configure()
+{
+	_landmark_detection->parseMetaFile("pose_landmark.json");
+	_landmark_detection->configure();
+}
+
+template void PoseLandmarkAdapter::getNumberOfEngines(unsigned int *number_of_engines)
+{
+	_landmark_detection->getNumberOfEngines(number_of_engines);
+}
+
+template
+void PoseLandmarkAdapter::getEngineType(unsigned int engine_index, char **engine_type)
+{
+	_landmark_detection->getEngineType(engine_index, engine_type);
+}
+
+template
+void PoseLandmarkAdapter::getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices)
+{
+	_landmark_detection->getNumberOfDevices(engine_type, number_of_devices);
+}
+
+template
+void PoseLandmarkAdapter::getDeviceType(const char *engine_type, unsigned int device_index, char **device_type)
+{
+	_landmark_detection->getDeviceType(engine_type, device_index, device_type);
+}
+
+template void PoseLandmarkAdapter::prepare()
+{
+	_landmark_detection->prepare();
+}
+
+template void PoseLandmarkAdapter::setInput(T &t)
+{
+	_source = t;
+}
+
+template void PoseLandmarkAdapter::perform()
+{
+	_landmark_detection->preprocess(_source.inference_src);
+	_landmark_detection->inference(_source.inference_src);
+}
+
+template V &PoseLandmarkAdapter::getOutput()
+{
+	return _landmark_detection->result();
+}
+
+template class PoseLandmarkAdapter;
+}
+}
\ No newline at end of file
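The adapter dispatches on an upper-cased model name to pick the concrete decoder, which is the extension point marked by the "// TODO." comments above. A self-contained sketch of that dispatch shape follows; the PLD_FOO type and PldFoo class are hypothetical and do not exist in this patch.

	#include <memory>
	#include <stdexcept>
	#include <string>

	// Stand-ins for the real mediavision types, for illustration only.
	enum class TaskType { PLD_CPM, PLD_FOO /* hypothetical */ };

	struct Decoder {
		virtual ~Decoder() = default;
	};
	struct PldCpm : Decoder {};
	struct PldFoo : Decoder {}; // hypothetical second pose model

	static std::unique_ptr<Decoder> createPoseDecoder(const std::string &model_name)
	{
		// An empty name keeps the default model, mirroring setModelInfo() above.
		TaskType type = TaskType::PLD_CPM;

		if (model_name == "PLD_FOO") // hypothetical
			type = TaskType::PLD_FOO;
		else if (!model_name.empty() && model_name != "PLD_CPM")
			throw std::invalid_argument("Invalid landmark detection model name.");

		if (type == TaskType::PLD_FOO)
			return std::make_unique<PldFoo>();

		return std::make_unique<PldCpm>();
	}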
diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec
index 39996841..623ed6d1 100644
--- a/packaging/capi-media-vision.spec
+++ b/packaging/capi-media-vision.spec
@@ -407,6 +407,7 @@ find . -name '*.gcno' -not -path "./test/*" -exec cp --parents '{}' "$gcno_obj_d
 %endif
 
 %if "%{enable_ml_landmark_detection}" == "1"
 %{_datadir}/%{name}/facial_landmark.json
+%{_datadir}/%{name}/pose_landmark.json
 %{_libdir}/libmv_landmark_detection.so
 %endif
 
@@ -438,6 +439,8 @@ find . -name '*.gcno' -not -path "./test/*" -exec cp --parents '{}' "$gcno_obj_d
 %if "%{enable_ml_landmark_detection}" == "1"
 %{_includedir}/media/mv_facial_landmark_internal.h
 %{_includedir}/media/mv_facial_landmark_type.h
+%{_includedir}/media/mv_pose_landmark_internal.h
+%{_includedir}/media/mv_pose_landmark_type.h
 %{_libdir}/pkgconfig/*landmark-detection.pc
 %endif
 
diff --git a/test/testsuites/machine_learning/landmark_detection/test_landmark_detection.cpp b/test/testsuites/machine_learning/landmark_detection/test_landmark_detection.cpp
index d0b01d57..12022f11 100644
--- a/test/testsuites/machine_learning/landmark_detection/test_landmark_detection.cpp
+++ b/test/testsuites/machine_learning/landmark_detection/test_landmark_detection.cpp
@@ -22,8 +22,10 @@
 #include "ImageHelper.h"
 
 #include "mv_facial_landmark_internal.h"
+#include "mv_pose_landmark_internal.h"
 
 #define IMG_FACE MV_CONFIG_PATH "res/inference/images/faceLandmark.jpg"
+#define IMG_POSE MV_CONFIG_PATH "res/inference/images/poseLandmark.jpg"
 
 using namespace testing;
 using namespace std;
@@ -92,7 +94,7 @@ TEST(FacialLandmarkTest, InferenceShouldBeOk)
 		// TODO.
 	};
 
-	const unsigned int answer[5][5] = { { 42, 87, 63, 48, 83 }, { 32, 31, 53, 75, 76 } };
+	const unsigned int answer[][5] = { { 42, 87, 63, 48, 83 }, { 32, 31, 53, 75, 76 } };
 
 	mv_source_h mv_source = NULL;
 	int ret = mv_create_source(&mv_source);
@@ -101,7 +103,7 @@ TEST(FacialLandmarkTest, InferenceShouldBeOk)
 	ret = ImageHelper::loadImageToSource(IMG_FACE, mv_source);
 	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
-	for (auto model : test_models) {
+	for (auto &model : test_models) {
 		cout << "model name : " << model.model_file << endl;
 
 		ret = mv_facial_landmark_create(&handle);
@@ -143,6 +145,118 @@ TEST(FacialLandmarkTest, InferenceShouldBeOk)
 		ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 	}
 
+	ret = mv_destroy_source(mv_source);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+}
+
+TEST(PoseLandmarkTest, GettingAvailableInferenceEnginesInfoShouldBeOk)
+{
+	mv_pose_landmark_h handle;
+
+	int ret = mv_pose_landmark_create(&handle);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+	unsigned int engine_count = 0;
+
+	ret = mv_pose_landmark_get_engine_count(handle, &engine_count);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+	cout << "Engine count = " << engine_count << endl;
+	ASSERT_GE(engine_count, 1);
+
+	for (unsigned int engine_idx = 0; engine_idx < engine_count; ++engine_idx) {
+		char *engine_type = nullptr;
+
+		ret = mv_pose_landmark_get_engine_type(handle, engine_idx, &engine_type);
+		ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+		cout << "Engine type : " << engine_type << endl;
+
+		unsigned int device_count = 0;
+
+		ret = mv_pose_landmark_get_device_count(handle, engine_type, &device_count);
+		ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+		cout << "Device count = " << device_count << endl;
+
+		ASSERT_GE(device_count, 1);
+
+		for (unsigned int device_idx = 0; device_idx < device_count; ++device_idx) {
+			char *device_type = nullptr;
+
+			ret = mv_pose_landmark_get_device_type(handle, engine_type, device_idx, &device_type);
+			ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+			cout << "Device type : " << device_type << endl;
+		}
+	}
+
+	ret = mv_pose_landmark_destroy(handle);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+}
+
+TEST(PoseLandmarkTest, InferenceShouldBeOk)
+{
+	mv_pose_landmark_h handle;
+	vector test_models {
+		{ "", "", "", "" }, // If empty then the default model will be used.
+		{ "PLD_CPM", "pld_cpm_192x192.tflite", "pld_cpm_192x192.json", "" }
+		// TODO.
+	};
+	const unsigned int coordinate_answers[][14] = {
+		{ 300, 300, 275, 250, 275, 325, 325, 325, 275, 225, 225, 325, 350, 375 },
+		{ 50, 87, 100, 137, 162, 100, 137, 162, 162, 187, 250, 162, 187, 250 }
+	};
+
+	mv_source_h mv_source = NULL;
+	int ret = mv_create_source(&mv_source);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+	ret = ImageHelper::loadImageToSource(IMG_POSE, mv_source);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+	for (auto &model : test_models) {
+		cout << "model name : " << model.model_file << endl;
+
+		ret = mv_pose_landmark_create(&handle);
+		ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+		ret = mv_pose_landmark_set_model(handle, model.model_name.c_str(), model.model_file.c_str(),
+										 model.meta_file.c_str(), model.label_file.c_str());
+		ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+		ret = mv_pose_landmark_set_engine(handle, "tflite", "cpu");
+		ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+		ret = mv_pose_landmark_configure(handle);
+		ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+		ret = mv_pose_landmark_prepare(handle);
+		ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+		ret = mv_pose_landmark_inference(handle, mv_source);
+		ASSERT_EQ(ret, 0);
+
+		unsigned int number_of_landmarks;
+		unsigned int *x_pos = nullptr, *y_pos = nullptr;
+
+		ret = mv_pose_landmark_get_pos(handle, &number_of_landmarks, &x_pos, &y_pos);
+		ASSERT_EQ(ret, 0);
+
+		for (unsigned int idx = 0; idx < number_of_landmarks; ++idx) {
+			int distance_x = x_pos[idx] - coordinate_answers[0][idx];
+			int distance_y = y_pos[idx] - coordinate_answers[1][idx];
+
+			distance_x = distance_x < 0 ? distance_x * -1 : distance_x;
+			distance_y = distance_y < 0 ? distance_y * -1 : distance_y;
+
+			ASSERT_TRUE(distance_x <= 3 && distance_y <= 3);
+		}
+
+		ret = mv_pose_landmark_destroy(handle);
+		ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+	}
+
 	ret = mv_destroy_source(mv_source);
 	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 }
\ No newline at end of file