mv_machine_learning: add hand detection model support 01/317601/5
authorInki Dae <inki.dae@samsung.com>
Mon, 22 Jul 2024 05:53:14 +0000 (14:53 +0900)
committerInki Dae <inki.dae@samsung.com>
Mon, 23 Sep 2024 06:53:45 +0000 (06:53 +0000)
Change-Id: I411852d4cd429aa48ef1ddcee9ae5f1fbb2bb43d
Signed-off-by: Inki Dae <inki.dae@samsung.com>
16 files changed:
CMakeLists.txt
include/mv_hand_detection.h [new file with mode: 0644]
include/mv_hand_detection_internal.h [new file with mode: 0644]
include/mv_hand_detection_type.h [new file with mode: 0644]
mv_machine_learning/object_detection/CMakeLists.txt
mv_machine_learning/object_detection/include/Anchors.h [new file with mode: 0644]
mv_machine_learning/object_detection/include/HandDetectionAdapter.h [new file with mode: 0644]
mv_machine_learning/object_detection/include/PalmDetection.h [new file with mode: 0644]
mv_machine_learning/object_detection/include/object_detection_type.h
mv_machine_learning/object_detection/meta/hand_detection.json [new file with mode: 0644]
mv_machine_learning/object_detection/meta/hand_detection_plugin.json [new file with mode: 0644]
mv_machine_learning/object_detection/src/Anchors.cpp [new file with mode: 0644]
mv_machine_learning/object_detection/src/HandDetectionAdapter.cpp [new file with mode: 0644]
mv_machine_learning/object_detection/src/PalmDetection.cpp [new file with mode: 0644]
mv_machine_learning/object_detection/src/mv_hand_detection.cpp [new file with mode: 0644]
packaging/capi-media-vision.spec

index d86eecba1e58dd70508d91a84f328658dc685690..c31e8f5b4c952b3437add42c37912c49245e074c 100644 (file)
@@ -228,6 +228,8 @@ if (${ENABLE_ML_OBJECT_DETECTION})
         "${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/object_detection_plugin.json"
         "${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/face_detection.json"
         "${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/face_detection_plugin.json"
+        "${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/hand_detection.json"
+        "${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/hand_detection_plugin.json"
     )
        install(FILES ${OBJECT_DETECTION_JSON_FILES} DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name})
        list(APPEND TOTAL_REQUIRED ${PC_NAME})
diff --git a/include/mv_hand_detection.h b/include/mv_hand_detection.h
new file mode 100644 (file)
index 0000000..d76292d
--- /dev/null
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_MV_HAND_DETECTION_H__
+#define __TIZEN_MEDIAVISION_MV_HAND_DETECTION_H__
+
+#include <mv_common.h>
+#include <mv_hand_detection_type.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file   mv_hand_detection.h
+ * @internal
+ * @brief  This file contains the Inference based Media Vision API.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_HAND_DETECTION_MODULE
+ * @{
+ */
+
+/**
+ * @internal
+ * @brief Creates an inference handle for hand detection object.
+ * @details Use this function to create an inference handle. After the creation
+ *          the hand detection task has to be prepared with
+ *          mv_hand_detection_prepare() function to prepare a network
+ *          for the inference.
+ *
+ * @since_tizen 9.0
+ *
+ * @remarks The @a handle should be released using mv_hand_detection_destroy().
+ *
+ * @param[out] handle    The handle to the inference to be created.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ *
+ * @code
+ * #include <mv_hand_detection.h>
+ * ...
+ * mv_hand_detection_h handle = NULL;
+ * mv_hand_detection_create(&handle);
+ * ...
+ * mv_hand_detection_destroy(handle);
+ * @endcode
+ *
+ * @see mv_hand_detection_destroy()
+ * @see mv_hand_detection_prepare()
+ */
+int mv_hand_detection_create(mv_hand_detection_h *handle);
+
+/**
+ * @internal
+ * @brief Destroys inference handle and releases all its resources.
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle    The handle to the inference to be destroyed.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create inference handle by using mv_hand_detection_create()
+ *
+ * @see mv_hand_detection_create()
+ */
+int mv_hand_detection_destroy(mv_hand_detection_h handle);
+
+/**
+ * @internal
+ * @brief Configures the backend for the hand detection inference.
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle         The handle to the inference
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ */
+int mv_hand_detection_configure(mv_hand_detection_h handle);
+
+/**
+ * @internal
+ * @brief Prepares the hand detection inference.
+ * @details Use this function to prepare the hand detection inference based on
+ *          the configured network.
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle         The handle to the inference.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Not supported format
+ */
+int mv_hand_detection_prepare(mv_hand_detection_h handle);
+
+/**
+ * @internal
+ * @brief Performs the hand detection inference on the @a source.
+ *
+ * @since_tizen 9.0
+ * @remarks This function is synchronous and may take considerable time to run.
+ *
+ * @param[in] handle          The handle to the inference
+ * @param[in] source         The handle to the source of the media
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ *                                                  isn't supported
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_hand_detection_create()
+ * @pre Prepare an inference by calling mv_hand_detection_configure()
+ * @pre Prepare an inference by calling mv_hand_detection_prepare()
+ *
+ * @par Inference Example
+ * @snippet hand_detection_sync.c FD sync
+ */
+int mv_hand_detection_inference(mv_hand_detection_h handle, mv_source_h source);
+
+/**
+ * @internal
+ * @brief Performs asynchronously the hand detection inference on the @a source.
+ *
+ * @since_tizen 9.0
+ * @remarks This function operates asynchronously, so it returns immediately upon invocation.
+ *          The inference results are inserted into the outgoing queue within the framework
+ *          in the order of processing, and the results can be obtained through mv_hand_detection_get_result_count()
+ *          and mv_hand_detection_get_bound_box().
+ *
+ * @param[in] handle         The handle to the inference
+ * @param[in] source         The handle to the source of the media
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ *                                                  isn't supported
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_hand_detection_create()
+ * @pre Prepare an inference by calling mv_hand_detection_configure()
+ * @pre Prepare an inference by calling mv_hand_detection_prepare()
+ *
+ * @par Async Inference Example
+ * @snippet hand_detection_async.c FD async
+ */
+int mv_hand_detection_inference_async(mv_hand_detection_h handle, mv_source_h source);
+
+/**
+ * @internal
+ * @brief Gets the hand detection inference result on the @a handle.
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle          The handle to the inference
+ * @param[out] frame_number   A frame number that was inferenced.
+ * @param[out] result_cnt     A number of results.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_hand_detection_create()
+ * @pre Prepare an inference by calling mv_hand_detection_configure()
+ * @pre Prepare an inference by calling mv_hand_detection_prepare()
+ * @pre Request an inference by calling mv_hand_detection_inference()
+ */
+int mv_hand_detection_get_result_count(mv_hand_detection_h handle, unsigned long *frame_number,
+                                                                          unsigned int *result_cnt);
+
+/**
+ * @internal
+ * @brief Gets a bound box to detected hand region.
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle              The handle to the inference
+ * @param[in] index               A result index.
+ * @param[out] left               A left position of the bound box.
+ * @param[out] top                A top position of the bound box.
+ * @param[out] right              A right position of the bound box.
+ * @param[out] bottom             A bottom position of the bound box.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_hand_detection_create()
+ * @pre Prepare an inference by calling mv_hand_detection_configure()
+ * @pre Prepare an inference by calling mv_hand_detection_prepare()
+ * @pre Request an inference by calling mv_hand_detection_inference()
+ * @pre Get result count by calling mv_hand_detection_get_result_count()
+ */
+int mv_hand_detection_get_bound_box(mv_hand_detection_h handle, unsigned int index, int *left, int *top, int *right,
+                                                                       int *bottom);
+/**
+ * @}
+ */
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_MV_HAND_DETECTION_H__ */
diff --git a/include/mv_hand_detection_internal.h b/include/mv_hand_detection_internal.h
new file mode 100644 (file)
index 0000000..abba2ea
--- /dev/null
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_HAND_DETECT_INTERNAL_H__
+#define __TIZEN_MEDIAVISION_HAND_DETECT_INTERNAL_H__
+
+#include <mv_common.h>
+#include <mv_hand_detection_type.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file   mv_hand_detection_internal.h
+ * @internal
+ * @brief  This file contains the Inference based Media Vision API.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_HAND_DETECTION_MODULE
+ * @{
+ */
+
+/**
+ * @internal
+ * @brief Sets user-given model information.
+ * @details Use this function to change the model information instead of default one after calling mv_hand_detection_create().
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle        The handle to the hand detection object.
+ * @param[in] model_file    Model file name.
+ * @param[in] meta_file     Model meta file name.
+ * @param[in] label_file    Label file name.
+ * @param[in] model_name    Model name.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a hand detection handle by calling mv_hand_detection_create()
+ */
+int mv_hand_detection_set_model(mv_hand_detection_h handle, const char *model_file, const char *meta_file,
+                                                               const char *label_file, const char *model_name);
+
+/**
+ * @internal
+ * @brief Sets user-given inference engine and device types for inference.
+ * @details Use this function to change the inference engine and device types for inference instead of default ones after calling mv_hand_detection_create().
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle        The handle to the hand detection object.
+ * @param[in] engine_type   A string of inference engine type.
+ * @param[in] device_type   A string of device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a hand detection handle by calling mv_hand_detection_create()
+ */
+int mv_hand_detection_set_engine(mv_hand_detection_h handle, const char *engine_type, const char *device_type);
+
+/**
+ * @internal
+ * @brief Gets a number of inference engines available for hand detection task API.
+ * @details Use this function to get how many inference engines are supported for hand detection after calling mv_hand_detection_create().
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle         The handle to the hand detection object.
+ * @param[out] engine_count  A number of inference engines available for hand detection API.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a hand detection handle by calling mv_hand_detection_create()
+ */
+int mv_hand_detection_get_engine_count(mv_hand_detection_h handle, unsigned int *engine_count);
+
+/**
+ * @internal
+ * @brief Gets engine type to a given inference engine index.
+ * @details Use this function to get inference engine type with a given engine index after calling mv_hand_detection_get_engine_count().
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle        The handle to the hand detection object.
+ * @param[in] engine_index  An inference engine index for getting the inference engine type.
+ * @param[out] engine_type  A string to inference engine.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Get a number of inference engines available for hand detection task API by calling mv_hand_detection_get_engine_count()
+ */
+int mv_hand_detection_get_engine_type(mv_hand_detection_h handle, const unsigned int engine_index, char **engine_type);
+
+/**
+ * @internal
+ * @brief Gets a number of device types available to a given inference engine.
+ * @details Use this function to get how many device types are supported for a given inference engine after calling mv_hand_detection_create().
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle         The handle to the hand detection object.
+ * @param[in] engine_type    An inference engine string.
+ * @param[out] device_count  A number of device types available for a given inference engine.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a hand detection handle by calling mv_hand_detection_create()
+ */
+int mv_hand_detection_get_device_count(mv_hand_detection_h handle, const char *engine_type, unsigned int *device_count);
+
+/**
+ * @internal
+ * @brief Gets device type list available.
+ * @details Use this function to get what device types are supported for current inference engine type after calling mv_hand_detection_configure().
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle         The handle to the hand detection object.
+ * @param[in] engine_type    An inference engine string.
+ * @param[in] device_index   A device index for getting the device type.
+ * @param[out] device_type   A string to device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a hand detection handle by calling mv_hand_detection_create()
+ * @pre Configure hand detection task by calling mv_hand_detection_configure()
+ */
+int mv_hand_detection_get_device_type(mv_hand_detection_h handle, const char *engine_type,
+                                                                         const unsigned int device_index, char **device_type);
+/**
+ * @}
+ */
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_HAND_DETECT_INTERNAL_H__ */
diff --git a/include/mv_hand_detection_type.h b/include/mv_hand_detection_type.h
new file mode 100644 (file)
index 0000000..de42322
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_MV_HAND_DETECTION_TYPE_H__
+#define __TIZEN_MEDIAVISION_MV_HAND_DETECTION_TYPE_H__
+
+#include <mv_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file   mv_hand_detection_type.h
+ * @brief  This file contains the hand detection handle for Mediavision.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_HAND_DETECTION_MODULE
+ * @{
+ */
+
+/**
+ * @brief The hand detection object handle.
+ *
+ * @since_tizen 9.0
+ */
+typedef void *mv_hand_detection_h;
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_MV_HAND_DETECTION_TYPE_H__ */
index 6237686fe7d4d36a2130723bdbc5a92d6dc9653a..5fa029e2330be39e4830177bce8cbbc6fa362ca3 100644 (file)
@@ -23,6 +23,9 @@ install(
        PATTERN "mv_face_detection_internal.h"
        PATTERN "mv_face_detection.h"
        PATTERN "mv_face_detection_type.h"
+       PATTERN "mv_hand_detection_internal.h"
+       PATTERN "mv_hand_detection.h"
+       PATTERN "mv_hand_detection_type.h"
        )
 install(
        DIRECTORY ${PROJECT_SOURCE_DIR}/include/ DESTINATION include/media
diff --git a/mv_machine_learning/object_detection/include/Anchors.h b/mv_machine_learning/object_detection/include/Anchors.h
new file mode 100644 (file)
index 0000000..8cb4339
--- /dev/null
@@ -0,0 +1,53 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANCHORS_H__
+#define __ANCHORS_H__
+
+#include <cmath>
+#include <vector>
+
+namespace mediavision
+{
+namespace machine_learning
+{
+struct Anchor {
+       float x_center, y_center, w, h;
+};
+
+struct SsdAnchorsCalculatorOptions {
+       int input_size_width {};
+       int input_size_height {};
+       float min_scale {};
+       float max_scale {};
+       float anchor_offset_x {};
+       float anchor_offset_y {};
+       int num_layers {};
+       std::vector<int> feature_map_width;
+       std::vector<int> feature_map_height;
+       std::vector<int> strides;
+       std::vector<float> aspect_ratios;
+       bool reduce_boxes_in_lowest_layer { false };
+       float interpolated_scale_aspect_ratio {};
+       bool fixed_anchor_size { false };
+};
+
+float CalculateScale(float min_scale, float max_scale, int stride_index, int num_strides);
+void GenerateAnchors(std::vector<Anchor> &anchors, const SsdAnchorsCalculatorOptions &options);
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/include/HandDetectionAdapter.h b/mv_machine_learning/object_detection/include/HandDetectionAdapter.h
new file mode 100644 (file)
index 0000000..54f2158
--- /dev/null
@@ -0,0 +1,65 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HAND_DETECTION_ADAPTER_H__
+#define __HAND_DETECTION_ADAPTER_H__
+
+#include <dlog.h>
+
+#include "EngineConfig.h"
+#include "ITask.h"
+#include "MobilenetV1Ssd.h"
+#include "MvMlConfig.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+class HandDetectionAdapter : public mediavision::common::ITask
+{
+private:
+       std::unique_ptr<IObjectDetection> _object_detection;
+       std::shared_ptr<Config> _config;
+       const std::string _config_file_name = "hand_detection.json";
+       const std::string _plugin_config_file_name = "hand_detection_plugin.json";
+
+       void create(std::string model_name = "");
+       template<typename U> void create(ObjectDetectionTaskType task_type);
+       ObjectDetectionTaskType convertToTaskType(std::string model_name);
+
+public:
+       HandDetectionAdapter();
+       ~HandDetectionAdapter();
+
+       void setModelInfo(const std::string &model_file, const std::string &meta_file, const std::string &label_file,
+                                         const std::string &model_name) override;
+       void setEngineInfo(const std::string &engine_type, const std::string &device_type) override;
+       void configure() override;
+       unsigned int getNumberOfEngines() override;
+       const std::string &getEngineType(unsigned int engine_index) override;
+       unsigned int getNumberOfDevices(const std::string &engine_type) override;
+       const std::string &getDeviceType(const std::string &engine_type, unsigned int device_index) override;
+       void prepare() override;
+       void perform(InputBaseType &input) override;
+       void performAsync(InputBaseType &input) override;
+       OutputBaseType &getOutput() override;
+       OutputBaseType &getOutputCache() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/include/PalmDetection.h b/mv_machine_learning/object_detection/include/PalmDetection.h
new file mode 100644 (file)
index 0000000..c5d25e3
--- /dev/null
@@ -0,0 +1,108 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PALM_DETECTION_H__
+#define __PALM_DETECTION_H__
+
+#include "mv_private.h"
+#include <list>
+#include <memory>
+#include <mv_common.h>
+#include <string>
+
+#include "Anchors.h"
+#include "ObjectDetection.h"
+#include <mv_inference_type.h>
+
+#define MAX_PALM_NUM 4
+
+namespace mediavision
+{
+namespace machine_learning
+{
+struct fvec2 {
+       float x {};
+       float y {};
+};
+
+struct f_rect {
+       fvec2 topleft;
+       fvec2 btmright;
+};
+
+struct Palm {
+       // model outputs after decoding
+       float hand_cx {};
+       float hand_cy {};
+       float hand_w {};
+       float hand_h {};
+       fvec2 keys[7];
+
+       // model outputs
+       float score {};
+
+       // palm rectangle
+       f_rect rect;
+
+       // hole hand rectangle
+       float rotation {};
+       fvec2 hand_pos[4];
+};
+
+struct PalmDetectionResult {
+       int num {};
+       Palm palms[MAX_PALM_NUM];
+};
+
+template<typename T> class PalmDetection : public ObjectDetection<T>
+{
+       using ObjectDetection<T>::_config;
+       using ObjectDetection<T>::_preprocess;
+       using ObjectDetection<T>::_labels;
+       using ObjectDetection<T>::_inference;
+
+private:
+       ObjectDetectionResult _result;
+       std::vector<Anchor> _anchors;
+       float _confThreshold = 0.3;
+       float _nmsThreshold = 0.4;
+
+       // NonMaxSuppression
+       float CalcIntersectionOverUnion(f_rect &rect0, f_rect &rect1);
+       static bool Compare(Palm &v1, Palm &v2);
+       int NonMaxSuppression(std::list<Palm> &face_list, std::list<Palm> &face_sel_list);
+
+       // Expand palm to hand
+       float NormalizeRadians(float angle);
+       void ComputeRotation(Palm &palm);
+       void RotVec(fvec2 &vec, float rotation);
+       void ComputeHandRect(Palm &palm);
+       void PackPalmResult(PalmDetectionResult *palm_result, std::list<Palm> &palm_list);
+
+       // Decode palm detection result
+       void DecodeKeypoints(std::list<Palm> &palm_list);
+
+public:
+       PalmDetection(ObjectDetectionTaskType task_type, std::shared_ptr<Config> config);
+       virtual ~PalmDetection() = default;
+
+       ObjectDetectionResult &result() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
index 98fe59b9dae79e5048d52017a92c3c6e170a2ac9..358dfef87104a59cc5b754309719a8d232987d5d 100644 (file)
@@ -53,7 +53,8 @@ enum class ObjectDetectionTaskType {
        MOBILENET_V2_SSD,
        FD_MOBILENET_V1_SSD,
        OD_TRIV2,
-       FD_TRIV2
+       FD_TRIV2,
+       HD_PALM
        // TODO
 };
 
diff --git a/mv_machine_learning/object_detection/meta/hand_detection.json b/mv_machine_learning/object_detection/meta/hand_detection.json
new file mode 100644 (file)
index 0000000..ed4109e
--- /dev/null
@@ -0,0 +1,40 @@
+{
+    "attributes":
+    [
+        {
+            "name" : "MODEL_DEFAULT_PATH",
+            "type" : "string",
+            "value" : "/opt/usr/globalapps/mediavision.object.detection/models/tflite/"
+        },
+               {
+            "name"  : "MODEL_FILE_NAME",
+            "type"  : "string",
+            "value" : "palm_detection_full.tflite"
+        },
+        {
+            "name"  : "DEFAULT_MODEL_NAME",
+            "type"  : "string",
+            "value" : "HD_PALM"
+        },
+        {
+            "name"  : "MODEL_META_FILE_NAME",
+            "type"  : "string",
+            "value" : "palm_detection_full.json"
+        },
+        {
+            "name"  : "MODEL_LABEL_FILE_NAME",
+            "type"  : "string",
+            "value" : "fd_mobilenet_v1_ssd_postop_label.txt"
+        },
+        {
+            "name"  : "BACKEND_TYPE",
+            "type"  : "integer",
+            "value" : 1
+        },
+        {
+            "name"  : "TARGET_DEVICE_TYPE",
+            "type"  : "integer",
+            "value" : 1
+        }
+    ]
+}
diff --git a/mv_machine_learning/object_detection/meta/hand_detection_plugin.json b/mv_machine_learning/object_detection/meta/hand_detection_plugin.json
new file mode 100644 (file)
index 0000000..af15a45
--- /dev/null
@@ -0,0 +1,20 @@
+{
+    "attributes":
+    [
+        {
+            "name" : "PLUGIN_NAME",
+            "type" : "string",
+            "value" : "libobject_detection_plugin.so"
+        },
+        {
+            "name"  : "DEFAULT_MODEL_NAME",
+            "type"  : "string",
+            "value" : "FD_TRIV2"
+        },
+        {
+            "name"  : "USE_PLUGIN",
+            "type"  : "boolean",
+            "value" : false
+        }
+    ]
+}
diff --git a/mv_machine_learning/object_detection/src/Anchors.cpp b/mv_machine_learning/object_detection/src/Anchors.cpp
new file mode 100644 (file)
index 0000000..f3c68ce
--- /dev/null
@@ -0,0 +1,122 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Anchors.h"
+
+using namespace std;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+// Linearly interpolates an anchor scale between min_scale and max_scale for the
+// layer at stride_index (MediaPipe SSD anchor convention).
+// NOTE(review): divides by (num_strides - 1), so num_strides must be >= 2 —
+// the caller passes options.strides.size(); confirm it is never 1.
+float CalculateScale(float min_scale, float max_scale, int stride_index, int num_strides)
+{
+	return min_scale + (max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f);
+}
+
+// Generates SSD anchor boxes for every feature-map cell of every stride layer
+// described by `options`, appending them to `anchors` in (layer, y, x, anchor)
+// order. Ported from MediaPipe's SsdAnchorsCalculator; layers that share the
+// same stride are merged so their anchors are emitted for one grid.
+void GenerateAnchors(vector<Anchor> &anchors, const SsdAnchorsCalculatorOptions &options)
+{
+	int layer_id = 0;
+
+	while (layer_id < static_cast<int>(options.strides.size())) {
+		vector<float> anchor_height;
+		vector<float> anchor_width;
+		vector<float> aspect_ratios;
+		vector<float> scales;
+		// For same strides, we merge the anchors in the same order.
+		int last_same_stride_layer = layer_id;
+
+		while (last_same_stride_layer < (int) options.strides.size() &&
+			   options.strides[last_same_stride_layer] == options.strides[layer_id]) {
+			const float scale = CalculateScale(options.min_scale, options.max_scale, last_same_stride_layer,
+											   options.strides.size());
+
+			if (last_same_stride_layer == 0 && options.reduce_boxes_in_lowest_layer) {
+				// For first layer, it can be specified to use predefined anchors.
+				aspect_ratios.push_back(1.0);
+				aspect_ratios.push_back(2.0);
+				aspect_ratios.push_back(0.5);
+				scales.push_back(0.1);
+				scales.push_back(scale);
+				scales.push_back(scale);
+			} else {
+				for (int aspect_ratio_id = 0; aspect_ratio_id < (int) options.aspect_ratios.size(); ++aspect_ratio_id) {
+					aspect_ratios.push_back(options.aspect_ratios[aspect_ratio_id]);
+					scales.push_back(scale);
+				}
+
+				// Add one extra anchor whose scale is the geometric mean of this
+				// layer's scale and the next layer's scale.
+				if (options.interpolated_scale_aspect_ratio > 0.0) {
+					const float scale_next = last_same_stride_layer == (int) options.strides.size() - 1 ?
+													 1.0f :
+													 CalculateScale(options.min_scale, options.max_scale,
+																	last_same_stride_layer + 1, options.strides.size());
+					scales.push_back(sqrt(scale * scale_next));
+					aspect_ratios.push_back(options.interpolated_scale_aspect_ratio);
+				}
+			}
+
+			last_same_stride_layer++;
+		}
+
+		// Convert (scale, aspect ratio) pairs into anchor width/height.
+		for (int i = 0; i < (int) aspect_ratios.size(); ++i) {
+			const float ratio_sqrts = sqrt(aspect_ratios[i]);
+
+			anchor_height.push_back(scales[i] / ratio_sqrts);
+			anchor_width.push_back(scales[i] * ratio_sqrts);
+		}
+
+		// Feature-map size: explicit from options, or derived from input size / stride.
+		int feature_map_height = 0;
+		int feature_map_width = 0;
+
+		if (options.feature_map_height.size()) {
+			feature_map_height = options.feature_map_height[layer_id];
+			feature_map_width = options.feature_map_width[layer_id];
+		} else {
+			const int stride = options.strides[layer_id];
+			feature_map_height = ceil(1.0f * options.input_size_height / stride);
+			feature_map_width = ceil(1.0f * options.input_size_width / stride);
+		}
+
+		// Emit one anchor per (cell, shape); centers are normalized to [0, 1].
+		for (int y = 0; y < feature_map_height; ++y) {
+			for (int x = 0; x < feature_map_width; ++x) {
+				for (int anchor_id = 0; anchor_id < (int) anchor_height.size(); ++anchor_id) {
+					// TODO: Support specifying anchor_offset_x, anchor_offset_y.
+					const float x_center = (x + options.anchor_offset_x) * 1.0f / feature_map_width;
+					const float y_center = (y + options.anchor_offset_y) * 1.0f / feature_map_height;
+					Anchor new_anchor;
+
+					new_anchor.x_center = x_center;
+					new_anchor.y_center = y_center;
+
+					if (options.fixed_anchor_size) {
+						new_anchor.w = 1.0f;
+						new_anchor.h = 1.0f;
+					} else {
+						new_anchor.w = anchor_width[anchor_id];
+						new_anchor.h = anchor_height[anchor_id];
+					}
+
+					anchors.push_back(new_anchor);
+				}
+			}
+		}
+
+		// Jump past all layers that shared this stride.
+		layer_id = last_same_stride_layer;
+	}
+}
+
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/src/HandDetectionAdapter.cpp b/mv_machine_learning/object_detection/src/HandDetectionAdapter.cpp
new file mode 100644 (file)
index 0000000..1fa005c
--- /dev/null
@@ -0,0 +1,178 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HandDetectionAdapter.h"
+#include "MvMlException.h"
+#include "ObjectDetectionExternal.h"
+#include "PalmDetection.h"
+#include "mv_object_detection_config.h"
+
+using namespace std;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+// Loads the plugin and (if no plugin is used) the meta configuration, then
+// instantiates the task object for the configured default model.
+HandDetectionAdapter::HandDetectionAdapter()
+{
+	_config = make_shared<Config>();
+
+	// If the model type needs external plugin then bypass to load the meta file and just create the external plugin.
+	// In this case, external plugin will use its own meta file approach regardless of Mediavision's one.
+	_config->parsePluginConfigFile(_plugin_config_file_name);
+	if (!_config->isPluginUsed())
+		_config->parseConfigFile(_config_file_name);
+
+	create(_config->getDefaultModelName());
+}
+
+// Lets the task object release backend resources before it is destroyed.
+// _object_detection is always non-null here: the constructor either sets it
+// via create() or throws, and a throwing constructor never runs this dtor.
+HandDetectionAdapter::~HandDetectionAdapter()
+{
+	_object_detection->preDestroy();
+}
+
+// Instantiates the concrete detector for `task_type` with tensor element type U
+// (unsigned char or float, chosen from the model meta's data type).
+// Throws InvalidOperation for task types not supported yet.
+template<typename U> void HandDetectionAdapter::create(ObjectDetectionTaskType task_type)
+{
+	switch (task_type) {
+	case ObjectDetectionTaskType::HD_PALM:
+		_object_detection = make_unique<PalmDetection<U> >(task_type, _config);
+		break;
+	default:
+		throw InvalidOperation("Invalid hand detection task type.");
+	}
+	// TODO.
+}
+
+// Creates the task object for `model_name` (falls back to the configured
+// default when empty). If an external plugin is configured, the plugin wrapper
+// is created instead and Mediavision's meta file is skipped; otherwise the
+// meta file decides the tensor data type used to instantiate the detector.
+// Throws InvalidParameter/InvalidOperation on unknown names or data types.
+void HandDetectionAdapter::create(string model_name)
+{
+	if (model_name.empty())
+		model_name = _config->getDefaultModelName();
+
+	auto task_type = convertToTaskType(model_name);
+
+	if (_config->isPluginUsed()) {
+		const auto &plugin_name = _config->getPluginFileName();
+
+		_object_detection = make_unique<ObjectDetectionExternal>(task_type, plugin_name.c_str());
+		return;
+	}
+
+	_config->loadMetaFile(make_unique<ObjectDetectionParser>(static_cast<int>(task_type)));
+	mv_inference_data_type_e dataType = _config->getInputMetaMap().begin()->second->dataType;
+
+	switch (dataType) {
+	case MV_INFERENCE_DATA_UINT8:
+		create<unsigned char>(task_type);
+		break;
+	case MV_INFERENCE_DATA_FLOAT32:
+		create<float>(task_type);
+		break;
+	default:
+		throw InvalidOperation("Invalid hand detection data type.");
+	}
+}
+
+// Maps a (case-insensitive) model name string to its task type enum.
+// Currently only "HD_PALM" is recognized; anything else throws InvalidParameter.
+// NOTE(review): `transform` is std::transform via `using namespace std`; this
+// translation unit does not include <algorithm> directly — confirm it arrives
+// transitively through one of the project headers.
+ObjectDetectionTaskType HandDetectionAdapter::convertToTaskType(string model_name)
+{
+	if (model_name.empty())
+		throw InvalidParameter("model name is empty.");
+
+	transform(model_name.begin(), model_name.end(), model_name.begin(), ::toupper);
+
+	if (model_name == "HD_PALM")
+		return ObjectDetectionTaskType::HD_PALM;
+	// TODO.
+
+	throw InvalidParameter("Invalid hand detection model name.");
+}
+
+// Registers user-provided model/meta/label files and, when a model name is
+// given, re-creates the task object for it. Failures from setUserModel() or
+// create() are deliberately downgraded to a warning so the previously created
+// default task keeps working (best-effort semantics).
+void HandDetectionAdapter::setModelInfo(const string &model_file, const string &meta_file, const string &label_file,
+										const string &model_name)
+{
+	try {
+		_config->setUserModel(model_file, meta_file, label_file);
+		if (!model_name.empty())
+			create(model_name);
+	} catch (const BaseException &e) {
+		LOGW("A given model name is invalid so default task type will be used.");
+	}
+
+	// NOTE(review): this check runs after setUserModel() has already been
+	// called with the empty paths, and the `return` is redundant at the end of
+	// the function — it only emits the warning. Confirm this matches the other
+	// adapters' intended behavior.
+	if (model_file.empty() && meta_file.empty()) {
+		LOGW("Given model info is invalid so default model info will be used instead.");
+		return;
+	}
+}
+
+// Forwards the requested inference engine and target device to the task object.
+void HandDetectionAdapter::setEngineInfo(const string &engine_type, const string &device_type)
+{
+	_object_detection->setEngineInfo(engine_type, device_type);
+}
+
+// Builds the inference pipeline according to the current configuration.
+void HandDetectionAdapter::configure()
+{
+	_object_detection->configure();
+}
+
+// Returns how many inference engines the task implementation supports.
+unsigned int HandDetectionAdapter::getNumberOfEngines()
+{
+	return _object_detection->getNumberOfEngines();
+}
+
+// Returns the name of the engine at `engine_index`.
+const string &HandDetectionAdapter::getEngineType(unsigned int engine_index)
+{
+	return _object_detection->getEngineType(engine_index);
+}
+
+// Returns how many devices the given engine supports.
+unsigned int HandDetectionAdapter::getNumberOfDevices(const string &engine_type)
+{
+	return _object_detection->getNumberOfDevices(engine_type);
+}
+
+// Returns the name of the device at `device_index` for the given engine.
+const string &HandDetectionAdapter::getDeviceType(const string &engine_type, unsigned int device_index)
+{
+	return _object_detection->getDeviceType(engine_type, device_index);
+}
+
+// Loads the model and makes the task ready for inference.
+void HandDetectionAdapter::prepare()
+{
+	_object_detection->prepare();
+}
+
+// Runs inference synchronously on the given input source.
+void HandDetectionAdapter::perform(InputBaseType &input)
+{
+	_object_detection->perform(input.inference_src);
+}
+
+// Queues the given input for asynchronous inference.
+void HandDetectionAdapter::performAsync(InputBaseType &input)
+{
+	_object_detection->performAsync(static_cast<ObjectDetectionInput &>(input));
+}
+
+// Returns the (freshly decoded) detection result.
+OutputBaseType &HandDetectionAdapter::getOutput()
+{
+	return _object_detection->getOutput();
+}
+
+// Returns the cached result from the last inference without re-decoding.
+OutputBaseType &HandDetectionAdapter::getOutputCache()
+{
+	return _object_detection->getOutputCache();
+}
+
+}
+}
diff --git a/mv_machine_learning/object_detection/src/PalmDetection.cpp b/mv_machine_learning/object_detection/src/PalmDetection.cpp
new file mode 100644 (file)
index 0000000..44bbce9
--- /dev/null
@@ -0,0 +1,351 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <list>
+#include <map>
+#include <string.h>
+
+#include "Anchors.h"
+#include "MvMlException.h"
+#include "PalmDetection.h"
+#include "Postprocess.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+// Builds the SSD anchor set matching MediaPipe's palm_detection_full model:
+// 192x192 input, 4 layers with strides {8, 16, 16, 16}, a single 1.0 aspect
+// ratio, and fixed-size (1x1 normalized) anchors.
+template<typename T>
+PalmDetection<T>::PalmDetection(ObjectDetectionTaskType task_type, shared_ptr<Config> config)
+		: ObjectDetection<T>(task_type, config), _result()
+{
+	// TODO. use meta file instead of using fixed values later
+	SsdAnchorsCalculatorOptions anchor_options { .input_size_width = 192,
+												 .input_size_height = 192,
+												 .min_scale = 0.1484375,
+												 .max_scale = 0.75,
+												 .anchor_offset_x = 0.5f,
+												 .anchor_offset_y = 0.5f,
+												 .num_layers = 4,
+												 .reduce_boxes_in_lowest_layer = false,
+												 .interpolated_scale_aspect_ratio = 1.0,
+												 .fixed_anchor_size = true };
+
+	anchor_options.strides.push_back(8);
+	anchor_options.strides.push_back(16);
+	anchor_options.strides.push_back(16);
+	anchor_options.strides.push_back(16);
+	anchor_options.aspect_ratios.push_back(1.0);
+
+	GenerateAnchors(_anchors, anchor_options);
+}
+
+// Decodes the raw model output into candidate palms: for every anchor whose
+// sigmoid score exceeds _confThreshold, converts the 18 regression values
+// (4 box offsets + 7 keypoints x 2) from anchor-relative pixel offsets into
+// normalized [0, 1] coordinates and appends a Palm to palm_list.
+// NOTE(review): assumes the first output tensor holds the box regressors and
+// the second the scores, and that one score exists per anchor — confirm
+// against the model's meta file.
+template<typename T> void PalmDetection<T>::DecodeKeypoints(list<Palm> &palm_list)
+{
+	Palm palm_item;
+	int tensorIdx = 0;
+	float tensorWidth = static_cast<float>(_inference->getInputWidth());
+	float tensorHeight = static_cast<float>(_inference->getInputHeight());
+
+	vector<string> names;
+	ObjectDetection<T>::getOutputNames(names);
+
+	vector<float> bbox_tensor;
+	ObjectDetection<T>::getOutputTensor(names[0], bbox_tensor);
+
+	vector<float> prob_tensor;
+	ObjectDetection<T>::getOutputTensor(names[1], prob_tensor);
+
+	for (auto itr = _anchors.begin(); itr != _anchors.end(); tensorIdx++, itr++) {
+		Anchor anchor = *itr;
+		float score0 = prob_tensor[tensorIdx];
+		// Raw logit -> probability via sigmoid.
+		float score = 1.0f / (1.0f + exp(-score0));
+		if (score > _confThreshold) {
+			// 18 floats per anchor: cx, cy, w, h + 7 (x, y) keypoints.
+			float *p = bbox_tensor.data() + (tensorIdx * 18);
+
+			/* boundary box */
+			float sx = p[0];
+			float sy = p[1];
+			float w = p[2];
+			float h = p[3];
+
+			// Box center = regressed offset + anchor center (in input pixels).
+			float cx = sx + anchor.x_center * tensorWidth;
+			float cy = sy + anchor.y_center * tensorHeight;
+
+			// Normalize everything back to [0, 1].
+			cx /= tensorWidth;
+			cy /= tensorHeight;
+			w /= tensorWidth;
+			h /= tensorHeight;
+
+			fvec2 topleft, btmright;
+			topleft.x = cx - w * 0.5f;
+			topleft.y = cy - h * 0.5f;
+			btmright.x = cx + w * 0.5f;
+			btmright.y = cy + h * 0.5f;
+
+			palm_item.score = score;
+			palm_item.rect.topleft = topleft;
+			palm_item.rect.btmright = btmright;
+
+			/* landmark positions (7 keys) */
+			for (int keyIdx = 0; keyIdx < 7; keyIdx++) {
+				float lx = p[4 + (2 * keyIdx) + 0];
+				float ly = p[4 + (2 * keyIdx) + 1];
+
+				lx += anchor.x_center * tensorWidth;
+				ly += anchor.y_center * tensorHeight;
+				lx /= tensorWidth;
+				ly /= tensorHeight;
+
+				palm_item.keys[keyIdx].x = lx;
+				palm_item.keys[keyIdx].y = ly;
+			}
+
+			palm_list.push_back(palm_item);
+		}
+	}
+}
+
+template<typename T> float PalmDetection<T>::CalcIntersectionOverUnion(f_rect &rect0, f_rect &rect1)
+{
+       float sx0 = rect0.topleft.x;
+       float sy0 = rect0.topleft.y;
+       float ex0 = rect0.btmright.x;
+       float ey0 = rect0.btmright.y;
+       float sx1 = rect1.topleft.x;
+       float sy1 = rect1.topleft.y;
+       float ex1 = rect1.btmright.x;
+       float ey1 = rect1.btmright.y;
+
+       float xmin0 = min(sx0, ex0);
+       float ymin0 = min(sy0, ey0);
+       float xmax0 = max(sx0, ex0);
+       float ymax0 = max(sy0, ey0);
+       float xmin1 = min(sx1, ex1);
+       float ymin1 = min(sy1, ey1);
+       float xmax1 = max(sx1, ex1);
+       float ymax1 = max(sy1, ey1);
+
+       float area0 = (ymax0 - ymin0) * (xmax0 - xmin0);
+       float area1 = (ymax1 - ymin1) * (xmax1 - xmin1);
+       if (area0 <= 0 || area1 <= 0)
+               return 0.0f;
+
+       float intersect_xmin = max(xmin0, xmin1);
+       float intersect_ymin = max(ymin0, ymin1);
+       float intersect_xmax = min(xmax0, xmax1);
+       float intersect_ymax = min(ymax0, ymax1);
+
+       float intersect_area = max(intersect_ymax - intersect_ymin, 0.0f) * max(intersect_xmax - intersect_xmin, 0.0f);
+
+       return intersect_area / (area0 + area1 - intersect_area);
+}
+
+// Ordering predicate for list<Palm>::sort: higher score first (descending).
+template<typename T> bool PalmDetection<T>::Compare(Palm &v1, Palm &v2)
+{
+	return (v1.score > v2.score);
+}
+
+template<typename T> int PalmDetection<T>::NonMaxSuppression(list<Palm> &face_list, list<Palm> &face_sel_list)
+{
+       face_list.sort(Compare);
+
+       for (auto itr = face_list.begin(); itr != face_list.end(); itr++) {
+               Palm face_candidate = *itr;
+               int ignore_candidate = false;
+
+               for (auto itr_sel = face_sel_list.rbegin(); itr_sel != face_sel_list.rend(); itr_sel++) {
+                       Palm face_sel = *itr_sel;
+                       float iou = CalcIntersectionOverUnion(face_candidate.rect, face_sel.rect);
+
+                       if (iou >= _nmsThreshold) {
+                               ignore_candidate = true;
+                               break;
+                       }
+               }
+
+               if (!ignore_candidate) {
+                       face_sel_list.push_back(face_candidate);
+
+                       if (face_sel_list.size() >= MAX_PALM_NUM)
+                               break;
+               }
+       }
+
+       return 0;
+}
+
+// Wraps an angle in radians into the range [-pi, pi).
+template<typename T> float PalmDetection<T>::NormalizeRadians(float angle)
+{
+	return angle - 2 * M_PI * floor((angle - (-M_PI)) / (2 * M_PI));
+}
+
+// Computes the in-plane rotation that makes the wrist-to-middle-finger-MCP
+// vector point straight up (target angle pi/2), stored normalized to [-pi, pi).
+template<typename T> void PalmDetection<T>::ComputeRotation(Palm &palm)
+{
+	float x0 = palm.keys[0].x; // Center of wrist.
+	float y0 = palm.keys[0].y;
+	float x1 = palm.keys[2].x; // MCP of middle finger.
+	float y1 = palm.keys[2].y;
+
+	float target_angle = M_PI * 0.5f;
+	// Negate dy because image y grows downward while atan2 assumes y-up.
+	float rotation = target_angle - atan2(-(y1 - y0), x1 - x0);
+
+	palm.rotation = NormalizeRadians(rotation);
+}
+
+// Rotates the 2D vector `vec` in place by `rotation` radians.
+template<typename T> void PalmDetection<T>::RotVec(fvec2 &vec, float rotation)
+{
+	const float c = cos(rotation);
+	const float s = sin(rotation);
+	const float x = vec.x;
+	const float y = vec.y;
+
+	// Standard 2D rotation matrix applied to (x, y).
+	vec.x = x * c - y * s;
+	vec.y = x * s + y * c;
+}
+
+// Derives the hand crop rectangle from the palm box: shifts the center half a
+// box height toward the fingers (along the rotated up-axis), makes the box
+// square on its longer side, doubles it, and stores the four rotated corner
+// positions in palm.hand_pos (normalized image coordinates).
+template<typename T> void PalmDetection<T>::ComputeHandRect(Palm &palm)
+{
+	float width = palm.rect.btmright.x - palm.rect.topleft.x;
+	float height = palm.rect.btmright.y - palm.rect.topleft.y;
+	float palm_cx = palm.rect.topleft.x + width * 0.5f;
+	float palm_cy = palm.rect.topleft.y + height * 0.5f;
+	float hand_cx;
+	float hand_cy;
+	float rotation = palm.rotation;
+	// Shift the crop center upward (toward the fingers) by half the box height.
+	float shift_x = 0.0f;
+	float shift_y = -0.5f;
+
+	if (rotation == 0.0f) {
+		hand_cx = palm_cx + (width * shift_x);
+		hand_cy = palm_cy + (height * shift_y);
+	} else {
+		// Rotate the shift vector by the palm's rotation before applying it.
+		float dx = (width * shift_x) * cos(rotation) - (height * shift_y) * sin(rotation);
+
+		float dy = (width * shift_x) * sin(rotation) + (height * shift_y) * cos(rotation);
+
+		hand_cx = palm_cx + dx;
+		hand_cy = palm_cy + dy;
+	}
+
+	// make the crop rectangle
+	float long_side = max(width, height);
+	width = long_side;
+	height = long_side;
+
+	// Expand the square crop by 2x so the whole hand fits, not just the palm.
+	float hand_w = width * 2.0f;
+	float hand_h = height * 2.0f;
+
+	palm.hand_cx = hand_cx;
+	palm.hand_cy = hand_cy;
+	palm.hand_w = hand_w;
+	palm.hand_h = hand_h;
+
+	float dx = hand_w * 0.5f;
+	float dy = hand_h * 0.5f;
+
+	// Corners around the origin, counter-clockwise from top-left.
+	palm.hand_pos[0].x = -dx;
+	palm.hand_pos[0].y = -dy;
+	palm.hand_pos[1].x = +dx;
+	palm.hand_pos[1].y = -dy;
+	palm.hand_pos[2].x = +dx;
+	palm.hand_pos[2].y = +dy;
+	palm.hand_pos[3].x = -dx;
+	palm.hand_pos[3].y = +dy;
+
+	// Rotate each corner, then translate to the crop center.
+	for (int posIdx = 0; posIdx < 4; posIdx++) {
+		RotVec(palm.hand_pos[posIdx], rotation);
+		palm.hand_pos[posIdx].x += hand_cx;
+		palm.hand_pos[posIdx].y += hand_cy;
+	}
+}
+
+template<typename T> void PalmDetection<T>::PackPalmResult(PalmDetectionResult *palm_result, list<Palm> &palm_list)
+{
+       int num_palms = 0;
+
+       for (auto itr = palm_list.begin(); itr != palm_list.end(); itr++) {
+               Palm palm = *itr;
+
+               ComputeRotation(palm);
+               ComputeHandRect(palm);
+
+               memcpy(&palm_result->palms[num_palms++], &palm, sizeof(palm));
+               palm_result->num = num_palms;
+
+               if (num_palms >= MAX_PALM_NUM)
+                       break;
+       }
+}
+
+template<typename T> ObjectDetectionResult &PalmDetection<T>::result()
+{
+       // Clear _result object because result() function can be called every time user wants
+       // so make sure to clear existing result data before getting the data again.
+       _result = ObjectDetectionResult();
+
+       list<Palm> palm;
+       DecodeKeypoints(palm);
+
+       list<Palm> palmNms;
+       NonMaxSuppression(palm, palmNms);
+
+       PalmDetectionResult palmResult;
+       PackPalmResult(&palmResult, palmNms);
+
+       float img_width = static_cast<float>(_preprocess.getImageWidth()[0]);
+       float img_height = static_cast<float>(_preprocess.getImageHeight()[0]);
+
+       _result.number_of_objects = 0;
+
+       for (auto palm : palmResult.palms) {
+               float min_x = static_cast<float>(img_width);
+               float max_x = 0.0f;
+               float min_y = static_cast<float>(img_height);
+               float max_y = 0.0f;
+
+               if (palm.score < 0.5)
+                       continue;
+
+               for (unsigned int idx = 0; idx < 4; ++idx) {
+                       min_x = min(palm.hand_pos[idx].x, min_x);
+                       max_x = max(palm.hand_pos[idx].x, max_x);
+                       min_y = min(palm.hand_pos[idx].y, min_y);
+                       max_y = max(palm.hand_pos[idx].y, max_y);
+               }
+
+               float left = min_x * img_width < 0.0f ? 0.0f : min_x * img_width;
+               float right = max_x * img_width >= img_width ? img_width - 1.0f : max_x * img_width;
+               float top = min_y * img_height < 0 ? 0 : min_y * img_height;
+               float bottom = max_y * img_height >= img_height ? img_height - 1.0f : max_y * img_height;
+
+               _result.left.push_back(static_cast<int>(left));
+               _result.right.push_back(static_cast<int>(right));
+               _result.top.push_back(static_cast<int>(top));
+               _result.bottom.push_back(static_cast<int>(bottom));
+               _result.number_of_objects++;
+       }
+
+       return _result;
+}
+
+// Explicit instantiations for the tensor element types the adapter can select
+// (MV_INFERENCE_DATA_FLOAT32 and MV_INFERENCE_DATA_UINT8).
+template class PalmDetection<float>;
+template class PalmDetection<unsigned char>;
+
+}
+}
diff --git a/mv_machine_learning/object_detection/src/mv_hand_detection.cpp b/mv_machine_learning/object_detection/src/mv_hand_detection.cpp
new file mode 100644 (file)
index 0000000..8eef177
--- /dev/null
@@ -0,0 +1,358 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_hand_detection.h"
+#include "Context.h"
+#include "HandDetectionAdapter.h"
+#include "ITask.h"
+#include "MvMlException.h"
+#include "mv_feature_key.h"
+#include "mv_hand_detection_internal.h"
+#include "mv_private.h"
+#include "native_capi.h"
+#include "object_detection_type.h"
+
+#include <algorithm>
+#include <iostream>
+#include <mutex>
+#include <new>
+#include <string>
+#include <unistd.h>
+
+#define TASK_NAME "hand_detection"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::common;
+using namespace mediavision::machine_learning;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning::exception;
+
+// Tizen feature keys required for this API.
+// NOTE(review): the second key is the *face* inference feature, apparently
+// copied from the face detection C API — confirm whether hand detection should
+// gate on "vision.inference.face" or on the generic/image inference feature.
+static const char *feature_keys[] = { "http://tizen.org/feature/vision.inference",
+									  "http://tizen.org/feature/vision.inference.face" };
+static const size_t num_keys = sizeof(feature_keys) / sizeof(char *);
+
+// Creates a hand detection handle: allocates the native context and registers
+// a HandDetectionAdapter task under TASK_NAME. On success stores the context
+// in *handle and returns MEDIA_VISION_ERROR_NONE; adapter construction errors
+// are translated into media-vision error codes.
+int mv_hand_detection_create(mv_hand_detection_h *handle)
+{
+	MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+	MEDIA_VISION_NULL_ARG_CHECK(handle);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	mv_hand_detection_h ctx = nullptr;
+
+	try {
+		ctx = machine_learning_native_create();
+		machine_learning_native_add(ctx, TASK_NAME, new HandDetectionAdapter());
+	} catch (const BaseException &e) {
+		return e.getError();
+	} catch (const std::exception &e) {
+		LOGE("%s", e.what());
+		return MEDIA_VISION_ERROR_INTERNAL;
+	}
+
+	*handle = ctx;
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+// Destroys the native context and all tasks registered on it.
+// NOTE(review): unlike the other entry points this has no try/catch —
+// presumably machine_learning_native_destroy() cannot throw; confirm.
+int mv_hand_detection_destroy(mv_hand_detection_h handle)
+{
+	MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+	MEDIA_VISION_INSTANCE_CHECK(handle);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	machine_learning_native_destroy(handle);
+
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+// Sets user model, meta and label files (and optionally a model name) on the
+// hand detection task. model_file/meta_file/label_file must be non-NULL;
+// model_name may be NULL to keep the current task type.
+int mv_hand_detection_set_model(mv_hand_detection_h handle, const char *model_file, const char *meta_file,
+								const char *label_file, const char *model_name)
+{
+	MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+	MEDIA_VISION_INSTANCE_CHECK(handle);
+	MEDIA_VISION_NULL_ARG_CHECK(model_file);
+	MEDIA_VISION_NULL_ARG_CHECK(meta_file);
+	MEDIA_VISION_NULL_ARG_CHECK(label_file);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	try {
+		machine_learning_native_set_model(handle, TASK_NAME, model_file, meta_file, label_file, model_name);
+	} catch (const BaseException &e) {
+		LOGE("%s", e.what());
+		return e.getError();
+	}
+
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+// Selects the inference backend engine and target device for the task.
+int mv_hand_detection_set_engine(mv_hand_detection_h handle, const char *backend_type, const char *device_type)
+{
+	MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+	MEDIA_VISION_INSTANCE_CHECK(handle);
+	MEDIA_VISION_NULL_ARG_CHECK(backend_type);
+	MEDIA_VISION_NULL_ARG_CHECK(device_type);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	try {
+		machine_learning_native_set_engine(handle, TASK_NAME, backend_type, device_type);
+	} catch (const BaseException &e) {
+		LOGE("%s", e.what());
+		return e.getError();
+	}
+
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+// Reports how many inference engines the task supports via *engine_count.
+int mv_hand_detection_get_engine_count(mv_hand_detection_h handle, unsigned int *engine_count)
+{
+	MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+	MEDIA_VISION_INSTANCE_CHECK(handle);
+	MEDIA_VISION_NULL_ARG_CHECK(engine_count);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	try {
+		machine_learning_native_get_engine_count(handle, TASK_NAME, engine_count);
+	} catch (const BaseException &e) {
+		LOGE("%s", e.what());
+		return e.getError();
+	}
+
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+// Returns the engine name at engine_index via *engine_type (string owned by
+// the native layer).
+int mv_hand_detection_get_engine_type(mv_hand_detection_h handle, const unsigned int engine_index, char **engine_type)
+{
+	MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+	MEDIA_VISION_INSTANCE_CHECK(handle);
+	MEDIA_VISION_NULL_ARG_CHECK(engine_type);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	try {
+		machine_learning_native_get_engine_type(handle, TASK_NAME, engine_index, engine_type);
+	} catch (const BaseException &e) {
+		LOGE("%s", e.what());
+		return e.getError();
+	}
+
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_hand_detection_get_device_count(mv_hand_detection_h handle, const char *engine_type, unsigned int *device_count)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(device_count);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               machine_learning_native_get_device_count(handle, TASK_NAME, engine_type, device_count);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
/**
 * Gets the name of the device at the given index for the given inference engine.
 *
 * @param[in]  handle        Hand detection task handle.
 * @param[in]  engine_type   Name of the engine to query; must not be NULL.
 * @param[in]  device_index  Index of the device to query (range checked by the
 *                           native layer, not here).
 * @param[out] device_type   Filled with the device name string; must not be NULL.
 * @return MEDIA_VISION_ERROR_NONE on success, otherwise an error code.
 */
int mv_hand_detection_get_device_type(mv_hand_detection_h handle, const char *engine_type,
									  const unsigned int device_index, char **device_type)
{
	MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));

	MEDIA_VISION_INSTANCE_CHECK(handle);
	MEDIA_VISION_NULL_ARG_CHECK(engine_type);
	MEDIA_VISION_NULL_ARG_CHECK(device_type);

	MEDIA_VISION_FUNCTION_ENTER();

	try {
		machine_learning_native_get_device_type(handle, TASK_NAME, engine_type, device_index, device_type);
	} catch (const BaseException &e) {
		LOGE("%s", e.what());
		return e.getError();
	}

	MEDIA_VISION_FUNCTION_LEAVE();

	return MEDIA_VISION_ERROR_NONE;
}
+
/**
 * Configures the hand detection task from its engine-configuration settings
 * (presumably the values loaded from hand_detection.json — confirm against
 * the HandDetectionAdapter implementation).
 *
 * Must be called before mv_hand_detection_prepare().
 *
 * @param[in] handle  Hand detection task handle.
 * @return MEDIA_VISION_ERROR_NONE on success, otherwise an error code.
 */
int mv_hand_detection_configure(mv_hand_detection_h handle)
{
	MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
	MEDIA_VISION_INSTANCE_CHECK(handle);

	MEDIA_VISION_FUNCTION_ENTER();

	try {
		machine_learning_native_configure(handle, TASK_NAME);
	} catch (const BaseException &e) {
		LOGE("%s", e.what());
		return e.getError();
	}

	MEDIA_VISION_FUNCTION_LEAVE();

	return MEDIA_VISION_ERROR_NONE;
}
+
/**
 * Prepares the hand detection task for inference (model loading and engine
 * initialization happen in the native layer).
 *
 * Call after mv_hand_detection_configure() and before
 * mv_hand_detection_inference()/mv_hand_detection_inference_async().
 *
 * @param[in] handle  Hand detection task handle.
 * @return MEDIA_VISION_ERROR_NONE on success, otherwise an error code.
 */
int mv_hand_detection_prepare(mv_hand_detection_h handle)
{
	MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
	MEDIA_VISION_INSTANCE_CHECK(handle);

	MEDIA_VISION_FUNCTION_ENTER();

	try {
		machine_learning_native_prepare(handle, TASK_NAME);
	} catch (const BaseException &e) {
		LOGE("%s", e.what());
		return e.getError();
	}

	MEDIA_VISION_FUNCTION_LEAVE();

	return MEDIA_VISION_ERROR_NONE;
}
+
/**
 * Runs hand detection inference synchronously on the given media source.
 *
 * Results are retrieved afterwards via mv_hand_detection_get_result_count()
 * and mv_hand_detection_get_bound_box().
 *
 * @param[in] handle  Hand detection task handle.
 * @param[in] source  Media source (image/frame) to run inference on.
 * @return MEDIA_VISION_ERROR_NONE on success, otherwise an error code.
 */
int mv_hand_detection_inference(mv_hand_detection_h handle, mv_source_h source)
{
	MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
	MEDIA_VISION_INSTANCE_CHECK(source);
	MEDIA_VISION_INSTANCE_CHECK(handle);

	MEDIA_VISION_FUNCTION_ENTER();

	try {
		/* Hand detection reuses the object-detection input wrapper. */
		ObjectDetectionInput input(source);

		machine_learning_native_inference(handle, TASK_NAME, input);
	} catch (const BaseException &e) {
		LOGE("%s", e.what());
		return e.getError();
	}

	MEDIA_VISION_FUNCTION_LEAVE();

	return MEDIA_VISION_ERROR_NONE;
}
+
+int mv_hand_detection_inference_async(mv_hand_detection_h handle, mv_source_h source)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_INSTANCE_CHECK(source);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               ObjectDetectionInput input(source);
+
+               machine_learning_native_inference_async(handle, TASK_NAME, input);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_hand_detection_get_result_count(mv_hand_detection_h handle, unsigned long *frame_number,
+                                                                          unsigned int *result_cnt)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_INSTANCE_CHECK(frame_number);
+       MEDIA_VISION_INSTANCE_CHECK(result_cnt);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               auto &result = static_cast<ObjectDetectionResult &>(machine_learning_native_get_result(handle, TASK_NAME));
+
+               *frame_number = result.frame_number;
+               *result_cnt = result.number_of_objects;
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_hand_detection_get_bound_box(mv_hand_detection_h handle, unsigned int index, int *left, int *top, int *right,
+                                                                       int *bottom)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_INSTANCE_CHECK(left);
+       MEDIA_VISION_INSTANCE_CHECK(top);
+       MEDIA_VISION_INSTANCE_CHECK(right);
+       MEDIA_VISION_INSTANCE_CHECK(bottom);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               auto &result =
+                               static_cast<ObjectDetectionResult &>(machine_learning_native_get_result_cache(handle, TASK_NAME));
+               if (index >= result.number_of_objects) {
+                       LOGE("Invalid index(index = %u, result count = %u).", index, result.number_of_objects);
+                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+               }
+
+               *left = result.left[index];
+               *top = result.top[index];
+               *right = result.right[index];
+               *bottom = result.bottom[index];
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
\ No newline at end of file
index c309a64709506340921160b39bbf364b1cdaf4a5..e3c6528d7e686c1a197cd02d790ebb0f4d954256 100644 (file)
@@ -392,6 +392,8 @@ find . -name '*.gcno' -not -path "./test/*" -not -path "./mv_machine_learning/*"
 %{_datadir}/%{name}/object_detection_plugin.json
 %{_datadir}/%{name}/face_detection.json
 %{_datadir}/%{name}/face_detection_plugin.json
+%{_datadir}/%{name}/hand_detection.json
+%{_datadir}/%{name}/hand_detection_plugin.json
 %{_libdir}/libmv_object_detection.so
 %endif
 %if "%{enable_ml_object_detection_3d}" == "1"
@@ -438,6 +440,10 @@ find . -name '*.gcno' -not -path "./test/*" -not -path "./mv_machine_learning/*"
 %{_includedir}/media/IObjectDetection.h
 %{_includedir}/media/object_detection_type.h
 %{_libdir}/pkgconfig/*object-detection.pc
+%{_includedir}/media/mv_hand_detection_internal.h
+%{_includedir}/media/mv_hand_detection.h
+%{_includedir}/media/mv_hand_detection_type.h
+
 %endif
 %if "%{enable_ml_object_detection_3d}" == "1"
 %{_includedir}/media/mv_object_detection_3d_internal.h