"${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/object_detection_plugin.json"
"${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/face_detection.json"
"${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/face_detection_plugin.json"
+ "${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/hand_detection.json"
+ "${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/hand_detection_plugin.json"
)
install(FILES ${OBJECT_DETECTION_JSON_FILES} DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name})
list(APPEND TOTAL_REQUIRED ${PC_NAME})
--- /dev/null
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_MV_HAND_DETECTION_H__
+#define __TIZEN_MEDIAVISION_MV_HAND_DETECTION_H__
+
+#include <mv_common.h>
+#include <mv_hand_detection_type.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file mv_hand_detection.h
+ * @internal
+ * @brief This file contains the Inference based Media Vision API.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_HAND_DETECTION_MODULE
+ * @{
+ */
+
+/**
+ * @internal
+ * @brief Creates an inference handle for hand detection object.
+ * @details Use this function to create an inference handle. After the creation
+ * the hand detection task has to be prepared with
+ * mv_hand_detection_prepare() function to prepare a network
+ * for the inference.
+ *
+ * @since_tizen 9.0
+ *
+ * @remarks The @a handle should be released using mv_hand_detection_destroy().
+ *
+ * @param[out] handle The handle to the inference to be created.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ *
+ * @code
+ * #include <mv_hand_detection.h>
+ * ...
+ * mv_hand_detection_h handle = NULL;
+ * mv_hand_detection_create(&handle);
+ * ...
+ * mv_hand_detection_destroy(handle);
+ * @endcode
+ *
+ * @see mv_hand_detection_destroy()
+ * @see mv_hand_detection_prepare()
+ */
+int mv_hand_detection_create(mv_hand_detection_h *handle);
+
+/**
+ * @internal
+ * @brief Destroys inference handle and releases all its resources.
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle The handle to the inference to be destroyed.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create inference handle by using mv_hand_detection_create()
+ *
+ * @see mv_hand_detection_create()
+ */
+int mv_hand_detection_destroy(mv_hand_detection_h handle);
+
+/**
+ * @internal
+ * @brief Configures the backend for the hand detection inference.
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle The handle to the inference
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ */
+int mv_hand_detection_configure(mv_hand_detection_h handle);
+
+/**
+ * @internal
+ * @brief Prepares the hand detection inference.
+ * @details Use this function to prepare the hand detection inference based on
+ * the configured network.
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle The handle to the inference.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Not supported format
+ */
+int mv_hand_detection_prepare(mv_hand_detection_h handle);
+
+/**
+ * @internal
+ * @brief Performs the hand detection inference on the @a source.
+ *
+ * @since_tizen 9.0
+ * @remarks This function is synchronous and may take considerable time to run.
+ *
+ * @param[in] handle The handle to the inference
+ * @param[in] source The handle to the source of the media
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ * isn't supported
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_hand_detection_create()
+ * @pre Prepare an inference by calling mv_hand_detection_configure()
+ * @pre Prepare an inference by calling mv_hand_detection_prepare()
+ *
+ * @par Inference Example
+ * @snippet hand_detection_sync.c FD sync
+ */
+int mv_hand_detection_inference(mv_hand_detection_h handle, mv_source_h source);
+
+/**
+ * @internal
+ * @brief Performs asynchronously the hand detection inference on the @a source.
+ *
+ * @since_tizen 9.0
+ * @remarks This function operates asynchronously, so it returns immediately upon invocation.
+ * The inference results are inserted into the outgoing queue within the framework
+ * in the order of processing, and the results can be obtained through mv_hand_detection_get_result_count()
+ * and mv_hand_detection_get_bound_box().
+ *
+ * @param[in] handle The handle to the inference
+ * @param[in] source The handle to the source of the media
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ * isn't supported
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_hand_detection_create()
+ * @pre Prepare an inference by calling mv_hand_detection_configure()
+ * @pre Prepare an inference by calling mv_hand_detection_prepare()
+ *
+ * @par Async Inference Example
+ * @snippet hand_detection_async.c FD async
+ */
+int mv_hand_detection_inference_async(mv_hand_detection_h handle, mv_source_h source);
+
+/**
+ * @internal
+ * @brief Gets the hand detection inference result on the @a handle.
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle The handle to the inference
+ * @param[out] frame_number A frame number inferenced.
+ * @param[out] result_cnt A number of results.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_hand_detection_create()
+ * @pre Prepare an inference by calling mv_hand_detection_configure()
+ * @pre Prepare an inference by calling mv_hand_detection_prepare()
+ * @pre Request an inference by calling mv_hand_detection_inference()
+ */
+int mv_hand_detection_get_result_count(mv_hand_detection_h handle, unsigned long *frame_number,
+ unsigned int *result_cnt);
+
+/**
+ * @internal
+ * @brief Gets a bound box to detected hand region.
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle The handle to the inference
+ * @param[in] index A result index.
+ * @param[out] left A left position of bound box.
+ * @param[out] top A top position of bound box.
+ * @param[out] right A right position of bound box.
+ * @param[out] bottom A bottom position of bound box.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_hand_detection_create()
+ * @pre Prepare an inference by calling mv_hand_detection_configure()
+ * @pre Prepare an inference by calling mv_hand_detection_prepare()
+ * @pre Request an inference by calling mv_hand_detection_inference()
+ * @pre Get result count by calling mv_hand_detection_get_result_count()
+ */
+int mv_hand_detection_get_bound_box(mv_hand_detection_h handle, unsigned int index, int *left, int *top, int *right,
+ int *bottom);
+/**
+ * @}
+ */
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_MV_HAND_DETECTION_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_HAND_DETECT_INTERNAL_H__
+#define __TIZEN_MEDIAVISION_HAND_DETECT_INTERNAL_H__
+
+#include <mv_common.h>
+#include <mv_hand_detection_type.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file mv_hand_detection_internal.h
+ * @internal
+ * @brief This file contains the Inference based Media Vision API.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_HAND_DETECTION_MODULE
+ * @{
+ */
+
+/**
+ * @internal
+ * @brief Sets user-given model information.
+ * @details Use this function to change the model information instead of default one after calling mv_hand_detection_create().
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle The handle to the hand detection object.
+ * @param[in] model_file Model file name.
+ * @param[in] meta_file Model meta file name.
+ * @param[in] label_file Label file name.
+ * @param[in] model_name Model name.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a hand detection handle by calling mv_hand_detection_create()
+ */
+int mv_hand_detection_set_model(mv_hand_detection_h handle, const char *model_file, const char *meta_file,
+ const char *label_file, const char *model_name);
+
+/**
+ * @internal
+ * @brief Sets user-given inference engine and device types for inference.
+ * @details Use this function to change the inference engine and device types for inference instead of default ones after calling mv_hand_detection_create().
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle The handle to the hand detection object.
+ * @param[in] engine_type A string of inference engine type.
+ * @param[in] device_type A string of device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a hand detection handle by calling mv_hand_detection_create()
+ */
+int mv_hand_detection_set_engine(mv_hand_detection_h handle, const char *engine_type, const char *device_type);
+
+/**
+ * @internal
+ * @brief Gets a number of inference engines available for hand detection task API.
+ * @details Use this function to get how many inference engines are supported for hand detection after calling mv_hand_detection_create().
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle The handle to the hand detection object.
+ * @param[out] engine_count A number of inference engines available for hand detection API.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a hand detection handle by calling mv_hand_detection_create()
+ */
+int mv_hand_detection_get_engine_count(mv_hand_detection_h handle, unsigned int *engine_count);
+
+/**
+ * @internal
+ * @brief Gets engine type to a given inference engine index.
+ * @details Use this function to get inference engine type with a given engine index after calling mv_hand_detection_get_engine_count().
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle The handle to the hand detection object.
+ * @param[in] engine_index An inference engine index for getting the inference engine type.
+ * @param[out] engine_type A string to inference engine.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Get a number of inference engines available for hand detection task API by calling mv_hand_detection_get_engine_count()
+ */
+int mv_hand_detection_get_engine_type(mv_hand_detection_h handle, const unsigned int engine_index, char **engine_type);
+
+/**
+ * @internal
+ * @brief Gets a number of device types available to a given inference engine.
+ * @details Use this function to get how many device types are supported for a given inference engine after calling mv_hand_detection_create().
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle The handle to the hand detection object.
+ * @param[in] engine_type A inference engine string.
+ * @param[out] device_count A number of device types available for a given inference engine.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a hand detection handle by calling mv_hand_detection_create()
+ */
+int mv_hand_detection_get_device_count(mv_hand_detection_h handle, const char *engine_type, unsigned int *device_count);
+
+/**
+ * @internal
+ * @brief Gets device type list available.
+ * @details Use this function to get what device types are supported for current inference engine type after calling mv_hand_detection_configure().
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle The handle to the hand detection object.
+ * @param[in] engine_type A inference engine string.
+ * @param[in] device_index A device index for getting the device type.
+ * @param[out] device_type A string to device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a hand detection handle by calling mv_hand_detection_create()
+ * @pre Configure hand detection task by calling mv_hand_detection_configure()
+ */
+int mv_hand_detection_get_device_type(mv_hand_detection_h handle, const char *engine_type,
+ const unsigned int device_index, char **device_type);
+/**
+ * @}
+ */
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_HAND_DETECT_INTERNAL_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_MV_HAND_DETECTION_TYPE_H__
+#define __TIZEN_MEDIAVISION_MV_HAND_DETECTION_TYPE_H__
+
+#include <mv_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file mv_hand_detection_type.h
+ * @brief This file contains the hand detection handle for Mediavision.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_HAND_DETECTION_MODULE
+ * @{
+ */
+
+/**
+ * @brief The hand detection object handle.
+ *
+ * @since_tizen 9.0
+ */
+typedef void *mv_hand_detection_h;
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_MV_HAND_DETECTION_TYPE_H__ */
PATTERN "mv_face_detection_internal.h"
PATTERN "mv_face_detection.h"
PATTERN "mv_face_detection_type.h"
+ PATTERN "mv_hand_detection_internal.h"
+ PATTERN "mv_hand_detection.h"
+ PATTERN "mv_hand_detection_type.h"
)
install(
DIRECTORY ${PROJECT_SOURCE_DIR}/include/ DESTINATION include/media
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANCHORS_H__
+#define __ANCHORS_H__
+
+#include <cmath>
+#include <vector>
+
+namespace mediavision
+{
+namespace machine_learning
+{
+struct Anchor {
+ float x_center, y_center, w, h;
+};
+
+struct SsdAnchorsCalculatorOptions {
+ int input_size_width {};
+ int input_size_height {};
+ float min_scale {};
+ float max_scale {};
+ float anchor_offset_x {};
+ float anchor_offset_y {};
+ int num_layers {};
+ std::vector<int> feature_map_width;
+ std::vector<int> feature_map_height;
+ std::vector<int> strides;
+ std::vector<float> aspect_ratios;
+ bool reduce_boxes_in_lowest_layer { false };
+ float interpolated_scale_aspect_ratio {};
+ bool fixed_anchor_size { false };
+};
+
+float CalculateScale(float min_scale, float max_scale, int stride_index, int num_strides);
+void GenerateAnchors(std::vector<Anchor> &anchors, const SsdAnchorsCalculatorOptions &options);
+}
+}
+
+#endif
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HAND_DETECTION_ADAPTER_H__
+#define __HAND_DETECTION_ADAPTER_H__
+
+#include <dlog.h>
+
+#include "EngineConfig.h"
+#include "ITask.h"
+#include "MobilenetV1Ssd.h"
+#include "MvMlConfig.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+class HandDetectionAdapter : public mediavision::common::ITask
+{
+private:
+ std::unique_ptr<IObjectDetection> _object_detection;
+ std::shared_ptr<Config> _config;
+ const std::string _config_file_name = "hand_detection.json";
+ const std::string _plugin_config_file_name = "hand_detection_plugin.json";
+
+ void create(std::string model_name = "");
+ template<typename U> void create(ObjectDetectionTaskType task_type);
+ ObjectDetectionTaskType convertToTaskType(std::string model_name);
+
+public:
+ HandDetectionAdapter();
+ ~HandDetectionAdapter();
+
+ void setModelInfo(const std::string &model_file, const std::string &meta_file, const std::string &label_file,
+ const std::string &model_name) override;
+ void setEngineInfo(const std::string &engine_type, const std::string &device_type) override;
+ void configure() override;
+ unsigned int getNumberOfEngines() override;
+ const std::string &getEngineType(unsigned int engine_index) override;
+ unsigned int getNumberOfDevices(const std::string &engine_type) override;
+ const std::string &getDeviceType(const std::string &engine_type, unsigned int device_index) override;
+ void prepare() override;
+ void perform(InputBaseType &input) override;
+ void performAsync(InputBaseType &input) override;
+ OutputBaseType &getOutput() override;
+ OutputBaseType &getOutputCache() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PALM_DETECTION_H__
+#define __PALM_DETECTION_H__
+
+#include "mv_private.h"
+#include <list>
+#include <memory>
+#include <mv_common.h>
+#include <string>
+
+#include "Anchors.h"
+#include "ObjectDetection.h"
+#include <mv_inference_type.h>
+
+#define MAX_PALM_NUM 4
+
+namespace mediavision
+{
+namespace machine_learning
+{
+struct fvec2 {
+ float x {};
+ float y {};
+};
+
+struct f_rect {
+ fvec2 topleft;
+ fvec2 btmright;
+};
+
+struct Palm {
+ // model outputs after decoding
+ float hand_cx {};
+ float hand_cy {};
+ float hand_w {};
+ float hand_h {};
+ fvec2 keys[7];
+
+ // model outputs
+ float score {};
+
+ // palm rectangle
+ f_rect rect;
+
+	// whole hand rectangle
+ float rotation {};
+ fvec2 hand_pos[4];
+};
+
+struct PalmDetectionResult {
+ int num {};
+ Palm palms[MAX_PALM_NUM];
+};
+
+template<typename T> class PalmDetection : public ObjectDetection<T>
+{
+ using ObjectDetection<T>::_config;
+ using ObjectDetection<T>::_preprocess;
+ using ObjectDetection<T>::_labels;
+ using ObjectDetection<T>::_inference;
+
+private:
+ ObjectDetectionResult _result;
+ std::vector<Anchor> _anchors;
+ float _confThreshold = 0.3;
+ float _nmsThreshold = 0.4;
+
+ // NonMaxSuppression
+ float CalcIntersectionOverUnion(f_rect &rect0, f_rect &rect1);
+ static bool Compare(Palm &v1, Palm &v2);
+	int NonMaxSuppression(std::list<Palm> &palm_list, std::list<Palm> &palm_sel_list);
+
+ // Expand palm to hand
+ float NormalizeRadians(float angle);
+ void ComputeRotation(Palm &palm);
+ void RotVec(fvec2 &vec, float rotation);
+ void ComputeHandRect(Palm &palm);
+ void PackPalmResult(PalmDetectionResult *palm_result, std::list<Palm> &palm_list);
+
+ // Decode palm detection result
+ void DecodeKeypoints(std::list<Palm> &palm_list);
+
+public:
+ PalmDetection(ObjectDetectionTaskType task_type, std::shared_ptr<Config> config);
+ virtual ~PalmDetection() = default;
+
+ ObjectDetectionResult &result() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
MOBILENET_V2_SSD,
FD_MOBILENET_V1_SSD,
OD_TRIV2,
- FD_TRIV2
+ FD_TRIV2,
+ HD_PALM
// TODO
};
--- /dev/null
+{
+ "attributes":
+ [
+ {
+ "name" : "MODEL_DEFAULT_PATH",
+ "type" : "string",
+ "value" : "/opt/usr/globalapps/mediavision.object.detection/models/tflite/"
+ },
+ {
+ "name" : "MODEL_FILE_NAME",
+ "type" : "string",
+ "value" : "palm_detection_full.tflite"
+ },
+ {
+ "name" : "DEFAULT_MODEL_NAME",
+ "type" : "string",
+ "value" : "HD_PALM"
+ },
+ {
+ "name" : "MODEL_META_FILE_NAME",
+ "type" : "string",
+ "value" : "palm_detection_full.json"
+ },
+ {
+ "name" : "MODEL_LABEL_FILE_NAME",
+ "type" : "string",
+ "value" : "fd_mobilenet_v1_ssd_postop_label.txt"
+ },
+ {
+ "name" : "BACKEND_TYPE",
+ "type" : "integer",
+ "value" : 1
+ },
+ {
+ "name" : "TARGET_DEVICE_TYPE",
+ "type" : "integer",
+ "value" : 1
+ }
+ ]
+}
--- /dev/null
+{
+ "attributes":
+ [
+ {
+ "name" : "PLUGIN_NAME",
+ "type" : "string",
+ "value" : "libobject_detection_plugin.so"
+ },
+ {
+ "name" : "DEFAULT_MODEL_NAME",
+ "type" : "string",
+ "value" : "FD_TRIV2"
+ },
+ {
+ "name" : "USE_PLUGIN",
+ "type" : "boolean",
+ "value" : false
+ }
+ ]
+}
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Anchors.h"
+
+using namespace std;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+float CalculateScale(float min_scale, float max_scale, int stride_index, int num_strides)
+{
+ return min_scale + (max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f);
+}
+
+void GenerateAnchors(vector<Anchor> &anchors, const SsdAnchorsCalculatorOptions &options)
+{
+ int layer_id = 0;
+
+ while (layer_id < static_cast<int>(options.strides.size())) {
+ vector<float> anchor_height;
+ vector<float> anchor_width;
+ vector<float> aspect_ratios;
+ vector<float> scales;
+ // For same strides, we merge the anchors in the same order.
+ int last_same_stride_layer = layer_id;
+
+ while (last_same_stride_layer < (int) options.strides.size() &&
+ options.strides[last_same_stride_layer] == options.strides[layer_id]) {
+ const float scale = CalculateScale(options.min_scale, options.max_scale, last_same_stride_layer,
+ options.strides.size());
+
+ if (last_same_stride_layer == 0 && options.reduce_boxes_in_lowest_layer) {
+ // For first layer, it can be specified to use predefined anchors.
+ aspect_ratios.push_back(1.0);
+ aspect_ratios.push_back(2.0);
+ aspect_ratios.push_back(0.5);
+ scales.push_back(0.1);
+ scales.push_back(scale);
+ scales.push_back(scale);
+ } else {
+ for (int aspect_ratio_id = 0; aspect_ratio_id < (int) options.aspect_ratios.size(); ++aspect_ratio_id) {
+ aspect_ratios.push_back(options.aspect_ratios[aspect_ratio_id]);
+ scales.push_back(scale);
+ }
+
+ if (options.interpolated_scale_aspect_ratio > 0.0) {
+ const float scale_next = last_same_stride_layer == (int) options.strides.size() - 1 ?
+ 1.0f :
+ CalculateScale(options.min_scale, options.max_scale,
+ last_same_stride_layer + 1, options.strides.size());
+ scales.push_back(sqrt(scale * scale_next));
+ aspect_ratios.push_back(options.interpolated_scale_aspect_ratio);
+ }
+ }
+
+ last_same_stride_layer++;
+ }
+
+ for (int i = 0; i < (int) aspect_ratios.size(); ++i) {
+ const float ratio_sqrts = sqrt(aspect_ratios[i]);
+
+ anchor_height.push_back(scales[i] / ratio_sqrts);
+ anchor_width.push_back(scales[i] * ratio_sqrts);
+ }
+
+ int feature_map_height = 0;
+ int feature_map_width = 0;
+
+ if (options.feature_map_height.size()) {
+ feature_map_height = options.feature_map_height[layer_id];
+ feature_map_width = options.feature_map_width[layer_id];
+ } else {
+ const int stride = options.strides[layer_id];
+ feature_map_height = ceil(1.0f * options.input_size_height / stride);
+ feature_map_width = ceil(1.0f * options.input_size_width / stride);
+ }
+
+ for (int y = 0; y < feature_map_height; ++y) {
+ for (int x = 0; x < feature_map_width; ++x) {
+ for (int anchor_id = 0; anchor_id < (int) anchor_height.size(); ++anchor_id) {
+ // TODO: Support specifying anchor_offset_x, anchor_offset_y.
+ const float x_center = (x + options.anchor_offset_x) * 1.0f / feature_map_width;
+ const float y_center = (y + options.anchor_offset_y) * 1.0f / feature_map_height;
+ Anchor new_anchor;
+
+ new_anchor.x_center = x_center;
+ new_anchor.y_center = y_center;
+
+ if (options.fixed_anchor_size) {
+ new_anchor.w = 1.0f;
+ new_anchor.h = 1.0f;
+ } else {
+ new_anchor.w = anchor_width[anchor_id];
+ new_anchor.h = anchor_height[anchor_id];
+ }
+
+ anchors.push_back(new_anchor);
+ }
+ }
+ }
+
+ layer_id = last_same_stride_layer;
+ }
+}
+
+}
+}
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HandDetectionAdapter.h"
+#include "MvMlException.h"
+#include "ObjectDetectionExternal.h"
+#include "PalmDetection.h"
+#include "mv_object_detection_config.h"
+
+using namespace std;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+HandDetectionAdapter::HandDetectionAdapter()
+{
+ _config = make_shared<Config>();
+
+ // If the model type needs external plugin then bypass to load the meta file and just create the external plugin.
+ // In this case, external plugin will use its own meta file approach regardless of Mediavision's one.
+ _config->parsePluginConfigFile(_plugin_config_file_name);
+ if (!_config->isPluginUsed())
+ _config->parseConfigFile(_config_file_name);
+
+ create(_config->getDefaultModelName());
+}
+
+HandDetectionAdapter::~HandDetectionAdapter()
+{
+ _object_detection->preDestroy();
+}
+
+// Instantiates the concrete detection implementation for @task_type, with U
+// being the input tensor element type chosen from the model meta data.
+// Throws InvalidOperation for an unsupported task type.
+template<typename U> void HandDetectionAdapter::create(ObjectDetectionTaskType task_type)
+{
+ switch (task_type) {
+ case ObjectDetectionTaskType::HD_PALM:
+ _object_detection = make_unique<PalmDetection<U> >(task_type, _config);
+ break;
+ default:
+ throw InvalidOperation("Invalid hand detection task type.");
+ }
+ // TODO. add further hand detection task types here.
+}
+
+// Creates the task implementation for @model_name (or the configured default
+// when the name is empty). Plugin-based models are routed to
+// ObjectDetectionExternal; otherwise the meta file is loaded and its input
+// tensor data type selects the template instantiation.
+void HandDetectionAdapter::create(string model_name)
+{
+ if (model_name.empty())
+ model_name = _config->getDefaultModelName();
+
+ auto task_type = convertToTaskType(model_name);
+
+ if (_config->isPluginUsed()) {
+ const auto &plugin_name = _config->getPluginFileName();
+
+ _object_detection = make_unique<ObjectDetectionExternal>(task_type, plugin_name.c_str());
+ return;
+ }
+
+ _config->loadMetaFile(make_unique<ObjectDetectionParser>(static_cast<int>(task_type)));
+ // The first input tensor's data type decides which instantiation to build.
+ mv_inference_data_type_e dataType = _config->getInputMetaMap().begin()->second->dataType;
+
+ switch (dataType) {
+ case MV_INFERENCE_DATA_UINT8:
+ create<unsigned char>(task_type);
+ break;
+ case MV_INFERENCE_DATA_FLOAT32:
+ create<float>(task_type);
+ break;
+ default:
+ throw InvalidOperation("Invalid hand detection data type.");
+ }
+}
+
+// Maps a case-insensitive model name onto its hand detection task type.
+// Throws InvalidParameter for an empty or unknown model name.
+ObjectDetectionTaskType HandDetectionAdapter::convertToTaskType(string model_name)
+{
+ if (model_name.empty())
+ throw InvalidParameter("model name is empty.");
+
+ for (auto &ch : model_name)
+ ch = ::toupper(ch);
+
+ // TODO. extend the mapping as new hand detection models are added.
+ if (model_name == "HD_PALM")
+ return ObjectDetectionTaskType::HD_PALM;
+
+ throw InvalidParameter("Invalid hand detection model name.");
+}
+
+// Registers user-provided model files and, when @model_name is given,
+// recreates the task implementation for it. Failures are downgraded to
+// warnings so that the previously created default task keeps working.
+void HandDetectionAdapter::setModelInfo(const string &model_file, const string &meta_file, const string &label_file,
+ const string &model_name)
+{
+ try {
+ _config->setUserModel(model_file, meta_file, label_file);
+ if (!model_name.empty())
+ create(model_name);
+ } catch (const BaseException &e) {
+ LOGW("A given model name is invalid so default task type will be used.");
+ }
+
+ // NOTE(review): this branch only logs; with both paths empty the default
+ // model info simply stays in effect.
+ if (model_file.empty() && meta_file.empty()) {
+ LOGW("Given model info is invalid so default model info will be used instead.");
+ return;
+ }
+}
+
+// Selects the inference engine backend and target device for the task.
+void HandDetectionAdapter::setEngineInfo(const string &engine_type, const string &device_type)
+{
+ _object_detection->setEngineInfo(string(engine_type), string(device_type));
+}
+
+// Applies the accumulated configuration to the underlying task object.
+void HandDetectionAdapter::configure()
+{
+ _object_detection->configure();
+}
+
+// Returns how many inference engine backends are available.
+unsigned int HandDetectionAdapter::getNumberOfEngines()
+{
+ return _object_detection->getNumberOfEngines();
+}
+
+// Returns the backend name at @engine_index.
+const string &HandDetectionAdapter::getEngineType(unsigned int engine_index)
+{
+ return _object_detection->getEngineType(engine_index);
+}
+
+// Returns how many devices the given backend supports.
+unsigned int HandDetectionAdapter::getNumberOfDevices(const string &engine_type)
+{
+ return _object_detection->getNumberOfDevices(engine_type);
+}
+
+// Returns the device name at @device_index for the given backend.
+const string &HandDetectionAdapter::getDeviceType(const string &engine_type, unsigned int device_index)
+{
+ return _object_detection->getDeviceType(engine_type, device_index);
+}
+
+// Loads the model and makes the task ready for inference.
+void HandDetectionAdapter::prepare()
+{
+ _object_detection->prepare();
+}
+
+// Runs synchronous inference on the given input source.
+void HandDetectionAdapter::perform(InputBaseType &input)
+{
+ _object_detection->perform(input.inference_src);
+}
+
+// Queues asynchronous inference on the given input.
+void HandDetectionAdapter::performAsync(InputBaseType &input)
+{
+ _object_detection->performAsync(static_cast<ObjectDetectionInput &>(input));
+}
+
+// Returns the latest (blocking) detection result.
+OutputBaseType &HandDetectionAdapter::getOutput()
+{
+ return _object_detection->getOutput();
+}
+
+// Returns the cached result from the last completed inference.
+OutputBaseType &HandDetectionAdapter::getOutputCache()
+{
+ return _object_detection->getOutputCache();
+}
+
+}
+}
--- /dev/null
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <list>
+#include <map>
+#include <string.h>
+
+#include "Anchors.h"
+#include "MvMlException.h"
+#include "PalmDetection.h"
+#include "Postprocess.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+// Builds the fixed SSD anchor grid used to decode the palm detector's raw
+// output tensors (192x192 input, 4 layers with strides 8/16/16/16).
+template<typename T>
+PalmDetection<T>::PalmDetection(ObjectDetectionTaskType task_type, shared_ptr<Config> config)
+ : ObjectDetection<T>(task_type, config), _result()
+{
+ // TODO. use meta file instead of using fixed values later
+ SsdAnchorsCalculatorOptions anchor_options { .input_size_width = 192,
+ .input_size_height = 192,
+ .min_scale = 0.1484375,
+ .max_scale = 0.75,
+ .anchor_offset_x = 0.5f,
+ .anchor_offset_y = 0.5f,
+ .num_layers = 4,
+ .reduce_boxes_in_lowest_layer = false,
+ .interpolated_scale_aspect_ratio = 1.0,
+ .fixed_anchor_size = true };
+
+ anchor_options.strides.push_back(8);
+ anchor_options.strides.push_back(16);
+ anchor_options.strides.push_back(16);
+ anchor_options.strides.push_back(16);
+ anchor_options.aspect_ratios.push_back(1.0);
+
+ GenerateAnchors(_anchors, anchor_options);
+}
+
+// Decodes the raw output tensors into palm candidates.
+// Tensor layout (per anchor): output[0] carries 18 floats — 4 box values
+// (sx, sy, w, h) followed by 7 keypoints as (x, y) pairs; output[1] carries
+// one score logit. A sigmoid is applied to the logit and candidates above
+// _confThreshold are appended to @palm_list with coordinates normalized to
+// the input tensor size and offset by the matching anchor center.
+template<typename T> void PalmDetection<T>::DecodeKeypoints(list<Palm> &palm_list)
+{
+ Palm palm_item;
+ int tensorIdx = 0;
+ float tensorWidth = static_cast<float>(_inference->getInputWidth());
+ float tensorHeight = static_cast<float>(_inference->getInputHeight());
+
+ vector<string> names;
+ ObjectDetection<T>::getOutputNames(names);
+
+ vector<float> bbox_tensor;
+ ObjectDetection<T>::getOutputTensor(names[0], bbox_tensor);
+
+ vector<float> prob_tensor;
+ ObjectDetection<T>::getOutputTensor(names[1], prob_tensor);
+
+ for (auto itr = _anchors.begin(); itr != _anchors.end(); tensorIdx++, itr++) {
+ Anchor anchor = *itr;
+ float score0 = prob_tensor[tensorIdx];
+ // sigmoid turns the raw logit into a probability.
+ float score = 1.0f / (1.0f + exp(-score0));
+ if (score > _confThreshold) {
+ float *p = bbox_tensor.data() + (tensorIdx * 18);
+
+ /* boundary box */
+ float sx = p[0];
+ float sy = p[1];
+ float w = p[2];
+ float h = p[3];
+
+ // Box center is predicted relative to the anchor center (tensor pixels),
+ // then everything is normalized back into [0, 1] tensor coordinates.
+ float cx = sx + anchor.x_center * tensorWidth;
+ float cy = sy + anchor.y_center * tensorHeight;
+
+ cx /= tensorWidth;
+ cy /= tensorHeight;
+ w /= tensorWidth;
+ h /= tensorHeight;
+
+ fvec2 topleft, btmright;
+ topleft.x = cx - w * 0.5f;
+ topleft.y = cy - h * 0.5f;
+ btmright.x = cx + w * 0.5f;
+ btmright.y = cy + h * 0.5f;
+
+ palm_item.score = score;
+ palm_item.rect.topleft = topleft;
+ palm_item.rect.btmright = btmright;
+
+ /* landmark positions (7 keys) */
+ for (int keyIdx = 0; keyIdx < 7; keyIdx++) {
+ float lx = p[4 + (2 * keyIdx) + 0];
+ float ly = p[4 + (2 * keyIdx) + 1];
+
+ lx += anchor.x_center * tensorWidth;
+ ly += anchor.y_center * tensorHeight;
+ lx /= tensorWidth;
+ ly /= tensorHeight;
+
+ palm_item.keys[keyIdx].x = lx;
+ palm_item.keys[keyIdx].y = ly;
+ }
+
+ palm_list.push_back(palm_item);
+ }
+ }
+}
+
+// Returns the intersection-over-union of two rectangles, or 0 when either
+// rectangle has non-positive area. Corner ordering is normalized first, so
+// callers may pass rectangles with swapped corners.
+template<typename T> float PalmDetection<T>::CalcIntersectionOverUnion(f_rect &rect0, f_rect &rect1)
+{
+ // Normalize both rectangles so (xmin, ymin) is top-left, (xmax, ymax) bottom-right.
+ float xmin0 = min(rect0.topleft.x, rect0.btmright.x);
+ float ymin0 = min(rect0.topleft.y, rect0.btmright.y);
+ float xmax0 = max(rect0.topleft.x, rect0.btmright.x);
+ float ymax0 = max(rect0.topleft.y, rect0.btmright.y);
+ float xmin1 = min(rect1.topleft.x, rect1.btmright.x);
+ float ymin1 = min(rect1.topleft.y, rect1.btmright.y);
+ float xmax1 = max(rect1.topleft.x, rect1.btmright.x);
+ float ymax1 = max(rect1.topleft.y, rect1.btmright.y);
+
+ float area0 = (ymax0 - ymin0) * (xmax0 - xmin0);
+ float area1 = (ymax1 - ymin1) * (xmax1 - xmin1);
+ if (area0 <= 0 || area1 <= 0)
+ return 0.0f;
+
+ // Overlap extents; a negative extent means the rectangles do not intersect.
+ float intersect_w = min(xmax0, xmax1) - max(xmin0, xmin1);
+ float intersect_h = min(ymax0, ymax1) - max(ymin0, ymin1);
+ float intersect_area = max(intersect_h, 0.0f) * max(intersect_w, 0.0f);
+
+ return intersect_area / (area0 + area1 - intersect_area);
+}
+
+// Sort predicate: orders palms by descending detection score.
+template<typename T> bool PalmDetection<T>::Compare(Palm &v1, Palm &v2)
+{
+ return (v1.score > v2.score);
+}
+
+// Greedy non-maximum suppression: visits palms in descending score order and
+// keeps each one whose IoU with every already-selected palm is below
+// _nmsThreshold, stopping at MAX_PALM_NUM selections. Always returns 0.
+// (Identifiers previously said "face" — a copy-paste from the face detection
+// code; renamed for clarity, behavior unchanged.)
+template<typename T> int PalmDetection<T>::NonMaxSuppression(list<Palm> &palm_list, list<Palm> &palm_sel_list)
+{
+ palm_list.sort(Compare);
+
+ for (auto itr = palm_list.begin(); itr != palm_list.end(); itr++) {
+ // Reference the candidate in place; no copy is needed for read-only use.
+ Palm &palm_candidate = *itr;
+ bool ignore_candidate = false;
+
+ for (auto itr_sel = palm_sel_list.rbegin(); itr_sel != palm_sel_list.rend(); itr_sel++) {
+ float iou = CalcIntersectionOverUnion(palm_candidate.rect, itr_sel->rect);
+
+ if (iou >= _nmsThreshold) {
+ ignore_candidate = true;
+ break;
+ }
+ }
+
+ if (!ignore_candidate) {
+ palm_sel_list.push_back(palm_candidate);
+
+ if (palm_sel_list.size() >= MAX_PALM_NUM)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+// Wraps an angle into the [-pi, pi) range.
+template<typename T> float PalmDetection<T>::NormalizeRadians(float angle)
+{
+ return angle - 2 * M_PI * floor((angle - (-M_PI)) / (2 * M_PI));
+}
+
+// Computes the palm's in-plane rotation from the wrist-center keypoint (key 0)
+// to the middle-finger MCP keypoint (key 2), relative to an upright target
+// angle of pi/2, and stores the normalized angle in palm.rotation.
+template<typename T> void PalmDetection<T>::ComputeRotation(Palm &palm)
+{
+ float x0 = palm.keys[0].x; // Center of wrist.
+ float y0 = palm.keys[0].y;
+ float x1 = palm.keys[2].x; // MCP of middle finger.
+ float y1 = palm.keys[2].y;
+
+ float target_angle = M_PI * 0.5f;
+ // Negated dy because image y grows downward.
+ float rotation = target_angle - atan2(-(y1 - y0), x1 - x0);
+
+ palm.rotation = NormalizeRadians(rotation);
+}
+
+// Rotates the 2D vector in place by @rotation radians about the origin.
+template<typename T> void PalmDetection<T>::RotVec(fvec2 &vec, float rotation)
+{
+ float sx = vec.x;
+ float sy = vec.y;
+
+ vec.x = sx * cos(rotation) - sy * sin(rotation);
+ vec.y = sx * sin(rotation) + sy * cos(rotation);
+}
+
+// Derives the (rotated) hand crop rectangle from the detected palm box:
+// the palm center is shifted toward the fingers (shift_y = -0.5 of the box
+// height, rotated along with the palm), the box is squared on its long side
+// and scaled by 2.0, and the four rotated corners are stored in hand_pos.
+template<typename T> void PalmDetection<T>::ComputeHandRect(Palm &palm)
+{
+ float width = palm.rect.btmright.x - palm.rect.topleft.x;
+ float height = palm.rect.btmright.y - palm.rect.topleft.y;
+ float palm_cx = palm.rect.topleft.x + width * 0.5f;
+ float palm_cy = palm.rect.topleft.y + height * 0.5f;
+ float hand_cx;
+ float hand_cy;
+ float rotation = palm.rotation;
+ float shift_x = 0.0f;
+ float shift_y = -0.5f;
+
+ // Shift the crop center away from the wrist; when rotated, the shift
+ // vector must be rotated too.
+ if (rotation == 0.0f) {
+ hand_cx = palm_cx + (width * shift_x);
+ hand_cy = palm_cy + (height * shift_y);
+ } else {
+ float dx = (width * shift_x) * cos(rotation) - (height * shift_y) * sin(rotation);
+
+ float dy = (width * shift_x) * sin(rotation) + (height * shift_y) * cos(rotation);
+
+ hand_cx = palm_cx + dx;
+ hand_cy = palm_cy + dy;
+ }
+
+ // make the crop rectangle
+ float long_side = max(width, height);
+ width = long_side;
+ height = long_side;
+
+ // Expand the square palm box to cover the whole hand.
+ float hand_w = width * 2.0f;
+ float hand_h = height * 2.0f;
+
+ palm.hand_cx = hand_cx;
+ palm.hand_cy = hand_cy;
+ palm.hand_w = hand_w;
+ palm.hand_h = hand_h;
+
+ float dx = hand_w * 0.5f;
+ float dy = hand_h * 0.5f;
+
+ // Corners in local (unrotated) coordinates, clockwise from top-left.
+ palm.hand_pos[0].x = -dx;
+ palm.hand_pos[0].y = -dy;
+ palm.hand_pos[1].x = +dx;
+ palm.hand_pos[1].y = -dy;
+ palm.hand_pos[2].x = +dx;
+ palm.hand_pos[2].y = +dy;
+ palm.hand_pos[3].x = -dx;
+ palm.hand_pos[3].y = +dy;
+
+ // Rotate each corner about the crop center and translate into place.
+ for (int posIdx = 0; posIdx < 4; posIdx++) {
+ RotVec(palm.hand_pos[posIdx], rotation);
+ palm.hand_pos[posIdx].x += hand_cx;
+ palm.hand_pos[posIdx].y += hand_cy;
+ }
+}
+
+// Copies up to MAX_PALM_NUM palms from @palm_list into @palm_result,
+// computing each palm's rotation and its rotated hand crop rectangle first.
+template<typename T> void PalmDetection<T>::PackPalmResult(PalmDetectionResult *palm_result, list<Palm> &palm_list)
+{
+ int num_palms = 0;
+
+ // Make the count well-defined even when no palm was detected; the caller
+ // reads palm_result->num unconditionally, and the loop below would never
+ // set it for an empty list.
+ palm_result->num = 0;
+
+ for (auto itr = palm_list.begin(); itr != palm_list.end(); itr++) {
+ Palm palm = *itr;
+
+ ComputeRotation(palm);
+ ComputeHandRect(palm);
+
+ memcpy(&palm_result->palms[num_palms++], &palm, sizeof(palm));
+ palm_result->num = num_palms;
+
+ if (num_palms >= MAX_PALM_NUM)
+ break;
+ }
+}
+
+// Runs the full post-processing pipeline (decode -> NMS -> hand rect) on the
+// latest inference output and converts the surviving palms into axis-aligned
+// pixel bounding boxes stored in _result.
+template<typename T> ObjectDetectionResult &PalmDetection<T>::result()
+{
+ // Clear _result object because result() function can be called every time user wants
+ // so make sure to clear existing result data before getting the data again.
+ _result = ObjectDetectionResult();
+
+ list<Palm> palm;
+ DecodeKeypoints(palm);
+
+ list<Palm> palmNms;
+ NonMaxSuppression(palm, palmNms);
+
+ // Value-initialize so num and unused palm slots are zeroed; without this,
+ // iterating over the fixed-size palms array would read indeterminate stack
+ // memory whenever fewer than MAX_PALM_NUM palms were detected.
+ PalmDetectionResult palmResult {};
+ PackPalmResult(&palmResult, palmNms);
+
+ float img_width = static_cast<float>(_preprocess.getImageWidth()[0]);
+ float img_height = static_cast<float>(_preprocess.getImageHeight()[0]);
+
+ _result.number_of_objects = 0;
+
+ // Only visit the slots PackPalmResult actually filled.
+ for (int palm_idx = 0; palm_idx < palmResult.num; ++palm_idx) {
+ const Palm &palm = palmResult.palms[palm_idx];
+ float min_x = static_cast<float>(img_width);
+ float max_x = 0.0f;
+ float min_y = static_cast<float>(img_height);
+ float max_y = 0.0f;
+
+ if (palm.score < 0.5)
+ continue;
+
+ // Axis-aligned bounding box of the rotated hand rectangle corners
+ // (normalized coordinates).
+ for (unsigned int idx = 0; idx < 4; ++idx) {
+ min_x = min(palm.hand_pos[idx].x, min_x);
+ max_x = max(palm.hand_pos[idx].x, max_x);
+ min_y = min(palm.hand_pos[idx].y, min_y);
+ max_y = max(palm.hand_pos[idx].y, max_y);
+ }
+
+ // Scale to pixel coordinates and clamp into the image bounds.
+ float left = min_x * img_width < 0.0f ? 0.0f : min_x * img_width;
+ float right = max_x * img_width >= img_width ? img_width - 1.0f : max_x * img_width;
+ float top = min_y * img_height < 0 ? 0 : min_y * img_height;
+ float bottom = max_y * img_height >= img_height ? img_height - 1.0f : max_y * img_height;
+
+ _result.left.push_back(static_cast<int>(left));
+ _result.right.push_back(static_cast<int>(right));
+ _result.top.push_back(static_cast<int>(top));
+ _result.bottom.push_back(static_cast<int>(bottom));
+ _result.number_of_objects++;
+ }
+
+ return _result;
+}
+
+template class PalmDetection<float>;
+template class PalmDetection<unsigned char>;
+
+}
+}
--- /dev/null
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_hand_detection.h"
+#include "Context.h"
+#include "HandDetectionAdapter.h"
+#include "ITask.h"
+#include "MvMlException.h"
+#include "mv_feature_key.h"
+#include "mv_hand_detection_internal.h"
+#include "mv_private.h"
+#include "native_capi.h"
+#include "object_detection_type.h"
+
+#include <algorithm>
+#include <iostream>
+#include <mutex>
+#include <new>
+#include <string>
+#include <unistd.h>
+
+#define TASK_NAME "hand_detection"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::common;
+using namespace mediavision::machine_learning;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning::exception;
+
+// Tizen feature keys that must be supported for this API set to be usable.
+// NOTE(review): "vision.inference.face" looks copied from the face detection
+// C API; confirm whether hand detection should gate on the image inference
+// feature ("vision.inference.image") instead.
+static const char *feature_keys[] = { "http://tizen.org/feature/vision.inference",
+ "http://tizen.org/feature/vision.inference.face" };
+static const size_t num_keys = sizeof(feature_keys) / sizeof(char *);
+
+// Creates a hand detection handle by wrapping a native context with a
+// HandDetectionAdapter task. On success the new context is stored in @handle.
+int mv_hand_detection_create(mv_hand_detection_h *handle)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+ MEDIA_VISION_NULL_ARG_CHECK(handle);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ mv_hand_detection_h ctx = nullptr;
+
+ try {
+ ctx = machine_learning_native_create();
+ machine_learning_native_add(ctx, TASK_NAME, new HandDetectionAdapter());
+ } catch (const BaseException &e) {
+ // Do not leak the native context when adding the task adapter fails.
+ if (ctx)
+ machine_learning_native_destroy(ctx);
+ return e.getError();
+ } catch (const std::exception &e) {
+ LOGE("%s", e.what());
+ if (ctx)
+ machine_learning_native_destroy(ctx);
+ return MEDIA_VISION_ERROR_INTERNAL;
+ }
+
+ *handle = ctx;
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+// Destroys a hand detection handle and the task objects registered on it.
+int mv_hand_detection_destroy(mv_hand_detection_h handle)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ machine_learning_native_destroy(handle);
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+// Registers user model, meta, and label files for the hand detection task.
+// NOTE(review): @model_name is not NULL-checked like the other arguments —
+// presumably optional; confirm the native layer accepts NULL here.
+int mv_hand_detection_set_model(mv_hand_detection_h handle, const char *model_file, const char *meta_file,
+ const char *label_file, const char *model_name)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+ MEDIA_VISION_NULL_ARG_CHECK(model_file);
+ MEDIA_VISION_NULL_ARG_CHECK(meta_file);
+ MEDIA_VISION_NULL_ARG_CHECK(label_file);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ try {
+ machine_learning_native_set_model(handle, TASK_NAME, model_file, meta_file, label_file, model_name);
+ } catch (const BaseException &e) {
+ LOGE("%s", e.what());
+ return e.getError();
+ }
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+// Selects the inference backend and target device for the hand detection task.
+int mv_hand_detection_set_engine(mv_hand_detection_h handle, const char *backend_type, const char *device_type)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+ MEDIA_VISION_NULL_ARG_CHECK(backend_type);
+ MEDIA_VISION_NULL_ARG_CHECK(device_type);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ try {
+ machine_learning_native_set_engine(handle, TASK_NAME, backend_type, device_type);
+ } catch (const BaseException &e) {
+ LOGE("%s", e.what());
+ return e.getError();
+ }
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+// Returns the number of available inference engine backends.
+int mv_hand_detection_get_engine_count(mv_hand_detection_h handle, unsigned int *engine_count)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+ MEDIA_VISION_NULL_ARG_CHECK(engine_count);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ try {
+ machine_learning_native_get_engine_count(handle, TASK_NAME, engine_count);
+ } catch (const BaseException &e) {
+ LOGE("%s", e.what());
+ return e.getError();
+ }
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+// Returns the backend name at @engine_index. The string is owned by the
+// native layer.
+int mv_hand_detection_get_engine_type(mv_hand_detection_h handle, const unsigned int engine_index, char **engine_type)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+ MEDIA_VISION_NULL_ARG_CHECK(engine_type);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ try {
+ machine_learning_native_get_engine_type(handle, TASK_NAME, engine_index, engine_type);
+ } catch (const BaseException &e) {
+ LOGE("%s", e.what());
+ return e.getError();
+ }
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+// Returns how many devices the given backend supports.
+int mv_hand_detection_get_device_count(mv_hand_detection_h handle, const char *engine_type, unsigned int *device_count)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+ MEDIA_VISION_NULL_ARG_CHECK(device_count);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ try {
+ machine_learning_native_get_device_count(handle, TASK_NAME, engine_type, device_count);
+ } catch (const BaseException &e) {
+ LOGE("%s", e.what());
+ return e.getError();
+ }
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+// Returns the device name at @device_index for the given backend.
+int mv_hand_detection_get_device_type(mv_hand_detection_h handle, const char *engine_type,
+ const unsigned int device_index, char **device_type)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+ MEDIA_VISION_NULL_ARG_CHECK(engine_type);
+ MEDIA_VISION_NULL_ARG_CHECK(device_type);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ try {
+ machine_learning_native_get_device_type(handle, TASK_NAME, engine_type, device_index, device_type);
+ } catch (const BaseException &e) {
+ LOGE("%s", e.what());
+ return e.getError();
+ }
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+// Applies the accumulated model/engine configuration to the task.
+int mv_hand_detection_configure(mv_hand_detection_h handle)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ try {
+ machine_learning_native_configure(handle, TASK_NAME);
+ } catch (const BaseException &e) {
+ LOGE("%s", e.what());
+ return e.getError();
+ }
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+// Loads the model and gets the task ready for inference. Call after
+// mv_hand_detection_configure().
+int mv_hand_detection_prepare(mv_hand_detection_h handle)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ try {
+ machine_learning_native_prepare(handle, TASK_NAME);
+ } catch (const BaseException &e) {
+ LOGE("%s", e.what());
+ return e.getError();
+ }
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+// Runs synchronous hand detection inference on @source.
+int mv_hand_detection_inference(mv_hand_detection_h handle, mv_source_h source)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+ MEDIA_VISION_INSTANCE_CHECK(source);
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ try {
+ ObjectDetectionInput input(source);
+
+ machine_learning_native_inference(handle, TASK_NAME, input);
+ } catch (const BaseException &e) {
+ LOGE("%s", e.what());
+ return e.getError();
+ }
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+// Queues asynchronous hand detection inference on @source; results are
+// retrieved later via mv_hand_detection_get_result_count()/get_bound_box().
+int mv_hand_detection_inference_async(mv_hand_detection_h handle, mv_source_h source)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+ MEDIA_VISION_INSTANCE_CHECK(source);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ try {
+ ObjectDetectionInput input(source);
+
+ machine_learning_native_inference_async(handle, TASK_NAME, input);
+ } catch (const BaseException &e) {
+ LOGE("%s", e.what());
+ return e.getError();
+ }
+
+ // Use the same leave tracing as every other entry point in this file
+ // (was a bare LOGD("LEAVE"), inconsistent with its siblings).
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+// Fetches the latest result, returning its frame number and the number of
+// detected hands. Blocks on the task's result retrieval.
+// NOTE(review): INSTANCE_CHECK is used on the out-pointers here while
+// NULL_ARG_CHECK is used elsewhere — presumably equivalent NULL validation;
+// confirm.
+int mv_hand_detection_get_result_count(mv_hand_detection_h handle, unsigned long *frame_number,
+ unsigned int *result_cnt)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+ MEDIA_VISION_INSTANCE_CHECK(frame_number);
+ MEDIA_VISION_INSTANCE_CHECK(result_cnt);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ try {
+ auto &result = static_cast<ObjectDetectionResult &>(machine_learning_native_get_result(handle, TASK_NAME));
+
+ *frame_number = result.frame_number;
+ *result_cnt = result.number_of_objects;
+ } catch (const BaseException &e) {
+ LOGE("%s", e.what());
+ return e.getError();
+ }
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+// Returns the pixel bounding box of detection @index from the cached result.
+// Call after mv_hand_detection_get_result_count(); @index must be below the
+// reported result count.
+int mv_hand_detection_get_bound_box(mv_hand_detection_h handle, unsigned int index, int *left, int *top, int *right,
+ int *bottom)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+ MEDIA_VISION_INSTANCE_CHECK(left);
+ MEDIA_VISION_INSTANCE_CHECK(top);
+ MEDIA_VISION_INSTANCE_CHECK(right);
+ MEDIA_VISION_INSTANCE_CHECK(bottom);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ try {
+ // Read from the cache so this does not re-trigger result retrieval.
+ auto &result =
+ static_cast<ObjectDetectionResult &>(machine_learning_native_get_result_cache(handle, TASK_NAME));
+ if (index >= result.number_of_objects) {
+ LOGE("Invalid index(index = %u, result count = %u).", index, result.number_of_objects);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ *left = result.left[index];
+ *top = result.top[index];
+ *right = result.right[index];
+ *bottom = result.bottom[index];
+ } catch (const BaseException &e) {
+ LOGE("%s", e.what());
+ return e.getError();
+ }
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return MEDIA_VISION_ERROR_NONE;
+}
\ No newline at end of file
%{_datadir}/%{name}/object_detection_plugin.json
%{_datadir}/%{name}/face_detection.json
%{_datadir}/%{name}/face_detection_plugin.json
+%{_datadir}/%{name}/hand_detection.json
+%{_datadir}/%{name}/hand_detection_plugin.json
%{_libdir}/libmv_object_detection.so
%endif
%if "%{enable_ml_object_detection_3d}" == "1"
%{_includedir}/media/IObjectDetection.h
%{_includedir}/media/object_detection_type.h
%{_libdir}/pkgconfig/*object-detection.pc
+# NOTE(review): these hand detection headers are being added inside the
+# enable_ml_object_detection_3d guard; verify they should not be packaged in
+# the 2D object detection section instead.
+%{_includedir}/media/mv_hand_detection_internal.h
+%{_includedir}/media/mv_hand_detection.h
+%{_includedir}/media/mv_hand_detection_type.h
+
%endif
%if "%{enable_ml_object_detection_3d}" == "1"
%{_includedir}/media/mv_object_detection_3d_internal.h