From 1828addc1633c3bedba9e2749baf9a12d8913bd0 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Mon, 19 Sep 2022 11:32:37 +0900 Subject: [PATCH] mv_machine_learning: add object detection 3d API [Version] : 0.24.0-0 [Issue type] : new feature Add Objectron model based object detection 3d API. What this patch does, - Implement Objectron model[1] based object detection 3d framework. - Add object_detection directory in mv_machine_learning, which includes object detection relevant files. - Add create interface to itask class, which creates model specific class object. - Implement Object detection and its based Objectron class for object detection 3d inference. - Implement itask interface class based object detection adapter class. - Implement object detection 3d API. [1] https://arxiv.org/abs/2003.03522 Change-Id: I3f4f9d53a305bd45d83cb330757bc6d5f83396b8 Signed-off-by: Inki Dae --- CMakeLists.txt | 12 ++ include/mv_object_detection_3d.h | 168 +++++++++++++++++ include/mv_object_detection_3d_type.h | 77 ++++++++ mv_machine_learning/CMakeLists.txt | 1 + mv_machine_learning/common/include/itask.h | 1 + .../include/face_recognition_adapter.h | 1 + .../src/face_recognition_adapter.cpp | 5 + mv_machine_learning/inference/include/BoxInfo.h | 5 +- mv_machine_learning/inference/include/Inference.h | 20 ++ .../inference/include/OutputMetadataTypes.h | 3 +- .../inference/src/OutputMetadata.cpp | 7 +- .../object_detection/CMakeLists.txt | 26 +++ .../include/mv_object_detection_3d_config.h | 57 ++++++ .../include/mv_object_detection_3d_open.h | 147 +++++++++++++++ .../object_detection/include/object_detection.h | 56 ++++++ .../include/object_detection_adapter.h | 54 ++++++ .../include/object_detection_type.h | 54 ++++++ .../object_detection/include/objectron.h | 45 +++++ .../object_detection/meta/object_detection_3d.json | 30 +++ .../object_detection/src/mv_object_detection_3d.c | 125 +++++++++++++ .../src/mv_object_detection_3d_open.cpp | 201 +++++++++++++++++++++
.../object_detection/src/object_detection.cpp | 79 ++++++++ .../src/object_detection_adapter.cpp | 90 +++++++++ .../object_detection/src/objectron.cpp | 140 ++++++++++++++ packaging/capi-media-vision.spec | 9 +- test/testsuites/machine_learning/CMakeLists.txt | 1 + .../object_detection/CMakeLists.txt | 14 ++ .../object_detection/test_object_detection_3d.cpp | 79 ++++++++ 28 files changed, 1501 insertions(+), 6 deletions(-) create mode 100644 include/mv_object_detection_3d.h create mode 100644 include/mv_object_detection_3d_type.h create mode 100644 mv_machine_learning/object_detection/CMakeLists.txt create mode 100644 mv_machine_learning/object_detection/include/mv_object_detection_3d_config.h create mode 100644 mv_machine_learning/object_detection/include/mv_object_detection_3d_open.h create mode 100644 mv_machine_learning/object_detection/include/object_detection.h create mode 100644 mv_machine_learning/object_detection/include/object_detection_adapter.h create mode 100644 mv_machine_learning/object_detection/include/object_detection_type.h create mode 100644 mv_machine_learning/object_detection/include/objectron.h create mode 100644 mv_machine_learning/object_detection/meta/object_detection_3d.json create mode 100644 mv_machine_learning/object_detection/src/mv_object_detection_3d.c create mode 100644 mv_machine_learning/object_detection/src/mv_object_detection_3d_open.cpp create mode 100644 mv_machine_learning/object_detection/src/object_detection.cpp create mode 100644 mv_machine_learning/object_detection/src/object_detection_adapter.cpp create mode 100644 mv_machine_learning/object_detection/src/objectron.cpp create mode 100644 test/testsuites/machine_learning/object_detection/CMakeLists.txt create mode 100644 test/testsuites/machine_learning/object_detection/test_object_detection_3d.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fab7f73..bc87c9d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,8 @@ set(MV_3D_LIB_NAME "mv_3d" CACHE STRING 
"Name of the library will be built for 3d module (without extension).") set(MV_ROI_TRACKER_LIB_NAME "mv_roi_tracker" CACHE STRING "Name of the library will be built for tracker module (without extension).") +set(MV_OBJECT_DETECTION_LIB_NAME "mv_object_detection" CACHE STRING + "Name of the library will be built for object detection module (without extension).") include(FindPkgConfig) include(GNUInstallDirs) @@ -158,6 +160,16 @@ configure_file( ) install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-inference.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig) +set(PC_NAME ${fw_name}-object-detection) +set(PC_LDFLAGS "-l${MV_OBJECT_DETECTION_LIB_NAME} -l${MV_COMMON_LIB_NAME}") +configure_file( + ${fw_name}.pc.in + ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-object-detection.pc + @ONLY +) +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-object-detection.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig) +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/object_detection_3d.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name}) + if (${ENABLE_ML_FACE_RECOGNITION}) set(PC_NAME ${fw_name}-training) set(PC_LDFLAGS "-l${MV_TRAINING_LIB_NAME} -l${MV_COMMON_LIB_NAME}") diff --git a/include/mv_object_detection_3d.h b/include/mv_object_detection_3d.h new file mode 100644 index 0000000..8438f6d --- /dev/null +++ b/include/mv_object_detection_3d.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __TIZEN_MEDIAVISION_OBJECT_DETECT_3D_H__ +#define __TIZEN_MEDIAVISION_OBJECT_DETECT_3D_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @file mv_object_detection_3d.h + * @brief This file contains the Inference based Media Vision API. + */ + +/** + * @addtogroup CAPI_MEDIA_VISION_INFERENCE_MODULE + * @{ + */ + +/** + * @brief Creates an inference handle for object detection 3d object. + * @details Use this function to create an inference handle. After the creation + * the object detection 3d task has to be prepared with + * mv_object_detection_3d_prepare() function to prepare a network + * for the inference. + * + * @since_tizen 7.0 + * + * @remarks The @a infer should be released using mv_object_detection_3d_destroy(). + * + * @param[out] infer The handle to the inference to be created. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * + * @see mv_object_detection_3d_destroy() + * @see mv_object_detection_3d_prepare() + */ +int mv_object_detection_3d_create(mv_object_detection_3d_h *infer); + +/** + * @brief Destroys inference handle and releases all its resources. + * + * @since_tizen 7.0 + * + * @param[in] infer The handle to the inference to be destroyed.
+ * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * + * @pre Create inference handle by using mv_object_detection_3d_create() + * + * @see mv_object_detection_3d_create() + */ +int mv_object_detection_3d_destroy(mv_object_detection_3d_h infer); + +/** + * @brief Configures the backend for the object detection 3d inference. + * + * @since_tizen 7.0 + * + * @param [in] infer The handle to the inference + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + */ +int mv_object_detection_3d_configure(mv_object_detection_3d_h infer); + +/** + * @brief Prepares the object detection 3d inference + * @details Use this function to prepare the object detection 3d inference based on + * the configured network. + * + * @since_tizen 7.0 + * + * @param[in] infer The handle to the inference. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_PERMISSION_DENIED Permission denied + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Not supported format + */ +int mv_object_detection_3d_prepare(mv_object_detection_3d_h infer); + +/** + * @brief Performs the object detection 3d inference on the @a source. 
+ * + * @since_tizen 7.0 + * @remarks This function is synchronous and may take considerable time to run. + * + * @param[in] source The handle to the source of the media + * @param[in] infer The handle to the inference + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace + * isn't supported + * + * @pre Create a source handle by calling mv_create_source() + * @pre Create an inference handle by calling mv_object_detection_3d_create() + * @pre Prepare an inference by calling mv_object_detection_3d_prepare() + * @post Get the detected result by calling mv_object_detection_3d_get_result() + * + * @see mv_object_detection_3d_result_s structure + */ +int mv_object_detection_3d_inference(mv_object_detection_3d_h infer, mv_source_h source); + +/** + * @brief Gets the object detection 3d result. + * @details Use this function to get the detected result after calling @ref mv_object_detection_3d_inference(). + * + * @since_tizen 7.0 + * + * @remarks The @a out_result must NOT be released using free() + * + * @param[in] handle The handle to the object detection 3d object. + * @param[out] out_result The pointer to the structure which contains object detection 3d inference result.
+ * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Request an inference by calling @ref mv_object_detection_3d_inference() + */ +int mv_object_detection_3d_get_result(mv_object_detection_3d_h handle, mv_object_detection_3d_result_s *out_result); +/** + * @} + */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __TIZEN_MEDIAVISION_OBJECT_DETECT_3D_H__ */ diff --git a/include/mv_object_detection_3d_type.h b/include/mv_object_detection_3d_type.h new file mode 100644 index 0000000..fae4eda --- /dev/null +++ b/include/mv_object_detection_3d_type.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __TIZEN_MEDIAVISION_MV_OBJECT_DETECTION_3D_TYPE_H__ +#define __TIZEN_MEDIAVISION_MV_OBJECT_DETECTION_3D_TYPE_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @file mv_object_detection_3d_type.h + * @brief This file contains the object detection 3d handle and result type for Mediavision. + */ + +/** + * @addtogroup CAPI_MEDIA_VISION_OBJECT_DETECT_MODULE + * @{ + */ + +/** + * @brief The structure of the object detection 3d result. + * + * @since_tizen 7.0 + * @remarks @a points should not be released by app.
+ * The number of elements in @a points is equal to @a number_of_points. + * + * @probability The probability value of the detected object. + * @number_of_points The number of points. + * @points 2D coordinates of the keypoints. + * @number_of_edges The number of edges. + * @edge_indexes Pair of indexes into the points array for each edge. + * + * @pre Call mv_object_detection_3d_inference() function to perform 3d detection of the objects + * in @a source and to get a result. + * + * @see mv_object_detection_3d_inference() + */ +typedef struct mv_object_detection_3d_result { + unsigned int probability; + unsigned int number_of_points; + mv_point_s points[9]; + unsigned int number_of_edges; + unsigned int edge_indexes[12][2]; +} mv_object_detection_3d_result_s; + +/** + * @brief The object detection 3d object handle. + * + * @since_tizen 7.0 + */ +typedef void *mv_object_detection_3d_h; + +/** + * @} + */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __TIZEN_MEDIAVISION_MV_OBJECT_DETECTION_3D_TYPE_H__ */ diff --git a/mv_machine_learning/CMakeLists.txt b/mv_machine_learning/CMakeLists.txt index a077ce1..02e1678 100644 --- a/mv_machine_learning/CMakeLists.txt +++ b/mv_machine_learning/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(inference) +add_subdirectory(object_detection) if (${ENABLE_ML_FACE_RECOGNITION}) message("Enabled machine learning face recognition feature.") diff --git a/mv_machine_learning/common/include/itask.h b/mv_machine_learning/common/include/itask.h index 1216ccf..940668e 100644 --- a/mv_machine_learning/common/include/itask.h +++ b/mv_machine_learning/common/include/itask.h @@ -26,6 +26,7 @@ template class ITask { public: virtual ~ITask() {}; + virtual void create(int type) = 0; virtual void configure() = 0; virtual void prepare() = 0; virtual void setInput(T &t) = 0; diff --git a/mv_machine_learning/face_recognition/include/face_recognition_adapter.h b/mv_machine_learning/face_recognition/include/face_recognition_adapter.h index
0839b80..d77d938 100644 --- a/mv_machine_learning/face_recognition/include/face_recognition_adapter.h +++ b/mv_machine_learning/face_recognition/include/face_recognition_adapter.h @@ -76,6 +76,7 @@ public: return _config; } + void create(int type) override; void configure() override; void prepare() override; void setInput(T &t) override; diff --git a/mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp b/mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp index 2fcca3a..7961901 100644 --- a/mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp +++ b/mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp @@ -36,6 +36,11 @@ template FaceRecognitionAdapter::FaceRecognitionAd template FaceRecognitionAdapter::~FaceRecognitionAdapter() {} +template void FaceRecognitionAdapter::create(int type) +{ + throw InvalidOperation("Not support yet."); +} + template void FaceRecognitionAdapter::configure() { _config = make_unique(string(MV_CONFIG_PATH) + string(FACE_RECOGNITION_META_FILE_NAME)); diff --git a/mv_machine_learning/inference/include/BoxInfo.h b/mv_machine_learning/inference/include/BoxInfo.h index dedc7e3..7813a3d 100644 --- a/mv_machine_learning/inference/include/BoxInfo.h +++ b/mv_machine_learning/inference/include/BoxInfo.h @@ -71,7 +71,8 @@ struct BoxInfo std::map supportedBoxDecodingTypes = { { "BYPASS", INFERENCE_BOX_DECODING_TYPE_BYPASS }, { "SSD_ANCHOR", INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR }, - { "YOLO_ANCHOR", INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR } + { "YOLO_ANCHOR", INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR }, + { "SINGLE_3D", INFERENCE_BOX_DECODING_TYPE_SINGLE_3D} }; ~BoxInfo() = default; @@ -127,4 +128,4 @@ struct BoxInfo } /* Inference */ } /* MediaVision */ -#endif \ No newline at end of file +#endif diff --git a/mv_machine_learning/inference/include/Inference.h b/mv_machine_learning/inference/include/Inference.h index db792c0..1106b70 100644 --- 
a/mv_machine_learning/inference/include/Inference.h +++ b/mv_machine_learning/inference/include/Inference.h @@ -324,6 +324,26 @@ public: return mOutputTensorBuffers; } + size_t getSourceWidth() + { + return mSourceSize.width; + } + + size_t getSourceHeight() + { + return mSourceSize.height; + } + + size_t getInputWidth() + { + return mInputSize.width; + } + + size_t getInputHeight() + { + return mInputSize.height; + } + private: bool mCanRun = false; /**< The flag indicating ready to run Inference */ InferenceConfig mConfig; diff --git a/mv_machine_learning/inference/include/OutputMetadataTypes.h b/mv_machine_learning/inference/include/OutputMetadataTypes.h index 1ce8c9e..1e8aa2a 100644 --- a/mv_machine_learning/inference/include/OutputMetadataTypes.h +++ b/mv_machine_learning/inference/include/OutputMetadataTypes.h @@ -50,7 +50,8 @@ typedef enum { INFERENCE_BOX_DECODING_TYPE_BYPASS, INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR, - INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR + INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR, + INFERENCE_BOX_DECODING_TYPE_SINGLE_3D } inference_box_decoding_type_e; typedef enum diff --git a/mv_machine_learning/inference/src/OutputMetadata.cpp b/mv_machine_learning/inference/src/OutputMetadata.cpp index 3040fa9..7ac595f 100644 --- a/mv_machine_learning/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/inference/src/OutputMetadata.cpp @@ -100,6 +100,12 @@ int OutputMetadata::Parse(JsonObject *root) } if (!box.GetName().empty()) { + // In case of object detection 3d for single object, other property isn't needed. 
+ if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_SINGLE_3D) { + parsed = true; + return MEDIA_VISION_ERROR_NONE; + } + // addtional parsing is required according to decoding type if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { ret = box.ParseLabel(root); @@ -113,7 +119,6 @@ int OutputMetadata::Parse(JsonObject *root) LOGE("Fail to GetNumber[%d]", ret); return ret; } - } else { ret = box.ParseDecodeInfo(root); if (ret != MEDIA_VISION_ERROR_NONE) { diff --git a/mv_machine_learning/object_detection/CMakeLists.txt b/mv_machine_learning/object_detection/CMakeLists.txt new file mode 100644 index 0000000..0d03d03 --- /dev/null +++ b/mv_machine_learning/object_detection/CMakeLists.txt @@ -0,0 +1,26 @@ +project(${MV_OBJECT_DETECTION_LIB_NAME}) +cmake_minimum_required(VERSION 2.6...3.13) + +pkg_check_modules(${PROJECT_NAME}_DEP REQUIRED inference-engine-interface-common iniparser json-glib-1.0) +file(GLOB MV_INFERENCE_SOURCE_LIST "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp") + +find_package(OpenCV REQUIRED dnn imgproc) +if(NOT OpenCV_FOUND) + message(SEND_ERROR "OpenCV NOT FOUND") + return() +endif() + +if(FORCED_STATIC_BUILD) + add_library(${PROJECT_NAME} STATIC ${MV_INFERENCE_SOURCE_LIST}) +else() + add_library(${PROJECT_NAME} SHARED ${MV_INFERENCE_SOURCE_LIST}) +endif() + +IF (${ENABLE_INFERENCE_PROFILER}) + MESSAGE("Enabled Mediavision Inference Engine Profiler.") + ADD_DEFINITIONS(-DENABLE_INFERENCE_PROFILER) +ENDIF() + +target_link_libraries(${PROJECT_NAME} ${MV_COMMON_LIB_NAME} ${OpenCV_LIBS} ${${PROJECT_NAME}_DEP_LIBRARIES} mv_inference) +target_include_directories(${PROJECT_NAME} PRIVATE include ../inference/include ../common/include) +install(TARGETS ${PROJECT_NAME} DESTINATION ${LIB_INSTALL_DIR}) diff --git a/mv_machine_learning/object_detection/include/mv_object_detection_3d_config.h b/mv_machine_learning/object_detection/include/mv_object_detection_3d_config.h new file mode 100644 index 0000000..5722016 --- 
/dev/null +++ b/mv_machine_learning/object_detection/include/mv_object_detection_3d_config.h @@ -0,0 +1,57 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_OBJECT_DETECTION_3D_CONFIG_H__ +#define __MEDIA_VISION_OBJECT_DETECTION_3D_CONFIG_H__ + +/** + * @brief Defines #MV_OBJECT_DETECTION_3D_MODEL_FILE_PATH + * to set the object detection 3d model file path. + * + * @since_tizen 7.0 + */ +#define MV_OBJECT_DETECTION_3D_MODEL_FILE_PATH "MV_OBJECT_DETECTION_3D_MODEL_FILE_PATH" + +/** + * @brief Defines #MV_OBJECT_DETECTION_3D_MODEL_META_FILE_PATH to set inference + * model's metadata file attribute of the engine configuration. + * @details The file includes inference model's metadata such as input and output + * node names, input tensor's width and height, + * mean and standard deviation values for pre-processing. + * + * @since_tizen 7.0 + */ +#define MV_OBJECT_DETECTION_3D_MODEL_META_FILE_PATH "MV_OBJECT_DETECTION_3D_MODEL_META_FILE_PATH" + +/** + * @brief Defines #MV_OBJECT_DETECTION_3D_BACKEND_TYPE + * to set inference backend engine type. By default, tensorflow lite is used. + * + * @since_tizen 7.0 + */ +#define MV_OBJECT_DETECTION_3D_BACKEND_TYPE "MV_OBJECT_DETECTION_3D_BACKEND_TYPE" + +/** + * @brief Defines #MV_OBJECT_DETECTION_3D_TARGET_DEVICE_TYPE + * to set inference target device type. By default, CPU device is used.
+ * + * @since_tizen 7.0 + */ +#define MV_OBJECT_DETECTION_3D_TARGET_DEVICE_TYPE "MV_OBJECT_DETECTION_3D_TARGET_DEVICE_TYPE" + +#define OBJECT_DETECTION_3D_META_FILE_NAME "object_detection_3d.json" + +#endif /* __MEDIA_VISION_OBJECT_DETECTION_3D_CONFIG_H__ */ diff --git a/mv_machine_learning/object_detection/include/mv_object_detection_3d_open.h b/mv_machine_learning/object_detection/include/mv_object_detection_3d_open.h new file mode 100644 index 0000000..29a94be --- /dev/null +++ b/mv_machine_learning/object_detection/include/mv_object_detection_3d_open.h @@ -0,0 +1,147 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_OBJECT_DETECTION_3D_OPEN_H__ +#define __MEDIA_VISION_OBJECT_DETECTION_3D_OPEN_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + + /** + * @brief Create object detection 3d object handle. + * @details Use this function to create an object detection 3d object handle. + * After creation the handle has to be prepared with + * @ref mv_object_detection_3d_prepare_open() function to prepare + * an object detection 3d object.
+ * + * @since_tizen 7.0 + * + * @param[out] out_handle The handle to the object detection 3d object to be created + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * + * @post Release @a out_handle by using + * @ref mv_object_detection_3d_destroy_open() function when it is not needed + * anymore + * + * @see mv_object_detection_3d_destroy_open() + */ + int mv_object_detection_3d_create_open(mv_object_detection_3d_h *out_handle); + + /** + * @brief Destroys object detection 3d handle and releases all its resources. + * + * @since_tizen 7.0 + * + * @param[in] handle The handle to the object detection 3d object to be destroyed. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * + * @pre Create an object detection 3d handle by using @ref mv_object_detection_3d_create_open() + * + * @see mv_object_detection_3d_create_open() + */ + int mv_object_detection_3d_destroy_open(mv_object_detection_3d_h handle); + + /** + * @brief Configure the backend to the inference handle + * + * @since_tizen 7.0 + * + * @param [in] handle The handle to the inference + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + */ + int mv_object_detection_3d_configure_open(mv_object_detection_3d_h handle); + + /** + * @brief Prepare inference. + * @details Use this function to prepare inference based on + * the configured network.
+ * + * @since_tizen 7.0 + * + * @param [in] handle The handle to the inference + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + */ + int mv_object_detection_3d_prepare_open(mv_object_detection_3d_h handle); + + /** + * + * @brief Performs object detection 3d inference on the @a source + * @details Use this function to run inference on a given source. + * + * + * @since_tizen 7.0 + * + * @param[in] handle The handle to the object detection 3d object. + * @param[in] source The handle to the source of the media. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace + * isn't supported + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * + * @pre Create a source handle by calling @ref mv_create_source() + * @pre Create an object detection 3d handle by calling @ref mv_object_detection_3d_create_open() + * @pre Configure the backend by calling @ref mv_object_detection_3d_configure_open() + * @pre Prepare the inference by calling @ref mv_object_detection_3d_prepare_open() + */ + int mv_object_detection_3d_inference_open(mv_object_detection_3d_h handle, mv_source_h source); + + /** + * @brief Get the object detection 3d result and store it to @a out_result. + * @details Use this function to get the inference result after calling mv_object_detection_3d_inference_open function. + * + * @since_tizen 7.0 + * + * @param[in] handle The handle to the object detection 3d object.
+ * @param[out] out_result The pointer to the structure which receives the object detection 3d inference result. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Request an inference by calling @ref mv_object_detection_3d_inference_open() + */ + int mv_object_detection_3d_get_result_open(mv_object_detection_3d_h handle, mv_object_detection_3d_result_s *out_result); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __MEDIA_VISION_OBJECT_DETECTION_3D_OPEN_H__ */ diff --git a/mv_machine_learning/object_detection/include/object_detection.h b/mv_machine_learning/object_detection/include/object_detection.h new file mode 100644 index 0000000..8ad9cda --- /dev/null +++ b/mv_machine_learning/object_detection/include/object_detection.h @@ -0,0 +1,56 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef __OBJECT_DETECTION_H__ +#define __OBJECT_DETECTION_H__ + +#include +#include + +#include "EngineConfig.h" +#include "inference_engine_common_impl.h" +#include "Inference.h" +#include "object_detection_type.h" + +namespace mediavision +{ +namespace machine_learning +{ + +class ObjectDetection +{ +protected: + std::unique_ptr _inference; + std::unique_ptr _config; + std::string _model_file_path; + std::string _model_meta_file_path; + int _backend_type; + int _target_device_type; + +public: + ObjectDetection(); + virtual ~ObjectDetection() = default; + virtual void parseMetaFile() = 0; + void configure(); + void prepare(); + void inference(mv_source_h source); + virtual object_detection_3d_result_s& getResult() = 0; +}; + +} // machine_learning +} // mediavision + +#endif \ No newline at end of file diff --git a/mv_machine_learning/object_detection/include/object_detection_adapter.h b/mv_machine_learning/object_detection/include/object_detection_adapter.h new file mode 100644 index 0000000..6aff8eb --- /dev/null +++ b/mv_machine_learning/object_detection/include/object_detection_adapter.h @@ -0,0 +1,54 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OBJECT_DETECTION_ADAPTER_H__ +#define __OBJECT_DETECTION_ADAPTER_H__ + +#include + +#include "EngineConfig.h" +#include "itask.h" +#include "objectron.h" + +namespace mediavision +{ +namespace machine_learning +{ + +template +class ObjectDetectionAdapter : public mediavision::common::ITask +{ +private: + std::unique_ptr _object_detection; + T _source; + +public: + ObjectDetectionAdapter(); + ~ObjectDetectionAdapter(); + + void create(int type) override; + + void configure() override; + void prepare() override; + void setInput(T &t) override; + void perform() override; + V &getOutput() override; +}; + +} // machine_learning +} // mediavision + +#endif \ No newline at end of file diff --git a/mv_machine_learning/object_detection/include/object_detection_type.h b/mv_machine_learning/object_detection/include/object_detection_type.h new file mode 100644 index 0000000..980598b --- /dev/null +++ b/mv_machine_learning/object_detection/include/object_detection_type.h @@ -0,0 +1,54 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OBJECT_DETECTION_TYPE_H__ +#define __OBJECT_DETECTION_TYPE_H__ + +#include +#include + +namespace mediavision +{ +namespace machine_learning +{ + +typedef struct { + mv_source_h inference_src; +} object_detection_input_s; + +/** + * @brief The object detection result structure. + * @details Contains object detection 3d result. 
+ */
+typedef struct {
+	// Detection score; Objectron::getResult() stores the model output multiplied by 100.
+	unsigned int probability;
+	// Number of valid entries in points[] (at most 9).
+	unsigned int number_of_points;
+	// Key points; Objectron::getResult() scales them by source size / input size.
+	mv_point_s points[9];
+	// Number of valid rows in edge_indexes[] (at most 12).
+	unsigned int number_of_edges;
+	// Each row holds the two point indexes joined by one edge.
+	unsigned int edge_indexes[12][2];
+} object_detection_3d_result_s;
+
+// Task types accepted by ObjectDetectionAdapter::create(); only
+// OBJECT_DETECTION_TASK_3D is handled by this patch.
+typedef enum {
+	OBJECT_DETECTION_TASK_NONE = 0,
+	OBJECT_DETECTION_TASK_2D,
+	OBJECT_DETECTION_TASK_3D
+	// TODO
+} object_detection_task_type_e;
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/include/objectron.h b/mv_machine_learning/object_detection/include/objectron.h
new file mode 100644
index 0000000..bfe3f9f
--- /dev/null
+++ b/mv_machine_learning/object_detection/include/objectron.h
@@ -0,0 +1,45 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OBJECTRON_H__
+#define __OBJECTRON_H__
+
+// NOTE(review): the header names of the two bare #include directives below
+// appear to have been lost when this patch text was extracted; restore them
+// from the original commit before applying.
+#include
+#include "object_detection.h"
+#include
+#include "EngineConfig.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+/**
+ * @brief Objectron model based implementation of ObjectDetection.
+ * @details Provides the meta file parsing and the conversion of the model
+ *          output into an object_detection_3d_result_s.
+ */
+class Objectron : public ObjectDetection
+{
+private:
+	// Result storage; getResult() fills it and returns a reference to it.
+	object_detection_3d_result_s _result;
+
+public:
+	Objectron();
+	~Objectron();
+	void parseMetaFile() override;
+	object_detection_3d_result_s& getResult() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/meta/object_detection_3d.json b/mv_machine_learning/object_detection/meta/object_detection_3d.json
new file mode 100644
index 0000000..25636fe
--- /dev/null
+++ b/mv_machine_learning/object_detection/meta/object_detection_3d.json
@@ -0,0 +1,30 @@
+{
+    "attributes":
+    [
+        {
+            "name" : "MV_OBJECT_DETECTION_3D_MODEL_OBJECT_NAME",
+            "type" : "string",
+            "value" : "cup"
+        },
+        {
+            "name" : "MV_OBJECT_DETECTION_3D_MODEL_FILE_PATH",
+            "type" : "string",
+            "value" : "/home/owner/media/res/object_detection_3d/object_detection_3d_cup.tflite"
+        },
+        {
+            "name" : "MV_OBJECT_DETECTION_3D_MODEL_META_FILE_PATH",
+            "type" : "string",
+            "value" : "/home/owner/media/res/object_detection_3d/object_detection_3d_cup.json"
+        },
+        {
+            "name" : "MV_OBJECT_DETECTION_3D_BACKEND_TYPE",
+            "type" : "integer",
+            "value" : 1
+        },
+        {
+            "name" : "MV_OBJECT_DETECTION_3D_TARGET_DEVICE_TYPE",
+            "type" : "integer",
+            "value" : 1
+        }
+    ]
+}
diff --git a/mv_machine_learning/object_detection/src/mv_object_detection_3d.c b/mv_machine_learning/object_detection/src/mv_object_detection_3d.c
new file mode 100644
index 0000000..94f876d
--- /dev/null
+++ b/mv_machine_learning/object_detection/src/mv_object_detection_3d.c
@@ -0,0 +1,125 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "mv_object_detection_3d.h"
+#include "mv_object_detection_3d_open.h"
+
+/**
+ * @file mv_object_detection_3d.c
+ * @brief Public entry points of the object detection 3d API.
+ *        Every function runs the support and argument check macros and then
+ *        forwards to the matching *_open() function, returning its status.
+ */
+
+/* Creates an object detection 3d handle and stores it in *infer. */
+int mv_object_detection_3d_create(mv_object_detection_3d_h *infer)
+{
+	MEDIA_VISION_SUPPORT_CHECK(
+		_mv_inference_check_system_info_feature_supported());
+	MEDIA_VISION_NULL_ARG_CHECK(infer);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	const int err = mv_object_detection_3d_create_open(infer);
+
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return err;
+}
+
+/* Destroys the handle created by mv_object_detection_3d_create(). */
+int mv_object_detection_3d_destroy(mv_object_detection_3d_h infer)
+{
+	MEDIA_VISION_SUPPORT_CHECK(
+		_mv_inference_check_system_info_feature_supported());
+	MEDIA_VISION_INSTANCE_CHECK(infer);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	const int err = mv_object_detection_3d_destroy_open(infer);
+
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return err;
+}
+
+/* Configures the task behind the handle. */
+int mv_object_detection_3d_configure(mv_object_detection_3d_h infer)
+{
+	MEDIA_VISION_SUPPORT_CHECK(
+		_mv_inference_check_system_info_feature_supported());
+	MEDIA_VISION_INSTANCE_CHECK(infer);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	const int err = mv_object_detection_3d_configure_open(infer);
+
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return err;
+}
+
+/* Prepares the task behind the handle for inference. */
+int mv_object_detection_3d_prepare(mv_object_detection_3d_h infer)
+{
+	MEDIA_VISION_SUPPORT_CHECK(
+		_mv_inference_check_system_info_feature_supported());
+	MEDIA_VISION_INSTANCE_CHECK(infer);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	const int err = mv_object_detection_3d_prepare_open(infer);
+
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return err;
+}
+
+/* Runs inference on the given source. */
+int mv_object_detection_3d_inference(mv_object_detection_3d_h infer, mv_source_h source)
+{
+	/* NOTE(review): this entry point uses the "image" feature check while the
+	 * ones above use the generic one - confirm this is intended. */
+	MEDIA_VISION_SUPPORT_CHECK(
+		_mv_inference_image_check_system_info_feature_supported());
+	MEDIA_VISION_INSTANCE_CHECK(source);
+	MEDIA_VISION_INSTANCE_CHECK(infer);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	const int err = mv_object_detection_3d_inference_open(infer, source);
+
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return err;
+}
+
+/* Copies the latest detection result into *out_result. */
+int mv_object_detection_3d_get_result(mv_object_detection_3d_h handle, mv_object_detection_3d_result_s *out_result)
+{
+	/* NOTE(review): this entry point uses the "face" feature check - confirm
+	 * this is intended for object detection. */
+	MEDIA_VISION_SUPPORT_CHECK(
+		_mv_inference_face_check_system_info_feature_supported());
+
+	MEDIA_VISION_INSTANCE_CHECK(handle);
+	MEDIA_VISION_NULL_ARG_CHECK(out_result);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	const int err = mv_object_detection_3d_get_result_open(handle, out_result);
+
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return err;
+}
diff --git a/mv_machine_learning/object_detection/src/mv_object_detection_3d_open.cpp b/mv_machine_learning/object_detection/src/mv_object_detection_3d_open.cpp
new file mode 100644
index 0000000..3705a54
--- /dev/null
+++ b/mv_machine_learning/object_detection/src/mv_object_detection_3d_open.cpp
@@ -0,0 +1,201 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "itask.h"
+#include "mv_object_detection_3d_open.h"
+#include "object_detection_adapter.h"
+#include "machine_learning_exception.h"
+#include "object_detection_type.h"
+#include "context.h"
+
+// NOTE(review): the header names of the four bare #include directives below and
+// every <...> template argument / cast target type in this file appear to have
+// been lost when this patch text was extracted; restore them from the original
+// commit before applying.
+#include
+#include
+#include
+#include
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::common;
+using namespace mediavision::machine_learning;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning::exception;
+using ObjectDetectionTask = ITask;
+
+/**
+ * @brief Creates a context holding one object detection 3d task.
+ * @param[out] out_handle Receives the new handle on success; left untouched on failure.
+ * @return MEDIA_VISION_ERROR_NONE on success, MEDIA_VISION_ERROR_INVALID_PARAMETER
+ *         if out_handle is NULL, MEDIA_VISION_ERROR_OUT_OF_MEMORY if an
+ *         allocation fails, otherwise the error carried by the exception
+ *         thrown while creating the task.
+ */
+int mv_object_detection_3d_create_open(mv_object_detection_3d_h *out_handle)
+{
+	if (!out_handle) {
+		LOGE("Handle can't be created because handle pointer is NULL");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	Context *context = new (nothrow) Context();
+	if (!context) {
+		LOGE("Fail to allocate a context.");
+		return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
+	}
+
+	ObjectDetectionTask *task = new (nothrow)
+				ObjectDetectionAdapter();
+	if (!task) {
+		delete context;
+		LOGE("Fail to allocate a task.");
+		return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
+	}
+
+	try {
+		task->create(OBJECT_DETECTION_TASK_3D);
+	} catch (const BaseException& e) {
+		LOGE("%s", e.what());
+
+		// The handle is never handed to the caller on this path, so nobody
+		// else could release these two objects.
+		delete task;
+		delete context;
+
+		return e.getError();
+	}
+
+	context->__tasks.insert(make_pair("objectron", task));
+	*out_handle = static_cast(context);
+
+	LOGD("object detection 3d handle [%p] has been created", *out_handle);
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+/**
+ * @brief Deletes every task registered in the context and the context itself.
+ * @param[in] handle Handle returned by mv_object_detection_3d_create_open().
+ * @return MEDIA_VISION_ERROR_NONE, or MEDIA_VISION_ERROR_INVALID_PARAMETER if
+ *         handle is NULL.
+ */
+int mv_object_detection_3d_destroy_open(mv_object_detection_3d_h handle)
+{
+	if (!handle) {
+		LOGE("Handle is NULL.");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	auto context = static_cast(handle);
+
+	for (auto& m : context->__tasks)
+		delete static_cast(m.second);
+
+	delete context;
+
+	LOGD("Object detection 3d handle has been destroyed.");
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+/**
+ * @brief Calls configure() on the "objectron" task of the context.
+ * @return MEDIA_VISION_ERROR_NONE, MEDIA_VISION_ERROR_INVALID_PARAMETER if
+ *         handle is NULL, otherwise the error carried by the thrown exception.
+ */
+int mv_object_detection_3d_configure_open(mv_object_detection_3d_h handle)
+{
+	LOGD("ENTER");
+
+	if (!handle) {
+		LOGE("Handle is NULL.");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	try {
+		auto context = static_cast(handle);
+		auto task = static_cast(context->__tasks["objectron"]);
+
+		task->configure();
+	} catch (const BaseException &e) {
+		LOGE("%s", e.what());
+		return e.getError();
+	}
+
+	LOGD("LEAVE");
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+/**
+ * @brief Calls prepare() on the "objectron" task of the context.
+ * @return MEDIA_VISION_ERROR_NONE, MEDIA_VISION_ERROR_INVALID_PARAMETER if
+ *         handle is NULL, otherwise the error carried by the thrown exception.
+ */
+int mv_object_detection_3d_prepare_open(mv_object_detection_3d_h handle)
+{
+	LOGD("ENTER");
+
+	if (!handle) {
+		LOGE("Handle is NULL.");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	try {
+		auto context = static_cast(handle);
+		auto task = static_cast(context->__tasks["objectron"]);
+
+		task->prepare();
+	} catch (const BaseException &e) {
+		LOGE("%s", e.what());
+		return e.getError();
+	}
+
+	LOGD("LEAVE");
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+/**
+ * @brief Hands the source to the "objectron" task and performs inference.
+ * @return MEDIA_VISION_ERROR_NONE, MEDIA_VISION_ERROR_INVALID_PARAMETER if
+ *         handle is NULL, otherwise the error carried by the thrown exception.
+ */
+int mv_object_detection_3d_inference_open(mv_object_detection_3d_h handle, mv_source_h source)
+{
+	LOGD("ENTER");
+
+	if (!handle) {
+		LOGE("Handle is NULL.");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	try {
+		auto context = static_cast(handle);
+		auto task = static_cast(context->__tasks["objectron"]);
+
+		object_detection_input_s input = { source };
+
+		task->setInput(input);
+		task->perform();
+	} catch (const BaseException &e) {
+		LOGE("%s", e.what());
+		return e.getError();
+	}
+
+	LOGD("LEAVE");
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+/**
+ * @brief Copies the task output into the caller provided result structure.
+ * @param[in]  handle     Handle returned by mv_object_detection_3d_create_open().
+ * @param[out] out_result Receives probability, points and edges.
+ * @return MEDIA_VISION_ERROR_NONE, MEDIA_VISION_ERROR_INVALID_PARAMETER if
+ *         handle or out_result is NULL, otherwise the error carried by the
+ *         thrown exception.
+ */
+int mv_object_detection_3d_get_result_open(mv_object_detection_3d_h handle, mv_object_detection_3d_result_s *out_result)
+{
+	LOGD("ENTER");
+
+	if (!handle) {
+		LOGE("Handle is NULL.");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	// out_result is dereferenced below, so reject NULL here as well instead of
+	// relying on every caller having checked it.
+	if (!out_result) {
+		LOGE("Result pointer is NULL.");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	try {
+		auto context = static_cast(handle);
+		auto task = static_cast(context->__tasks["objectron"]);
+
+		// getOutput() returns a reference; bind to it rather than copying the
+		// whole structure first.
+		const object_detection_3d_result_s &result = task->getOutput();
+
+		out_result->probability = result.probability;
+		out_result->number_of_points = result.number_of_points;
+		out_result->number_of_edges = result.number_of_edges;
+
+		for (unsigned int idx = 0; idx < out_result->number_of_points; ++idx)
+			out_result->points[idx] = result.points[idx];
+
+		for (unsigned int idx = 0; idx < out_result->number_of_edges; ++idx) {
+			out_result->edge_indexes[idx][0] = result.edge_indexes[idx][0];
+			out_result->edge_indexes[idx][1] = result.edge_indexes[idx][1];
+		}
+	} catch (const BaseException &e) {
+		LOGE("%s", e.what());
+		return e.getError();
+	}
+
+	LOGD("LEAVE");
+
+	return MEDIA_VISION_ERROR_NONE;
+}
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/src/object_detection.cpp b/mv_machine_learning/object_detection/src/object_detection.cpp
new file mode 100644
index 0000000..95fd34b
--- /dev/null
+++ b/mv_machine_learning/object_detection/src/object_detection.cpp
@@ -0,0 +1,79 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NOTE(review): the header names of the three bare #include directives below
+// and the element types of the std::vector locals in this file appear to have
+// been lost when this patch text was extracted; restore them from the original
+// commit before applying.
+#include
+#include
+#include
+
+#include "machine_learning_exception.h"
+#include "object_detection.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+// Value-initializes the backend and target device types.
+ObjectDetection::ObjectDetection() : _backend_type(), _target_device_type()
+{
+}
+
+// Binds the inference engine to the configured backend and target device.
+// Throws InvalidOperation if binding fails.
+void ObjectDetection::configure()
+{
+	int ret = _inference->Bind(_backend_type, _target_device_type);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw InvalidOperation("Fail to bind a backend engine.");
+}
+
+// Passes placeholder input/output information and the model file path to the
+// inference engine, then loads the model.
+// Throws InvalidOperation if loading fails.
+void ObjectDetection::prepare()
+{
+	std::vector input_names;
+
+	// TODO. In case of using model meta file, this function shouldn't be needed.
+	// Instead, all configurations should be done in MetaData class.
+	_inference->ConfigureInputInfo(0, 0, 0, 0, 0, 0, 0, input_names);
+
+	std::vector output_names;
+	std::vector output_tensor_info;
+
+	// TODO. In case of using model meta file, this function shouldn't be needed.
+	// Instead, all configurations should be done in MetaData class.
+	_inference->ConfigureOutputInfo(output_names, output_tensor_info);
+
+	_inference->ConfigureModelFiles("", _model_file_path, "");
+
+	// Request to load model files to a backend engine.
+	int ret = _inference->Load();
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw InvalidOperation("Fail to load model files.");
+}
+
+// Runs inference on the single given source; no rectangles are passed.
+// Throws InvalidOperation if the run fails.
+void ObjectDetection::inference(mv_source_h source)
+{
+	std::vector sources;
+	std::vector rects;
+
+	sources.push_back(source);
+	int ret = _inference->Run(sources, rects);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw InvalidOperation("Fail to run inference");
+}
+
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/src/object_detection_adapter.cpp b/mv_machine_learning/object_detection/src/object_detection_adapter.cpp
new file mode 100644
index 0000000..7d95585
--- /dev/null
+++ b/mv_machine_learning/object_detection/src/object_detection_adapter.cpp
@@ -0,0 +1,90 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "machine_learning_exception.h"
+#include "object_detection_adapter.h"
+
+using namespace std;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+// NOTE(review): the template parameter lists and template arguments in this
+// file (after "template", "ObjectDetectionAdapter" and "make_unique") appear to
+// have been lost when this patch text was extracted; restore them from the
+// original commit before applying.
+
+template ObjectDetectionAdapter::ObjectDetectionAdapter() : _source()
+{
+
+}
+
+template ObjectDetectionAdapter::~ObjectDetectionAdapter()
+{}
+
+// Creates the model object for the requested task type.
+// Throws InvalidParameter for any type other than OBJECT_DETECTION_TASK_3D.
+template
+void ObjectDetectionAdapter::create(int type)
+{
+	switch (type) {
+	case OBJECT_DETECTION_TASK_3D:
+		_object_detection = make_unique();
+		break;
+	default:
+		throw InvalidParameter("Invalid object detection task type.");
+	}
+}
+
+// Parses the meta file and configures the model object.
+// Exceptions from the model object propagate to the caller unchanged; the
+// former "catch (const BaseException &e) { throw e; }" wrapper rethrew a copy
+// of static type BaseException and thereby sliced the derived exception.
+template void ObjectDetectionAdapter::configure()
+{
+	_object_detection->parseMetaFile();
+	_object_detection->configure();
+}
+
+// Prepares the model object; exceptions propagate unchanged.
+template void ObjectDetectionAdapter::prepare()
+{
+	_object_detection->prepare();
+}
+
+// Stores a copy of the input for the next perform() call.
+template void ObjectDetectionAdapter::setInput(T &t)
+{
+	_source = t;
+}
+
+// Runs inference on the stored input; exceptions propagate unchanged.
+template void ObjectDetectionAdapter::perform()
+{
+	_object_detection->inference(_source.inference_src);
+}
+
+// Returns the result produced by the model object.
+template V& ObjectDetectionAdapter::getOutput()
+{
+	return _object_detection->getResult();
+}
+
+template class ObjectDetectionAdapter;
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/src/objectron.cpp b/mv_machine_learning/object_detection/src/objectron.cpp
new file mode 100644
index 0000000..9bb7d2b
--- /dev/null
+++ b/mv_machine_learning/object_detection/src/objectron.cpp
@@ -0,0 +1,140 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NOTE(review): the header names of the three bare #include directives below
+// and every <...> template argument / cast target type in this file appear to
+// have been lost when this patch text was extracted; restore them from the
+// original commit before applying.
+#include
+#include
+#include
+
+#include "machine_learning_exception.h"
+#include "objectron.h"
+#include "mv_object_detection_3d_config.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+// Creates the inference engine wrapper used by the base class.
+Objectron::Objectron() : _result()
+{
+	_inference = make_unique();
+}
+
+Objectron::~Objectron()
+{
+}
+
+// Returns true if the text after the last '.' of fileName is exactly "json".
+static bool IsJsonFile(const string &fileName)
+{
+	const size_t dot = fileName.find_last_of('.');
+
+	// Without a '.' there is no extension at all. The former expression
+	// evaluated npos + 1 == 0 and compared the whole name, so a file simply
+	// called "json" was accepted.
+	if (dot == string::npos)
+		return false;
+
+	return fileName.compare(dot + 1, string::npos, "json") == 0;
+}
+
+// Reads backend type, target device type, model file path and model meta file
+// path from the configuration file, then lets the inference engine parse the
+// model meta file.
+// Throws InvalidOperation if an attribute cannot be read, the meta file path
+// is empty or not a json file, or the meta file cannot be parsed.
+void Objectron::parseMetaFile()
+{
+	_config = make_unique(string(MV_CONFIG_PATH) +
+				string(OBJECT_DETECTION_3D_META_FILE_NAME));
+
+	int ret = _config->getIntegerAttribute(string(MV_OBJECT_DETECTION_3D_BACKEND_TYPE),
+						&_backend_type);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw InvalidOperation("Fail to get backend engine type.");
+
+	ret = _config->getIntegerAttribute(string(MV_OBJECT_DETECTION_3D_TARGET_DEVICE_TYPE),
+						&_target_device_type);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw InvalidOperation("Fail to get target device type.");
+
+	ret = _config->getStringAttribute(MV_OBJECT_DETECTION_3D_MODEL_FILE_PATH, &_model_file_path);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw InvalidOperation("Fail to get model file path");
+
+	ret = _config->getStringAttribute(MV_OBJECT_DETECTION_3D_MODEL_META_FILE_PATH, &_model_meta_file_path);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw InvalidOperation("Fail to get model meta file path");
+
+	if (_model_meta_file_path.empty())
+		throw InvalidOperation("Model meta file doesn't exist.");
+
+	if (!IsJsonFile(_model_meta_file_path))
+		throw InvalidOperation("Model meta file should be json");
+
+	ret = _inference->ParseMetadata(_model_meta_file_path);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw InvalidOperation("Fail to ParseMetadata");
+}
+
+// Converts the output tensors of the last inference into _result:
+// 9 key points scaled to the source image size, the probability and the
+// 12 edges of the box.
+// Throws InvalidOperation if fewer than two output layers exist, a tensor
+// buffer is missing or the key point tensor does not hold 18 values.
+object_detection_3d_result_s& Objectron::getResult()
+{
+	TensorBuffer& tensor_buffer_obj = _inference->GetOutputTensorBuffer();
+	IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
+
+	vector output_layer_names;
+
+	for (IETensorBuffer::iterator it = ie_tensor_buffer.begin(); it != ie_tensor_buffer.end(); it++)
+		output_layer_names.push_back(it->first);
+
+	// Both layers are addressed by position below; indexing without this check
+	// would be undefined behaviour when the model exposes fewer outputs.
+	// NOTE(review): the positions depend on the iteration order of
+	// IETensorBuffer - confirm that order is stable for this model.
+	if (output_layer_names.size() < 2)
+		throw InvalidOperation("Invalid number of output layers. Two output layers are needed.");
+
+	string& identity_1_layer = output_layer_names[1];
+
+	inference_engine_tensor_buffer* tensor_buffer = tensor_buffer_obj.getTensorBuffer(identity_1_layer);
+	if (!tensor_buffer)
+		throw InvalidOperation("Fail to get tensor buffer.");
+
+	// 4 is presumably the byte size of one tensor element - TODO confirm.
+	unsigned int output_size = tensor_buffer->size / 4;
+	auto *keypoints = reinterpret_cast(tensor_buffer->buffer);
+
+	if (output_size != 18)
+		throw InvalidOperation("Invalid number of points. Number of points should be 18.");
+
+	unsigned int result_idx = 0;
+
+	float x_scale = static_cast(_inference->getSourceWidth()) / static_cast(_inference->getInputWidth());
+	float y_scale = static_cast(_inference->getSourceHeight()) / static_cast(_inference->getInputHeight());
+
+	// The tensor stores x and y interleaved, one pair per key point.
+	for (unsigned int idx = 0; idx < output_size; idx += 2) {
+		_result.points[result_idx].x = static_cast(keypoints[idx] * x_scale);
+		_result.points[result_idx++].y = static_cast(keypoints[idx + 1] * y_scale);
+	}
+
+	_result.number_of_points = output_size / 2;
+
+	string& identity_layer = output_layer_names[0];
+
+	tensor_buffer = tensor_buffer_obj.getTensorBuffer(identity_layer);
+	if (!tensor_buffer)
+		throw InvalidOperation("Fail to get tensor buffer.");
+
+	auto *prob = reinterpret_cast(tensor_buffer->buffer);
+	_result.probability = static_cast(prob[0] * 100);
+
+	_result.number_of_edges = 12;
+
+	// Pairs of point indexes forming the 12 edges; the table never changes, so
+	// it is built once instead of on every call.
+	static const unsigned int edges[12][2] = {
+		{2, 3}, {4, 5}, {6, 7}, {8, 9},
+		{2, 4}, {3, 5}, {6, 8}, {7, 9},
+		{2, 6}, {3, 7}, {4, 8}, {5, 9}
+	};
+
+	::copy(&edges[0][0], &edges[0][0] + 12 * 2, &_result.edge_indexes[0][0]);
+
+	return _result;
+}
+
+}
+}
\ No newline at end of file
diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec
index 28a66fc..e3f1a4e 100644
--- a/packaging/capi-media-vision.spec
+++ b/packaging/capi-media-vision.spec
@@ -1,7 +1,7 @@
 Name: capi-media-vision
 Summary: Media Vision library for Tizen Native API
-Version: 0.23.41
-Release: 2
+Version: 0.24.0
+Release: 0
 Group: Multimedia/Framework
 License: Apache-2.0 and BSD-3-Clause
 Source0: %{name}-%{version}.tar.gz
@@ -374,6 +374,8 @@ find . -name '*.gcno' -exec cp --parents '{}' "$gcno_obj_dir" ';'
 %manifest %{name}.manifest
 %license LICENSE.APLv2
 %{_libdir}/libmv_inference*.so
+%{_datadir}/%{name}/object_detection_3d.json
+%{_libdir}/libmv_object_detection*.so
 %if "%{enable_ml_face_recognition}" == "1"
 %{_datadir}/%{name}/face_recognition.json
 %{_libdir}/libmv_training.so
@@ -382,7 +384,9 @@ find .
-name '*.gcno' -exec cp --parents '{}' "$gcno_obj_dir" ';'
 
 %files machine_learning-devel
 %{_includedir}/media/mv_infer*.h
+%{_includedir}/media/mv_object_detection_3d*.h
 %{_libdir}/pkgconfig/*inference.pc
+%{_libdir}/pkgconfig/*object-detection.pc
 %if "%{enable_ml_face_recognition}" == "1"
 %{_includedir}/media/mv_face_recognition*.h
 %{_libdir}/pkgconfig/*training.pc
@@ -409,6 +413,7 @@ find . -name '*.gcno' -exec cp --parents '{}' "$gcno_obj_dir" ';'
 %endif
 %{_libdir}/libmv_testsuite*.so
 %{_bindir}/mv_*
+%{_bindir}/test_object_detection_3d
 %if "%{enable_ml_face_recognition}" == "1"
 %{_bindir}/test_face_recognition
 %{_bindir}/measure_face_recognition
diff --git a/test/testsuites/machine_learning/CMakeLists.txt b/test/testsuites/machine_learning/CMakeLists.txt
index ebf24f4..288a2ef 100644
--- a/test/testsuites/machine_learning/CMakeLists.txt
+++ b/test/testsuites/machine_learning/CMakeLists.txt
@@ -2,6 +2,7 @@ project(machine_learning)
 cmake_minimum_required(VERSION 2.6...3.13)
 
 add_subdirectory(${PROJECT_SOURCE_DIR}/inference)
+add_subdirectory(${PROJECT_SOURCE_DIR}/object_detection)
 
 if (${ENABLE_ML_FACE_RECOGNITION})
 message("Enabled machine learning face recognition test cases.")
diff --git a/test/testsuites/machine_learning/object_detection/CMakeLists.txt b/test/testsuites/machine_learning/object_detection/CMakeLists.txt
new file mode 100644
index 0000000..6f5cb3e
--- /dev/null
+++ b/test/testsuites/machine_learning/object_detection/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Builds the object detection 3d test executable from test_object_detection_3d.cpp,
+# links it against gtest and the media vision libraries listed below, and
+# installs it into ${CMAKE_INSTALL_BINDIR}.
+project(mv_object_detection_3d_suite)
+cmake_minimum_required(VERSION 2.6...3.13)
+
+set(TEST_OBJECT_DETECTION_3D test_object_detection_3d)
+
+add_executable(${TEST_OBJECT_DETECTION_3D} test_object_detection_3d.cpp)
+
+target_link_libraries(${TEST_OBJECT_DETECTION_3D} gtest gtest_main
+                      mv_inference
+                      mv_object_detection
+                      mv_image_helper
+)
+
+install(TARGETS ${TEST_OBJECT_DETECTION_3D} DESTINATION ${CMAKE_INSTALL_BINDIR})
\ No newline at end of file
diff --git
a/test/testsuites/machine_learning/object_detection/test_object_detection_3d.cpp b/test/testsuites/machine_learning/object_detection/test_object_detection_3d.cpp
new file mode 100644
index 0000000..f349413
--- /dev/null
+++ b/test/testsuites/machine_learning/object_detection/test_object_detection_3d.cpp
@@ -0,0 +1,79 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NOTE(review): the header names of the three bare #include directives below
+// appear to have been lost when this patch text was extracted; restore them
+// from the original commit before applying.
+#include
+#include
+#include
+
+#include "gtest/gtest.h"
+
+#include "ImageHelper.h"
+#include "mv_object_detection_3d.h"
+
+#define IMAGE_PATH "/res/inference/images/cup.jpeg"
+
+using namespace testing;
+using namespace std;
+
+using namespace MediaVision::Common;
+
+// Walks through the whole API once: create, configure, prepare, inference on
+// IMAGE_PATH, get_result, destroy. Every call has to succeed; the result is
+// only printed, not checked against expected values.
+TEST(ObjectDetection3DTest, InferenceShouldBeOk)
+{
+	mv_object_detection_3d_h handle = nullptr;
+
+	int ret = mv_object_detection_3d_create(&handle);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+	ret = mv_object_detection_3d_configure(handle);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+	ret = mv_object_detection_3d_prepare(handle);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+	const string image_path = IMAGE_PATH;
+	mv_source_h mv_source = nullptr;
+
+	ret = mv_create_source(&mv_source);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+	ret = ImageHelper::loadImageToSource(image_path.c_str(), mv_source);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+	// Compared against the named constant like every other call in this test
+	// (was a bare 0).
+	ret = mv_object_detection_3d_inference(handle, mv_source);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+	mv_object_detection_3d_result_s result;
+	ret = mv_object_detection_3d_get_result(handle, &result);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+	// '\n' instead of std::endl: no need to flush the stream after every line.
+	std::cout << "Probability = " << result.probability << '\n';
+
+	for (unsigned int idx = 0; idx < result.number_of_points; ++idx) {
+		std::cout << "index = " << idx + 1 << " : " << result.points[idx].x
+				  << " x " << result.points[idx].y << '\n';
+	}
+
+	std::cout << "Number of edges = " << result.number_of_edges << '\n';
+
+	for (unsigned int idx = 0; idx < result.number_of_edges; ++idx)
+		std::cout << result.edge_indexes[idx][0] << " to " << result.edge_indexes[idx][1] << '\n';
+
+	ret = mv_destroy_source(mv_source);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+	ret = mv_object_detection_3d_destroy(handle);
+	ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+}
\ No newline at end of file
--
2.7.4