mv_machine_learning: add object detection 3d API
author Inki Dae <inki.dae@samsung.com>
Mon, 19 Sep 2022 02:32:37 +0000 (11:32 +0900)
committer Inki Dae <inki.dae@samsung.com>
Mon, 7 Nov 2022 03:12:39 +0000 (12:12 +0900)
[Version] : 0.24.0-0
[Issue type] : new feature

Add Objectron model based object detection 3d API.

What this patch does,
    - Implement Objectron model[1] based object detection 3d framework.
    - Add object_detection directory in mv_machine_learning, which
      includes object detection relevant files.
    - Add create interface to itask class, which creates model specific class
      object.
    - Implement Object detection and its based Objectron class for
      object detection 3d inference.
    - Implement itask interface class based object detection adapter class.
    - Implement object detection 3d API.

[1] https://arxiv.org/abs/2003.03522

Change-Id: I3f4f9d53a305bd45d83cb330757bc6d5f83396b8
Signed-off-by: Inki Dae <inki.dae@samsung.com>
28 files changed:
CMakeLists.txt
include/mv_object_detection_3d.h [new file with mode: 0644]
include/mv_object_detection_3d_type.h [new file with mode: 0644]
mv_machine_learning/CMakeLists.txt
mv_machine_learning/common/include/itask.h
mv_machine_learning/face_recognition/include/face_recognition_adapter.h
mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp
mv_machine_learning/inference/include/BoxInfo.h
mv_machine_learning/inference/include/Inference.h
mv_machine_learning/inference/include/OutputMetadataTypes.h
mv_machine_learning/inference/src/OutputMetadata.cpp
mv_machine_learning/object_detection/CMakeLists.txt [new file with mode: 0644]
mv_machine_learning/object_detection/include/mv_object_detection_3d_config.h [new file with mode: 0644]
mv_machine_learning/object_detection/include/mv_object_detection_3d_open.h [new file with mode: 0644]
mv_machine_learning/object_detection/include/object_detection.h [new file with mode: 0644]
mv_machine_learning/object_detection/include/object_detection_adapter.h [new file with mode: 0644]
mv_machine_learning/object_detection/include/object_detection_type.h [new file with mode: 0644]
mv_machine_learning/object_detection/include/objectron.h [new file with mode: 0644]
mv_machine_learning/object_detection/meta/object_detection_3d.json [new file with mode: 0644]
mv_machine_learning/object_detection/src/mv_object_detection_3d.c [new file with mode: 0644]
mv_machine_learning/object_detection/src/mv_object_detection_3d_open.cpp [new file with mode: 0644]
mv_machine_learning/object_detection/src/object_detection.cpp [new file with mode: 0644]
mv_machine_learning/object_detection/src/object_detection_adapter.cpp [new file with mode: 0644]
mv_machine_learning/object_detection/src/objectron.cpp [new file with mode: 0644]
packaging/capi-media-vision.spec
test/testsuites/machine_learning/CMakeLists.txt
test/testsuites/machine_learning/object_detection/CMakeLists.txt [new file with mode: 0644]
test/testsuites/machine_learning/object_detection/test_object_detection_3d.cpp [new file with mode: 0644]

index fab7f7313ad657b98cf856d30910d608a7f2b7b5..bc87c9dfab5a30bd70993e738924b6ea7fd94f3d 100644 (file)
@@ -34,6 +34,8 @@ set(MV_3D_LIB_NAME "mv_3d" CACHE STRING
        "Name of the library will be built for 3d module (without extension).")
 set(MV_ROI_TRACKER_LIB_NAME "mv_roi_tracker" CACHE STRING
        "Name of the library will be built for tracker module (without extension).")
+set(MV_OBJECT_DETECTION_LIB_NAME "mv_object_detection" CACHE STRING
+       "Name of the library will be built for object detection module (without extension).")
 
 include(FindPkgConfig)
 include(GNUInstallDirs)
@@ -158,6 +160,16 @@ configure_file(
 )
 install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-inference.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig)
 
+set(PC_NAME ${fw_name}-object-detection)
+set(PC_LDFLAGS "-l${MV_OBJECT_DETECTION_LIB_NAME} -l${MV_COMMON_LIB_NAME}")
+configure_file(
+       ${fw_name}.pc.in
+       ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-object-detection.pc
+       @ONLY
+)
+install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-object-detection.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig)
+install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/object_detection_3d.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name})
+
 if (${ENABLE_ML_FACE_RECOGNITION})
     set(PC_NAME ${fw_name}-training)
     set(PC_LDFLAGS "-l${MV_TRAINING_LIB_NAME} -l${MV_COMMON_LIB_NAME}")
diff --git a/include/mv_object_detection_3d.h b/include/mv_object_detection_3d.h
new file mode 100644 (file)
index 0000000..8438f6d
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_OBJECT_DETECT_3D_H__
+#define __TIZEN_MEDIAVISION_OBJECT_DETECT_3D_H__
+
+#include <mv_common.h>
+#include <mv_object_detection_3d_type.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file   mv_object_detection_3d.h
+ * @brief  This file contains the Inference based Media Vision API.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_INFERENCE_MODULE
+ * @{
+ */
+
+/**
+ * @brief Creates an inference handle for object detection 3d object.
+ * @details Use this function to create an inference handle. After the creation
+ *          the object detection 3d task has to be prepared with
+ *          mv_object_detection_3d_prepare() function to prepare a network
+ *          for the inference.
+ *
+ * @since_tizen 7.0
+ *
+ * @remarks The @a infer should be released using mv_object_detection_3d_destroy().
+ *
+ * @param[out] infer    The handle to the inference to be created.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ *
+ * @see mv_object_detection_3d_destroy()
+ * @see mv_object_detection_3d_prepare()
+ */
+int mv_object_detection_3d_create(mv_object_detection_3d_h *infer);
+
+/**
+ * @brief Destroys inference handle and releases all its resources.
+ *
+ * @since_tizen 7.0
+ *
+ * @param[in] infer    The handle to the inference to be destroyed.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create inference handle by using mv_object_detection_3d_create()
+ *
+ * @see mv_object_detection_3d_create()
+ */
+int mv_object_detection_3d_destroy(mv_object_detection_3d_h infer);
+
+/**
+ * @brief Configures the backend for the object detection 3d inference.
+ *
+ * @since_tizen 7.0
+ *
+ * @param [in] infer         The handle to the inference
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ */
+int mv_object_detection_3d_configure(mv_object_detection_3d_h infer);
+
+/**
+ * @brief Prepares the object detection 3d inference
+ * @details Use this function to prepare the object detection 3d inference based on
+ *          the configured network.
+ *
+ * @since_tizen 7.0
+ *
+ * @param[in] infer         The handle to the inference.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_PERMISSION_DENIED Permission denied
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Not supported format
+ */
+int mv_object_detection_3d_prepare(mv_object_detection_3d_h infer);
+
+/**
+ * @brief Performs the object detection 3d inference on the @a source.
+ *
+ * @since_tizen 7.0
+ * @remarks This function is synchronous and may take considerable time to run.
+ *
+ * @param[in] source         The handle to the source of the media
+ * @param[in] infer          The handle to the inference
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ *                                                  isn't supported
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_object_detection_3d_create()
+ * @pre Prepare an inference by calling mv_object_detection_3d_prepare()
+ * @post
+ *
+ * @see mv_object_detection_3d_result_s structure
+ */
+int mv_object_detection_3d_inference(mv_object_detection_3d_h infer, mv_source_h source);
+
+/**
+ * @brief Gets the object detection 3d result.
+ * @details Use this function to get the detected result after calling @ref mv_object_detection_3d_inference().
+ *
+ * @since_tizen 7.0
+ *
+ * @remarks The @a out_result must NOT be released using free()
+ *
+ * @param[in] handle         The handle to the object detection 3d object.
+ * @param[out] out_result    The pointer to the structure which contains object detection 3d inference result.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Request an inference by calling @ref mv_object_detection_3d_inference()
+ */
+int mv_object_detection_3d_get_result(mv_object_detection_3d_h handle, mv_object_detection_3d_result_s *out_result);
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_OBJECT_DETECT_3D_H__ */
diff --git a/include/mv_object_detection_3d_type.h b/include/mv_object_detection_3d_type.h
new file mode 100644 (file)
index 0000000..fae4eda
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_MV_OBJECT_DETECTION_3D_TYPE_H__
+#define __TIZEN_MEDIAVISION_MV_OBJECT_DETECTION_3D_TYPE_H__
+
+#include <mv_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file   mv_object_detection_3d_type.h
+ * @brief  This file contains the object detection 3d handle and result type for Mediavision.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_OBJECT_DETECT_MODULE
+ * @{
+ */
+
+/**
+ * @brief                The structure to object detection 3d result.
+ *
+ * @since_tizen 7.0
+ * @remarks @a points should not be released by app.
+ *          The number of elements in @a points is equal to @a number_of_points.
+ *
+ * @probability          The probability value to the detected object.
+ * @number_of_points     The number of points.
+ * @points               2D coordinates of the keypoints.
+ * @number_of_edges      The number of edges.
+ * @edge_indexes         A pair of indexes into the points array for each edge.
+ *
+ * @pre Call mv_object_detection_3d_inference() function to perform 3d detection of the objects
+ *      in @a source and to get a result.
+ *
+ * @see mv_object_detection_3d_inference()
+ */
+typedef struct mv_object_detection_3d_result {
+       unsigned int probability;
+       unsigned int number_of_points;
+       mv_point_s points[9];
+       unsigned int number_of_edges;
+       unsigned int edge_indexes[12][2];
+} mv_object_detection_3d_result_s;
+
+/**
+ * @brief The object detection 3d object handle.
+ *
+ * @since_tizen 7.0
+ */
+typedef void *mv_object_detection_3d_h;
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_MV_OBJECT_DETECTION_3D_TYPE_H__ */
index a077ce1e4b3c26a7a88e36e5f1161aa91ff1679d..02e1678c9b5698724aeabdb92b4645656a9c07ce 100644 (file)
@@ -1,4 +1,5 @@
 add_subdirectory(inference)
+add_subdirectory(object_detection)
 
 if (${ENABLE_ML_FACE_RECOGNITION})
     message("Enabled machine learning face recognition feature.")
index 1216ccf81495ffb70add67939085e1e787e621ea..940668e8eefb7292efd9295610ac019f19a5a48f 100644 (file)
@@ -26,6 +26,7 @@ template<typename T, typename V> class ITask
 {
 public:
        virtual ~ITask() {};
+       virtual void create(int type) = 0;
        virtual void configure() = 0;
        virtual void prepare() = 0;
        virtual void setInput(T &t) = 0;
index 0839b80262013e682fe86e6cdc6dbc39b3bb2ad2..d77d938f0fb527555ef473b28d9d10cf5e465fe5 100644 (file)
@@ -76,6 +76,7 @@ public:
                return _config;
        }
 
+       void create(int type) override;
        void configure() override;
        void prepare() override;
        void setInput(T &t) override;
index 2fcca3a775992563e732e19fa926871b8d2ea072..79619011149044df8709a34cc370af27a9174d95 100644 (file)
@@ -36,6 +36,11 @@ template<typename T, typename V> FaceRecognitionAdapter<T, V>::FaceRecognitionAd
 template<typename T, typename V> FaceRecognitionAdapter<T, V>::~FaceRecognitionAdapter()
 {}
 
+template<typename T, typename V> void FaceRecognitionAdapter<T, V>::create(int type)
+{
+       throw InvalidOperation("Not support yet.");
+}
+
 template<typename T, typename V> void FaceRecognitionAdapter<T, V>::configure()
 {
        _config = make_unique<EngineConfig>(string(MV_CONFIG_PATH) + string(FACE_RECOGNITION_META_FILE_NAME));
index dedc7e30807d50a21ef43de0a97293e3ddecdf10..7813a3dab94754a66e5e004868bc5ef3409798a8 100644 (file)
@@ -71,7 +71,8 @@ struct BoxInfo
        std::map<std::string, inference_box_decoding_type_e> supportedBoxDecodingTypes = {
                { "BYPASS", INFERENCE_BOX_DECODING_TYPE_BYPASS },
                { "SSD_ANCHOR", INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR },
-               { "YOLO_ANCHOR", INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR }
+               { "YOLO_ANCHOR", INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR },
+               { "SINGLE_3D", INFERENCE_BOX_DECODING_TYPE_SINGLE_3D}
        };
 
        ~BoxInfo() = default;
@@ -127,4 +128,4 @@ struct BoxInfo
 } /* Inference */
 } /* MediaVision */
 
-#endif
\ No newline at end of file
+#endif
index db792c0b64fa7892fd8820e474247350ac08aad3..1106b70ad624f63b1b4db924b46e3b992bfa8ea5 100644 (file)
@@ -324,6 +324,26 @@ public:
                return mOutputTensorBuffers;
        }
 
+       size_t getSourceWidth()
+       {
+               return mSourceSize.width;
+       }
+
+       size_t getSourceHeight()
+       {
+               return mSourceSize.height;
+       }
+
+       size_t getInputWidth()
+       {
+               return mInputSize.width;
+       }
+
+       size_t getInputHeight()
+       {
+               return mInputSize.height;
+       }
+
 private:
        bool mCanRun = false; /**< The flag indicating ready to run Inference */
        InferenceConfig mConfig;
index 1ce8c9e76cb94028183ab7b0901cedcfdcab18a0..1e8aa2ae47439e30c07f5939c7238f382bd987ed 100644 (file)
@@ -50,7 +50,8 @@ typedef enum
 {
        INFERENCE_BOX_DECODING_TYPE_BYPASS,
        INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR,
-       INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR
+       INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR,
+       INFERENCE_BOX_DECODING_TYPE_SINGLE_3D
 } inference_box_decoding_type_e;
 
 typedef enum
index 3040fa9e5b53b31de813e8a7f6fd0d86b68264dc..7ac595f5e3d3a3af1415215535c6bf521a889c6b 100644 (file)
@@ -100,6 +100,12 @@ int OutputMetadata::Parse(JsonObject *root)
        }
 
        if (!box.GetName().empty()) {
+               // In case of object detection 3d for single object, other property isn't needed.
+               if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_SINGLE_3D) {
+                       parsed = true;
+                       return MEDIA_VISION_ERROR_NONE;
+               }
+
                // addtional parsing is required according to decoding type
                if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
                        ret = box.ParseLabel(root);
@@ -113,7 +119,6 @@ int OutputMetadata::Parse(JsonObject *root)
                                LOGE("Fail to GetNumber[%d]", ret);
                                return ret;
                        }
-
                } else {
                        ret = box.ParseDecodeInfo(root);
                        if (ret != MEDIA_VISION_ERROR_NONE) {
diff --git a/mv_machine_learning/object_detection/CMakeLists.txt b/mv_machine_learning/object_detection/CMakeLists.txt
new file mode 100644 (file)
index 0000000..0d03d03
--- /dev/null
@@ -0,0 +1,26 @@
+project(${MV_OBJECT_DETECTION_LIB_NAME})
+cmake_minimum_required(VERSION 2.6...3.13)
+
+pkg_check_modules(${PROJECT_NAME}_DEP REQUIRED inference-engine-interface-common iniparser json-glib-1.0)
+file(GLOB MV_INFERENCE_SOURCE_LIST  "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp")
+
+find_package(OpenCV REQUIRED dnn imgproc)
+if(NOT OpenCV_FOUND)
+       message(SEND_ERROR "OpenCV NOT FOUND")
+       return()
+endif()
+
+if(FORCED_STATIC_BUILD)
+       add_library(${PROJECT_NAME} STATIC ${MV_INFERENCE_SOURCE_LIST})
+else()
+       add_library(${PROJECT_NAME} SHARED ${MV_INFERENCE_SOURCE_LIST})
+endif()
+
+IF (${ENABLE_INFERENCE_PROFILER})
+    MESSAGE("Enabled Mediavision Inference Engine Profiler.")
+    ADD_DEFINITIONS(-DENABLE_INFERENCE_PROFILER)
+ENDIF()
+
+target_link_libraries(${PROJECT_NAME} ${MV_COMMON_LIB_NAME} ${OpenCV_LIBS} ${${PROJECT_NAME}_DEP_LIBRARIES} mv_inference)
+target_include_directories(${PROJECT_NAME} PRIVATE include ../inference/include ../common/include)
+install(TARGETS ${PROJECT_NAME} DESTINATION ${LIB_INSTALL_DIR})
diff --git a/mv_machine_learning/object_detection/include/mv_object_detection_3d_config.h b/mv_machine_learning/object_detection/include/mv_object_detection_3d_config.h
new file mode 100644 (file)
index 0000000..5722016
--- /dev/null
@@ -0,0 +1,57 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_OBJECT_DETECTION_3D_CONFIG_H__
+#define __MEDIA_VISION_OBJECT_DETECTION_3D_CONFIG_H__
+
+/**
+ * @brief Defines #MV_OBJECT_DETECTION_3D_MODEL_FILE_PATH
+ *        to set the object detection 3d model file path.
+ *
+ * @since_tizen 7.0
+ */
+#define MV_OBJECT_DETECTION_3D_MODEL_FILE_PATH "MV_OBJECT_DETECTION_3D_MODEL_FILE_PATH"
+
+/**
+ * @brief Defines #MV_OBJECT_DETECTION_3D_MODEL_META_FILE_PATH to set inference
+ *        model's metadata file attribute of the engine configuration.
+ * @details The file includes inference model's metadata such as input and output
+ *          node names, input tensor's width and height,
+ *          mean and standard deviation values for pre-processing.
+ *
+ * @since_tizen 7.0
+ */
+#define MV_OBJECT_DETECTION_3D_MODEL_META_FILE_PATH "MV_OBJECT_DETECTION_3D_MODEL_META_FILE_PATH"
+
+/**
+ * @brief Defines #MV_OBJECT_DETECTION_3D_BACKEND_TYPE
+ *        to set inference backend engine type. In default, tensorflow lite is used.
+ *
+ * @since_tizen 7.0
+ */
+#define MV_OBJECT_DETECTION_3D_BACKEND_TYPE "MV_OBJECT_DETECTION_3D_BACKEND_TYPE"
+
+/**
+ * @brief Defines #MV_OBJECT_DETECTION_3D_TARGET_DEVICE_TYPE
+ *        to set inference target device type. In default, CPU device is used.
+ *
+ * @since_tizen 7.0
+ */
+#define MV_OBJECT_DETECTION_3D_TARGET_DEVICE_TYPE "MV_OBJECT_DETECTION_3D_TARGET_DEVICE_TYPE"
+
+#define OBJECT_DETECTION_3D_META_FILE_NAME     "object_detection_3d.json"
+
+#endif /* __MEDIA_VISION_OBJECT_DETECTION_3D_CONFIG_H__ */
diff --git a/mv_machine_learning/object_detection/include/mv_object_detection_3d_open.h b/mv_machine_learning/object_detection/include/mv_object_detection_3d_open.h
new file mode 100644 (file)
index 0000000..29a94be
--- /dev/null
@@ -0,0 +1,147 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_OBJECT_DETECTION_3D_OPEN_H__
+#define __MEDIA_VISION_OBJECT_DETECTION_3D_OPEN_H__
+
+#include <mv_common.h>
+#include <mv_private.h>
+#include <mv_object_detection_3d_type.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif /* __cplusplus */
+
+       /**
+        * @brief Create object detection 3d object handle.
+        * @details Use this function to create an object detection 3d object handle.
+        *          After creation the handle has to be prepared with
+        *          @ref mv_object_detection_3d_prepare_open() function to prepare
+        *               an object detection 3d object.
+        *
+        * @since_tizen 7.0
+        *
+        * @param[out] out_handle    The handle to the object detection 3d object to be created
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+        *
+        * @post Release @a handle by using
+        *       @ref mv_object_detection_3d_destroy_open() function when it is not needed
+        *       anymore
+        *
+        * @see mv_object_detection_3d_destroy_open()
+        */
+       int mv_object_detection_3d_create_open(mv_object_detection_3d_h *out_handle);
+
+       /**
+        * @brief Destroy object detection 3d handle and releases all its resources.
+        *
+        * @since_tizen 7.0
+        *
+        * @param[in] handle    The handle to the object detection 3d object to be destroyed.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        *
+        * @pre Create an object detection 3d handle by using @ref mv_object_detection_3d_create_open()
+        *
+        * @see mv_object_detection_3d_create_open()
+        */
+       int mv_object_detection_3d_destroy_open(mv_object_detection_3d_h handle);
+
+       /**
+        * @brief Configure the backend to the inference handle
+        *
+        * @since_tizen 7.0
+        *
+        * @param [in] handle         The handle to the inference
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+        * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+        */
+       int mv_object_detection_3d_configure_open(mv_object_detection_3d_h handle);
+
+       /**
+        * @brief Prepare inference.
+        * @details Use this function to prepare inference based on
+        *          the configured network.
+        *
+        * @since_tizen 7.0
+        *
+        * @param [in] handle         The handle to the inference
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data
+        * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+        */
+       int mv_object_detection_3d_prepare_open(mv_object_detection_3d_h handle);
+
+       /**
+        *
+        * @brief Perform object detection 3d inference on the @a source
+        * @details Use this function to inference with a given source.
+        *
+        *
+        * @since_tizen 7.0
+        *
+        * @param[in] handle         The handle to the object detection 3d object.
+        * @param[in] source         The handle to the source of the media.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+        *                                                  isn't supported
+        * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+        *
+        * @pre Create a source handle by calling @ref mv_create_source()
+        * @pre Create an object detection 3d handle by calling @ref mv_object_detection_3d_create_open()
+        * @pre Prepare an object detection 3d inference by calling @ref mv_object_detection_3d_prepare_open()
+        * @pre Register a new face by calling @ref mv_object_detection_3d_register_open()
+        */
+       int mv_object_detection_3d_inference_open(mv_object_detection_3d_h handle, mv_source_h source);
+
+       /**
+        * @brief Get the object detection 3d result and store it to @a out_result.
+        * @details Use this function to get the detected result after calling mv_object_detection_3d_inference_open function.
+        *
+        * @since_tizen 7.0
+        *
+        * @param[in] handle         The handle to the object detection 3d object.
+        * @param[out] out_result    The pointer to the structure which receives the object detection 3d inference result.
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Request an inference by calling @ref mv_object_detection_3d_inference_open()
+        */
+       int mv_object_detection_3d_get_result_open(mv_object_detection_3d_h handle, mv_object_detection_3d_result_s *out_result);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __MEDIA_VISION_OBJECT_DETECTION_3D_OPEN_H__ */
diff --git a/mv_machine_learning/object_detection/include/object_detection.h b/mv_machine_learning/object_detection/include/object_detection.h
new file mode 100644 (file)
index 0000000..8ad9cda
--- /dev/null
@@ -0,0 +1,56 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OBJECT_DETECTION_H__
+#define __OBJECT_DETECTION_H__
+
+#include <mv_common.h>
+#include <mv_inference_type.h>
+
+#include "EngineConfig.h"
+#include "inference_engine_common_impl.h"
+#include "Inference.h"
+#include "object_detection_type.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+class ObjectDetection
+{
+protected:
+       std::unique_ptr<mediavision::inference::Inference> _inference;
+       std::unique_ptr<MediaVision::Common::EngineConfig> _config;
+       std::string _model_file_path;
+       std::string _model_meta_file_path;
+       int _backend_type;
+       int _target_device_type;
+
+public:
+       ObjectDetection();
+       virtual ~ObjectDetection() = default;
+       virtual void parseMetaFile() = 0;
+       void configure();
+       void prepare();
+       void inference(mv_source_h source);
+       virtual object_detection_3d_result_s& getResult() = 0;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/include/object_detection_adapter.h b/mv_machine_learning/object_detection/include/object_detection_adapter.h
new file mode 100644 (file)
index 0000000..6aff8eb
--- /dev/null
@@ -0,0 +1,54 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OBJECT_DETECTION_ADAPTER_H__
+#define __OBJECT_DETECTION_ADAPTER_H__
+
+#include <dlog.h>
+
+#include "EngineConfig.h"
+#include "itask.h"
+#include "objectron.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+template<typename T, typename V>
+class ObjectDetectionAdapter : public mediavision::common::ITask<T, V>
+{
+private:
+       std::unique_ptr<ObjectDetection> _object_detection;
+       T _source;
+
+public:
+       ObjectDetectionAdapter();
+       ~ObjectDetectionAdapter();
+
+       void create(int type) override;
+
+       void configure() override;
+       void prepare() override;
+       void setInput(T &t) override;
+       void perform() override;
+       V &getOutput() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/include/object_detection_type.h b/mv_machine_learning/object_detection/include/object_detection_type.h
new file mode 100644 (file)
index 0000000..980598b
--- /dev/null
@@ -0,0 +1,54 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OBJECT_DETECTION_TYPE_H__
+#define __OBJECT_DETECTION_TYPE_H__
+
+#include <mv_common.h>
+#include <mv_inference_type.h>
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+/**
+ * @brief The object detection input structure.
+ * @details Wraps the media source handle that is passed to inference.
+ */
+struct object_detection_input_s {
+       mv_source_h inference_src;
+};
+
+/**
+ * @brief The object detection result structure.
+ * @details Contains object detection 3d result: a probability value,
+ *          up to 9 points and up to 12 edges, each edge being a pair of
+ *          point indexes.
+ */
+struct object_detection_3d_result_s {
+       unsigned int probability;
+       unsigned int number_of_points;
+       mv_point_s points[9];
+       unsigned int number_of_edges;
+       unsigned int edge_indexes[12][2];
+};
+
+/**
+ * @brief Task types accepted by the object detection adapter's create().
+ */
+enum object_detection_task_type_e {
+       OBJECT_DETECTION_TASK_NONE = 0,
+       OBJECT_DETECTION_TASK_2D,
+       OBJECT_DETECTION_TASK_3D
+       // TODO
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/include/objectron.h b/mv_machine_learning/object_detection/include/objectron.h
new file mode 100644 (file)
index 0000000..bfe3f9f
--- /dev/null
@@ -0,0 +1,45 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OBJECTRON_H__
+#define __OBJECTRON_H__
+
+#include <mv_common.h>
+#include "object_detection.h"
+#include <mv_inference_type.h>
+#include "EngineConfig.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+/**
+ * @brief Objectron model based implementation of ObjectDetection.
+ * @details Overrides the meta file parsing and the result extraction of
+ *          the ObjectDetection base class.
+ */
+class Objectron : public ObjectDetection
+{
+public:
+       Objectron();
+       ~Objectron();
+
+       // Reads the configuration attributes and the model meta file.
+       void parseMetaFile() override;
+       // Returns a reference to the result object owned by this instance.
+       object_detection_3d_result_s &getResult() override;
+
+private:
+       // Result storage handed out by getResult().
+       object_detection_3d_result_s _result;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/meta/object_detection_3d.json b/mv_machine_learning/object_detection/meta/object_detection_3d.json
new file mode 100644 (file)
index 0000000..25636fe
--- /dev/null
@@ -0,0 +1,30 @@
+{
+    "attributes":
+    [
+               {
+            "name" : "MV_OBJECT_DETECTION_3D_MODEL_OBJECT_NAME",
+                       "type" : "string",
+                       "value" : "cup"
+               },
+        {
+            "name"  : "MV_OBJECT_DETECTION_3D_MODEL_FILE_PATH",
+            "type"  : "string",
+            "value" : "/home/owner/media/res/object_detection_3d/object_detection_3d_cup.tflite"
+        },
+               {
+            "name"  : "MV_OBJECT_DETECTION_3D_MODEL_META_FILE_PATH",
+            "type"  : "string",
+            "value" : "/home/owner/media/res/object_detection_3d/object_detection_3d_cup.json"
+        },
+        {
+            "name"  : "MV_OBJECT_DETECTION_3D_BACKEND_TYPE",
+            "type"  : "integer",
+            "value" : 1
+        },
+        {
+            "name"  : "MV_OBJECT_DETECTION_3D_TARGET_DEVICE_TYPE",
+            "type"  : "integer",
+            "value" : 1
+        }
+    ]
+}
diff --git a/mv_machine_learning/object_detection/src/mv_object_detection_3d.c b/mv_machine_learning/object_detection/src/mv_object_detection_3d.c
new file mode 100644 (file)
index 0000000..94f876d
--- /dev/null
@@ -0,0 +1,125 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "mv_object_detection_3d.h"
+#include "mv_object_detection_3d_open.h"
+
+/**
+ * @file  mv_object_detection_3d.c
+ * @brief This file contains the Media Vision object detection 3D API.
+ */
+
+/**
+ * Creates an object detection 3d handle.
+ *
+ * Runs the feature-support and NULL-argument check macros (defined outside
+ * this file) and then forwards to mv_object_detection_3d_create_open().
+ *
+ * @param infer  Location which receives the new handle.
+ * @return The status returned by mv_object_detection_3d_create_open().
+ */
+int mv_object_detection_3d_create(mv_object_detection_3d_h *infer)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_NULL_ARG_CHECK(infer);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       const int status = mv_object_detection_3d_create_open(infer);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return status;
+}
+
+/**
+ * Destroys an object detection 3d handle.
+ *
+ * Runs the feature-support and instance check macros (defined outside this
+ * file) and then forwards to mv_object_detection_3d_destroy_open().
+ *
+ * @param infer  Handle to destroy.
+ * @return The status returned by mv_object_detection_3d_destroy_open().
+ */
+int mv_object_detection_3d_destroy(mv_object_detection_3d_h infer)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(infer);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       const int status = mv_object_detection_3d_destroy_open(infer);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return status;
+}
+
+/**
+ * Configures the object detection 3d handle.
+ *
+ * Runs the feature-support and instance check macros (defined outside this
+ * file) and then forwards to mv_object_detection_3d_configure_open().
+ *
+ * @param infer  Handle to configure.
+ * @return The status returned by mv_object_detection_3d_configure_open().
+ */
+int mv_object_detection_3d_configure(mv_object_detection_3d_h infer)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(infer);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       const int status = mv_object_detection_3d_configure_open(infer);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return status;
+}
+
+/**
+ * Prepares the object detection 3d handle for inference.
+ *
+ * Runs the feature-support and instance check macros (defined outside this
+ * file) and then forwards to mv_object_detection_3d_prepare_open().
+ *
+ * @param infer  Handle to prepare.
+ * @return The status returned by mv_object_detection_3d_prepare_open().
+ */
+int mv_object_detection_3d_prepare(mv_object_detection_3d_h infer)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(infer);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       const int status = mv_object_detection_3d_prepare_open(infer);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return status;
+}
+
+/**
+ * Runs object detection 3d inference on a media source.
+ *
+ * Runs the image-inference feature check and the instance check macros
+ * (defined outside this file) on both arguments, then forwards to
+ * mv_object_detection_3d_inference_open().
+ *
+ * @param infer   Object detection 3d handle.
+ * @param source  Media source to run the inference on.
+ * @return The status returned by mv_object_detection_3d_inference_open().
+ */
+int mv_object_detection_3d_inference(mv_object_detection_3d_h infer, mv_source_h source)
+{
+       MEDIA_VISION_SUPPORT_CHECK(_mv_inference_image_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(source);
+       MEDIA_VISION_INSTANCE_CHECK(infer);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       const int status = mv_object_detection_3d_inference_open(infer, source);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return status;
+}
+
+/**
+ * Gets the result of the latest object detection 3d inference.
+ *
+ * Runs the feature-support, instance and NULL-argument check macros
+ * (defined outside this file) and then forwards to
+ * mv_object_detection_3d_get_result_open().
+ *
+ * @param handle      Object detection 3d handle.
+ * @param out_result  Caller provided structure which receives the result.
+ * @return The status returned by mv_object_detection_3d_get_result_open().
+ */
+int mv_object_detection_3d_get_result(mv_object_detection_3d_h handle, mv_object_detection_3d_result_s *out_result)
+{
+       /*
+        * Gate on the image inference feature, the same feature
+        * mv_object_detection_3d_inference() requires. The face inference
+        * feature is unrelated to object detection and would reject devices
+        * on which the inference itself is allowed.
+        */
+       MEDIA_VISION_SUPPORT_CHECK(
+               _mv_inference_image_check_system_info_feature_supported());
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(out_result);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = mv_object_detection_3d_get_result_open(handle, out_result);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
diff --git a/mv_machine_learning/object_detection/src/mv_object_detection_3d_open.cpp b/mv_machine_learning/object_detection/src/mv_object_detection_3d_open.cpp
new file mode 100644 (file)
index 0000000..3705a54
--- /dev/null
@@ -0,0 +1,201 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "itask.h"
+#include "mv_object_detection_3d_open.h"
+#include "object_detection_adapter.h"
+#include "machine_learning_exception.h"
+#include "object_detection_type.h"
+#include "context.h"
+
+#include <new>
+#include <unistd.h>
+#include <string>
+#include <algorithm>
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::common;
+using namespace mediavision::machine_learning;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning::exception;
+using ObjectDetectionTask = ITask<object_detection_input_s, object_detection_3d_result_s>;
+
+/**
+ * Allocates a Context holding one object detection task registered under
+ * the "objectron" key and returns it as the handle.
+ *
+ * @param out_handle  Location which receives the new handle.
+ * @return MEDIA_VISION_ERROR_NONE on success,
+ *         MEDIA_VISION_ERROR_INVALID_PARAMETER if out_handle is NULL,
+ *         MEDIA_VISION_ERROR_OUT_OF_MEMORY if an allocation fails, or the
+ *         error carried by the exception thrown while creating the task.
+ */
+int mv_object_detection_3d_create_open(mv_object_detection_3d_h *out_handle)
+{
+       if (!out_handle) {
+               LOGE("Handle can't be created because handle pointer is NULL");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       Context *context = new (nothrow) Context();
+       if (!context) {
+               LOGE("Fail to allocate a context.");
+               return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
+       }
+
+       ObjectDetectionTask *task = new (nothrow)
+                       ObjectDetectionAdapter<object_detection_input_s, object_detection_3d_result_s>();
+       if (!task) {
+               delete context;
+               LOGE("Fail to allocate a task.");
+               return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
+       }
+
+       try {
+               task->create(OBJECT_DETECTION_TASK_3D);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               // Nothing has been handed to the caller yet, so release both
+               // objects here; otherwise they would leak on this error path.
+               delete task;
+               delete context;
+               return e.getError();
+       }
+
+       context->__tasks.insert(make_pair("objectron", task));
+       *out_handle = static_cast<mv_object_detection_3d_h>(context);
+
+       LOGD("object detection 3d handle [%p] has been created", *out_handle);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+/**
+ * Deletes every task registered in the context and then the context.
+ *
+ * @param handle  Handle made by mv_object_detection_3d_create_open().
+ * @return MEDIA_VISION_ERROR_NONE on success, or
+ *         MEDIA_VISION_ERROR_INVALID_PARAMETER if handle is NULL.
+ */
+int mv_object_detection_3d_destroy_open(mv_object_detection_3d_h handle)
+{
+       if (handle == nullptr) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       Context *ctx = static_cast<Context *>(handle);
+
+       // The tasks are owned by the context; free them before the context.
+       for (auto it = ctx->__tasks.begin(); it != ctx->__tasks.end(); ++it)
+               delete static_cast<ObjectDetectionTask *>(it->second);
+
+       delete ctx;
+
+       LOGD("Object detection 3d handle has been destroyed.");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+/**
+ * Calls configure() on the task registered under the "objectron" key.
+ *
+ * @param handle  Handle made by mv_object_detection_3d_create_open().
+ * @return MEDIA_VISION_ERROR_NONE on success,
+ *         MEDIA_VISION_ERROR_INVALID_PARAMETER if handle is NULL, or the
+ *         error carried by a BaseException thrown by the task.
+ */
+int mv_object_detection_3d_configure_open(mv_object_detection_3d_h handle)
+{
+       LOGD("ENTER");
+
+       if (handle == nullptr) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               Context *ctx = static_cast<Context *>(handle);
+               ObjectDetectionTask *objectron_task = static_cast<ObjectDetectionTask *>(ctx->__tasks["objectron"]);
+
+               objectron_task->configure();
+       } catch (const BaseException &e) {
+               // Translate the exception into a C API error code.
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+/**
+ * Calls prepare() on the task registered under the "objectron" key.
+ *
+ * @param handle  Handle made by mv_object_detection_3d_create_open().
+ * @return MEDIA_VISION_ERROR_NONE on success,
+ *         MEDIA_VISION_ERROR_INVALID_PARAMETER if handle is NULL, or the
+ *         error carried by a BaseException thrown by the task.
+ */
+int mv_object_detection_3d_prepare_open(mv_object_detection_3d_h handle)
+{
+       LOGD("ENTER");
+
+       if (handle == nullptr) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               Context *ctx = static_cast<Context *>(handle);
+               ObjectDetectionTask *objectron_task = static_cast<ObjectDetectionTask *>(ctx->__tasks["objectron"]);
+
+               objectron_task->prepare();
+       } catch (const BaseException &e) {
+               // Translate the exception into a C API error code.
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+/**
+ * Hands the source to the task registered under the "objectron" key and
+ * runs it.
+ *
+ * @param handle  Handle made by mv_object_detection_3d_create_open().
+ * @param source  Media source wrapped into the task input.
+ * @return MEDIA_VISION_ERROR_NONE on success,
+ *         MEDIA_VISION_ERROR_INVALID_PARAMETER if handle is NULL, or the
+ *         error carried by a BaseException thrown by the task.
+ */
+int mv_object_detection_3d_inference_open(mv_object_detection_3d_h handle, mv_source_h source)
+{
+       LOGD("ENTER");
+
+       if (handle == nullptr) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               Context *ctx = static_cast<Context *>(handle);
+               ObjectDetectionTask *objectron_task = static_cast<ObjectDetectionTask *>(ctx->__tasks["objectron"]);
+
+               object_detection_input_s request = { source };
+
+               objectron_task->setInput(request);
+               objectron_task->perform();
+       } catch (const BaseException &e) {
+               // Translate the exception into a C API error code.
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+/**
+ * Copies the output of the task registered under the "objectron" key into
+ * the caller provided result structure.
+ *
+ * @param handle      Handle made by mv_object_detection_3d_create_open().
+ * @param out_result  Structure which receives the probability, the points
+ *                    and the edge index pairs.
+ * @return MEDIA_VISION_ERROR_NONE on success,
+ *         MEDIA_VISION_ERROR_INVALID_PARAMETER if handle or out_result is
+ *         NULL, or the error carried by a BaseException thrown by the task.
+ */
+int mv_object_detection_3d_get_result_open(mv_object_detection_3d_h handle, mv_object_detection_3d_result_s *out_result)
+{
+       LOGD("ENTER");
+
+       if (!handle) {
+               LOGE("Handle is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       // out_result is dereferenced below, so reject NULL here as well
+       // instead of relying on every caller having checked it.
+       if (!out_result) {
+               LOGE("Result pointer is NULL.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       try {
+               auto context = static_cast<Context *>(handle);
+               auto task = static_cast<ObjectDetectionTask *>(context->__tasks["objectron"]);
+
+               // getOutput() returns a reference; bind to it rather than
+               // copying the whole result structure.
+               const object_detection_3d_result_s &result = task->getOutput();
+
+               out_result->probability = result.probability;
+               out_result->number_of_points = result.number_of_points;
+               out_result->number_of_edges = result.number_of_edges;
+
+               for (unsigned int idx = 0; idx < result.number_of_points; ++idx)
+                       out_result->points[idx] = result.points[idx];
+
+               for (unsigned int idx = 0; idx < result.number_of_edges; ++idx) {
+                       out_result->edge_indexes[idx][0] = result.edge_indexes[idx][0];
+                       out_result->edge_indexes[idx][1] = result.edge_indexes[idx][1];
+               }
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       LOGD("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/src/object_detection.cpp b/mv_machine_learning/object_detection/src/object_detection.cpp
new file mode 100644 (file)
index 0000000..95fd34b
--- /dev/null
@@ -0,0 +1,79 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <map>
+#include <algorithm>
+
+#include "machine_learning_exception.h"
+#include "object_detection.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+/**
+ * Value-initializes the backend and target device types.
+ */
+ObjectDetection::ObjectDetection() : _backend_type(), _target_device_type()
+{}
+
+/**
+ * Binds the inference object to the configured backend and target device.
+ *
+ * Throws InvalidOperation if the bind call reports an error.
+ */
+void ObjectDetection::configure()
+{
+       if (_inference->Bind(_backend_type, _target_device_type) != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to bind a backend engine.");
+}
+
+/**
+ * Passes placeholder input/output layer information and the model file
+ * path to the inference object and asks it to load the model.
+ *
+ * Throws InvalidOperation if loading reports an error.
+ */
+void ObjectDetection::prepare()
+{
+       // TODO. In case of using model meta file, this function shouldn't be needed.
+       //       Instead, all configurations should be done in MetaData class.
+       std::vector<std::string> input_layer_names;
+       _inference->ConfigureInputInfo(0, 0, 0, 0, 0, 0, 0, input_layer_names);
+
+       // TODO. In case of using model meta file, this function shouldn't be needed.
+       //       Instead, all configurations should be done in MetaData class.
+       std::vector<std::string> output_layer_names;
+       std::vector<inference_engine_tensor_info> output_layer_info;
+       _inference->ConfigureOutputInfo(output_layer_names, output_layer_info);
+
+       // Only the model file path is provided; the other two arguments stay empty.
+       _inference->ConfigureModelFiles("", _model_file_path, "");
+
+       // Request to load model files to a backend engine.
+       if (_inference->Load() != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to load model files.");
+}
+
+/**
+ * Runs the inference on a single media source with an empty rectangle list.
+ *
+ * Throws InvalidOperation if the run reports an error.
+ */
+void ObjectDetection::inference(mv_source_h source)
+{
+       std::vector<mv_source_h> sources(1, source);
+       std::vector<mv_rectangle_s> rects;
+
+       if (_inference->Run(sources, rects) != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to run inference");
+}
+
+} // machine_learning
+} // mediavision
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/src/object_detection_adapter.cpp b/mv_machine_learning/object_detection/src/object_detection_adapter.cpp
new file mode 100644 (file)
index 0000000..7d95585
--- /dev/null
@@ -0,0 +1,90 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "machine_learning_exception.h"
+#include "object_detection_adapter.h"
+
+using namespace std;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+/**
+ * Value-initializes the stored input. The model object is made later by
+ * create().
+ */
+template<typename T, typename V> ObjectDetectionAdapter<T, V>::ObjectDetectionAdapter() : _source()
+{}
+
+template<typename T, typename V> ObjectDetectionAdapter<T, V>::~ObjectDetectionAdapter()
+{}
+
+/**
+ * Instantiates the model specific object for the given task type.
+ *
+ * Only OBJECT_DETECTION_TASK_3D is handled, which makes an Objectron
+ * object. Any other value throws InvalidParameter.
+ */
+template<typename T, typename V> void ObjectDetectionAdapter<T, V>::create(int type)
+{
+       switch (type) {
+       case OBJECT_DETECTION_TASK_3D:
+               _object_detection = make_unique<Objectron>();
+               break;
+       default:
+               throw InvalidParameter("Invalid object detection task type.");
+       }
+}
+
+/**
+ * Parses the meta file and then configures the model object.
+ *
+ * Exceptions thrown by the model object propagate to the caller unchanged.
+ * They are deliberately not caught and rethrown with "throw e", because
+ * that throws a copy sliced to BaseException and loses the derived type.
+ */
+template<typename T, typename V> void ObjectDetectionAdapter<T, V>::configure()
+{
+       _object_detection->parseMetaFile();
+       _object_detection->configure();
+}
+
+/**
+ * Prepares the model object. Exceptions propagate to the caller unchanged.
+ */
+template<typename T, typename V> void ObjectDetectionAdapter<T, V>::prepare()
+{
+       _object_detection->prepare();
+}
+
+/**
+ * Stores a copy of the input for the next perform() call.
+ */
+template<typename T, typename V> void ObjectDetectionAdapter<T, V>::setInput(T &t)
+{
+       _source = t;
+}
+
+/**
+ * Runs the inference on the source of the stored input. Exceptions
+ * propagate to the caller unchanged.
+ */
+template<typename T, typename V> void ObjectDetectionAdapter<T, V>::perform()
+{
+       _object_detection->inference(_source.inference_src);
+}
+
+/**
+ * Returns the reference given by the model object's getResult().
+ */
+template<typename T, typename V> V &ObjectDetectionAdapter<T, V>::getOutput()
+{
+       return _object_detection->getResult();
+}
+
+// Explicit instantiation for the object detection 3d input/result types.
+template class ObjectDetectionAdapter<object_detection_input_s, object_detection_3d_result_s>;
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/object_detection/src/objectron.cpp b/mv_machine_learning/object_detection/src/objectron.cpp
new file mode 100644 (file)
index 0000000..9bb7d2b
--- /dev/null
@@ -0,0 +1,140 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <map>
+#include <algorithm>
+
+#include "machine_learning_exception.h"
+#include "objectron.h"
+#include "mv_object_detection_3d_config.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+/**
+ * Value-initializes the result and makes the Inference object used by
+ * this model.
+ */
+Objectron::Objectron() : _result()
+{
+       _inference = std::make_unique<Inference>();
+}
+
+// Nothing to release explicitly.
+Objectron::~Objectron() = default;
+
+/**
+ * Tells whether the text after the last '.' of fileName is exactly "json".
+ *
+ * A name without any '.' has no extension and is rejected. Previously
+ * npos + 1 wrapped around to 0, so a file named exactly "json" passed.
+ */
+static bool IsJsonFile(const std::string &fileName)
+{
+       const std::string::size_type dot_pos = fileName.find_last_of('.');
+       if (dot_pos == std::string::npos)
+               return false;
+
+       return fileName.compare(dot_pos + 1, std::string::npos, "json") == 0;
+}
+
+/**
+ * Loads the object detection 3d configuration and parses the model meta file.
+ *
+ * Reads the backend type, the target device type, the model file path and
+ * the model meta file path from the configuration, then passes the meta
+ * file to the inference object. Each failing step throws InvalidOperation.
+ */
+void Objectron::parseMetaFile()
+{
+       _config = make_unique<EngineConfig>(string(MV_CONFIG_PATH) + string(OBJECT_DETECTION_3D_META_FILE_NAME));
+
+       if (_config->getIntegerAttribute(string(MV_OBJECT_DETECTION_3D_BACKEND_TYPE), &_backend_type) !=
+               MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get backend engine type.");
+
+       if (_config->getIntegerAttribute(string(MV_OBJECT_DETECTION_3D_TARGET_DEVICE_TYPE), &_target_device_type) !=
+               MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get target device type.");
+
+       if (_config->getStringAttribute(MV_OBJECT_DETECTION_3D_MODEL_FILE_PATH, &_model_file_path) !=
+               MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get model file path");
+
+       if (_config->getStringAttribute(MV_OBJECT_DETECTION_3D_MODEL_META_FILE_PATH, &_model_meta_file_path) !=
+               MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get model meta file path");
+
+       // Only the path string is validated here, not the file on disk.
+       if (_model_meta_file_path.empty())
+               throw InvalidOperation("Model meta file doesn't exist.");
+
+       if (!IsJsonFile(_model_meta_file_path))
+               throw InvalidOperation("Model meta file should be json");
+
+       if (_inference->ParseMetadata(_model_meta_file_path) != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to ParseMetadata");
+}
+
+/**
+ * Converts the output tensors of the latest inference into the result
+ * object and returns a reference to it.
+ *
+ * The output layers are identified by their iteration order in the tensor
+ * buffer container: the second one is read as the key points (x, y float
+ * pairs), the first one as the probability. Key points are scaled from the
+ * model input size to the source size.
+ *
+ * Throws InvalidOperation if fewer than two output layers exist, if a
+ * tensor buffer is missing or too small, or if the key point count is not
+ * the expected one.
+ */
+object_detection_3d_result_s &Objectron::getResult()
+{
+       TensorBuffer &tensor_buffer_obj = _inference->GetOutputTensorBuffer();
+       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
+
+       vector<string> output_layer_names;
+
+       for (const auto &layer : ie_tensor_buffer)
+               output_layer_names.push_back(layer.first);
+
+       // Both layers are indexed below; indexing past the end of the vector
+       // would be undefined behavior.
+       if (output_layer_names.size() < 2)
+               throw InvalidOperation("Invalid number of output layers. Two output layers are required.");
+
+       string &identity_1_layer = output_layer_names[1];
+
+       inference_engine_tensor_buffer *tensor_buffer = tensor_buffer_obj.getTensorBuffer(identity_1_layer);
+       if (!tensor_buffer || !tensor_buffer->buffer)
+               throw InvalidOperation("Fail to get tensor buffer.");
+
+       // The buffer is read as an array of floats.
+       const unsigned int output_size = tensor_buffer->size / sizeof(float);
+       auto *keypoints = reinterpret_cast<float *>(tensor_buffer->buffer);
+
+       // Two floats (x, y) per point, and the result can hold 9 points.
+       const unsigned int max_points = sizeof(_result.points) / sizeof(_result.points[0]);
+
+       if (output_size != max_points * 2)
+               throw InvalidOperation("Invalid number of points. Number of points should be 18.");
+
+       const float x_scale =
+                       static_cast<float>(_inference->getSourceWidth()) / static_cast<float>(_inference->getInputWidth());
+       const float y_scale =
+                       static_cast<float>(_inference->getSourceHeight()) / static_cast<float>(_inference->getInputHeight());
+
+       for (unsigned int point_idx = 0; point_idx < max_points; ++point_idx) {
+               _result.points[point_idx].x = static_cast<int>(keypoints[point_idx * 2] * x_scale);
+               _result.points[point_idx].y = static_cast<int>(keypoints[point_idx * 2 + 1] * y_scale);
+       }
+
+       _result.number_of_points = max_points;
+
+       string &identity_layer = output_layer_names[0];
+
+       tensor_buffer = tensor_buffer_obj.getTensorBuffer(identity_layer);
+       if (!tensor_buffer || !tensor_buffer->buffer || tensor_buffer->size < sizeof(float))
+               throw InvalidOperation("Fail to get tensor buffer.");
+
+       auto *prob = reinterpret_cast<float *>(tensor_buffer->buffer);
+       // Stored as the model score multiplied by 100.
+       _result.probability = static_cast<unsigned int>(prob[0] * 100);
+
+       // NOTE(review): the edge end points use the values 2..9, so they look
+       // like 1-based point numbers with point 1 left out (presumably the
+       // box center) - confirm against the public API documentation.
+       static const unsigned int edges[12][2] = {
+               {2, 3}, {4, 5}, {6, 7}, {8, 9},
+               {2, 4}, {3, 5}, {6, 8}, {7, 9},
+               {2, 6}, {3, 7}, {4, 8}, {5, 9}
+       };
+
+       _result.number_of_edges = 12;
+       std::copy(&edges[0][0], &edges[0][0] + 12 * 2, &_result.edge_indexes[0][0]);
+
+       return _result;
+}
+
+}
+}
\ No newline at end of file
index 28a66fc6528cee021eb752cdfde6258accbf00b1..e3f1a4e69d42816ea6a791e91d3323a164ef1f0f 100644 (file)
@@ -1,7 +1,7 @@
 Name:        capi-media-vision
 Summary:     Media Vision library for Tizen Native API
-Version:     0.23.41
-Release:     2
+Version:     0.24.0
+Release:     0
 Group:       Multimedia/Framework
 License:     Apache-2.0 and BSD-3-Clause
 Source0:     %{name}-%{version}.tar.gz
@@ -374,6 +374,8 @@ find . -name '*.gcno' -exec cp --parents '{}' "$gcno_obj_dir" ';'
 %manifest %{name}.manifest
 %license LICENSE.APLv2
 %{_libdir}/libmv_inference*.so
+%{_datadir}/%{name}/object_detection_3d.json
+%{_libdir}/libmv_object_detection*.so
 %if "%{enable_ml_face_recognition}" == "1"
 %{_datadir}/%{name}/face_recognition.json
 %{_libdir}/libmv_training.so
@@ -382,7 +384,9 @@ find . -name '*.gcno' -exec cp --parents '{}' "$gcno_obj_dir" ';'
 
 %files machine_learning-devel
 %{_includedir}/media/mv_infer*.h
+%{_includedir}/media/mv_object_detection_3d*.h
 %{_libdir}/pkgconfig/*inference.pc
+%{_libdir}/pkgconfig/*object-detection.pc
 %if "%{enable_ml_face_recognition}" == "1"
 %{_includedir}/media/mv_face_recognition*.h
 %{_libdir}/pkgconfig/*training.pc
@@ -409,6 +413,7 @@ find . -name '*.gcno' -exec cp --parents '{}' "$gcno_obj_dir" ';'
 %endif
 %{_libdir}/libmv_testsuite*.so
 %{_bindir}/mv_*
+%{_bindir}/test_object_detection_3d
 %if "%{enable_ml_face_recognition}" == "1"
 %{_bindir}/test_face_recognition
 %{_bindir}/measure_face_recognition
index ebf24f49105ce230604b7bbce72c3bdd6a7e4775..288a2ef13044f796412f90226eabfebf24941827 100644 (file)
@@ -2,6 +2,7 @@ project(machine_learning)
 cmake_minimum_required(VERSION 2.6...3.13)
 
 add_subdirectory(${PROJECT_SOURCE_DIR}/inference)
+add_subdirectory(${PROJECT_SOURCE_DIR}/object_detection)
 
 if (${ENABLE_ML_FACE_RECOGNITION})
     message("Enabled machine learning face recognition test cases.")
diff --git a/test/testsuites/machine_learning/object_detection/CMakeLists.txt b/test/testsuites/machine_learning/object_detection/CMakeLists.txt
new file mode 100644 (file)
index 0000000..6f5cb3e
--- /dev/null
@@ -0,0 +1,14 @@
+# Builds and installs the object detection 3d test executable.
+project(mv_object_detection_3d_suite)
+cmake_minimum_required(VERSION 2.6...3.13)
+
+# Name of the test executable target.
+set(TEST_OBJECT_DETECTION_3D test_object_detection_3d)
+
+add_executable(${TEST_OBJECT_DETECTION_3D} test_object_detection_3d.cpp)
+
+# gtest_main supplies main(); the mv_* libraries supply the API under test
+# and the image loading helper used by the test.
+target_link_libraries(${TEST_OBJECT_DETECTION_3D} gtest gtest_main
+                      mv_inference
+                      mv_object_detection
+                      mv_image_helper
+)
+
+install(TARGETS ${TEST_OBJECT_DETECTION_3D} DESTINATION ${CMAKE_INSTALL_BINDIR})
\ No newline at end of file
diff --git a/test/testsuites/machine_learning/object_detection/test_object_detection_3d.cpp b/test/testsuites/machine_learning/object_detection/test_object_detection_3d.cpp
new file mode 100644 (file)
index 0000000..f349413
--- /dev/null
@@ -0,0 +1,79 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <string.h>
+#include <map>
+
+#include "gtest/gtest.h"
+
+#include "ImageHelper.h"
+#include "mv_object_detection_3d.h"
+
+#define IMAGE_PATH "/res/inference/images/cup.jpeg"
+
+using namespace testing;
+using namespace std;
+
+using namespace MediaVision::Common;
+
+/**
+ * Runs the whole object detection 3d pipeline (create, configure, prepare,
+ * inference, get result, destroy) on one image and prints the result.
+ * Every API call is expected to return MEDIA_VISION_ERROR_NONE.
+ */
+TEST(ObjectDetection3DTest, InferenceShouldBeOk)
+{
+       // Start from a known value so a failing create cannot leave garbage.
+       mv_object_detection_3d_h handle = NULL;
+
+       int ret = mv_object_detection_3d_create(&handle);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       ret = mv_object_detection_3d_configure(handle);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       ret = mv_object_detection_3d_prepare(handle);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       const string image_path = IMAGE_PATH;
+       mv_source_h mv_source = NULL;
+
+       ret = mv_create_source(&mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       ret = ImageHelper::loadImageToSource(image_path.c_str(), mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       // Compare against the named constant like every other step does.
+       ret = mv_object_detection_3d_inference(handle, mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       mv_object_detection_3d_result_s result;
+       ret = mv_object_detection_3d_get_result(handle, &result);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       std::cout << "Probability = " << result.probability << std::endl;
+
+       // Points are printed with 1-based numbers.
+       for (unsigned int idx = 0; idx < result.number_of_points; ++idx) {
+               std::cout << "index = " << idx + 1 << " : " << result.points[idx].x
+                                << " x " << result.points[idx].y << std::endl;
+       }
+
+       std::cout << "Number of edges = " << result.number_of_edges << std::endl;
+
+       for (unsigned int idx = 0; idx < result.number_of_edges; ++idx)
+               std::cout << result.edge_indexes[idx][0] << " to " << result.edge_indexes[idx][1] << std::endl;
+
+       ret = mv_destroy_source(mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       ret = mv_object_detection_3d_destroy(handle);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+}
\ No newline at end of file