mv_machine_learning: code refactoring to object detection task group
authorInki Dae <inki.dae@samsung.com>
Fri, 10 Feb 2023 10:07:14 +0000 (19:07 +0900)
committerKwanghoon Son <k.son@samsung.com>
Fri, 3 Mar 2023 08:11:58 +0000 (17:11 +0900)
[Issue type] : code refactoring

This is a code refactoring patch that makes the object detection task
group more generic by doing the following:
   - rename the internal API from object_detection_3d to object_detection,
     except for the following three object-detection-3d-specific functions:
     mv_object_detection_3d_get_probability,
     mv_object_detection_3d_get_num_of_points,
     and mv_object_detection_3d_get_points.
   - use a more generic prefix for the configuration definitions.
   - introduce getOutputNames() and getOutputTensor() functions in the
     object detection group class, and make the Objectron class use them
     instead of implementing its own, because they can be shared by
     other specific classes of the object detection group later.

Change-Id: Ic305d42a8b6fb16cae7d806264dd78499c59ee97
Signed-off-by: Inki Dae <inki.dae@samsung.com>
17 files changed:
CMakeLists.txt
include/mv_object_detection_internal.h [moved from include/mv_object_detection_3d_internal.h with 76% similarity]
include/mv_object_detection_type.h [moved from include/mv_object_detection_3d_type.h with 80% similarity]
mv_machine_learning/object_detection/include/mv_object_detection_config.h [moved from mv_machine_learning/object_detection/include/mv_object_detection_3d_config.h with 63% similarity]
mv_machine_learning/object_detection/include/mv_object_detection_open.h [moved from mv_machine_learning/object_detection/include/mv_object_detection_3d_open.h with 82% similarity]
mv_machine_learning/object_detection/include/object_detection.h
mv_machine_learning/object_detection/include/object_detection_type.h
mv_machine_learning/object_detection/include/objectron.h
mv_machine_learning/object_detection/meta/object_detection.json [moved from mv_machine_learning/object_detection/meta/object_detection_3d.json with 55% similarity]
mv_machine_learning/object_detection/src/mv_object_detection.c [moved from mv_machine_learning/object_detection/src/mv_object_detection_3d.c with 75% similarity]
mv_machine_learning/object_detection/src/mv_object_detection_open.cpp [moved from mv_machine_learning/object_detection/src/mv_object_detection_3d_open.cpp with 79% similarity]
mv_machine_learning/object_detection/src/object_detection.cpp
mv_machine_learning/object_detection/src/object_detection_adapter.cpp
mv_machine_learning/object_detection/src/objectron.cpp
packaging/capi-media-vision.spec
test/testsuites/machine_learning/object_detection/CMakeLists.txt
test/testsuites/machine_learning/object_detection/test_object_detection.cpp [moved from test/testsuites/machine_learning/object_detection/test_object_detection_3d.cpp with 86% similarity]

index 6cbf430..6ba134b 100644 (file)
@@ -170,7 +170,7 @@ configure_file(
        @ONLY
 )
 install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-object-detection.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig)
-install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/object_detection_3d.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name})
+install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/object_detection/meta/object_detection.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name})
 
 set(PC_NAME ${fw_name}-image-classification)
 set(PC_LDFLAGS "-l${MV_IMAGE_CLASSIFICATION_LIB_NAME} -l${MV_COMMON_LIB_NAME}")
similarity index 76%
rename from include/mv_object_detection_3d_internal.h
rename to include/mv_object_detection_internal.h
index 778b0f2..726366f 100644 (file)
  * limitations under the License.
  */
 
-#ifndef __TIZEN_MEDIAVISION_OBJECT_DETECT_3D_INTERNAL_H__
-#define __TIZEN_MEDIAVISION_OBJECT_DETECT_3D_INTERNAL_H__
+#ifndef __TIZEN_MEDIAVISION_OBJECT_DETECT_INTERNAL_H__
+#define __TIZEN_MEDIAVISION_OBJECT_DETECT_INTERNAL_H__
 
 #include <mv_common.h>
-#include <mv_object_detection_3d_type.h>
+#include <mv_object_detection_type.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
 
 /**
- * @file   mv_object_detection_3d.h
+ * @file   mv_object_detection.h
  * @brief  This file contains the Inference based Media Vision API.
  */
 
@@ -35,15 +35,15 @@ extern "C" {
  */
 
 /**
- * @brief Creates a inference handle for object detection 3d object.
+ * @brief Creates a inference handle for object detection object.
  * @details Use this function to create a inference handle. After the creation
  *          the object detection 3d task has to be prepared with
- *          mv_object_detection_3d_prepare() function to prepare a network
+ *          mv_object_detection_prepare() function to prepare a network
  *          for the inference.
  *
  * @since_tizen 7.0
  *
- * @remarks The @a infer should be released using mv_object_detection_3d_destroy().
+ * @remarks The @a infer should be released using mv_object_detection_destroy().
  *
  * @param[out] infer    The handle to the inference to be created.
  *
@@ -53,10 +53,10 @@ extern "C" {
  * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
  * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
  *
- * @see mv_object_detection_3d_destroy()
- * @see mv_object_detection_3d_prepare()
+ * @see mv_object_detection_destroy()
+ * @see mv_object_detection_prepare()
  */
-int mv_object_detection_3d_create(mv_object_detection_3d_h *infer);
+int mv_object_detection_create(mv_object_detection_h *infer);
 
 /**
  * @brief Destroys inference handle and releases all its resources.
@@ -70,14 +70,14 @@ int mv_object_detection_3d_create(mv_object_detection_3d_h *infer);
  * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
  * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
  *
- * @pre Create inference handle by using mv_object_detection_3d_create()
+ * @pre Create inference handle by using mv_object_detection_create()
  *
- * @see mv_object_detection_3d_create()
+ * @see mv_object_detection_create()
  */
-int mv_object_detection_3d_destroy(mv_object_detection_3d_h infer);
+int mv_object_detection_destroy(mv_object_detection_h infer);
 
 /**
- * @brief Configures the backend for the object detection 3d inference.
+ * @brief Configures the backend for the object detection inference.
  *
  * @since_tizen 7.0
  *
@@ -89,11 +89,11 @@ int mv_object_detection_3d_destroy(mv_object_detection_3d_h infer);
  * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
  * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
  */
-int mv_object_detection_3d_configure(mv_object_detection_3d_h infer);
+int mv_object_detection_configure(mv_object_detection_h infer);
 
 /**
- * @brief Prepares the object detection 3d inference
- * @details Use this function to prepare the object detection 3d inference based on
+ * @brief Prepares the object detection inference
+ * @details Use this function to prepare the object detection inference based on
  *          the configured network.
  *
  * @since_tizen 7.0
@@ -110,10 +110,10 @@ int mv_object_detection_3d_configure(mv_object_detection_3d_h infer);
  * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
  * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Not supported format
  */
-int mv_object_detection_3d_prepare(mv_object_detection_3d_h infer);
+int mv_object_detection_prepare(mv_object_detection_h infer);
 
 /**
- * @brief Performs the object detection 3d inference on the @a source.
+ * @brief Performs the object detection inference on the @a source.
  *
  * @since_tizen 7.0
  * @remarks This function is synchronous and may take considerable time to run.
@@ -130,17 +130,17 @@ int mv_object_detection_3d_prepare(mv_object_detection_3d_h infer);
  *                                                  isn't supported
  *
  * @pre Create a source handle by calling mv_create_source()
- * @pre Create an inference handle by calling mv_object_detect_3d_create()
- * @pre Prepare an inference by calling mv_object_detect_3d_prepare()
+ * @pre Create an inference handle by calling mv_object_detect_create()
+ * @pre Prepare an inference by calling mv_object_detect_prepare()
  * @post
  *
- * @see mv_object_detect_3d_result_s structure
+ * @see mv_object_detect_result_s structure
  */
-int mv_object_detection_3d_inference(mv_object_detection_3d_h infer, mv_source_h source);
+int mv_object_detection_inference(mv_object_detection_h infer, mv_source_h source);
 
 /**
  * @brief Gets the probability value to the detected object.
- * @details Use this function to get the probability value after calling @ref mv_object_detection_3d_inference().
+ * @details Use this function to get the probability value after calling @ref mv_object_detection_inference().
  *
  * @since_tizen 7.0
  *
@@ -154,13 +154,13 @@ int mv_object_detection_3d_inference(mv_object_detection_3d_h infer, mv_source_h
  * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
  * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
  *
- * @pre Request an inference by calling @ref mv_object_detection_3d_inference()
+ * @pre Request an inference by calling @ref mv_object_detection_inference()
  */
-int mv_object_detection_3d_get_probability(mv_object_detection_3d_h handle, unsigned int *out_probability);
+int mv_object_detection_3d_get_probability(mv_object_detection_h handle, unsigned int *out_probability);
 
 /**
  * @brief Gets the number of points to the 3D bounding box of the detected object.
- * @details Use this function to get the number of points after calling @ref mv_object_detection_3d_inference().
+ * @details Use this function to get the number of points after calling @ref mv_object_detection_inference().
  *
  * @since_tizen 7.0
  *
@@ -174,13 +174,13 @@ int mv_object_detection_3d_get_probability(mv_object_detection_3d_h handle, unsi
  * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
  * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
  *
- * @pre Request an inference by calling @ref mv_object_detection_3d_inference()
+ * @pre Request an inference by calling @ref mv_object_detection_inference()
  */
-int mv_object_detection_3d_get_num_of_points(mv_object_detection_3d_h handle, unsigned int *out_num_of_points);
+int mv_object_detection_3d_get_num_of_points(mv_object_detection_h handle, unsigned int *out_num_of_points);
 
 /**
  * @brief Gets the x and y coordinates values to the 3D bounding box of the detected object.
- * @details Use this function to get the coordinates values after calling @ref mv_object_detection_3d_inference().
+ * @details Use this function to get the coordinates values after calling @ref mv_object_detection_inference().
  *
  * @since_tizen 7.0
  *
@@ -195,9 +195,9 @@ int mv_object_detection_3d_get_num_of_points(mv_object_detection_3d_h handle, un
  * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
  * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
  *
- * @pre Request an inference by calling @ref mv_object_detection_3d_inference()
+ * @pre Request an inference by calling @ref mv_object_detection_inference()
  */
-int mv_object_detection_3d_get_points(mv_object_detection_3d_h handle, unsigned int **out_x, unsigned int **out_y);
+int mv_object_detection_3d_get_points(mv_object_detection_h handle, unsigned int **out_x, unsigned int **out_y);
 /**
  * @}
  */
similarity index 80%
rename from include/mv_object_detection_3d_type.h
rename to include/mv_object_detection_type.h
index 5e207be..909392e 100644 (file)
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __TIZEN_MEDIAVISION_MV_OBJECT_DETECTION_3D_TYPE_H__
-#define __TIZEN_MEDIAVISION_MV_OBJECT_DETECTION_3D_TYPE_H__
+#ifndef __TIZEN_MEDIAVISION_MV_OBJECT_DETECTION_TYPE_H__
+#define __TIZEN_MEDIAVISION_MV_OBJECT_DETECTION_TYPE_H__
 
 #include <mv_common.h>
 
@@ -24,7 +24,7 @@ extern "C" {
 #endif /* __cplusplus */
 
 /**
- * @file   mv_object_detection_3d_type.h
+ * @file   mv_object_detection_type.h
  * @brief  This file contains the face recognition handle for Mediavision.
  */
 
@@ -34,11 +34,11 @@ extern "C" {
  */
 
 /**
- * @brief The object detection 3d object handle.
+ * @brief The object detection object handle.
  *
  * @since_tizen 7.0
  */
-typedef void *mv_object_detection_3d_h;
+typedef void *mv_object_detection_h;
 
 /**
  * @}
  * limitations under the License.
  */
 
-#ifndef __MEDIA_VISION_OBJECT_DETECTION_3D_CONFIG_H__
-#define __MEDIA_VISION_OBJECT_DETECTION_3D_CONFIG_H__
+#ifndef __MEDIA_VISION_OBJECT_DETECTION_CONFIG_H__
+#define __MEDIA_VISION_OBJECT_DETECTION_CONFIG_H__
 
 /**
- * @brief Defines #MV_OBJECT_DETECTION_3D_MODEL_FILE_PATH
- *        to set the object detection 3d model file path.
+ * @brief Defines #MV_OBJECT_DETECTION_MODEL_DEFAULT_PATH
+ *        to set the object detection default path.
+ *
+ * @since_tizen 7.5
+ */
+#define MV_OBJECT_DETECTION_MODEL_DEFAULT_PATH "MODEL_DEFAULT_PATH"
+
+/**
+ * @brief Defines #MV_OBJECT_DETECTION_MODEL_FILE_PATH
+ *        to set the object detection model file path.
  *
  * @since_tizen 7.0
  */
-#define MV_OBJECT_DETECTION_3D_MODEL_FILE_PATH "MODEL_FILE_PATH"
+#define MV_OBJECT_DETECTION_MODEL_FILE_PATH "MODEL_FILE_NAME"
 
 /**
  * @brief Defines #MV_OBJECT_DETECTION_3D_MODEL_META_FILE_PATH to set inference
  *
  * @since_tizen 7.0
  */
-#define MV_OBJECT_DETECTION_3D_MODEL_META_FILE_PATH "META_FILE_PATH"
+#define MV_OBJECT_DETECTION_MODEL_META_FILE_PATH "META_FILE_NAME"
 
 /**
- * @brief Defines #MV_OBJECT_DETECT_3D_BACKEND_TYPE
+ * @brief Defines #MV_OBJECT_DETECT_BACKEND_TYPE
  *        to set inference backend engine type. In default, tensorflow lite is used.
  *
  * @since_tizen 7.0
  */
-#define MV_OBJECT_DETECTION_3D_BACKEND_TYPE "BACKEND_TYPE"
+#define MV_OBJECT_DETECTION_BACKEND_TYPE "BACKEND_TYPE"
 
 /**
- * @brief Defines #MV_OBJECT_DETECT_3D_TARGET_DEVICE_TYPE
+ * @brief Defines #MV_OBJECT_DETECT_TARGET_DEVICE_TYPE
  *        to set inference target device type. In default, CPU device is used.
  *
  * @since_tizen 7.0
  */
-#define MV_OBJECT_DETECTION_3D_TARGET_DEVICE_TYPE "TARGET_DEVICE_TYPE"
+#define MV_OBJECT_DETECTION_TARGET_DEVICE_TYPE "TARGET_DEVICE_TYPE"
 
 #define MV_OBJECT_DETECTION_3D_MAX_NUM_OF_POINTS "MAX_NUM_OF_POINTS"
 
 #define MV_OBJECT_DETECTION_3D_MAX_NUM_OF_EDGES "MAX_NUM_OF_EDGES"
 
-#define MV_OBJECT_DETECTION_3D_META_FILE_NAME "object_detection_3d.json"
+#define MV_OBJECT_DETECTION_META_FILE_NAME "object_detection.json"
 
 #endif /* __MEDIA_VISION_INFERENCE_OPEN_H__ */
  * limitations under the License.
  */
 
-#ifndef __MEDIA_VISION_OBJECT_DETECTION_3D_OPEN_H__
-#define __MEDIA_VISION_OBJECT_DETECTION_3D_OPEN_H__
+#ifndef __MEDIA_VISION_OBJECT_DETECTION_OPEN_H__
+#define __MEDIA_VISION_OBJECT_DETECTION_OPEN_H__
 
 #include <mv_common.h>
 #include <mv_private.h>
-#include <mv_object_detection_3d_type.h>
+#include <mv_object_detection_type.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -29,7 +29,7 @@ extern "C" {
         * @brief Create face recognition object handle.
         * @details Use this function to create an face recognition object handle.
         *          After creation the handle has to be prepared with
-        *          @ref mv_object_detection_3d_prepare_open() function to prepare
+        *          @ref mv_object_detection_prepare_open() function to prepare
         *               an face recognition object.
         *
         * @since_tizen 7.0
@@ -42,12 +42,12 @@ extern "C" {
         * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
         *
         * @post Release @a handle by using
-        *       @ref mv_object_detection_3d_destroy_open() function when it is not needed
+        *       @ref mv_object_detection_destroy_open() function when it is not needed
         *       anymore
         *
-        * @see mv_object_detection_3d_destroy_open()
+        * @see mv_object_detection_destroy_open()
         */
-int mv_object_detection_3d_create_open(mv_object_detection_3d_h *out_handle);
+int mv_object_detection_create_open(mv_object_detection_h *out_handle);
 
 /**
         * @brief Destroy face recognition handle and releases all its resources.
@@ -60,11 +60,11 @@ int mv_object_detection_3d_create_open(mv_object_detection_3d_h *out_handle);
         * @retval #MEDIA_VISION_ERROR_NONE Successful
         * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
         *
-        * @pre Create an face recognition handle by using @ref mv_object_detection_3d_create_open()
+        * @pre Create an face recognition handle by using @ref mv_object_detection_create_open()
         *
-        * @see mv_object_detection_3d_create_open()
+        * @see mv_object_detection_create_open()
         */
-int mv_object_detection_3d_destroy_open(mv_object_detection_3d_h handle);
+int mv_object_detection_destroy_open(mv_object_detection_h handle);
 
 /**
         * @brief Configure the backend to the inference handle
@@ -78,7 +78,7 @@ int mv_object_detection_3d_destroy_open(mv_object_detection_3d_h handle);
         * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
         * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
         */
-int mv_object_detection_3d_configure_open(mv_object_detection_3d_h handle);
+int mv_object_detection_configure_open(mv_object_detection_h handle);
 
 /**
         * @brief Prepare inference.
@@ -94,7 +94,7 @@ int mv_object_detection_3d_configure_open(mv_object_detection_3d_h handle);
         * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data
         * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
         */
-int mv_object_detection_3d_prepare_open(mv_object_detection_3d_h handle);
+int mv_object_detection_prepare_open(mv_object_detection_h handle);
 
 /**
         *
@@ -115,15 +115,15 @@ int mv_object_detection_3d_prepare_open(mv_object_detection_3d_h handle);
         * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
         *
         * @pre Create a source handle by calling @ref mv_create_source()
-        * @pre Create an face recognition handle by calling @ref mv_object_detection_3d_create_open()
-        * @pre Prepare an face recognition by calling @ref mv_object_detection_3d_prepare_open()
-        * @pre Register a new face by calling @ref mv_object_detection_3d_register_open()
+        * @pre Create an face recognition handle by calling @ref mv_object_detection_create_open()
+        * @pre Prepare an face recognition by calling @ref mv_object_detection_prepare_open()
+        * @pre Register a new face by calling @ref mv_object_detection_register_open()
         */
-int mv_object_detection_3d_inference_open(mv_object_detection_3d_h handle, mv_source_h source);
+int mv_object_detection_inference_open(mv_object_detection_h handle, mv_source_h source);
 
 /**
         * @brief Gets the probability value to the detected object.
-        * @details Use this function to get the probability value after calling @ref mv_object_detection_3d_inference().
+        * @details Use this function to get the probability value after calling @ref mv_object_detection_inference().
         *
         * @since_tizen 7.0
         *
@@ -137,13 +137,13 @@ int mv_object_detection_3d_inference_open(mv_object_detection_3d_h handle, mv_so
         * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
         * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
         *
-        * @pre Request an inference by calling @ref mv_object_detection_3d_inference()
+        * @pre Request an inference by calling @ref mv_object_detection_inference()
         */
-int mv_object_detection_3d_get_probability_open(mv_object_detection_3d_h handle, unsigned int *out_probability);
+int mv_object_detection_3d_get_probability_open(mv_object_detection_h handle, unsigned int *out_probability);
 
 /**
         * @brief Gets the number of points to the 3D bounding box of the detected object.
-        * @details Use this function to get the number of points after calling @ref mv_object_detection_3d_inference().
+        * @details Use this function to get the number of points after calling @ref mv_object_detection_inference().
         *
         * @since_tizen 7.0
         *
@@ -159,11 +159,11 @@ int mv_object_detection_3d_get_probability_open(mv_object_detection_3d_h handle,
         *
         * @pre Request an inference by calling @ref mv_object_detection_3d_inference()
         */
-int mv_object_detection_3d_get_num_of_points_open(mv_object_detection_3d_h handle, unsigned int *out_num_of_points);
+int mv_object_detection_3d_get_num_of_points_open(mv_object_detection_h handle, unsigned int *out_num_of_points);
 
 /**
         * @brief Gets the x and y coordinates values to the 3D bounding box of the detected object.
-        * @details Use this function to get the coordinates values after calling @ref mv_object_detection_3d_inference().
+        * @details Use this function to get the coordinates values after calling @ref mv_object_detection_inference().
         *
         * @since_tizen 7.0
         *
@@ -178,9 +178,9 @@ int mv_object_detection_3d_get_num_of_points_open(mv_object_detection_3d_h handl
         * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
         * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
         *
-        * @pre Request an inference by calling @ref mv_object_detection_3d_inference()
+        * @pre Request an inference by calling @ref mv_object_detection_inference()
         */
-int mv_object_detection_3d_get_points_open(mv_object_detection_3d_h handle, unsigned int **out_x, unsigned int **out_y);
+int mv_object_detection_3d_get_points_open(mv_object_detection_h handle, unsigned int **out_x, unsigned int **out_y);
 
 #ifdef __cplusplus
 }
index 02c3a50..889a2cc 100644 (file)
@@ -44,15 +44,18 @@ protected:
        int _backendType;
        int _targetDeviceType;
 
+       void getOutputNames(std::vector<std::string> &names);
+       void getOutputTensor(std::string &target_name, std::vector<float> &output);
+
 public:
        ObjectDetection();
        virtual ~ObjectDetection() = default;
-       virtual void parseMetaFile() = 0;
+       void parseMetaFile();
        void configure();
        void prepare();
        void preprocess(mv_source_h &mv_src);
        void inference(mv_source_h source);
-       virtual object_detection_3d_result_s &getResult() = 0;
+       virtual object_detection_result_s &getResult() = 0;
 };
 
 } // machine_learning
index bd2c36f..674ef81 100644 (file)
@@ -24,20 +24,20 @@ namespace mediavision
 {
 namespace machine_learning
 {
-typedef struct {
+struct object_detection_input_s {
        mv_source_h inference_src;
-} object_detection_input_s;
+};
 
-typedef struct {
+struct edge_index_s {
        unsigned int start;
        unsigned int end;
-} edge_index_s;
+};
 
 /**
  * @brief The object detection result structure.
- * @details Contains object detection 3d result.
+ * @details Contains object detection result.
  */
-struct object_detection_3d_result_s {
+struct object_detection_result_s {
        unsigned int probability;
        unsigned int number_of_points;
        std::vector<unsigned int> x_vec;
@@ -46,12 +46,11 @@ struct object_detection_3d_result_s {
        std::vector<edge_index_s> edge_index_vec;
 };
 
-typedef enum {
+enum class object_detection_task_type_e {
        OBJECT_DETECTION_TASK_NONE = 0,
-       OBJECT_DETECTION_TASK_2D,
-       OBJECT_DETECTION_TASK_3D
+       OBJECTRON
        // TODO
-} object_detection_task_type_e;
+};
 
 }
 }
index 404786a..1c70378 100644 (file)
@@ -23,7 +23,6 @@
 
 #include "object_detection.h"
 #include <mv_inference_type.h>
-#include "EngineConfig.h"
 
 namespace mediavision
 {
@@ -32,13 +31,12 @@ namespace machine_learning
 class Objectron : public ObjectDetection
 {
 private:
-       object_detection_3d_result_s _result;
+       object_detection_result_s _result;
 
 public:
        Objectron();
        ~Objectron();
-       void parseMetaFile() override;
-       object_detection_3d_result_s &getResult() override;
+       object_detection_result_s &getResult() override;
 };
 
 } // machine_learning
@@ -1,20 +1,25 @@
 {
     "attributes":
     [
+        {
+            "name" : "MODEL_DEFAULT_PATH",
+            "type" : "string",
+            "value" : "/usr/share/capi-media-vision/models/OD/tflite/"
+        },
                {
             "name" : "OBJECT_NAME",
                        "type" : "string",
                        "value" : "cup"
                },
                {
-            "name"  : "MODEL_FILE_PATH",
+            "name"  : "MODEL_FILE_NAME",
             "type"  : "string",
-            "value" : "/home/owner/media/res/object_detection_3d/tflite/object_detection_3d_cup.tflite"
+            "value" : "object_detection_3d_cup.tflite"
         },
                {
-            "name"  : "META_FILE_PATH",
+            "name"  : "META_FILE_NAME",
             "type"  : "string",
-            "value" : "/home/owner/media/res/object_detection_3d/tflite/object_detection_3d_cup.json"
+            "value" : "object_detection_3d_cup.json"
         },
         {
             "name"  : "BACKEND_TYPE",
  */
 
 #include "mv_private.h"
-#include "mv_object_detection_3d_internal.h"
-#include "mv_object_detection_3d_open.h"
+#include "mv_object_detection_internal.h"
+#include "mv_object_detection_open.h"
 
 /**
- * @file  mv_object_detection_3d.c
+ * @file  mv_object_detection.c
  * @brief This file contains Media Vision inference module.
  */
 
-int mv_object_detection_3d_create(mv_object_detection_3d_h *infer)
+int mv_object_detection_create(mv_object_detection_h *infer)
 {
        MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
        MEDIA_VISION_NULL_ARG_CHECK(infer);
@@ -32,13 +32,13 @@ int mv_object_detection_3d_create(mv_object_detection_3d_h *infer)
 
        int ret = MEDIA_VISION_ERROR_NONE;
 
-       ret = mv_object_detection_3d_create_open(infer);
+       ret = mv_object_detection_create_open(infer);
 
        MEDIA_VISION_FUNCTION_LEAVE();
        return ret;
 }
 
-int mv_object_detection_3d_destroy(mv_object_detection_3d_h infer)
+int mv_object_detection_destroy(mv_object_detection_h infer)
 {
        MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
        MEDIA_VISION_INSTANCE_CHECK(infer);
@@ -47,13 +47,13 @@ int mv_object_detection_3d_destroy(mv_object_detection_3d_h infer)
 
        int ret = MEDIA_VISION_ERROR_NONE;
 
-       ret = mv_object_detection_3d_destroy_open(infer);
+       ret = mv_object_detection_destroy_open(infer);
 
        MEDIA_VISION_FUNCTION_LEAVE();
        return ret;
 }
 
-int mv_object_detection_3d_configure(mv_object_detection_3d_h infer)
+int mv_object_detection_configure(mv_object_detection_h infer)
 {
        MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
        MEDIA_VISION_INSTANCE_CHECK(infer);
@@ -62,13 +62,13 @@ int mv_object_detection_3d_configure(mv_object_detection_3d_h infer)
 
        int ret = MEDIA_VISION_ERROR_NONE;
 
-       ret = mv_object_detection_3d_configure_open(infer);
+       ret = mv_object_detection_configure_open(infer);
 
        MEDIA_VISION_FUNCTION_LEAVE();
        return ret;
 }
 
-int mv_object_detection_3d_prepare(mv_object_detection_3d_h infer)
+int mv_object_detection_prepare(mv_object_detection_h infer)
 {
        MEDIA_VISION_SUPPORT_CHECK(_mv_inference_check_system_info_feature_supported());
        MEDIA_VISION_INSTANCE_CHECK(infer);
@@ -77,13 +77,13 @@ int mv_object_detection_3d_prepare(mv_object_detection_3d_h infer)
 
        int ret = MEDIA_VISION_ERROR_NONE;
 
-       ret = mv_object_detection_3d_prepare_open(infer);
+       ret = mv_object_detection_prepare_open(infer);
 
        MEDIA_VISION_FUNCTION_LEAVE();
        return ret;
 }
 
-int mv_object_detection_3d_inference(mv_object_detection_3d_h infer, mv_source_h source)
+int mv_object_detection_inference(mv_object_detection_h infer, mv_source_h source)
 {
        MEDIA_VISION_SUPPORT_CHECK(_mv_inference_image_check_system_info_feature_supported());
        MEDIA_VISION_INSTANCE_CHECK(source);
@@ -93,14 +93,14 @@ int mv_object_detection_3d_inference(mv_object_detection_3d_h infer, mv_source_h
 
        int ret = MEDIA_VISION_ERROR_NONE;
 
-       ret = mv_object_detection_3d_inference_open(infer, source);
+       ret = mv_object_detection_inference_open(infer, source);
 
        MEDIA_VISION_FUNCTION_LEAVE();
 
        return ret;
 }
 
-int mv_object_detection_3d_get_probability(mv_object_detection_3d_h handle, unsigned int *out_probability)
+int mv_object_detection_3d_get_probability(mv_object_detection_h handle, unsigned int *out_probability)
 {
        MEDIA_VISION_SUPPORT_CHECK(_mv_inference_face_check_system_info_feature_supported());
 
@@ -118,7 +118,7 @@ int mv_object_detection_3d_get_probability(mv_object_detection_3d_h handle, unsi
        return ret;
 }
 
-int mv_object_detection_3d_get_num_of_points(mv_object_detection_3d_h handle, unsigned int *out_num_of_points)
+int mv_object_detection_3d_get_num_of_points(mv_object_detection_h handle, unsigned int *out_num_of_points)
 {
        MEDIA_VISION_SUPPORT_CHECK(_mv_inference_face_check_system_info_feature_supported());
 
@@ -136,7 +136,7 @@ int mv_object_detection_3d_get_num_of_points(mv_object_detection_3d_h handle, un
        return ret;
 }
 
-int mv_object_detection_3d_get_points(mv_object_detection_3d_h handle, unsigned int **out_x, unsigned int **out_y)
+int mv_object_detection_3d_get_points(mv_object_detection_h handle, unsigned int **out_x, unsigned int **out_y)
 {
        MEDIA_VISION_SUPPORT_CHECK(_mv_inference_face_check_system_info_feature_supported());
 
@@ -16,7 +16,7 @@
 
 #include "mv_private.h"
 #include "itask.h"
-#include "mv_object_detection_3d_open.h"
+#include "mv_object_detection_open.h"
 #include "object_detection_adapter.h"
 #include "machine_learning_exception.h"
 #include "object_detection_type.h"
@@ -33,9 +33,9 @@ using namespace mediavision::common;
 using namespace mediavision::machine_learning;
 using namespace MediaVision::Common;
 using namespace mediavision::machine_learning::exception;
-using ObjectDetectionTask = ITask<object_detection_input_s, object_detection_3d_result_s>;
+using ObjectDetectionTask = ITask<object_detection_input_s, object_detection_result_s>;
 
-int mv_object_detection_3d_create_open(mv_object_detection_3d_h *out_handle)
+int mv_object_detection_create_open(mv_object_detection_h *out_handle)
 {
        if (!out_handle) {
                LOGE("Handle can't be created because handle pointer is NULL");
@@ -49,7 +49,7 @@ int mv_object_detection_3d_create_open(mv_object_detection_3d_h *out_handle)
        }
 
        ObjectDetectionTask *task = new (nothrow)
-                       ObjectDetectionAdapter<object_detection_input_s, object_detection_3d_result_s>();
+                       ObjectDetectionAdapter<object_detection_input_s, object_detection_result_s>();
        if (!task) {
                delete context;
                LOGE("Fail to allocate a task.");
@@ -57,20 +57,20 @@ int mv_object_detection_3d_create_open(mv_object_detection_3d_h *out_handle)
        }
 
        try {
-               task->create(OBJECT_DETECTION_TASK_3D);
+               task->create(static_cast<int>(object_detection_task_type_e::OBJECTRON));
        } catch (const BaseException &e) {
                return e.getError();
        }
 
-       context->__tasks.insert(make_pair("objectron", task));
-       *out_handle = static_cast<mv_object_detection_3d_h>(context);
+       context->__tasks.insert(make_pair("object_detection", task));
+       *out_handle = static_cast<mv_object_detection_h>(context);
 
        LOGD("object detection 3d handle [%p] has been created", *out_handle);
 
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int mv_object_detection_3d_destroy_open(mv_object_detection_3d_h handle)
+int mv_object_detection_destroy_open(mv_object_detection_h handle)
 {
        if (!handle) {
                LOGE("Handle is NULL.");
@@ -89,7 +89,7 @@ int mv_object_detection_3d_destroy_open(mv_object_detection_3d_h handle)
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int mv_object_detection_3d_configure_open(mv_object_detection_3d_h handle)
+int mv_object_detection_configure_open(mv_object_detection_h handle)
 {
        LOGD("ENTER");
 
@@ -100,7 +100,7 @@ int mv_object_detection_3d_configure_open(mv_object_detection_3d_h handle)
 
        try {
                auto context = static_cast<Context *>(handle);
-               auto task = static_cast<ObjectDetectionTask *>(context->__tasks["objectron"]);
+               auto task = static_cast<ObjectDetectionTask *>(context->__tasks.at("object_detection"));
 
                task->configure();
        } catch (const BaseException &e) {
@@ -113,7 +113,7 @@ int mv_object_detection_3d_configure_open(mv_object_detection_3d_h handle)
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int mv_object_detection_3d_prepare_open(mv_object_detection_3d_h handle)
+int mv_object_detection_prepare_open(mv_object_detection_h handle)
 {
        LOGD("ENTER");
 
@@ -124,7 +124,7 @@ int mv_object_detection_3d_prepare_open(mv_object_detection_3d_h handle)
 
        try {
                auto context = static_cast<Context *>(handle);
-               auto task = static_cast<ObjectDetectionTask *>(context->__tasks["objectron"]);
+               auto task = static_cast<ObjectDetectionTask *>(context->__tasks.at("object_detection"));
 
                task->prepare();
        } catch (const BaseException &e) {
@@ -137,7 +137,7 @@ int mv_object_detection_3d_prepare_open(mv_object_detection_3d_h handle)
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int mv_object_detection_3d_inference_open(mv_object_detection_3d_h handle, mv_source_h source)
+int mv_object_detection_inference_open(mv_object_detection_h handle, mv_source_h source)
 {
        LOGD("ENTER");
 
@@ -148,7 +148,7 @@ int mv_object_detection_3d_inference_open(mv_object_detection_3d_h handle, mv_so
 
        try {
                auto context = static_cast<Context *>(handle);
-               auto task = static_cast<ObjectDetectionTask *>(context->__tasks["objectron"]);
+               auto task = static_cast<ObjectDetectionTask *>(context->__tasks.at("object_detection"));
 
                object_detection_input_s input = { source };
 
@@ -164,7 +164,7 @@ int mv_object_detection_3d_inference_open(mv_object_detection_3d_h handle, mv_so
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int mv_object_detection_3d_get_probability_open(mv_object_detection_3d_h handle, unsigned int *out_probability)
+int mv_object_detection_3d_get_probability_open(mv_object_detection_h handle, unsigned int *out_probability)
 {
        LOGD("ENTER");
 
@@ -175,9 +175,9 @@ int mv_object_detection_3d_get_probability_open(mv_object_detection_3d_h handle,
 
        try {
                auto context = static_cast<Context *>(handle);
-               auto task = static_cast<ObjectDetectionTask *>(context->__tasks["objectron"]);
+               auto task = static_cast<ObjectDetectionTask *>(context->__tasks.at("object_detection"));
 
-               object_detection_3d_result_s &result = task->getOutput();
+               object_detection_result_s &result = task->getOutput();
 
                *out_probability = result.probability;
        } catch (const BaseException &e) {
@@ -190,7 +190,7 @@ int mv_object_detection_3d_get_probability_open(mv_object_detection_3d_h handle,
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int mv_object_detection_3d_get_num_of_points_open(mv_object_detection_3d_h handle, unsigned int *out_num_of_points)
+int mv_object_detection_3d_get_num_of_points_open(mv_object_detection_h handle, unsigned int *out_num_of_points)
 {
        LOGD("ENTER");
 
@@ -201,9 +201,9 @@ int mv_object_detection_3d_get_num_of_points_open(mv_object_detection_3d_h handl
 
        try {
                auto context = static_cast<Context *>(handle);
-               auto task = static_cast<ObjectDetectionTask *>(context->__tasks["objectron"]);
+               auto task = static_cast<ObjectDetectionTask *>(context->__tasks.at("object_detection"));
 
-               object_detection_3d_result_s &result = task->getOutput();
+               object_detection_result_s &result = task->getOutput();
 
                *out_num_of_points = result.number_of_points;
        } catch (const BaseException &e) {
@@ -216,7 +216,7 @@ int mv_object_detection_3d_get_num_of_points_open(mv_object_detection_3d_h handl
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int mv_object_detection_3d_get_points_open(mv_object_detection_3d_h handle, unsigned int **out_x, unsigned int **out_y)
+int mv_object_detection_3d_get_points_open(mv_object_detection_h handle, unsigned int **out_x, unsigned int **out_y)
 {
        LOGD("ENTER");
 
@@ -227,23 +227,23 @@ int mv_object_detection_3d_get_points_open(mv_object_detection_3d_h handle, unsi
 
        try {
                Context *context = static_cast<Context *>(handle);
-               auto task = static_cast<ObjectDetectionTask *>(context->__tasks["objectron"]);
+               auto task = static_cast<ObjectDetectionTask *>(context->__tasks.at("object_detection"));
 
-               object_detection_3d_result_s &result = task->getOutput();
+               object_detection_result_s &result = task->getOutput();
 
                *out_x = result.x_vec.data();
                *out_y = result.y_vec.data();
 
                for (auto &edge : result.edge_index_vec)
                        LOGI("%d,%d ", edge.start, edge.end);
+
+               for (unsigned int i = 0; i < result.number_of_points; ++i)
+                       LOGI("%d %d", (*out_x)[i], (*out_y)[i]);
        } catch (const BaseException &e) {
                LOGE("%s", e.what());
                return e.getError();
        }
 
-       for (unsigned int i = 0; i < 9; ++i)
-               LOGI("%d %d", (*out_x)[i], (*out_y)[i]);
-
        LOGD("LEAVE");
 
        return MEDIA_VISION_ERROR_NONE;
index e50fb14..b75e82b 100644 (file)
 #include <algorithm>
 
 #include "machine_learning_exception.h"
+#include "mv_object_detection_config.h"
 #include "object_detection.h"
 
 using namespace std;
 using namespace mediavision::inference;
+using namespace MediaVision::Common;
 using namespace mediavision::machine_learning::exception;
 
 namespace mediavision
@@ -32,9 +34,54 @@ namespace machine_learning
 {
 ObjectDetection::ObjectDetection() : _backendType(), _targetDeviceType()
 {
+       _inference = make_unique<Inference>();
        _parser = make_unique<ObjectDetectionParser>();
 }
 
+// Returns true when the text after the last '.' in fileName is exactly "json".
+//
+// A name without any '.' has no extension and is rejected. Without the explicit
+// npos check, find_last_of() would return npos, npos + 1 would wrap to 0 and the
+// whole file name would be compared against "json", so a file literally named
+// "json" would be accepted.
+static bool IsJsonFile(const string &fileName)
+{
+       const string::size_type dotPos = fileName.find_last_of('.');
+
+       if (dotPos == string::npos)
+               return false;
+
+       // dotPos + 1 may equal size() (name ends with '.'); compare() then sees an empty extension.
+       return fileName.compare(dotPos + 1, string::npos, "json") == 0;
+}
+
+// Loads the object detection configuration file and the model meta file.
+//
+// Reads the backend type, target device type, model default path, model file
+// path and model meta file path from the configuration. The default path is
+// prepended to both the model file path and the meta file path, and the
+// resulting meta file is handed to the parser.
+//
+// Throws InvalidOperation when an attribute cannot be read, when the meta file
+// path is empty, or when it does not name a json file.
+void ObjectDetection::parseMetaFile()
+{
+       _config = make_unique<EngineConfig>(string(MV_CONFIG_PATH) + string(MV_OBJECT_DETECTION_META_FILE_NAME));
+
+       if (_config->getIntegerAttribute(string(MV_OBJECT_DETECTION_BACKEND_TYPE), &_backendType) !=
+               MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get backend engine type.");
+
+       if (_config->getIntegerAttribute(string(MV_OBJECT_DETECTION_TARGET_DEVICE_TYPE), &_targetDeviceType) !=
+               MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get target device type.");
+
+       // Directory prefix shared by the model file and its meta file.
+       string basePath;
+
+       if (_config->getStringAttribute(MV_OBJECT_DETECTION_MODEL_DEFAULT_PATH, &basePath) != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get model default path");
+
+       if (_config->getStringAttribute(MV_OBJECT_DETECTION_MODEL_FILE_PATH, &_modelFilePath) != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get model file path");
+
+       _modelFilePath.insert(0, basePath);
+
+       if (_config->getStringAttribute(MV_OBJECT_DETECTION_MODEL_META_FILE_PATH, &_modelMetaFilePath) !=
+               MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get model meta file path");
+
+       // Validate the configured name before the default path is prepended to it.
+       if (_modelMetaFilePath.empty())
+               throw InvalidOperation("Model meta file doesn't exist.");
+
+       if (!IsJsonFile(_modelMetaFilePath))
+               throw InvalidOperation("Model meta file should be json");
+
+       _modelMetaFilePath.insert(0, basePath);
+
+       _parser->load(_modelMetaFilePath);
+}
+
 void ObjectDetection::configure()
 {
        int ret = _inference->Bind(_backendType, _targetDeviceType);
@@ -87,5 +134,31 @@ void ObjectDetection::inference(mv_source_h source)
        LOGI("LEAVE");
 }
 
+// Appends the key of every entry in the inference output tensor buffer to
+// names, in the container's iteration order. Existing elements of names are kept.
+void ObjectDetection::getOutputNames(vector<string> &names)
+{
+       IETensorBuffer &outputBuffers = _inference->GetOutputTensorBuffer().getIETensorBuffer();
+
+       for (auto &entry : outputBuffers)
+               names.push_back(entry.first);
+}
+
+// Appends the contents of the output tensor called target_name to output,
+// reading the tensor's buffer as an array of float.
+//
+// Throws InvalidOperation when no tensor buffer is found for target_name.
+void ObjectDetection::getOutputTensor(string &target_name, vector<float> &output)
+{
+       LOGI("ENTER");
+
+       inference_engine_tensor_buffer *found = _inference->GetOutputTensorBuffer().getTensorBuffer(target_name);
+       if (found == nullptr)
+               throw InvalidOperation("Fail to get tensor buffer.");
+
+       // The element count is the buffer size divided by sizeof(float).
+       float *first = static_cast<float *>(found->buffer);
+       float *last = first + found->size / sizeof(float);
+
+       // Values are added after whatever output already holds; nothing is cleared.
+       output.insert(output.end(), first, last);
+
+       LOGI("LEAVE");
+}
+
 }
 }
\ No newline at end of file
index edcefaf..61fe189 100644 (file)
@@ -34,8 +34,8 @@ template<typename T, typename V> ObjectDetectionAdapter<T, V>::~ObjectDetectionA
 
 template<typename T, typename V> void ObjectDetectionAdapter<T, V>::create(int type)
 {
-       switch (type) {
-       case OBJECT_DETECTION_TASK_3D:
+       switch (static_cast<object_detection_task_type_e>(type)) {
+       case object_detection_task_type_e::OBJECTRON:
                _object_detection = make_unique<Objectron>();
                break;
        default:
@@ -45,21 +45,13 @@ template<typename T, typename V> void ObjectDetectionAdapter<T, V>::create(int t
 
 template<typename T, typename V> void ObjectDetectionAdapter<T, V>::configure()
 {
-       try {
-               _object_detection->parseMetaFile();
-               _object_detection->configure();
-       } catch (const BaseException &e) {
-               throw e;
-       }
+       _object_detection->parseMetaFile();
+       _object_detection->configure();
 }
 
 template<typename T, typename V> void ObjectDetectionAdapter<T, V>::prepare()
 {
-       try {
-               _object_detection->prepare();
-       } catch (const BaseException &e) {
-               throw e;
-       }
+       _object_detection->prepare();
 }
 
 template<typename T, typename V> void ObjectDetectionAdapter<T, V>::setInput(T &t)
@@ -69,12 +61,8 @@ template<typename T, typename V> void ObjectDetectionAdapter<T, V>::setInput(T &
 
 template<typename T, typename V> void ObjectDetectionAdapter<T, V>::perform()
 {
-       try {
-               _object_detection->preprocess(_source.inference_src);
-               _object_detection->inference(_source.inference_src);
-       } catch (const BaseException &e) {
-               throw e;
-       }
+       _object_detection->preprocess(_source.inference_src);
+       _object_detection->inference(_source.inference_src);
 }
 
 template<typename T, typename V> V &ObjectDetectionAdapter<T, V>::getOutput()
@@ -82,6 +70,6 @@ template<typename T, typename V> V &ObjectDetectionAdapter<T, V>::getOutput()
        return _object_detection->getResult();
 }
 
-template class ObjectDetectionAdapter<object_detection_input_s, object_detection_3d_result_s>;
+template class ObjectDetectionAdapter<object_detection_input_s, object_detection_result_s>;
 }
 }
\ No newline at end of file
index a02ec68..76793fa 100644 (file)
 
 #include "machine_learning_exception.h"
 #include "objectron.h"
-#include "mv_object_detection_3d_config.h"
 #include "Postprocess.h"
 
 using namespace std;
 using namespace mediavision::inference;
-using namespace MediaVision::Common;
 using namespace mediavision::machine_learning::exception;
 
 namespace mediavision
@@ -33,68 +31,22 @@ namespace mediavision
 namespace machine_learning
 {
 Objectron::Objectron() : _result()
-{
-       _inference = make_unique<Inference>();
-}
+{}
 
 Objectron::~Objectron()
 {}
 
-static bool IsJsonFile(const string &fileName)
-{
-       return (!fileName.substr(fileName.find_last_of(".") + 1).compare("json"));
-}
-
-void Objectron::parseMetaFile()
+object_detection_result_s &Objectron::getResult()
 {
-       _config = make_unique<EngineConfig>(string(MV_CONFIG_PATH) + string(MV_OBJECT_DETECTION_3D_META_FILE_NAME));
-
-       int ret = _config->getIntegerAttribute(string(MV_OBJECT_DETECTION_3D_BACKEND_TYPE), &_backendType);
-       if (ret != MEDIA_VISION_ERROR_NONE)
-               throw InvalidOperation("Fail to get backend engine type.");
-
-       ret = _config->getIntegerAttribute(string(MV_OBJECT_DETECTION_3D_TARGET_DEVICE_TYPE), &_targetDeviceType);
-       if (ret != MEDIA_VISION_ERROR_NONE)
-               throw InvalidOperation("Fail to get target device type.");
+       vector<string> names;
 
-       ret = _config->getStringAttribute(MV_OBJECT_DETECTION_3D_MODEL_FILE_PATH, &_modelFilePath);
-       if (ret != MEDIA_VISION_ERROR_NONE)
-               throw InvalidOperation("Fail to get model file path");
+       ObjectDetection::getOutputNames(names);
 
-       ret = _config->getStringAttribute(MV_OBJECT_DETECTION_3D_MODEL_META_FILE_PATH, &_modelMetaFilePath);
-       if (ret != MEDIA_VISION_ERROR_NONE)
-               throw InvalidOperation("Fail to get model meta file path");
+       vector<float> keypoints;
 
-       if (_modelMetaFilePath.empty())
-               throw InvalidOperation("Model meta file doesn't exist.");
+       ObjectDetection::getOutputTensor(names[1], keypoints);
 
-       if (!IsJsonFile(_modelMetaFilePath))
-               throw InvalidOperation("Model meta file should be json");
-
-       _parser->load(_modelMetaFilePath);
-}
-
-object_detection_3d_result_s &Objectron::getResult()
-{
-       TensorBuffer &tensor_buffer_obj = _inference->GetOutputTensorBuffer();
-       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
-
-       vector<string> output_layer_names;
-
-       for (IETensorBuffer::iterator it = ie_tensor_buffer.begin(); it != ie_tensor_buffer.end(); it++)
-               output_layer_names.push_back(it->first);
-
-       string &identity_1_layer = output_layer_names[1];
-
-       inference_engine_tensor_buffer *tensor_buffer = tensor_buffer_obj.getTensorBuffer(identity_1_layer);
-       if (!tensor_buffer)
-               throw InvalidOperation("Fail to get tensor buffer.");
-
-       unsigned int output_size = tensor_buffer->size / 4;
-       auto *keypoints = reinterpret_cast<float *>(tensor_buffer->buffer);
-
-       if (output_size != 18)
-               throw InvalidOperation("Invalid number of points. Number of points should be 18.");
+       size_t output_size = keypoints.size();
 
        Postprocess postprocess({ _preprocess.getImageWidth()[0], _preprocess.getImageHeight()[0],
                                                          _inference->getInputWidth(), _inference->getInputHeight() });
@@ -102,25 +54,23 @@ object_detection_3d_result_s &Objectron::getResult()
        _result.x_vec.clear();
        _result.y_vec.clear();
 
-       for (unsigned int idx = 0; idx < output_size; idx += 2) {
+       for (size_t idx = 0; idx < output_size; idx += 2) {
                _result.x_vec.push_back(postprocess.getScaledX(keypoints[idx]));
                _result.y_vec.push_back(postprocess.getScaledY(keypoints[idx + 1]));
        }
 
        _result.number_of_points = output_size / 2;
 
-       string &identity_layer = output_layer_names[0];
-
-       tensor_buffer = tensor_buffer_obj.getTensorBuffer(identity_layer);
-       if (!tensor_buffer)
-               throw InvalidOperation("Fail to get tensor buffer.");
+       vector<float> probability_vec;
 
-       auto *prob = reinterpret_cast<float *>(tensor_buffer->buffer);
+       // names[0] is "Identity"
+       ObjectDetection::getOutputTensor(names[0], probability_vec);
 
-       _result.probability = static_cast<unsigned int>(prob[0] * 100);
+       _result.probability = static_cast<unsigned int>(probability_vec[0] * 100);
 
        try {
-               auto metaInfo = _parser->getOutputMetaMap()["Identity_1"];
+               // names[1] is "Identity_1"
+               auto metaInfo = _parser->getOutputMetaMap().at(names[1]);
                auto decodingBox = static_pointer_cast<DecodingBox>(metaInfo->decodingTypeMap[DecodingType::BOX]);
 
                for (size_t idx = 0; idx < decodingBox->edges.size(); idx += 2)
index c72ba25..0d99a4a 100644 (file)
@@ -406,7 +406,7 @@ find . -name '*.gcno' -not -path "./test/*" -exec cp --parents '{}' "$gcno_obj_d
 %manifest %{name}.manifest
 %license LICENSE.APLv2
 %{_libdir}/libmv_inference*.so
-%{_datadir}/%{name}/object_detection_3d.json
+%{_datadir}/%{name}/object_detection.json
 %{_libdir}/libmv_object_detection*.so
 %{_datadir}/%{name}/image_classification.json
 %{_libdir}/libmv_image_classification*.so
@@ -418,7 +418,7 @@ find . -name '*.gcno' -not -path "./test/*" -exec cp --parents '{}' "$gcno_obj_d
 
 %files machine_learning-devel
 %{_includedir}/media/mv_infer*.h
-%{_includedir}/media/mv_object_detection_3d*.h
+%{_includedir}/media/mv_object_detection*.h
 %{_includedir}/media/mv_image_classification*.h
 %{_libdir}/pkgconfig/*inference.pc
 %{_libdir}/pkgconfig/*object-detection.pc
@@ -450,7 +450,7 @@ find . -name '*.gcno' -not -path "./test/*" -exec cp --parents '{}' "$gcno_obj_d
 %{_libdir}/libmv_testsuite*.so
 %{_bindir}/mv_*
 %{_bindir}/test_image_classification
-%{_bindir}/test_object_detection_3d
+%{_bindir}/test_object_detection
 %if "%{enable_ml_face_recognition}" == "1"
 %{_bindir}/test_face_recognition
 %{_bindir}/test_face_recognition_multi_threads
index 6f5cb3e..db4beba 100644 (file)
@@ -1,14 +1,14 @@
-project(mv_object_detection_3d_suite)
+project(mv_object_detection_suite)
 cmake_minimum_required(VERSION 2.6...3.13)
 
-set(TEST_OBJECT_DETECTION_3D test_object_detection_3d)
+set(TEST_OBJECT_DETECTION test_object_detection)
 
-add_executable(${TEST_OBJECT_DETECTION_3D} test_object_detection_3d.cpp)
+add_executable(${TEST_OBJECT_DETECTION} test_object_detection.cpp)
 
-target_link_libraries(${TEST_OBJECT_DETECTION_3D} gtest gtest_main
+target_link_libraries(${TEST_OBJECT_DETECTION} gtest gtest_main
                       mv_inference
                       mv_object_detection
                       mv_image_helper
 )
 
-install(TARGETS ${TEST_OBJECT_DETECTION_3D} DESTINATION ${CMAKE_INSTALL_BINDIR})
\ No newline at end of file
+install(TARGETS ${TEST_OBJECT_DETECTION} DESTINATION ${CMAKE_INSTALL_BINDIR})
\ No newline at end of file
@@ -21,7 +21,7 @@
 #include "gtest/gtest.h"
 
 #include "ImageHelper.h"
-#include "mv_object_detection_3d_internal.h"
+#include "mv_object_detection_internal.h"
 
 #define IMAGE_PATH MV_CONFIG_PATH "/res/object_detection/cup.jpeg"
 
@@ -32,15 +32,15 @@ using namespace MediaVision::Common;
 
 TEST(ObjectDetection3DTest, InferenceShouldBeOk)
 {
-       mv_object_detection_3d_h handle;
+       mv_object_detection_h handle;
 
-       int ret = mv_object_detection_3d_create(&handle);
+       int ret = mv_object_detection_create(&handle);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
-       ret = mv_object_detection_3d_configure(handle);
+       ret = mv_object_detection_configure(handle);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
-       ret = mv_object_detection_3d_prepare(handle);
+       ret = mv_object_detection_prepare(handle);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
        const string image_path = IMAGE_PATH;
@@ -52,7 +52,7 @@ TEST(ObjectDetection3DTest, InferenceShouldBeOk)
        ret = ImageHelper::loadImageToSource(image_path.c_str(), mv_source);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
-       ret = mv_object_detection_3d_inference(handle, mv_source);
+       ret = mv_object_detection_inference(handle, mv_source);
        ASSERT_EQ(ret, 0);
 
        unsigned int probability;
@@ -79,6 +79,6 @@ TEST(ObjectDetection3DTest, InferenceShouldBeOk)
        ret = mv_destroy_source(mv_source);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
-       ret = mv_object_detection_3d_destroy(handle);
+       ret = mv_object_detection_destroy(handle);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 }