mv_machine_learning: introduce get_result_count API for object detection 3d 49/305149/6
authorInki Dae <inki.dae@samsung.com>
Mon, 29 Jan 2024 09:24:41 +0000 (18:24 +0900)
committerInki Dae <inki.dae@samsung.com>
Wed, 31 Jan 2024 02:23:17 +0000 (02:23 +0000)
[Issue type] : new feature

Introduce get_result_count API for object detection 3d task group.

From the user's perspective, this API reports how many results exist so
that the user can request each result corresponding to a user-given index.
From the framework's perspective, it also provides consistent API behavior:
a get_result_count API call updates _current_result of the task group by
calling the getOutput function of ITask, and the other API calls return
the _current_result value by calling the getOutputCache function of ITask.

get_result_count : returns the number of detected 3d objects.
get_point_count : returns the number of points for a given 3d object index.
get_points : returns the x and y positions for a given point index.

These APIs are sufficient, so the existing APIs are dropped; a short usage
sketch is shown below.
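
A minimal usage sketch of the new sequence (illustration only, not part of
this patch; variable names are hypothetical, error handling is omitted, and
it assumes the handle was already created, configured, prepared and run
through mv_object_detection_3d_inference() as in the updated test case below):

  unsigned long frame_number;
  unsigned int result_cnt;

  /* Refreshes the cached result via ITask::getOutput(). */
  int ret = mv_object_detection_3d_get_result_count(handle, &frame_number, &result_cnt);

  for (unsigned int object_idx = 0; object_idx < result_cnt; ++object_idx) {
          float confidence;
          unsigned int point_cnt;

          /* Reads the cached result via ITask::getOutputCache(). */
          ret = mv_object_detection_3d_get_point_count(handle, object_idx, &confidence, &point_cnt);

          for (unsigned int point_idx = 0; point_idx < point_cnt; ++point_idx) {
                  int pos_x, pos_y;

                  ret = mv_object_detection_3d_get_points(handle, point_idx, &pos_x, &pos_y);
          }
  }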

In addition, this patch cleans up the ObjectDetection3d and ObjectDetection3dAdapter
classes by dropping unnecessary code, and adds the getOutput and getOutputCache
functions, already introduced by other task groups, to the Adapter class.

Change-Id: I8370bb71fc94cf6109f1fb3e25a9bbee7d39a2bb
Signed-off-by: Inki Dae <inki.dae@samsung.com>
include/mv_face_detection_internal.h
include/mv_object_detection_3d_internal.h
mv_machine_learning/object_detection_3d/include/IObjectDetection3d.h
mv_machine_learning/object_detection_3d/include/ObjectDetection3d.h
mv_machine_learning/object_detection_3d/src/ObjectDetection3d.cpp
mv_machine_learning/object_detection_3d/src/ObjectDetection3dAdapter.cpp
mv_machine_learning/object_detection_3d/src/mv_object_detection_3d.cpp
test/testsuites/machine_learning/object_detection_3d/test_object_detection_3d.cpp

index d4b6d3e7589fc31e7127161a6ec03bc9438981df..f9e862e2a615ae2bd73a71948be0e892f166e297 100644 (file)
@@ -226,10 +226,10 @@ int mv_face_detection_get_result_count(mv_face_detection_h handle, unsigned int
  * @param[in] index               A result index.
  * @param[out] frame_number       A frame number inferenced.
  * @param[out] confidences        Probability to detected objects.
- * @param[out] left               An left position array to bound boxes.
- * @param[out] top                An top position array to bound boxes.
- * @param[out] right              An right position array to bound boxes.
- * @param[out] bottom             An bottom position array to bound boxes.
+ * @param[out] left               The left position of the bounding box.
+ * @param[out] top                The top position of the bounding box.
+ * @param[out] right              The right position of the bounding box.
+ * @param[out] bottom             The bottom position of the bounding box.
  * @param[out] label              A label name to a detected object.
  *
  * @return @c 0 on success, otherwise a negative error value
index 68697a1893fbfab0c8c1b7e13bf6a4381a87a730..72aa244edc846382a5099ddf6daf665be6d606de 100644 (file)
@@ -161,65 +161,84 @@ int mv_object_detection_3d_prepare(mv_object_detection_3d_h handle);
 int mv_object_detection_3d_inference(mv_object_detection_3d_h handle, mv_source_h source);
 
 /**
- * @brief Gets the probability value to the detected object.
- * @details Use this function to get the probability value after calling @ref mv_object_detection_3d_inference().
+ * @internal
+ * @brief Gets the object detection 3d inference result count.
  *
  * @since_tizen 9.0
  *
- * @remarks The @a result must NOT be released using free()
- *
- * @param[in] handle              The handle to the face recognition object.
- * @param[out] out_probability    A pointer to probability array.
+ * @param[in] handle         The handle to the inference
+ * @param[out] frame_number  The frame number that was inferenced.
+ * @param[out] result_cnt    The number of results.
  *
  * @return @c 0 on success, otherwise a negative error value
  * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
  * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
- * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
  *
- * @pre Request an inference by calling @ref mv_object_detection_3d_inference()
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_object_detection_3d_create()
+ * @pre Configure an inference by calling mv_object_detection_3d_configure()
+ * @pre Prepare an inference by calling mv_object_detection_3d_prepare()
+ * @pre Request an inference by calling mv_object_detection_3d_inference()
  */
-int mv_object_detection_3d_get_probability(mv_object_detection_3d_h handle, unsigned int *out_probability);
+int mv_object_detection_3d_get_result_count(mv_object_detection_3d_h handle, unsigned long *frame_number,
+                                                                                       unsigned int *result_cnt);
 
 /**
- * @brief Gets the number of points to the 3D bounding box of the detected object.
- * @details Use this function to get the number of points after calling @ref mv_object_detection_3d_inference().
+ * @internal
+ * @brief Gets the point count of the detected object corresponding to a given object index.
  *
  * @since_tizen 9.0
  *
- * @remarks The @a result must NOT be released using free()
- *
- * @param[in] handle                The handle to the face recognition object.
- * @param[out] out_num_of_points    Number of points.
+ * @param[in] handle        The handle to the inference
+ * @param[in] object_index  An object index.
+ * @param[out] confidence   The probability of the detected object.
+ * @param[out] point_cnt    The number of points of the detected object.
  *
  * @return @c 0 on success, otherwise a negative error value
  * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
  * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
- * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
  *
- * @pre Request an inference by calling @ref mv_object_detection_3d_inference()
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_object_detection_3d_create()
+ * @pre Configure an inference by calling mv_object_detection_3d_configure()
+ * @pre Prepare an inference by calling mv_object_detection_3d_prepare()
+ * @pre Request an inference by calling mv_object_detection_3d_inference()
+ * @pre Get result count by calling mv_object_detection_3d_get_result_count()
  */
-int mv_object_detection_3d_get_num_of_points(mv_object_detection_3d_h handle, unsigned int *out_num_of_points);
+int mv_object_detection_3d_get_point_count(mv_object_detection_3d_h handle, unsigned int object_index,
+                                                                                  float *confidence, unsigned int *point_cnt);
 
 /**
- * @brief Gets the x and y coordinates values to the 3D bounding box of the detected object.
- * @details Use this function to get the coordinates values after calling @ref mv_object_detection_3d_inference().
+ * @internal
+ * @brief Gets the point position of the detected object corresponding to a given point index.
  *
  * @since_tizen 9.0
  *
- * @remarks The @a result must NOT be released using free()
+ * @param[in] handle              The handle to the inference
+ * @param[in] point_index         A point index.
+ * @param[out] pos_x              The x coordinate of the point.
+ * @param[out] pos_y              The y coordinate of the point.
  *
- * @param[in] handle    The handle to the face recognition object.
- * @param[out] out_x    A pointer to x coordinates array.
- * @param[out] out_y    A pointer to y coordinates array.
-  *
  * @return @c 0 on success, otherwise a negative error value
  * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
  * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
- * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
  *
- * @pre Request an inference by calling @ref mv_object_detection_3d_inference()
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_object_detection_3d_create()
+ * @pre Configure an inference by calling mv_object_detection_3d_configure()
+ * @pre Prepare an inference by calling mv_object_detection_3d_prepare()
+ * @pre Request an inference by calling mv_object_detection_3d_inference()
+ * @pre Get result count by calling mv_object_detection_3d_get_result_count()
+ * @pre Get point count by calling mv_object_detection_3d_get_point_count()
  */
-int mv_object_detection_3d_get_points(mv_object_detection_3d_h handle, unsigned int **out_x, unsigned int **out_y);
+int mv_object_detection_3d_get_points(mv_object_detection_3d_h handle, unsigned int point_index, int *pos_x,
+                                                                         int *pos_y);
 
 /**
         * @brief Set user-given inference engine and device types for inference.
index 1f1fa05fa19e15814e843100883953d5eb9c2917..b385200715020b7110ebbfd76ad936bf18db2eb2 100644 (file)
@@ -30,7 +30,6 @@ class IObjectDetection3d
 public:
        virtual ~IObjectDetection3d() {};
 
-       virtual ObjectDetection3dTaskType getTaskType() = 0;
        virtual void setEngineInfo(std::string engine_type_name, std::string device_type_name) = 0;
        virtual unsigned int getNumberOfEngines() = 0;
        virtual const std::string &getEngineType(unsigned int engine_index) = 0;
@@ -39,8 +38,9 @@ public:
        virtual std::shared_ptr<MetaInfo> getInputMetaInfo() = 0;
        virtual void configure() = 0;
        virtual void prepare() = 0;
-       virtual void perform(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo) = 0;
-       virtual ObjectDetection3dResult &result() = 0;
+       virtual void perform(mv_source_h &mv_src) = 0;
+       virtual ObjectDetection3dResult &getOutput() = 0;
+       virtual ObjectDetection3dResult &getOutputCache() = 0;
 };
 
 } // machine_learning
index fb6058dcf6b90e400d6166bb831d9e78df55c27c..7fcf809b69d6b720f25e6a24089dbcfcfc8fc353 100644 (file)
@@ -39,6 +39,7 @@ template<typename T> class ObjectDetection3d : public IObjectDetection3d
 {
 private:
        ObjectDetection3dTaskType _task_type;
+       ObjectDetection3dResult _current_result;
 
        void loadLabel();
        void getEngineList();
@@ -56,22 +57,23 @@ protected:
        void getOutputTensor(std::string &target_name, std::vector<float> &tensor);
        void configurePreprocess();
        void inference(std::vector<std::vector<T> > &inputVectors);
+       virtual ObjectDetection3dResult &result() = 0;
 
 public:
        ObjectDetection3d(ObjectDetection3dTaskType task_type, std::shared_ptr<Config> config);
        virtual ~ObjectDetection3d() = default;
 
-       ObjectDetection3dTaskType getTaskType();
-       void setEngineInfo(std::string engine_type_name, std::string device_type_name);
-       unsigned int getNumberOfEngines();
-       const std::string &getEngineType(unsigned int engine_index);
-       unsigned int getNumberOfDevices(const std::string &engine_type);
-       const std::string &getDeviceType(const std::string &engine_type, unsigned int device_index);
-       std::shared_ptr<MetaInfo> getInputMetaInfo();
-       void configure();
-       void prepare();
-       void perform(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo);
-       virtual ObjectDetection3dResult &result() = 0;
+       void setEngineInfo(std::string engine_type_name, std::string device_type_name) override;
+       unsigned int getNumberOfEngines() override;
+       const std::string &getEngineType(unsigned int engine_index) override;
+       unsigned int getNumberOfDevices(const std::string &engine_type) override;
+       const std::string &getDeviceType(const std::string &engine_type, unsigned int device_index) override;
+       std::shared_ptr<MetaInfo> getInputMetaInfo() override;
+       void configure() override;
+       void prepare() override;
+       void perform(mv_source_h &mv_src) override;
+       ObjectDetection3dResult &getOutput() override;
+       ObjectDetection3dResult &getOutputCache() override;
 };
 
 } // machine_learning
index 51f1b109ff655ce577cf1ee2e3f3e2c958005dbb..0c2772ad71199a80c547f05923980adcc06e5f78 100644 (file)
@@ -41,11 +41,6 @@ ObjectDetection3d<T>::ObjectDetection3d(ObjectDetection3dTaskType task_type, std
        _inference = make_unique<Inference>();
 }
 
-template<typename T> ObjectDetection3dTaskType ObjectDetection3d<T>::getTaskType()
-{
-       return _task_type;
-}
-
 template<typename T> void ObjectDetection3d<T>::getEngineList()
 {
        for (auto idx = MV_INFERENCE_BACKEND_NONE + 1; idx < MV_INFERENCE_BACKEND_MAX; ++idx) {
@@ -249,7 +244,7 @@ template<typename T> void ObjectDetection3d<T>::inference(vector<vector<T> > &in
        LOGI("LEAVE");
 }
 
-template<typename T> void ObjectDetection3d<T>::perform(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo)
+template<typename T> void ObjectDetection3d<T>::perform(mv_source_h &mv_src)
 {
        vector<vector<T> > inputVectors(1);
 
@@ -284,6 +279,19 @@ template<typename T> void ObjectDetection3d<T>::getOutputTensor(string &target_n
        LOGI("LEAVE");
 }
 
+template<typename T> ObjectDetection3dResult &ObjectDetection3d<T>::getOutput()
+{
+       // TODO. consider for async API later.
+       _current_result = result();
+
+       return _current_result;
+}
+
+template<typename T> ObjectDetection3dResult &ObjectDetection3d<T>::getOutputCache()
+{
+       return _current_result;
+}
+
 template class ObjectDetection3d<float>;
 template class ObjectDetection3d<unsigned char>;
 
index bb2bace2363e4d791b3c43b0c135d9c4f3c2e843..cdd061f5133b5281fd200cea3b53b0d14b4f6268 100644 (file)
@@ -132,8 +132,7 @@ void ObjectDetection3dAdapter::prepare()
 
 void ObjectDetection3dAdapter::perform(InputBaseType &input)
 {
-       shared_ptr<MetaInfo> metaInfo = _object_detection_3d->getInputMetaInfo();
-       _object_detection_3d->perform(input.inference_src, metaInfo);
+       _object_detection_3d->perform(input.inference_src);
 }
 
 void ObjectDetection3dAdapter::performAsync(InputBaseType &input)
@@ -143,12 +142,12 @@ void ObjectDetection3dAdapter::performAsync(InputBaseType &input)
 
 OutputBaseType &ObjectDetection3dAdapter::getOutput()
 {
-       return _object_detection_3d->result();
+       return _object_detection_3d->getOutput();
 }
 
 OutputBaseType &ObjectDetection3dAdapter::getOutputCache()
 {
-       throw InvalidOperation("Not support yet.");
+       return _object_detection_3d->getOutputCache();
 }
 
 }
index d43b5c489d61b647f44dd1e537e57dc9dc182985..446b8cccd9e3b0b40a041e9d1071b6308cf0318f 100644 (file)
@@ -303,21 +303,25 @@ int mv_object_detection_3d_inference(mv_object_detection_3d_h handle, mv_source_
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int mv_object_detection_3d_get_probability(mv_object_detection_3d_h handle, unsigned int *out_probability)
+int mv_object_detection_3d_get_result_count(mv_object_detection_3d_h handle, unsigned long *frame_number,
+                                                                                       unsigned int *result_cnt)
 {
        lock_guard<mutex> lock(g_object_detection_3d_mutex);
 
        MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
 
        MEDIA_VISION_INSTANCE_CHECK(handle);
-       MEDIA_VISION_NULL_ARG_CHECK(out_probability);
+       MEDIA_VISION_NULL_ARG_CHECK(frame_number);
+       MEDIA_VISION_NULL_ARG_CHECK(result_cnt);
 
        MEDIA_VISION_FUNCTION_ENTER();
 
        try {
                auto &result = static_cast<ObjectDetection3dResult &>(machine_learning_native_get_result(handle, TASK_NAME));
 
-               *out_probability = result.probability;
+               // As of now, object detection 3d can detect only one object.
+               *result_cnt = result.x_vec.size() ? 1 : 0;
+               *frame_number = result.frame_number;
        } catch (const BaseException &e) {
                LOGE("%s", e.what());
                return e.getError();
@@ -328,21 +332,30 @@ int mv_object_detection_3d_get_probability(mv_object_detection_3d_h handle, unsi
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int mv_object_detection_3d_get_num_of_points(mv_object_detection_3d_h handle, unsigned int *out_num_of_points)
+int mv_object_detection_3d_get_point_count(mv_object_detection_3d_h handle, unsigned int object_index,
+                                                                                  float *confidence, unsigned int *point_cnt)
 {
        lock_guard<mutex> lock(g_object_detection_3d_mutex);
 
        MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
 
        MEDIA_VISION_INSTANCE_CHECK(handle);
-       MEDIA_VISION_NULL_ARG_CHECK(out_num_of_points);
+       MEDIA_VISION_NULL_ARG_CHECK(confidence);
+       MEDIA_VISION_NULL_ARG_CHECK(point_cnt);
 
        MEDIA_VISION_FUNCTION_ENTER();
 
        try {
-               auto &result = static_cast<ObjectDetection3dResult &>(machine_learning_native_get_result(handle, TASK_NAME));
+               auto &result =
+                               static_cast<ObjectDetection3dResult &>(machine_learning_native_get_result_cache(handle, TASK_NAME));
+               // As of now, object detection 3d can detect only one object.
+               unsigned int object_cnt = result.x_vec.size() ? 1 : 0;
+
+               if (object_index >= object_cnt)
+                       throw InvalidParameter("invalid object index.");
 
-               *out_num_of_points = result.number_of_points;
+               *confidence = result.probability;
+               *point_cnt = result.number_of_points;
        } catch (const BaseException &e) {
                LOGE("%s", e.what());
                return e.getError();
@@ -353,29 +366,27 @@ int mv_object_detection_3d_get_num_of_points(mv_object_detection_3d_h handle, un
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int mv_object_detection_3d_get_points(mv_object_detection_3d_h handle, unsigned int **out_x, unsigned int **out_y)
+int mv_object_detection_3d_get_points(mv_object_detection_3d_h handle, unsigned int point_index, int *pos_x, int *pos_y)
 {
        lock_guard<mutex> lock(g_object_detection_3d_mutex);
 
        MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
 
        MEDIA_VISION_INSTANCE_CHECK(handle);
-       MEDIA_VISION_NULL_ARG_CHECK(out_x);
-       MEDIA_VISION_NULL_ARG_CHECK(out_y);
+       MEDIA_VISION_NULL_ARG_CHECK(pos_x);
+       MEDIA_VISION_NULL_ARG_CHECK(pos_y);
 
        MEDIA_VISION_FUNCTION_ENTER();
 
        try {
-               auto &result = static_cast<ObjectDetection3dResult &>(machine_learning_native_get_result(handle, TASK_NAME));
-
-               *out_x = result.x_vec.data();
-               *out_y = result.y_vec.data();
+               auto &result =
+                               static_cast<ObjectDetection3dResult &>(machine_learning_native_get_result_cache(handle, TASK_NAME));
 
-               for (auto &edge : result.edge_index_vec)
-                       LOGI("%d,%d ", edge.start, edge.end);
+               if (point_index >= result.number_of_points)
+                       throw InvalidParameter("invalid point index.");
 
-               for (unsigned int i = 0; i < result.number_of_points; ++i)
-                       LOGI("%d %d", (*out_x)[i], (*out_y)[i]);
+               *pos_x = result.x_vec[point_index];
+               *pos_y = result.y_vec[point_index];
        } catch (const BaseException &e) {
                LOGE("%s", e.what());
                return e.getError();
index 0ce07a53f94bd4e2a8c6b63ef010478cfc8ed6b6..6732dc6e7281c458ee08b977ca201ab48e4bff64 100644 (file)
@@ -90,6 +90,8 @@ TEST(ObjectDetection3dTest, InferenceShouldBeOk)
                { "objectron", "object_detection_3d_cup.tflite", "object_detection_3d_cup.json", "" }
                // TODO.
        };
+       const unsigned int coordinate_answers[][9] = { { 459, 381, 258, 374, 222, 649, 583, 674, 599 },
+                                                                                                  { 381, 457, 511, 235, 243, 492, 571, 247, 273 } };
 
        const string image_path = IMAGE_PATH;
        mv_source_h mv_source = NULL;
@@ -124,25 +126,28 @@ TEST(ObjectDetection3dTest, InferenceShouldBeOk)
                ret = mv_object_detection_3d_inference(handle, mv_source);
                ASSERT_EQ(ret, 0);
 
-               unsigned int probability;
+               unsigned long frame_number;
+               unsigned int result_cnt;
 
-               ret = mv_object_detection_3d_get_probability(handle, &probability);
+               ret = mv_object_detection_3d_get_result_count(handle, &frame_number, &result_cnt);
                ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
-               std::cout << "Probability = " << probability << std::endl;
+               for (unsigned int object_idx = 0; object_idx < result_cnt; ++object_idx) {
+                       float confidence;
+                       unsigned int point_cnt;
 
-               unsigned int num_of_points;
+                       ret = mv_object_detection_3d_get_point_count(handle, object_idx, &confidence, &point_cnt);
+                       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
-               ret = mv_object_detection_3d_get_num_of_points(handle, &num_of_points);
-               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+                       for (unsigned int point_idx = 0; point_idx < point_cnt; ++point_idx) {
+                               int pos_x, pos_y;
 
-               unsigned int *x_array, *y_array;
+                               ret = mv_object_detection_3d_get_points(handle, point_idx, &pos_x, &pos_y);
+                               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
-               ret = mv_object_detection_3d_get_points(handle, &x_array, &y_array);
-               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
-
-               for (unsigned int idx = 0; idx < num_of_points; ++idx)
-                       std::cout << "index = " << idx + 1 << " : " << x_array[idx] << " x " << y_array[idx] << std::endl;
+                               ASSERT_TRUE(pos_x == coordinate_answers[0][point_idx] && pos_y == coordinate_answers[1][point_idx]);
+                       }
+               }
 
                ret = mv_object_detection_3d_destroy(handle);
                ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);