mv_machine_learning: add mv_object_detection_get_label C API support sandbox/inki.dae/hailort
author Inki Dae <inki.dae@samsung.com>
Thu, 23 Jan 2025 07:34:19 +0000 (16:34 +0900)
committer Inki Dae <inki.dae@samsung.com>
Thu, 23 Jan 2025 07:58:51 +0000 (16:58 +0900)
Add mv_object_detection_get_label C API support to provide a label name to
each detected object.

Change-Id: I77581029b2a19fd65973b7352fc2904a61f91143
Signed-off-by: Inki Dae <inki.dae@samsung.com>
include/mv_object_detection.h
mv_machine_learning/object_detection/meta/object_detection.json
mv_machine_learning/object_detection/src/HailoYoloXs.cpp
mv_machine_learning/object_detection/src/mv_object_detection.cpp
test/testsuites/machine_learning/object_detection/test_object_detection.cpp

index 07092c10c09ea649a268f57a3965077d6fc073ec..8ca397dff828498ddffd770e79754acd0d7d8d04 100644 (file)
@@ -224,6 +224,29 @@ int mv_object_detection_get_result_count(mv_object_detection_h handle, unsigned
  */
 int mv_object_detection_get_bound_box(mv_object_detection_h handle, unsigned int index, int *left, int *top, int *right,
                                                                          int *bottom);
+
+/**
+ * @brief Gets the label name of a detected object.
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle              The handle to the inference
+ * @param[in] index               A result index.
+ * @param[out] label              A label name to a detected object.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_object_detection_create()
+ * @pre Prepare an inference by calling mv_object_detection_configure()
+ * @pre Prepare an inference by calling mv_object_detection_prepare()
+ * @pre Request an inference by calling mv_object_detection_inference()
+ */
+int mv_object_detection_get_label(mv_object_detection_h handle, unsigned int index, const char **label);
+
 /**
  * @}
  */
index 310606516bd45de361c702dbc42b7b39bc0dbe3b..983a63fd9b2686c603fe9577e84a56e902499a6d 100644 (file)
@@ -4,37 +4,37 @@
         {
             "name" : "MODEL_DEFAULT_PATH",
             "type" : "string",
-            "value" : "/opt/usr/globalapps/mediavision.object.detection/models/tflite/"
+            "value" : "/opt/usr/globalapps/mediavision.object.detection/models/hailo8l/"
         },
                {
             "name"  : "MODEL_FILE_NAME",
             "type"  : "string",
-            "value" : "od_mobilenet_v1_ssd_postop_300x300.tflite"
+            "value" : "yolov10s.hef"
         },
         {
             "name"  : "DEFAULT_MODEL_NAME",
             "type"  : "string",
-            "value" : "MOBILENET_V1_SSD"
+            "value" : "HAILO8_YOLOXS"
         },
         {
             "name"  : "MODEL_META_FILE_NAME",
             "type"  : "string",
-            "value" : "od_mobilenet_v1_ssd_postop_300x300.json"
+            "value" : "yolov10s.json"
         },
         {
             "name"  : "MODEL_LABEL_FILE_NAME",
             "type"  : "string",
-            "value" : "od_mobilenet_v1_ssd_postop_label.txt"
+            "value" : "yolov10s_label.txt"
         },
         {
             "name"  : "BACKEND_TYPE",
             "type"  : "integer",
-            "value" : 1
+            "value" : 7
         },
         {
             "name"  : "TARGET_DEVICE_TYPE",
             "type"  : "integer",
-            "value" : 1
+            "value" : 4
         }
     ]
 }
index 1e25020508242d58edd06e2b1a8496db28b9c5ad..aac00ec8c17a8b051b0101a9c94ed44c8cc3846b 100644 (file)
@@ -62,39 +62,48 @@ template<typename T> ObjectDetectionResult &HailoYoloXs<T>::result()
        auto input_tensor_width = static_cast<float>(_inference->getInputWidth());
        auto input_tensor_height = static_cast<float>(_inference->getInputHeight());
 
-       // Calculate the ratio[A] between the original image size and the input tensor size.
+       // Calculates the width and height ratios between the original source dimensions and the input tensor dimensions.
        auto width_ratio = ori_src_width / input_tensor_width;
        auto height_ratio = ori_src_height / input_tensor_height;
+       unsigned int class_id = 0;
 
        for (size_t tensor_idx = 0; tensor_idx < output_tensor.size(); ++tensor_idx) {
                float num_of_classes;
-               unsigned int class_id;
 
                num_of_classes = output_tensor[tensor_idx];
-               class_id = tensor_idx;
-               if (num_of_classes <= 0.0f)
+               if (num_of_classes <= 0.0f) {
+                       class_id++;
                        continue;
+               }
+
+               unsigned int valid_objects = 0;
 
                for (unsigned int class_idx = 0; class_idx < num_of_classes; ++class_idx) {
                        float left, top, right, bottom, confidence;
 
-                       // x = [width A] * width of input tensor * width ratio value of output tensor.
-                       // y = [height A] * height of input tensor * height ratio value of output tensor.
+                       // Calculates the coordinates of a bounding box from the output tensor values.
+                       // - It computes the top, left, bottom, and right coordinates of a bounding box
+                       //   based on the given output tensor values and scaling factors for height and width.
                        top = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
                        left = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
                        bottom = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
                        right = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
                        confidence = output_tensor[++tensor_idx];
+                       if (confidence < 0.6f)
+                               continue;
+
+                       valid_objects++;
 
                        _result.top.push_back(static_cast<int>(top));
                        _result.left.push_back(static_cast<int>(left));
                        _result.bottom.push_back(static_cast<int>(bottom));
                        _result.right.push_back(static_cast<int>(right));
                        _result.confidences.push_back(confidence);
+                       _result.names.push_back(_labels[class_id]);
                }
 
-               _result.number_of_objects += num_of_classes;
-               _result.indices.push_back(class_id);
+               _result.number_of_objects += valid_objects;
+               _result.indices.push_back(class_id++);
        }
 
        return _result;
index a437082e8503df45b4f88be198ab473e57d53dd1..2163b4c6e3e8d2194e6bd2fe57e448d0618057b0 100644 (file)
@@ -354,5 +354,34 @@ int mv_object_detection_get_bound_box(mv_object_detection_h handle, unsigned int
 
        MEDIA_VISION_FUNCTION_LEAVE();
 
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_object_detection_get_label(mv_object_detection_h handle, unsigned int index, const char **label)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_INSTANCE_CHECK(label);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               auto &result =
+                               static_cast<ObjectDetectionResult &>(machine_learning_native_get_result_cache(handle, TASK_NAME));
+               if (index >= result.number_of_objects) {
+                       LOGE("Invalid index(index = %u, result count = %u).", index, result.number_of_objects);
+                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+               }
+
+               if (result.names.empty())
+                       return MEDIA_VISION_ERROR_NO_DATA;
+
+               *label = result.names[index].c_str();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
        return MEDIA_VISION_ERROR_NONE;
 }
\ No newline at end of file
index 6852d61ef6942a093afff9386bcd795468a39649..8c39c79da5735813a3591f080797421a427b5071 100644 (file)
@@ -229,6 +229,7 @@ TEST(ObjectDetectionHailo8LTest, YoloXsInferenceShouldBeOk)
                // TODO.
        };
        const int coordinate_answers[3][4] = { { 327, 0, 718, 513 }, { 409, 66, 1001, 601 }, { 24, 29, 311, 546 } };
+       const char *label_answers[3] = { "person", "dog", "dog" };
 
        mv_source_h mv_source = NULL;
        int ret = mv_create_source(&mv_source);
@@ -266,10 +267,16 @@ TEST(ObjectDetectionHailo8LTest, YoloXsInferenceShouldBeOk)
                        int ret = mv_object_detection_get_bound_box(handle, idx, &left, &top, &right, &bottom);
                        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
+                       const char *label = nullptr;
+
+                       ret = mv_object_detection_get_label(handle, idx, &label);
+                       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
                        ASSERT_EQ(coordinate_answers[idx][0], left);
                        ASSERT_EQ(coordinate_answers[idx][1], top);
                        ASSERT_EQ(coordinate_answers[idx][2], right);
                        ASSERT_EQ(coordinate_answers[idx][3], bottom);
+                       ASSERT_EQ(string(label_answers[idx]), string(label));
                }
 
                ret = mv_object_detection_destroy(handle);