mv_machine_learning: add mv_object_detection_get_label C API support sandbox/inki.dae/hailort
author Inki Dae <inki.dae@samsung.com>
Thu, 23 Jan 2025 07:34:19 +0000 (16:34 +0900)
committer Inki Dae <inki.dae@samsung.com>
Thu, 23 Jan 2025 07:58:51 +0000 (16:58 +0900)
Add mv_object_detection_get_label C API support to provide a label name to
each detected object.

Change-Id: I77581029b2a19fd65973b7352fc2904a61f91143
Signed-off-by: Inki Dae <inki.dae@samsung.com>
include/mv_object_detection.h
mv_machine_learning/object_detection/meta/object_detection.json
mv_machine_learning/object_detection/src/HailoYoloXs.cpp
mv_machine_learning/object_detection/src/mv_object_detection.cpp
test/testsuites/machine_learning/object_detection/test_object_detection.cpp

index 07092c10c09ea649a268f57a3965077d6fc073ec..8ca397dff828498ddffd770e79754acd0d7d8d04 100644 (file)
@@ -224,6 +224,29 @@ int mv_object_detection_get_result_count(mv_object_detection_h handle, unsigned
  */
 int mv_object_detection_get_bound_box(mv_object_detection_h handle, unsigned int index, int *left, int *top, int *right,
                                                                          int *bottom);
+
+/**
+ * @brief Gets the label name of a detected object.
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle              The handle to the inference
+ * @param[in] index               A result index.
+ * @param[out] label              A label name to a detected object.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_object_detection_create()
+ * @pre Prepare an inference by calling mv_object_detection_configure()
+ * @pre Prepare an inference by calling mv_object_detection_prepare()
+ * @pre Request an inference by calling mv_object_detection_inference()
+ */
+int mv_object_detection_get_label(mv_object_detection_h handle, unsigned int index, const char **label);
+
 /**
  * @}
  */
index 310606516bd45de361c702dbc42b7b39bc0dbe3b..983a63fd9b2686c603fe9577e84a56e902499a6d 100644 (file)
@@ -4,37 +4,37 @@
         {
             "name" : "MODEL_DEFAULT_PATH",
             "type" : "string",
-            "value" : "/opt/usr/globalapps/mediavision.object.detection/models/tflite/"
+            "value" : "/opt/usr/globalapps/mediavision.object.detection/models/hailo8l/"
         },
                {
             "name"  : "MODEL_FILE_NAME",
             "type"  : "string",
-            "value" : "od_mobilenet_v1_ssd_postop_300x300.tflite"
+            "value" : "yolov10s.hef"
         },
         {
             "name"  : "DEFAULT_MODEL_NAME",
             "type"  : "string",
-            "value" : "MOBILENET_V1_SSD"
+            "value" : "HAILO8_YOLOXS"
         },
         {
             "name"  : "MODEL_META_FILE_NAME",
             "type"  : "string",
-            "value" : "od_mobilenet_v1_ssd_postop_300x300.json"
+            "value" : "yolov10s.json"
         },
         {
             "name"  : "MODEL_LABEL_FILE_NAME",
             "type"  : "string",
-            "value" : "od_mobilenet_v1_ssd_postop_label.txt"
+            "value" : "yolov10s_label.txt"
         },
         {
             "name"  : "BACKEND_TYPE",
             "type"  : "integer",
-            "value" : 1
+            "value" : 7
         },
         {
             "name"  : "TARGET_DEVICE_TYPE",
             "type"  : "integer",
-            "value" : 1
+            "value" : 4
         }
     ]
 }
index 1e25020508242d58edd06e2b1a8496db28b9c5ad..aac00ec8c17a8b051b0101a9c94ed44c8cc3846b 100644 (file)
@@ -62,39 +62,48 @@ template<typename T> ObjectDetectionResult &HailoYoloXs<T>::result()
        auto input_tensor_width = static_cast<float>(_inference->getInputWidth());
        auto input_tensor_height = static_cast<float>(_inference->getInputHeight());
 
-       // Calculate the ratio[A] between the original image size and the input tensor size.
+       // Calculates the width and height ratios between the original source dimensions and the input tensor dimensions.
        auto width_ratio = ori_src_width / input_tensor_width;
        auto height_ratio = ori_src_height / input_tensor_height;
+       unsigned int class_id = 0;
 
        for (size_t tensor_idx = 0; tensor_idx < output_tensor.size(); ++tensor_idx) {
                float num_of_classes;
-               unsigned int class_id;
 
                num_of_classes = output_tensor[tensor_idx];
-               class_id = tensor_idx;
-               if (num_of_classes <= 0.0f)
+               if (num_of_classes <= 0.0f) {
+                       class_id++;
                        continue;
+               }
+
+               unsigned int valid_objects = 0;
 
                for (unsigned int class_idx = 0; class_idx < num_of_classes; ++class_idx) {
                        float left, top, right, bottom, confidence;
 
-                       // x = [width A] * width of input tensor * width ratio value of output tensor.
-                       // y = [height A] * height of input tensor * height ratio value of output tensor.
+                       // Calculates the coordinates of a bounding box from the output tensor values.
+                       // - It computes the top, left, bottom, and right coordinates of a bounding box
+                       //   based on the given output tensor values and scaling factors for height and width.
                        top = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
                        left = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
                        bottom = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
                        right = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
                        confidence = output_tensor[++tensor_idx];
+                       if (confidence < 0.6f)
+                               continue;
+
+                       valid_objects++;
 
                        _result.top.push_back(static_cast<int>(top));
                        _result.left.push_back(static_cast<int>(left));
                        _result.bottom.push_back(static_cast<int>(bottom));
                        _result.right.push_back(static_cast<int>(right));
                        _result.confidences.push_back(confidence);
+                       _result.names.push_back(_labels[class_id]);
                }
 
-               _result.number_of_objects += num_of_classes;
-               _result.indices.push_back(class_id);
+               _result.number_of_objects += valid_objects;
+               _result.indices.push_back(class_id++);
        }
 
        return _result;
index a437082e8503df45b4f88be198ab473e57d53dd1..2163b4c6e3e8d2194e6bd2fe57e448d0618057b0 100644 (file)
@@ -354,5 +354,34 @@ int mv_object_detection_get_bound_box(mv_object_detection_h handle, unsigned int
 
        MEDIA_VISION_FUNCTION_LEAVE();
 
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_object_detection_get_label(mv_object_detection_h handle, unsigned int index, const char **label)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_INSTANCE_CHECK(label);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               auto &result =
+                               static_cast<ObjectDetectionResult &>(machine_learning_native_get_result_cache(handle, TASK_NAME));
+               if (index >= result.number_of_objects) {
+                       LOGE("Invalid index(index = %u, result count = %u).", index, result.number_of_objects);
+                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+               }
+
+               if (result.names.empty())
+                       return MEDIA_VISION_ERROR_NO_DATA;
+
+               *label = result.names[index].c_str();
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
        return MEDIA_VISION_ERROR_NONE;
 }
\ No newline at end of file
index 6852d61ef6942a093afff9386bcd795468a39649..8c39c79da5735813a3591f080797421a427b5071 100644 (file)
@@ -229,6 +229,7 @@ TEST(ObjectDetectionHailo8LTest, YoloXsInferenceShouldBeOk)
                // TODO.
        };
        const int coordinate_answers[3][4] = { { 327, 0, 718, 513 }, { 409, 66, 1001, 601 }, { 24, 29, 311, 546 } };
+       const char *label_answers[3] = { "person", "dog", "dog" };
 
        mv_source_h mv_source = NULL;
        int ret = mv_create_source(&mv_source);
@@ -266,10 +267,16 @@ TEST(ObjectDetectionHailo8LTest, YoloXsInferenceShouldBeOk)
                        int ret = mv_object_detection_get_bound_box(handle, idx, &left, &top, &right, &bottom);
                        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
+                       const char *label = nullptr;
+
+                       ret = mv_object_detection_get_label(handle, idx, &label);
+                       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
                        ASSERT_EQ(coordinate_answers[idx][0], left);
                        ASSERT_EQ(coordinate_answers[idx][1], top);
                        ASSERT_EQ(coordinate_answers[idx][2], right);
                        ASSERT_EQ(coordinate_answers[idx][3], bottom);
+                       ASSERT_EQ(string(label_answers[idx]), string(label));
                }
 
                ret = mv_object_detection_destroy(handle);