From: Inki Dae
Date: Thu, 23 Jan 2025 07:34:19 +0000 (+0900)
Subject: mv_machine_learning: add mv_object_detection_get_label C API support
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c98bad81a496c50b457403e20e38c410e2ba1ae3;p=platform%2Fcore%2Fapi%2Fmediavision.git

mv_machine_learning: add mv_object_detection_get_label C API support

Add mv_object_detection_get_label C API support to provide a label name
for each detected object.

Change-Id: I77581029b2a19fd65973b7352fc2904a61f91143
Signed-off-by: Inki Dae
---

diff --git a/include/mv_object_detection.h b/include/mv_object_detection.h
index 07092c10..36944c3d 100644
--- a/include/mv_object_detection.h
+++ b/include/mv_object_detection.h
@@ -224,6 +224,30 @@ int mv_object_detection_get_result_count(mv_object_detection_h handle, unsigned
  */
 int mv_object_detection_get_bound_box(mv_object_detection_h handle, unsigned int index, int *left, int *top,
                                       int *right, int *bottom);
+
+/**
+ * @internal
+ * @brief Gets the label of a detected object region.
+ *
+ * @since_tizen 10.0
+ *
+ * @param[in] handle The handle to the inference
+ * @param[in] index A result index.
+ * @param[out] label The label name of the detected object.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_object_detection_create()
+ * @pre Configure the inference by calling mv_object_detection_configure()
+ * @pre Prepare the inference by calling mv_object_detection_prepare()
+ * @pre Request the inference by calling mv_object_detection_inference()
+ */
+int mv_object_detection_get_label(mv_object_detection_h handle, unsigned int index, const char **label);
+
 /**
  * @}
  */
diff --git a/mv_machine_learning/object_detection/meta/object_detection.json b/mv_machine_learning/object_detection/meta/object_detection.json
index 31060651..983a63fd 100644
--- a/mv_machine_learning/object_detection/meta/object_detection.json
+++ b/mv_machine_learning/object_detection/meta/object_detection.json
@@ -4,37 +4,37 @@
     {
         "name" : "MODEL_DEFAULT_PATH",
         "type" : "string",
-        "value" : "/opt/usr/globalapps/mediavision.object.detection/models/tflite/"
+        "value" : "/opt/usr/globalapps/mediavision.object.detection/models/hailo8l/"
     },
     {
         "name" : "MODEL_FILE_NAME",
         "type" : "string",
-        "value" : "od_mobilenet_v1_ssd_postop_300x300.tflite"
+        "value" : "yolov10s.hef"
     },
     {
         "name" : "DEFAULT_MODEL_NAME",
         "type" : "string",
-        "value" : "MOBILENET_V1_SSD"
+        "value" : "HAILO8_YOLOXS"
     },
     {
         "name" : "MODEL_META_FILE_NAME",
         "type" : "string",
-        "value" : "od_mobilenet_v1_ssd_postop_300x300.json"
+        "value" : "yolov10s.json"
     },
     {
         "name" : "MODEL_LABEL_FILE_NAME",
        "type" : "string",
-        "value" : "od_mobilenet_v1_ssd_postop_label.txt"
+        "value" : "yolov10s_label.txt"
     },
     {
         "name" : "BACKEND_TYPE",
         "type" : "integer",
-        "value" : 1
+        "value" : 7
     },
     {
         "name" : "TARGET_DEVICE_TYPE",
         "type" : "integer",
-        "value" : 1
+        "value" : 4
     }
     ]
 }
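The new getter is intended to be called after an inference has completed, alongside the existing mv_object_detection_get_bound_box(). The sketch below shows one possible call sequence; it is not part of the patch, error handling is trimmed, filling the mv_source with image data is omitted, and the exact signatures of mv_object_detection_configure(), mv_object_detection_prepare(), mv_object_detection_inference() and mv_object_detection_get_result_count() are assumed from the preconditions documented above.

    #include <cstdio>
    #include <mv_object_detection.h>

    /* Hypothetical helper: `source` is assumed to already contain an image. */
    static int print_detections(mv_source_h source)
    {
        mv_object_detection_h handle = nullptr;
        unsigned int count = 0;

        mv_object_detection_create(&handle);
        mv_object_detection_configure(handle);                 /* assumed signature */
        mv_object_detection_prepare(handle);                   /* assumed signature */
        mv_object_detection_inference(handle, source);         /* assumed signature */
        mv_object_detection_get_result_count(handle, &count);  /* assumed signature */

        for (unsigned int idx = 0; idx < count; ++idx) {
            int left, top, right, bottom;
            const char *label = nullptr;

            mv_object_detection_get_bound_box(handle, idx, &left, &top, &right, &bottom);
            mv_object_detection_get_label(handle, idx, &label);
            printf("(%d, %d, %d, %d) %s\n", left, top, right, bottom, label);
        }

        return mv_object_detection_destroy(handle);
    }

Because the returned label points into the result cache held by the handle (result.names[index].c_str() in the implementation below), callers should treat it as read-only and must not free it; it is only valid until the next inference on the same handle or until the handle is destroyed.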
diff --git a/mv_machine_learning/object_detection/src/HailoYoloXs.cpp b/mv_machine_learning/object_detection/src/HailoYoloXs.cpp
index 09f2e85a..e6aec175 100644
--- a/mv_machine_learning/object_detection/src/HailoYoloXs.cpp
+++ b/mv_machine_learning/object_detection/src/HailoYoloXs.cpp
@@ -62,39 +62,48 @@ template<typename T> ObjectDetectionResult &HailoYoloXs<T>::result()
 	auto input_tensor_width = static_cast<float>(_inference->getInputWidth());
 	auto input_tensor_height = static_cast<float>(_inference->getInputHeight());
 
-	// Calculate the ratio[A] between the original image size and the input tensor size.
+	// Calculate the width and height ratios between the original source size and the input tensor size.
 	auto width_ratio = ori_src_width / input_tensor_width;
 	auto height_ratio = ori_src_height / input_tensor_height;
+	unsigned int class_id = 0;
 
 	for (size_t tensor_idx = 0; tensor_idx < output_tensor.size(); ++tensor_idx) {
 		float num_of_classes;
-		unsigned int class_id;
 
 		num_of_classes = output_tensor[tensor_idx];
-		class_id = tensor_idx;
-		if (num_of_classes <= 0.0f)
+		if (num_of_classes <= 0.0f) {
+			class_id++;
 			continue;
+		}
+
+		unsigned int valid_objects = 0;
 
 		for (unsigned int class_idx = 0; class_idx < num_of_classes; ++class_idx) {
 			float left, top, right, bottom, confidence;
 
-			// x = [width A] * width of input tensor * width ratio value of output tensor.
-			// y = [height A] * height of input tensor * height ratio value of output tensor.
+			// Calculate the bounding box coordinates from the output tensor values.
+			// - The top, left, bottom and right values are computed from the output tensor
+			//   values, scaled by the input tensor size and the width/height ratios.
			top = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
 			left = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
 			bottom = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
 			right = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
 			confidence = output_tensor[++tensor_idx];
+			if (confidence < 0.6f)
+				continue;
+
+			valid_objects++;
 
 			_result.top.push_back(static_cast<int>(top));
 			_result.left.push_back(static_cast<int>(left));
 			_result.bottom.push_back(static_cast<int>(bottom));
 			_result.right.push_back(static_cast<int>(right));
 			_result.confidences.push_back(confidence);
+			_result.names.push_back(_labels[class_id]);
 		}
 
-		_result.number_of_objects += num_of_classes;
-		_result.indices.push_back(class_id);
+		_result.number_of_objects += valid_objects;
+		_result.indices.push_back(class_id++);
 	}
 
 	return _result;
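For readers unfamiliar with the output format, the loop above assumes a flattened per-class layout: for each class id, the tensor carries an object count followed by that many (top, left, bottom, right, confidence) tuples, with coordinates normalized to the input tensor size. The standalone sketch below restates just that decode step under the same assumption; the Detection struct, the decode() helper and its 0.6 default threshold are illustrative and are not part of the mediavision code base.

    #include <cstddef>
    #include <string>
    #include <vector>

    struct Detection {
        int left, top, right, bottom;
        float confidence;
        std::string name;
    };

    // Walk a tensor laid out as [count_0, t,l,b,r,score, ..., count_1, t,l,b,r,score, ...]
    // and rescale the normalized coordinates to the original image size, mirroring the
    // patched HailoYoloXs::result() loop. `labels` must hold one entry per class id.
    std::vector<Detection> decode(const std::vector<float> &tensor, const std::vector<std::string> &labels,
                                  float input_w, float input_h, float ori_w, float ori_h, float threshold = 0.6f)
    {
        std::vector<Detection> detections;
        const float width_ratio = ori_w / input_w;
        const float height_ratio = ori_h / input_h;
        unsigned int class_id = 0;

        for (size_t idx = 0; idx < tensor.size(); ++idx, ++class_id) {
            const auto count = static_cast<unsigned int>(tensor[idx]);

            for (unsigned int n = 0; n < count; ++n) {
                const float top = height_ratio * input_h * tensor[++idx];
                const float left = width_ratio * input_w * tensor[++idx];
                const float bottom = height_ratio * input_h * tensor[++idx];
                const float right = width_ratio * input_w * tensor[++idx];
                const float confidence = tensor[++idx];

                if (confidence < threshold)
                    continue;

                detections.push_back({ static_cast<int>(left), static_cast<int>(top), static_cast<int>(right),
                                       static_cast<int>(bottom), confidence, labels[class_id] });
            }
        }

        return detections;
    }

Keeping class_id in step with the tensor walk, as the patch does, is what allows _labels[class_id] to be pushed into _result.names so that mv_object_detection_get_label() can later return it by result index.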
diff --git a/mv_machine_learning/object_detection/src/mv_object_detection.cpp b/mv_machine_learning/object_detection/src/mv_object_detection.cpp
index a437082e..2163b4c6 100644
--- a/mv_machine_learning/object_detection/src/mv_object_detection.cpp
+++ b/mv_machine_learning/object_detection/src/mv_object_detection.cpp
@@ -354,5 +354,34 @@ int mv_object_detection_get_bound_box(mv_object_detection_h handle, unsigned int
 
 	MEDIA_VISION_FUNCTION_LEAVE();
 
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_object_detection_get_label(mv_object_detection_h handle, unsigned int index, const char **label)
+{
+	MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+	MEDIA_VISION_INSTANCE_CHECK(label);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	try {
+		auto &result =
+				static_cast<ObjectDetectionResult &>(machine_learning_native_get_result_cache(handle, TASK_NAME));
+		if (index >= result.number_of_objects) {
+			LOGE("Invalid index(index = %u, result count = %u).", index, result.number_of_objects);
+			return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+		}
+
+		if (result.names.empty())
+			return MEDIA_VISION_ERROR_NO_DATA;
+
+		*label = result.names[index].c_str();
+	} catch (const BaseException &e) {
+		LOGE("%s", e.what());
+		return e.getError();
+	}
+
+	MEDIA_VISION_FUNCTION_LEAVE();
+
 	return MEDIA_VISION_ERROR_NONE;
 }
\ No newline at end of file
diff --git a/test/testsuites/machine_learning/object_detection/test_object_detection.cpp b/test/testsuites/machine_learning/object_detection/test_object_detection.cpp
index 6852d61e..8c39c79d 100644
--- a/test/testsuites/machine_learning/object_detection/test_object_detection.cpp
+++ b/test/testsuites/machine_learning/object_detection/test_object_detection.cpp
@@ -229,6 +229,7 @@ TEST(ObjectDetectionHailo8LTest, YoloXsInferenceShouldBeOk)
 		// TODO.
 	};
 	const int coordinate_answers[3][4] = { { 327, 0, 718, 513 }, { 409, 66, 1001, 601 }, { 24, 29, 311, 546 } };
+	const char *label_answers[3] = { "person", "dog", "dog" };
 
 	mv_source_h mv_source = NULL;
 	int ret = mv_create_source(&mv_source);
@@ -266,10 +267,16 @@ TEST(ObjectDetectionHailo8LTest, YoloXsInferenceShouldBeOk)
 		int ret = mv_object_detection_get_bound_box(handle, idx, &left, &top, &right, &bottom);
 		ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
+		const char *label = nullptr;
+
+		ret = mv_object_detection_get_label(handle, idx, &label);
+		ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
 		ASSERT_EQ(coordinate_answers[idx][0], left);
 		ASSERT_EQ(coordinate_answers[idx][1], top);
 		ASSERT_EQ(coordinate_answers[idx][2], right);
 		ASSERT_EQ(coordinate_answers[idx][3], bottom);
+		ASSERT_EQ(string(label_answers[idx]), string(label));
 	}
 
 	ret = mv_object_detection_destroy(handle);
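The updated test only exercises the success path. A follow-up negative test could cover the error paths the new C API implements, namely MEDIA_VISION_ERROR_INVALID_PARAMETER for an out-of-range index (and MEDIA_VISION_ERROR_NO_DATA when no labels were produced). The sketch below is such a test in outline only: the test name is invented, the configure/prepare/inference steps are elided, and the mv_object_detection_get_result_count() signature is assumed to match its use in the existing YoloXs test.

    TEST(ObjectDetectionHailo8LTest, GetLabelWithInvalidIndexShouldFail)
    {
        mv_object_detection_h handle = nullptr;

        int ret = mv_object_detection_create(&handle);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);

        // Configure, prepare and run inference on a test image exactly as in
        // YoloXsInferenceShouldBeOk (omitted here for brevity).

        unsigned int count = 0;
        ret = mv_object_detection_get_result_count(handle, &count); // assumed signature
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);

        // Any index >= the result count should take the INVALID_PARAMETER path
        // added in mv_object_detection_get_label().
        const char *label = nullptr;
        ret = mv_object_detection_get_label(handle, count, &label);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_INVALID_PARAMETER);

        ret = mv_object_detection_destroy(handle);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
    }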