From: Inki Dae
Date: Thu, 23 Jan 2025 07:34:19 +0000 (+0900)
Subject: mv_machine_learning: add mv_object_detection_get_label C API support
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c98bad81a496c50b457403e20e38c410e2ba1ae3;p=platform%2Fcore%2Fapi%2Fmediavision.git

mv_machine_learning: add mv_object_detection_get_label C API support

Add mv_object_detection_get_label C API support to provide a label name
for each detected object.

Change-Id: I77581029b2a19fd65973b7352fc2904a61f91143
Signed-off-by: Inki Dae
---

diff --git a/include/mv_object_detection.h b/include/mv_object_detection.h
index 07092c10..36944c3d 100644
--- a/include/mv_object_detection.h
+++ b/include/mv_object_detection.h
@@ -224,6 +224,30 @@ int mv_object_detection_get_result_count(mv_object_detection_h handle, unsigned
  */
 int mv_object_detection_get_bound_box(mv_object_detection_h handle, unsigned int index, int *left, int *top,
                                       int *right, int *bottom);
+
+/**
+ * @internal
+ * @brief Gets the label of a detected object region.
+ *
+ * @since_tizen 10.0
+ *
+ * @param[in] handle The handle to the inference
+ * @param[in] index A result index.
+ * @param[out] label The label name of the detected object.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_object_detection_create()
+ * @pre Configure the inference by calling mv_object_detection_configure()
+ * @pre Prepare the inference by calling mv_object_detection_prepare()
+ * @pre Request the inference by calling mv_object_detection_inference()
+ */
+int mv_object_detection_get_label(mv_object_detection_h handle, unsigned int index, const char **label);
+
 /**
  * @}
  */
diff --git a/mv_machine_learning/object_detection/meta/object_detection.json b/mv_machine_learning/object_detection/meta/object_detection.json
index 31060651..983a63fd 100644
--- a/mv_machine_learning/object_detection/meta/object_detection.json
+++ b/mv_machine_learning/object_detection/meta/object_detection.json
@@ -4,37 +4,37 @@
     {
         "name" : "MODEL_DEFAULT_PATH",
         "type" : "string",
-        "value" : "/opt/usr/globalapps/mediavision.object.detection/models/tflite/"
+        "value" : "/opt/usr/globalapps/mediavision.object.detection/models/hailo8l/"
     },
     {
         "name" : "MODEL_FILE_NAME",
         "type" : "string",
-        "value" : "od_mobilenet_v1_ssd_postop_300x300.tflite"
+        "value" : "yolov10s.hef"
     },
     {
         "name" : "DEFAULT_MODEL_NAME",
         "type" : "string",
-        "value" : "MOBILENET_V1_SSD"
+        "value" : "HAILO8_YOLOXS"
     },
     {
         "name" : "MODEL_META_FILE_NAME",
         "type" : "string",
-        "value" : "od_mobilenet_v1_ssd_postop_300x300.json"
+        "value" : "yolov10s.json"
     },
     {
         "name" : "MODEL_LABEL_FILE_NAME",
        "type" : "string",
-        "value" : "od_mobilenet_v1_ssd_postop_label.txt"
+        "value" : "yolov10s_label.txt"
     },
     {
         "name" : "BACKEND_TYPE",
         "type" : "integer",
-        "value" : 1
+        "value" : 7
     },
     {
         "name" : "TARGET_DEVICE_TYPE",
         "type" : "integer",
-        "value" : 1
+        "value" : 4
     }
     ]
 }
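The new getter is intended to be called after an inference has completed, alongside the existing mv_object_detection_get_bound_box(). The sketch below shows one possible call sequence; it is not part of the patch, error handling is trimmed, filling the mv_source with image data is omitted, and the exact signatures of mv_object_detection_configure(), mv_object_detection_prepare(), mv_object_detection_inference() and mv_object_detection_get_result_count() are assumed from the preconditions documented above.

    #include <cstdio>
    #include <mv_object_detection.h>

    /* Hypothetical helper: `source` is assumed to already contain an image. */
    static int print_detections(mv_source_h source)
    {
        mv_object_detection_h handle = nullptr;
        unsigned int count = 0;

        mv_object_detection_create(&handle);
        mv_object_detection_configure(handle);                 /* assumed signature */
        mv_object_detection_prepare(handle);                   /* assumed signature */
        mv_object_detection_inference(handle, source);         /* assumed signature */
        mv_object_detection_get_result_count(handle, &count);  /* assumed signature */

        for (unsigned int idx = 0; idx < count; ++idx) {
            int left, top, right, bottom;
            const char *label = nullptr;

            mv_object_detection_get_bound_box(handle, idx, &left, &top, &right, &bottom);
            mv_object_detection_get_label(handle, idx, &label);
            printf("(%d, %d, %d, %d) %s\n", left, top, right, bottom, label);
        }

        return mv_object_detection_destroy(handle);
    }

Because the returned label points into the result cache held by the handle (result.names[index].c_str() in the implementation below), callers should treat it as read-only and must not free it; it is only valid until the next inference on the same handle or until the handle is destroyed.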
diff --git a/mv_machine_learning/object_detection/src/HailoYoloXs.cpp b/mv_machine_learning/object_detection/src/HailoYoloXs.cpp
index 09f2e85a..e6aec175 100644
--- a/mv_machine_learning/object_detection/src/HailoYoloXs.cpp
+++ b/mv_machine_learning/object_detection/src/HailoYoloXs.cpp
@@ -62,39 +62,48 @@ template<typename T> ObjectDetectionResult &HailoYoloXs<T>::result()
 	auto input_tensor_width = static_cast<float>(_inference->getInputWidth());
 	auto input_tensor_height = static_cast<float>(_inference->getInputHeight());
 
-	// Calculate the ratio[A] between the original image size and the input tensor size.
+	// Calculate the width and height ratios between the original source size and the input tensor size.
 	auto width_ratio = ori_src_width / input_tensor_width;
 	auto height_ratio = ori_src_height / input_tensor_height;
+	unsigned int class_id = 0;
 
 	for (size_t tensor_idx = 0; tensor_idx < output_tensor.size(); ++tensor_idx) {
 		float num_of_classes;
-		unsigned int class_id;
 
 		num_of_classes = output_tensor[tensor_idx];
-		class_id = tensor_idx;
-		if (num_of_classes <= 0.0f)
+		if (num_of_classes <= 0.0f) {
+			class_id++;
 			continue;
+		}
+
+		unsigned int valid_objects = 0;
 
 		for (unsigned int class_idx = 0; class_idx < num_of_classes; ++class_idx) {
 			float left, top, right, bottom, confidence;
 
-			// x = [width A] * width of input tensor * width ratio value of output tensor.
-			// y = [height A] * height of input tensor * height ratio value of output tensor.
+			// Calculate the bounding box coordinates from the output tensor values.
+			// - The top, left, bottom and right values are computed from the output tensor
+			//   values, scaled by the input tensor size and the width/height ratios.
			top = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
 			left = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
 			bottom = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
 			right = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
 			confidence = output_tensor[++tensor_idx];
+			if (confidence < 0.6f)
+				continue;
+
+			valid_objects++;
 
 			_result.top.push_back(static_cast<int>(top));
 			_result.left.push_back(static_cast<int>(left));
 			_result.bottom.push_back(static_cast<int>(bottom));
 			_result.right.push_back(static_cast<int>(right));
 			_result.confidences.push_back(confidence);
+			_result.names.push_back(_labels[class_id]);
 		}
 
-		_result.number_of_objects += num_of_classes;
-		_result.indices.push_back(class_id);
+		_result.number_of_objects += valid_objects;
+		_result.indices.push_back(class_id++);
 	}
 
 	return _result;
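For readers unfamiliar with the output format, the loop above assumes a flattened per-class layout: for each class id, the tensor carries an object count followed by that many (top, left, bottom, right, confidence) tuples, with coordinates normalized to the input tensor size. The standalone sketch below restates just that decode step under the same assumption; the Detection struct, the decode() helper and its 0.6 default threshold are illustrative and are not part of the mediavision code base.

    #include <cstddef>
    #include <string>
    #include <vector>

    struct Detection {
        int left, top, right, bottom;
        float confidence;
        std::string name;
    };

    // Walk a tensor laid out as [count_0, t,l,b,r,score, ..., count_1, t,l,b,r,score, ...]
    // and rescale the normalized coordinates to the original image size, mirroring the
    // patched HailoYoloXs::result() loop. `labels` must hold one entry per class id.
    std::vector<Detection> decode(const std::vector<float> &tensor, const std::vector<std::string> &labels,
                                  float input_w, float input_h, float ori_w, float ori_h, float threshold = 0.6f)
    {
        std::vector<Detection> detections;
        const float width_ratio = ori_w / input_w;
        const float height_ratio = ori_h / input_h;
        unsigned int class_id = 0;

        for (size_t idx = 0; idx < tensor.size(); ++idx, ++class_id) {
            const auto count = static_cast<unsigned int>(tensor[idx]);

            for (unsigned int n = 0; n < count; ++n) {
                const float top = height_ratio * input_h * tensor[++idx];
                const float left = width_ratio * input_w * tensor[++idx];
                const float bottom = height_ratio * input_h * tensor[++idx];
                const float right = width_ratio * input_w * tensor[++idx];
                const float confidence = tensor[++idx];

                if (confidence < threshold)
                    continue;

                detections.push_back({ static_cast<int>(left), static_cast<int>(top), static_cast<int>(right),
                                       static_cast<int>(bottom), confidence, labels[class_id] });
            }
        }

        return detections;
    }

Keeping class_id in step with the tensor walk, as the patch does, is what allows _labels[class_id] to be pushed into _result.names so that mv_object_detection_get_label() can later return it by result index.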
diff --git a/mv_machine_learning/object_detection/src/mv_object_detection.cpp b/mv_machine_learning/object_detection/src/mv_object_detection.cpp
index a437082e..2163b4c6 100644
--- a/mv_machine_learning/object_detection/src/mv_object_detection.cpp
+++ b/mv_machine_learning/object_detection/src/mv_object_detection.cpp
@@ -354,5 +354,34 @@ int mv_object_detection_get_bound_box(mv_object_detection_h handle, unsigned int
 
 	MEDIA_VISION_FUNCTION_LEAVE();
 
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_object_detection_get_label(mv_object_detection_h handle, unsigned int index, const char **label)
+{
+	MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+	MEDIA_VISION_INSTANCE_CHECK(label);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	try {
+		auto &result =
+				static_cast<ObjectDetectionResult &>(machine_learning_native_get_result_cache(handle, TASK_NAME));
+		if (index >= result.number_of_objects) {
+			LOGE("Invalid index(index = %u, result count = %u).", index, result.number_of_objects);
+			return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+		}
+
+		if (result.names.empty())
+			return MEDIA_VISION_ERROR_NO_DATA;
+
+		*label = result.names[index].c_str();
+	} catch (const BaseException &e) {
+		LOGE("%s", e.what());
+		return e.getError();
+	}
+
+	MEDIA_VISION_FUNCTION_LEAVE();
+
 	return MEDIA_VISION_ERROR_NONE;
 }
\ No newline at end of file
diff --git a/test/testsuites/machine_learning/object_detection/test_object_detection.cpp b/test/testsuites/machine_learning/object_detection/test_object_detection.cpp
index 6852d61e..8c39c79d 100644
--- a/test/testsuites/machine_learning/object_detection/test_object_detection.cpp
+++ b/test/testsuites/machine_learning/object_detection/test_object_detection.cpp
@@ -229,6 +229,7 @@ TEST(ObjectDetectionHailo8LTest, YoloXsInferenceShouldBeOk)
 		// TODO.
 	};
 	const int coordinate_answers[3][4] = { { 327, 0, 718, 513 }, { 409, 66, 1001, 601 }, { 24, 29, 311, 546 } };
+	const char *label_answers[3] = { "person", "dog", "dog" };
 
 	mv_source_h mv_source = NULL;
 	int ret = mv_create_source(&mv_source);
@@ -266,10 +267,16 @@ TEST(ObjectDetectionHailo8LTest, YoloXsInferenceShouldBeOk)
 		int ret = mv_object_detection_get_bound_box(handle, idx, &left, &top, &right, &bottom);
 		ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
 
+		const char *label = nullptr;
+
+		ret = mv_object_detection_get_label(handle, idx, &label);
+		ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
 		ASSERT_EQ(coordinate_answers[idx][0], left);
 		ASSERT_EQ(coordinate_answers[idx][1], top);
 		ASSERT_EQ(coordinate_answers[idx][2], right);
 		ASSERT_EQ(coordinate_answers[idx][3], bottom);
+		ASSERT_EQ(string(label_answers[idx]), string(label));
 	}
 
 	ret = mv_object_detection_destroy(handle);
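The updated test only exercises the success path. A follow-up negative test could cover the error paths the new C API implements, namely MEDIA_VISION_ERROR_INVALID_PARAMETER for an out-of-range index (and MEDIA_VISION_ERROR_NO_DATA when no labels were produced). The sketch below is such a test in outline only: the test name is invented, the configure/prepare/inference steps are elided, and the mv_object_detection_get_result_count() signature is assumed to match its use in the existing YoloXs test.

    TEST(ObjectDetectionHailo8LTest, GetLabelWithInvalidIndexShouldFail)
    {
        mv_object_detection_h handle = nullptr;

        int ret = mv_object_detection_create(&handle);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);

        // Configure, prepare and run inference on a test image exactly as in
        // YoloXsInferenceShouldBeOk (omitted here for brevity).

        unsigned int count = 0;
        ret = mv_object_detection_get_result_count(handle, &count); // assumed signature
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);

        // Any index >= the result count should take the INVALID_PARAMETER path
        // added in mv_object_detection_get_label().
        const char *label = nullptr;
        ret = mv_object_detection_get_label(handle, count, &label);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_INVALID_PARAMETER);

        ret = mv_object_detection_destroy(handle);
        ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
    }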