*/
int mv_object_detection_get_bound_box(mv_object_detection_h handle, unsigned int index, int *left, int *top, int *right,
int *bottom);
+
+/**
+ * @brief Gets the label of the detected object.
+ *
+ * @since_tizen 9.0
+ *
+ * @param[in] handle The handle to the inference
+ * @param[in] index The index of a detection result
+ * @param[out] label The label name of the detected object.
+ *                   The @a label should not be released by the application
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_NO_DATA No detection result
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_object_detection_create()
+ * @pre Configure an engine by calling mv_object_detection_configure()
+ * @pre Prepare an inference by calling mv_object_detection_prepare()
+ * @pre Request an inference by calling mv_object_detection_inference()
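+ *
+ * A minimal usage sketch (the @a handle is assumed to have been prepared
+ * as described in the preconditions above):
+ * @code
+ * const char *label = NULL;
+ *
+ * int ret = mv_object_detection_get_label(handle, 0, &label);
+ * if (ret == MEDIA_VISION_ERROR_NONE)
+ *     printf("label of the first detected object: %s\n", label);
+ * @endcode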
+ */
+int mv_object_detection_get_label(mv_object_detection_h handle, unsigned int index, const char **label);
+
/**
* @}
*/
{
"name" : "MODEL_DEFAULT_PATH",
"type" : "string",
- "value" : "/opt/usr/globalapps/mediavision.object.detection/models/tflite/"
+ "value" : "/opt/usr/globalapps/mediavision.object.detection/models/hailo8l/"
},
{
"name" : "MODEL_FILE_NAME",
"type" : "string",
- "value" : "od_mobilenet_v1_ssd_postop_300x300.tflite"
+ "value" : "yolov10s.hef"
},
{
"name" : "DEFAULT_MODEL_NAME",
"type" : "string",
- "value" : "MOBILENET_V1_SSD"
+ "value" : "HAILO8_YOLOXS"
},
{
"name" : "MODEL_META_FILE_NAME",
"type" : "string",
- "value" : "od_mobilenet_v1_ssd_postop_300x300.json"
+ "value" : "yolov10s.json"
},
{
"name" : "MODEL_LABEL_FILE_NAME",
"type" : "string",
- "value" : "od_mobilenet_v1_ssd_postop_label.txt"
+ "value" : "yolov10s_label.txt"
},
{
"name" : "BACKEND_TYPE",
"type" : "integer",
- "value" : 1
+ "value" : 7
},
{
"name" : "TARGET_DEVICE_TYPE",
"type" : "integer",
- "value" : 1
+ "value" : 4
}
]
}
auto input_tensor_width = static_cast<float>(_inference->getInputWidth());
auto input_tensor_height = static_cast<float>(_inference->getInputHeight());
- // Calculate the ratio[A] between the original image size and the input tensor size.
+ // Calculates the width and height ratios between the original source dimensions and the input tensor dimensions.
auto width_ratio = ori_src_width / input_tensor_width;
auto height_ratio = ori_src_height / input_tensor_height;
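+ // The parsing below assumes the output tensor is laid out per class as a
+ // detection count followed by five values per detection
+ // (y_min, x_min, y_max, x_max, confidence), with coordinates normalized
+ // to the input tensor size.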
+ unsigned int class_id = 0;
for (size_t tensor_idx = 0; tensor_idx < output_tensor.size(); ++tensor_idx) {
float num_of_classes;
- unsigned int class_id;
num_of_classes = output_tensor[tensor_idx];
- class_id = tensor_idx;
- if (num_of_classes <= 0.0f)
+ if (num_of_classes <= 0.0f) {
+ class_id++;
continue;
+ }
+
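+ // Number of detections for this class that survive the confidence filter.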
+ unsigned int valid_objects = 0;
for (unsigned int class_idx = 0; class_idx < num_of_classes; ++class_idx) {
float left, top, right, bottom, confidence;
- // x = [width A] * width of input tensor * width ratio value of output tensor.
- // y = [height A] * height of input tensor * height ratio value of output tensor.
+ // Decodes the top, left, bottom, and right coordinates of a bounding box
+ // from the output tensor values, scaling them by the width and height ratios
+ // back to the original source size.
top = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
left = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
bottom = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
right = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
confidence = output_tensor[++tensor_idx];
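+ // Discard detections below the fixed confidence threshold of 0.6.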
+ if (confidence < 0.6f)
+ continue;
+
+ valid_objects++;
_result.top.push_back(static_cast<int>(top));
_result.left.push_back(static_cast<int>(left));
_result.bottom.push_back(static_cast<int>(bottom));
_result.right.push_back(static_cast<int>(right));
_result.confidences.push_back(confidence);
+ _result.names.push_back(_labels[class_id]);
}
- _result.number_of_objects += num_of_classes;
- _result.indices.push_back(class_id);
+ _result.number_of_objects += valid_objects;
+ _result.indices.push_back(class_id++);
}
return _result;
MEDIA_VISION_FUNCTION_LEAVE();
+ return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_object_detection_get_label(mv_object_detection_h handle, unsigned int index, const char **label)
+{
+ MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+ MEDIA_VISION_INSTANCE_CHECK(handle);
+ MEDIA_VISION_NULL_ARG_CHECK(label);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ try {
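+ // Fetches the detection result cached from the latest inference request.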
+ auto &result =
+ static_cast<ObjectDetectionResult &>(machine_learning_native_get_result_cache(handle, TASK_NAME));
+ if (index >= result.number_of_objects) {
+ LOGE("Invalid index(index = %u, result count = %u).", index, result.number_of_objects);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ if (result.names.empty())
+ return MEDIA_VISION_ERROR_NO_DATA;
+
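+ // The returned string is owned by the cached result; the caller must not free it.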
+ *label = result.names[index].c_str();
+ } catch (const BaseException &e) {
+ LOGE("%s", e.what());
+ return e.getError();
+ }
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
return MEDIA_VISION_ERROR_NONE;
}
\ No newline at end of file
// TODO.
};
const int coordinate_answers[3][4] = { { 327, 0, 718, 513 }, { 409, 66, 1001, 601 }, { 24, 29, 311, 546 } };
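+ // Expected label for each detection index, parallel to coordinate_answers.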
+ const char *label_answers[3] = { "person", "dog", "dog" };
mv_source_h mv_source = NULL;
int ret = mv_create_source(&mv_source);
int ret = mv_object_detection_get_bound_box(handle, idx, &left, &top, &right, &bottom);
ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+ const char *label = nullptr;
+
+ ret = mv_object_detection_get_label(handle, idx, &label);
+ ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
ASSERT_EQ(coordinate_answers[idx][0], left);
ASSERT_EQ(coordinate_answers[idx][1], top);
ASSERT_EQ(coordinate_answers[idx][2], right);
ASSERT_EQ(coordinate_answers[idx][3], bottom);
+ ASSERT_EQ(string(label_answers[idx]), string(label));
}
ret = mv_object_detection_destroy(handle);