// output layer name is yolov10s/yolov8_nms_postprocess
ObjectDetection<T>::getOutputTensor(names[0], output_tensor);
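+ // Fetch the score decoding meta for this output so the confidence cut-off
+ // comes from the model's meta configuration instead of a hardcoded value.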
+ auto scoreMetaInfo = _config->getOutputMetaMap().at(names[0]);
+ auto decodingScore = static_pointer_cast<DecodingScore>(scoreMetaInfo->decodingTypeMap[DecodingType::SCORE]);
+
+ LOGD("threshold = %f", decodingScore->threshold);
+
auto ori_src_width = static_cast<float>(_preprocess.getImageWidth()[0]);
auto ori_src_height = static_cast<float>(_preprocess.getImageHeight()[0]);
auto input_tensor_width = static_cast<float>(_inference->getInputWidth());
auto input_tensor_height = static_cast<float>(_inference->getInputHeight());
// Calculates the width and height ratios between the original source dimensions and the input tensor dimensions.
auto width_ratio = ori_src_width / input_tensor_width;
auto height_ratio = ori_src_height / input_tensor_height;
+
unsigned int class_id = 0;
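+ // Track the position in the flat output tensor explicitly; each read below
+ // advances tensor_idx by exactly the number of values consumed.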
+ size_t tensor_idx = 0;
- for (size_t tensor_idx = 0; tensor_idx < output_tensor.size(); ++tensor_idx) {
+ while (tensor_idx < output_tensor.size()) {
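+ // Each class block in the flat NMS output appears to begin with the number of
+ // detections for that class, followed by top, left, bottom, right and score
+ // values for each detection.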
float num_of_classes;
- num_of_classes = output_tensor[tensor_idx];
+ num_of_classes = output_tensor[tensor_idx++];
if (num_of_classes <= 0.0f) {
class_id++;
continue;
// Decode the bounding box for a detection: the top, left, bottom, and right
// values from the output tensor are scaled by the input tensor dimensions and
// the width/height ratios to map them to original-image coordinates.
- top = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
- left = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
- bottom = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
- right = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
- confidence = output_tensor[++tensor_idx];
- if (confidence < 0.6f)
+ top = height_ratio * input_tensor_height * output_tensor[tensor_idx++];
+ left = width_ratio * input_tensor_width * output_tensor[tensor_idx++];
+ bottom = height_ratio * input_tensor_height * output_tensor[tensor_idx++];
+ right = width_ratio * input_tensor_width * output_tensor[tensor_idx++];
+ confidence = output_tensor[tensor_idx++];
+ if (confidence < decodingScore->threshold)
continue;
valid_objects++;
_result.right.push_back(static_cast<int>(right));
_result.confidences.push_back(confidence);
_result.names.push_back(_labels[class_id]);
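+ // Record the class index per detected object so indices stays aligned with
+ // the other per-object result vectors.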
+ _result.indices.push_back(class_id);
}
_result.number_of_objects += valid_objects;
- _result.indices.push_back(class_id++);
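+ // Advance to the next class block; the class index is now recorded per object
+ // above instead of once per class here.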
+ class_id++;
}
return _result;