[WIP-05] Bugfix: MvFaceLandmarkDetection fills _mv_src with wrong data
authorTae-Young Chung <ty83.chung@samsung.com>
Thu, 25 Apr 2024 00:05:01 +0000 (09:05 +0900)
committerTae-Young Chung <ty83.chung@samsung.com>
Thu, 25 Apr 2024 02:51:42 +0000 (11:51 +0900)
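Taking a region of interest from a cv::Mat with cv::Rect yields a shallow
copy: the ROI shares the parent's buffer, so its rows are not contiguous.
Passing such a cv::Mat to mv_source_fill_by_buffer() fills _mv_src with
wrong data and produces wrong results.
Use clone() to make a deep (contiguous) copy of the ROI first.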

Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
inference/backends/mediavision/include/MvFaceLandmarkDetectionTask.h
inference/backends/mediavision/src/MvFaceDetectionTask.cpp
inference/backends/mediavision/src/MvFaceLandmarkDetectionTask.cpp
inference/src/MvInferenceFaceService.cpp
test/services/test_smartpointer.cpp

index a7ce8b7278e12e8fc579714010e26ce9058e96a4..407aa27c2fb810f9b1f36f993519addf51851db1 100644 (file)
@@ -18,6 +18,7 @@
 #define __MV_FACE_LANDMARK_DETECTION_TASK_H__
 
 #include <memory>
+#include <opencv2/core.hpp>
 #include "MvFaceTaskManager.h"
 #include "mv_facial_landmark_internal.h"
 
@@ -33,6 +34,7 @@ private:
     mv_facial_landmark_h _handle {};
     mv_source_h _mv_src {};
 
+    // Runs facial landmark inference on `data` and stores the detected
+    // landmark positions in `landmarks`.
+    // `roi_left` is added to every landmark's x coordinate and `roi_right`
+    // to every landmark's y coordinate.
+    // NOTE(review): `roi_right` therefore acts as a vertical (top) offset
+    // despite its name; the caller in handle() passes rect.top for it.
+    void getLandmarks(cv::Mat &data, Points &landmarks, int roi_left = 0, int roi_right = 0);
 public:
     MvFaceLandmarkDetectionTask();
     virtual ~MvFaceLandmarkDetectionTask();
index 672fa67885431e13fe044df0559082cd925091bf..d131fcd515ca08fe5222041521a51213fb48ba38 100644 (file)
@@ -17,6 +17,9 @@
 #include <stdexcept>
 #include "MvFaceDetectionTask.h"
 #include "SingleoTimer.h"
+#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
 
 using namespace std;
 
index d87de1d89dd7655318ee262f33fb6e5e627b7468..61a2ce0098a9d6b057c99a25862066add27143d1 100644 (file)
@@ -15,7 +15,8 @@
  */
 
 #include <stdexcept>
-#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
 #include "MvFaceLandmarkDetectionTask.h"
 #include "SingleoTimer.h"
 
@@ -37,6 +38,10 @@ MvFaceLandmarkDetectionTask::MvFaceLandmarkDetectionTask()
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw runtime_error("Fail to create face landmark detection handle.");
 
+       ret = mv_facial_landmark_set_model(_handle, "FLD_U2NET", "fld_u2net_160x160.tflite", "fld_u2net_160x160.json", "");
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to set face landmark detection model.");
+
     ret = mv_facial_landmark_configure(_handle);
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw runtime_error("Fail to configure face landmark detection.");
@@ -52,48 +57,55 @@ MvFaceLandmarkDetectionTask::~MvFaceLandmarkDetectionTask()
     mv_destroy_source(_mv_src);
 }
 
+void MvFaceLandmarkDetectionTask::getLandmarks(cv::Mat &data, Points &landmarks, int roi_left, int roi_right)
+{
+       int ret = mv_source_fill_by_buffer(_mv_src, data.data, data.cols * data.rows * data.channels(), data.cols,
+                                                                               data.rows, MEDIA_VISION_COLORSPACE_RGB888);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to convert to mv source.");
+
+       ret = mv_facial_landmark_inference(_handle, _mv_src);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to invoke face landmark detection.");
+
+       unsigned long frame_number;
+       unsigned int result_cnt;
+       ret = mv_facial_landmark_get_result_count(_handle, &frame_number, &result_cnt);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to get face landmark detection result count.");
+
+       Point landmark;
+       for (unsigned int idx = 0; idx < result_cnt; ++idx) {
+
+               ret = mv_facial_landmark_get_position(_handle, idx, &landmark.x, &landmark.y);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw runtime_error("Fail to get face landmark detection bound box.");
+
+               landmark.x += roi_left;
+               landmark.y += roi_right;
+               landmarks.push_back(landmark);
+               SINGLEO_LOGD("idx[%2zd]: (%3zd, %3zd)", idx, landmark.x, landmark.y);
+       }
+}
+
 FaceResult& MvFaceLandmarkDetectionTask::handle(unsigned char* data, unsigned int width, unsigned int height, unsigned int byte_per_pixel, FaceResult &result)
 {
     SINGLEO_LOGD("MvFaceLandmarkDetectionTask::handle()");
 
        Timer timer;
        cv::Mat cvData(cv::Size(width, height), CV_MAKE_TYPE(CV_8U, byte_per_pixel), data);
-       cv::Mat roiCvData;
+       Points landmarks;
 
-       if (result._rects.empty())
-               roiCvData = cvData;
-       else{
+       if (result._rects.empty()) {
+               getLandmarks(cvData, landmarks);
+
+               result._landmarks.push_back(landmarks);
+               result = MvFaceTaskManager::handle(data, width, height, byte_per_pixel, result);
+       } else {
                for (auto &rect : result._rects) {
-                       roiCvData = cvData(cv::Rect(rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top));
-
-                       int ret = mv_source_fill_by_buffer(_mv_src, roiCvData.data, roiCvData.cols * roiCvData.rows * byte_per_pixel, roiCvData.cols,
-                                                                                       roiCvData.rows, MEDIA_VISION_COLORSPACE_RGB888);
-                       if (ret != MEDIA_VISION_ERROR_NONE)
-                               throw runtime_error("Fail to convert to mv source.");
-
-                       ret = mv_facial_landmark_inference(_handle, _mv_src);
-                       if (ret != MEDIA_VISION_ERROR_NONE)
-                               throw runtime_error("Fail to invoke face landmark detection.");
-
-                       unsigned long frame_number;
-                       unsigned int result_cnt;
-                       ret = mv_facial_landmark_get_result_count(_handle, &frame_number, &result_cnt);
-                       if (ret != MEDIA_VISION_ERROR_NONE)
-                               throw runtime_error("Fail to get face landmark detection result count.");
-
-                       Points landmarks;
-                       for (unsigned int idx = 0; idx < result_cnt; ++idx) {
-                               Point landmark;
-
-                               ret = mv_facial_landmark_get_position(_handle, idx, &landmark.x, &landmark.y);
-                               if (ret != MEDIA_VISION_ERROR_NONE)
-                                       throw runtime_error("Fail to get face landmark detection bound box.");
-
-                               landmark.x += rect.left;
-                               landmark.y += rect.top;
-                               landmarks.push_back(landmark);
-                               SINGLEO_LOGD("idx[%2zd]: (%3zd, %3zd)", idx, landmark.x, landmark.y);
-                       }
+                       cv::Mat roiCvData = cvData(cv::Rect(rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top)).clone();
+
+                       getLandmarks(roiCvData, landmarks, rect.left, rect.top);
 
                        result._landmarks.push_back(landmarks);
                        result = MvFaceTaskManager::handle(data, width, height, byte_per_pixel, result);
index 759dff383f7adc5a3aa710bc26d71de4a26d80aa..49adc08d669f1ef28e7e385fe150be2c5c0058ed 100644 (file)
@@ -18,6 +18,9 @@
 #include "MvInferenceFaceService.h"
 #include "MvFaceDetectionTask.h"
 #include "MvFaceLandmarkDetectionTask.h"
+#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
 
 using namespace std;
 
@@ -71,6 +74,20 @@ void MvInferenceFaceService::invoke(BaseDataType &input)
 
     ImageDataType &data = dynamic_cast<ImageDataType &>(input);
     _result = _taskManager->handle(data.ptr, data.width, data.height, data.byte_per_pixel, _result);
+
+    cv::Mat _dump(cv::Size(data.width, data.height),CV_MAKE_TYPE(CV_8U, data.byte_per_pixel), data.ptr);
+
+    int id = 0;
+    for (auto &rect : _result._rects) {
+        cv::rectangle(_dump, cv::Rect(rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top), cv::Scalar(0, 255, 0), 2);
+        auto &points = _result._landmarks[id];
+        for (auto &point : points)
+            cv::circle(_dump, cv::Point(point.x, point.y), 2, cv::Scalar(255, 0, 0), 2);
+        id++;
+    }
+
+    cv::cvtColor(_dump, _dump, cv::COLOR_RGB2BGR);
+    cv::imwrite("dump_face.jpg",_dump);
 }
 
 FaceResult& MvInferenceFaceService::result()
index 3efbf4983ba782f5fa06b94636cbde580c814d4c..a491bd191902265b1d3635d08c5499b51ea88682 100644 (file)
@@ -24,7 +24,9 @@
 #include "singleo_native_capi.h"
 #include "singleo_error.h"
 
-#define CHALAMET_01 "/root/singleo/images/chalamet_02.jpeg"
+#define SAMPLE "/root/singleo/images/chalamet_02.jpeg"
+// #define SAMPLE "/root/singleo/images/faceDetection.jpg"
+// #define SAMPLE "/root/singleo/images/obama.jpg"
 
 using namespace testing;
 using namespace std;
@@ -36,7 +38,7 @@ TEST(SmartPointerTest, InferenceRequestShouldBeOk)
        int ret = singleo_service_create("service=smart_pointer", &handle);
        ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
 
-       ret = singleo_service_add_input_image_file(handle, CHALAMET_01);
+       ret = singleo_service_add_input_image_file(handle, SAMPLE);
        ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
 
        ret = singleo_service_perform(handle);