From: Tae-Young Chung <ty83.chung@samsung.com>
Date: Mon, 29 Apr 2024 08:48:46 +0000 (+0900)
Subject: [WIP-09] Add estimation of head pose from landmark
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=11a1b35c11b08970bdbf5a91d9d5b62e9c2778f9;p=platform%2Fcore%2Fapi%2Fsingleo.git

[WIP-09] Add estimation of head pose from landmark

Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
---

diff --git a/inference/src/MvInferenceFaceService.cpp b/inference/src/MvInferenceFaceService.cpp
index 49adc08..0bcb578 100644
--- a/inference/src/MvInferenceFaceService.cpp
+++ b/inference/src/MvInferenceFaceService.cpp
@@ -82,7 +82,7 @@ void MvInferenceFaceService::invoke(BaseDataType &input)
         cv::rectangle(_dump, cv::Rect(rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top), cv::Scalar(0, 255, 0), 2);
         auto &points = _result._landmarks[id];
         for (auto &point : points)
-            cv::circle(_dump, cv::Point(point.x, point.y), 2, cv::Scalar(255, 0, 0), 2);
+            cv::circle(_dump, cv::Point(point.x, point.y), 2, cv::Scalar(255, 255, 0), 2);
         id++;
     }
 
diff --git a/services/CMakeLists.txt b/services/CMakeLists.txt
index e79d746..c2b3e90 100644
--- a/services/CMakeLists.txt
+++ b/services/CMakeLists.txt
@@ -22,7 +22,7 @@ ENDIF()
 ADD_LIBRARY(${PROJECT_NAME} SHARED ${SINGLEO_SERVICE_SOURCE_FILES})
 
 TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PRIVATE include common/include ../capi/ ../input/include ../log/include ../inference/include ../common/include ${SERVICE_HEADER_LIST})
-TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE opencv_imgcodecs singleo_log singleo_input ${SERVICE_LIBRARY_LIST})
+TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE opencv_imgcodecs opencv_calib3d singleo_log singleo_input ${SERVICE_LIBRARY_LIST})
 
 INSTALL(TARGETS ${PROJECT_NAME} DESTINATION ${LIB_INSTALL_DIR})
 
diff --git a/services/smart_pointer/include/HeadPoseEstimator.h b/services/smart_pointer/include/HeadPoseEstimator.h
index 743ca6f..ed0f84f 100644
--- a/services/smart_pointer/include/HeadPoseEstimator.h
+++ b/services/smart_pointer/include/HeadPoseEstimator.h
@@ -20,7 +20,11 @@
 #include <memory>
 #include <vector>
 #include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
 #include <opencv2/calib3d.hpp>
+#include "PoseVector.h"
+#include "SingleoCommonTypes.h"
 
 namespace singleo
 {
@@ -34,12 +38,22 @@ private:
     std::vector<cv::Point2f> _landmarks_2d;
     std::vector<cv::Point3f> _landmarks_3d;
 
-    cv::Mat _camera_param;
+    cv::Mat _camera_matrix;
     cv::Mat _camera_dist_coeff;
 
+    cv::Mat _rotation_vector;
+    cv::Mat _translation_vector;
+    cv::Mat _rotation_matrix;
+
+    std::vector<cv::Point3f> _pose_axes_3d;
+    std::vector<singleo::services::smartpointer::Point2f> _pose_axes;
+
 public:
     HeadPoseEstimator();
     ~HeadPoseEstimator();
+
+    PoseVector estimate(const std::vector<singleo::Point> &landmark2d);
+    std::vector<singleo::services::smartpointer::Point2f> &getPoseAxes();
 };
 } // smartpointer
 } // services 
diff --git a/services/smart_pointer/include/PoseVector.h b/services/smart_pointer/include/PoseVector.h
index 116f423..b8c989e 100644
--- a/services/smart_pointer/include/PoseVector.h
+++ b/services/smart_pointer/include/PoseVector.h
@@ -24,6 +24,12 @@ namespace services
 namespace smartpointer
 {
 
+struct Point2f
+{
+    float x;
+    float y;
+};
+
 struct Point3f
 {
     float x;
@@ -33,12 +39,17 @@ struct Point3f
 
 struct PoseVector
 {
-    Point3f vec;
+    Point3f _rot_vec;
+    Point3f _tran_vec;
     void reset()
     {
-        vec.x = 0.0f;
-        vec.y = 0.0f;
-        vec.z = 0.0f;
+        _rot_vec.x = 0.0f;
+        _rot_vec.y = 0.0f;
+        _rot_vec.z = 0.0f;
+
+        _tran_vec.x = 0.f;
+        _tran_vec.y = 0.f;
+        _tran_vec.z = 0.f;
     }
 };
 } // smartpointer
diff --git a/services/smart_pointer/src/GazeEstimator.cpp b/services/smart_pointer/src/GazeEstimator.cpp
index 996a96d..d1c6fc9 100644
--- a/services/smart_pointer/src/GazeEstimator.cpp
+++ b/services/smart_pointer/src/GazeEstimator.cpp
@@ -19,6 +19,7 @@
 #include "InputCamera.h"
 #include "SingleoLog.h"
 
+
 using namespace std;
 using namespace singleo::inference;
 using namespace singleo::input;
@@ -54,6 +55,16 @@ PoseVector &GazeEstimator::estimateHeadpose(BaseDataType &input)
     auto &result = _face_estimator->result();
     SINGLEO_LOGD("Result done");
 
+    _headPose = _head_pose_estimator->estimate(result._landmarks[0]);
+    vector<Point2f> poseAxes = _head_pose_estimator->getPoseAxes();
+
+    ImageDataType &data = dynamic_cast<ImageDataType &>(input);
+    cv::Mat _dump(cv::Size(data.width, data.height), CV_MAKETYPE(CV_8U, data.byte_per_pixel), data.ptr);
+    cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[1].x, poseAxes[1].y), cv::Scalar(0, 0, 255), 2); // x - red
+    cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[2].x, poseAxes[2].y), cv::Scalar(0, 255, 0), 2); // y - green
+    cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[3].x, poseAxes[3].y), cv::Scalar(255, 0, 0), 2); // z - blue
+
+    cv::imwrite("dump_headpose.jpg", _dump);
     // auto &headPose = dynamic_cast<FldResultType&>(result);
     // SINGLEO_LOGD("Landmark: %zd, %zd",
     //             headPose._landmarks[0].x, headPose._landmarks[0].y);
diff --git a/services/smart_pointer/src/HeadPoseEstimator.cpp b/services/smart_pointer/src/HeadPoseEstimator.cpp
index 7ab6b4c..1bd7bc8 100644
--- a/services/smart_pointer/src/HeadPoseEstimator.cpp
+++ b/services/smart_pointer/src/HeadPoseEstimator.cpp
@@ -19,6 +19,7 @@
 #include "CameraParamManager.h"
 #include "HeadPoseEstimator.h"
 #include "SingleoLog.h"
+#include "SingleoTimer.h"
 
 using namespace std;
 using namespace cv;
@@ -37,16 +38,27 @@ HeadPoseEstimator::HeadPoseEstimator()
     for (auto &point : faceShape)
         _landmarks_3d.push_back(cv::Point3f(point.x, point.y, point.z));
 
+    // 33rd landmark is the nose's tip
+    _pose_axes_3d.push_back(_landmarks_3d[33]);
+    _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[33].x - 50, _landmarks_3d[33].y, _landmarks_3d[33].z));
+    _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[33].x, _landmarks_3d[33].y - 50, _landmarks_3d[33].z));
+    _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[33].x, _landmarks_3d[33].y, _landmarks_3d[33].z -50));
+    _pose_axes.resize(_pose_axes_3d.size());
+
+    _landmarks_2d.resize(_landmarks_3d.size());
+    _rotation_vector = Mat(Size(3,1), CV_32FC1);
+    _translation_vector = Mat(Size(3,1), CV_32FC1);
+
     CameraParamManager cam_dummy("/usr/share/singleo/camera_param_dummy.yaml");
     
-    _camera_param = Mat::zeros(Size(3,3), CV_32FC1);
+    _camera_matrix = Mat::zeros(Size(3,3), CV_32FC1);
     tuple<float, float> focalLength = cam_dummy.getFocalLengthXY();
     tuple<float, float> principlePoint = cam_dummy.getPrinciplePointXY();
-    _camera_param.at<float>(0,0) = get<0>(focalLength);
-    _camera_param.at<float>(1,1) = get<1>(focalLength);
-    _camera_param.at<float>(0,2) = get<0>(principlePoint);
-    _camera_param.at<float>(1,2) = get<1>(principlePoint);
-    _camera_param.at<float>(2,2) = 1.0;
+    _camera_matrix.at<float>(0,0) = get<0>(focalLength);
+    _camera_matrix.at<float>(1,1) = get<1>(focalLength);
+    _camera_matrix.at<float>(0,2) = get<0>(principlePoint);
+    _camera_matrix.at<float>(1,2) = get<1>(principlePoint);
+    _camera_matrix.at<float>(2,2) = 1.0;
 
     _camera_dist_coeff = Mat::zeros(Size(1,5), CV_32FC1);
     tuple<float, float, float, float, float> distCoeff = cam_dummy.getDistortionCoeff();
@@ -56,9 +68,9 @@ HeadPoseEstimator::HeadPoseEstimator()
     _camera_dist_coeff.at<float>(0,3) = get<3>(distCoeff);
     _camera_dist_coeff.at<float>(0,4) = get<4>(distCoeff);
 
-    for (int r = 0; r < _camera_param.rows; ++r) {
-        for (int c = 0; c < _camera_param.cols; ++c) {
-            SINGLEO_LOGI("%.3f", _camera_param.at<float>(r, c));
+    for (int r = 0; r < _camera_matrix.rows; ++r) {
+        for (int c = 0; c < _camera_matrix.cols; ++c) {
+            SINGLEO_LOGI("%.3f", _camera_matrix.at<float>(r, c));
         }
     }
     for (int r = 0; r < _camera_dist_coeff.rows; ++r) {
@@ -73,6 +85,49 @@ HeadPoseEstimator::~HeadPoseEstimator()
 
 }
 
+PoseVector HeadPoseEstimator::estimate(const vector<Point> &landmark2d)
+{
+    Timer timer;
+
+    // TODO: parallel??, c++17 structured binding??
+    for (size_t idx = 0; idx < landmark2d.size(); ++idx) {
+        _landmarks_2d[idx].x = landmark2d[idx].x;
+        _landmarks_2d[idx].y = landmark2d[idx].y;
+    }
+    SINGLEO_LOGI("Get 2d landmark takes %d ms", timer.check_ms());
+
+    timer.reset();
+    // init
+    cv::solvePnPRansac(_landmarks_3d, _landmarks_2d,
+                _camera_matrix, _camera_dist_coeff,
+                _rotation_vector, _translation_vector, false, 100, 8.0F, 0.99, cv::noArray(), cv::SOLVEPNP_EPNP);
+    SINGLEO_LOGI("1st solvePnP takes %d ms", timer.check_ms());
+
+    timer.reset();
+    // estimate pose
+    // https://docs.opencv.org/4.x/d9/d0c/group__calib3d.html#ga357634492a94efe8858d0ce1509da869
+    cv::solvePnP(_landmarks_3d, _landmarks_2d,
+            _camera_matrix, _camera_dist_coeff,
+            _rotation_vector, _translation_vector, true, cv::SOLVEPNP_ITERATIVE);
+    SINGLEO_LOGI("2nd solvePnP takes %d ms", timer.check_ms());
+
+    return PoseVector{ Point3f{ _rotation_vector.at<float>(0), _rotation_vector.at<float>(1), _rotation_vector.at<float>(2)},
+                     Point3f{ _translation_vector.at<float>(0), _translation_vector.at<float>(1), _translation_vector.at<float>(2)} };
+}
+
+vector<Point2f> &HeadPoseEstimator::getPoseAxes()
+{
+    std::vector<cv::Point2f> poseAxes2d;
+    cv::projectPoints(_pose_axes_3d, _rotation_vector, _translation_vector, _camera_matrix, _camera_dist_coeff, poseAxes2d);
+    SINGLEO_LOGI("projectPoints %zd", poseAxes2d.size());
+    for (size_t i = 0; i < poseAxes2d.size(); i++) {
+        _pose_axes[i].x = poseAxes2d[i].x;
+        _pose_axes[i].y = poseAxes2d[i].y;
+    }
+
+    return _pose_axes;
+}
+
 } // smartpointer
 } // services
 } // singleo