[WIP-09] Add estimation of head pose from landmark
authorTae-Young Chung <ty83.chung@samsung.com>
Mon, 29 Apr 2024 08:48:46 +0000 (17:48 +0900)
committerTae-Young Chung <ty83.chung@samsung.com>
Mon, 29 Apr 2024 08:59:55 +0000 (17:59 +0900)
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
inference/src/MvInferenceFaceService.cpp
services/CMakeLists.txt
services/smart_pointer/include/HeadPoseEstimator.h
services/smart_pointer/include/PoseVector.h
services/smart_pointer/src/GazeEstimator.cpp
services/smart_pointer/src/HeadPoseEstimator.cpp

index 49adc08d669f1ef28e7e385fe150be2c5c0058ed..0bcb5780e8582fbe614a74deed70e366f1e2283f 100644 (file)
@@ -82,7 +82,7 @@ void MvInferenceFaceService::invoke(BaseDataType &input)
         cv::rectangle(_dump, cv::Rect(rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top), cv::Scalar(0, 255, 0), 2);
         auto &points = _result._landmarks[id];
         for (auto &point : points)
-            cv::circle(_dump, cv::Point(point.x, point.y), 2, cv::Scalar(255, 0, 0), 2);
+            cv::circle(_dump, cv::Point(point.x, point.y), 2, cv::Scalar(255, 255, 0), 2);
         id++;
     }
 
index e79d7462f4f632b48dc32d07b9776f8588e32f0e..c2b3e90e946ab91d119f5aa1589cd07f7cd5e91d 100644 (file)
@@ -22,7 +22,7 @@ ENDIF()
 ADD_LIBRARY(${PROJECT_NAME} SHARED ${SINGLEO_SERVICE_SOURCE_FILES})
 
 TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PRIVATE include common/include ../capi/ ../input/include ../log/include ../inference/include ../common/include ${SERVICE_HEADER_LIST})
-TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE opencv_imgcodecs singleo_log singleo_input ${SERVICE_LIBRARY_LIST})
+TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE opencv_imgcodecs opencv_calib3d singleo_log singleo_input ${SERVICE_LIBRARY_LIST})
 
 INSTALL(TARGETS ${PROJECT_NAME} DESTINATION ${LIB_INSTALL_DIR})
 
index 743ca6f50cbfbddbcee78bb865486dd12302f008..ed0f84f2367d2394a45d7dce5f3477d0aed7ac63 100644 (file)
 #include <memory>
 #include <vector>
 #include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
 #include <opencv2/calib3d.hpp>
+#include "PoseVector.h"
+#include "SingleoCommonTypes.h"
 
 namespace singleo
 {
@@ -34,12 +38,22 @@ private:
     std::vector<cv::Point2f> _landmarks_2d;
     std::vector<cv::Point3f> _landmarks_3d;
 
-    cv::Mat _camera_param;
+    cv::Mat _camera_matrix;
     cv::Mat _camera_dist_coeff;
 
+    cv::Mat _rotation_vector;
+    cv::Mat _translation_vector;
+    cv::Mat _rotation_matrix;
+
+    std::vector<cv::Point3f> _pose_axes_3d;
+    std::vector<singleo::services::smartpointer::Point2f> _pose_axes;
+
 public:
     HeadPoseEstimator();
     ~HeadPoseEstimator();
+
+    PoseVector estimate(const std::vector<singleo::Point> &landmark2d);
+    std::vector<singleo::services::smartpointer::Point2f> &getPoseAxes();
 };
 } // smartpointer
 } // services 
index 116f423263cba9e2ae0624d0ee2d54cc492982fb..b8c989e5a65d4d40564ecfb782bdfa93455ac509 100644 (file)
@@ -24,6 +24,12 @@ namespace services
 namespace smartpointer
 {
 
+struct Point2f
+{
+    float x;
+    float y;
+};
+
 struct Point3f
 {
     float x;
@@ -33,12 +39,17 @@ struct Point3f
 
 struct PoseVector
 {
-    Point3f vec;
+    Point3f _rot_vec;
+    Point3f _tran_vec;
     void reset()
     {
-        vec.x = 0.0f;
-        vec.y = 0.0f;
-        vec.z = 0.0f;
+        _rot_vec.x = 0.0f;
+        _rot_vec.y = 0.0f;
+        _rot_vec.z = 0.0f;
+
+        _tran_vec.x = 0.f;
+        _tran_vec.y = 0.f;
+        _tran_vec.z = 0.f;
     }
 };
 } // smartpointer
index 996a96df43f0705c876a0154b26d77a34ed3b524..d1c6fc97e4096721b4b012e5cee070a644e197ba 100644 (file)
@@ -19,6 +19,7 @@
 #include "InputCamera.h"
 #include "SingleoLog.h"
 
+
 using namespace std;
 using namespace singleo::inference;
 using namespace singleo::input;
@@ -54,6 +55,16 @@ PoseVector &GazeEstimator::estimateHeadpose(BaseDataType &input)
     auto &result = _face_estimator->result();
     SINGLEO_LOGD("Result done");
 
+    _headPose = _head_pose_estimator->estimate(result._landmarks[0]);
+    vector<Point2f> poseAxes = _head_pose_estimator->getPoseAxes();
+
+    ImageDataType &data = dynamic_cast<ImageDataType &>(input);
+    cv::Mat _dump(cv::Size(data.width, data.height), CV_MAKETYPE(CV_8U, data.byte_per_pixel), data.ptr);
+    cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[1].x, poseAxes[1].y), cv::Scalar(0, 0, 255), 2); // x - red
+    cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[2].x, poseAxes[2].y), cv::Scalar(0, 255, 0), 2); // y - green
+    cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[3].x, poseAxes[3].y), cv::Scalar(255, 0, 0), 2); // z - blue
+
+    cv::imwrite("dump_headpose.jpg", _dump);
     // auto &headPose = dynamic_cast<FldResultType&>(result);
     // SINGLEO_LOGD("Landmark: %zd, %zd",
     //             headPose._landmarks[0].x, headPose._landmarks[0].y);
index 7ab6b4c135bbbf7a728bb79c837ff1dd125dfa92..1bd7bc89e344cba715f8bd4253a948dd66e57213 100644 (file)
@@ -19,6 +19,7 @@
 #include "CameraParamManager.h"
 #include "HeadPoseEstimator.h"
 #include "SingleoLog.h"
+#include "SingleoTimer.h"
 
 using namespace std;
 using namespace cv;
@@ -37,16 +38,27 @@ HeadPoseEstimator::HeadPoseEstimator()
     for (auto &point : faceShape)
         _landmarks_3d.push_back(cv::Point3f(point.x, point.y, point.z));
 
+    // 33rd landmark is the nose's tip
+    _pose_axes_3d.push_back(_landmarks_3d[33]);
+    _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[33].x - 50, _landmarks_3d[33].y, _landmarks_3d[33].z));
+    _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[33].x, _landmarks_3d[33].y - 50, _landmarks_3d[33].z));
+    _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[33].x, _landmarks_3d[33].y, _landmarks_3d[33].z -50));
+    _pose_axes.resize(_pose_axes_3d.size());
+
+    _landmarks_2d.resize(_landmarks_3d.size());
+    _rotation_vector = Mat(Size(3,1), CV_32FC1);
+    _translation_vector = Mat(Size(3,1), CV_32FC1);
+
     CameraParamManager cam_dummy("/usr/share/singleo/camera_param_dummy.yaml");
     
-    _camera_param = Mat::zeros(Size(3,3), CV_32FC1);
+    _camera_matrix = Mat::zeros(Size(3,3), CV_32FC1);
     tuple<float, float> focalLength = cam_dummy.getFocalLengthXY();
     tuple<float, float> principlePoint = cam_dummy.getPrinciplePointXY();
-    _camera_param.at<float>(0,0) = get<0>(focalLength);
-    _camera_param.at<float>(1,1) = get<1>(focalLength);
-    _camera_param.at<float>(0,2) = get<0>(principlePoint);
-    _camera_param.at<float>(1,2) = get<1>(principlePoint);
-    _camera_param.at<float>(2,2) = 1.0;
+    _camera_matrix.at<float>(0,0) = get<0>(focalLength);
+    _camera_matrix.at<float>(1,1) = get<1>(focalLength);
+    _camera_matrix.at<float>(0,2) = get<0>(principlePoint);
+    _camera_matrix.at<float>(1,2) = get<1>(principlePoint);
+    _camera_matrix.at<float>(2,2) = 1.0;
 
     _camera_dist_coeff = Mat::zeros(Size(1,5), CV_32FC1);
     tuple<float, float, float, float, float> distCoeff = cam_dummy.getDistortionCoeff();
@@ -56,9 +68,9 @@ HeadPoseEstimator::HeadPoseEstimator()
     _camera_dist_coeff.at<float>(0,3) = get<3>(distCoeff);
     _camera_dist_coeff.at<float>(0,4) = get<4>(distCoeff);
 
-    for (int r = 0; r < _camera_param.rows; ++r) {
-        for (int c = 0; c < _camera_param.cols; ++c) {
-            SINGLEO_LOGI("%.3f", _camera_param.at<float>(r, c));
+    for (int r = 0; r < _camera_matrix.rows; ++r) {
+        for (int c = 0; c < _camera_matrix.cols; ++c) {
+            SINGLEO_LOGI("%.3f", _camera_matrix.at<float>(r, c));
         }
     }
     for (int r = 0; r < _camera_dist_coeff.rows; ++r) {
@@ -73,6 +85,49 @@ HeadPoseEstimator::~HeadPoseEstimator()
 
 }
 
+PoseVector HeadPoseEstimator::estimate(const vector<Point> &landmark2d)
+{
+    Timer timer;
+
+    // TODO: parallel??, c++17 structured binding??
+    for (size_t idx = 0; idx < landmark2d.size(); ++idx) {
+        _landmarks_2d[idx].x = landmark2d[idx].x;
+        _landmarks_2d[idx].y = landmark2d[idx].y;
+    }
+    SINGLEO_LOGI("Get 2d landmark takes %d ms", timer.check_ms());
+
+    timer.reset();
+    // init
+    cv::solvePnPRansac(_landmarks_3d, _landmarks_2d,
+                _camera_matrix, _camera_dist_coeff,
+                _rotation_vector, _translation_vector, false, 100, 8.0F, 0.99, cv::noArray(), cv::SOLVEPNP_EPNP);
+    SINGLEO_LOGI("1st solvePnP takes %d ms", timer.check_ms());
+
+    timer.reset();
+    // estimate pose
+    // https://docs.opencv.org/4.x/d9/d0c/group__calib3d.html#ga357634492a94efe8858d0ce1509da869
+    cv::solvePnP(_landmarks_3d, _landmarks_2d,
+            _camera_matrix, _camera_dist_coeff,
+            _rotation_vector, _translation_vector, true, cv::SOLVEPNP_ITERATIVE);
+    SINGLEO_LOGI("2nd solvePnP takes %d ms", timer.check_ms());
+
+    return PoseVector{ Point3f{ _rotation_vector.at<float>(0), _rotation_vector.at<float>(1), _rotation_vector.at<float>(2)},
+                     Point3f{ _translation_vector.at<float>(0), _translation_vector.at<float>(1), _translation_vector.at<float>(2)} };
+}
+
+vector<Point2f> &HeadPoseEstimator::getPoseAxes()
+{
+    std::vector<cv::Point2f> poseAxes2d;
+    cv::projectPoints(_pose_axes_3d, _rotation_vector, _translation_vector, _camera_matrix, _camera_dist_coeff, poseAxes2d);
+    SINGLEO_LOGI("projectPoints %zd", poseAxes2d.size());
+    for (size_t i = 0; i < poseAxes2d.size(); i++) {
+        _pose_axes[i].x = poseAxes2d[i].x;
+        _pose_axes[i].y = poseAxes2d[i].y;
+    }
+
+    return _pose_axes;
+}
+
 } // smartpointer
 } // services
 } // singleo