Add HeadPoseEstimator which estimates the pose from face landmark 46/317846/8
authorTae-Young Chung <ty83.chung@samsung.com>
Thu, 19 Sep 2024 08:43:09 +0000 (17:43 +0900)
committerTae-Young Chung <ty83.chung@samsung.com>
Tue, 1 Oct 2024 08:27:19 +0000 (17:27 +0900)
HeadPoseEstimator consists of FaceShapeModelManager and
CameraParameterManager.
FaceShapeModelManager loads face shape from a file and estimates
actual pose by projecting the face landmarks results on the face shape.
CameraParameterManager loads camera parameters from a file.
Face shape file (pdm.txt) and dummpy camera parameter file are added to
res directory. They are installed when single rpm is installed.

Currently, only the first face's pose is estimated and an image is
tested. Commit working with a camera will be patched.

Note: pdm.txt is from https://github.com/TadasBaltrusaitis/OpenFace/blob/master/lib/local/LandmarkDetector/model/pdms/In-the-wild_aligned_PDM_68.txt. It consists of 68 landmarks in 3D coordinate. The xyz order is xxxxx....yyyyy...zzzz... .

Change-Id: I0b2157d335e13dfbe8914e22e71535f0b368636e
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
21 files changed:
packaging/singleo.spec
services/CMakeLists.txt
services/focus_finder/CMakeLists.txt
services/focus_finder/include/CameraParamManager.h [new file with mode: 0644]
services/focus_finder/include/FaceDetector.h
services/focus_finder/include/FaceLandmarker.h
services/focus_finder/include/FaceShapeModelManager.h [new file with mode: 0644]
services/focus_finder/include/FaceTaskManager.h
services/focus_finder/include/FocusFinderDataTypes.h
services/focus_finder/include/GazeEstimator.h
services/focus_finder/include/HeadPoseEstimator.h [new file with mode: 0644]
services/focus_finder/include/IFaceTaskManager.h
services/focus_finder/res/camera_param_dummy.yaml [new file with mode: 0755]
services/focus_finder/res/pdm.txt [new file with mode: 0644]
services/focus_finder/src/CameraParamManager.cpp [new file with mode: 0644]
services/focus_finder/src/FaceDetector.cpp
services/focus_finder/src/FaceLandmarker.cpp
services/focus_finder/src/FaceShapeModelManager.cpp [new file with mode: 0644]
services/focus_finder/src/FaceTaskManager.cpp
services/focus_finder/src/GazeEstimator.cpp
services/focus_finder/src/HeadPoseEstimator.cpp [new file with mode: 0644]

index 79edf4f9ef53920f591df48db8b7f345199a4043..69db00a7b1cf072cbe27a5fc00ab948ec6efa4bb 100644 (file)
@@ -20,6 +20,7 @@ BuildRequires: pkgconfig(grpc++)
 BuildRequires: pkgconfig(re2)
 
 %define enable_visualizer 0
+%define enable_focusfinder 1
 
 %description
 SingleO AI Service Framework
@@ -59,7 +60,7 @@ MAJORVER=`echo %{version} | awk 'BEGIN {FS="."}{print $1}'`
          -DUSE_EXTERNAL_INFERENCE_SERVICE=OFF \
          -DUSE_INPUT_OPENCV_BACKEND=ON \
          -DUSE_AUTOZOOM_API=ON \
-         -DUSE_FOCUSFINDER_API=ON \
+         -DUSE_FOCUSFINDER_API=%{enable_focusfinder} \
          -DUSE_INPUT_CAMERA_API_BACKEND=ON \
                 -DUSE_VISUALIZER=%{enable_visualizer}
 
@@ -80,6 +81,9 @@ rm -rf %{buildroot}
 %if "%{enable_visualizer}" == "1"
 %{_libdir}/libsingleo_visualizer.so
 %endif
+%if "%{enable_focusfinder}" == "1"
+%{_datadir}/%{name}/*
+%endif
 
 %files devel
 %{_libdir}/pkgconfig/*.pc
index c4ad0f19092551a4ea9b2467d410855ce90fe2d6..d9a165b09b485a92f1b2a418ce31423e60d9ad32 100644 (file)
@@ -23,6 +23,6 @@ ENDIF()
 ADD_LIBRARY(${PROJECT_NAME} SHARED ${SINGLEO_SERVICE_SOURCE_FILES})
 
 TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PRIVATE include common/include ${ROOT_DIRECTORY}/capi/ ${COMMON_HEADER_LIST} ${INPUT_HEADER_LIST} ${INFERENCE_HEADER_LIST} ${SERVICE_HEADER_LIST} ${LOG_HEADER_LIST})
-TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE opencv_core opencv_imgcodecs opencv_highgui opencv_videoio ${INFERENCE_LIBRARY_LIST} ${SERVICE_LIBRARY_LIST} ${LOG_LIBRARY_LIST})
+TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE opencv_core opencv_imgcodecs opencv_highgui opencv_videoio opencv_calib3d ${INFERENCE_LIBRARY_LIST} ${SERVICE_LIBRARY_LIST} ${LOG_LIBRARY_LIST})
 
 INSTALL(TARGETS ${PROJECT_NAME} DESTINATION ${LIB_INSTALL_DIR})
\ No newline at end of file
index 8d15540822416889b5c826b682ec93380e3eae76..a2bb792e79e198288e244871092e0d4d069537a9 100644 (file)
@@ -5,7 +5,14 @@ SET(SINGLEO_SERVICE_SOURCE_FILES
     focus_finder/src/FaceTaskManager.cpp
     focus_finder/src/FaceDetector.cpp
     focus_finder/src/FaceLandmarker.cpp
+    focus_finder/src/CameraParamManager.cpp
+    focus_finder/src/FaceShapeModelManager.cpp
+    focus_finder/src/HeadPoseEstimator.cpp
 )
 
 LIST(APPEND SERVICE_LIBRARY_LIST ${SERVICE_LIBRARY_LIST})
-LIST(APPEND SERVICE_HEADER_LIST ${SERVICE_HEADER_LIST} ${CMAKE_CURRENT_SOURCE_DIR}/focus_finder/include)
\ No newline at end of file
+LIST(APPEND SERVICE_HEADER_LIST ${SERVICE_HEADER_LIST} ${CMAKE_CURRENT_SOURCE_DIR}/focus_finder/include)
+
+SET(SINGLEO_SERVICE_RESOURCE_FILES ${SINGLEO_SERVICE_RESOURCE_FILES} ${CMAKE_CURRENT_SOURCE_DIR}/focus_finder/res/pdm.txt)
+SET(SINGLEO_SERVICE_RESOURCE_FILES ${SINGLEO_SERVICE_RESOURCE_FILES} ${CMAKE_CURRENT_SOURCE_DIR}/focus_finder/res/camera_param_dummy.yaml)
+INSTALL(FILES ${SINGLEO_SERVICE_RESOURCE_FILES} DESTINATION /usr/share/singleo)
\ No newline at end of file
diff --git a/services/focus_finder/include/CameraParamManager.h b/services/focus_finder/include/CameraParamManager.h
new file mode 100644 (file)
index 0000000..dd52317
--- /dev/null
@@ -0,0 +1,50 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#ifndef __CAMERA_PARAM_MANAGER_H__
+#define __CAMERA_PARAM_MANAGER_H__
+
+#include <memory>
+#include <vector>
+#include <tuple>
+#include <opencv2/core.hpp>
+
+namespace singleo
+{
+namespace services
+{
+namespace focusfinder
+{
+class CameraParamManager
+{
+private:
+       cv::Mat _camera_param;
+       cv::Mat _dist_coeff;
+
+       void loadParamFromFile(const std::string &file_path);
+
+public:
+       CameraParamManager(const std::string &file_path);
+       ~CameraParamManager() = default;
+       std::tuple<float, float> getFocalLengthXY();
+       std::tuple<float, float> getPrinciplePointXY();
+       std::tuple<float, float, float, float, float> getDistortionCoeff();
+};
+} // focusfinder
+} // services
+} // singleo
+
+#endif
\ No newline at end of file
index 33ed835876d40ab9d60bc7db5d1d45b529ab8142..3d29441c89bb8150f8ba21794442247789075b27 100644 (file)
@@ -30,13 +30,13 @@ namespace focusfinder
 class FaceDetector : public FaceTaskManager
 {
 private:
-    std::unique_ptr<inference::IInferenceTaskInterface> _face_detector;
+       std::unique_ptr<inference::IInferenceTaskInterface> _face_detector;
 
 public:
-    FaceDetector();
-    virtual ~FaceDetector() = default;
+       FaceDetector();
+       virtual ~FaceDetector() = default;
 
-       FaceResulthandle(BaseDataType &input, FaceResult &result) override;
+       FaceResult &handle(BaseDataType &input, FaceResult &result) override;
 };
 
 } // backends
index 1f04022f07ccdf8265a7d9d3ff9f2c99365526ea..08e5a9082fc1b9b90ca51f2558bf1e5f8ab2887c 100644 (file)
@@ -30,13 +30,13 @@ namespace focusfinder
 class FaceLandmarker : public FaceTaskManager
 {
 private:
-    std::unique_ptr<inference::IInferenceTaskInterface> _face_landmarker;
+       std::unique_ptr<inference::IInferenceTaskInterface> _face_landmarker;
 
 public:
-    FaceLandmarker();
-    virtual ~FaceLandmarker() = default;
+       FaceLandmarker();
+       virtual ~FaceLandmarker() = default;
 
-       FaceResulthandle(BaseDataType &input, FaceResult &result) override;
+       FaceResult &handle(BaseDataType &input, FaceResult &result) override;
 };
 
 } // backends
diff --git a/services/focus_finder/include/FaceShapeModelManager.h b/services/focus_finder/include/FaceShapeModelManager.h
new file mode 100644 (file)
index 0000000..6e0acec
--- /dev/null
@@ -0,0 +1,50 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#ifndef __FACE_SHAPE_MODEL_MANAGER_H__
+#define __FACE_SHAPE_MODEL_MANAGER_H__
+
+#include <memory>
+#include <vector>
+#include <map>
+#include <opencv2/core.hpp>
+#include "FocusFinderDataTypes.h"
+namespace singleo
+{
+namespace services
+{
+namespace focusfinder
+{
+class FaceShapeModelManager
+{
+private:
+       std::vector<cv::Point3f> _faceShape;
+
+       void loadModelFromFile(const std::string &model_path);
+       template<typename T> T ToNumber(const std::string &text);
+       std::map<FaceComponents, std::vector<int> > _faceShapeIndices;
+
+public:
+       FaceShapeModelManager(const std::string &model_path);
+       ~FaceShapeModelManager();
+       const std::vector<cv::Point3f> &getFaceShape();
+       const std::vector<int> &getFaceShapeIndices(FaceComponents component);
+};
+} // focusfinder
+} // services
+} // singleo
+
+#endif
\ No newline at end of file
index 3f12925cac53770fba2d59131f6f530a904a681d..8661e5e7b1fe5b0c9ea5b62df4f56629a1763344 100644 (file)
@@ -29,13 +29,14 @@ namespace focusfinder
 class FaceTaskManager : public IFaceTaskManager
 {
 private:
-    std::shared_ptr<IFaceTaskManager> _next;
+       std::shared_ptr<IFaceTaskManager> _next;
+
 public:
-    FaceTaskManager();
-    virtual ~FaceTaskManager();
+       FaceTaskManager();
+       virtual ~FaceTaskManager();
 
        std::shared_ptr<IFaceTaskManager> setNext(std::shared_ptr<IFaceTaskManager> next) override;
-       FaceResulthandle(BaseDataType &input, FaceResult &result) override;
+       FaceResult &handle(BaseDataType &input, FaceResult &result) override;
 };
 
 } // backends
index a3d067b0a9b49b1edbab7ddc22e007ea4898f51f..0003883e1c063da06878c2cc2d65f872350d7583 100644 (file)
@@ -29,10 +29,16 @@ namespace focusfinder
 using Points = std::vector<Point>;
 
 struct FaceResult {
-    std::vector<Rect> _rects;
-    std::vector<Points> _landmarks;
+       std::vector<Rect> _rects;
+       std::vector<Points> _landmarks;
 };
 
+enum class FaceComponents {
+       NOSE_RIDGE,
+       NOSE_BASE,
+       LEFT_EYE_CORNERS,
+       RIGHT_EYE_CORNERS,
+};
 }
 }
 }
index 174ecf57a9c4872129d85f884d0120be1dbe73fc..5fa66ecfb8dd88ce6e343a3e5771271cb7aafa8e 100644 (file)
@@ -20,6 +20,7 @@
 #include <memory>
 #include "FocusFinderDataTypes.h"
 #include "FaceTaskManager.h"
+#include "HeadPoseEstimator.h"
 
 namespace singleo
 {
@@ -30,13 +31,13 @@ namespace focusfinder
 class GazeEstimator
 {
 private:
-    std::shared_ptr<FaceTaskManager> _face_tasker;
+       std::shared_ptr<FaceTaskManager> _face_tasker;
+       std::unique_ptr<HeadPoseEstimator> _head_pose_estimator;
 
 public:
-    GazeEstimator();
-    ~GazeEstimator();
-    void run(BaseDataType &input);
-
+       GazeEstimator();
+       ~GazeEstimator();
+       void run(BaseDataType &input);
 };
 } // focusfinder
 } // services
diff --git a/services/focus_finder/include/HeadPoseEstimator.h b/services/focus_finder/include/HeadPoseEstimator.h
new file mode 100644 (file)
index 0000000..77bf222
--- /dev/null
@@ -0,0 +1,71 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#ifndef __HEAD_POSE_ESTIMATOR_H__
+#define __HEAD_POSE_ESTIMATOR_H__
+
+#include <memory>
+#include <vector>
+#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/calib3d.hpp>
+#include "SingleoCommonTypes.h"
+
+namespace singleo
+{
+namespace services
+{
+namespace focusfinder
+{
+class HeadPoseEstimator
+{
+private:
+       std::vector<cv::Point2f> _landmarks_2d;
+       std::vector<cv::Point3f> _landmarks_3d;
+       std::vector<int> _faceComponentIndices;
+
+       cv::Mat _camera_matrix;
+       cv::Mat _camera_dist_coeff;
+
+       cv::Mat _rotation_vector;
+       cv::Mat _translation_vector;
+       cv::Mat _rotation_matrix;
+
+       std::vector<cv::Point3f> _pose_axes_3d;
+       std::vector<cv::Point2f> _pose_axes;
+
+       bool _isInitialized;
+       bool validRotationMatrix(cv::Mat &matrix);
+       cv::Vec3f rotationMatrixToEulerAngles(cv::Mat &matrix);
+       double calculateReProjectectionError();
+       void print3x3Matrix(cv::Mat &matrix, std::string name);
+       void print3x1Matrix(cv::Mat &matrix, std::string name);
+       void printVec3f(cv::Vec3f &vec, std::string name);
+
+public:
+       HeadPoseEstimator(const std::vector<FaceComponents> &components);
+       ~HeadPoseEstimator();
+
+       std::tuple<cv::Point3f, cv::Point3f> estimate(const std::vector<singleo::Point> &landmark2d);
+       std::vector<cv::Point2f> &getPoseAxes();
+       std::vector<int> &getFaceComponentsIndieces();
+};
+} // smartpointer
+} // services
+} // singleo
+
+#endif
index e0fa98529d8773bc8022b078271f0ff9c9dcebef..234fac7ceb831cd48d3088ed103c58ff28bb1b7d 100644 (file)
@@ -17,7 +17,7 @@
 #ifndef __INTERFACE_FACE_TASK_MANAGER_H__
 #define __INTERFACE_FACE_TASK_MANAGER_H__
 
-#include <memory> 
+#include <memory>
 #include "FocusFinderDataTypes.h"
 
 namespace singleo
@@ -30,7 +30,7 @@ class IFaceTaskManager
 {
 public:
        virtual std::shared_ptr<IFaceTaskManager> setNext(std::shared_ptr<IFaceTaskManager> next) = 0;
-       virtual FaceResulthandle(BaseDataType &input, FaceResult &result) = 0;
+       virtual FaceResult &handle(BaseDataType &input, FaceResult &result) = 0;
 };
 
 } // focusfinder
diff --git a/services/focus_finder/res/camera_param_dummy.yaml b/services/focus_finder/res/camera_param_dummy.yaml
new file mode 100755 (executable)
index 0000000..80ae3bf
--- /dev/null
@@ -0,0 +1,13 @@
+%YAML:1.0
+camera_matrix: !!opencv-matrix
+  rows: 3
+  cols: 3
+  dt: f
+  data: [324.0, 0.0, 324.0,
+        0.0, 324.0, 216.0,
+        0.0, 0.0, 1.0]
+dist_coeff: !!opencv-matrix
+  rows: 5
+  cols: 1
+  dt: f
+  data: [0.0, 0.0, 0.0, 0.0, 0.0]
\ No newline at end of file
diff --git a/services/focus_finder/res/pdm.txt b/services/focus_finder/res/pdm.txt
new file mode 100644 (file)
index 0000000..f415390
--- /dev/null
@@ -0,0 +1,205 @@
+68
+-73.393523 
+-72.775014 
+-70.533638 
+-66.850058 
+-59.790187 
+-48.368973 
+-34.121101 
+-17.875411 
+0.098749 
+17.477031 
+32.648966 
+46.372358 
+57.343480 
+64.388482 
+68.212038 
+70.486405 
+71.375822 
+-61.119406 
+-51.287588 
+-37.804800 
+-24.022754 
+-11.635713 
+12.056636 
+25.106256 
+38.338588 
+51.191007 
+60.053851 
+0.653940 
+0.804809 
+0.992204 
+1.226783 
+-14.772472 
+-7.180239 
+0.555920 
+8.272499 
+15.214351 
+-46.047290 
+-37.674688 
+-27.883856 
+-19.648268 
+-28.272965 
+-38.082418 
+19.265868 
+27.894191 
+37.437529 
+45.170805 
+38.196454 
+28.764989 
+-28.916267 
+-17.533194 
+-6.684590 
+0.381001 
+8.375443 
+18.876618 
+28.794412 
+19.057574 
+8.956375 
+0.381549 
+-7.428895 
+-18.160634 
+-24.377490 
+-6.897633 
+0.340663 
+8.444722 
+24.474473 
+8.449166 
+0.205322 
+-7.198266 
+-29.801432 
+-10.949766 
+7.929818 
+26.074280 
+42.564390 
+56.481080 
+67.246992 
+75.056892 
+77.061286 
+74.758448 
+66.929021 
+56.311389 
+42.419126 
+25.455880 
+6.990805 
+-11.666193 
+-30.365191 
+-49.361602 
+-58.769795 
+-61.996155 
+-61.033399 
+-56.686759 
+-57.391033 
+-61.902186 
+-62.777713 
+-59.302347 
+-50.190255 
+-42.193790 
+-30.993721 
+-19.944596 
+-8.414541 
+2.598255 
+4.751589 
+6.562900 
+4.661005 
+2.643046 
+-37.471411 
+-42.730510 
+-42.711517 
+-36.754742 
+-35.134493 
+-34.919043 
+-37.032306 
+-43.342445 
+-43.110822 
+-38.086515 
+-35.532024 
+-35.484289 
+28.612716 
+22.172187 
+19.029051 
+20.721118 
+19.035460 
+22.394109 
+28.079924 
+36.298248 
+39.634575 
+40.395647 
+39.836405 
+36.677899 
+28.677771 
+25.475976 
+26.014269 
+25.326198 
+28.323008 
+30.596216 
+31.408738 
+30.844876 
+47.667532 
+45.909403 
+44.842580 
+43.141114 
+38.635298 
+30.750622 
+18.456453 
+3.609035 
+-0.881698 
+5.181201 
+19.176563 
+30.770570 
+37.628629 
+40.886309 
+42.281449 
+44.142567 
+47.140426 
+14.254422 
+7.268147 
+0.442051 
+-6.606501 
+-11.967398 
+-12.051204 
+-7.315098 
+-1.022953 
+5.349435 
+11.615746 
+-13.380835 
+-21.150853 
+-29.284036 
+-36.948060 
+-20.132003 
+-23.536684 
+-25.944448 
+-23.695741 
+-20.858157 
+7.037989 
+3.021217 
+1.353629 
+-0.111088 
+-0.147273 
+1.476612 
+-0.665746 
+0.247660 
+1.696435 
+4.894163 
+0.282961 
+-1.172675 
+-2.240310 
+-15.934335 
+-22.611355 
+-23.748437 
+-22.721995 
+-15.610679 
+-3.217393 
+-14.987997 
+-22.554245 
+-23.591626 
+-22.406106 
+-15.121907 
+-4.785684 
+-20.893742 
+-22.220479 
+-21.025520 
+-5.712776 
+-20.671489 
+-21.903670 
+-20.328022 
diff --git a/services/focus_finder/src/CameraParamManager.cpp b/services/focus_finder/src/CameraParamManager.cpp
new file mode 100644 (file)
index 0000000..93c486b
--- /dev/null
@@ -0,0 +1,69 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <fstream>
+#include <sstream>
+#include <iostream>
+#include "CameraParamManager.h"
+#include "SingleoLog.h"
+
+using namespace std;
+using namespace cv;
+
+namespace singleo
+{
+namespace services
+{
+namespace focusfinder
+{
+CameraParamManager::CameraParamManager(const string &file_path)
+{
+       if (file_path.empty())
+               throw invalid_argument("Invalid camera param path path");
+
+       loadParamFromFile(file_path);
+}
+
+void CameraParamManager::loadParamFromFile(const string &file_path)
+{
+       FileStorage file(file_path, FileStorage::READ);
+       if (!file.isOpened())
+               throw runtime_error("Fail to open camera param file");
+
+       file["camera_matrix"] >> _camera_param;
+       file["dist_coeff"] >> _dist_coeff;
+}
+
+tuple<float, float> CameraParamManager::getFocalLengthXY()
+{
+       return tuple<float, float>(_camera_param.at<float>(0, 0), _camera_param.at<float>(1, 1));
+}
+
+tuple<float, float> CameraParamManager::getPrinciplePointXY()
+{
+       return tuple<float, float>(_camera_param.at<float>(0, 2), _camera_param.at<float>(1, 2));
+}
+
+tuple<float, float, float, float, float> CameraParamManager::getDistortionCoeff()
+{
+       return tuple<float, float, float, float, float>(_dist_coeff.at<float>(0, 0), _dist_coeff.at<float>(0, 1),
+                                                                                                       _dist_coeff.at<float>(0, 2), _dist_coeff.at<float>(0, 3),
+                                                                                                       _dist_coeff.at<float>(0, 3));
+}
+
+} // focusfinder
+} // services
+} // singleo
index a79a8f31d3e4370c4cdf5f522119997139b4b3f9..c974f2d074d3169045610e79be69e6acbf9e30de 100644 (file)
@@ -29,22 +29,22 @@ namespace focusfinder
 {
 FaceDetector::FaceDetector()
 {
-    _face_detector = inference::InferenceTaskFactory::instance().create("MvInferenceTaskFactory")->createFaceDetection();
+       _face_detector =
+                       inference::InferenceTaskFactory::instance().create("MvInferenceTaskFactory")->createFaceDetection();
 
-    _face_detector->configure();
-    _face_detector->prepare();
+       _face_detector->configure();
+       _face_detector->prepare();
 }
 
-
-FaceResult& FaceDetector::handle(BaseDataType &input, FaceResult &result)
+FaceResult &FaceDetector::handle(BaseDataType &input, FaceResult &result)
 {
-    _face_detector->invoke(input, false);
-    auto face_result = dynamic_cast<FdResultType&>(_face_detector->result());
-    result._rects = face_result._rects;
+       _face_detector->invoke(input, false);
+       auto face_result = dynamic_cast<FdResultType &>(_face_detector->result());
+       result._rects = face_result._rects;
 
-    result = FaceTaskManager::handle(input, result);
+       result = FaceTaskManager::handle(input, result);
 
-    return result;
+       return result;
 }
 
 }
index c15c36c7f1d190e724cf4f07c1bad1732f5e7a8f..ab7c1138799b84e741667d2de0281c5ef45c8bff 100644 (file)
@@ -30,30 +30,32 @@ namespace focusfinder
 {
 FaceLandmarker::FaceLandmarker()
 {
-    _face_landmarker = inference::InferenceTaskFactory::instance().create("MvInferenceTaskFactory")->createFaceLandmarkDetection();
+       _face_landmarker =
+                       inference::InferenceTaskFactory::instance().create("MvInferenceTaskFactory")->createFaceLandmarkDetection();
 
-    _face_landmarker->configure();
-    _face_landmarker->prepare();
+       _face_landmarker->configure();
+       _face_landmarker->prepare();
 }
 
-
-FaceResult& FaceLandmarker::handle(BaseDataType &input, FaceResult &result)
+FaceResult &FaceLandmarker::handle(BaseDataType &input, FaceResult &result)
 {
-    if (!result._rects.empty()) {
-        for (auto &rect : result._rects) {
-            auto image = dynamic_cast<ImageDataType&>(input);
-            cv::Mat cv_data(cv::Size(image.width, image.height), CV_MAKE_TYPE(CV_8U, image.byte_per_pixel), image.ptr);
-            cv::Mat roi_cv_Data = cv_data(cv::Rect(rect.left, rect.top, (rect.right - rect.left), (rect.bottom - rect.top))).clone();
-            ImageDataType roi_image(roi_cv_Data.data, roi_cv_Data.cols, roi_cv_Data.rows, roi_cv_Data.channels(), ImagePixelFormat::RGB888);
-
-            _face_landmarker->invoke(roi_image, false);
-            auto landmark_result = dynamic_cast<FldResultType&>(_face_landmarker->result());
-            result._landmarks.push_back(landmark_result._points);
-        }
-    }
-    result = FaceTaskManager::handle(input, result);
-
-    return result;
+       if (!result._rects.empty()) {
+               for (auto &rect : result._rects) {
+                       auto image = dynamic_cast<ImageDataType &>(input);
+                       cv::Mat cvData(cv::Size(image.width, image.height), CV_MAKE_TYPE(CV_8U, image.byte_per_pixel), image.ptr);
+                       cv::Mat roiCvData =
+                                       cvData(cv::Rect(rect.left, rect.top, (rect.right - rect.left), (rect.bottom - rect.top))).clone();
+                       ImageDataType roiImage(roiCvData.data, roiCvData.cols, roiCvData.rows, roiCvData.channels(),
+                                                                  ImagePixelFormat::RGB888);
+
+                       _face_landmarker->invoke(roiImage, false);
+                       auto landmark_result = dynamic_cast<FldResultType &>(_face_landmarker->result());
+                       result._landmarks.push_back(landmark_result._points);
+               }
+       }
+       result = FaceTaskManager::handle(input, result);
+
+       return result;
 }
 
 }
diff --git a/services/focus_finder/src/FaceShapeModelManager.cpp b/services/focus_finder/src/FaceShapeModelManager.cpp
new file mode 100644 (file)
index 0000000..b9cdf2b
--- /dev/null
@@ -0,0 +1,101 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <fstream>
+#include <sstream>
+#include <iostream>
+#include "FaceShapeModelManager.h"
+#include "SingleoLog.h"
+
+using namespace std;
+using namespace cv;
+
+namespace singleo
+{
+namespace services
+{
+namespace focusfinder
+{
+FaceShapeModelManager::FaceShapeModelManager(const string &model_path)
+{
+       if (model_path.empty())
+               throw invalid_argument("Invalid face shape model path");
+
+       loadModelFromFile(model_path);
+       _faceShapeIndices.insert(
+                       make_pair<FaceComponents, vector<int> >(FaceComponents::NOSE_RIDGE, vector<int> { 27, 28, 29, 30 }));
+       _faceShapeIndices.insert(
+                       make_pair<FaceComponents, vector<int> >(FaceComponents::NOSE_BASE, vector<int> { 31, 32, 33, 34, 35 }));
+       _faceShapeIndices.insert(
+                       make_pair<FaceComponents, vector<int> >(FaceComponents::LEFT_EYE_CORNERS, vector<int> { 36, 39 }));
+       _faceShapeIndices.insert(
+                       make_pair<FaceComponents, vector<int> >(FaceComponents::RIGHT_EYE_CORNERS, vector<int> { 42, 45 }));
+}
+
+FaceShapeModelManager::~FaceShapeModelManager()
+{}
+
+template<typename T> T FaceShapeModelManager::ToNumber(const string &text)
+{
+       stringstream ss(text);
+       T number;
+       ss >> number;
+       return number;
+}
+
+void FaceShapeModelManager::loadModelFromFile(const string &model_path)
+{
+       fstream file(model_path, ios::in);
+       if (!file.is_open())
+               throw runtime_error("Fail to open face shape model file");
+
+       string line;
+       getline(file, line);
+
+       _faceShape.resize(ToNumber<int>(line));
+       for (auto &point : _faceShape) {
+               getline(file, line);
+               point.x = -1.0f * ToNumber<float>(line);
+       }
+
+       for (auto &point : _faceShape) {
+               getline(file, line);
+               point.y = -1.0f * ToNumber<float>(line);
+       }
+
+       for (auto &point : _faceShape) {
+               getline(file, line);
+               point.z = ToNumber<float>(line);
+       }
+
+       SINGLEO_LOGI("%zd landarks", _faceShape.size());
+}
+
+const vector<Point3f> &FaceShapeModelManager::getFaceShape()
+{
+       return _faceShape;
+}
+
+const vector<int> &FaceShapeModelManager::getFaceShapeIndices(FaceComponents component)
+{
+       auto indices = _faceShapeIndices.find(component);
+       return indices->second;
+}
+
+template float FaceShapeModelManager::ToNumber(const string &text);
+} // focusfinder
+} // services
+} // singleo
index caa63e3f1fa879e867dba3572dc7624979287df9..a1e31f75ab0db58dd9e682a6552c0edc8fed0856 100644 (file)
@@ -26,27 +26,23 @@ namespace services
 namespace focusfinder
 {
 FaceTaskManager::FaceTaskManager() : _next(nullptr)
-{
-
-}
+{}
 
 FaceTaskManager::~FaceTaskManager()
-{
-
-}
+{}
 
 shared_ptr<IFaceTaskManager> FaceTaskManager::setNext(shared_ptr<IFaceTaskManager> next)
 {
-    _next = next;
-    return next;
+       _next = next;
+       return next;
 }
 
-FaceResultFaceTaskManager::handle(BaseDataType &input, FaceResult &result)
+FaceResult &FaceTaskManager::handle(BaseDataType &input, FaceResult &result)
 {
-    if (_next)
-        return _next->handle(input, result);
+       if (_next)
+               return _next->handle(input, result);
 
-    return result;
+       return result;
 }
 } // focusfinder
 } // services
index 62c739f17b9783140919f4ccb25272a8daf9fac9..f9515eaa91f6454daa9c43a557910cdbb8ac15d5 100644 (file)
@@ -30,20 +30,31 @@ namespace focusfinder
 {
 GazeEstimator::GazeEstimator()
 {
-    _face_tasker = make_shared<FaceDetector>();
-    _face_tasker->setNext(make_shared<FaceLandmarker>());
+       _face_tasker = make_shared<FaceDetector>();
+       _face_tasker->setNext(make_shared<FaceLandmarker>());
+
+       const vector<FaceComponents> _faceComponents { FaceComponents::NOSE_RIDGE, FaceComponents::NOSE_BASE,
+                                                                                                  FaceComponents::LEFT_EYE_CORNERS,
+                                                                                                  FaceComponents::RIGHT_EYE_CORNERS };
+       _head_pose_estimator = make_unique<HeadPoseEstimator>(_faceComponents);
 }
 
 GazeEstimator::~GazeEstimator()
-{
-
-}
+{}
 
 void GazeEstimator::run(BaseDataType &input)
 {
-    FaceResult result;
-     _face_tasker->handle(input, result);
-     SINGLEO_LOGI("FaceResult: faces %zd, landmarks %zd-%zd", result._rects.size(), result._landmarks.size(), result._landmarks[0].size());
+       FaceResult result;
+       _face_tasker->handle(input, result);
+
+       SINGLEO_LOGI("FaceResult: faces %zu, landmarks %zu-%zu", result._rects.size(), result._landmarks.size(),
+                                result._landmarks[0].size());
+
+       // TODO: use multiple faces
+       auto head_pose = _head_pose_estimator->estimate(result._landmarks[0]);
+       SINGLEO_LOGI("head pose: \n\t rotation(x,y,z): %.3f, %.3f, %.3f\n\t translation(x,y,z): %.3f, %.3f, %.3f\n",
+                                get<0>(head_pose).x, get<0>(head_pose).y, get<0>(head_pose).z, get<1>(head_pose).x,
+                                get<1>(head_pose).y, get<1>(head_pose).z);
 }
 
 } // smartpointer
diff --git a/services/focus_finder/src/HeadPoseEstimator.cpp b/services/focus_finder/src/HeadPoseEstimator.cpp
new file mode 100644 (file)
index 0000000..508ade9
--- /dev/null
@@ -0,0 +1,209 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <tuple>
+#include "FaceShapeModelManager.h"
+#include "CameraParamManager.h"
+#include "HeadPoseEstimator.h"
+#include "SingleoLog.h"
+
+#include <iostream>
+
+using namespace std;
+using namespace cv;
+
+namespace singleo
+{
+namespace services
+{
+namespace focusfinder
+{
+HeadPoseEstimator::HeadPoseEstimator(const vector<FaceComponents> &components)
+{
+       FaceShapeModelManager faceShapeModelmgr("/usr/share/singleo/pdm.txt");
+       const vector<Point3f> faceShape = faceShapeModelmgr.getFaceShape();
+
+       for (auto &component : components) {
+               for (auto &index : faceShapeModelmgr.getFaceShapeIndices(component)) {
+                       _landmarks_3d.push_back(Point3f(faceShape[index].x, faceShape[index].y, faceShape[index].z));
+                       _faceComponentIndices.push_back(index);
+               }
+       }
+
+       // Nose base landmarks are 31, 32, 33, 34, 35 and 33 is the nose tip
+       int noseTipIndex = faceShapeModelmgr.getFaceShapeIndices(FaceComponents::NOSE_BASE)[2];
+       _pose_axes_3d.push_back(Point3f(faceShape[noseTipIndex].x, faceShape[noseTipIndex].y, faceShape[noseTipIndex].z));
+       _pose_axes_3d.push_back(
+                       Point3f(faceShape[noseTipIndex].x - 50, faceShape[noseTipIndex].y, faceShape[noseTipIndex].z));
+       _pose_axes_3d.push_back(
+                       Point3f(faceShape[noseTipIndex].x, faceShape[noseTipIndex].y - 50, faceShape[noseTipIndex].z));
+       _pose_axes_3d.push_back(
+                       Point3f(faceShape[noseTipIndex].x, faceShape[noseTipIndex].y, faceShape[noseTipIndex].z - 50));
+       _pose_axes.resize(_pose_axes_3d.size());
+
+       _landmarks_2d.resize(_landmarks_3d.size());
+       _rotation_vector = Mat(Size(3, 1), CV_32FC1);
+       _translation_vector = Mat(Size(3, 1), CV_32FC1);
+
+       CameraParamManager cam_dummy("/usr/share/singleo/camera_param_dummy.yaml");
+
+       _camera_matrix = Mat::zeros(Size(3, 3), CV_32FC1);
+       tuple<float, float> focalLength = cam_dummy.getFocalLengthXY();
+       tuple<float, float> principlePoint = cam_dummy.getPrinciplePointXY();
+       _camera_matrix.at<float>(0, 0) = get<0>(focalLength);
+       _camera_matrix.at<float>(1, 1) = get<1>(focalLength);
+       _camera_matrix.at<float>(0, 2) = get<0>(principlePoint);
+       _camera_matrix.at<float>(1, 2) = get<1>(principlePoint);
+       _camera_matrix.at<float>(2, 2) = 1.0;
+
+       _camera_dist_coeff = Mat::zeros(Size(1, 5), CV_32FC1);
+       tuple<float, float, float, float, float> distCoeff = cam_dummy.getDistortionCoeff();
+       _camera_dist_coeff.at<float>(0, 0) = get<0>(distCoeff);
+       _camera_dist_coeff.at<float>(0, 1) = get<1>(distCoeff);
+       _camera_dist_coeff.at<float>(0, 2) = get<2>(distCoeff);
+       _camera_dist_coeff.at<float>(0, 3) = get<3>(distCoeff);
+       _camera_dist_coeff.at<float>(0, 4) = get<4>(distCoeff);
+
+       for (int r = 0; r < _camera_matrix.rows; ++r) {
+               for (int c = 0; c < _camera_matrix.cols; ++c) {
+                       SINGLEO_LOGI("%.3f", _camera_matrix.at<float>(r, c));
+               }
+       }
+       for (int r = 0; r < _camera_dist_coeff.rows; ++r) {
+               for (int c = 0; c < _camera_dist_coeff.cols; ++c) {
+                       SINGLEO_LOGI("%.3f", _camera_dist_coeff.at<float>(r, c));
+               }
+       }
+}
+
+HeadPoseEstimator::~HeadPoseEstimator()
+{}
+
+void HeadPoseEstimator::print3x3Matrix(Mat &matrix, string name)
+{
+       SINGLEO_LOGI("Print %s matrix", name.c_str());
+       SINGLEO_LOGI("%d x %d", matrix.cols, matrix.rows);
+       for (int r = 0; r < matrix.rows; ++r) {
+               SINGLEO_LOGD("%f, %f, %f", matrix.at<float>(r, 0), matrix.at<float>(r, 1), matrix.at<float>(r, 2));
+       }
+}
+
+void HeadPoseEstimator::print3x1Matrix(Mat &matrix, string name)
+{
+       SINGLEO_LOGI("Print %s matrix", name.c_str());
+       SINGLEO_LOGI("%d x %d", matrix.cols, matrix.rows);
+       for (int r = 0; r < matrix.rows; ++r) {
+               SINGLEO_LOGD("%f", matrix.at<double>(r, 0));
+       }
+}
+
+void HeadPoseEstimator::printVec3f(Vec3f &vec, string name)
+{
+       SINGLEO_LOGI("Print %s vector", name.c_str());
+       SINGLEO_LOGD("%.3f, %.3f, %.3f", vec[0], vec[1], vec[2]);
+}
+
+tuple<Point3f, Point3f> HeadPoseEstimator::estimate(const vector<Point> &landmark2d)
+{
+       // TODO: parallel??, c++17 structured binding??
+       for (size_t idx = 0; idx < landmark2d.size(); ++idx) {
+               _landmarks_2d[idx].x = landmark2d[idx].x;
+               _landmarks_2d[idx].y = landmark2d[idx].y;
+       }
+
+       solvePnPRansac(_landmarks_3d, _landmarks_2d, _camera_matrix, _camera_dist_coeff, _rotation_vector,
+                                  _translation_vector, false, 100, 3.0F, 0.99, noArray(), SOLVEPNP_EPNP);
+
+       // estimate pose
+       // https://docs.opencv.org/4.x/d9/d0c/group__calib3d.html#ga357634492a94efe8858d0ce1509da869
+       solvePnP(_landmarks_3d, _landmarks_2d, _camera_matrix, _camera_dist_coeff, _rotation_vector, _translation_vector,
+                        true, SOLVEPNP_ITERATIVE);
+
+       Rodrigues(_rotation_vector, _rotation_matrix);
+       auto rotation_eulerAngles = rotationMatrixToEulerAngles(_rotation_matrix);
+
+       return make_tuple(Point3f { rotation_eulerAngles[0], rotation_eulerAngles[1], rotation_eulerAngles[2] },
+                                         Point3f { static_cast<float>(_translation_vector.at<double>(0)),
+                                                               static_cast<float>(_translation_vector.at<double>(1)),
+                                                               static_cast<float>(_translation_vector.at<double>(2)) });
+}
+
+vector<Point2f> &HeadPoseEstimator::getPoseAxes()
+{
+       vector<Point2f> poseAxes2d;
+       projectPoints(_pose_axes_3d, _rotation_vector, _translation_vector, _camera_matrix, _camera_dist_coeff, poseAxes2d);
+
+       for (size_t i = 0; i < poseAxes2d.size(); i++) {
+               _pose_axes[i].x = poseAxes2d[i].x;
+               _pose_axes[i].y = poseAxes2d[i].y;
+       }
+
+       return _pose_axes;
+}
+
+double HeadPoseEstimator::calculateReProjectectionError()
+{
+       vector<Point2f> landmarks_2d;
+       projectPoints(_landmarks_3d, _rotation_vector, _translation_vector, _camera_matrix, _camera_dist_coeff,
+                                 landmarks_2d);
+       double error = 0.0f;
+
+       for (size_t i = 0; i < _landmarks_2d.size(); i++) {
+               double diff = norm(Mat(_landmarks_2d[i]), Mat(landmarks_2d[i]), NORM_L2);
+               error += diff * diff;
+       }
+
+       return sqrt(error / _landmarks_2d.size());
+}
+
+vector<int> &HeadPoseEstimator::getFaceComponentsIndieces()
+{
+       return _faceComponentIndices;
+}
+
+bool HeadPoseEstimator::validRotationMatrix(Mat &matrix)
+{
+       Mat matrixT;
+       transpose(matrix, matrixT);
+       Mat shouldBeIdentity = matrixT * matrix;
+       Mat I = Mat::eye(3, 3, shouldBeIdentity.type());
+
+       return norm(I, shouldBeIdentity) < 1e-6;
+}
+
+Vec3f HeadPoseEstimator::rotationMatrixToEulerAngles(Mat &matrix)
+{
+       float sy =
+                       sqrt(matrix.at<double>(0, 0) * matrix.at<double>(0, 0) + matrix.at<double>(1, 0) * matrix.at<double>(1, 0));
+
+       bool singular = sy < 1e-6; // If
+
+       float x, y, z;
+       if (!singular) {
+               x = atan2(matrix.at<double>(2, 1), matrix.at<double>(2, 2));
+               y = atan2(-matrix.at<double>(2, 0), sy);
+               z = atan2(matrix.at<double>(1, 0), matrix.at<double>(0, 0));
+       } else {
+               x = atan2(-matrix.at<double>(1, 2), matrix.at<double>(1, 1));
+               y = atan2(-matrix.at<double>(2, 0), sy);
+               z = 0;
+       }
+       return Vec3f(x, y, z);
+}
+
+} // focusfinder
+} // services
+} // singleo