}
};
-enum class ResultType { NONE, OBJECT_DETECTION, FACE_DETECTION, FACE_LANDMARK, IMAGE_CLASSIFICATION, FACE_RECOGNITION };
+enum class ResultType { NONE, OBJECT_DETECTION, FACE_DETECTION, FACE_LANDMARK, IMAGE_CLASSIFICATION, FACE_RECOGNITION, GAZE_ESTIMATION };
struct BaseResultType {
ResultType _type { ResultType::NONE };
}
};
+struct GazeResultType : public BaseResultType {
+ GazeResultType() : BaseResultType(ResultType::GAZE_ESTIMATION)
+ {}
+
+ double _pitch {};
+ double _yaw {};
+ std::vector<Point> _points; // 0: center point, 1: gazed point
+
+ std::shared_ptr<BaseResultType> clone() override
+ {
+ return std::make_shared<GazeResultType>(*this);
+ }
+};
+
enum class ServiceType { NONE, AUTO_ZOOM, FOCUS_FINDER };
enum class InputFeedType { NONE, CAMERA, SCREEN_CAPTURE };
${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvObjectDetection.cpp
${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvImageClassification.cpp
${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvFaceRecognition.cpp
+ ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvGazeEstimation.cpp
${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvInferenceTaskFactory.cpp
)
-LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection mv_landmark_detection mv_image_classification mv_face_recognition)
+LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection mv_landmark_detection mv_image_classification mv_face_recognition mv_gaze_estimation)
LIST(APPEND INFERENCE_HEADER_LIST ${INFERENCE_HEADER_LIST} ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/include /usr/include/media)
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MV_GAZE_ESTIMATION_H__
+#define __MV_GAZE_ESTIMATION_H__
+
+#include "IInferenceTaskInterface.h"
+#include "mv_gaze_estimation_internal.h"
+#include "SingleoCommonTypes.h"
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+class MvGazeEstimation : public IInferenceTaskInterface
+{
+private:
+ mv_gaze_estimation_h _handle {};
+ GazeResultType _output_data {};
+
+public:
+ MvGazeEstimation();
+ virtual ~MvGazeEstimation();
+
+ void configure() override;
+ void prepare() override;
+ void invoke(BaseDataType &input, bool async) override;
+ BaseResultType &result() override;
+};
+
+} // backends
+} // inference
+} // singleo
+
+#endif
std::unique_ptr<IInferenceTaskInterface> createFaceRecognition() override;
std::unique_ptr<IInferenceTaskInterface> createHandDetection() override;
std::unique_ptr<IInferenceTaskInterface> createHandLandmark() override;
+ std::unique_ptr<IInferenceTaskInterface> createGazeEstimation() override;
};
}
void MvFaceLandmark::configure()
{
- int ret = mv_facial_landmark_configure(_handle);
+ int ret = mv_facial_landmark_set_model(_handle, "fld_u2net_160x160.tflite", "fld_u2net_160x160.json",
+ "", "FLD_U2NET");
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to set face landmark model.");
+
+ ret = mv_facial_landmark_configure(_handle);
if (ret != MEDIA_VISION_ERROR_NONE)
throw runtime_error("Fail to configure face landmark detection.");
}
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdexcept>
+#include "MvGazeEstimation.h"
+#include "SingleoLog.h"
+
+using namespace std;
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+MvGazeEstimation::MvGazeEstimation()
+{
+ int ret = mv_gaze_estimation_create(&_handle);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to create gaze estimation handle.");
+}
+
+MvGazeEstimation::~MvGazeEstimation()
+{
+ int ret = mv_gaze_estimation_destroy(_handle);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ SINGLEO_LOGE("Fail to destroy gaze estimation handle. (%d)", ret);
+}
+
+void MvGazeEstimation::configure()
+{
+ int ret = mv_gaze_estimation_configure(_handle);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to configure gaze estimation.");
+}
+
+void MvGazeEstimation::prepare()
+{
+ int ret = mv_gaze_estimation_prepare(_handle);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to prepare gaze estimation.");
+}
+
+void MvGazeEstimation::invoke(BaseDataType &input, bool async)
+{
+ if (input._data_type != DataType::IMAGE) {
+ SINGLEO_LOGE("Invalid input type.");
+ throw invalid_argument("Input type not supported.");
+ }
+
+ ImageDataType &data = dynamic_cast<ImageDataType &>(input);
+
+ mv_source_h mv_src;
+
+ int ret = mv_create_source(&mv_src);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to create mv source.");
+
+ try {
+ ret = mv_source_fill_by_buffer(mv_src, data.ptr, data.width * data.height * data.byte_per_pixel, data.width,
+ data.height, MEDIA_VISION_COLORSPACE_RGB888);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to convert to mv source.");
+
+ ret = mv_gaze_estimation_inference(_handle, mv_src);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to invoke gaze estimation.");
+ } catch (std::runtime_error &e) {
+ SINGLEO_LOGE("%s", e.what());
+ mv_destroy_source(mv_src);
+ throw;
+ }
+
+ ret = mv_destroy_source(mv_src);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to destroy mv source.");
+}
+
+BaseResultType &MvGazeEstimation::result()
+{
+ unsigned long frame_number;
+ unsigned int result_cnt;
+
+ int ret = mv_gaze_estimation_get_result_count(_handle, &frame_number, &result_cnt);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to get gaze estimaton result count.");
+
+ _output_data._frame_number = frame_number;
+ _output_data._is_empty = result_cnt == 0;
+
+ if (!_output_data._is_empty) {
+ double pitch, yaw;
+
+ ret = mv_gaze_estimation_get_direction(_handle, &pitch, &yaw);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to get gaze direction.");
+
+ _output_data._pitch = pitch;
+ _output_data._yaw = yaw;
+ }
+
+ return _output_data;
+}
+
+} // backends
+} // inference
+} // singleo
#include "MvImageClassification.h"
#include "MvHandDetection.h"
#include "MvHandLandmark.h"
+#include "MvGazeEstimation.h"
#include "SingleoLog.h"
#include "SingleoException.h"
return make_unique<MvHandLandmark>();
}
+std::unique_ptr<IInferenceTaskInterface> MvInferenceTaskFactory::createGazeEstimation()
+{
+ return make_unique<MvGazeEstimation>();
+}
}
}
virtual std::unique_ptr<IInferenceTaskInterface> createFaceRecognition() = 0;
virtual std::unique_ptr<IInferenceTaskInterface> createHandDetection() = 0;
virtual std::unique_ptr<IInferenceTaskInterface> createHandLandmark() = 0;
+ virtual std::unique_ptr<IInferenceTaskInterface> createGazeEstimation() = 0;
};
}
class FaceShapeModelManager
{
private:
- std::vector<cv::Point3f> _faceShape;
+ std::vector<cv::Point3d> _faceShape;
- void loadModelFromFile(const std::string &model_path);
template<typename T> T ToNumber(const std::string &text);
std::map<FaceComponents, std::vector<int> > _faceShapeIndices;
+ void createFaceShapeIndex();
public:
+ FaceShapeModelManager();
FaceShapeModelManager(const std::string &model_path);
~FaceShapeModelManager();
- const std::vector<cv::Point3f> &getFaceShape();
+ const std::vector<cv::Point3d> &getFaceShape();
const std::vector<int> &getFaceShapeIndices(FaceComponents component);
+ void loadModelFromFile(const std::string &model_path);
};
} // focusfinder
} // services
std::mutex _input_mutex;
FocusFinderResult _result {};
- std::map<std::string, FocusFinderResultType> _result_keys = { { "X", FocusFinderResultType::X },
- { "Y", FocusFinderResultType::Y } };
+ std::map<std::string, FocusFinderResultType> _result_keys = { { "CENTER_X", FocusFinderResultType::CX },
+ { "CENTER_Y", FocusFinderResultType::CY },
+ { "GAZE_X", FocusFinderResultType::GX },
+ { "GAZE_Y", FocusFinderResultType::GY } };
bool _async_mode { false };
std::unique_ptr<AsyncManager<ImageDataType, FocusFinderResult> > _async_manager;
void *_user_data {};
bool isKeyValid(std::string key);
- void updateResult(const std::vector<Point> &axes);
+ void updateResult(const GazeResultType &gaze);
public:
FocusFinder();
NOSE_BASE,
LEFT_EYE_CORNERS,
RIGHT_EYE_CORNERS,
+ MOUTH_CORNERS
};
}
}
{
namespace focusfinder
{
-enum class FocusFinderResultType { X, Y };
+enum class FocusFinderResultType { CX, CY, GX, GY };
struct FocusFinderResult : public ServiceBaseResultType {
FocusFinderResult() : ServiceBaseResultType(ServiceResultType::FOCUS_FINDER)
{}
unsigned int frame_number {};
bool is_focused { false };
+ Point center;
Point focus;
- std::vector<Point> head_axes;
+ double pitch {};
+ double yaw {};
};
}
}
#include "FocusFinderDataTypes.h"
#include "IFaceTaskManager.h"
#include "HeadPoseEstimator.h"
+#include "IInferenceTaskInterface.h"
namespace singleo
{
class GazeEstimator
{
private:
+ GazeResultType _result;
std::shared_ptr<IFaceTaskManager> _face_tasker;
std::unique_ptr<HeadPoseEstimator> _head_pose_estimator;
std::vector<Point> _head_axes;
+ std::unique_ptr<inference::IInferenceTaskInterface> _gaze_estimator;
+
+ void drawHeadPose(FaceResult &result, ImageDataType &image, cv::Point3f &rotvec, cv::Point3f &tranvec);
public:
GazeEstimator();
~GazeEstimator();
- const std::vector<Point> run(BaseDataType &input);
+ const GazeResultType &run(BaseDataType &input);
};
} // focusfinder
} // services
#include <opencv2/imgproc.hpp>
#include <opencv2/calib3d.hpp>
#include "SingleoCommonTypes.h"
+#include "FaceShapeModelManager.h"
namespace singleo
{
class HeadPoseEstimator
{
private:
- std::vector<cv::Point2f> _landmarks_2d;
- std::vector<cv::Point3f> _landmarks_3d;
+ std::vector<cv::Point2d> _landmarks_2d;
+ std::vector<cv::Point3d> _landmarks_3d;
+
std::vector<int> _faceComponentIndices;
cv::Mat _camera_matrix;
+ cv::Mat _camera_matrix_inv;
+ cv::Mat _normalized_camera_matrix;
cv::Mat _camera_dist_coeff;
cv::Mat _rotation_vector;
cv::Mat _translation_vector;
cv::Mat _rotation_matrix;
+ cv::Mat _translation_matrix;
+ cv::Mat _normalized_rotation_matrix;
+ cv::Mat _landmarks_3d_matrix;
+ cv::Mat _normalized_landmarks_3d_matrix;
+ cv::Mat _projection_matrix;
std::vector<cv::Point3f> _pose_axes_3d;
std::vector<Point> _pose_axes;
+ cv::Point3d _center_point;
+ cv::Point3d _gaze_vector;
bool _isInitialized;
+ FaceShapeModelManager _faceShapeModelmgr;
+
bool validRotationMatrix(cv::Mat &matrix);
cv::Vec3f rotationMatrixToEulerAngles(cv::Mat &matrix);
double calculateReProjectectionError();
void print3x3Matrix(cv::Mat &matrix, std::string name);
void print3x1Matrix(cv::Mat &matrix, std::string name);
void printVec3f(cv::Vec3f &vec, std::string name);
+ void rtvecToRtMatrix(cv::Mat &rvec, cv::Mat &tvec);
+ void normalizedRotationMatrix();
+ void calculateCenter();
+ void calculateProjectionMatrix();
public:
HeadPoseEstimator(const std::vector<FaceComponents> &components);
~HeadPoseEstimator();
- std::tuple<cv::Point3f, cv::Point3f> estimate(const FaceResult &face);
+ cv::Mat &estimate(const FaceResult &face);
std::vector<Point> &getPoseAxes();
std::vector<int> &getFaceComponentsIndieces();
+ GazeResultType denormalizeGazeAngle(GazeResultType &normalized_angle);
};
} // smartpointer
} // services
rows: 3
cols: 3
dt: f
- data: [324.0, 0.0, 324.0,
- 0.0, 324.0, 216.0,
+ data: [1280.0, 0.0, 640.0,
+ 0.0, 1280.0, 959.0,
0.0, 0.0, 1.0]
dist_coeff: !!opencv-matrix
rows: 5
{
namespace focusfinder
{
+FaceShapeModelManager::FaceShapeModelManager()
+{}
+
FaceShapeModelManager::FaceShapeModelManager(const string &model_path)
{
if (model_path.empty())
throw invalid_argument("Invalid face shape model path");
loadModelFromFile(model_path);
- _faceShapeIndices.insert(
- make_pair<FaceComponents, vector<int> >(FaceComponents::NOSE_RIDGE, vector<int> { 27, 28, 29, 30 }));
- _faceShapeIndices.insert(
- make_pair<FaceComponents, vector<int> >(FaceComponents::NOSE_BASE, vector<int> { 31, 32, 33, 34, 35 }));
- _faceShapeIndices.insert(
- make_pair<FaceComponents, vector<int> >(FaceComponents::LEFT_EYE_CORNERS, vector<int> { 36, 39 }));
- _faceShapeIndices.insert(
- make_pair<FaceComponents, vector<int> >(FaceComponents::RIGHT_EYE_CORNERS, vector<int> { 42, 45 }));
}
FaceShapeModelManager::~FaceShapeModelManager()
return number;
}
+void FaceShapeModelManager::createFaceShapeIndex()
+{
+ _faceShapeIndices.insert(
+ make_pair<FaceComponents, vector<int> >(FaceComponents::NOSE_RIDGE, vector<int> { 27, 28, 29, 30 }));
+ _faceShapeIndices.insert(
+ make_pair<FaceComponents, vector<int> >(FaceComponents::NOSE_BASE, vector<int> { 31, 32, 33, 34, 35 }));
+ _faceShapeIndices.insert(
+ make_pair<FaceComponents, vector<int> >(FaceComponents::RIGHT_EYE_CORNERS, vector<int> { 36, 39 }));
+ _faceShapeIndices.insert(
+ make_pair<FaceComponents, vector<int> >(FaceComponents::LEFT_EYE_CORNERS, vector<int> { 42, 45 }));
+ _faceShapeIndices.insert(
+ make_pair<FaceComponents, vector<int> >(FaceComponents::MOUTH_CORNERS, vector<int> { 48, 54 }));
+}
+
void FaceShapeModelManager::loadModelFromFile(const string &model_path)
{
fstream file(model_path, ios::in);
_faceShape.resize(ToNumber<int>(line));
for (auto &point : _faceShape) {
getline(file, line);
- point.x = -1.0f * ToNumber<float>(line);
+ point.x = ToNumber<double>(line) / 1000.0;
}
for (auto &point : _faceShape) {
getline(file, line);
- point.y = -1.0f * ToNumber<float>(line);
+ point.y = ToNumber<double>(line) / 1000.0;
}
for (auto &point : _faceShape) {
getline(file, line);
- point.z = ToNumber<float>(line);
+ point.z = ToNumber<double>(line) / 1000.0;
}
SINGLEO_LOGI("%zd landarks", _faceShape.size());
+ createFaceShapeIndex();
}
-const vector<Point3f> &FaceShapeModelManager::getFaceShape()
+const vector<Point3d> &FaceShapeModelManager::getFaceShape()
{
return _faceShape;
}
});
}
-void FocusFinder::updateResult(const vector<Point> &axes)
+void FocusFinder::updateResult(const GazeResultType &gaze)
{
FocusFinderResult result;
- // TODO: Implement focus point calculation based on head axes.
- result.focus = Point { .x = 0, .y = 0 };
+ // TODO: Implement focus point calculation based on gaze.
- result.head_axes = axes;
- if (!result.head_axes.empty()) {
- result.is_focused = true;
- }
+ if (gaze._points.empty())
+ return;
+
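+ // gaze._points[0] is the face center and gaze._points[1] the gazed point,
+ // both projected onto the image plane (see GazeResultType).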
+ result.is_focused = true;
+ result.center = gaze._points[0];
+ result.focus = gaze._points[1];
+
+ result.pitch = gaze._pitch;
+ result.yaw = gaze._yaw;
if (_async_mode)
_async_manager->pushOutput(result);
}
void FocusFinder::getResultInt(unsigned int idx, std::string key, unsigned int *value)
-{}
+{
+ transform(key.begin(), key.end(), key.begin(), ::toupper);
+
+ SINGLEO_LOGD("given key = %s", key.c_str());
+
+ if (!isKeyValid(key))
+ throw InvalidParameter("A given result key is invalid.");
+
+ switch (_result_keys[key]) {
+ case FocusFinderResultType::CX:
+ *value = static_cast<unsigned int>(_result.center.x);
+ break;
+ case FocusFinderResultType::CY:
+ *value = static_cast<unsigned int>(_result.center.y);
+ break;
+ case FocusFinderResultType::GX:
+ *value = static_cast<unsigned int>(_result.focus.x);
+ break;
+ case FocusFinderResultType::GY:
+ *value = static_cast<unsigned int>(_result.focus.y);
+ break;
+ }
+}
void FocusFinder::registerUserCallback(singleo_user_cb_t user_cb, void *user_data)
{
#include "SingleoException.h"
#include "SingleoLog.h"
#include "FocusMapper.h"
+#include <iostream>
using namespace std;
using namespace cv;
_cv_image = cv::Mat(cv::Size(image_data.width, image_data.height), CV_MAKETYPE(CV_8U, 3), image_data.ptr).clone();
- line(_cv_image, Point2i(_result.head_axes[0].x, _result.head_axes[0].y),
- Point2i(_result.head_axes[1].x, _result.head_axes[1].y), Scalar(0, 0, 255), 2); // x - red
- line(_cv_image, Point2i(_result.head_axes[0].x, _result.head_axes[0].y),
- Point2i(_result.head_axes[2].x, _result.head_axes[2].y), Scalar(0, 255, 0), 2); // y - green
- line(_cv_image, Point2i(_result.head_axes[0].x, _result.head_axes[0].y),
- Point2i(_result.head_axes[3].x, _result.head_axes[3].y), Scalar(255, 0, 0), 2); // z - blue
+ line(_cv_image, Point2i(_result.center.x, _result.center.y),
+ Point2i(_result.focus.x, _result.focus.y), Scalar(0, 0, 255), 2); // projected gaze vector
_image_data.width = _cv_image.cols;
_image_data.height = _cv_image.rows;
#include "FaceDetector.h"
#include "FaceLandmarker.h"
#include "SingleoLog.h"
+#include "InferenceTaskFactory.h"
using namespace std;
using namespace cv;
const vector<FaceComponents> _faceComponents { FaceComponents::NOSE_RIDGE, FaceComponents::NOSE_BASE,
FaceComponents::LEFT_EYE_CORNERS,
- FaceComponents::RIGHT_EYE_CORNERS };
+ FaceComponents::RIGHT_EYE_CORNERS,
+ FaceComponents::MOUTH_CORNERS };
_head_pose_estimator = make_unique<HeadPoseEstimator>(_faceComponents);
+ _gaze_estimator =
+ inference::InferenceTaskFactory::instance().create("MvInferenceTaskFactory")->createGazeEstimation();
+ _gaze_estimator->configure();
+ _gaze_estimator->prepare();
}
GazeEstimator::~GazeEstimator()
{}
-const vector<Point> GazeEstimator::run(BaseDataType &input)
+const GazeResultType &GazeEstimator::run(BaseDataType &input)
{
FaceResult result;
_face_tasker->handle(input, result);
if (!result._landmarks.empty()) {
SINGLEO_LOGI("landmarks %zu-%zu", result._landmarks.size(), result._landmarks[0].size());
- auto head_pose = _head_pose_estimator->estimate(result);
- SINGLEO_LOGI("head pose: \n\t rotation(x,y,z): %.3f, %.3f, %.3f\n\t translation(x,y,z): %.3f, %.3f, %.3f\n",
- get<0>(head_pose).x, get<0>(head_pose).y, get<0>(head_pose).z, get<1>(head_pose).x,
- get<1>(head_pose).y, get<1>(head_pose).z);
+ auto projection_matrix = _head_pose_estimator->estimate(result);
- _head_axes = _head_pose_estimator->getPoseAxes();
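+ // estimate() returns the homography that warps the input frame into the
+ // normalized camera space; apply it and run the gaze model on the
+ // resulting 224x224 patch.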
+ auto &image = dynamic_cast<ImageDataType &>(input);
+ Mat cv_rgb_image(Size(image.width, image.height), CV_MAKETYPE(CV_8U, image.byte_per_pixel), image.ptr);
+ Mat cv_bgr_image;
+ cvtColor(cv_rgb_image, cv_bgr_image, COLOR_RGB2BGR);
+ Mat cv_warped_image;
+ warpPerspective(cv_bgr_image, cv_warped_image, projection_matrix, Size(224, 224));
+
+ ImageDataType normalized_image;
+ normalized_image.width = cv_warped_image.cols;
+ normalized_image.height = cv_warped_image.rows;
+ normalized_image.byte_per_pixel = cv_warped_image.channels();
+ normalized_image.ptr = cv_warped_image.data;
+
+ _gaze_estimator->invoke(normalized_image);
+ auto normalized_gaze_angle = dynamic_cast<GazeResultType &>(_gaze_estimator->result());
+
+ _result = _head_pose_estimator->denormalizeGazeAngle(normalized_gaze_angle);
}
- return _head_axes;
+ return _result;
}
} // focusfinder
*/
#include <tuple>
-#include "FaceShapeModelManager.h"
+#include <cmath>
#include "CameraParamManager.h"
#include "HeadPoseEstimator.h"
#include "SingleoLog.h"
{
HeadPoseEstimator::HeadPoseEstimator(const vector<FaceComponents> &components)
{
- FaceShapeModelManager faceShapeModelmgr("/usr/share/singleo/pdm.txt");
- const vector<Point3f> faceShape = faceShapeModelmgr.getFaceShape();
+ _faceShapeModelmgr.loadModelFromFile("/usr/share/singleo/pdm.txt");
+ const vector<Point3d> faceShape = _faceShapeModelmgr.getFaceShape();
for (auto &component : components) {
- for (auto &index : faceShapeModelmgr.getFaceShapeIndices(component)) {
- _landmarks_3d.push_back(Point3f(faceShape[index].x, faceShape[index].y, faceShape[index].z));
+ for (auto &index : _faceShapeModelmgr.getFaceShapeIndices(component)) {
+ _landmarks_3d.push_back(Point3d(faceShape[index].x, faceShape[index].y, faceShape[index].z));
_faceComponentIndices.push_back(index);
}
}
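+ // Cache the selected 3D landmarks as an N x 3 matrix so estimate() can
+ // normalize them with a single gemm() call.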
+ Mat landmarks_3d_matrix(Size(3, _landmarks_3d.size()), CV_64F);
+ for (size_t i = 0; i < _landmarks_3d.size(); i++) {
+ landmarks_3d_matrix.at<double>(i, 0) = _landmarks_3d[i].x;
+ landmarks_3d_matrix.at<double>(i, 1) = _landmarks_3d[i].y;
+ landmarks_3d_matrix.at<double>(i, 2) = _landmarks_3d[i].z;
+ }
+ _landmarks_3d_matrix = landmarks_3d_matrix.clone();
+
// Nose base landmarks are 31, 32, 33, 34, 35 and 33 is the nose tip
- int noseTipIndex = faceShapeModelmgr.getFaceShapeIndices(FaceComponents::NOSE_BASE)[2];
+ int noseTipIndex = _faceShapeModelmgr.getFaceShapeIndices(FaceComponents::NOSE_BASE)[2];
_pose_axes_3d.push_back(Point3f(faceShape[noseTipIndex].x, faceShape[noseTipIndex].y, faceShape[noseTipIndex].z));
_pose_axes_3d.push_back(
Point3f(faceShape[noseTipIndex].x - 50, faceShape[noseTipIndex].y, faceShape[noseTipIndex].z));
_pose_axes.resize(_pose_axes_3d.size());
_landmarks_2d.resize(_landmarks_3d.size());
- _rotation_vector = Mat(Size(3, 1), CV_32FC1);
- _translation_vector = Mat(Size(3, 1), CV_32FC1);
-
- CameraParamManager cam_dummy("/usr/share/singleo/camera_param_rs2_640.yaml");
-
- _camera_matrix = Mat::zeros(Size(3, 3), CV_32FC1);
- tuple<float, float> focalLength = cam_dummy.getFocalLengthXY();
- tuple<float, float> principlePoint = cam_dummy.getPrinciplePointXY();
- _camera_matrix.at<float>(0, 0) = get<0>(focalLength);
- _camera_matrix.at<float>(1, 1) = get<1>(focalLength);
- _camera_matrix.at<float>(0, 2) = get<0>(principlePoint);
- _camera_matrix.at<float>(1, 2) = get<1>(principlePoint);
- _camera_matrix.at<float>(2, 2) = 1.0;
-
- _camera_dist_coeff = Mat::zeros(Size(1, 5), CV_32FC1);
- tuple<float, float, float, float, float> distCoeff = cam_dummy.getDistortionCoeff();
- _camera_dist_coeff.at<float>(0, 0) = get<0>(distCoeff);
- _camera_dist_coeff.at<float>(0, 1) = get<1>(distCoeff);
- _camera_dist_coeff.at<float>(0, 2) = get<2>(distCoeff);
- _camera_dist_coeff.at<float>(0, 3) = get<3>(distCoeff);
- _camera_dist_coeff.at<float>(0, 4) = get<4>(distCoeff);
-
- for (int r = 0; r < _camera_matrix.rows; ++r) {
- for (int c = 0; c < _camera_matrix.cols; ++c) {
- SINGLEO_LOGI("%.3f", _camera_matrix.at<float>(r, c));
- }
- }
- for (int r = 0; r < _camera_dist_coeff.rows; ++r) {
- for (int c = 0; c < _camera_dist_coeff.cols; ++c) {
- SINGLEO_LOGI("%.3f", _camera_dist_coeff.at<float>(r, c));
- }
- }
+ _rotation_vector = Mat(Size(1, 3), CV_64F);
+ _translation_vector = Mat(Size(1, 3), CV_64F);
+ _translation_matrix = Mat(Size(_landmarks_3d.size(), 3), CV_64F);
+
+ // TODO: When using image, cam_dummy should be updated based on the image size.
+ // CameraParamManager cam_dummy("/usr/share/singleo/camera_param_rs2_640.yaml");
+ CameraParamManager cam_dummy("/usr/share/singleo/camera_param_dummy.yaml");
+
+ _camera_matrix = Mat::zeros(Size(3, 3), CV_64FC1);
+ tuple<double, double> focalLength = cam_dummy.getFocalLengthXY();
+ tuple<double, double> principlePoint = cam_dummy.getPrinciplePointXY();
+ _camera_matrix.at<double>(0, 0) = get<0>(focalLength);
+ _camera_matrix.at<double>(1, 1) = get<1>(focalLength);
+ _camera_matrix.at<double>(0, 2) = get<0>(principlePoint);
+ _camera_matrix.at<double>(1, 2) = get<1>(principlePoint);
+ _camera_matrix.at<double>(2, 2) = 1.0;
+ _camera_matrix_inv = _camera_matrix.inv();
+
+ _camera_dist_coeff = Mat::zeros(Size(1, 5), CV_64FC1);
+ tuple<double, double, double, double, double> distCoeff = cam_dummy.getDistortionCoeff();
+ _camera_dist_coeff.at<double>(0, 0) = get<0>(distCoeff);
+ _camera_dist_coeff.at<double>(0, 1) = get<1>(distCoeff);
+ _camera_dist_coeff.at<double>(0, 2) = get<2>(distCoeff);
+ _camera_dist_coeff.at<double>(0, 3) = get<3>(distCoeff);
+ _camera_dist_coeff.at<double>(0, 4) = get<4>(distCoeff);
+
+ // TODO: These values are only valid for MPIIFACEGAZE. They may need to change when other models are used.
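+ // The normalized camera is a virtual camera with focal length 1600 whose
+ // principal point (112, 112) is the center of the 224x224 patch fed to the
+ // gaze model (see warpPerspective() in GazeEstimator::run()).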
+ _normalized_camera_matrix = Mat::zeros(Size(3, 3), CV_64F);
+ _normalized_camera_matrix.at<double>(0, 0) = 1600.0;
+ _normalized_camera_matrix.at<double>(0, 2) = 112.0;
+ _normalized_camera_matrix.at<double>(1, 1) = 1600.0;
+ _normalized_camera_matrix.at<double>(1, 2) = 112.0;
+ _normalized_camera_matrix.at<double>(2, 2) = 1.0;
}
HeadPoseEstimator::~HeadPoseEstimator()
SINGLEO_LOGD("%.3f, %.3f, %.3f", vec[0], vec[1], vec[2]);
}
-tuple<Point3f, Point3f> HeadPoseEstimator::estimate(const FaceResult &face)
+void HeadPoseEstimator::rtvecToRtMatrix(cv::Mat &rvec, cv::Mat &tvec)
+{
+ // rvec (w x h): 1 x 3; _rotation_matrix: 3 x 3
+ Rodrigues(rvec, _rotation_matrix);
+
+ // tvec (w x h): 1 x 3; _translation_matrix (w x h): N x 3, one column per 3D landmark
+ for (int i = 0; i < _translation_matrix.cols; ++i) {
+ _translation_matrix.at<double>(0, i) = tvec.at<double>(0, 0);
+ _translation_matrix.at<double>(1, i) = tvec.at<double>(1, 0);
+ _translation_matrix.at<double>(2, i) = tvec.at<double>(2, 0);
+ }
+}
+
+void HeadPoseEstimator::calculateCenter()
+{
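+ // The face center is the mean of the selected eye- and mouth-corner
+ // landmarks in the normalized 3D landmark matrix.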
+ int nCenter = 0;
+ vector<FaceComponents> components = { FaceComponents::RIGHT_EYE_CORNERS, FaceComponents::LEFT_EYE_CORNERS, FaceComponents::MOUTH_CORNERS };
+ _center_point.x = _center_point.y = _center_point.z = 0.0;
+
+ for (const auto& component : components) {
+ for (auto &index : _faceShapeModelmgr.getFaceShapeIndices(component)) {
+ int mappedIndex = find(_faceComponentIndices.begin(), _faceComponentIndices.end(), index) - _faceComponentIndices.begin();
+ _center_point.x += _normalized_landmarks_3d_matrix.at<double>(mappedIndex, 0);
+ _center_point.y += _normalized_landmarks_3d_matrix.at<double>(mappedIndex, 1);
+ _center_point.z += _normalized_landmarks_3d_matrix.at<double>(mappedIndex, 2);
+ nCenter++;
+ }
+ }
+
+ _center_point.x /= static_cast<double>(nCenter);
+ _center_point.y /= static_cast<double>(nCenter);
+ _center_point.z /= static_cast<double>(nCenter);
+}
+
+void HeadPoseEstimator::normalizedRotationMatrix()
+{
+ // calculate center point of normalized 3d landmarks
+ calculateCenter();
+
+ // calculate _normalized_rotation_matrix
+ Mat z_axis = Mat(_center_point / norm(Mat(_center_point), NORM_L2));
+ Mat rot_x_axis = _rotation_matrix.col(0);
+ Mat y_tmp_axis = z_axis.cross(rot_x_axis);
+ Mat y_axis = y_tmp_axis / norm(y_tmp_axis, NORM_L2);
+ Mat x_tmp_axis = y_axis.cross(z_axis);
+ Mat x_axis = x_tmp_axis / norm(x_tmp_axis, NORM_L2);
+
+ // (w x h): 1x3 -> 3x1
+ vector<Mat> array_axis = { x_axis.t(), y_axis.t(), z_axis.t() };
+ vconcat(array_axis, _normalized_rotation_matrix);
+}
+
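+// Composes the homography that warps the input image into the normalized
+// camera space:
+// _projection_matrix = _normalized_camera_matrix @ scale_matrix @ _normalized_rotation_matrix @ _camera_matrix_inv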
+void HeadPoseEstimator::calculateProjectionMatrix()
+{
+ Mat scale_matrix = Mat::eye(Size(3, 3), CV_64F);
+ scale_matrix.at<double>(2, 2) = 1.0 / norm(Mat(_center_point), NORM_L2);
+
+ Mat conversion_matrix;
+ // conversion_matrix = scale_matrix @ _normalized_rotation_matrix
+ gemm(scale_matrix, _normalized_rotation_matrix, 1.0, noArray(), 0.0, conversion_matrix, 0);
+
+ // _projection_matrix = conversion_matrix @ _camera_matrix_inv
+ gemm(conversion_matrix, _camera_matrix_inv, 1.0, noArray(), 0.0, _projection_matrix, 0);
+ // _projection_matrix = _normalized_camera_matrix @ _projection_matrix
+ gemm(_normalized_camera_matrix, _projection_matrix, 1.0, noArray(), 0.0, _projection_matrix, 0);
+}
+
+Mat &HeadPoseEstimator::estimate(const FaceResult &face)
{
// TODO: parallel??, c++17 structured binding??
// TODO: use multiple faces
size_t idx = 0;
for (auto &compIdx : _faceComponentIndices) {
- _landmarks_2d[idx].x = face._landmarks[0][compIdx].x + face._rects[0].left;
- _landmarks_2d[idx].y = face._landmarks[0][compIdx].y + face._rects[0].top;
+ _landmarks_2d[idx].x = static_cast<double>(face._landmarks[0][compIdx].x + face._rects[0].left);
+ _landmarks_2d[idx].y = static_cast<double>(face._landmarks[0][compIdx].y + face._rects[0].top);
idx++;
}
+
solvePnPRansac(_landmarks_3d, _landmarks_2d, _camera_matrix, _camera_dist_coeff, _rotation_vector,
- _translation_vector, false, 100, 3.0F, 0.99, noArray(), SOLVEPNP_EPNP);
+ _translation_vector, false, 100, 3.0f, 0.99, noArray(), SOLVEPNP_EPNP);
// estimate pose
// https://docs.opencv.org/4.x/d9/d0c/group__calib3d.html#ga357634492a94efe8858d0ce1509da869
solvePnP(_landmarks_3d, _landmarks_2d, _camera_matrix, _camera_dist_coeff, _rotation_vector, _translation_vector,
true, SOLVEPNP_ITERATIVE);
- Rodrigues(_rotation_vector, _rotation_matrix);
- auto rotation_eulerAngles = rotationMatrixToEulerAngles(_rotation_matrix);
+ // convert the rotation and translation vectors to matrices
+ rtvecToRtMatrix(_rotation_vector, _translation_vector);
+
+ // normalize the 3D landmarks with the rotation and translation matrices (GEMM_3_T transposes _translation_matrix)
+ gemm(_landmarks_3d_matrix, _rotation_matrix, 1.0, _translation_matrix, 1.0, _normalized_landmarks_3d_matrix, GEMM_3_T);
+
+ normalizedRotationMatrix();
- return make_tuple(Point3f { rotation_eulerAngles[0], rotation_eulerAngles[1], rotation_eulerAngles[2] },
- Point3f { static_cast<float>(_translation_vector.at<double>(0)),
- static_cast<float>(_translation_vector.at<double>(1)),
- static_cast<float>(_translation_vector.at<double>(2)) });
+ calculateProjectionMatrix();
+
+ return _projection_matrix;
}
vector<Point> &HeadPoseEstimator::getPoseAxes()
return Vec3f(x, y, z);
}
+GazeResultType HeadPoseEstimator::denormalizeGazeAngle(GazeResultType &normalized_angle)
+{
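+ // Convert the model's normalized (pitch, yaw) angles to a unit gaze vector
+ // in the normalized camera frame; (0, 0) maps to (0, 0, -1), i.e. looking
+ // straight into the camera.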
+ Mat gaze_vector(Size(3, 1), CV_64F);
+ gaze_vector.at<double>(0, 0) = -1.0 * cos(normalized_angle._pitch) * sin(normalized_angle._yaw);
+ gaze_vector.at<double>(0, 1) = -1.0 * sin(normalized_angle._pitch);
+ gaze_vector.at<double>(0, 2) = -1.0 * cos(normalized_angle._pitch) * cos(normalized_angle._yaw);
+
+ Mat denormalized_gaze_vector;
+ // denormalized_gaze_vector = gaze_vector @ _normalized_rotation_matrix
+ gemm(gaze_vector, _normalized_rotation_matrix, 1.0, noArray(), 0.0, denormalized_gaze_vector, 0);
+
+ _gaze_vector.x = denormalized_gaze_vector.at<double>(0,0);
+ _gaze_vector.y = denormalized_gaze_vector.at<double>(0,1);
+ _gaze_vector.z = denormalized_gaze_vector.at<double>(0,2);
+
+ GazeResultType gaze_angle;
+ gaze_angle._pitch = asin(-_gaze_vector.y) * 180.0 / M_PI;
+ gaze_angle._yaw = atan2(-_gaze_vector.x, -_gaze_vector.z) * 180.0 / M_PI;
+
+ vector<Point3d> points3d = { _center_point, _center_point + 0.05 * _gaze_vector };
+ vector<Point2d> points2d;
+
+ projectPoints(points3d, Mat::zeros(Size(1,3), CV_64F), Mat::zeros(Size(1,3), CV_64F), _camera_matrix, _camera_dist_coeff, points2d);
+
+ Point pt0 = { .x = static_cast<int>(points2d[0].x), .y = static_cast<int>(points2d[0].y) };
+ gaze_angle._points.push_back(pt0);
+
+ Point pt1 = { .x = static_cast<int>(points2d[1].x), .y = static_cast<int>(points2d[1].y) };
+ gaze_angle._points.push_back(pt1);
+
+ return gaze_angle;
+}
+
} // focusfinder
} // services
} // singleo
#include "singleo_native_capi.h"
#include "singleo_error.h"
-#define IMG_FACE TEST_RES_PATH "/usr/share/capi-media-vision/res/inference/images/faceDetection.jpg"
+#define IMG_FACE TEST_RES_PATH "/usr/share/capi-media-vision/res/inference/images/headpose_sample00.jpg"
using namespace testing;
using namespace std;
ret = singleo_service_perform(handle);
ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
+ unsigned int cnt = 0;
+ ret = singleo_service_get_result_cnt(handle, &cnt);
+ ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
+
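+ // Each result describes the projected gaze ray: from the face center
+ // (center_x, center_y) to the gazed point (gaze_x, gaze_y) in image
+ // coordinates.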
+ for (unsigned int idx = 0; idx < cnt; ++idx) {
+ unsigned int cx, cy, gx, gy;
+
+ ret = singleo_service_get_result_int(handle, idx, "center_x", &cx);
+ ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
+ ret = singleo_service_get_result_int(handle, idx, "center_y", &cy);
+ ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
+ ret = singleo_service_get_result_int(handle, idx, "gaze_x", &gx);
+ ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
+ ret = singleo_service_get_result_int(handle, idx, "gaze_y", &gy);
+ ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
+
+ cout << "Result: from (" << cx << " , " << cy << ") to (" << gx << " , " << gy << ")" << endl;
+ }
+
ret = singleo_service_destroy(handle);
ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
}
\ No newline at end of file