}
};
-enum class ResultType { NONE, OBJECT_DETECTION, FACE_DETECTION, FACE_LANDMARK, IMAGE_CLASSIFICATION, FACE_RECOGNITION };
+enum class ResultType { NONE, OBJECT_DETECTION, FACE_DETECTION, FACE_LANDMARK, IMAGE_CLASSIFICATION, FACE_RECOGNITION, GAZE_ESTIMATION };
struct BaseResultType {
ResultType _type { ResultType::NONE };
}
};
+struct GazeResultType : public BaseResultType {
+ GazeResultType() : BaseResultType(ResultType::GAZE_ESTIMATION)
+ {}
+
+ double _pitch {};
+ double _yaw {};
+ std::vector<Point> _points; // 0: center point, 1: gazed point
+
+ std::shared_ptr<BaseResultType> clone() override
+ {
+ return std::make_shared<GazeResultType>(*this);
+ }
+};
+
enum class ServiceType { NONE, AUTO_ZOOM, FOCUS_FINDER };
enum class InputFeedType { NONE, CAMERA, SCREEN_CAPTURE };
${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvObjectDetection.cpp
${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvImageClassification.cpp
${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvFaceRecognition.cpp
+ ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvGazeEstimation.cpp
${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvInferenceTaskFactory.cpp
)
-LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection mv_landmark_detection mv_image_classification mv_face_recognition)
+LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection mv_landmark_detection mv_image_classification mv_face_recognition mv_gaze_estimation)
LIST(APPEND INFERENCE_HEADER_LIST ${INFERENCE_HEADER_LIST} ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/include /usr/include/media)
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MV_GAZE_ESTIMATION_H__
+#define __MV_GAZE_ESTIMATION_H__
+
+#include "IInferenceTaskInterface.h"
+#include "mv_gaze_estimation_internal.h"
+#include "SingleoCommonTypes.h"
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+class MvGazeEstimation : public IInferenceTaskInterface
+{
+private:
+ mv_gaze_estimation_h _handle {};
+ GazeResultType _output_data {};
+
+public:
+ MvGazeEstimation();
+ virtual ~MvGazeEstimation();
+
+ void configure() override;
+ void prepare() override;
+ void invoke(BaseDataType &input, bool async) override;
+ BaseResultType &result() override;
+};
+
+} // backends
+} // inference
+} // singleo
+
+#endif
std::unique_ptr<IInferenceTaskInterface> createFaceRecognition() override;
std::unique_ptr<IInferenceTaskInterface> createHandDetection() override;
std::unique_ptr<IInferenceTaskInterface> createHandLandmark() override;
+ std::unique_ptr<IInferenceTaskInterface> createGazeEstimation() override;
};
}
void MvFaceLandmark::configure()
{
- int ret = mv_facial_landmark_configure(_handle);
+ int ret = mv_facial_landmark_set_model(_handle, "fld_u2net_160x160.tflite", "fld_u2net_160x160.json",
+ "", "FLD_U2NET");
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to set face landmark model.");
+
+ ret = mv_facial_landmark_configure(_handle);
if (ret != MEDIA_VISION_ERROR_NONE)
throw runtime_error("Fail to configure face landmark detection.");
}
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdexcept>
+#include "MvGazeEstimation.h"
+#include "SingleoLog.h"
+
+using namespace std;
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+MvGazeEstimation::MvGazeEstimation()
+{
+ int ret = mv_gaze_estimation_create(&_handle);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to create gaze estimation handle.");
+}
+
+MvGazeEstimation::~MvGazeEstimation()
+{
+ int ret = mv_gaze_estimation_destroy(_handle);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ SINGLEO_LOGE("Fail to destroy gaze estimation handle. (%d)", ret);
+}
+
+void MvGazeEstimation::configure()
+{
+ int ret = mv_gaze_estimation_configure(_handle);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to configure gaze estimation.");
+}
+
+void MvGazeEstimation::prepare()
+{
+ int ret = mv_gaze_estimation_prepare(_handle);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to prepare gaze estimation.");
+}
+
+void MvGazeEstimation::invoke(BaseDataType &input, bool async)
+{
+ if (input._data_type != DataType::IMAGE) {
+ SINGLEO_LOGE("Invalid input type.");
+ throw invalid_argument("Input type not supported.");
+ }
+
+ ImageDataType &data = dynamic_cast<ImageDataType &>(input);
+
+ mv_source_h mv_src;
+
+ int ret = mv_create_source(&mv_src);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to create mv source.");
+
+ try {
+ ret = mv_source_fill_by_buffer(mv_src, data.ptr, data.width * data.height * data.byte_per_pixel, data.width,
+ data.height, MEDIA_VISION_COLORSPACE_RGB888);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to convert to mv source.");
+
+ ret = mv_gaze_estimation_inference(_handle, mv_src);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to invoke gaze estimation.");
+ } catch (std::runtime_error &e) {
+ SINGLEO_LOGE("%s", e.what());
+ mv_destroy_source(mv_src);
+ throw;
+ }
+
+ ret = mv_destroy_source(mv_src);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to destroy mv source.");
+}
+
+BaseResultType &MvGazeEstimation::result()
+{
+ unsigned long frame_number;
+ unsigned int result_cnt;
+
+ int ret = mv_gaze_estimation_get_result_count(_handle, &frame_number, &result_cnt);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to get gaze estimaton result count.");
+
+ _output_data._frame_number = frame_number;
+ _output_data._is_empty = result_cnt == 0;
+
+ if (!_output_data._is_empty) {
+ double pitch, yaw;
+
+ ret = mv_gaze_estimation_get_direction(_handle, &pitch, &yaw);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to get gaze direction.");
+
+ _output_data._pitch = pitch;
+ _output_data._yaw = yaw;
+ }
+
+ return _output_data;
+}
+
+} // backends
+} // inference
+} // singleo
#include "MvImageClassification.h"
#include "MvHandDetection.h"
#include "MvHandLandmark.h"
+#include "MvGazeEstimation.h"
#include "SingleoLog.h"
#include "SingleoException.h"
return make_unique<MvHandLandmark>();
}
+std::unique_ptr<IInferenceTaskInterface> MvInferenceTaskFactory::createGazeEstimation()
+{
+ return make_unique<MvGazeEstimation>();
+}
}
}
virtual std::unique_ptr<IInferenceTaskInterface> createFaceRecognition() = 0;
virtual std::unique_ptr<IInferenceTaskInterface> createHandDetection() = 0;
virtual std::unique_ptr<IInferenceTaskInterface> createHandLandmark() = 0;
+ virtual std::unique_ptr<IInferenceTaskInterface> createGazeEstimation() = 0;
};
}
class FaceShapeModelManager
{
private:
- std::vector<cv::Point3f> _faceShape;
+ std::vector<cv::Point3d> _faceShape;
- void loadModelFromFile(const std::string &model_path);
template<typename T> T ToNumber(const std::string &text);
std::map<FaceComponents, std::vector<int> > _faceShapeIndices;
+ void createFaceShapeIndex();
public:
+ FaceShapeModelManager();
FaceShapeModelManager(const std::string &model_path);
~FaceShapeModelManager();
- const std::vector<cv::Point3f> &getFaceShape();
+ const std::vector<cv::Point3d> &getFaceShape();
const std::vector<int> &getFaceShapeIndices(FaceComponents component);
+ void loadModelFromFile(const std::string &model_path);
};
} // focusfinder
} // services
std::mutex _input_mutex;
FocusFinderResult _result {};
- std::map<std::string, FocusFinderResultType> _result_keys = { { "X", FocusFinderResultType::X },
- { "Y", FocusFinderResultType::Y } };
+ std::map<std::string, FocusFinderResultType> _result_keys = { { "CENTER_X", FocusFinderResultType::CX },
+ { "CENTER_Y", FocusFinderResultType::CY },
+ { "GAZE_X", FocusFinderResultType::GX },
+ { "GAZE_Y", FocusFinderResultType::GY } };
bool _async_mode { false };
std::unique_ptr<AsyncManager<ImageDataType, FocusFinderResult> > _async_manager;
void *_user_data {};
bool isKeyValid(std::string key);
- void updateResult(const std::vector<Point> &axes);
+ void updateResult(const GazeResultType &gaze);
public:
FocusFinder();
NOSE_BASE,
LEFT_EYE_CORNERS,
RIGHT_EYE_CORNERS,
+ MOUTH_CORNERS
};
}
}
{
namespace focusfinder
{
-enum class FocusFinderResultType { X, Y };
+enum class FocusFinderResultType { CX, CY, GX, GY };
struct FocusFinderResult : public ServiceBaseResultType {
FocusFinderResult() : ServiceBaseResultType(ServiceResultType::FOCUS_FINDER)
{}
unsigned int frame_number {};
bool is_focused { false };
+ Point center;
Point focus;
- std::vector<Point> head_axes;
+ double pitch {};
+ double yaw {};
};
}
}
#include "FocusFinderDataTypes.h"
#include "IFaceTaskManager.h"
#include "HeadPoseEstimator.h"
+#include "IInferenceTaskInterface.h"
namespace singleo
{
class GazeEstimator
{
private:
+ GazeResultType _result;
std::shared_ptr<IFaceTaskManager> _face_tasker;
std::unique_ptr<HeadPoseEstimator> _head_pose_estimator;
std::vector<Point> _head_axes;
+ std::unique_ptr<inference::IInferenceTaskInterface> _gaze_estimator;
+
+ void drawHeadPose(FaceResult &result, ImageDataType &image, cv::Point3f &rotvec, cv::Point3f &tranvec);
public:
GazeEstimator();
~GazeEstimator();
- const std::vector<Point> run(BaseDataType &input);
+ const GazeResultType &run(BaseDataType &input);
};
} // focusfinder
} // services
#include <opencv2/imgproc.hpp>
#include <opencv2/calib3d.hpp>
#include "SingleoCommonTypes.h"
+#include "FaceShapeModelManager.h"
namespace singleo
{
class HeadPoseEstimator
{
private:
- std::vector<cv::Point2f> _landmarks_2d;
- std::vector<cv::Point3f> _landmarks_3d;
+ std::vector<cv::Point2d> _landmarks_2d;
+ std::vector<cv::Point3d> _landmarks_3d;
+
std::vector<int> _faceComponentIndices;
cv::Mat _camera_matrix;
+ cv::Mat _camera_matrix_inv;
+ cv::Mat _normalized_camera_matrix;
cv::Mat _camera_dist_coeff;
cv::Mat _rotation_vector;
cv::Mat _translation_vector;
cv::Mat _rotation_matrix;
+ cv::Mat _translation_matrix;
+ cv::Mat _normalized_rotation_matrix;
+ cv::Mat _landmarks_3d_matrix;
+ cv::Mat _normalized_landmarks_3d_matrix;
+ cv::Mat _projection_matrix;
std::vector<cv::Point3f> _pose_axes_3d;
std::vector<Point> _pose_axes;
+ cv::Point3d _center_point;
+ cv::Point3d _gaze_vector;
bool _isInitialized;
+ FaceShapeModelManager _faceShapeModelmgr;
+
bool validRotationMatrix(cv::Mat &matrix);
cv::Vec3f rotationMatrixToEulerAngles(cv::Mat &matrix);
double calculateReProjectectionError();
void print3x3Matrix(cv::Mat &matrix, std::string name);
void print3x1Matrix(cv::Mat &matrix, std::string name);
void printVec3f(cv::Vec3f &vec, std::string name);
+ void rtvecToRtMatrix(cv::Mat &rvec, cv::Mat &tvec);
+ void normalizedRotationMatrix();
+ void calculateCenter();
+ void calculateProjectionMatrix();
public:
HeadPoseEstimator(const std::vector<FaceComponents> &components);
~HeadPoseEstimator();
- std::tuple<cv::Point3f, cv::Point3f> estimate(const FaceResult &face);
+ cv::Mat &estimate(const FaceResult &face);
std::vector<Point> &getPoseAxes();
std::vector<int> &getFaceComponentsIndieces();
+ GazeResultType denormalizeGazeAngle(GazeResultType &normalized_angle);
};
} // smartpointer
} // services
rows: 3
cols: 3
dt: f
- data: [324.0, 0.0, 324.0,
- 0.0, 324.0, 216.0,
+ data: [1280.0, 0.0, 640.0,
+ 0.0, 1280.0, 959.0,
0.0, 0.0, 1.0]
dist_coeff: !!opencv-matrix
rows: 5
{
namespace focusfinder
{
+FaceShapeModelManager::FaceShapeModelManager()
+{}
+
FaceShapeModelManager::FaceShapeModelManager(const string &model_path)
{
if (model_path.empty())
throw invalid_argument("Invalid face shape model path");
loadModelFromFile(model_path);
- _faceShapeIndices.insert(
- make_pair<FaceComponents, vector<int> >(FaceComponents::NOSE_RIDGE, vector<int> { 27, 28, 29, 30 }));
- _faceShapeIndices.insert(
- make_pair<FaceComponents, vector<int> >(FaceComponents::NOSE_BASE, vector<int> { 31, 32, 33, 34, 35 }));
- _faceShapeIndices.insert(
- make_pair<FaceComponents, vector<int> >(FaceComponents::LEFT_EYE_CORNERS, vector<int> { 36, 39 }));
- _faceShapeIndices.insert(
- make_pair<FaceComponents, vector<int> >(FaceComponents::RIGHT_EYE_CORNERS, vector<int> { 42, 45 }));
}
FaceShapeModelManager::~FaceShapeModelManager()
return number;
}
+void FaceShapeModelManager::createFaceShapeIndex()
+{
+ _faceShapeIndices.insert(
+ make_pair<FaceComponents, vector<int> >(FaceComponents::NOSE_RIDGE, vector<int> { 27, 28, 29, 30 }));
+ _faceShapeIndices.insert(
+ make_pair<FaceComponents, vector<int> >(FaceComponents::NOSE_BASE, vector<int> { 31, 32, 33, 34, 35 }));
+ _faceShapeIndices.insert(
+ make_pair<FaceComponents, vector<int> >(FaceComponents::RIGHT_EYE_CORNERS, vector<int> { 36, 39 }));
+ _faceShapeIndices.insert(
+ make_pair<FaceComponents, vector<int> >(FaceComponents::LEFT_EYE_CORNERS, vector<int> { 42, 45 }));
+ _faceShapeIndices.insert(
+ make_pair<FaceComponents, vector<int> >(FaceComponents::MOUTH_CORNERS, vector<int> { 48, 54 }));
+}
+
void FaceShapeModelManager::loadModelFromFile(const string &model_path)
{
fstream file(model_path, ios::in);
_faceShape.resize(ToNumber<int>(line));
for (auto &point : _faceShape) {
getline(file, line);
- point.x = -1.0f * ToNumber<float>(line);
+ point.x = ToNumber<double>(line) / 1000.0;
}
for (auto &point : _faceShape) {
getline(file, line);
- point.y = -1.0f * ToNumber<float>(line);
+ point.y = ToNumber<double>(line) / 1000.0;
}
for (auto &point : _faceShape) {
getline(file, line);
- point.z = ToNumber<float>(line);
+ point.z = ToNumber<double>(line) / 1000.0;
}
SINGLEO_LOGI("%zd landarks", _faceShape.size());
+ createFaceShapeIndex();
}
-const vector<Point3f> &FaceShapeModelManager::getFaceShape()
+const vector<Point3d> &FaceShapeModelManager::getFaceShape()
{
return _faceShape;
}
});
}
-void FocusFinder::updateResult(const vector<Point> &axes)
+void FocusFinder::updateResult(const GazeResultType &gaze)
{
FocusFinderResult result;
- // TODO: Implement focus point calculation based on head axes.
- result.focus = Point { .x = 0, .y = 0 };
+ // TODO: Implement focus point calculation based on gaze.
- result.head_axes = axes;
- if (!result.head_axes.empty()) {
- result.is_focused = true;
- }
+ if (gaze._points.empty())
+ return;
+
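+ // gaze._points[0] is the face center and gaze._points[1] the gazed point,
+ // both projected onto the image plane (see GazeResultType).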
+ result.is_focused = true;
+ result.center = gaze._points[0];
+ result.focus = gaze._points[1];
+
+ result.pitch = gaze._pitch;
+ result.yaw = gaze._yaw;
if (_async_mode)
_async_manager->pushOutput(result);
}
void FocusFinder::getResultInt(unsigned int idx, std::string key, unsigned int *value)
-{}
+{
+ transform(key.begin(), key.end(), key.begin(), ::toupper);
+
+ SINGLEO_LOGD("given key = %s", key.c_str());
+
+ if (!isKeyValid(key))
+ throw InvalidParameter("A given result key is invalid.");
+
+ switch (_result_keys[key]) {
+ case FocusFinderResultType::CX:
+ *value = static_cast<unsigned int>(_result.center.x);
+ break;
+ case FocusFinderResultType::CY:
+ *value = static_cast<unsigned int>(_result.center.y);
+ break;
+ case FocusFinderResultType::GX:
+ *value = static_cast<unsigned int>(_result.focus.x);
+ break;
+ case FocusFinderResultType::GY:
+ *value = static_cast<unsigned int>(_result.focus.y);
+ break;
+ }
+}
void FocusFinder::registerUserCallback(singleo_user_cb_t user_cb, void *user_data)
{
#include "SingleoException.h"
#include "SingleoLog.h"
#include "FocusMapper.h"
+#include <iostream>
using namespace std;
using namespace cv;
_cv_image = cv::Mat(cv::Size(image_data.width, image_data.height), CV_MAKETYPE(CV_8U, 3), image_data.ptr).clone();
- line(_cv_image, Point2i(_result.head_axes[0].x, _result.head_axes[0].y),
- Point2i(_result.head_axes[1].x, _result.head_axes[1].y), Scalar(0, 0, 255), 2); // x - red
- line(_cv_image, Point2i(_result.head_axes[0].x, _result.head_axes[0].y),
- Point2i(_result.head_axes[2].x, _result.head_axes[2].y), Scalar(0, 255, 0), 2); // y - green
- line(_cv_image, Point2i(_result.head_axes[0].x, _result.head_axes[0].y),
- Point2i(_result.head_axes[3].x, _result.head_axes[3].y), Scalar(255, 0, 0), 2); // z - blue
+ line(_cv_image, Point2i(_result.center.x, _result.center.y),
+ Point2i(_result.focus.x, _result.focus.y), Scalar(0, 0, 255), 2); // projected gaze vector
_image_data.width = _cv_image.cols;
_image_data.height = _cv_image.rows;
#include "FaceDetector.h"
#include "FaceLandmarker.h"
#include "SingleoLog.h"
+#include "InferenceTaskFactory.h"
using namespace std;
using namespace cv;
const vector<FaceComponents> _faceComponents { FaceComponents::NOSE_RIDGE, FaceComponents::NOSE_BASE,
FaceComponents::LEFT_EYE_CORNERS,
- FaceComponents::RIGHT_EYE_CORNERS };
+ FaceComponents::RIGHT_EYE_CORNERS,
+ FaceComponents::MOUTH_CORNERS };
_head_pose_estimator = make_unique<HeadPoseEstimator>(_faceComponents);
+ _gaze_estimator =
+ inference::InferenceTaskFactory::instance().create("MvInferenceTaskFactory")->createGazeEstimation();
+ _gaze_estimator->configure();
+ _gaze_estimator->prepare();
}
GazeEstimator::~GazeEstimator()
{}
-const vector<Point> GazeEstimator::run(BaseDataType &input)
+const GazeResultType &GazeEstimator::run(BaseDataType &input)
{
FaceResult result;
_face_tasker->handle(input, result);
if (!result._landmarks.empty()) {
SINGLEO_LOGI("landmarks %zu-%zu", result._landmarks.size(), result._landmarks[0].size());
- auto head_pose = _head_pose_estimator->estimate(result);
- SINGLEO_LOGI("head pose: \n\t rotation(x,y,z): %.3f, %.3f, %.3f\n\t translation(x,y,z): %.3f, %.3f, %.3f\n",
- get<0>(head_pose).x, get<0>(head_pose).y, get<0>(head_pose).z, get<1>(head_pose).x,
- get<1>(head_pose).y, get<1>(head_pose).z);
+ auto projection_matrix = _head_pose_estimator->estimate(result);
- _head_axes = _head_pose_estimator->getPoseAxes();
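+ // estimate() returns the homography that warps the input frame into the
+ // normalized camera space; apply it and run the gaze model on the
+ // resulting 224x224 patch.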
+ auto &image = dynamic_cast<ImageDataType &>(input);
+ Mat cv_rgb_image(Size(image.width, image.height), CV_MAKETYPE(CV_8U, image.byte_per_pixel), image.ptr);
+ Mat cv_bgr_image;
+ cvtColor(cv_rgb_image, cv_bgr_image, COLOR_RGB2BGR);
+ Mat cv_warped_image;
+ warpPerspective(cv_bgr_image, cv_warped_image, projection_matrix, Size(224, 224));
+
+ ImageDataType normalized_image;
+ normalized_image.width = cv_warped_image.cols;
+ normalized_image.height = cv_warped_image.rows;
+ normalized_image.byte_per_pixel = cv_warped_image.channels();
+ normalized_image.ptr = cv_warped_image.data;
+
+ _gaze_estimator->invoke(normalized_image);
+ auto normalized_gaze_angle = dynamic_cast<GazeResultType &>(_gaze_estimator->result());
+
+ _result = _head_pose_estimator->denormalizeGazeAngle(normalized_gaze_angle);
}
- return _head_axes;
+ return _result;
}
} // focusfinder
*/
#include <tuple>
-#include "FaceShapeModelManager.h"
+#include <cmath>
#include "CameraParamManager.h"
#include "HeadPoseEstimator.h"
#include "SingleoLog.h"
{
HeadPoseEstimator::HeadPoseEstimator(const vector<FaceComponents> &components)
{
- FaceShapeModelManager faceShapeModelmgr("/usr/share/singleo/pdm.txt");
- const vector<Point3f> faceShape = faceShapeModelmgr.getFaceShape();
+ _faceShapeModelmgr.loadModelFromFile("/usr/share/singleo/pdm.txt");
+ const vector<Point3d> faceShape = _faceShapeModelmgr.getFaceShape();
for (auto &component : components) {
- for (auto &index : faceShapeModelmgr.getFaceShapeIndices(component)) {
- _landmarks_3d.push_back(Point3f(faceShape[index].x, faceShape[index].y, faceShape[index].z));
+ for (auto &index : _faceShapeModelmgr.getFaceShapeIndices(component)) {
+ _landmarks_3d.push_back(Point3d(faceShape[index].x, faceShape[index].y, faceShape[index].z));
_faceComponentIndices.push_back(index);
}
}
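+ // Cache the selected 3D landmarks as an N x 3 matrix so estimate() can
+ // normalize them with a single gemm() call.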
+ Mat landmarks_3d_matrix(Size(3, _landmarks_3d.size()), CV_64F);
+ for (size_t i = 0; i < _landmarks_3d.size(); i++) {
+ landmarks_3d_matrix.at<double>(i, 0) = _landmarks_3d[i].x;
+ landmarks_3d_matrix.at<double>(i, 1) = _landmarks_3d[i].y;
+ landmarks_3d_matrix.at<double>(i, 2) = _landmarks_3d[i].z;
+ }
+ _landmarks_3d_matrix = landmarks_3d_matrix.clone();
+
// Nose base landmarks are 31, 32, 33, 34, 35 and 33 is the nose tip
- int noseTipIndex = faceShapeModelmgr.getFaceShapeIndices(FaceComponents::NOSE_BASE)[2];
+ int noseTipIndex = _faceShapeModelmgr.getFaceShapeIndices(FaceComponents::NOSE_BASE)[2];
_pose_axes_3d.push_back(Point3f(faceShape[noseTipIndex].x, faceShape[noseTipIndex].y, faceShape[noseTipIndex].z));
_pose_axes_3d.push_back(
Point3f(faceShape[noseTipIndex].x - 50, faceShape[noseTipIndex].y, faceShape[noseTipIndex].z));
_pose_axes.resize(_pose_axes_3d.size());
_landmarks_2d.resize(_landmarks_3d.size());
- _rotation_vector = Mat(Size(3, 1), CV_32FC1);
- _translation_vector = Mat(Size(3, 1), CV_32FC1);
-
- CameraParamManager cam_dummy("/usr/share/singleo/camera_param_rs2_640.yaml");
-
- _camera_matrix = Mat::zeros(Size(3, 3), CV_32FC1);
- tuple<float, float> focalLength = cam_dummy.getFocalLengthXY();
- tuple<float, float> principlePoint = cam_dummy.getPrinciplePointXY();
- _camera_matrix.at<float>(0, 0) = get<0>(focalLength);
- _camera_matrix.at<float>(1, 1) = get<1>(focalLength);
- _camera_matrix.at<float>(0, 2) = get<0>(principlePoint);
- _camera_matrix.at<float>(1, 2) = get<1>(principlePoint);
- _camera_matrix.at<float>(2, 2) = 1.0;
-
- _camera_dist_coeff = Mat::zeros(Size(1, 5), CV_32FC1);
- tuple<float, float, float, float, float> distCoeff = cam_dummy.getDistortionCoeff();
- _camera_dist_coeff.at<float>(0, 0) = get<0>(distCoeff);
- _camera_dist_coeff.at<float>(0, 1) = get<1>(distCoeff);
- _camera_dist_coeff.at<float>(0, 2) = get<2>(distCoeff);
- _camera_dist_coeff.at<float>(0, 3) = get<3>(distCoeff);
- _camera_dist_coeff.at<float>(0, 4) = get<4>(distCoeff);
-
- for (int r = 0; r < _camera_matrix.rows; ++r) {
- for (int c = 0; c < _camera_matrix.cols; ++c) {
- SINGLEO_LOGI("%.3f", _camera_matrix.at<float>(r, c));
- }
- }
- for (int r = 0; r < _camera_dist_coeff.rows; ++r) {
- for (int c = 0; c < _camera_dist_coeff.cols; ++c) {
- SINGLEO_LOGI("%.3f", _camera_dist_coeff.at<float>(r, c));
- }
- }
+ _rotation_vector = Mat(Size(1, 3), CV_64F);
+ _translation_vector = Mat(Size(1, 3), CV_64F);
+ _translation_matrix = Mat(Size(_landmarks_3d.size(), 3), CV_64F);
+
+ // TODO: When using image, cam_dummy should be updated based on the image size.
+ // CameraParamManager cam_dummy("/usr/share/singleo/camera_param_rs2_640.yaml");
+ CameraParamManager cam_dummy("/usr/share/singleo/camera_param_dummy.yaml");
+
+ _camera_matrix = Mat::zeros(Size(3, 3), CV_64FC1);
+ tuple<double, double> focalLength = cam_dummy.getFocalLengthXY();
+ tuple<double, double> principlePoint = cam_dummy.getPrinciplePointXY();
+ _camera_matrix.at<double>(0, 0) = get<0>(focalLength);
+ _camera_matrix.at<double>(1, 1) = get<1>(focalLength);
+ _camera_matrix.at<double>(0, 2) = get<0>(principlePoint);
+ _camera_matrix.at<double>(1, 2) = get<1>(principlePoint);
+ _camera_matrix.at<double>(2, 2) = 1.0;
+ _camera_matrix_inv = _camera_matrix.inv();
+
+ _camera_dist_coeff = Mat::zeros(Size(1, 5), CV_64FC1);
+ tuple<double, double, double, double, double> distCoeff = cam_dummy.getDistortionCoeff();
+ _camera_dist_coeff.at<double>(0, 0) = get<0>(distCoeff);
+ _camera_dist_coeff.at<double>(0, 1) = get<1>(distCoeff);
+ _camera_dist_coeff.at<double>(0, 2) = get<2>(distCoeff);
+ _camera_dist_coeff.at<double>(0, 3) = get<3>(distCoeff);
+ _camera_dist_coeff.at<double>(0, 4) = get<4>(distCoeff);
+
+ // TODO: These values are only valid for MPIIFACEGAZE. They may need to change when other models are used.
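+ // The normalized camera is a virtual camera with focal length 1600 whose
+ // principal point (112, 112) is the center of the 224x224 patch fed to the
+ // gaze model (see warpPerspective() in GazeEstimator::run()).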
+ _normalized_camera_matrix = Mat::zeros(Size(3, 3), CV_64F);
+ _normalized_camera_matrix.at<double>(0, 0) = 1600.0;
+ _normalized_camera_matrix.at<double>(0, 2) = 112.0;
+ _normalized_camera_matrix.at<double>(1, 1) = 1600.0;
+ _normalized_camera_matrix.at<double>(1, 2) = 112.0;
+ _normalized_camera_matrix.at<double>(2, 2) = 1.0;
}
HeadPoseEstimator::~HeadPoseEstimator()
SINGLEO_LOGD("%.3f, %.3f, %.3f", vec[0], vec[1], vec[2]);
}
-tuple<Point3f, Point3f> HeadPoseEstimator::estimate(const FaceResult &face)
+void HeadPoseEstimator::rtvecToRtMatrix(cv::Mat &rvec, cv::Mat &tvec)
+{
+ // rvec (w x h): 1 x 3; _rotation_matrix: 3 x 3
+ Rodrigues(rvec, _rotation_matrix);
+
+ // tvec (w x h): 1 x 3; _translation_matrix (w x h): N x 3, one column per 3D landmark
+ for (int i = 0; i < _translation_matrix.cols; ++i) {
+ _translation_matrix.at<double>(0, i) = tvec.at<double>(0, 0);
+ _translation_matrix.at<double>(1, i) = tvec.at<double>(1, 0);
+ _translation_matrix.at<double>(2, i) = tvec.at<double>(2, 0);
+ }
+}
+
+void HeadPoseEstimator::calculateCenter()
+{
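+ // The face center is the mean of the selected eye- and mouth-corner
+ // landmarks in the normalized 3D landmark matrix.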
+ int nCenter = 0;
+ vector<FaceComponents> components = { FaceComponents::RIGHT_EYE_CORNERS, FaceComponents::LEFT_EYE_CORNERS, FaceComponents::MOUTH_CORNERS };
+ _center_point.x = _center_point.y = _center_point.z = 0.0;
+
+ for (const auto& component : components) {
+ for (auto &index : _faceShapeModelmgr.getFaceShapeIndices(component)) {
+ int mappedIndex = find(_faceComponentIndices.begin(), _faceComponentIndices.end(), index) - _faceComponentIndices.begin();
+ _center_point.x += _normalized_landmarks_3d_matrix.at<double>(mappedIndex, 0);
+ _center_point.y += _normalized_landmarks_3d_matrix.at<double>(mappedIndex, 1);
+ _center_point.z += _normalized_landmarks_3d_matrix.at<double>(mappedIndex, 2);
+ nCenter++;
+ }
+ }
+
+ _center_point.x /= static_cast<double>(nCenter);
+ _center_point.y /= static_cast<double>(nCenter);
+ _center_point.z /= static_cast<double>(nCenter);
+}
+
+void HeadPoseEstimator::normalizedRotationMatrix()
+{
+ // calculate center point of normalized 3d landmarks
+ calculateCenter();
+
+ // calculate _normalized_rotation_matrix
+ Mat z_axis = Mat(_center_point / norm(Mat(_center_point), NORM_L2));
+ Mat rot_x_axis = _rotation_matrix.col(0);
+ Mat y_tmp_axis = z_axis.cross(rot_x_axis);
+ Mat y_axis = y_tmp_axis / norm(y_tmp_axis, NORM_L2);
+ Mat x_tmp_axis = y_axis.cross(z_axis);
+ Mat x_axis = x_tmp_axis / norm(x_tmp_axis, NORM_L2);
+
+ // (w x h): 1x3 -> 3x1
+ vector<Mat> array_axis = { x_axis.t(), y_axis.t(), z_axis.t() };
+ vconcat(array_axis, _normalized_rotation_matrix);
+}
+
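+// Composes the homography that warps the input image into the normalized
+// camera space:
+// _projection_matrix = _normalized_camera_matrix @ scale_matrix @ _normalized_rotation_matrix @ _camera_matrix_inv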
+void HeadPoseEstimator::calculateProjectionMatrix()
+{
+ Mat scale_matrix = Mat::eye(Size(3, 3), CV_64F);
+ scale_matrix.at<double>(2, 2) = 1.0 / norm(Mat(_center_point), NORM_L2);
+
+ Mat conversion_matrix;
+ // conversion_matrix = scale_matrix @ _normalized_rotation_matrix
+ gemm(scale_matrix, _normalized_rotation_matrix, 1.0, noArray(), 0.0, conversion_matrix, 0);
+
+ // _projection_matrix = conversion_matrix @ _camera_matrix_inv
+ gemm(conversion_matrix, _camera_matrix_inv, 1.0, noArray(), 0.0, _projection_matrix, 0);
+ // _projection_matrix = _normalized_camera_matrix @ _projection_matrix
+ gemm(_normalized_camera_matrix, _projection_matrix, 1.0, noArray(), 0.0, _projection_matrix, 0);
+}
+
+Mat &HeadPoseEstimator::estimate(const FaceResult &face)
{
// TODO: parallel??, c++17 structured binding??
// TODO: use multiple faces
size_t idx = 0;
for (auto &compIdx : _faceComponentIndices) {
- _landmarks_2d[idx].x = face._landmarks[0][compIdx].x + face._rects[0].left;
- _landmarks_2d[idx].y = face._landmarks[0][compIdx].y + face._rects[0].top;
+ _landmarks_2d[idx].x = static_cast<double>(face._landmarks[0][compIdx].x + face._rects[0].left);
+ _landmarks_2d[idx].y = static_cast<double>(face._landmarks[0][compIdx].y + face._rects[0].top);
idx++;
}
+
solvePnPRansac(_landmarks_3d, _landmarks_2d, _camera_matrix, _camera_dist_coeff, _rotation_vector,
- _translation_vector, false, 100, 3.0F, 0.99, noArray(), SOLVEPNP_EPNP);
+ _translation_vector, false, 100, 3.0f, 0.99, noArray(), SOLVEPNP_EPNP);
// estimate pose
// https://docs.opencv.org/4.x/d9/d0c/group__calib3d.html#ga357634492a94efe8858d0ce1509da869
solvePnP(_landmarks_3d, _landmarks_2d, _camera_matrix, _camera_dist_coeff, _rotation_vector, _translation_vector,
true, SOLVEPNP_ITERATIVE);
- Rodrigues(_rotation_vector, _rotation_matrix);
- auto rotation_eulerAngles = rotationMatrixToEulerAngles(_rotation_matrix);
+ // convert the rotation and translation vectors to matrices
+ rtvecToRtMatrix(_rotation_vector, _translation_vector);
+
+ // normalize the 3D landmarks with the rotation and translation matrices (GEMM_3_T transposes _translation_matrix)
+ gemm(_landmarks_3d_matrix, _rotation_matrix, 1.0, _translation_matrix, 1.0, _normalized_landmarks_3d_matrix, GEMM_3_T);
+
+ normalizedRotationMatrix();
- return make_tuple(Point3f { rotation_eulerAngles[0], rotation_eulerAngles[1], rotation_eulerAngles[2] },
- Point3f { static_cast<float>(_translation_vector.at<double>(0)),
- static_cast<float>(_translation_vector.at<double>(1)),
- static_cast<float>(_translation_vector.at<double>(2)) });
+ calculateProjectionMatrix();
+
+ return _projection_matrix;
}
vector<Point> &HeadPoseEstimator::getPoseAxes()
return Vec3f(x, y, z);
}
+GazeResultType HeadPoseEstimator::denormalizeGazeAngle(GazeResultType &normalized_angle)
+{
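+ // Convert the model's normalized (pitch, yaw) angles to a unit gaze vector
+ // in the normalized camera frame; (0, 0) maps to (0, 0, -1), i.e. looking
+ // straight into the camera.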
+ Mat gaze_vector(Size(3, 1), CV_64F);
+ gaze_vector.at<double>(0, 0) = -1.0 * cos(normalized_angle._pitch) * sin(normalized_angle._yaw);
+ gaze_vector.at<double>(0, 1) = -1.0 * sin(normalized_angle._pitch);
+ gaze_vector.at<double>(0, 2) = -1.0 * cos(normalized_angle._pitch) * cos(normalized_angle._yaw);
+
+ Mat denormalized_gaze_vector;
+ // denormalized_gaze_vector = gaze_vector @ _normalized_rotation_matrix
+ gemm(gaze_vector, _normalized_rotation_matrix, 1.0, noArray(), 0.0, denormalized_gaze_vector, 0);
+
+ _gaze_vector.x = denormalized_gaze_vector.at<double>(0,0);
+ _gaze_vector.y = denormalized_gaze_vector.at<double>(0,1);
+ _gaze_vector.z = denormalized_gaze_vector.at<double>(0,2);
+
+ GazeResultType gaze_angle;
+ gaze_angle._pitch = asin(-_gaze_vector.y) * 180.0 / M_PI;
+ gaze_angle._yaw = atan2(-_gaze_vector.x, -_gaze_vector.z) * 180.0 / M_PI;
+
+ vector<Point3d> points3d = { _center_point, _center_point + 0.05 * _gaze_vector };
+ vector<Point2d> points2d;
+
+ projectPoints(points3d, Mat::zeros(Size(1,3), CV_64F), Mat::zeros(Size(1,3), CV_64F), _camera_matrix, _camera_dist_coeff, points2d);
+
+ Point pt0 = { .x = static_cast<int>(points2d[0].x), .y = static_cast<int>(points2d[0].y) };
+ gaze_angle._points.push_back(pt0);
+
+ Point pt1 = { .x = static_cast<int>(points2d[1].x), .y = static_cast<int>(points2d[1].y) };
+ gaze_angle._points.push_back(pt1);
+
+ return gaze_angle;
+}
+
} // focusfinder
} // services
} // singleo
#include "singleo_native_capi.h"
#include "singleo_error.h"
-#define IMG_FACE TEST_RES_PATH "/usr/share/capi-media-vision/res/inference/images/faceDetection.jpg"
+#define IMG_FACE TEST_RES_PATH "/usr/share/capi-media-vision/res/inference/images/headpose_sample00.jpg"
using namespace testing;
using namespace std;
ret = singleo_service_perform(handle);
ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
+ unsigned int cnt = 0;
+ ret = singleo_service_get_result_cnt(handle, &cnt);
+ ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
+
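+ // Each result describes the projected gaze ray: from the face center
+ // (center_x, center_y) to the gazed point (gaze_x, gaze_y) in image
+ // coordinates.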
+ for (unsigned int idx = 0; idx < cnt; ++idx) {
+ unsigned int cx, cy, gx, gy;
+
+ ret = singleo_service_get_result_int(handle, idx, "center_x", &cx);
+ ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
+ ret = singleo_service_get_result_int(handle, idx, "center_y", &cy);
+ ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
+ ret = singleo_service_get_result_int(handle, idx, "gaze_x", &gx);
+ ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
+ ret = singleo_service_get_result_int(handle, idx, "gaze_y", &gy);
+ ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
+
+ cout << "Result: from (" << cx << " , " << cy << ") to (" << gx << " , " << gy << ")" << endl;
+ }
+
ret = singleo_service_destroy(handle);
ASSERT_EQ(ret, SINGLEO_ERROR_NONE);
}
\ No newline at end of file