--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KALMAN_2D_H__
+#define __KALMAN_2D_H__
+
+#include <opencv2/video/tracking.hpp>
+
+namespace singleo
+{
+namespace inference
+{
+/**
+ * Constant-velocity Kalman filter over a single 2D point.
+ *
+ * Wraps cv::KalmanFilter with a 4-state (x, y, vx, vy) / 2-measurement
+ * (x, y) model. Typical usage: call init() once with the first observed
+ * position, then predict() + correct() each frame; get() returns the
+ * filtered (posterior) position.
+ */
+class Kalman2D {
+private:
+ cv::KalmanFilter _filter; // underlying OpenCV filter (4 states, 2 measurements)
+ bool _isInit; // true once init() has seeded the state
+
+public:
+ Kalman2D();
+ ~Kalman2D();
+
+ // Seed the filter state (position = x,y; velocity = 0) with the first measurement.
+ void init(float x, float y);
+ // Advance the filter one time step and return the predicted position.
+ cv::Point predict();
+ // Fold a new measured position into the filter state.
+ void correct(float x, float y);
+ // Return the current corrected (posterior) position estimate.
+ cv::Point get();
+ // Whether init() has been called yet.
+ bool isInit();
+};
+}
+}
+#endif
\ No newline at end of file
#define __PRIVATE_INFERENCE_FACE_SERVICE_H__
#include <memory>
+#include <vector>
#include <opencv2/core.hpp>
#include "IInferenceFaceService.h"
#include "mv_facial_landmark_internal.h"
+#include "Kalman2d.h"
namespace singleo
{
bool isDetectTask;
bool isLandmarkDetectTask;
+ bool _isSkipDetectTask;
+ bool _isSkipLandmarkDetectTask;
float _xDownSizeRatio;
float _yDownSizeRatio;
float _xInvRatio;
float _yInvRatio;
+ std::vector<int> _landmarkMappingIndex;
+
+ std::vector<std::unique_ptr<Kalman2D>> _filterFaceRegion;
+ std::vector<std::unique_ptr<Kalman2D>> _filterLandmarks;
+
void getLandmarks(cv::Mat &data, Points &landmarks, int roi_left = 0, int roi_right = 0);
public:
PrivateInferenceFaceService(std::vector<inference::TaskType> &tasks);
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Kalman2d.h"
+
+using namespace cv;
+
+namespace singleo
+{
+namespace inference
+{
+/**
+ * Build a 4-state (x, y, vx, vy), 2-measurement (x, y) filter with
+ * fixed, hand-tuned noise and transition parameters.
+ */
+Kalman2D::Kalman2D()
+{
+ // 4 dynamic states, 2 measured values, no control input.
+ _filter = KalmanFilter(4, 2, 0);
+
+ // Start from an all-zero state until init() seeds a real position.
+ _filter.statePre = (Mat_<float>(4,1) << 0, 0, 0, 0);
+ _filter.statePost = (Mat_<float>(4,1) << 0, 0, 0, 0);
+
+ // Constant-velocity-style model. NOTE(review): position picks up only
+ // half the velocity per step (0.5) and velocity decays hard (0.02) —
+ // presumably tuned for heavy smoothing; confirm these coefficients are
+ // intentional rather than a plain [1 0 dt 0; ...] model typo.
+ _filter.transitionMatrix = (Mat_<float>(4,4) << 1, 0, 0.5f, 0, 0, 1, 0, 0.5f, 0, 0, 0.02f, 0, 0, 0, 0, 0.02f);
+ // Isotropic process noise; identity measurement noise.
+ _filter.processNoiseCov = (cv::Mat_<float>(4, 4) << 0.3f, 0, 0, 0, 0, 0.3f, 0, 0, 0, 0, 0.3f, 0, 0, 0, 0, 0.3f);
+ // Only the position components (x, y) are observed.
+ _filter.measurementMatrix = (cv::Mat_<float>(2, 4) << 1, 0, 0, 0, 0, 1, 0, 0);
+ _filter.measurementNoiseCov = cv::Mat::eye(2, 2, CV_32F);
+ _isInit = false;
+}
+
+Kalman2D::~Kalman2D()
+{
+ // Nothing to release explicitly: cv::KalmanFilter owns its matrices.
+}
+
+/**
+ * Seed both the prior and posterior state with the first measured
+ * position; velocity components start at zero.
+ */
+void Kalman2D::init(float x, float y)
+{
+ const float seed[4] = { x, y, 0.0f, 0.0f };
+
+ for (int i = 0; i < 4; ++i) {
+  _filter.statePre.at<float>(i) = seed[i];
+  _filter.statePost.at<float>(i) = seed[i];
+ }
+
+ _isInit = true;
+}
+
+/**
+ * Advance the filter one time step.
+ *
+ * @return the predicted (prior) 2D position for the next frame.
+ */
+Point Kalman2D::predict()
+{
+ // KalmanFilter::predict() returns the full 4x1 prior state; only the
+ // position components are exposed. The previous code allocated a
+ // throwaway (and wrongly sized, 2x1) matrix that predict() immediately
+ // replaced — bind the result directly instead.
+ const Mat &prior = _filter.predict();
+ return Point(prior.at<float>(0), prior.at<float>(1));
+}
+
+/**
+ * Fold a measured position into the filter (posterior update).
+ */
+void Kalman2D::correct(float x, float y)
+{
+ Mat observation(2, 1, CV_32F);
+ observation.at<float>(0) = x;
+ observation.at<float>(1) = y;
+ _filter.correct(observation);
+}
+
+/**
+ * @return the current corrected (posterior) position estimate.
+ */
+Point Kalman2D::get()
+{
+ const Mat &posterior = _filter.statePost;
+ return Point(posterior.at<float>(0), posterior.at<float>(1));
+}
+
+/**
+ * @return true once init() has seeded the filter with a first position.
+ */
+bool Kalman2D::isInit()
+{
+ return _isInit;
+}
+}
+}
\ No newline at end of file
{
namespace inference
{
+
+// Index into _filterFaceRegion: one Kalman filter per face-box corner.
+enum BOX_COORDINATE {
+ LEFT_TOP,
+ RIGHT_BOTTOM
+};
+
+// NOTE(review): these five names do not line up with the 13-entry
+// _landmarkMappingIndex (nose ridge, nose base, eye corners) built in
+// the constructor — confirm this enum is actually used and accurate.
+enum LANDMARK_COMPONENTS {
+ LEFT_EYE,
+ RIGHT_EYE,
+ NOSE,
+ LEFT_MOUTH,
+ RIGHT_MOUTH
+};
+
PrivateInferenceFaceService::PrivateInferenceFaceService(std::vector<inference::TaskType> &tasks)
{
isDetectTask = isLandmarkDetectTask = false;
+ _isSkipDetectTask = _isSkipLandmarkDetectTask = false;
for (auto &task : tasks) {
switch (task) {
case TaskType::FACE_DETECTION:
_yDownSizeRatio = 0.5f;
_xInvRatio = 1.0f / _xDownSizeRatio;
_yInvRatio = 1.0f / _yDownSizeRatio;
+
+ _filterFaceRegion.resize(2);
+ for (auto &filt : _filterFaceRegion)
+ filt = make_unique<Kalman2D>();
}
break;
case TaskType::FACE_LANDMARK_DETECTION:
ret = mv_facial_landmark_prepare(_handle);
if (ret != MEDIA_VISION_ERROR_NONE)
throw runtime_error("Fail to prepare face landmark detection.");
+
+ _landmarkMappingIndex.resize(13);
+ // nose ridge
+ _landmarkMappingIndex[0] = 27;
+ _landmarkMappingIndex[1] = 28;
+ _landmarkMappingIndex[2] = 29;
+ _landmarkMappingIndex[3] = 30;
+
+ // nose base
+ _landmarkMappingIndex[4] = 31;
+ _landmarkMappingIndex[5] = 32;
+ _landmarkMappingIndex[6] = 33;
+ _landmarkMappingIndex[7] = 34;
+ _landmarkMappingIndex[8] = 35;
+
+ // left eye
+ _landmarkMappingIndex[9] = 36;
+ _landmarkMappingIndex[10] = 39;
+
+ // right eye
+ _landmarkMappingIndex[11] = 42;
+ _landmarkMappingIndex[12] = 45;
+
+ _filterLandmarks.resize(13);
+ for (auto &filt : _filterLandmarks)
+ filt = make_unique<Kalman2D>();
}
break;
default:
throw runtime_error("Fail to get face landmark detection result count.");
Point landmark;
- for (unsigned int idx = 0; idx < result_cnt; ++idx) {
-
- ret = mv_facial_landmark_get_position(_handle, idx, &landmark.x, &landmark.y);
- if (ret != MEDIA_VISION_ERROR_NONE)
- throw runtime_error("Fail to get face landmark detection bound box.");
+ for (auto &idx : _landmarkMappingIndex) {
+ ret = mv_facial_landmark_get_position(_handle, idx, &landmark.x, &landmark.y);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to get face landmark detection bound box.");
landmark.x *= _yInvRatio;
landmark.y *= _yInvRatio;
- landmark.x += roi_left;
- landmark.y += roi_right;
- landmarks.push_back(landmark);
- }
+ landmark.x += roi_left;
+ landmark.y += roi_right;
+
+ landmarks.push_back(landmark);
+ }
}
void PrivateInferenceFaceService::invoke(BaseDataType &input)
_result._rects.clear();
_result._landmarks.clear();
if (isDetectTask) {
+ if (_isSkipDetectTask && _isSkipLandmarkDetectTask) {
+ // predict
+ cv::Point boxLT = _filterFaceRegion[LEFT_TOP]->predict();
+ cv::Point boxRB = _filterFaceRegion[RIGHT_BOTTOM]->predict();
+ Rect rect = { .left = boxLT.x,
+ .right = boxRB.x,
+ .top = boxLT.y,
+ .bottom = boxRB.y};
+ _result._rects.push_back(rect);
+
+ Points landmarks;
+ for (auto &filt : _filterLandmarks) {
+ auto cvLandmark = filt->predict();
+ Point landmark = { .x = static_cast<unsigned int>(cvLandmark.x),
+ .y = static_cast<unsigned int>(cvLandmark.y), .z= 0 };
+ landmarks.push_back(landmark);
+ }
+ _result._landmarks.push_back(landmarks);
+ _isSkipDetectTask = _isSkipLandmarkDetectTask = false;
+ SINGLEO_LOGD("PrivateFaceService takes %d ms", timer.check_ms());
+ int id = 0;
+ for (auto &rect : _result._rects) {
+ cv::rectangle(cvData_, cv::Rect(rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top), cv::Scalar(0, 255, 0), 2);
+ auto &points = _result._landmarks[id];
+ for (auto &point : points)
+ cv::circle(cvData_, cv::Point(point.x, point.y), 2, cv::Scalar(255, 255, 0), 2);
+ id++;
+ }
+ cv::cvtColor(cvData_, cvData_, cv::COLOR_RGB2BGR);
+ cv::imwrite("dump_privateFace.jpg", cvData_);
+
+ return;
+ }
partial.reset();
- // _pFace_detection_result = facedetect_cnn(_pBuffer, data.ptr, data.width, data.height, (int)(data.width*data.byte_per_pixel));
_pFace_detection_result = facedetect_cnn(_pBuffer, cvData.data, cvData.cols, cvData.rows, (int)(cvData.cols * cvData.channels()));
SINGLEO_LOGD("PrivateFaceService (Detection) takes %d ms", partial.check_ms());
for(int i = 0; i < (_pFace_detection_result ? *_pFace_detection_result : 0); i++) {
short * p = ((short*)(_pFace_detection_result + 1)) + 16*i;
- // int confidence = p[0];
+ int confidence = p[0];
+
+ if (confidence < 90)
+ continue;
+
int x = static_cast<int>(p[1]);
int y = static_cast<int>(p[2]);
int w = static_cast<int>(p[3]);
partial.reset();
getLandmarks(roiCvData, landmarks, static_cast<int>(x * _xInvRatio), static_cast<int>(y * _yInvRatio));
SINGLEO_LOGD("PrivateFaceService (Landmark) takes %d ms", partial.check_ms());
+ if (!_filterLandmarks[0]->isInit() || !_filterLandmarks[1]->isInit() || !_filterLandmarks[2]->isInit() || !_filterLandmarks[3]->isInit() ||
+ !_filterLandmarks[4]->isInit() || !_filterLandmarks[5]->isInit() || !_filterLandmarks[6]->isInit() || !_filterLandmarks[7]->isInit() || !_filterLandmarks[8]->isInit() ||
+ !_filterLandmarks[9]->isInit() || !_filterLandmarks[10]->isInit() ||
+ !_filterLandmarks[11]->isInit() || !_filterLandmarks[12]->isInit()) {
+ for (int idx = 0; idx < _landmarkMappingIndex.size(); idx++) {
+ _filterLandmarks[idx]->init(static_cast<float>(landmarks[idx].x), static_cast<float>(landmarks[idx].y));
+ }
+
+ _isSkipLandmarkDetectTask = true;
+ } else {
+ for (int idx = 0; idx < _landmarkMappingIndex.size(); idx++) {
+ _filterLandmarks[idx]->predict();
+ _filterLandmarks[idx]->correct(static_cast<float>(landmarks[idx].x), static_cast<float>(landmarks[idx].y));
+ cv::Point cvPoint = _filterLandmarks[idx]->get();
+ landmarks[idx].x = cvPoint.x;
+ landmarks[idx].y = cvPoint.y;
+ }
+
+ _isSkipLandmarkDetectTask = true;
+ }
+
_result._landmarks.push_back(landmarks);
}
.top = static_cast<int>( y * _yInvRatio),
.bottom = static_cast<int>( (y + h) * _yInvRatio) };
+ if (!_filterFaceRegion[LEFT_TOP]->isInit() || !_filterFaceRegion[RIGHT_BOTTOM]->isInit()) {
+ _filterFaceRegion[LEFT_TOP]->init(rect.left, rect.top);
+ _filterFaceRegion[RIGHT_BOTTOM]->init(rect.right, rect.bottom);
+ _isSkipDetectTask = true;
+ } else {
+ _filterFaceRegion[LEFT_TOP]->predict();
+ _filterFaceRegion[LEFT_TOP]->correct(rect.left, rect.top);
+ cv::Point cvPoint = _filterFaceRegion[LEFT_TOP]->get();
+ rect.left = cvPoint.x;
+ rect.top = cvPoint.y;
+
+ _filterFaceRegion[RIGHT_BOTTOM]->predict();
+ _filterFaceRegion[RIGHT_BOTTOM]->correct(rect.right, rect.bottom);
+ cvPoint = _filterFaceRegion[RIGHT_BOTTOM]->get();
+ rect.right = cvPoint.x;
+ rect.bottom = cvPoint.y;
+
+ _isSkipDetectTask = true;
+ }
_result._rects.push_back(rect);
+ break;
}
} else if (isLandmarkDetectTask) {
Points landmarks;
std::vector<cv::Point3f> _pose_axes_3d;
std::vector<singleo::services::smartpointer::Point2f> _pose_axes;
+ bool _isInit;
+
public:
HeadPoseEstimator();
~HeadPoseEstimator();
return _headPose;
_headPose = _head_pose_estimator->estimate(result._landmarks[0]);
- // vector<Point2f> poseAxes = _head_pose_estimator->getPoseAxes();
+ vector<Point2f> poseAxes = _head_pose_estimator->getPoseAxes();
- // ImageDataType &data = dynamic_cast<ImageDataType &>(input);
- // cv::Mat _dump(cv::Size(data.width, data.height), CV_MAKETYPE(CV_8U, data.byte_per_pixel), data.ptr);
- // cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[1].x, poseAxes[1].y), cv::Scalar(0, 0, 255), 2); // x - red
- // cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[2].x, poseAxes[2].y), cv::Scalar(0, 255, 0), 2); // y - green
- // cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[3].x, poseAxes[3].y), cv::Scalar(255, 0, 0), 2); // z - blue
+ ImageDataType &data = dynamic_cast<ImageDataType &>(input);
+ cv::Mat _dump(cv::Size(data.width, data.height), CV_MAKETYPE(CV_8U, data.byte_per_pixel), data.ptr);
+ cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[1].x, poseAxes[1].y), cv::Scalar(0, 0, 255), 2); // x - red
+ cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[2].x, poseAxes[2].y), cv::Scalar(0, 255, 0), 2); // y - green
+ cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[3].x, poseAxes[3].y), cv::Scalar(255, 0, 0), 2); // z - blue
// cv::imwrite("dump_headpose.jpg", _dump);
FaceShapeModelManager faceShapeModelmgr("/usr/share/singleo/pdm.txt");
const vector<Point3f> faceShape = faceShapeModelmgr.getFaceShape();
- for (auto &point : faceShape)
- _landmarks_3d.push_back(cv::Point3f(point.x, point.y, point.z));
-
- // 33rd landmark is the nose's tip
- _pose_axes_3d.push_back(_landmarks_3d[33]);
- _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[33].x - 50, _landmarks_3d[33].y, _landmarks_3d[33].z));
- _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[33].x, _landmarks_3d[33].y - 50, _landmarks_3d[33].z));
- _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[33].x, _landmarks_3d[33].y, _landmarks_3d[33].z -50));
+ // for (auto &point : faceShape)
+ // _landmarks_3d.push_back(cv::Point3f(point.x, point.y, point.z));
+ vector<int> landmarkMappingIndex = {27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 42, 45};
+ for (auto &idx : landmarkMappingIndex) {
+ _landmarks_3d.push_back(cv::Point3f(faceShape[idx].x, faceShape[idx].y, faceShape[idx].z));
+ }
+
+ // 2nd landmark is the nose's tip
+ _pose_axes_3d.push_back(_landmarks_3d[6]);
+ _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[6].x - 50, _landmarks_3d[6].y, _landmarks_3d[6].z));
+ _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[6].x, _landmarks_3d[6].y - 50, _landmarks_3d[6].z));
+ _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[6].x, _landmarks_3d[6].y, _landmarks_3d[6].z -50));
_pose_axes.resize(_pose_axes_3d.size());
_landmarks_2d.resize(_landmarks_3d.size());
_translation_vector = Mat(Size(3,1), CV_32FC1);
CameraParamManager cam_dummy("/usr/share/singleo/camera_param_rs2.yaml");
-
+
_camera_matrix = Mat::zeros(Size(3,3), CV_32FC1);
tuple<float, float> focalLength = cam_dummy.getFocalLengthXY();
tuple<float, float> principlePoint = cam_dummy.getPrinciplePointXY();
SINGLEO_LOGI("%.3f", _camera_dist_coeff.at<float>(r, c));
}
}
+
+ _isInit = false;
}
HeadPoseEstimator::~HeadPoseEstimator()
}
SINGLEO_LOGI("Get 2d landmark takes %d ms", timer.check_ms());
- timer.reset();
- // init
- cv::solvePnPRansac(_landmarks_3d, _landmarks_2d,
- _camera_matrix, _camera_dist_coeff,
- _rotation_vector, _translation_vector, false, 100, 8.0F, 0.99, cv::noArray(), cv::SOLVEPNP_EPNP);
- SINGLEO_LOGI("1st solvePnP takes %d ms", timer.check_ms());
+ if (true/*!_isInit*/) {
+ timer.reset();
+ // init
+ cv::solvePnPRansac(_landmarks_3d, _landmarks_2d,
+ _camera_matrix, _camera_dist_coeff,
+ _rotation_vector, _translation_vector, false, 100, 8.0F, 0.99, cv::noArray(), cv::SOLVEPNP_EPNP);
+ SINGLEO_LOGI("1st solvePnP takes %d ms", timer.check_ms());
+ _isInit = true;
+ }
timer.reset();
// estimate pose
{
Context context {};
- singleo_service_create("service=smart_pointer, input_feed=camera, camera_id=4, fps=15, async=0",
+ singleo_service_create("service=smart_pointer, input_feed=camera, camera_id=4, fps=30, async=0",
&context.handle);
// user_data, &context, is ignored temporally.