[WIP-15] Add kalman filter
author: Tae-Young Chung <ty83.chung@samsung.com>
Mon, 13 May 2024 00:41:05 +0000 (09:41 +0900)
committer: Tae-Young Chung <ty83.chung@samsung.com>
Tue, 14 May 2024 01:08:31 +0000 (10:08 +0900)
Change-Id: Ie8a5898ac8ecf308b1968ee6349a02e4f3216229
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
inference/backends/private/include/Kalman2d.h [new file with mode: 0644]
inference/backends/private/include/PrivateInferenceFaceService.h
inference/backends/private/src/Kalman2d.cpp [new file with mode: 0644]
inference/backends/private/src/PrivateInferenceFaceService.cpp
services/smart_pointer/include/HeadPoseEstimator.h
services/smart_pointer/src/GazeEstimator.cpp
services/smart_pointer/src/HeadPoseEstimator.cpp
test/services/smartpointer.cpp

diff --git a/inference/backends/private/include/Kalman2d.h b/inference/backends/private/include/Kalman2d.h
new file mode 100644 (file)
index 0000000..d9060c9
--- /dev/null
@@ -0,0 +1,43 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Identifiers containing a double underscore are reserved to the
+// implementation ([lex.name]), so the guard avoids the __..__ form.
+#ifndef KALMAN_2D_H
+#define KALMAN_2D_H
+
+#include <opencv2/video/tracking.hpp>
+
+namespace singleo
+{
+namespace inference
+{
+/**
+ * @brief 2-D point smoother wrapping cv::KalmanFilter.
+ *
+ * State is a 4x1 vector (x, y, vx, vy); measurements are 2-D positions.
+ * Call init() with the first observation, then alternate predict() and
+ * correct() per frame; get() returns the current corrected position.
+ */
+class Kalman2D {
+private:
+    cv::KalmanFilter _filter;
+    bool _isInit;   // set by init(); queried via isInit()
+
+public:
+    Kalman2D();
+    ~Kalman2D();
+
+    /// Seed statePre/statePost with the first observed position (x, y).
+    void init(float x, float y);
+    /// Advance the filter one step; returns the predicted (x, y).
+    cv::Point predict();
+    /// Fold a new (x, y) measurement into the filter state.
+    void correct(float x, float y);
+    /// Current corrected position, i.e. statePost's (x, y) components.
+    cv::Point get();
+    /// True once init() has been called.
+    bool isInit();
+};
+}
+}
+#endif // KALMAN_2D_H
\ No newline at end of file
index 05be030186cb75a7f0b42406a961833d823c0f5e..24bb3483a79a3663f6c64beabce1d0bd56cde718 100644 (file)
 #define __PRIVATE_INFERENCE_FACE_SERVICE_H__
 
 #include <memory>
+#include <vector>
 #include <opencv2/core.hpp>
 #include "IInferenceFaceService.h"
 #include "mv_facial_landmark_internal.h"
+#include "Kalman2d.h"
 
 namespace singleo
 {
@@ -39,12 +41,19 @@ private:
 
        bool isDetectTask;
        bool isLandmarkDetectTask;
+       bool _isSkipDetectTask;
+       bool _isSkipLandmarkDetectTask;
 
        float _xDownSizeRatio;
        float _yDownSizeRatio;
        float _xInvRatio;
        float _yInvRatio;
 
+       std::vector<int> _landmarkMappingIndex;
+
+       std::vector<std::unique_ptr<Kalman2D>> _filterFaceRegion;
+       std::vector<std::unique_ptr<Kalman2D>> _filterLandmarks;
+
        void getLandmarks(cv::Mat &data, Points &landmarks, int roi_left = 0, int roi_right = 0);
 public:
        PrivateInferenceFaceService(std::vector<inference::TaskType> &tasks);
diff --git a/inference/backends/private/src/Kalman2d.cpp b/inference/backends/private/src/Kalman2d.cpp
new file mode 100644 (file)
index 0000000..cf53666
--- /dev/null
@@ -0,0 +1,82 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Kalman2d.h"
+
+using namespace cv;
+
+namespace singleo
+{
+namespace inference
+{
+Kalman2D::Kalman2D()
+{
+    _filter =  KalmanFilter(4, 2, 0);
+
+    _filter.statePre = (Mat_<float>(4,1) << 0, 0, 0, 0);
+    _filter.statePost = (Mat_<float>(4,1) << 0, 0, 0, 0);
+
+    _filter.transitionMatrix = (Mat_<float>(4,4) << 1, 0, 0.5f, 0, 0, 1, 0, 0.5f, 0, 0, 0.02f, 0, 0, 0, 0, 0.02f);
+    _filter.processNoiseCov = (cv::Mat_<float>(4, 4) << 0.3f, 0, 0, 0, 0, 0.3f, 0, 0, 0, 0, 0.3f, 0, 0, 0, 0, 0.3f);
+    _filter.measurementMatrix = (cv::Mat_<float>(2, 4) << 1, 0, 0, 0, 0, 1, 0, 0);
+       _filter.measurementNoiseCov = cv::Mat::eye(2, 2, CV_32F);
+    _isInit = false;
+}
+
+// No owned resources beyond cv::KalmanFilter's RAII members; the
+// defaulted destructor expresses that directly (Rule of Zero).
+Kalman2D::~Kalman2D() = default;
+
+// Seed both the a-priori and a-posteriori state with the first observed
+// position; velocity components start at zero. Marks the filter usable.
+void Kalman2D::init(float x, float y)
+{
+    _filter.statePre = (Mat_<float>(4, 1) << x, y, 0, 0);
+    _filter.statePost = (Mat_<float>(4, 1) << x, y, 0, 0);
+    _isInit = true;
+}
+
+// Advance the filter one time step and expose the predicted position.
+// KalmanFilter::predict() already returns the 4x1 predicted state, so no
+// separate output Mat needs to be allocated beforehand.
+Point Kalman2D::predict()
+{
+    Mat predicted = _filter.predict();
+    return Point(predicted.at<float>(0), predicted.at<float>(1));
+}
+
+// Fold a new (x, y) observation into the filter's posterior state.
+void Kalman2D::correct(float x, float y)
+{
+    Mat measurement = (Mat_<float>(2, 1) << x, y);
+    _filter.correct(measurement);
+}
+
+// Current corrected estimate: the (x, y) components of statePost.
+Point Kalman2D::get()
+{
+    const Mat &state = _filter.statePost;
+    return Point(state.at<float>(0), state.at<float>(1));
+}
+
+// Reports whether init() has seeded the filter state yet.
+bool Kalman2D::isInit()
+{
+    return _isInit;
+}
+}
+}
\ No newline at end of file
index f3dc9541ee0cb8e33688a6fb18b7e369b2039112..2ef6fc9e9c47abde84de20b8c987a30be409927e 100644 (file)
@@ -30,9 +30,24 @@ namespace singleo
 {
 namespace inference
 {
+
+enum BOX_COORDINATE {
+    LEFT_TOP,
+    RIGHT_BOTTOM
+};
+
+enum LANDMARK_COMPONENTS {
+    LEFT_EYE,
+    RIGHT_EYE,
+    NOSE,
+    LEFT_MOUTH,
+    RIGHT_MOUTH
+};
+
 PrivateInferenceFaceService::PrivateInferenceFaceService(std::vector<inference::TaskType> &tasks)
 {
     isDetectTask = isLandmarkDetectTask = false;
+    _isSkipDetectTask = _isSkipLandmarkDetectTask = false;
     for (auto &task : tasks) {
         switch (task) {
         case TaskType::FACE_DETECTION:
@@ -45,6 +60,10 @@ PrivateInferenceFaceService::PrivateInferenceFaceService(std::vector<inference::
             _yDownSizeRatio = 0.5f;
             _xInvRatio = 1.0f / _xDownSizeRatio;
             _yInvRatio = 1.0f / _yDownSizeRatio;
+
+            _filterFaceRegion.resize(2);
+            for (auto &filt : _filterFaceRegion)
+                filt = make_unique<Kalman2D>();
         }
             break;
         case TaskType::FACE_LANDMARK_DETECTION:
@@ -69,6 +88,32 @@ PrivateInferenceFaceService::PrivateInferenceFaceService(std::vector<inference::
             ret = mv_facial_landmark_prepare(_handle);
             if (ret != MEDIA_VISION_ERROR_NONE)
                 throw runtime_error("Fail to prepare face landmark detection.");
+
+            _landmarkMappingIndex.resize(13);
+            // nose ridge
+            _landmarkMappingIndex[0] = 27;
+            _landmarkMappingIndex[1] = 28;
+            _landmarkMappingIndex[2] = 29;
+            _landmarkMappingIndex[3] = 30;
+
+            // nose base
+            _landmarkMappingIndex[4] = 31;
+            _landmarkMappingIndex[5] = 32;
+            _landmarkMappingIndex[6] = 33;
+            _landmarkMappingIndex[7] = 34;
+            _landmarkMappingIndex[8] = 35;
+
+            // left eye
+            _landmarkMappingIndex[9] = 36;
+            _landmarkMappingIndex[10] = 39;
+
+            // right eye
+            _landmarkMappingIndex[11] = 42;
+            _landmarkMappingIndex[12] = 45;
+
+            _filterLandmarks.resize(13);
+            for (auto &filt : _filterLandmarks)
+                filt = make_unique<Kalman2D>();
         }
             break;
         default:
@@ -118,18 +163,18 @@ void PrivateInferenceFaceService::getLandmarks(cv::Mat &data, Points &landmarks,
                throw runtime_error("Fail to get face landmark detection result count.");
 
        Point landmark;
-       for (unsigned int idx = 0; idx < result_cnt; ++idx) {
-
-               ret = mv_facial_landmark_get_position(_handle, idx, &landmark.x, &landmark.y);
-               if (ret != MEDIA_VISION_ERROR_NONE)
-                       throw runtime_error("Fail to get face landmark detection bound box.");
+    for (auto &idx : _landmarkMappingIndex) {
+        ret = mv_facial_landmark_get_position(_handle, idx, &landmark.x, &landmark.y);
+        if (ret != MEDIA_VISION_ERROR_NONE)
+            throw runtime_error("Fail to get face landmark detection bound box.");
 
         landmark.x *= _yInvRatio;
         landmark.y *= _yInvRatio;
-               landmark.x += roi_left;
-               landmark.y += roi_right;
-               landmarks.push_back(landmark);
-       }
+        landmark.x += roi_left;
+        landmark.y += roi_right;
+
+        landmarks.push_back(landmark);
+    }
 }
 
 void PrivateInferenceFaceService::invoke(BaseDataType &input)
@@ -152,15 +197,51 @@ void PrivateInferenceFaceService::invoke(BaseDataType &input)
     _result._rects.clear();
     _result._landmarks.clear();
     if (isDetectTask) {
+        if (_isSkipDetectTask && _isSkipLandmarkDetectTask) {
+            // predict
+            cv::Point boxLT = _filterFaceRegion[LEFT_TOP]->predict();
+            cv::Point boxRB = _filterFaceRegion[RIGHT_BOTTOM]->predict();
+            Rect rect = { .left = boxLT.x,
+                          .right = boxRB.x,
+                          .top = boxLT.y,
+                          .bottom = boxRB.y};
+            _result._rects.push_back(rect);
+
+            Points landmarks;
+            for (auto &filt : _filterLandmarks) {
+                auto cvLandmark = filt->predict();
+                Point landmark = { .x = static_cast<unsigned int>(cvLandmark.x),
+                                   .y = static_cast<unsigned int>(cvLandmark.y), .z= 0 };
+                landmarks.push_back(landmark);
+            }
+            _result._landmarks.push_back(landmarks);
+            _isSkipDetectTask = _isSkipLandmarkDetectTask = false;
+            SINGLEO_LOGD("PrivateFaceService takes %d ms", timer.check_ms());
+            int id = 0;
+            for (auto &rect : _result._rects) {
+                cv::rectangle(cvData_, cv::Rect(rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top), cv::Scalar(0, 255, 0), 2);
+                auto &points = _result._landmarks[id];
+                for (auto &point : points)
+                    cv::circle(cvData_, cv::Point(point.x, point.y), 2, cv::Scalar(255, 255, 0), 2);
+                id++;
+            }
+            cv::cvtColor(cvData_, cvData_, cv::COLOR_RGB2BGR);
+            cv::imwrite("dump_privateFace.jpg", cvData_);
+
+            return;
+        }
         partial.reset();
-        // _pFace_detection_result = facedetect_cnn(_pBuffer, data.ptr, data.width, data.height, (int)(data.width*data.byte_per_pixel));
         _pFace_detection_result = facedetect_cnn(_pBuffer, cvData.data, cvData.cols, cvData.rows, (int)(cvData.cols * cvData.channels()));
         SINGLEO_LOGD("PrivateFaceService (Detection) takes %d ms", partial.check_ms());
 
         for(int i = 0; i < (_pFace_detection_result ? *_pFace_detection_result : 0); i++) {
 
             short * p = ((short*)(_pFace_detection_result + 1)) + 16*i;
-            // int confidence = p[0];
+            int confidence = p[0];
+
+            if (confidence < 90)
+                continue;
+
             int x = static_cast<int>(p[1]);
             int y = static_cast<int>(p[2]);
             int w = static_cast<int>(p[3]);
@@ -173,6 +254,27 @@ void PrivateInferenceFaceService::invoke(BaseDataType &input)
                 partial.reset();
                 getLandmarks(roiCvData, landmarks, static_cast<int>(x * _xInvRatio), static_cast<int>(y * _yInvRatio));
                 SINGLEO_LOGD("PrivateFaceService (Landmark) takes %d ms", partial.check_ms());
+                if (!_filterLandmarks[0]->isInit() || !_filterLandmarks[1]->isInit() || !_filterLandmarks[2]->isInit() || !_filterLandmarks[3]->isInit() ||
+                    !_filterLandmarks[4]->isInit() || !_filterLandmarks[5]->isInit() || !_filterLandmarks[6]->isInit() || !_filterLandmarks[7]->isInit() || !_filterLandmarks[8]->isInit() ||
+                    !_filterLandmarks[9]->isInit() || !_filterLandmarks[10]->isInit() ||
+                    !_filterLandmarks[11]->isInit() || !_filterLandmarks[12]->isInit()) {
+                    for (int idx = 0; idx < _landmarkMappingIndex.size(); idx++) {
+                        _filterLandmarks[idx]->init(static_cast<float>(landmarks[idx].x), static_cast<float>(landmarks[idx].y));
+                    }
+
+                    _isSkipLandmarkDetectTask = true;
+                } else {
+                    for (int idx = 0; idx < _landmarkMappingIndex.size(); idx++) {
+                        _filterLandmarks[idx]->predict();
+                        _filterLandmarks[idx]->correct(static_cast<float>(landmarks[idx].x), static_cast<float>(landmarks[idx].y));
+                        cv::Point cvPoint = _filterLandmarks[idx]->get();
+                        landmarks[idx].x = cvPoint.x;
+                        landmarks[idx].y = cvPoint.y;
+                    }
+
+                    _isSkipLandmarkDetectTask = true;
+                }
+
                 _result._landmarks.push_back(landmarks);
             }
 
@@ -181,7 +283,27 @@ void PrivateInferenceFaceService::invoke(BaseDataType &input)
                           .top = static_cast<int>( y * _yInvRatio),
                           .bottom = static_cast<int>( (y + h) * _yInvRatio) };
 
+            if (!_filterFaceRegion[LEFT_TOP]->isInit() || !_filterFaceRegion[RIGHT_BOTTOM]->isInit()) {
+                _filterFaceRegion[LEFT_TOP]->init(rect.left, rect.top);
+                _filterFaceRegion[RIGHT_BOTTOM]->init(rect.right, rect.bottom);
+                _isSkipDetectTask = true;
+            } else {
+                _filterFaceRegion[LEFT_TOP]->predict();
+                _filterFaceRegion[LEFT_TOP]->correct(rect.left, rect.top);
+                cv::Point cvPoint = _filterFaceRegion[LEFT_TOP]->get();
+                rect.left = cvPoint.x;
+                rect.top = cvPoint.y;
+
+                _filterFaceRegion[RIGHT_BOTTOM]->predict();
+                _filterFaceRegion[RIGHT_BOTTOM]->correct(rect.right, rect.bottom);
+                cvPoint = _filterFaceRegion[RIGHT_BOTTOM]->get();
+                rect.right = cvPoint.x;
+                rect.bottom = cvPoint.y;
+
+                _isSkipDetectTask = true;
+            }
             _result._rects.push_back(rect);
+            break;
         }
     } else if (isLandmarkDetectTask) {
         Points landmarks;
index ed0f84f2367d2394a45d7dce5f3477d0aed7ac63..489724b6f0dd9c19c8638cb447b314f467407ac3 100644 (file)
@@ -48,6 +48,8 @@ private:
     std::vector<cv::Point3f> _pose_axes_3d;
     std::vector<singleo::services::smartpointer::Point2f> _pose_axes;
 
+    bool _isInit;
+
 public:
     HeadPoseEstimator();
     ~HeadPoseEstimator();
index 4a2b5e288ccb9da29f00352d8e9caf12cb3c759e..dd27f558b42ff3a99cdbd2bc482e1187adcb9aa2 100644 (file)
@@ -58,13 +58,13 @@ PoseVector &GazeEstimator::estimateHeadpose(BaseDataType &input)
         return _headPose;
 
      _headPose = _head_pose_estimator->estimate(result._landmarks[0]);
-    // vector<Point2f> poseAxes = _head_pose_estimator->getPoseAxes();
+    vector<Point2f> poseAxes = _head_pose_estimator->getPoseAxes();
 
-    // ImageDataType &data = dynamic_cast<ImageDataType &>(input);
-    // cv::Mat _dump(cv::Size(data.width, data.height), CV_MAKETYPE(CV_8U, data.byte_per_pixel), data.ptr);
-    // cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[1].x, poseAxes[1].y), cv::Scalar(0, 0, 255), 2); // x - red
-    // cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[2].x, poseAxes[2].y), cv::Scalar(0, 255, 0), 2); // y - green
-    // cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[3].x, poseAxes[3].y), cv::Scalar(255, 0, 0), 2); // z - blue
+    ImageDataType &data = dynamic_cast<ImageDataType &>(input);
+    cv::Mat _dump(cv::Size(data.width, data.height), CV_MAKETYPE(CV_8U, data.byte_per_pixel), data.ptr);
+    cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[1].x, poseAxes[1].y), cv::Scalar(0, 0, 255), 2); // x - red
+    cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[2].x, poseAxes[2].y), cv::Scalar(0, 255, 0), 2); // y - green
+    cv::line(_dump, cv::Point(poseAxes[0].x, poseAxes[0].y), cv::Point(poseAxes[3].x, poseAxes[3].y), cv::Scalar(255, 0, 0), 2); // z - blue
 
     // cv::imwrite("dump_headpose.jpg", _dump);
 
index e84927ecf27a14239e8ca2e6137804678d5d2194..41d60c283b1239c63a79415d6d07596dddd2979f 100644 (file)
@@ -35,14 +35,18 @@ HeadPoseEstimator::HeadPoseEstimator()
     FaceShapeModelManager faceShapeModelmgr("/usr/share/singleo/pdm.txt");
     const vector<Point3f> faceShape = faceShapeModelmgr.getFaceShape();
 
-    for (auto &point : faceShape)
-        _landmarks_3d.push_back(cv::Point3f(point.x, point.y, point.z));
-
-    // 33rd landmark is the nose's tip
-    _pose_axes_3d.push_back(_landmarks_3d[33]);
-    _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[33].x - 50, _landmarks_3d[33].y, _landmarks_3d[33].z));
-    _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[33].x, _landmarks_3d[33].y - 50, _landmarks_3d[33].z));
-    _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[33].x, _landmarks_3d[33].y, _landmarks_3d[33].z -50));
+    // for (auto &point : faceShape)
+    //     _landmarks_3d.push_back(cv::Point3f(point.x, point.y, point.z));
+    vector<int> landmarkMappingIndex = {27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 42, 45};
+    for (auto &idx : landmarkMappingIndex) {
+        _landmarks_3d.push_back(cv::Point3f(faceShape[idx].x, faceShape[idx].y, faceShape[idx].z));
+    }
+
+    // 2nd landmark is the nose's tip
+    _pose_axes_3d.push_back(_landmarks_3d[6]);
+    _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[6].x - 50, _landmarks_3d[6].y, _landmarks_3d[6].z));
+    _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[6].x, _landmarks_3d[6].y - 50, _landmarks_3d[6].z));
+    _pose_axes_3d.push_back(cv::Point3f(_landmarks_3d[6].x, _landmarks_3d[6].y, _landmarks_3d[6].z -50));
     _pose_axes.resize(_pose_axes_3d.size());
 
     _landmarks_2d.resize(_landmarks_3d.size());
@@ -50,7 +54,7 @@ HeadPoseEstimator::HeadPoseEstimator()
     _translation_vector = Mat(Size(3,1), CV_32FC1);
 
     CameraParamManager cam_dummy("/usr/share/singleo/camera_param_rs2.yaml");
-    
+
     _camera_matrix = Mat::zeros(Size(3,3), CV_32FC1);
     tuple<float, float> focalLength = cam_dummy.getFocalLengthXY();
     tuple<float, float> principlePoint = cam_dummy.getPrinciplePointXY();
@@ -78,6 +82,8 @@ HeadPoseEstimator::HeadPoseEstimator()
             SINGLEO_LOGI("%.3f", _camera_dist_coeff.at<float>(r, c));
         }
     }
+
+    _isInit = false;
 }
 
 HeadPoseEstimator::~HeadPoseEstimator()
@@ -96,12 +102,15 @@ PoseVector HeadPoseEstimator::estimate(const vector<Point> &landmark2d)
     }
     SINGLEO_LOGI("Get 2d landmark takes %d ms", timer.check_ms());
 
-    timer.reset();
-    // init
-    cv::solvePnPRansac(_landmarks_3d, _landmarks_2d,
-                _camera_matrix, _camera_dist_coeff,
-                _rotation_vector, _translation_vector, false, 100, 8.0F, 0.99, cv::noArray(), cv::SOLVEPNP_EPNP);
-    SINGLEO_LOGI("1st solvePnP takes %d ms", timer.check_ms());
+    if (true/*!_isInit*/) {
+        timer.reset();
+        // init
+        cv::solvePnPRansac(_landmarks_3d, _landmarks_2d,
+                    _camera_matrix, _camera_dist_coeff,
+                    _rotation_vector, _translation_vector, false, 100, 8.0F, 0.99, cv::noArray(), cv::SOLVEPNP_EPNP);
+        SINGLEO_LOGI("1st solvePnP takes %d ms", timer.check_ms());
+        _isInit = true;
+    }
 
     timer.reset();
     // estimate pose
index ff5f361dee12d5f42af81bd53c7784c518040f04..2f7c7921d0ac899147c2c5e3cb2e9e2eb691c633 100644 (file)
@@ -68,7 +68,7 @@ int main(const int argc, const char *argv[])
 {
        Context context {};
 
-       singleo_service_create("service=smart_pointer, input_feed=camera, camera_id=4, fps=15, async=0",
+       singleo_service_create("service=smart_pointer, input_feed=camera, camera_id=4, fps=30, async=0",
                                                                         &context.handle);
 
        // user_data, &context, is ignored temporally.