Change mv_point_s from 2d to 3d 11/275811/2
author Tae-Young Chung <ty83.chung@samsung.com>
Thu, 2 Jun 2022 05:48:45 +0000 (14:48 +0900)
committer Eunki, Hong <eunkiki.hong@samsung.com>
Thu, 14 Jul 2022 05:54:20 +0000 (14:54 +0900)
To test the 3D results of a model, temporarily change mv_point_s from 2D (x, y)
to 3D (x, y, z).
Note that z is not an integer but the float value of the output itself.

Change-Id: Ib1f9ea429a038952bdd37f771cce6cd9b248764c
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
include/mv_common.h
mv_machine_learning/mv_inference/inference/include/Inference.h
mv_machine_learning/mv_inference/inference/include/Landmark.h
mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
mv_machine_learning/mv_inference/inference/src/Inference.cpp
mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp
packaging/capi-media-vision.spec
test/testsuites/machine_learning/inference/CMakeLists.txt
test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp

index b6788944a30129805631c228f7f3418967ad5bff..090ff8f14c576ee74d8eff4d897e245cdeec0fc5 100644 (file)
@@ -41,6 +41,7 @@ extern "C" {
 typedef struct {
        int x; /**< X-axis coordinate of the point in 2D space */
        int y; /**< Y-axis coordinate of the point in 2D space */
+       float z; /**< Z-axis value of the point; kept as the float output value itself rather than an int (temporary, for testing 3D results) */
 } mv_point_s;
 
 /**
index 95f285f47376d0957300ef727c169409c48d11dc..2baf83e56cb1b4ed2d51d7c30c6c311f05f0656d 100644 (file)
@@ -67,7 +67,7 @@ typedef struct _FaceDetectionResults {
 
 typedef struct _FacialLandMarkDetectionResults {
        int number_of_landmarks;
-       std::vector<cv::Point> locations;
+       std::vector<cv::Point3f> locations;
 } FacialLandMarkDetectionResults; /**< structure FacialLandMarkDetectionResults */
 
 typedef struct _PoseLandmarkDetectionResults {
index 4aae02757202a5cfbdaa94457f3c3e1b849b032a..c92e6ae160789fca26890d993843055b30382469 100644 (file)
@@ -41,7 +41,7 @@ namespace inference
        {
                float score;
                cv::Point heatMapLoc;
-               cv::Point2f decodedLoc;
+               cv::Point3f decodedLoc;
                int id;
                bool valid;
        } LandmarkPoint;
index 4960d684b3408cd9667a0fa9e315ce151cd81547..aaeb48ea1fbf5f7c23f6eb97ec6008f4bb688887 100644 (file)
@@ -85,6 +85,7 @@ namespace inference
 
                float getPointX(int poseIdx, int partIdx);
                float getPointY(int poseIdx, int partIdx);
+               float getPointZ(int poseIdx, int partIdx);
                float getScore(int poseIdx, int partIdx);
        };
 
index adf58fc41f52589cad050ff16cedf863bed844fa..a92290ceeeadb38079d7abf9868307b057604e8e 100755 (executable)
@@ -1579,8 +1579,9 @@ namespace inference
                        LOGE("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height);
                        for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) {
                                results->locations.push_back(
-                                       cv::Point(poseDecoder.getPointX(0, landmarkIndex) * static_cast<float>(mSourceSize.width),
-                                                         poseDecoder.getPointY(0, landmarkIndex) * static_cast<float>(mSourceSize.height)));
+                                       cv::Point3f(poseDecoder.getPointX(0, landmarkIndex) * static_cast<float>(mSourceSize.width),
+                                                         poseDecoder.getPointY(0, landmarkIndex) * static_cast<float>(mSourceSize.height),
+                                                         poseDecoder.getPointZ(0, landmarkIndex)));
                        }
 
                        results->number_of_landmarks = results->locations.size();
@@ -1607,7 +1608,7 @@ namespace inference
                                point.x = static_cast<int>(*loc++ * mSourceSize.width);
                                point.y = static_cast<int>(*loc++ * mSourceSize.height);
 
-                               LOGI("x:%d, y:%d", point.x, point.y);
+                               LOGI("x:%d, y:%d", static_cast<int>(point.x), static_cast<int>(point.y));
                        }
                }
 
@@ -1708,6 +1709,8 @@ namespace inference
                                                                                poseDecoder.getPointX(poseIndex, part) * static_cast<float>(mSourceSize.width);
                                        poseResult->landmarks[poseIndex][landmarkIndex].point.y =
                                                                                poseDecoder.getPointY(poseIndex, part) * static_cast<float>(mSourceSize.height);
+                                       poseResult->landmarks[poseIndex][landmarkIndex].point.z =
+                                                                               poseDecoder.getPointZ(poseIndex, part);
                                        poseResult->landmarks[poseIndex][landmarkIndex].label = landmarkIndex;
                                        poseResult->landmarks[poseIndex][landmarkIndex].score =
                                                                                poseDecoder.getScore(poseIndex, part);
index 89ce1ffbde7a76224576e8d1f7914102b6364693..308005f149ae6af7ee66f8f9f5889086a02721da 100644 (file)
@@ -174,16 +174,19 @@ namespace inference
 
        float PoseDecoder::getPointX(int poseIdx, int partIdx)
        {
-               LOGI("idx[%d]-part[%d]", poseIdx, partIdx);
                return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.x;
        }
 
        float PoseDecoder::getPointY(int poseIdx, int partIdx)
        {
-               LOGI("idx[%d]-part[%d]", poseIdx, partIdx);
                return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.y;
        }
 
+       float PoseDecoder::getPointZ(int poseIdx, int partIdx)
+       {
+               return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.z;
+       }
+
        float PoseDecoder::getScore(int poseIdx, int partIdx)
        {
                return mPoseLandmarks[poseIdx].landmarks[partIdx].score;
@@ -234,7 +237,7 @@ namespace inference
 
                mPoseLandmarks.clear();
 
-               LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false};
+               LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point3f(0.0f, 0.0f, 0.0f), -1, false};
 
                if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
                        mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
@@ -269,7 +272,7 @@ namespace inference
                                if (thresHoldRadius > 0.0f) {
                                        bool isSkip = false;
                                        for (auto& result : mPoseLandmarks) {
-                                               cv::Point2f dfRadius = result.landmarks[root.id].decodedLoc;
+                                               cv::Point3f dfRadius = result.landmarks[root.id].decodedLoc;
                                                dfRadius -= root.decodedLoc;
                                                float radius =
                                                        std::pow(dfRadius.x * scaleWidth, 2.0f) +
@@ -317,7 +320,7 @@ namespace inference
 
                                        mPoseLandmarks[0].landmarks[idx].score = pscore;
                                        mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1);
-                                       mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point2f(px / scaleWidth, py / scaleHeight);
+                                       mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point3f(px / scaleWidth, py / scaleHeight, 0.0);
                                        mPoseLandmarks[0].landmarks[idx].id = idx;
                                        mPoseLandmarks[0].landmarks[idx].valid =  true;
 
@@ -353,14 +356,15 @@ namespace inference
                        for (int idx = 0; idx < mNumberOfLandmarks; ++idx) {
                                        float px = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset);
                                        float py = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 1);
+                                       float pz = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 2);
 
                                        mPoseLandmarks[0].landmarks[idx].score = poseScore;
                                        mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1);
-                                       mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point2f(px/scaleWidth, py/scaleHeight);
+                                       mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point3f(px/scaleWidth, py/scaleHeight, pz);
                                        mPoseLandmarks[0].landmarks[idx].id = idx;
                                        mPoseLandmarks[0].landmarks[idx].valid =  true;
 
-                                       LOGI("idx[%d]: %.4f, %.4f", idx, px, py);
+                                       LOGI("idx[%d]: %.4f, %.4f, %.4f", idx, px, py, pz);
                        }
 
                        mPoseLandmarks[0].score = poseScore;
index 5faa3ad0e7a441f40a42ace58b05ad08938b610f..8ba5f87edfade260fc0bf945851605baef06a3b4 100644 (file)
@@ -795,6 +795,7 @@ int mv_inference_facial_landmark_detect_open(
        for (int landmark_idx = 0; landmark_idx < numberOfLandmarks; ++landmark_idx) {
                locations[landmark_idx].x = facialLandMarkDetectionResults.locations[landmark_idx].x;
                locations[landmark_idx].y = facialLandMarkDetectionResults.locations[landmark_idx].y;
+               locations[landmark_idx].z = facialLandMarkDetectionResults.locations[landmark_idx].z;
        }
 
        detected_cb(source, numberOfLandmarks, locations.data(), user_data);
index 33f11b473e91794304c72565275c7f5baccd9b73..bd10f096528205f686eead8f3f6cc1489770e694 100644 (file)
@@ -1,6 +1,6 @@
 Name:        capi-media-vision
 Summary:     Media Vision library for Tizen Native API
-Version:     0.12.6
+Version:     0.12.7
 Release:     0
 Group:       Multimedia/Framework
 License:     Apache-2.0 and BSD-3-Clause
index 593e155114c0cdae10604eb62d197a7839f55df3..6839ddeda1fe11095ff7a224e60ed32cb88d7757 100644 (file)
@@ -50,21 +50,3 @@ target_link_libraries(mv_facestream_test_suite mv_inference
 
 install(TARGETS mv_facestream_test_suite DESTINATION ${CMAKE_INSTALL_BINDIR})
 
-
-
-add_executable(mv_posestream_test_suite mv_posestream_test_suite.cpp)
-
-target_link_libraries(mv_posestream_test_suite mv_inference
-                                      ${OpenCV_LIBS}
-                                      gstreamer-1.0
-                                      glib-2.0
-                                      capi-system-info
-                                      dlog
-                                      mv_image_helper
-                                      mv_video_helper
-                                      mv_testsuite_common
-                                      cairo
-                                      ${${PROJECT_NAME}_LIBRARIES}
-                                      )
-
-install(TARGETS mv_posestream_test_suite DESTINATION ${CMAKE_INSTALL_BINDIR})
\ No newline at end of file
index e09020b0b73fffacc6909f9a6260567ec094c0bc..f8086a0c70a6e85069249287131c318777f024d2 100644 (file)
@@ -252,12 +252,14 @@ static void _facial_landmark_cb(mv_source_h source,
        float maxAlpha = 0.8f;
 
        for (int pt=0; pt < landmarks; pt++) {
-               printf("%d: x[%d], y[%d]\n", pt, locations[pt].x,locations[pt].y);
                x = static_cast<float>(locations[pt].x) / 192.f * static_cast<float>(faceSkeleton.fRoi.width);
                y = static_cast<float>(locations[pt].y) / 192.f * static_cast<float>(faceSkeleton.fRoi.height);
                faceSkeleton.fLmark[pt].x = static_cast<int>(x) + faceSkeleton.fRoi.point.x;
                faceSkeleton.fLmark[pt].y = static_cast<int>(y) + faceSkeleton.fRoi.point.y;
-
+               faceSkeleton.fLmark[pt].z = locations[pt].z;
+               printf("%d: x[%d], y[%d], z[%f]\n", pt, faceSkeleton.fLmark[pt].x,
+                                                                                               faceSkeleton.fLmark[pt].y,
+                                                                                               faceSkeleton.fLmark[pt].z);
        }
 }