From: Tae-Young Chung
Date: Thu, 2 Jun 2022 05:48:45 +0000 (+0900)
Subject: Change mv_point_s from 2d to 3d
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c7851039cb1d06ba2841d5518f4a52005058fbe4;p=platform%2Fcore%2Fapi%2Fmediavision.git

Change mv_point_s from 2d to 3d

To test the 3D results of a model, temporarily change mv_point_s from
2D (x, y) to 3D (x, y, z). Note that z is not an integer but the float
value of the model output itself.

Change-Id: Ib1f9ea429a038952bdd37f771cce6cd9b248764c
Signed-off-by: Tae-Young Chung
---

diff --git a/include/mv_common.h b/include/mv_common.h
index b6788944..090ff8f1 100644
--- a/include/mv_common.h
+++ b/include/mv_common.h
@@ -41,6 +41,7 @@ extern "C" {
 typedef struct {
 	int x; /**< X-axis coordinate of the point in 2D space */
 	int y; /**< Y-axis coordinate of the point in 2D space */
+	float z; //int z;
 } mv_point_s;
 
 /**
diff --git a/mv_machine_learning/mv_inference/inference/include/Inference.h b/mv_machine_learning/mv_inference/inference/include/Inference.h
index 95f285f4..2baf83e5 100644
--- a/mv_machine_learning/mv_inference/inference/include/Inference.h
+++ b/mv_machine_learning/mv_inference/inference/include/Inference.h
@@ -67,7 +67,7 @@ typedef struct _FaceDetectionResults {
 
 typedef struct _FacialLandMarkDetectionResults {
 	int number_of_landmarks;
-	std::vector<cv::Point> locations;
+	std::vector<cv::Point3f> locations;
 } FacialLandMarkDetectionResults; /**< structure FacialLandMarkDetectionResults */
 
 typedef struct _PoseLandmarkDetectionResults {
diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h
index 4aae0275..c92e6ae1 100644
--- a/mv_machine_learning/mv_inference/inference/include/Landmark.h
+++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h
@@ -41,7 +41,7 @@ namespace inference
 {
 	float score;
 	cv::Point heatMapLoc;
-	cv::Point2f decodedLoc;
+	cv::Point3f decodedLoc;
 	int id;
 	bool valid;
 } LandmarkPoint;
diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
index 4960d684..aaeb48ea 100644
--- a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
+++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
@@ -85,6 +85,7 @@ namespace inference
 
 		float getPointX(int poseIdx, int partIdx);
 		float getPointY(int poseIdx, int partIdx);
+		float getPointZ(int poseIdx, int partIdx);
 		float getScore(int poseIdx, int partIdx);
 	};
 
diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp
index adf58fc4..a92290ce 100755
--- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp
@@ -1579,8 +1579,9 @@ namespace inference
 		LOGE("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height);
 		for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) {
 			results->locations.push_back(
-				cv::Point(poseDecoder.getPointX(0, landmarkIndex) * static_cast<float>(mSourceSize.width),
-						  poseDecoder.getPointY(0, landmarkIndex) * static_cast<float>(mSourceSize.height)));
+				cv::Point3f(poseDecoder.getPointX(0, landmarkIndex) * static_cast<float>(mSourceSize.width),
+							poseDecoder.getPointY(0, landmarkIndex) * static_cast<float>(mSourceSize.height),
+							poseDecoder.getPointZ(0, landmarkIndex)));
 		}
 
 		results->number_of_landmarks = results->locations.size();
@@ -1607,7 +1608,7 @@ namespace inference
 				point.x = static_cast<int>(*loc++ * mSourceSize.width);
 				point.y = static_cast<int>(*loc++ * mSourceSize.height);
-				LOGI("x:%d, y:%d", point.x, point.y);
+				LOGI("x:%d, y:%d", static_cast<int>(point.x), static_cast<int>(point.y));
 			}
 		}
 
@@ -1708,6 +1709,8 @@ namespace inference
 					poseDecoder.getPointX(poseIndex, part) * static_cast<float>(mSourceSize.width);
 				poseResult->landmarks[poseIndex][landmarkIndex].point.y =
 					poseDecoder.getPointY(poseIndex, part) * static_cast<float>(mSourceSize.height);
+				poseResult->landmarks[poseIndex][landmarkIndex].point.z =
+					poseDecoder.getPointZ(poseIndex, part);
 				poseResult->landmarks[poseIndex][landmarkIndex].label = landmarkIndex;
 				poseResult->landmarks[poseIndex][landmarkIndex].score =
 					poseDecoder.getScore(poseIndex, part);
diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
index 89ce1ffb..308005f1 100644
--- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
@@ -174,16 +174,19 @@ namespace inference
 
 	float PoseDecoder::getPointX(int poseIdx, int partIdx)
 	{
-		LOGI("idx[%d]-part[%d]", poseIdx, partIdx);
 		return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.x;
 	}
 
 	float PoseDecoder::getPointY(int poseIdx, int partIdx)
 	{
-		LOGI("idx[%d]-part[%d]", poseIdx, partIdx);
 		return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.y;
 	}
 
+	float PoseDecoder::getPointZ(int poseIdx, int partIdx)
+	{
+		return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.z;
+	}
+
 	float PoseDecoder::getScore(int poseIdx, int partIdx)
 	{
 		return mPoseLandmarks[poseIdx].landmarks[partIdx].score;
@@ -234,7 +237,7 @@ namespace inference
 
 		mPoseLandmarks.clear();
 
-		LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false};
+		LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point3f(0.0f, 0.0f, 0.0f), -1, false};
 
 		if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
 			mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
@@ -269,7 +272,7 @@ namespace inference
 			if (thresHoldRadius > 0.0f) {
 				bool isSkip = false;
 				for (auto& result : mPoseLandmarks) {
-					cv::Point2f dfRadius = result.landmarks[root.id].decodedLoc;
+					cv::Point3f dfRadius = result.landmarks[root.id].decodedLoc;
 					dfRadius -= root.decodedLoc;
 					float radius =
 						std::pow(dfRadius.x * scaleWidth, 2.0f) +
@@ -317,7 +320,7 @@ namespace inference
 
 				mPoseLandmarks[0].landmarks[idx].score = pscore;
 				mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1);
-				mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point2f(px / scaleWidth, py / scaleHeight);
+				mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point3f(px / scaleWidth, py / scaleHeight, 0.0);
 				mPoseLandmarks[0].landmarks[idx].id = idx;
 				mPoseLandmarks[0].landmarks[idx].valid = true;
 
@@ -353,14 +356,15 @@ namespace inference
 			for (int idx = 0; idx < mNumberOfLandmarks; ++idx) {
 				float px = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset);
 				float py = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 1);
+				float pz = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 2);
 
 				mPoseLandmarks[0].landmarks[idx].score = poseScore;
 				mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1);
-				mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point2f(px/scaleWidth, py/scaleHeight);
+				mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point3f(px/scaleWidth, py/scaleHeight, pz);
 				mPoseLandmarks[0].landmarks[idx].id = idx;
 				mPoseLandmarks[0].landmarks[idx].valid = true;
 
-				LOGI("idx[%d]: %.4f, %.4f", idx, px, py);
+ LOGI("idx[%d]: %.4f, %.4f, %.4f", idx, px, py, pz); } mPoseLandmarks[0].score = poseScore; diff --git a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp index 5faa3ad0..8ba5f87e 100644 --- a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp +++ b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp @@ -795,6 +795,7 @@ int mv_inference_facial_landmark_detect_open( for (int landmark_idx = 0; landmark_idx < numberOfLandmarks; ++landmark_idx) { locations[landmark_idx].x = facialLandMarkDetectionResults.locations[landmark_idx].x; locations[landmark_idx].y = facialLandMarkDetectionResults.locations[landmark_idx].y; + locations[landmark_idx].z = facialLandMarkDetectionResults.locations[landmark_idx].z; } detected_cb(source, numberOfLandmarks, locations.data(), user_data); diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 33f11b47..bd10f096 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.12.6 +Version: 0.12.7 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause diff --git a/test/testsuites/machine_learning/inference/CMakeLists.txt b/test/testsuites/machine_learning/inference/CMakeLists.txt index 593e1551..6839dded 100644 --- a/test/testsuites/machine_learning/inference/CMakeLists.txt +++ b/test/testsuites/machine_learning/inference/CMakeLists.txt @@ -50,21 +50,3 @@ target_link_libraries(mv_facestream_test_suite mv_inference install(TARGETS mv_facestream_test_suite DESTINATION ${CMAKE_INSTALL_BINDIR}) - - -add_executable(mv_posestream_test_suite mv_posestream_test_suite.cpp) - -target_link_libraries(mv_posestream_test_suite mv_inference - ${OpenCV_LIBS} - gstreamer-1.0 - glib-2.0 - capi-system-info - dlog - mv_image_helper - mv_video_helper - mv_testsuite_common - cairo - ${${PROJECT_NAME}_LIBRARIES} - ) - -install(TARGETS mv_posestream_test_suite DESTINATION ${CMAKE_INSTALL_BINDIR}) \ No newline at end of file diff --git a/test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp b/test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp index e09020b0..f8086a0c 100644 --- a/test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp +++ b/test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp @@ -252,12 +252,14 @@ static void _facial_landmark_cb(mv_source_h source, float maxAlpha = 0.8f; for (int pt=0; pt < landmarks; pt++) { - printf("%d: x[%d], y[%d]\n", pt, locations[pt].x,locations[pt].y); x = static_cast(locations[pt].x) / 192.f * static_cast(faceSkeleton.fRoi.width); y = static_cast(locations[pt].y) / 192.f * static_cast(faceSkeleton.fRoi.height); faceSkeleton.fLmark[pt].x = static_cast(x) + faceSkeleton.fRoi.point.x; faceSkeleton.fLmark[pt].y = static_cast(y) + faceSkeleton.fRoi.point.y; - + faceSkeleton.fLmark[pt].z = locations[pt].z; + printf("%d: x[%d], y[%d], z[%f]\n", pt, faceSkeleton.fLmark[pt].x, + faceSkeleton.fLmark[pt].y, + faceSkeleton.fLmark[pt].z); } }