Add PoseDecoder and Landmark to decode various type of pose output tensor

author Tae-Young Chung <ty83.chung@samsung.com>

Thu, 20 May 2021 00:57:02 +0000 (09:57 +0900)

committer Tae-Young Chung <ty83.chung@samsung.com>

Wed, 23 Jun 2021 03:28:23 +0000 (03:28 +0000)
author Tae-Young Chung <ty83.chung@samsung.com>
Thu, 20 May 2021 00:57:02 +0000 (09:57 +0900)
committer Tae-Young Chung <ty83.chung@samsung.com>
Wed, 23 Jun 2021 03:28:23 +0000 (03:28 +0000)
diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h

new file mode 100644 (file)

index 0000000..63ccf60
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h
@@ -0,0 +1,53 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_LANDMARK_H__
+#define __MEDIA_VISION_LANDMARK_H__
+
+#include <string>
+#include <vector>
+#include <map>
+#include <opencv2/core.hpp>
+
+/**
+ * @file Landmark.h
+ * @brief This file contains the Landmark class definition which
+ *        provides landmark information.
+ */
+
+namespace mediavision
+{
+namespace inference
+{
+       typedef struct _LandmarkPoint
+       {
+               float score;
+               cv::Point heatMapLoc;
+               cv::Point2f decodedLoc;
+               int id;
+               bool valid;
+       } LandmarkPoint;
+
+       typedef struct _LandmarkResults
+       {
+               std::vector<LandmarkPoint> landmarks;
+               float score;
+       } LandmarkResults;
+
+} /* Inference */
+} /* MediaVision */
+
+#endif /* __MEDIA_VISION_LANDMARK_H__ */
diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h

new file mode 100644 (file)

index 0000000..c910d62
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
@@ -0,0 +1,95 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_POSEDECODER_H__
+#define __MEDIA_VISION_POSEDECODER_H__
+
+#include <string>
+#include <vector>
+#include <map>
+#include <list>
+
+
+#include "TensorBuffer.h"
+#include "OutputMetadata.h"
+#include "Landmark.h"
+
+/**
+ * @file PoseDecoder.h
+ * @brief This file contains the PoseDecoder class definition which
+ *        provides pose decoder.
+ */
+
+namespace mediavision
+{
+namespace inference
+{
+       class PoseDecoder
+       {
+       private:
+               TensorBuffer mTensorBuffer;
+               OutputMetadata mMeta;
+               int mHeatMapWidth;
+               int mHeatMapHeight;
+               int mHeatMapChannel;
+               int mNumberOfLandmarks;
+
+               std::list<LandmarkPoint> mCandidates;
+               std::vector<LandmarkResults> mPoseLandmarks;
+
+               int getIndexToPos(LandmarkPoint& point, float scaleW, float scaleH);
+               int getPosToIndex(LandmarkPoint& landmark);
+               int getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal);
+               int findPose(LandmarkPoint& root, std::vector<LandmarkPoint>& decodedLandmarks,
+                                                       float scaleW, float scaleH);
+               int traverseToNeighbor(int edgeId, int toId, int dir,
+                                                               LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
+                                                               float scaleW, float scaleH);
+               int getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector);
+
+               int convertXYZtoX(int x, int y, int c);
+
+               cv::Point convertXYZtoXY(int x, int y, int c);
+
+       public:
+               PoseDecoder(TensorBuffer& buffer, const OutputMetadata& metaData,
+                                       int heatMapWidth, int heatMapHeight, int heatMapChannel,
+                                       int numberOfLandmarks) :
+                                       mTensorBuffer(buffer),
+                                       mHeatMapWidth(heatMapWidth),
+                                       mHeatMapHeight(heatMapHeight),
+                                       mHeatMapChannel(heatMapChannel),
+                                       mNumberOfLandmarks(numberOfLandmarks) {
+                                               mMeta = metaData;
+                                       };
+
+               ~PoseDecoder() = default;
+
+               int init();
+
+               int decode(float scaleWidth, float scaleHeight, float thresHoldRadius);
+
+               int getNumberOfPose();
+
+               float getPointX(int poseIdx, int partIdx);
+               float getPointY(int poseIdx, int partIdx);
+               float getScore(int poseIdx, int partIdx);
+       };
+
+} /* Inference */
+} /* MediaVision */
+
+#endif /* __MEDIA_VISION_POSEDECODER_H__ */
diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp

new file mode 100644 (file)

index 0000000..f30fbf9
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
@@ -0,0 +1,483 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "PoseDecoder.h"
+#include "PostProcess.h"
+
+#include <unistd.h>
+#include <fstream>
+#include <string>
+
+#define MAX_NUMBER_OF_POSE 5
+#define MAX_NUMBER_OF_CORRECTION 3
+
+namespace mediavision
+{
+namespace inference
+{
+       int PoseDecoder::convertXYZtoX(int x, int y, int c)
+       {
+               return y * mHeatMapWidth * mHeatMapChannel
+                                       + x * mHeatMapChannel
+                                       + c;
+       }
+
+       cv::Point PoseDecoder::convertXYZtoXY(int x, int y, int c)
+       {
+               int idxY = y * mHeatMapWidth * mHeatMapChannel * 2
+                                       + x * mHeatMapChannel * 2
+                                       + c;
+
+               int idxX = idxY + mHeatMapChannel;
+
+               return cv::Point(idxX, idxY);
+       }
+
+       int PoseDecoder::init()
+       {
+               LOGI("ENTER");
+
+               Landmark& landmarkInfo = mMeta.GetLandmark();
+
+               if (landmarkInfo.GetType() < 0 || landmarkInfo.GetType() >= 3) {
+                       LOGE("Not supported landmark type");
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
+
+               if (landmarkInfo.GetDecodingType() == 0) {
+                       LOGI("Skip init");
+                       return MEDIA_VISION_ERROR_NONE;
+               }
+
+               int x,y,c;
+               int sx, sy, ex, ey, dx, dy;
+               float score, localScore;
+               int idx;
+               bool isLocalMax;
+               ScoreInfo& scoreInfo = mMeta.GetScore();
+
+               mCandidates.clear();
+
+               if (landmarkInfo.GetType() == 0 ||
+                       landmarkInfo.GetType() == 2) {
+                       mCandidates.resize(mHeatMapChannel);
+               }
+
+               for (y = 0; y < mHeatMapHeight; ++y) {
+                       for (x = 0; x < mHeatMapWidth; ++x) {
+                               std::list<LandmarkPoint>::iterator candidate = mCandidates.begin();
+                               for (c = 0; c < mHeatMapChannel; ++c, candidate++) {
+                                       isLocalMax = true;
+                                       idx = convertXYZtoX(x, y, c);
+                                       score = mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx);
+                                       if (scoreInfo.GetType() == 1) {
+                                               score = PostProcess::sigmoid(score);
+                                       }
+
+                                       if (score < scoreInfo.GetThresHold())
+                                               continue;
+
+                                       if (landmarkInfo.GetType() == 0 ||
+                                               landmarkInfo.GetType() == 2) {
+                                               if (score <= candidate->score)
+                                                       continue;
+
+                                               candidate->score = score;
+                                               candidate->heatMapLoc.x = x;
+                                               candidate->heatMapLoc.y = y;
+                                               candidate->id = c;
+
+                                       } else { //landmarkInfo.type == 1
+                                               sx = std::max(x - 1, 0);
+                                               sy = std::max(y - 1, 0);
+                                               ex = std::min(x + 2, mHeatMapWidth);
+                                               ey = std::min(y + 2, mHeatMapHeight);
+
+                                               for (dy = sy; dy < ey; ++dy) {
+                                                       for (dx = sx; dx < ex; ++dx) {
+                                                               idx = convertXYZtoX(dx, dy, c);
+                                                               localScore =  mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx);
+                                                               if (scoreInfo.GetType() == 1) {
+                                                                       localScore = PostProcess::sigmoid(localScore);
+                                                               }
+                                                               if (localScore > score) {
+                                                                       isLocalMax = false;
+                                                                       break;
+                                                               }
+                                                       }
+                                                       if (isLocalMax == false)
+                                                               break;
+                                               }
+
+                                               if (isLocalMax == false)
+                                                       continue;
+
+                                               // add this to list
+                                               LOGI("[%d x %d][%d]: score %.3f", y, x, c, score);
+                                               std::list<LandmarkPoint>::iterator iter;
+                                               for (iter = mCandidates.begin(); iter != mCandidates.end(); ++iter) {
+                                                       if ((*iter).score < score) {
+                                                               break;
+                                                       }
+                                               }
+
+                                               LandmarkPoint localLandmark;
+                                               localLandmark.score = score;
+                                               localLandmark.heatMapLoc.x = x;
+                                               localLandmark.heatMapLoc.y = y;
+                                               localLandmark.id = c;
+                                               localLandmark.valid = false;
+                                               mCandidates.insert(iter, localLandmark);
+                                       }
+                               }
+                       }
+               } // end of init
+
+               LOGI("LEAVE");
+
+               return MEDIA_VISION_ERROR_NONE;
+       }
+
+       int PoseDecoder::getNumberOfPose()
+       {
+               return std::min(static_cast<int>(mPoseLandmarks.size()), MAX_NUMBER_OF_POSE);
+       }
+
+       int PoseDecoder::getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal)
+       {
+               if (!mTensorBuffer.exist(mMeta.GetOffset().GetName())) {
+                       offsetVal.x = offsetVal.y = 0.f;
+                       LOGI("No offset value");
+                       LOGI("LEAVE");
+                       return MEDIA_VISION_ERROR_NONE;
+               }
+
+               cv::Point idx = convertXYZtoXY(landmark.heatMapLoc.x, landmark.heatMapLoc.y, landmark.id);
+
+               try {
+                       offsetVal.x = mTensorBuffer.getValue<float>(mMeta.GetOffset().GetName(), idx.x);
+                       offsetVal.y = mTensorBuffer.getValue<float>(mMeta.GetOffset().GetName(), idx.y);
+               } catch (const std::exception& e) {
+                       LOGE("Fail to get value at (%d, %d) from %s",
+                                               idx.x, idx.y, mMeta.GetOffset().GetName().c_str());
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
+
+               return MEDIA_VISION_ERROR_NONE;
+       }
+
+       float PoseDecoder::getPointX(int poseIdx, int partIdx)
+       {
+               LOGI("idx[%d]-part[%d]", poseIdx, partIdx);
+               return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.x;
+       }
+
+       float PoseDecoder::getPointY(int poseIdx, int partIdx)
+       {
+               LOGI("idx[%d]-part[%d]", poseIdx, partIdx);
+               return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.y;
+       }
+
+       float PoseDecoder::getScore(int poseIdx, int partIdx)
+       {
+               return mPoseLandmarks[poseIdx].landmarks[partIdx].score;
+       }
+
+       int PoseDecoder::getIndexToPos(LandmarkPoint& point, float scaleW, float scaleH)
+       {
+               if (scaleW <= 0.0f || scaleH <= 0.0f) {
+                       LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH);
+                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+               }
+
+               cv::Point2f offsetVal;
+               getOffsetValue(point, offsetVal);
+
+               point.decodedLoc.x = static_cast<float>(point.heatMapLoc.x) / static_cast<float>(mHeatMapWidth - 1);
+               point.decodedLoc.y = static_cast<float>(point.heatMapLoc.y) / static_cast<float>(mHeatMapHeight - 1);
+
+               point.decodedLoc.x += offsetVal.x / scaleW;
+               point.decodedLoc.y += offsetVal.y / scaleH;
+
+               return MEDIA_VISION_ERROR_NONE;
+       }
+
+       int PoseDecoder::getPosToIndex(LandmarkPoint& point)
+       {
+               cv::Point posVal;
+
+               posVal.x = roundf(point.decodedLoc.x * static_cast<float>(mHeatMapWidth - 1));
+               posVal.y = roundf(point.decodedLoc.y * static_cast<float>(mHeatMapHeight - 1));
+
+               posVal.x = std::max(std::min(posVal.x, mHeatMapWidth - 1), 0);
+               posVal.y = std::max(std::min(posVal.y, mHeatMapHeight - 1), 0);
+
+               point.heatMapLoc = posVal;
+
+               return MEDIA_VISION_ERROR_NONE;
+       }
+
+       int PoseDecoder::decode(float scaleWidth, float scaleHeight, float thresHoldRadius)
+       {
+               LOGI("ENTER");
+
+               if (scaleWidth <= 0.0f || scaleHeight <= 0.0f) {
+                       LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleWidth, scaleHeight);
+                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+               }
+
+               mPoseLandmarks.clear();
+               LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false};
+
+               Landmark& landmarkInfo = mMeta.GetLandmark();
+               ScoreInfo& scoreInfo = mMeta.GetScore();
+
+               if (landmarkInfo.GetType() == 0 ||
+                       landmarkInfo.GetType() == 2) { // single pose
+                       mPoseLandmarks.resize(1);
+
+                       if (landmarkInfo.GetDecodingType() == 0) { // direct decoding
+                               mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks);
+                       } else { // heatmap decoding
+                               mPoseLandmarks[0].landmarks.resize(mHeatMapChannel);
+                       }
+               }
+
+               if (landmarkInfo.GetDecodingType() != 0) { // heatmap decoding
+                       while (!mCandidates.empty()) {
+
+                               LandmarkPoint &root = mCandidates.front();
+
+                               getIndexToPos(root, scaleWidth, scaleHeight);
+
+                               if (landmarkInfo.GetType() == 0) {
+                                       root.valid = true;
+                                       mPoseLandmarks[0].landmarks[root.id] = root;
+                                       mPoseLandmarks[0].score += root.score;
+                                       mCandidates.pop_front();
+                                       continue;
+                               }
+
+                               LOGI("root id: %d", root.id);
+
+                               if (thresHoldRadius > 0.0f) {
+                                       bool isSkip = false;
+                                       for (auto& result : mPoseLandmarks) {
+                                               cv::Point2f dfRadius = result.landmarks[root.id].decodedLoc;
+                                               dfRadius -= root.decodedLoc;
+                                               float radius =
+                                                       std::pow(dfRadius.x * scaleWidth, 2.0f) +
+                                                       std::pow(dfRadius.y     * scaleHeight, 2.0f);
+                                               LOGI("id[%d], radius: %.f vs. %.f", root.id, radius, std::pow(thresHoldRadius, 2.0f));
+                                               if (radius <= std::pow(thresHoldRadius, 2.0f)) {
+                                                       LOGI("Not local maximum, Skip this");
+                                                       isSkip = true;
+                                                       break;
+                                               }
+                                       }
+                                       if (isSkip) {
+                                               mCandidates.pop_front();
+                                               continue;
+                                       }
+                               }
+
+                               LOGI("Local maximum. Add this");
+
+                               std::vector<LandmarkPoint> decodedLandmarks(mHeatMapChannel, initValue);
+
+                               findPose(root, decodedLandmarks, scaleWidth, scaleHeight);
+
+                               float poseScore = 0.0f;
+                               for (auto& landmark : decodedLandmarks) {
+                                       poseScore += landmark.score;
+                                       LOGI("%.3f, %.3f", landmark.decodedLoc.x, landmark.decodedLoc.y);
+                               }
+
+                               mPoseLandmarks.push_back(LandmarkResults {decodedLandmarks, poseScore});
+                               if (mPoseLandmarks.size() > MAX_NUMBER_OF_POSE)
+                                       break;
+                               mCandidates.pop_front();
+                       }
+
+                       for (auto& pose : mPoseLandmarks) {
+                               pose.score /= static_cast<float>(mHeatMapChannel);
+                       }
+               } else {
+                       // multi pose is not supported
+                       std::vector<int> scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll();
+                       float poseScore  = mTensorBuffer.getValue<float>(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]);
+                       if (scoreInfo.GetType() == 1) {
+                               poseScore = PostProcess::sigmoid(poseScore);
+                       }
+                       if (poseScore < scoreInfo.GetThresHold()) {
+                               LOGI("pose score %.4f is lower than %.4f", poseScore, scoreInfo.GetThresHold());
+                               LOGI("LEAVE");
+                               return MEDIA_VISION_ERROR_NONE;
+                       }
+
+                       int landmarkOffset = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 1) ? 2 : 3;
+                       if (landmarkInfo.GetDecodingType() == 0) {
+                               landmarkOffset = landmarkInfo.GetOffset();
+                       }
+                       for (int idx = 0; idx < mNumberOfLandmarks; ++idx) {
+                                       float px = mTensorBuffer.getValue<float>(landmarkInfo.GetName(), idx * landmarkOffset);
+                                       float py = mTensorBuffer.getValue<float>(landmarkInfo.GetName(), idx * landmarkOffset + 1);
+
+                                       mPoseLandmarks[0].landmarks[idx].score = poseScore;
+                                       mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1);
+                                       mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point2f(px/scaleWidth, py/scaleHeight);
+                                       mPoseLandmarks[0].landmarks[idx].id = idx;
+                                       mPoseLandmarks[0].landmarks[idx].valid =  true;
+
+                                       LOGI("idx[%d]: %.4f, %.4f", idx, px, py);
+                       }
+
+                       mPoseLandmarks[0].score = poseScore;
+               }
+
+               LOGI("LEAVE");
+               return MEDIA_VISION_ERROR_NONE;
+       }
+
+       int PoseDecoder::findPose(LandmarkPoint& root, std::vector<LandmarkPoint>& decodedLandmarks,
+                                                       float scaleW, float scaleH)
+       {
+               LOGI("ENTER");
+
+               if (scaleW <= 0.0f || scaleH <= 0.0f) {
+                       LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH);
+                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+               }
+
+               decodedLandmarks[root.id] = root;
+               decodedLandmarks[root.id].valid = true;
+               LOGI("KeyId: [%d], heatMap: %d, %d", root.id, root.heatMapLoc.x, root.heatMapLoc.y);
+               LOGI("KeyId: [%d], decoded: %.4f, %.4f, score %.3f", root.id, root.decodedLoc.x, root.decodedLoc.y, root.score);
+
+               int index = static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) - 1;
+               for (auto riter = mMeta.GetEdge().GetEdgesAll().rbegin();
+                       riter != mMeta.GetEdge().GetEdgesAll().rend(); ++riter) {
+                       int fromKeyId = riter->second;
+                       int toKeyId = riter->first;
+
+                       if (decodedLandmarks[fromKeyId].valid == true &&
+                               decodedLandmarks[toKeyId].valid == false) {
+                               LOGI("BackTravers: from %d to %d", fromKeyId, toKeyId);
+                               traverseToNeighbor(index, toKeyId,  1,
+                                                       decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
+                                                       scaleW, scaleH);
+                               LOGI("tgt_key_id[%d]: %.4f, %.4f, %.4f", toKeyId,
+                                                                               decodedLandmarks[toKeyId].decodedLoc.x,
+                                                                               decodedLandmarks[toKeyId].decodedLoc.y,
+                                                                               decodedLandmarks[toKeyId].score);
+                       }
+                       index--;
+               }
+
+               index = 0;
+               for (auto iter = mMeta.GetEdge().GetEdgesAll().begin();
+                       iter != mMeta.GetEdge().GetEdgesAll().end(); ++iter) {
+                       int fromKeyId = iter->first;
+                       int toKeyId = iter->second;
+
+                       if (decodedLandmarks[fromKeyId].valid == true &&
+                               decodedLandmarks[toKeyId].valid == false) {
+                               LOGI("FrwdTravers: form %d to %d", fromKeyId, toKeyId);
+                               traverseToNeighbor(index, toKeyId,  0,
+                                                       decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
+                                                       scaleW, scaleH);
+                       }
+                       index++;
+               }
+               LOGI("LEAVE");
+               return MEDIA_VISION_ERROR_NONE;
+       }
+
+       int PoseDecoder::traverseToNeighbor(int edgeId, int toId, int dir,
+                                                               LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
+                                                               float scaleW, float scaleH)
+       {
+               if (scaleW <= 0.0f || scaleH <= 0.0f) {
+                       LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH);
+                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+               }
+
+               cv::Point2f edgeVector(0.f, 0.f);
+               cv::Point nearHeatMapLoc;
+
+               LOGI("org: %.4f, %.4f", fromLandmark.decodedLoc.x, fromLandmark.decodedLoc.y);
+
+               // update heatMapLoc from decodedLoc;
+               nearHeatMapLoc.x = roundf(fromLandmark.decodedLoc.x
+                                       * static_cast<float>(mHeatMapWidth - 1));
+               nearHeatMapLoc.y = roundf(fromLandmark.decodedLoc.y
+                                       * static_cast<float>(mHeatMapHeight - 1));
+
+               nearHeatMapLoc.x = std::max(std::min(nearHeatMapLoc.x, mHeatMapWidth - 1), 0);
+               nearHeatMapLoc.y = std::max(std::min(nearHeatMapLoc.y, mHeatMapHeight - 1), 0);
+
+               LOGI("src: %d, %d", nearHeatMapLoc.x, nearHeatMapLoc.y);
+
+               getEdgeVector(nearHeatMapLoc, edgeId, dir, edgeVector);
+
+               LOGI("vector: %.4f, %.4f with edgeId %d", edgeVector.x, edgeVector.y, edgeId);
+               toLandmark.decodedLoc.x = fromLandmark.decodedLoc.x + edgeVector.x / scaleW;
+               toLandmark.decodedLoc.y = fromLandmark.decodedLoc.y + edgeVector.y / scaleH;
+               toLandmark.id = toId;
+               LOGI("tgt: %.4f, %.4f", toLandmark.decodedLoc.x, toLandmark.decodedLoc.y);
+
+               for (int iter = 0; iter < MAX_NUMBER_OF_CORRECTION; ++iter) {
+                       getPosToIndex(toLandmark);
+                       getIndexToPos(toLandmark, scaleW, scaleH);
+               }
+
+               int idx  = convertXYZtoX(toLandmark.heatMapLoc.x, toLandmark.heatMapLoc.y, toLandmark.id);
+               toLandmark.score = mTensorBuffer.getValue<float>(mMeta.GetScore().GetName(), idx);
+               if (mMeta.GetScore().GetType() == 1) {
+                       toLandmark.score = PostProcess::sigmoid(toLandmark.score);
+               }
+
+               toLandmark.valid = true;
+               LOGI("Final: %.4f, %.4f", toLandmark.decodedLoc.x, toLandmark.decodedLoc.y);
+
+               return MEDIA_VISION_ERROR_NONE;
+       }
+
+       int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector)
+       {
+               LOGI("ENTER");
+
+               LOGI("edge size: %zd", mMeta.GetEdge().GetEdgesAll().size());
+               int idxY = index.y * mHeatMapWidth
+                                       * static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) * 2;
+               idxY += index.x * static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) * 2 + edgeId;
+
+               int idxX = idxY + static_cast<int>(mMeta.GetEdge().GetEdgesAll().size());
+
+               for(auto& dispVec : mMeta.GetDispVecAll()){
+                       if (dispVec.GetType() == dir) { // 0: forward
+                               LOGI("%s", dispVec.GetName().c_str());
+                               vector.x = mTensorBuffer.getValue<float>(dispVec.GetName(), idxX);
+                               vector.y = mTensorBuffer.getValue<float>(dispVec.GetName(), idxY);
+                       }
+               }
+
+               LOGI("LEAVE");
+               return MEDIA_VISION_ERROR_NONE;
+       }
+}
+}
author	Tae-Young Chung <ty83.chung@samsung.com>
	Thu, 20 May 2021 00:57:02 +0000 (09:57 +0900)
committer	Tae-Young Chung <ty83.chung@samsung.com>
	Wed, 23 Jun 2021 03:28:23 +0000 (03:28 +0000)
mv_machine_learning/mv_inference/inference/include/Landmark.h	[new file with mode: 0644]	patch \| blob
mv_machine_learning/mv_inference/inference/include/PoseDecoder.h	[new file with mode: 0644]	patch \| blob
mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp	[new file with mode: 0644]	patch \| blob