--- /dev/null
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "PoseDecoder.h"
+#include "PostProcess.h"
+
+#include <unistd.h>
+#include <fstream>
+#include <string>
+
+#define MAX_NUMBER_OF_POSE 5
+#define MAX_NUMBER_OF_CORRECTION 3
+
+namespace mediavision
+{
+namespace inference
+{
+ int PoseDecoder::convertXYZtoX(int x, int y, int c)
+ {
+ return y * mHeatMapWidth * mHeatMapChannel
+ + x * mHeatMapChannel
+ + c;
+ }
+
+ cv::Point PoseDecoder::convertXYZtoXY(int x, int y, int c)
+ {
+ int idxY = y * mHeatMapWidth * mHeatMapChannel * 2
+ + x * mHeatMapChannel * 2
+ + c;
+
+ int idxX = idxY + mHeatMapChannel;
+
+ return cv::Point(idxX, idxY);
+ }
+
+ int PoseDecoder::init()
+ {
+ LOGI("ENTER");
+
+ Landmark& landmarkInfo = mMeta.GetLandmark();
+
+ if (landmarkInfo.GetType() < 0 || landmarkInfo.GetType() >= 3) {
+ LOGE("Not supported landmark type");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ if (landmarkInfo.GetDecodingType() == 0) {
+ LOGI("Skip init");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int x,y,c;
+ int sx, sy, ex, ey, dx, dy;
+ float score, localScore;
+ int idx;
+ bool isLocalMax;
+ ScoreInfo& scoreInfo = mMeta.GetScore();
+
+ mCandidates.clear();
+
+ if (landmarkInfo.GetType() == 0 ||
+ landmarkInfo.GetType() == 2) {
+ mCandidates.resize(mHeatMapChannel);
+ }
+
+ for (y = 0; y < mHeatMapHeight; ++y) {
+ for (x = 0; x < mHeatMapWidth; ++x) {
+ std::list<LandmarkPoint>::iterator candidate = mCandidates.begin();
+ for (c = 0; c < mHeatMapChannel; ++c, candidate++) {
+ isLocalMax = true;
+ idx = convertXYZtoX(x, y, c);
+ score = mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx);
+ if (scoreInfo.GetType() == 1) {
+ score = PostProcess::sigmoid(score);
+ }
+
+ if (score < scoreInfo.GetThresHold())
+ continue;
+
+ if (landmarkInfo.GetType() == 0 ||
+ landmarkInfo.GetType() == 2) {
+ if (score <= candidate->score)
+ continue;
+
+ candidate->score = score;
+ candidate->heatMapLoc.x = x;
+ candidate->heatMapLoc.y = y;
+ candidate->id = c;
+
+ } else { //landmarkInfo.type == 1
+ sx = std::max(x - 1, 0);
+ sy = std::max(y - 1, 0);
+ ex = std::min(x + 2, mHeatMapWidth);
+ ey = std::min(y + 2, mHeatMapHeight);
+
+ for (dy = sy; dy < ey; ++dy) {
+ for (dx = sx; dx < ex; ++dx) {
+ idx = convertXYZtoX(dx, dy, c);
+ localScore = mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx);
+ if (scoreInfo.GetType() == 1) {
+ localScore = PostProcess::sigmoid(localScore);
+ }
+ if (localScore > score) {
+ isLocalMax = false;
+ break;
+ }
+ }
+ if (isLocalMax == false)
+ break;
+ }
+
+ if (isLocalMax == false)
+ continue;
+
+ // add this to list
+ LOGI("[%d x %d][%d]: score %.3f", y, x, c, score);
+ std::list<LandmarkPoint>::iterator iter;
+ for (iter = mCandidates.begin(); iter != mCandidates.end(); ++iter) {
+ if ((*iter).score < score) {
+ break;
+ }
+ }
+
+ LandmarkPoint localLandmark;
+ localLandmark.score = score;
+ localLandmark.heatMapLoc.x = x;
+ localLandmark.heatMapLoc.y = y;
+ localLandmark.id = c;
+ localLandmark.valid = false;
+ mCandidates.insert(iter, localLandmark);
+ }
+ }
+ }
+ } // end of init
+
+ LOGI("LEAVE");
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::getNumberOfPose()
+ {
+ return std::min(static_cast<int>(mPoseLandmarks.size()), MAX_NUMBER_OF_POSE);
+ }
+
+ int PoseDecoder::getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal)
+ {
+ if (!mTensorBuffer.exist(mMeta.GetOffset().GetName())) {
+ offsetVal.x = offsetVal.y = 0.f;
+ LOGI("No offset value");
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ cv::Point idx = convertXYZtoXY(landmark.heatMapLoc.x, landmark.heatMapLoc.y, landmark.id);
+
+ try {
+ offsetVal.x = mTensorBuffer.getValue<float>(mMeta.GetOffset().GetName(), idx.x);
+ offsetVal.y = mTensorBuffer.getValue<float>(mMeta.GetOffset().GetName(), idx.y);
+ } catch (const std::exception& e) {
+ LOGE("Fail to get value at (%d, %d) from %s",
+ idx.x, idx.y, mMeta.GetOffset().GetName().c_str());
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ float PoseDecoder::getPointX(int poseIdx, int partIdx)
+ {
+ LOGI("idx[%d]-part[%d]", poseIdx, partIdx);
+ return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.x;
+ }
+
+ float PoseDecoder::getPointY(int poseIdx, int partIdx)
+ {
+ LOGI("idx[%d]-part[%d]", poseIdx, partIdx);
+ return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.y;
+ }
+
+ float PoseDecoder::getScore(int poseIdx, int partIdx)
+ {
+ return mPoseLandmarks[poseIdx].landmarks[partIdx].score;
+ }
+
+ int PoseDecoder::getIndexToPos(LandmarkPoint& point, float scaleW, float scaleH)
+ {
+ if (scaleW <= 0.0f || scaleH <= 0.0f) {
+ LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ cv::Point2f offsetVal;
+ getOffsetValue(point, offsetVal);
+
+ point.decodedLoc.x = static_cast<float>(point.heatMapLoc.x) / static_cast<float>(mHeatMapWidth - 1);
+ point.decodedLoc.y = static_cast<float>(point.heatMapLoc.y) / static_cast<float>(mHeatMapHeight - 1);
+
+ point.decodedLoc.x += offsetVal.x / scaleW;
+ point.decodedLoc.y += offsetVal.y / scaleH;
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::getPosToIndex(LandmarkPoint& point)
+ {
+ cv::Point posVal;
+
+ posVal.x = roundf(point.decodedLoc.x * static_cast<float>(mHeatMapWidth - 1));
+ posVal.y = roundf(point.decodedLoc.y * static_cast<float>(mHeatMapHeight - 1));
+
+ posVal.x = std::max(std::min(posVal.x, mHeatMapWidth - 1), 0);
+ posVal.y = std::max(std::min(posVal.y, mHeatMapHeight - 1), 0);
+
+ point.heatMapLoc = posVal;
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::decode(float scaleWidth, float scaleHeight, float thresHoldRadius)
+ {
+ LOGI("ENTER");
+
+ if (scaleWidth <= 0.0f || scaleHeight <= 0.0f) {
+ LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleWidth, scaleHeight);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ mPoseLandmarks.clear();
+ LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false};
+
+ Landmark& landmarkInfo = mMeta.GetLandmark();
+ ScoreInfo& scoreInfo = mMeta.GetScore();
+
+ if (landmarkInfo.GetType() == 0 ||
+ landmarkInfo.GetType() == 2) { // single pose
+ mPoseLandmarks.resize(1);
+
+ if (landmarkInfo.GetDecodingType() == 0) { // direct decoding
+ mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks);
+ } else { // heatmap decoding
+ mPoseLandmarks[0].landmarks.resize(mHeatMapChannel);
+ }
+ }
+
+ if (landmarkInfo.GetDecodingType() != 0) { // heatmap decoding
+ while (!mCandidates.empty()) {
+
+ LandmarkPoint &root = mCandidates.front();
+
+ getIndexToPos(root, scaleWidth, scaleHeight);
+
+ if (landmarkInfo.GetType() == 0) {
+ root.valid = true;
+ mPoseLandmarks[0].landmarks[root.id] = root;
+ mPoseLandmarks[0].score += root.score;
+ mCandidates.pop_front();
+ continue;
+ }
+
+ LOGI("root id: %d", root.id);
+
+ if (thresHoldRadius > 0.0f) {
+ bool isSkip = false;
+ for (auto& result : mPoseLandmarks) {
+ cv::Point2f dfRadius = result.landmarks[root.id].decodedLoc;
+ dfRadius -= root.decodedLoc;
+ float radius =
+ std::pow(dfRadius.x * scaleWidth, 2.0f) +
+ std::pow(dfRadius.y * scaleHeight, 2.0f);
+ LOGI("id[%d], radius: %.f vs. %.f", root.id, radius, std::pow(thresHoldRadius, 2.0f));
+ if (radius <= std::pow(thresHoldRadius, 2.0f)) {
+ LOGI("Not local maximum, Skip this");
+ isSkip = true;
+ break;
+ }
+ }
+ if (isSkip) {
+ mCandidates.pop_front();
+ continue;
+ }
+ }
+
+ LOGI("Local maximum. Add this");
+
+ std::vector<LandmarkPoint> decodedLandmarks(mHeatMapChannel, initValue);
+
+ findPose(root, decodedLandmarks, scaleWidth, scaleHeight);
+
+ float poseScore = 0.0f;
+ for (auto& landmark : decodedLandmarks) {
+ poseScore += landmark.score;
+ LOGI("%.3f, %.3f", landmark.decodedLoc.x, landmark.decodedLoc.y);
+ }
+
+ mPoseLandmarks.push_back(LandmarkResults {decodedLandmarks, poseScore});
+ if (mPoseLandmarks.size() > MAX_NUMBER_OF_POSE)
+ break;
+ mCandidates.pop_front();
+ }
+
+ for (auto& pose : mPoseLandmarks) {
+ pose.score /= static_cast<float>(mHeatMapChannel);
+ }
+ } else {
+ // multi pose is not supported
+ std::vector<int> scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll();
+ float poseScore = mTensorBuffer.getValue<float>(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]);
+ if (scoreInfo.GetType() == 1) {
+ poseScore = PostProcess::sigmoid(poseScore);
+ }
+ if (poseScore < scoreInfo.GetThresHold()) {
+ LOGI("pose score %.4f is lower than %.4f", poseScore, scoreInfo.GetThresHold());
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int landmarkOffset = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 1) ? 2 : 3;
+ if (landmarkInfo.GetDecodingType() == 0) {
+ landmarkOffset = landmarkInfo.GetOffset();
+ }
+ for (int idx = 0; idx < mNumberOfLandmarks; ++idx) {
+ float px = mTensorBuffer.getValue<float>(landmarkInfo.GetName(), idx * landmarkOffset);
+ float py = mTensorBuffer.getValue<float>(landmarkInfo.GetName(), idx * landmarkOffset + 1);
+
+ mPoseLandmarks[0].landmarks[idx].score = poseScore;
+ mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1);
+ mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point2f(px/scaleWidth, py/scaleHeight);
+ mPoseLandmarks[0].landmarks[idx].id = idx;
+ mPoseLandmarks[0].landmarks[idx].valid = true;
+
+ LOGI("idx[%d]: %.4f, %.4f", idx, px, py);
+ }
+
+ mPoseLandmarks[0].score = poseScore;
+ }
+
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::findPose(LandmarkPoint& root, std::vector<LandmarkPoint>& decodedLandmarks,
+ float scaleW, float scaleH)
+ {
+ LOGI("ENTER");
+
+ if (scaleW <= 0.0f || scaleH <= 0.0f) {
+ LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ decodedLandmarks[root.id] = root;
+ decodedLandmarks[root.id].valid = true;
+ LOGI("KeyId: [%d], heatMap: %d, %d", root.id, root.heatMapLoc.x, root.heatMapLoc.y);
+ LOGI("KeyId: [%d], decoded: %.4f, %.4f, score %.3f", root.id, root.decodedLoc.x, root.decodedLoc.y, root.score);
+
+ int index = static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) - 1;
+ for (auto riter = mMeta.GetEdge().GetEdgesAll().rbegin();
+ riter != mMeta.GetEdge().GetEdgesAll().rend(); ++riter) {
+ int fromKeyId = riter->second;
+ int toKeyId = riter->first;
+
+ if (decodedLandmarks[fromKeyId].valid == true &&
+ decodedLandmarks[toKeyId].valid == false) {
+ LOGI("BackTravers: from %d to %d", fromKeyId, toKeyId);
+ traverseToNeighbor(index, toKeyId, 1,
+ decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
+ scaleW, scaleH);
+ LOGI("tgt_key_id[%d]: %.4f, %.4f, %.4f", toKeyId,
+ decodedLandmarks[toKeyId].decodedLoc.x,
+ decodedLandmarks[toKeyId].decodedLoc.y,
+ decodedLandmarks[toKeyId].score);
+ }
+ index--;
+ }
+
+ index = 0;
+ for (auto iter = mMeta.GetEdge().GetEdgesAll().begin();
+ iter != mMeta.GetEdge().GetEdgesAll().end(); ++iter) {
+ int fromKeyId = iter->first;
+ int toKeyId = iter->second;
+
+ if (decodedLandmarks[fromKeyId].valid == true &&
+ decodedLandmarks[toKeyId].valid == false) {
+ LOGI("FrwdTravers: form %d to %d", fromKeyId, toKeyId);
+ traverseToNeighbor(index, toKeyId, 0,
+ decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
+ scaleW, scaleH);
+ }
+ index++;
+ }
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::traverseToNeighbor(int edgeId, int toId, int dir,
+ LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
+ float scaleW, float scaleH)
+ {
+ if (scaleW <= 0.0f || scaleH <= 0.0f) {
+ LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ cv::Point2f edgeVector(0.f, 0.f);
+ cv::Point nearHeatMapLoc;
+
+ LOGI("org: %.4f, %.4f", fromLandmark.decodedLoc.x, fromLandmark.decodedLoc.y);
+
+ // update heatMapLoc from decodedLoc;
+ nearHeatMapLoc.x = roundf(fromLandmark.decodedLoc.x
+ * static_cast<float>(mHeatMapWidth - 1));
+ nearHeatMapLoc.y = roundf(fromLandmark.decodedLoc.y
+ * static_cast<float>(mHeatMapHeight - 1));
+
+ nearHeatMapLoc.x = std::max(std::min(nearHeatMapLoc.x, mHeatMapWidth - 1), 0);
+ nearHeatMapLoc.y = std::max(std::min(nearHeatMapLoc.y, mHeatMapHeight - 1), 0);
+
+ LOGI("src: %d, %d", nearHeatMapLoc.x, nearHeatMapLoc.y);
+
+ getEdgeVector(nearHeatMapLoc, edgeId, dir, edgeVector);
+
+ LOGI("vector: %.4f, %.4f with edgeId %d", edgeVector.x, edgeVector.y, edgeId);
+ toLandmark.decodedLoc.x = fromLandmark.decodedLoc.x + edgeVector.x / scaleW;
+ toLandmark.decodedLoc.y = fromLandmark.decodedLoc.y + edgeVector.y / scaleH;
+ toLandmark.id = toId;
+ LOGI("tgt: %.4f, %.4f", toLandmark.decodedLoc.x, toLandmark.decodedLoc.y);
+
+ for (int iter = 0; iter < MAX_NUMBER_OF_CORRECTION; ++iter) {
+ getPosToIndex(toLandmark);
+ getIndexToPos(toLandmark, scaleW, scaleH);
+ }
+
+ int idx = convertXYZtoX(toLandmark.heatMapLoc.x, toLandmark.heatMapLoc.y, toLandmark.id);
+ toLandmark.score = mTensorBuffer.getValue<float>(mMeta.GetScore().GetName(), idx);
+ if (mMeta.GetScore().GetType() == 1) {
+ toLandmark.score = PostProcess::sigmoid(toLandmark.score);
+ }
+
+ toLandmark.valid = true;
+ LOGI("Final: %.4f, %.4f", toLandmark.decodedLoc.x, toLandmark.decodedLoc.y);
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector)
+ {
+ LOGI("ENTER");
+
+ LOGI("edge size: %zd", mMeta.GetEdge().GetEdgesAll().size());
+ int idxY = index.y * mHeatMapWidth
+ * static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) * 2;
+ idxY += index.x * static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) * 2 + edgeId;
+
+ int idxX = idxY + static_cast<int>(mMeta.GetEdge().GetEdgesAll().size());
+
+ for(auto& dispVec : mMeta.GetDispVecAll()){
+ if (dispVec.GetType() == dir) { // 0: forward
+ LOGI("%s", dispVec.GetName().c_str());
+ vector.x = mTensorBuffer.getValue<float>(dispVec.GetName(), idxX);
+ vector.y = mTensorBuffer.getValue<float>(dispVec.GetName(), idxY);
+ }
+ }
+
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+}
+}