#include "Inference.h"
#include "InferenceIni.h"
#include "ObjectDecoder.h"
+#include "PoseDecoder.h"
#include <map>
+#include <list>
#include <unistd.h>
#include <fstream>
if (!outputMeta.GetNumber().GetName().empty())
mConfig.mOutputLayerNames.push_back(outputMeta.GetNumber().GetName());
+
+ if (!outputMeta.GetLandmark().GetName().empty())
+ mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmark().GetName());
+
+ if (!outputMeta.GetOffset().GetName().empty())
+ mConfig.mOutputLayerNames.push_back(outputMeta.GetOffset().GetName());
+
+ for (auto& dispVec : outputMeta.GetDispVecAll()) {
+ mConfig.mOutputLayerNames.push_back(dispVec.GetName());
+ }
}
inference_engine_layer_property property;
INFERENCE_TENSOR_DATA_TYPE_FLOAT32,
1};
for (auto& name : mConfig.mOutputLayerNames) {
+ LOGI("Configure %s layer as output", name.c_str());
property.layers.insert(std::make_pair(name, tensor_info));
}
int Inference::GetPoseLandmarkDetectionResults(
mv_inference_pose_result_h *detectionResults, int width, int height)
{
- tensor_t outputData;
+ LOGI("ENTER");
+ OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
+ if (outputMeta.IsParsed()) {
+ auto& landmarkInfo = outputMeta.GetLandmark();
+ auto& scoreInfo = outputMeta.GetScore();
+ if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) ||
+ !mOutputTensorBuffers.exist(scoreInfo.GetName())) {
+ LOGE("output buffers named of %s or %s are NULL",
+ landmarkInfo.GetName().c_str(), scoreInfo.GetName().c_str());
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
- // Get inference result and contain it to outputData.
- int ret = FillOutputResult(outputData);
- if (ret != MEDIA_VISION_ERROR_NONE) {
- LOGE("Fail to get output result.");
- return ret;
- }
+ int heatMapWidth = 0;
+ int heatMapHeight = 0;
+ int heatMapChannel = 0;
+ if (landmarkInfo.GetDecodingType() != 0) {
+ heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx];
+ heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx];
+ heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx];
+ }
- std::vector<std::vector<int> > inferDimInfo(outputData.dimInfo);
- std::vector<void *> inferResults(outputData.data.begin(),
- outputData.data.end());
+ LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel);
- long number_of_poses = 1;
- long number_of_landmarks = inferDimInfo[0][3];
- float *tmp = static_cast<float *>(inferResults[0]);
- cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]);
+ if (mPoseResult == NULL) {
+ mPoseResult = new(std::nothrow) mv_inference_pose_s;
+ if (mPoseResult == NULL) {
+ LOGE("Fail to create result handle");
+ return MEDIA_VISION_ERROR_INTERNAL;
+ }
+ // 2d+single or 2d+multi or 3d+single or 3d+multi
+ int defaultNumberOfPose = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 2) ? 1 : MAX_NUMBER_OF_POSE;
+ std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll();
+
+ // In case of DecodingType == 0,
+ // the landmarkChannel is guessed from the shape of the landmark output tensor.
+ // Otherwise, decoding heatmap, it is guessed from the heatMapChannel.
+ int landmarkChannel = 0;
+ if (landmarkInfo.GetDecodingType() == 0) {
+ landmarkChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]]
+ / landmarkInfo.GetOffset();
+ } else {
+ landmarkChannel = heatMapChannel;
+ }
- cv::Point loc;
- cv::Point2f loc2f;
- double score;
- cv::Mat blurredHeatMap;
+ mPoseResult->number_of_landmarks_per_pose = mUserListName.empty() ? landmarkChannel :
+ static_cast<int>(mUserListName.size());
- cv::Mat reShapeTest(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]),
- CV_32FC(inferDimInfo[0][3]), (void *) tmp);
+ LOGE("number of landmarks per pose: %d", mPoseResult->number_of_landmarks_per_pose );
+ mPoseResult->landmarks = new mv_inference_landmark_s* [defaultNumberOfPose];
+ for (int idx = 0; idx < defaultNumberOfPose; ++idx) {
+ mPoseResult->landmarks[idx] = new mv_inference_landmark_s [mPoseResult->number_of_landmarks_per_pose];
+ }
+ }
+
+ // decoding
+ PoseDecoder poseDecoder(mOutputTensorBuffers, outputMeta,
+ heatMapWidth, heatMapHeight, heatMapChannel,
+ mPoseResult->number_of_landmarks_per_pose);
+
+	// initialize decoder queue with landmarks to be decoded.
+ int ret = poseDecoder.init();
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to init poseDecoder");
+ return ret;
+ }
+
+ float inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth());
+ float inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight());
+ float thresRadius = landmarkInfo.GetType() == 0 ? 0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius;
+ poseDecoder.decode(inputW, inputH, thresRadius);
+
+ int part = 0;
+ mPoseResult->number_of_poses = poseDecoder.getNumberOfPose();
+ for (int poseIndex = 0; poseIndex < mPoseResult->number_of_poses; ++poseIndex) {
+ for (int landmarkIndex = 0; landmarkIndex < mPoseResult->number_of_landmarks_per_pose; ++ landmarkIndex) {
+ part = landmarkIndex;
+ if (!mUserListName.empty()) {
+ part = std::stoi(mUserListName[landmarkIndex]) - 1;
+ if (part < 0) {
+ continue;
+ }
+ }
- cv::Mat multiChannels[inferDimInfo[0][3]];
- split(reShapeTest, multiChannels);
+ mPoseResult->landmarks[poseIndex][landmarkIndex].isAvailable = true;
+ mPoseResult->landmarks[poseIndex][landmarkIndex].point.x =
+ poseDecoder.getPointX(poseIndex, part) * static_cast<float>(mSourceSize.width);
+ mPoseResult->landmarks[poseIndex][landmarkIndex].point.y =
+ poseDecoder.getPointY(poseIndex, part) * static_cast<float>(mSourceSize.height);
+ mPoseResult->landmarks[poseIndex][landmarkIndex].label = landmarkIndex;
+ mPoseResult->landmarks[poseIndex][landmarkIndex].score =
+ poseDecoder.getScore(poseIndex, part);
+ }
+ }
+ *detectionResults = static_cast<mv_inference_pose_result_h>(mPoseResult);
- float ratioX = static_cast<float>(inferDimInfo[0][2]);
- float ratioY = static_cast<float>(inferDimInfo[0][1]);
+ } else {
+ tensor_t outputData;
- if (mPoseResult == NULL) {
- if(!mUserListName.empty()) {
- number_of_landmarks = mUserListName.size();
+		// Get inference result and store it in outputData.
+ int ret = FillOutputResult(outputData);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to get output result.");
+ return ret;
}
- mPoseResult = new mv_inference_pose_s;
+
+ std::vector<std::vector<int> > inferDimInfo(outputData.dimInfo);
+ std::vector<void *> inferResults(outputData.data.begin(),
+ outputData.data.end());
+
+ long number_of_poses = 1;
+ long number_of_landmarks = inferDimInfo[0][3];
+ float *tmp = static_cast<float *>(inferResults[0]);
+ cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]);
+
+ cv::Point loc;
+ cv::Point2f loc2f;
+ double score;
+ cv::Mat blurredHeatMap;
+
+ cv::Mat reShapeTest(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]),
+ CV_32FC(inferDimInfo[0][3]), (void *) tmp);
+
+ cv::Mat multiChannels[inferDimInfo[0][3]];
+ split(reShapeTest, multiChannels);
+
+ float ratioX = static_cast<float>(inferDimInfo[0][2]);
+ float ratioY = static_cast<float>(inferDimInfo[0][1]);
+
if (mPoseResult == NULL) {
- LOGE("Fail to create result handle");
- return MEDIA_VISION_ERROR_INTERNAL;
- }
+ if(!mUserListName.empty()) {
+ number_of_landmarks = mUserListName.size();
+ }
+ mPoseResult = new mv_inference_pose_s;
+ if (mPoseResult == NULL) {
+ LOGE("Fail to create result handle");
+ return MEDIA_VISION_ERROR_INTERNAL;
+ }
- mPoseResult->number_of_poses= number_of_poses;
- mPoseResult->number_of_landmarks_per_pose = number_of_landmarks;
- mPoseResult->landmarks = new mv_inference_landmark_s*[number_of_poses];
- for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) {
- mPoseResult->landmarks[poseIndex] = new mv_inference_landmark_s[number_of_landmarks];
- for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; ++landmarkIndex) {
- mPoseResult->landmarks[poseIndex][landmarkIndex].isAvailable = false;
- mPoseResult->landmarks[poseIndex][landmarkIndex].point.x = -1;
- mPoseResult->landmarks[poseIndex][landmarkIndex].point.y = -1;
- mPoseResult->landmarks[poseIndex][landmarkIndex].label = -1;
- mPoseResult->landmarks[poseIndex][landmarkIndex].score = -1.0f;
+ mPoseResult->number_of_poses= number_of_poses;
+ mPoseResult->number_of_landmarks_per_pose = number_of_landmarks;
+ mPoseResult->landmarks = new mv_inference_landmark_s*[number_of_poses];
+ for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) {
+ mPoseResult->landmarks[poseIndex] = new mv_inference_landmark_s[number_of_landmarks];
}
}
- }
- int part = 0;
- for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) {
- for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) {
+ int part = 0;
+ for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) {
+ for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) {
part = landmarkIndex;
if (!mUserListName.empty()) {
part = std::stoi(mUserListName[landmarkIndex]) - 1;
mPoseResult->landmarks[poseIndex][landmarkIndex].point.y = static_cast<int>(static_cast<float>(height) * loc2f.y);
mPoseResult->landmarks[poseIndex][landmarkIndex].score = score;
mPoseResult->landmarks[poseIndex][landmarkIndex].label = -1;
+ }
}
+ *detectionResults = static_cast<mv_inference_pose_result_h>(mPoseResult);
}
- *detectionResults = static_cast<mv_inference_pose_result_h>(mPoseResult);
-
return MEDIA_VISION_ERROR_NONE;
}