// NOTE(review): diff-hunk fragments from two different members, with context
// elided between them — not compilable as shown.
// Below: tail of a constructor's member-initializer list (the ctor header is
// outside this view). The "-" line removes mPoseResult(NULL), consistent with
// the std::unique_ptr migration in GetPoseLandmarkDetectionResults.
mSourceSize(cv::Size()),
engine_config(),
mBackend(),
- mPoseResult(NULL),
mMetadata(),
mPreProc(),
mPostProc()
// (elided context) — the next line belongs to a different, unseen statement.
mOutputLayerProperty.layers);
}
// Destructor fragment: the removed block manually freed the per-pose
// landmarks arrays and the mPoseResult object itself. After the migration the
// result is owned by std::unique_ptr on the caller's side, so no explicit
// cleanup remains here.
- if (mPoseResult) {
- for (int poseIndex = 0; poseIndex < mPoseResult->number_of_poses; ++poseIndex) {
- delete [] mPoseResult->landmarks[poseIndex];
- }
- delete [] mPoseResult->landmarks;
- delete mPoseResult;
- }
-
mModelFormats.clear();
// Release backend engine.
}
// NOTE(review): this span is a unified-diff hunk ("-" removed / "+" added)
// with interior context elided — not compilable C++ as displayed.
// The change migrates the pose result from a manually new/delete-managed
// mPoseResult member (handed out as a raw mv_inference_pose_result_h) to a
// function-local std::unique_ptr<mv_inference_pose_s> moved into the caller's
// reference, eliminating the leak-prone manual 2-D array allocation the old
// destructor had to undo.
//
// Params:
//   detectionResults — receives the decoded pose result; ownership moves to
//                      the caller on success.
//   width/height     — source image size; used to scale normalized landmark
//                      coordinates in the legacy (non-metadata) path.
// Returns MEDIA_VISION_ERROR_NONE on success, or an MV error code
// (INVALID_PARAMETER when the landmark count exceeds the fixed capacity).
int Inference::GetPoseLandmarkDetectionResults(
- mv_inference_pose_result_h *detectionResults, int width, int height)
+ std::unique_ptr<mv_inference_pose_s> &detectionResults, int width, int height)
{
LOGI("ENTER");
+
OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
+ auto poseResult = std::make_unique<mv_inference_pose_s>();
+
// Path 1: parsed output metadata available — decode via PoseDecoder.
if (outputMeta.IsParsed()) {
auto& landmarkInfo = outputMeta.GetLandmark();
auto& scoreInfo = outputMeta.GetScore();
+
if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) ||
!mOutputTensorBuffers.exist(scoreInfo.GetName())) {
LOGE("output buffers named of %s or %s are NULL",
// (elided) — the arguments and error-return for the missing-buffer case
// are not visible in this hunk.
int heatMapWidth = 0;
int heatMapHeight = 0;
int heatMapChannel = 0;
+
// Heat-map geometry only matters when the decoder actually runs
// (i.e. decoding type is not BYPASS).
if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx];
heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx];
// NOTE(review): heatMapChannel is logged here but its assignment is not
// visible in this hunk — presumably set from a cIdx in elided context.
LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel);
// Removed: lazy allocation of mPoseResult plus a defaultNumberOfPose x
// number_of_landmarks_per_pose matrix of raw new[] allocations.
- if (mPoseResult == NULL) {
- mPoseResult = new(std::nothrow) mv_inference_pose_s;
- if (mPoseResult == NULL) {
- LOGE("Fail to create result handle");
- return MEDIA_VISION_ERROR_INTERNAL;
- }
- // 2d+single or 2d+multi or 3d+single or 3d+multi
- int defaultNumberOfPose = (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
- landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) ? 1 : MAX_NUMBER_OF_POSE;
- std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll();
-
- // If INFERENCE_LANDMARK_DECODING_TYPE_BYPASS,
- // the landmarkChannel is guessed from the shape of the landmark output tensor.
- // Otherwise, it is guessed from the heatMapChannel.
- int landmarkChannel = 0;
- if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
- landmarkChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]]
- / landmarkInfo.GetOffset();
- } else {
- landmarkChannel = heatMapChannel;
- }
+ std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll();
- mPoseResult->number_of_landmarks_per_pose = mUserListName.empty() ? landmarkChannel :
- static_cast<int>(mUserListName.size());
+ // If INFERENCE_LANDMARK_DECODING_TYPE_BYPASS,
+ // the landmarkChannel is guessed from the shape of the landmark output tensor.
+ // Otherwise, it is guessed from the heatMapChannel. (heatMapChannel is used in default).
+ int landmarkChannel = heatMapChannel;
- LOGE("number of landmarks per pose: %d", mPoseResult->number_of_landmarks_per_pose );
- mPoseResult->landmarks = new mv_inference_landmark_s* [defaultNumberOfPose];
- for (int idx = 0; idx < defaultNumberOfPose; ++idx) {
- mPoseResult->landmarks[idx] = new mv_inference_landmark_s [mPoseResult->number_of_landmarks_per_pose];
- }
+ if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS)
+ landmarkChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] / landmarkInfo.GetOffset();
+
// A user-supplied landmark name list overrides the channel-derived count.
+ poseResult->number_of_landmarks_per_pose = mUserListName.empty() ? landmarkChannel :
+ static_cast<int>(mUserListName.size());
+
// NOTE(review): informational message logged at error severity (LOGE) —
// carried over from the removed code; consider LOGI.
+ LOGE("number of landmarks per pose: %d", poseResult->number_of_landmarks_per_pose );
+
// New guard: mv_inference_pose_s presumably now embeds a fixed-capacity
// landmarks array, so counts at/above the capacity are rejected.
// NOTE(review): ">=" also rejects exactly MAX_NUMBER_OF_LANDMARKS_PER_POSE —
// confirm whether the intended check is ">". Also "maxinum" is a typo
// ("maximum") in the runtime log string; a comment-only pass cannot fix it.
+ if (poseResult->number_of_landmarks_per_pose >= MAX_NUMBER_OF_LANDMARKS_PER_POSE) {
+ LOGE("Exceeded maxinum number of landmarks per pose(%d >= %d).",
+ poseResult->number_of_landmarks_per_pose, MAX_NUMBER_OF_LANDMARKS_PER_POSE);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
}
// decoding
PoseDecoder poseDecoder(mOutputTensorBuffers, outputMeta,
heatMapWidth, heatMapHeight, heatMapChannel,
- mPoseResult->number_of_landmarks_per_pose);
+ poseResult->number_of_landmarks_per_pose);
// initialize decorder queue with landmarks to be decoded.
int ret = poseDecoder.init();
// NOTE(review): ret is not checked in the visible lines, and inputW/inputH/
// thresRadius are defined only in elided context — confirm the error check
// exists in the full file.
poseDecoder.decode(inputW, inputH, thresRadius);
int part = 0;
// Copy decoded landmarks into the result, scaling the decoder's normalized
// coordinates up to the original source size (mSourceSize).
- mPoseResult->number_of_poses = poseDecoder.getNumberOfPose();
- for (int poseIndex = 0; poseIndex < mPoseResult->number_of_poses; ++poseIndex) {
- for (int landmarkIndex = 0; landmarkIndex < mPoseResult->number_of_landmarks_per_pose; ++ landmarkIndex) {
+ poseResult->number_of_poses = poseDecoder.getNumberOfPose();
+ for (int poseIndex = 0; poseIndex < poseResult->number_of_poses; ++poseIndex) {
+ for (int landmarkIndex = 0; landmarkIndex < poseResult->number_of_landmarks_per_pose; ++ landmarkIndex) {
part = landmarkIndex;
// With a user-supplied list, map the output index to the 1-based entry
// named in that list.
if (!mUserListName.empty()) {
part = std::stoi(mUserListName[landmarkIndex]) - 1;
}
// NOTE(review): this closing brace appearing before the assignments below
// is a diff-elision artifact; the assignments belong inside the loop.
}
- mPoseResult->landmarks[poseIndex][landmarkIndex].isAvailable = true;
- mPoseResult->landmarks[poseIndex][landmarkIndex].point.x =
+ poseResult->landmarks[poseIndex][landmarkIndex].isAvailable = true;
+ poseResult->landmarks[poseIndex][landmarkIndex].point.x =
poseDecoder.getPointX(poseIndex, part) * static_cast<float>(mSourceSize.width);
- mPoseResult->landmarks[poseIndex][landmarkIndex].point.y =
+ poseResult->landmarks[poseIndex][landmarkIndex].point.y =
poseDecoder.getPointY(poseIndex, part) * static_cast<float>(mSourceSize.height);
- mPoseResult->landmarks[poseIndex][landmarkIndex].label = landmarkIndex;
- mPoseResult->landmarks[poseIndex][landmarkIndex].score =
+ poseResult->landmarks[poseIndex][landmarkIndex].label = landmarkIndex;
+ poseResult->landmarks[poseIndex][landmarkIndex].score =
poseDecoder.getScore(poseIndex, part);
}
}
// Transfer ownership to the caller (replaces handing out the raw member).
- *detectionResults = static_cast<mv_inference_pose_result_h>(mPoseResult);
+ detectionResults = std::move(poseResult);
// Path 2: no parsed metadata — legacy per-channel heat-map argmax over the
// raw output tensor.
} else {
tensor_t outputData;
// (elided) — the call that fills outputData and sets ret is not visible.
return ret;
}
// Removed: locals now declared at point of use (loop scope) further below.
- long number_of_poses = 1;
- long number_of_landmarks = outputData.dimInfo[0][3];
- float *tmp = static_cast<float *>(outputData.data[0]);
- cv::Size heatMapSize(outputData.dimInfo[0][1], outputData.dimInfo[0][2]);
-
- cv::Point loc;
- cv::Point2f loc2f;
- double score;
- cv::Mat blurredHeatMap;
-
// Wrap the raw float output as an HxW Mat with one channel per landmark.
cv::Mat reShapeTest(cv::Size(outputData.dimInfo[0][2], outputData.dimInfo[0][1]),
- CV_32FC(outputData.dimInfo[0][3]), (void *) tmp);
-
+ CV_32FC(outputData.dimInfo[0][3]), outputData.data[0]);
// NOTE(review): variable-length array — a compiler extension, not standard C++.
cv::Mat multiChannels[outputData.dimInfo[0][3]];
+
split(reShapeTest, multiChannels);
float ratioX = static_cast<float>(outputData.dimInfo[0][2]);
float ratioY = static_cast<float>(outputData.dimInfo[0][1]);
// Removed: lazy raw allocation of mPoseResult (mirrors Path 1's removal).
- if (mPoseResult == NULL) {
- if(!mUserListName.empty()) {
- number_of_landmarks = mUserListName.size();
- }
- mPoseResult = new mv_inference_pose_s;
- if (mPoseResult == NULL) {
- LOGE("Fail to create result handle");
- return MEDIA_VISION_ERROR_INTERNAL;
- }
+ poseResult->number_of_poses = 1;
+ poseResult->number_of_landmarks_per_pose = outputData.dimInfo[0][3];
// NOTE(review): the removed code shrank the landmark count to
// mUserListName.size() when a user list was set; the added code does not —
// confirm this is intentional, since the loop below still indexes
// mUserListName[landmarkIndex] for the full channel count.
- mPoseResult->number_of_poses= number_of_poses;
- mPoseResult->number_of_landmarks_per_pose = number_of_landmarks;
- mPoseResult->landmarks = new mv_inference_landmark_s*[number_of_poses];
- for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) {
- mPoseResult->landmarks[poseIndex] = new mv_inference_landmark_s[number_of_landmarks];
- }
// Same fixed-capacity guard as Path 1.
// NOTE(review): "Exeeded maxinum" — typos in the runtime log string
// ("Exceeded maximum"); a comment-only pass cannot change it.
+ if (poseResult->number_of_landmarks_per_pose >= MAX_NUMBER_OF_LANDMARKS_PER_POSE) {
+ LOGE("Exeeded maxinum number of landmarks per pose(%d >= %d).",
+ poseResult->number_of_landmarks_per_pose, MAX_NUMBER_OF_LANDMARKS_PER_POSE);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
}
// Per-landmark argmax over each heat-map channel.
- int part = 0;
- for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) {
- for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) {
- part = landmarkIndex;
+ for (int poseIndex = 0; poseIndex < poseResult->number_of_poses; ++poseIndex) {
+ for (int landmarkIndex = 0; landmarkIndex < poseResult->number_of_landmarks_per_pose; landmarkIndex++) {
+ int part = landmarkIndex;
if (!mUserListName.empty()) {
part = std::stoi(mUserListName[landmarkIndex]) - 1;
if (part < 0) {
// (elided) — the skip/continue handling for part < 0 is not visible here.
}
cv::Mat heatMap = multiChannels[part];
+ double score;
+ cv::Point loc;
+ cv::Point2f loc2f;
+ cv::Mat blurredHeatMap;
cv::GaussianBlur(heatMap, blurredHeatMap, cv::Size(), 5.0, 5.0);
// NOTE(review): minMaxLoc runs on the UNblurred heatMap while
// blurredHeatMap appears unused in the visible lines, and loc2f is read
// below without a visible assignment — presumably both are handled in
// elided context (e.g. minMaxLoc over blurredHeatMap and
// loc2f = loc scaled by ratioX/ratioY). Confirm against the full file.
cv::minMaxLoc(heatMap, NULL, &score, NULL, &loc);
LOGI("landmarkIndex[%2d] - mapping to [%2d]: x[%.3f], y[%.3f], score[%.3f]",
landmarkIndex, part, loc2f.x, loc2f.y, score);
- mPoseResult->landmarks[poseIndex][landmarkIndex].isAvailable = true;
- mPoseResult->landmarks[poseIndex][landmarkIndex].point.x = static_cast<int>(static_cast<float>(width) * loc2f.x);
- mPoseResult->landmarks[poseIndex][landmarkIndex].point.y = static_cast<int>(static_cast<float>(height) * loc2f.y);
- mPoseResult->landmarks[poseIndex][landmarkIndex].score = score;
- mPoseResult->landmarks[poseIndex][landmarkIndex].label = -1;
+ poseResult->landmarks[poseIndex][landmarkIndex].isAvailable = true;
+ poseResult->landmarks[poseIndex][landmarkIndex].point.x = static_cast<int>(static_cast<float>(width) * loc2f.x);
+ poseResult->landmarks[poseIndex][landmarkIndex].point.y = static_cast<int>(static_cast<float>(height) * loc2f.y);
+ poseResult->landmarks[poseIndex][landmarkIndex].score = score;
+ poseResult->landmarks[poseIndex][landmarkIndex].label = -1;
}
}
// Transfer ownership to the caller.
- *detectionResults = static_cast<mv_inference_pose_result_h>(mPoseResult);
+
+ detectionResults = std::move(poseResult);
}
return MEDIA_VISION_ERROR_NONE;