return MEDIA_VISION_ERROR_NONE;
}
- int Inference::GetFacialLandMarkDetectionResults(
- FacialLandMarkDetectionResults *detectionResults, int width, int height)
+ int Inference::GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results)
{
LOGI("ENTER");
- FacialLandMarkDetectionResults results;
+
OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
if (outputMeta.IsParsed()) {
auto& landmarkInfo = outputMeta.GetLandmark();
auto& scoreInfo = outputMeta.GetScore();
+
if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) ||
!mOutputTensorBuffers.exist(scoreInfo.GetName())) {
LOGE("output buffers named of %s or %s are NULL",
int heatMapWidth = 0;
int heatMapHeight = 0;
int heatMapChannel = 0;
- if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
- heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx];
- heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx];
- heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx];
- }
-
- int number_of_landmarks = 0;
std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll();
+ int number_of_landmarks = heatMapChannel;
+
if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]);
number_of_landmarks = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]]
/ landmarkInfo.GetOffset();
} else {
- number_of_landmarks = heatMapChannel;
+ heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx];
+ heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx];
+ heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx];
}
+
LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel);
// decoding
PoseDecoder poseDecoder(mOutputTensorBuffers, outputMeta,
heatMapWidth, heatMapHeight, heatMapChannel,
number_of_landmarks);
+
// initialize decoder queue with landmarks to be decoded.
int ret = poseDecoder.init();
if (ret != MEDIA_VISION_ERROR_NONE) {
float inputW = 1.f;
float inputH = 1.f;
+
if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth());
inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight());
}
+
float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius;
+
poseDecoder.decode(inputW, inputH, thresRadius);
for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) {
- results.locations.push_back(
+ results->locations.push_back(
cv::Point(poseDecoder.getPointX(0, landmarkIndex) * static_cast<float>(mSourceSize.width),
poseDecoder.getPointY(0, landmarkIndex) * static_cast<float>(mSourceSize.height)));
}
- results.number_of_landmarks = results.locations.size();
- *detectionResults = results;
+
+ results->number_of_landmarks = results->locations.size();
} else {
tensor_t outputData;
}
int number_of_detections = outputData.dimInfo[0][1] >> 1;
- float *loc = reinterpret_cast<float *>(outputData.data[0]);
- results.number_of_landmarks = number_of_detections;
- results.locations.resize(number_of_detections);
+
+ results->number_of_landmarks = number_of_detections;
+ results->locations.resize(number_of_detections);
LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height);
- for (auto& point : results.locations) {
+
+ float *loc = reinterpret_cast<float *>(outputData.data[0]);
+
+ for (auto& point : results->locations) {
point.x = static_cast<int>(*loc++ * mSourceSize.width);
point.y = static_cast<int>(*loc++ * mSourceSize.height);
LOGI("x:%d, y:%d", point.x, point.y);
}
-
- *detectionResults = results;
}
+
LOGI("Inference: FacialLandmarkDetectionResults: %d\n",
- results.number_of_landmarks);
+ results->number_of_landmarks);
return MEDIA_VISION_ERROR_NONE;
}
mv_inference_facial_landmark_detected_cb detected_cb, void *user_data)
{
Inference *pInfer = static_cast<Inference *>(infer);
- unsigned int width, height;
-
- int ret = mv_source_get_width(source, &width);
- if (ret != MEDIA_VISION_ERROR_NONE) {
- LOGE("Fail to get width");
- return ret;
- }
-
- ret = mv_source_get_height(source, &height);
- if (ret != MEDIA_VISION_ERROR_NONE) {
- LOGE("Fail to get height");
- return ret;
- }
-
std::vector<mv_source_h> sources;
std::vector<mv_rectangle_s> rects;
if (roi != NULL)
rects.push_back(*roi);
- ret = pInfer->Run(sources, rects);
+ int ret = pInfer->Run(sources, rects);
if (ret != MEDIA_VISION_ERROR_NONE) {
LOGE("Fail to run inference");
return ret;
FacialLandMarkDetectionResults facialLandMarkDetectionResults;
- ret = pInfer->GetFacialLandMarkDetectionResults(
- &facialLandMarkDetectionResults, width, height);
+ ret = pInfer->GetFacialLandMarkDetectionResults(&facialLandMarkDetectionResults);
if (ret != MEDIA_VISION_ERROR_NONE) {
LOGE("Fail to get inference results");
return ret;
int numberOfLandmarks = facialLandMarkDetectionResults.number_of_landmarks;
std::vector<mv_point_s> locations(numberOfLandmarks);
- for (int n = 0; n < numberOfLandmarks; ++n) {
- locations[n].x = facialLandMarkDetectionResults.locations[n].x;
- locations[n].y = facialLandMarkDetectionResults.locations[n].y;
+ for (int landmark_idx = 0; landmark_idx < numberOfLandmarks; ++landmark_idx) {
+ locations[landmark_idx].x = facialLandMarkDetectionResults.locations[landmark_idx].x;
+ locations[landmark_idx].y = facialLandMarkDetectionResults.locations[landmark_idx].y;
}
detected_cb(source, numberOfLandmarks, locations.data(), user_data);