}
int Inference::GetFacialLandMarkDetectionResults(
- FacialLandMarkDetectionResults *detectionResults)
+ FacialLandMarkDetectionResults *detectionResults, int width, int height)
{
- tensor_t outputData;
+ LOGI("ENTER");
+ FacialLandMarkDetectionResults results;
+ OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
+ if (outputMeta.IsParsed()) {
+ auto& landmarkInfo = outputMeta.GetLandmark();
+ auto& scoreInfo = outputMeta.GetScore();
+ if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) ||
+ !mOutputTensorBuffers.exist(scoreInfo.GetName())) {
+ LOGE("output buffers named of %s or %s are NULL",
+ landmarkInfo.GetName().c_str(), scoreInfo.GetName().c_str());
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
- // Get inference result and contain it to outputData.
- int ret = FillOutputResult(outputData);
- if (ret != MEDIA_VISION_ERROR_NONE) {
- LOGE("Fail to get output result.");
- return ret;
- }
+ int heatMapWidth = 0;
+ int heatMapHeight = 0;
+ int heatMapChannel = 0;
+ if (landmarkInfo.GetDecodingType() != 0) {
+ heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx];
+ heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx];
+ heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx];
+ }
+
+ int number_of_landmarks = 0;
+ std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll();
+ if (landmarkInfo.GetDecodingType() == 0) {
+ LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]);
+ number_of_landmarks = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]]
+ / landmarkInfo.GetOffset();
+ } else {
+ number_of_landmarks = heatMapChannel;
+ }
+ LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel);
+
+ // decoding
+ PoseDecoder poseDecoder(mOutputTensorBuffers, outputMeta,
+ heatMapWidth, heatMapHeight, heatMapChannel,
+ number_of_landmarks);
+ // initialize decoder queue with landmarks to be decoded.
+ int ret = poseDecoder.init();
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to init poseDecoder");
+ return ret;
+ }
- std::vector<std::vector<int> > inferDimInfo(outputData.dimInfo);
- std::vector<void *> inferResults(outputData.data.begin(),
- outputData.data.end());
+ float inputW = 1.f;
+ float inputH = 1.f;
+ if (landmarkInfo.GetCoordinate() == 1) {
+ inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth());
+ inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight());
+ }
+ float thresRadius = landmarkInfo.GetType() == 0 ? 0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius;
+ poseDecoder.decode(inputW, inputH, thresRadius);
- long number_of_detections = inferDimInfo[0][1];
- float *loc = reinterpret_cast<float *>(inferResults[0]);
+ for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) {
+ results.locations.push_back(
+ cv::Point(poseDecoder.getPointX(0, landmarkIndex) * static_cast<float>(mSourceSize.width),
+ poseDecoder.getPointY(0, landmarkIndex) * static_cast<float>(mSourceSize.height)));
+ }
+ results.number_of_landmarks = results.locations.size();
+ *detectionResults = results;
+ } else {
+ tensor_t outputData;
- FacialLandMarkDetectionResults results;
- results.number_of_landmarks = 0;
+ // Get inference result and contain it to outputData.
+ int ret = FillOutputResult(outputData);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to get output result.");
+ return ret;
+ }
+
+ std::vector<std::vector<int> > inferDimInfo(outputData.dimInfo);
+ std::vector<void *> inferResults(outputData.data.begin(),
+ outputData.data.end());
- cv::Point point(0, 0);
- results.number_of_landmarks = 0;
- LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height);
- for (int idx = 0; idx < number_of_detections; idx += 2) {
- point.x = static_cast<int>(loc[idx] * mSourceSize.width);
- point.y = static_cast<int>(loc[idx + 1] * mSourceSize.height);
+ long number_of_detections = inferDimInfo[0][1];
+ float *loc = reinterpret_cast<float *>(inferResults[0]);
- results.locations.push_back(point);
- results.number_of_landmarks++;
+ results.number_of_landmarks = 0;
- LOGI("x:%d, y:%d", point.x, point.y);
- }
+ cv::Point point(0, 0);
+ LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height);
+ for (int idx = 0; idx < number_of_detections; idx += 2) {
+ point.x = static_cast<int>(loc[idx] * mSourceSize.width);
+ point.y = static_cast<int>(loc[idx + 1] * mSourceSize.height);
+
+ results.locations.push_back(point);
+ results.number_of_landmarks++;
- *detectionResults = results;
+ LOGI("x:%d, y:%d", point.x, point.y);
+ }
+
+ *detectionResults = results;
+ }
LOGE("Inference: FacialLandmarkDetectionResults: %d\n",
results.number_of_landmarks);
return MEDIA_VISION_ERROR_NONE;