mv_machine_learning: code cleanup to GetFacialLandMarkDetectionResults function 72/265072/3
authorInki Dae <inki.dae@samsung.com>
Thu, 7 Oct 2021 06:30:36 +0000 (15:30 +0900)
committerInki Dae <inki.dae@samsung.com>
Wed, 13 Oct 2021 04:20:11 +0000 (13:20 +0900)
Clean up the GetFacialLandMarkDetectionResults function by
   - code sliding
   - renaming variables to more meaningful names for readability
   - removing the width and height parameters from its signature

Change-Id: Ib59786c085c8202a1f7d9eb85a01d528220c728f
Signed-off-by: Inki Dae <inki.dae@samsung.com>
mv_machine_learning/mv_inference/inference/include/Inference.h
mv_machine_learning/mv_inference/inference/src/Inference.cpp
mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp

index cad69e9fe0ace9b639919f813f908f95cb8c9f4c..a0506f2022ead7e0ab1c24e2074a5ce8d96478ab 100644 (file)
@@ -317,8 +317,7 @@ namespace inference
                 * @since_tizen 5.5
                 * @return @c true on success, otherwise a negative error value
                 */
-               int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results,
-                                                                               int width, int height);
+               int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results);
 
                /**
                 * @brief       Gets the PoseLandmarkDetectionResults
index 756f0419688030ddcf67851531f4b32bca297d97..c25a4a4e13af9f7f011cfc3a8628cf0c08b76dbb 100755 (executable)
@@ -1536,15 +1536,15 @@ namespace inference
                return MEDIA_VISION_ERROR_NONE;
        }
 
-       int Inference::GetFacialLandMarkDetectionResults(
-                       FacialLandMarkDetectionResults *detectionResults, int width, int height)
+       int Inference::GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results)
        {
                LOGI("ENTER");
-               FacialLandMarkDetectionResults results;
+
                OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
                if (outputMeta.IsParsed()) {
                        auto& landmarkInfo = outputMeta.GetLandmark();
                        auto& scoreInfo = outputMeta.GetScore();
+
                        if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) ||
                                !mOutputTensorBuffers.exist(scoreInfo.GetName())) {
                                LOGE("output buffers named of %s or %s are NULL",
@@ -1555,27 +1555,26 @@ namespace inference
                        int heatMapWidth = 0;
                        int heatMapHeight = 0;
                        int heatMapChannel = 0;
-                       if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
-                               heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx];
-                               heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx];
-                               heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx];
-                       }
-
-                       int number_of_landmarks = 0;
                        std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll();
+                       int number_of_landmarks = heatMapChannel;
+
                        if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
                                LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]);
                                number_of_landmarks = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]]
                                                                        / landmarkInfo.GetOffset();
                        } else {
-                               number_of_landmarks = heatMapChannel;
+                               heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx];
+                               heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx];
+                               heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx];
                        }
+
                        LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel);
 
                        // decoding
                        PoseDecoder poseDecoder(mOutputTensorBuffers, outputMeta,
                                                                        heatMapWidth, heatMapHeight, heatMapChannel,
                                                                        number_of_landmarks);
+
                        // initialize decorder queue with landmarks to be decoded.
                        int ret = poseDecoder.init();
                        if (ret != MEDIA_VISION_ERROR_NONE) {
@@ -1585,21 +1584,24 @@ namespace inference
 
                        float inputW = 1.f;
                        float inputH = 1.f;
+
                        if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
                                inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth());
                                inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight());
                        }
+
                        float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
                                                                                                                outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius;
+
                        poseDecoder.decode(inputW, inputH, thresRadius);
 
                        for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) {
-                               results.locations.push_back(
+                               results->locations.push_back(
                                        cv::Point(poseDecoder.getPointX(0, landmarkIndex) * static_cast<float>(mSourceSize.width),
                                                          poseDecoder.getPointY(0, landmarkIndex) * static_cast<float>(mSourceSize.height)));
                        }
-                       results.number_of_landmarks = results.locations.size();
-                       *detectionResults = results;
+
+                       results->number_of_landmarks = results->locations.size();
                } else {
                        tensor_t outputData;
 
@@ -1611,22 +1613,24 @@ namespace inference
                        }
 
                        int number_of_detections = outputData.dimInfo[0][1] >> 1;
-                       float *loc = reinterpret_cast<float *>(outputData.data[0]);
-                       results.number_of_landmarks = number_of_detections;
-                       results.locations.resize(number_of_detections);
+
+                       results->number_of_landmarks = number_of_detections;
+                       results->locations.resize(number_of_detections);
 
                        LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height);
-                       for (auto& point : results.locations) {
+
+                       float *loc = reinterpret_cast<float *>(outputData.data[0]);
+
+                       for (auto& point : results->locations) {
                                point.x = static_cast<int>(*loc++ * mSourceSize.width);
                                point.y = static_cast<int>(*loc++ * mSourceSize.height);
 
                                LOGI("x:%d, y:%d", point.x, point.y);
                        }
-
-                       *detectionResults = results;
                }
+
                LOGI("Inference: FacialLandmarkDetectionResults: %d\n",
-                        results.number_of_landmarks);
+                        results->number_of_landmarks);
                return MEDIA_VISION_ERROR_NONE;
        }
 
index 41c62df12c2703356b1c7ad29f0ada3d233d77bc..c945ac4d5fc6fec66e8cd34f1c6cf8ae37468bf8 100644 (file)
@@ -765,20 +765,6 @@ int mv_inference_facial_landmark_detect_open(
                mv_inference_facial_landmark_detected_cb detected_cb, void *user_data)
 {
        Inference *pInfer = static_cast<Inference *>(infer);
-       unsigned int width, height;
-
-       int ret = mv_source_get_width(source, &width);
-       if (ret != MEDIA_VISION_ERROR_NONE) {
-               LOGE("Fail to get width");
-               return ret;
-       }
-
-       ret = mv_source_get_height(source, &height);
-       if (ret != MEDIA_VISION_ERROR_NONE) {
-               LOGE("Fail to get height");
-               return ret;
-       }
-
        std::vector<mv_source_h> sources;
        std::vector<mv_rectangle_s> rects;
 
@@ -787,7 +773,7 @@ int mv_inference_facial_landmark_detect_open(
        if (roi != NULL)
                rects.push_back(*roi);
 
-       ret = pInfer->Run(sources, rects);
+       int ret = pInfer->Run(sources, rects);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to run inference");
                return ret;
@@ -795,8 +781,7 @@ int mv_inference_facial_landmark_detect_open(
 
        FacialLandMarkDetectionResults facialLandMarkDetectionResults;
 
-       ret = pInfer->GetFacialLandMarkDetectionResults(
-                       &facialLandMarkDetectionResults, width, height);
+       ret = pInfer->GetFacialLandMarkDetectionResults(&facialLandMarkDetectionResults);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to get inference results");
                return ret;
@@ -805,9 +790,9 @@ int mv_inference_facial_landmark_detect_open(
        int numberOfLandmarks = facialLandMarkDetectionResults.number_of_landmarks;
        std::vector<mv_point_s> locations(numberOfLandmarks);
 
-       for (int n = 0; n < numberOfLandmarks; ++n) {
-               locations[n].x = facialLandMarkDetectionResults.locations[n].x;
-               locations[n].y = facialLandMarkDetectionResults.locations[n].y;
+       for (int landmark_idx = 0; landmark_idx < numberOfLandmarks; ++landmark_idx) {
+               locations[landmark_idx].x = facialLandMarkDetectionResults.locations[landmark_idx].x;
+               locations[landmark_idx].y = facialLandMarkDetectionResults.locations[landmark_idx].y;
        }
 
        detected_cb(source, numberOfLandmarks, locations.data(), user_data);