mv_machine_learning: code cleanup to Inference class 87/265287/1
author Inki Dae <inki.dae@samsung.com>
Wed, 13 Oct 2021 09:55:41 +0000 (18:55 +0900)
committer Inki Dae <inki.dae@samsung.com>
Thu, 14 Oct 2021 05:44:36 +0000 (14:44 +0900)
Cleaned up Inference class.

What this patch does,
 - code sliding.
 - change variable names to meaningful ones, e.g. 'n' to 'output_idx'.
 - drop unnecessary variable.
 - use the same parameter name for the Get*Results functions.

Change-Id: I47ac3eb241116174e4a6a7bc2a1b90ab9378de25
Signed-off-by: Inki Dae <inki.dae@samsung.com>
mv_machine_learning/mv_inference/inference/include/Inference.h
mv_machine_learning/mv_inference/inference/src/Inference.cpp
mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp

index a0506f2..95f285f 100644 (file)
@@ -293,7 +293,7 @@ namespace inference
                 * @since_tizen 5.5
                 * @return @c true on success, otherwise a negative error value
                 */
-               int GetClassficationResults(ImageClassificationResults &classificationResults);
+               int GetClassficationResults(ImageClassificationResults *results);
 
                /**
                 * @brief       Gets the ObjectDetectioResults
@@ -301,7 +301,7 @@ namespace inference
                 * @since_tizen 5.5
                 * @return @c true on success, otherwise a negative error value
                 */
-               int GetObjectDetectionResults(ObjectDetectionResults *detectionResults);
+               int GetObjectDetectionResults(ObjectDetectionResults *results);
 
                /**
                 * @brief       Gets the FaceDetectioResults
@@ -309,7 +309,7 @@ namespace inference
                 * @since_tizen 5.5
                 * @return @c true on success, otherwise a negative error value
                 */
-               int GetFaceDetectionResults(FaceDetectionResults *detectionResults);
+               int GetFaceDetectionResults(FaceDetectionResults *results);
 
                /**
                 * @brief       Gets the FacialLandmarkDetectionResults
index 05c9c07..d79b3ff 100755 (executable)
@@ -1091,14 +1091,12 @@ namespace inference
                return mSupportedInferenceBackend[backend];
        }
 
-       int Inference::GetClassficationResults(ImageClassificationResults &results)
+       int Inference::GetClassficationResults(ImageClassificationResults *results)
        {
                // Will contain top N results in ascending order.
                std::vector<std::pair<float, int>> topScore;
                auto threadHold = mConfig.mConfidenceThresHold;
 
-               results.number_of_classes = 0;
-
                if (mMetadata.GetOutputMeta().IsParsed()) {
                        OutputMetadata outputMetadata = mMetadata.GetOutputMeta();
                        std::vector<int> indexes = outputMetadata.GetScoreDimInfo().GetValidIndexAll();
@@ -1191,23 +1189,25 @@ namespace inference
                        std::reverse(topScore.begin(), topScore.end());
                }
 
+               results->number_of_classes = 0;
+
                for (auto& score : topScore) {
                        LOGI("score: %.3f, threshold: %.3f", score.first, threadHold);
                        LOGI("idx:%d", score.second);
                        LOGI("classProb: %.3f", score.first);
 
-                       results.indices.push_back(score.second);
-                       results.confidences.push_back(score.first);
-                       results.names.push_back(mUserListName[score.second]);
-                       results.number_of_classes++;
+                       results->indices.push_back(score.second);
+                       results->confidences.push_back(score.first);
+                       results->names.push_back(mUserListName[score.second]);
+                       results->number_of_classes++;
                }
 
-               LOGE("Inference: GetClassificationResults: %d\n", results.number_of_classes);
+               LOGE("Inference: GetClassificationResults: %d\n", results->number_of_classes);
                return MEDIA_VISION_ERROR_NONE;
        }
 
        int Inference::GetObjectDetectionResults(
-                       ObjectDetectionResults *detectionResults)
+                       ObjectDetectionResults *results)
        {
                if (mMetadata.GetOutputMeta().IsParsed()) {
                        OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
@@ -1222,6 +1222,7 @@ namespace inference
 
                        int boxOffset = 0;
                        int numberOfObjects = 0;
+
                        if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
                                std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
                                if (boxIndexes.size() != 1) {
@@ -1252,23 +1253,21 @@ namespace inference
 
                        objDecoder.init();
                        objDecoder.decode();
-                       ObjectDetectionResults results;
-                       results.number_of_objects = 0;
+                       results->number_of_objects = 0;
 
                        for (auto& box : objDecoder.getObjectAll()) {
-                               results.indices.push_back(box.index);
-                               results.names.push_back(mUserListName[box.index]);
-                               results.confidences.push_back(box.score);
-                               results.locations.push_back(cv::Rect(
+                               results->indices.push_back(box.index);
+                               results->names.push_back(mUserListName[box.index]);
+                               results->confidences.push_back(box.score);
+                               results->locations.push_back(cv::Rect(
                                                static_cast<int>((box.location.x -  box.location.width * 0.5f) * static_cast<float>(mSourceSize.width)),
                                                static_cast<int>((box.location.y -  box.location.height * 0.5f) * static_cast<float>(mSourceSize.height)),
                                                static_cast<int>(box.location.width *  static_cast<float>(mSourceSize.width)),
                                                static_cast<int>(box.location.height * static_cast<float>(mSourceSize.height))));
-                               results.number_of_objects++;
+                               results->number_of_objects++;
                        }
-                       *detectionResults = results;
-                       LOGI("Inference: GetObjectDetectionResults: %d\n",
-                               results.number_of_objects);
+
+                       LOGI("Inference: GetObjectDetectionResults: %d\n", results->number_of_objects);
                } else {
                        tensor_t outputData;
 
@@ -1288,7 +1287,6 @@ namespace inference
                        float *scores = nullptr;
                        int number_of_detections = 0;
 
-                       cv::Mat cvScores, cvClasses, cvBoxes;
                        if (outputData.dimInfo.size() == 1) {
                                // there is no way to know how many objects are detect unless the number of objects aren't
                                // provided. In the case, each backend should provide the number of results manually.
@@ -1307,8 +1305,9 @@ namespace inference
                                cv::Mat cvTop = cvOutputData.col(4).clone();
                                cv::Mat cvRight = cvOutputData.col(5).clone();
                                cv::Mat cvBottom = cvOutputData.col(6).clone();
-
+                               cv::Mat cvScores, cvClasses, cvBoxes;
                                cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight };
+
                                cv::hconcat(cvBoxElems, 4, cvBoxes);
 
                                // classes
@@ -1320,42 +1319,38 @@ namespace inference
                                boxes = cvBoxes.ptr<float>(0);
                                classes = cvClasses.ptr<float>(0);
                                scores = cvScores.ptr<float>(0);
-
                        } else {
                                boxes = reinterpret_cast<float *>(outputData.data[0]);
                                classes = reinterpret_cast<float *>(outputData.data[1]);
                                scores = reinterpret_cast<float *>(outputData.data[2]);
-                               number_of_detections =
-                                               (int) (*reinterpret_cast<float *>(outputData.data[3]));
+                               number_of_detections = (int) (*reinterpret_cast<float *>(outputData.data[3]));
                        }
 
                        LOGI("number_of_detections = %d", number_of_detections);
 
-                       int left, top, right, bottom;
-                       cv::Rect loc;
+                       results->number_of_objects = 0;
 
-                       ObjectDetectionResults results;
-                       results.number_of_objects = 0;
                        for (int idx = 0; idx < number_of_detections; ++idx) {
                                if (scores[idx] < mThreshold)
                                        continue;
 
-                               left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
-                               top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
-                               right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
-                               bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);
+                               int left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
+                               int top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
+                               int right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
+                               int bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);
+                               cv::Rect loc;
 
                                loc.x = left;
                                loc.y = top;
                                loc.width = right - left + 1;
                                loc.height = bottom - top + 1;
 
-                               results.indices.push_back(static_cast<int>(classes[idx]));
-                               results.confidences.push_back(scores[idx]);
-                               results.names.push_back(
+                               results->indices.push_back(static_cast<int>(classes[idx]));
+                               results->confidences.push_back(scores[idx]);
+                               results->names.push_back(
                                                mUserListName[static_cast<int>(classes[idx])]);
-                               results.locations.push_back(loc);
-                               results.number_of_objects++;
+                               results->locations.push_back(loc);
+                               results->number_of_objects++;
 
                                LOGI("objectClass: %d", static_cast<int>(classes[idx]));
                                LOGI("confidence:%f", scores[idx]);
@@ -1363,16 +1358,13 @@ namespace inference
                                        bottom);
                        }
 
-                       *detectionResults = results;
-                       LOGI("Inference: GetObjectDetectionResults: %d\n",
-                               results.number_of_objects);
+                       LOGI("Inference: GetObjectDetectionResults: %d\n", results->number_of_objects);
                }
 
                return MEDIA_VISION_ERROR_NONE;
        }
 
-       int
-       Inference::GetFaceDetectionResults(FaceDetectionResults *detectionResults)
+       int Inference::GetFaceDetectionResults(FaceDetectionResults *results)
        {
                if (mMetadata.GetOutputMeta().IsParsed()) {
                        OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
@@ -1387,6 +1379,7 @@ namespace inference
 
                        int boxOffset = 0;
                        int numberOfFaces = 0;
+
                        if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
                                std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
                                if (boxIndexes.size() != 1) {
@@ -1417,21 +1410,19 @@ namespace inference
 
                        objDecoder.init();
                        objDecoder.decode();
-                       FaceDetectionResults results;
-                       results.number_of_faces = 0;
+                       results->number_of_faces = 0;
 
                        for (auto& face : objDecoder.getObjectAll()) {
-                               results.confidences.push_back(face.score);
-                               results.locations.push_back(cv::Rect(
+                               results->confidences.push_back(face.score);
+                               results->locations.push_back(cv::Rect(
                                                static_cast<int>((face.location.x -  face.location.width * 0.5f) * static_cast<float>(mSourceSize.width)),
                                                static_cast<int>((face.location.y -  face.location.height * 0.5f) * static_cast<float>(mSourceSize.height)),
                                                static_cast<int>(face.location.width *  static_cast<float>(mSourceSize.width)),
                                                static_cast<int>(face.location.height * static_cast<float>(mSourceSize.height))));
-                               results.number_of_faces++;
+                               results->number_of_faces++;
                        }
-                       *detectionResults = results;
-                       LOGE("Inference: GetFaceDetectionResults: %d\n",
-                               results.number_of_faces);
+
+                       LOGE("Inference: GetFaceDetectionResults: %d\n", results->number_of_faces);
                } else {
                        tensor_t outputData;
 
@@ -1450,8 +1441,8 @@ namespace inference
                        float *classes = nullptr;
                        float *scores = nullptr;
                        int number_of_detections = 0;
-
                        cv::Mat cvScores, cvClasses, cvBoxes;
+
                        if (outputData.dimInfo.size() == 1) {
                                // there is no way to know how many objects are detect unless the number of objects aren't
                                // provided. In the case, each backend should provide the number of results manually.
@@ -1460,17 +1451,14 @@ namespace inference
                                // indicates the image id. But it is useless if a batch mode isn't supported.
                                // So, use the 1st of 7.
 
-                               number_of_detections = static_cast<int>(
-                                               *reinterpret_cast<float *>(outputData.data[0]));
-                               cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3],
-                                                                       CV_32F, outputData.data[0]);
+                               number_of_detections = static_cast<int>(*reinterpret_cast<float *>(outputData.data[0]));
+                               cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3], CV_32F, outputData.data[0]);
 
                                // boxes
                                cv::Mat cvLeft = cvOutputData.col(3).clone();
                                cv::Mat cvTop = cvOutputData.col(4).clone();
                                cv::Mat cvRight = cvOutputData.col(5).clone();
                                cv::Mat cvBottom = cvOutputData.col(6).clone();
-
                                cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight };
                                cv::hconcat(cvBoxElems, 4, cvBoxes);
 
@@ -1483,49 +1471,41 @@ namespace inference
                                boxes = cvBoxes.ptr<float>(0);
                                classes = cvClasses.ptr<float>(0);
                                scores = cvScores.ptr<float>(0);
-
                        } else {
                                boxes = reinterpret_cast<float *>(outputData.data[0]);
                                classes = reinterpret_cast<float *>(outputData.data[1]);
                                scores = reinterpret_cast<float *>(outputData.data[2]);
-                               number_of_detections = static_cast<int>(
-                                               *reinterpret_cast<float *>(outputData.data[3]));
+                               number_of_detections = static_cast<int>(*reinterpret_cast<float *>(outputData.data[3]));
                        }
 
-                       int left, top, right, bottom;
-                       cv::Rect loc;
+                       results->number_of_faces = 0;
 
-                       FaceDetectionResults results;
-                       results.number_of_faces = 0;
                        for (int idx = 0; idx < number_of_detections; ++idx) {
                                if (scores[idx] < mThreshold)
                                        continue;
 
-                               left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
-                               top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
-                               right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
-                               bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);
+                               int left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
+                               int top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
+                               int right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
+                               int bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);
+                               cv::Rect loc;
 
                                loc.x = left;
                                loc.y = top;
                                loc.width = right - left + 1;
                                loc.height = bottom - top + 1;
-
-                               results.confidences.push_back(scores[idx]);
-                               results.locations.push_back(loc);
-                               results.number_of_faces++;
+                               results->confidences.push_back(scores[idx]);
+                               results->locations.push_back(loc);
+                               results->number_of_faces++;
 
                                LOGI("confidence:%f", scores[idx]);
                                LOGI("class: %f", classes[idx]);
                                LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx * 4 + 1],
                                        boxes[idx * 4 + 0], boxes[idx * 4 + 3], boxes[idx * 4 + 2]);
-                               LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right,
-                                       bottom);
+                               LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, bottom);
                        }
 
-                       *detectionResults = results;
-                       LOGE("Inference: GetFaceDetectionResults: %d\n",
-                               results.number_of_faces);
+                       LOGE("Inference: GetFaceDetectionResults: %d\n", results->number_of_faces);
                }
 
                return MEDIA_VISION_ERROR_NONE;
@@ -1699,12 +1679,11 @@ namespace inference
                        }
 
                        poseDecoder.decode(inputW, inputH, thresRadius);
-
-                       int part = 0;
                        poseResult->number_of_poses = poseDecoder.getNumberOfPose();
+
                        for (int poseIndex = 0; poseIndex < poseResult->number_of_poses; ++poseIndex) {
                                for (int landmarkIndex = 0; landmarkIndex < poseResult->number_of_landmarks_per_pose; ++ landmarkIndex) {
-                                       part = landmarkIndex;
+                                       int part = landmarkIndex;
                                        if (!mUserListName.empty()) {
                                                part = std::stoi(mUserListName[landmarkIndex]) - 1;
                                                if (part < 0) {
@@ -1773,6 +1752,7 @@ namespace inference
 
                                        loc2f.x = (static_cast<float>(loc.x) / ratioX);
                                        loc2f.y = (static_cast<float>(loc.y) / ratioY);
+
                                        LOGI("landmarkIndex[%2d] - mapping to [%2d]: x[%.3f], y[%.3f], score[%.3f]",
                                                        landmarkIndex, part, loc2f.x, loc2f.y, score);
 
index c945ac4..a9fd490 100644 (file)
@@ -638,7 +638,7 @@ int mv_inference_image_classify_open(
 
        ImageClassificationResults classificationResults;
 
-       ret = pInfer->GetClassficationResults(classificationResults);
+       ret = pInfer->GetClassficationResults(&classificationResults);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to get inference results");
                return ret;
@@ -699,14 +699,14 @@ int mv_inference_object_detect_open(mv_source_h source, mv_inference_h infer,
                locations.resize(numberOfOutputs);
        }
 
-       for (int n = 0; n < numberOfOutputs; ++n) {
-               LOGE("names: %s", objectDetectionResults.names[n].c_str());
-               names[n] = objectDetectionResults.names[n].c_str();
+       for (int output_idx = 0; output_idx < numberOfOutputs; ++output_idx) {
+               LOGE("names: %s", objectDetectionResults.names[output_idx].c_str());
+               names[output_idx] = objectDetectionResults.names[output_idx].c_str();
 
-               locations[n].point.x = objectDetectionResults.locations[n].x;
-               locations[n].point.y = objectDetectionResults.locations[n].y;
-               locations[n].width = objectDetectionResults.locations[n].width;
-               locations[n].height = objectDetectionResults.locations[n].height;
+               locations[output_idx].point.x = objectDetectionResults.locations[output_idx].x;
+               locations[output_idx].point.y = objectDetectionResults.locations[output_idx].y;
+               locations[output_idx].width = objectDetectionResults.locations[output_idx].width;
+               locations[output_idx].height = objectDetectionResults.locations[output_idx].height;
        }
 
        int *indices = objectDetectionResults.indices.data();
@@ -745,11 +745,11 @@ int mv_inference_face_detect_open(mv_source_h source, mv_inference_h infer,
        int numberOfOutputs = faceDetectionResults.number_of_faces;
        std::vector<mv_rectangle_s> locations(numberOfOutputs);
 
-       for (int n = 0; n < numberOfOutputs; ++n) {
-               locations[n].point.x = faceDetectionResults.locations[n].x;
-               locations[n].point.y = faceDetectionResults.locations[n].y;
-               locations[n].width = faceDetectionResults.locations[n].width;
-               locations[n].height = faceDetectionResults.locations[n].height;
+       for (int output_idx = 0; output_idx < numberOfOutputs; ++output_idx) {
+               locations[output_idx].point.x = faceDetectionResults.locations[output_idx].x;
+               locations[output_idx].point.y = faceDetectionResults.locations[output_idx].y;
+               locations[output_idx].width = faceDetectionResults.locations[output_idx].width;
+               locations[output_idx].height = faceDetectionResults.locations[output_idx].height;
        }
 
        float *confidences = faceDetectionResults.confidences.data();