mv_machine_learning: code cleanup to Inference class 87/265287/1
author Inki Dae <inki.dae@samsung.com>
Wed, 13 Oct 2021 09:55:41 +0000 (18:55 +0900)
committer Inki Dae <inki.dae@samsung.com>
Thu, 14 Oct 2021 05:44:36 +0000 (14:44 +0900)
Cleaned up Inference class.

What this patch does,
 - code sliding.
 - change variable names to meaningful ones, e.g. 'n' to 'output_idx'.
 - drop unnecessary variable.
 - use the same parameter name for the Get*Results functions.

Change-Id: I47ac3eb241116174e4a6a7bc2a1b90ab9378de25
Signed-off-by: Inki Dae <inki.dae@samsung.com>
mv_machine_learning/mv_inference/inference/include/Inference.h
mv_machine_learning/mv_inference/inference/src/Inference.cpp
mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp

index a0506f2..95f285f 100644 (file)
@@ -293,7 +293,7 @@ namespace inference
                 * @since_tizen 5.5
                 * @return @c true on success, otherwise a negative error value
                 */
-               int GetClassficationResults(ImageClassificationResults &classificationResults);
+               int GetClassficationResults(ImageClassificationResults *results);
 
                /**
                 * @brief       Gets the ObjectDetectioResults
@@ -301,7 +301,7 @@ namespace inference
                 * @since_tizen 5.5
                 * @return @c true on success, otherwise a negative error value
                 */
-               int GetObjectDetectionResults(ObjectDetectionResults *detectionResults);
+               int GetObjectDetectionResults(ObjectDetectionResults *results);
 
                /**
                 * @brief       Gets the FaceDetectioResults
@@ -309,7 +309,7 @@ namespace inference
                 * @since_tizen 5.5
                 * @return @c true on success, otherwise a negative error value
                 */
-               int GetFaceDetectionResults(FaceDetectionResults *detectionResults);
+               int GetFaceDetectionResults(FaceDetectionResults *results);
 
                /**
                 * @brief       Gets the FacialLandmarkDetectionResults
index 05c9c07..d79b3ff 100755 (executable)
@@ -1091,14 +1091,12 @@ namespace inference
                return mSupportedInferenceBackend[backend];
        }
 
-       int Inference::GetClassficationResults(ImageClassificationResults &results)
+       int Inference::GetClassficationResults(ImageClassificationResults *results)
        {
                // Will contain top N results in ascending order.
                std::vector<std::pair<float, int>> topScore;
                auto threadHold = mConfig.mConfidenceThresHold;
 
-               results.number_of_classes = 0;
-
                if (mMetadata.GetOutputMeta().IsParsed()) {
                        OutputMetadata outputMetadata = mMetadata.GetOutputMeta();
                        std::vector<int> indexes = outputMetadata.GetScoreDimInfo().GetValidIndexAll();
@@ -1191,23 +1189,25 @@ namespace inference
                        std::reverse(topScore.begin(), topScore.end());
                }
 
+               results->number_of_classes = 0;
+
                for (auto& score : topScore) {
                        LOGI("score: %.3f, threshold: %.3f", score.first, threadHold);
                        LOGI("idx:%d", score.second);
                        LOGI("classProb: %.3f", score.first);
 
-                       results.indices.push_back(score.second);
-                       results.confidences.push_back(score.first);
-                       results.names.push_back(mUserListName[score.second]);
-                       results.number_of_classes++;
+                       results->indices.push_back(score.second);
+                       results->confidences.push_back(score.first);
+                       results->names.push_back(mUserListName[score.second]);
+                       results->number_of_classes++;
                }
 
-               LOGE("Inference: GetClassificationResults: %d\n", results.number_of_classes);
+               LOGE("Inference: GetClassificationResults: %d\n", results->number_of_classes);
                return MEDIA_VISION_ERROR_NONE;
        }
 
        int Inference::GetObjectDetectionResults(
-                       ObjectDetectionResults *detectionResults)
+                       ObjectDetectionResults *results)
        {
                if (mMetadata.GetOutputMeta().IsParsed()) {
                        OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
@@ -1222,6 +1222,7 @@ namespace inference
 
                        int boxOffset = 0;
                        int numberOfObjects = 0;
+
                        if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
                                std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
                                if (boxIndexes.size() != 1) {
@@ -1252,23 +1253,21 @@ namespace inference
 
                        objDecoder.init();
                        objDecoder.decode();
-                       ObjectDetectionResults results;
-                       results.number_of_objects = 0;
+                       results->number_of_objects = 0;
 
                        for (auto& box : objDecoder.getObjectAll()) {
-                               results.indices.push_back(box.index);
-                               results.names.push_back(mUserListName[box.index]);
-                               results.confidences.push_back(box.score);
-                               results.locations.push_back(cv::Rect(
+                               results->indices.push_back(box.index);
+                               results->names.push_back(mUserListName[box.index]);
+                               results->confidences.push_back(box.score);
+                               results->locations.push_back(cv::Rect(
                                                static_cast<int>((box.location.x -  box.location.width * 0.5f) * static_cast<float>(mSourceSize.width)),
                                                static_cast<int>((box.location.y -  box.location.height * 0.5f) * static_cast<float>(mSourceSize.height)),
                                                static_cast<int>(box.location.width *  static_cast<float>(mSourceSize.width)),
                                                static_cast<int>(box.location.height * static_cast<float>(mSourceSize.height))));
-                               results.number_of_objects++;
+                               results->number_of_objects++;
                        }
-                       *detectionResults = results;
-                       LOGI("Inference: GetObjectDetectionResults: %d\n",
-                               results.number_of_objects);
+
+                       LOGI("Inference: GetObjectDetectionResults: %d\n", results->number_of_objects);
                } else {
                        tensor_t outputData;
 
@@ -1288,7 +1287,6 @@ namespace inference
                        float *scores = nullptr;
                        int number_of_detections = 0;
 
-                       cv::Mat cvScores, cvClasses, cvBoxes;
                        if (outputData.dimInfo.size() == 1) {
                                // there is no way to know how many objects are detect unless the number of objects aren't
                                // provided. In the case, each backend should provide the number of results manually.
@@ -1307,8 +1305,9 @@ namespace inference
                                cv::Mat cvTop = cvOutputData.col(4).clone();
                                cv::Mat cvRight = cvOutputData.col(5).clone();
                                cv::Mat cvBottom = cvOutputData.col(6).clone();
-
+                               cv::Mat cvScores, cvClasses, cvBoxes;
                                cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight };
+
                                cv::hconcat(cvBoxElems, 4, cvBoxes);
 
                                // classes
@@ -1320,42 +1319,38 @@ namespace inference
                                boxes = cvBoxes.ptr<float>(0);
                                classes = cvClasses.ptr<float>(0);
                                scores = cvScores.ptr<float>(0);
-
                        } else {
                                boxes = reinterpret_cast<float *>(outputData.data[0]);
                                classes = reinterpret_cast<float *>(outputData.data[1]);
                                scores = reinterpret_cast<float *>(outputData.data[2]);
-                               number_of_detections =
-                                               (int) (*reinterpret_cast<float *>(outputData.data[3]));
+                               number_of_detections = (int) (*reinterpret_cast<float *>(outputData.data[3]));
                        }
 
                        LOGI("number_of_detections = %d", number_of_detections);
 
-                       int left, top, right, bottom;
-                       cv::Rect loc;
+                       results->number_of_objects = 0;
 
-                       ObjectDetectionResults results;
-                       results.number_of_objects = 0;
                        for (int idx = 0; idx < number_of_detections; ++idx) {
                                if (scores[idx] < mThreshold)
                                        continue;
 
-                               left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
-                               top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
-                               right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
-                               bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);
+                               int left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
+                               int top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
+                               int right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
+                               int bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);
+                               cv::Rect loc;
 
                                loc.x = left;
                                loc.y = top;
                                loc.width = right - left + 1;
                                loc.height = bottom - top + 1;
 
-                               results.indices.push_back(static_cast<int>(classes[idx]));
-                               results.confidences.push_back(scores[idx]);
-                               results.names.push_back(
+                               results->indices.push_back(static_cast<int>(classes[idx]));
+                               results->confidences.push_back(scores[idx]);
+                               results->names.push_back(
                                                mUserListName[static_cast<int>(classes[idx])]);
-                               results.locations.push_back(loc);
-                               results.number_of_objects++;
+                               results->locations.push_back(loc);
+                               results->number_of_objects++;
 
                                LOGI("objectClass: %d", static_cast<int>(classes[idx]));
                                LOGI("confidence:%f", scores[idx]);
@@ -1363,16 +1358,13 @@ namespace inference
                                        bottom);
                        }
 
-                       *detectionResults = results;
-                       LOGI("Inference: GetObjectDetectionResults: %d\n",
-                               results.number_of_objects);
+                       LOGI("Inference: GetObjectDetectionResults: %d\n", results->number_of_objects);
                }
 
                return MEDIA_VISION_ERROR_NONE;
        }
 
-       int
-       Inference::GetFaceDetectionResults(FaceDetectionResults *detectionResults)
+       int Inference::GetFaceDetectionResults(FaceDetectionResults *results)
        {
                if (mMetadata.GetOutputMeta().IsParsed()) {
                        OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
@@ -1387,6 +1379,7 @@ namespace inference
 
                        int boxOffset = 0;
                        int numberOfFaces = 0;
+
                        if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
                                std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
                                if (boxIndexes.size() != 1) {
@@ -1417,21 +1410,19 @@ namespace inference
 
                        objDecoder.init();
                        objDecoder.decode();
-                       FaceDetectionResults results;
-                       results.number_of_faces = 0;
+                       results->number_of_faces = 0;
 
                        for (auto& face : objDecoder.getObjectAll()) {
-                               results.confidences.push_back(face.score);
-                               results.locations.push_back(cv::Rect(
+                               results->confidences.push_back(face.score);
+                               results->locations.push_back(cv::Rect(
                                                static_cast<int>((face.location.x -  face.location.width * 0.5f) * static_cast<float>(mSourceSize.width)),
                                                static_cast<int>((face.location.y -  face.location.height * 0.5f) * static_cast<float>(mSourceSize.height)),
                                                static_cast<int>(face.location.width *  static_cast<float>(mSourceSize.width)),
                                                static_cast<int>(face.location.height * static_cast<float>(mSourceSize.height))));
-                               results.number_of_faces++;
+                               results->number_of_faces++;
                        }
-                       *detectionResults = results;
-                       LOGE("Inference: GetFaceDetectionResults: %d\n",
-                               results.number_of_faces);
+
+                       LOGE("Inference: GetFaceDetectionResults: %d\n", results->number_of_faces);
                } else {
                        tensor_t outputData;
 
@@ -1450,8 +1441,8 @@ namespace inference
                        float *classes = nullptr;
                        float *scores = nullptr;
                        int number_of_detections = 0;
-
                        cv::Mat cvScores, cvClasses, cvBoxes;
+
                        if (outputData.dimInfo.size() == 1) {
                                // there is no way to know how many objects are detect unless the number of objects aren't
                                // provided. In the case, each backend should provide the number of results manually.
@@ -1460,17 +1451,14 @@ namespace inference
                                // indicates the image id. But it is useless if a batch mode isn't supported.
                                // So, use the 1st of 7.
 
-                               number_of_detections = static_cast<int>(
-                                               *reinterpret_cast<float *>(outputData.data[0]));
-                               cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3],
-                                                                       CV_32F, outputData.data[0]);
+                               number_of_detections = static_cast<int>(*reinterpret_cast<float *>(outputData.data[0]));
+                               cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3], CV_32F, outputData.data[0]);
 
                                // boxes
                                cv::Mat cvLeft = cvOutputData.col(3).clone();
                                cv::Mat cvTop = cvOutputData.col(4).clone();
                                cv::Mat cvRight = cvOutputData.col(5).clone();
                                cv::Mat cvBottom = cvOutputData.col(6).clone();
-
                                cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight };
                                cv::hconcat(cvBoxElems, 4, cvBoxes);
 
@@ -1483,49 +1471,41 @@ namespace inference
                                boxes = cvBoxes.ptr<float>(0);
                                classes = cvClasses.ptr<float>(0);
                                scores = cvScores.ptr<float>(0);
-
                        } else {
                                boxes = reinterpret_cast<float *>(outputData.data[0]);
                                classes = reinterpret_cast<float *>(outputData.data[1]);
                                scores = reinterpret_cast<float *>(outputData.data[2]);
-                               number_of_detections = static_cast<int>(
-                                               *reinterpret_cast<float *>(outputData.data[3]));
+                               number_of_detections = static_cast<int>(*reinterpret_cast<float *>(outputData.data[3]));
                        }
 
-                       int left, top, right, bottom;
-                       cv::Rect loc;
+                       results->number_of_faces = 0;
 
-                       FaceDetectionResults results;
-                       results.number_of_faces = 0;
                        for (int idx = 0; idx < number_of_detections; ++idx) {
                                if (scores[idx] < mThreshold)
                                        continue;
 
-                               left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
-                               top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
-                               right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
-                               bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);
+                               int left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
+                               int top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
+                               int right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
+                               int bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);
+                               cv::Rect loc;
 
                                loc.x = left;
                                loc.y = top;
                                loc.width = right - left + 1;
                                loc.height = bottom - top + 1;
-
-                               results.confidences.push_back(scores[idx]);
-                               results.locations.push_back(loc);
-                               results.number_of_faces++;
+                               results->confidences.push_back(scores[idx]);
+                               results->locations.push_back(loc);
+                               results->number_of_faces++;
 
                                LOGI("confidence:%f", scores[idx]);
                                LOGI("class: %f", classes[idx]);
                                LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx * 4 + 1],
                                        boxes[idx * 4 + 0], boxes[idx * 4 + 3], boxes[idx * 4 + 2]);
-                               LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right,
-                                       bottom);
+                               LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, bottom);
                        }
 
-                       *detectionResults = results;
-                       LOGE("Inference: GetFaceDetectionResults: %d\n",
-                               results.number_of_faces);
+                       LOGE("Inference: GetFaceDetectionResults: %d\n", results->number_of_faces);
                }
 
                return MEDIA_VISION_ERROR_NONE;
@@ -1699,12 +1679,11 @@ namespace inference
                        }
 
                        poseDecoder.decode(inputW, inputH, thresRadius);
-
-                       int part = 0;
                        poseResult->number_of_poses = poseDecoder.getNumberOfPose();
+
                        for (int poseIndex = 0; poseIndex < poseResult->number_of_poses; ++poseIndex) {
                                for (int landmarkIndex = 0; landmarkIndex < poseResult->number_of_landmarks_per_pose; ++ landmarkIndex) {
-                                       part = landmarkIndex;
+                                       int part = landmarkIndex;
                                        if (!mUserListName.empty()) {
                                                part = std::stoi(mUserListName[landmarkIndex]) - 1;
                                                if (part < 0) {
@@ -1773,6 +1752,7 @@ namespace inference
 
                                        loc2f.x = (static_cast<float>(loc.x) / ratioX);
                                        loc2f.y = (static_cast<float>(loc.y) / ratioY);
+
                                        LOGI("landmarkIndex[%2d] - mapping to [%2d]: x[%.3f], y[%.3f], score[%.3f]",
                                                        landmarkIndex, part, loc2f.x, loc2f.y, score);
 
index c945ac4..a9fd490 100644 (file)
@@ -638,7 +638,7 @@ int mv_inference_image_classify_open(
 
        ImageClassificationResults classificationResults;
 
-       ret = pInfer->GetClassficationResults(classificationResults);
+       ret = pInfer->GetClassficationResults(&classificationResults);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to get inference results");
                return ret;
@@ -699,14 +699,14 @@ int mv_inference_object_detect_open(mv_source_h source, mv_inference_h infer,
                locations.resize(numberOfOutputs);
        }
 
-       for (int n = 0; n < numberOfOutputs; ++n) {
-               LOGE("names: %s", objectDetectionResults.names[n].c_str());
-               names[n] = objectDetectionResults.names[n].c_str();
+       for (int output_idx = 0; output_idx < numberOfOutputs; ++output_idx) {
+               LOGE("names: %s", objectDetectionResults.names[output_idx].c_str());
+               names[output_idx] = objectDetectionResults.names[output_idx].c_str();
 
-               locations[n].point.x = objectDetectionResults.locations[n].x;
-               locations[n].point.y = objectDetectionResults.locations[n].y;
-               locations[n].width = objectDetectionResults.locations[n].width;
-               locations[n].height = objectDetectionResults.locations[n].height;
+               locations[output_idx].point.x = objectDetectionResults.locations[output_idx].x;
+               locations[output_idx].point.y = objectDetectionResults.locations[output_idx].y;
+               locations[output_idx].width = objectDetectionResults.locations[output_idx].width;
+               locations[output_idx].height = objectDetectionResults.locations[output_idx].height;
        }
 
        int *indices = objectDetectionResults.indices.data();
@@ -745,11 +745,11 @@ int mv_inference_face_detect_open(mv_source_h source, mv_inference_h infer,
        int numberOfOutputs = faceDetectionResults.number_of_faces;
        std::vector<mv_rectangle_s> locations(numberOfOutputs);
 
-       for (int n = 0; n < numberOfOutputs; ++n) {
-               locations[n].point.x = faceDetectionResults.locations[n].x;
-               locations[n].point.y = faceDetectionResults.locations[n].y;
-               locations[n].width = faceDetectionResults.locations[n].width;
-               locations[n].height = faceDetectionResults.locations[n].height;
+       for (int output_idx = 0; output_idx < numberOfOutputs; ++output_idx) {
+               locations[output_idx].point.x = faceDetectionResults.locations[output_idx].x;
+               locations[output_idx].point.y = faceDetectionResults.locations[output_idx].y;
+               locations[output_idx].width = faceDetectionResults.locations[output_idx].width;
+               locations[output_idx].height = faceDetectionResults.locations[output_idx].height;
        }
 
        float *confidences = faceDetectionResults.confidences.data();