mv_machine_learning: refactor the input and output metadata classes

author Inki Dae <inki.dae@samsung.com>
Thu, 7 Oct 2021 10:03:23 +0000 (19:03 +0900)
committer Inki Dae <inki.dae@samsung.com>
Wed, 13 Oct 2021 05:35:35 +0000 (14:35 +0900)
diff --git a/mv_machine_learning/mv_inference/inference/include/InputMetadata.h b/mv_machine_learning/mv_inference/inference/include/InputMetadata.h

index 8b5e2bd..bdd12c2 100644 (file)
--- a/mv_machine_learning/mv_inference/inference/include/InputMetadata.h
+++ b/mv_machine_learning/mv_inference/inference/include/InputMetadata.h
@@ -88,10 +88,6 @@ namespace inference
         class InputMetadata
         {
         public:
-               bool parsed;
-               std::map<std::string, LayerInfo> layer;
-               std::map<std::string, Options> option;
-
                 /**
                  * @brief   Creates an InputMetadata class instance.
                  *
@@ -113,11 +109,17 @@ namespace inference
                  * @since_tizen 6.5
                  */
                 int Parse(JsonObject *root);
+               bool IsParsed(void) { return parsed; }
+               std::map<std::string, LayerInfo>& GetLayer() { return layer; }
+               std::map<std::string, Options>& GetOption() { return option; }
  
         private:
+               bool parsed;
                 std::map<std::string, inference_tensor_shape_type_e> mSupportedShapeType;
                 std::map<std::string, mv_inference_data_type_e> mSupportedDataType;
                 std::map<std::string, mv_colorspace_e> mSupportedColorSpace;
+               std::map<std::string, LayerInfo> layer;
+               std::map<std::string, Options> option;
  
                 template <typename T>
                 static T GetSupportedType(JsonObject* root, std::string typeName,
diff --git a/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h b/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h

index f5324f2..11c5cc2 100755 (executable)
--- a/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h
+++ b/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h
@@ -44,13 +44,8 @@ namespace inference
                 OutputMetadata mMeta;
                 int mBoxOffset;
                 int mNumberOfOjects;
-
-               ScoreInfo& mScoreInfo;
-               BoxInfo& mBoxInfo;
-
                 float mScaleW;
                 float mScaleH;
-
                 Boxes mResultBoxes;
  
                 float decodeScore(int idx);
@@ -62,9 +57,7 @@ namespace inference
                                         int boxOffset, float scaleW, float scaleH, int numberOfObjects = 0) :
                                         mTensorBuffer(buffer), mMeta(metaData),
                                         mBoxOffset(boxOffset), mNumberOfOjects(numberOfObjects),
-                                       mScoreInfo(mMeta.GetScore()), mBoxInfo(mMeta.GetBox()),
-                                       mScaleW(scaleW), mScaleH(scaleH),
-                                       mResultBoxes() {
+                                       mScaleW(scaleW), mScaleH(scaleH), mResultBoxes() {
                                         };
  
                 ~ObjectDecoder() = default;
diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h

index ca4df80..d223726 100644 (file)
--- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
+++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
@@ -42,6 +42,7 @@ namespace inference
         {
         private:
                 std::vector<int> dims;
+
         public:
                 std::vector<int> GetValidIndexAll() const;
                 void SetValidIndex(int index);
@@ -52,6 +53,7 @@ namespace inference
         private:
                 double scale;
                 double zeropoint;
+
         public:
                 DeQuantization(double s, double z) : scale(s), zeropoint(z) {};
                 ~DeQuantization() = default;
@@ -66,10 +68,9 @@ namespace inference
                 std::string name;
                 DimInfo dimInfo;
                 double threshold;
-               inference_score_type_e type;
                 int topNumber;
+               inference_score_type_e type;
                 std::shared_ptr<DeQuantization> deQuantization;
-
                 std::map<std::string, inference_score_type_e> supportedScoreTypes;
  
         public:
@@ -82,141 +83,130 @@ namespace inference
                 inference_score_type_e GetType() { return type; }
                 int GetTopNumber() { return topNumber; }
                 std::shared_ptr<DeQuantization> GetDeQuant() { return deQuantization; }
-
                 int ParseScore(JsonObject *root);
         };
  
-       class BoxInfo
-       {
+       struct AnchorParam {
+               int mode; /**< 0: generate anchor, 1:load pre-anchor*/
+               int numLayers;
+               float minScale;
+               float maxScale;
+               int inputSizeHeight;
+               int inputSizeWidth;
+               float anchorOffsetX;
+               float anchorOffsetY;
+               std::vector<int> strides;
+               std::vector<float> aspectRatios;
+               bool isReduceBoxedInLowestLayer;
+               float interpolatedScaleAspectRatio;
+               bool isFixedAnchorSize;
+               bool isExponentialBoxScale;
+               float xScale;
+               float yScale;
+               float wScale;
+               float hScale;
+       };
+
+       struct NMSParam {
+               inference_box_nms_type_e mode; /**< 0: standard */
+               float iouThreshold;
+               std::map<std::string, inference_box_nms_type_e> supportedBoxNmsTypes;
+       };
+
+       struct RotateParam {
+               int startPointIndex;
+               int endPointIndex;
+               cv::Point2f startPoint;
+               cv::Point2f endPoint;
+               float baseAngle;
+       };
+
+       struct RoiOptionParam {
+               int startPointIndex;
+               int endPointIndex;
+               int centerPointIndex;
+               cv::Point2f centerPoint;
+               float shiftX;
+               float shiftY;
+               float scaleX;
+               float scaleY;
+               int mode;
+       };
+
+       class DecodeInfo {
+       private:
+               AnchorParam anchorParam;
+               std::vector<cv::Rect2f> anchorBoxes;
+               NMSParam nmsParam;
+               RotateParam rotParam;
+               RoiOptionParam roiOptParam;
+
         public:
-               class DecodeInfo {
-               public:
-                       class AnchorParam {
-                       public:
-                               int mode; /**< 0: generate anchor, 1:load pre-anchor*/
-                               int numLayers;
-                               float minScale;
-                               float maxScale;
-                               int inputSizeHeight;
-                               int inputSizeWidth;
-                               float anchorOffsetX;
-                               float anchorOffsetY;
-                               std::vector<int> strides;
-                               std::vector<float> aspectRatios;
-                               bool isReduceBoxedInLowestLayer;
-                               float interpolatedScaleAspectRatio;
-                               bool isFixedAnchorSize;
-                               bool isExponentialBoxScale;
-                               float xScale;
-                               float yScale;
-                               float wScale;
-                               float hScale;
-
-                               AnchorParam() = default;
-                               ~AnchorParam() =  default;
-                       };
-
-                       class NMSParam {
-                       public:
-                               inference_box_nms_type_e mode; /**< 0: standard */
-                               float iouThreshold;
-
-                               std::map<std::string, inference_box_nms_type_e> supportedBoxNmsTypes;
-
-                               NMSParam() : mode(INFERENCE_BOX_NMS_TYPE_NONE), iouThreshold(0.2f) {
-                                       supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD});
-                               };
-
-                               ~NMSParam() = default;
-                       };
-
-                       class RotateParam {
-                       public:
-                               int startPointIndex;
-                               int endPointIndex;
-                               cv::Point2f startPoint;
-                               cv::Point2f endPoint;
-                               float baseAngle;
-
-                               RotateParam() : startPointIndex(-1),
-                                                               endPointIndex(-1),
-                                                               startPoint(cv::Point2f(0.f,0.f)),
-                                                               endPoint(cv::Point2f(0.f,0.f)),
-                                                               baseAngle(0.f){};
-                               ~RotateParam() = default;
-                       };
-
-                       class RoiOptionParam {
-                       public:
-                               int startPointIndex;
-                               int endPointIndex;
-                               int centerPointIndex;
-                               cv::Point2f centerPoint;
-                               float shiftX;
-                               float shiftY;
-                               float scaleX;
-                               float scaleY;
-                               int mode;
-
-                               RoiOptionParam() : startPointIndex(-1),
-                                                                  endPointIndex(-1),
-                                                                  centerPointIndex(-1),
-                                                                  centerPoint(cv::Point2f(0.f, 0.f)),
-                                                                  shiftX(0.f), shiftY(0.f),
-                                                                  scaleX(1.f), scaleY(1.f),
-                                                                  mode(-1) {};
-                               ~RoiOptionParam() = default;
-                       };
-
-               private:
-                       AnchorParam anchorParam;
-                       std::vector<cv::Rect2f> anchorBoxes;
-                       NMSParam nmsParam;
-                       RotateParam rotParam;
-                       RoiOptionParam roiOptParam;
-
-               public:
-                       DecodeInfo() = default;
-                       ~DecodeInfo() = default;
-                       std::vector<cv::Rect2f>& GetAnchorBoxAll();
-                       bool IsAnchorBoxEmpty();
-                       void AddAnchorBox(cv::Rect2f& ahcnor);
-                       void ClearAnchorBox();
-
-                       // Anchor param
-                       int ParseAnchorParam(JsonObject *root);
-                       int GenerateAnchor();
-                       bool IsFixedAnchorSize();
-                       bool IsExponentialBoxScale();
-                       float GetAnchorXscale();
-                       float GetAnchorYscale();
-                       float GetAnchorWscale();
-                       float GetAnchorHscale();
-                       float CalculateScale(float min, float max, int index, int maxStride);
-
-                       // Nms param
-                       int ParseNms(JsonObject *root);
-                       int GetNmsMode();
-                       float GetNmsIouThreshold();
-
-                       // Rotate param
-                       int ParseRotate(JsonObject *root);
-                       int GetRotStartPointIndex();
-                       int GetRotEndPointIndex();
-                       float GetBaseAngle();
-
-                       // Roi option param
-                       int ParseRoiOption(JsonObject *root);
-                       int GetRoiMode();
-                       int GetRoiCenterPointIndex();
-                       int GetRoiStartPointIndex();
-                       int GetRoiEndPointIndex();
-                       float GetShiftX();
-                       float GetShiftY();
-                       float GetScaleX();
-                       float GetScaleY();
-               };
+               DecodeInfo() {
+                       nmsParam.mode = INFERENCE_BOX_NMS_TYPE_NONE;
+                       nmsParam.iouThreshold = 0.2f;
+                       nmsParam.supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD});
+
+                       rotParam.startPointIndex = -1;
+                       rotParam.endPointIndex = -1;
+                       rotParam.startPoint = cv::Point2f(0.f,0.f);
+                       rotParam.endPoint = cv::Point2f(0.f,0.f);
+                       rotParam.baseAngle = 0.f;
+
+                       roiOptParam.startPointIndex = -1;
+                       roiOptParam.endPointIndex = -1;
+                       roiOptParam.centerPointIndex = -1;
+                       roiOptParam.centerPoint = cv::Point2f(0.f, 0.f);
+                       roiOptParam.shiftX = 0.f;
+                       roiOptParam.shiftY = 0.f;
+                       roiOptParam.scaleX = 1.f;
+                       roiOptParam.scaleY = 1.f;
+                       roiOptParam.mode = -1;
+               }
+
+               ~DecodeInfo() = default;
+
+               std::vector<cv::Rect2f>& GetAnchorBoxAll();
+               bool IsAnchorBoxEmpty();
+               void AddAnchorBox(cv::Rect2f& ahcnor);
+               void ClearAnchorBox();
+
+               // Anchor param
+               int ParseAnchorParam(JsonObject *root);
+               int GenerateAnchor();
+               bool IsFixedAnchorSize();
+               bool IsExponentialBoxScale();
+               float GetAnchorXscale();
+               float GetAnchorYscale();
+               float GetAnchorWscale();
+               float GetAnchorHscale();
+               float CalculateScale(float min, float max, int index, int maxStride);
+
+               // Nms param
+               int ParseNms(JsonObject *root);
+               int GetNmsMode();
+               float GetNmsIouThreshold();
+
+               // Rotate param
+               int ParseRotate(JsonObject *root);
+               int GetRotStartPointIndex();
+               int GetRotEndPointIndex();
+               float GetBaseAngle();
+
+               // Roi option param
+               int ParseRoiOption(JsonObject *root);
+               int GetRoiMode();
+               int GetRoiCenterPointIndex();
+               int GetRoiStartPointIndex();
+               int GetRoiEndPointIndex();
+               float GetShiftX();
+               float GetShiftY();
+               float GetScaleX();
+               float GetScaleY();
+       };
  
+       class BoxInfo
+       {
         private:
                 std::string name;
                 DimInfo dimInfo;
@@ -275,25 +265,16 @@ namespace inference
                 int ParseNumber(JsonObject *root);
         };
  
+       struct HeatMapInfo {
+               int wIdx;
+               int hIdx;
+               int cIdx;
+               float nmsRadius;
+               inference_tensor_shape_type_e shapeType;
+       };
+
         class Landmark
         {
-       public:
-               class DecodeInfo {
-               public:
-                       class HeatMapInfo {
-                       public:
-                               int wIdx;
-                               int hIdx;
-                               int cIdx;
-                               inference_tensor_shape_type_e shapeType;
-                               float nmsRadius;
-                               HeatMapInfo() = default;
-                               ~HeatMapInfo() = default;
-                       };
-                       HeatMapInfo heatMap;
-                       DecodeInfo() = default;
-                       ~DecodeInfo() = default;
-               };
         private:
                 std::string name;
                 DimInfo dimInfo;
@@ -303,7 +284,7 @@ namespace inference
                 inference_landmark_decoding_type_e decodingType; /**< 0: decoding  unnecessary,
                                                                                                                         1: decoding heatmap,
                                                                                                                         2: decoding heatmap with refinement */
-               DecodeInfo decodingInfo;
+               HeatMapInfo heatMapInfo;
  
                 std::map<std::string, inference_landmark_type_e> supportedLandmarkTypes;
                 std::map<std::string, inference_landmark_coorindate_type_e> supportedLandmarkCoordinateTypes;
@@ -318,7 +299,7 @@ namespace inference
                 int GetOffset();
                 inference_landmark_coorindate_type_e GetCoordinate();
                 inference_landmark_decoding_type_e GetDecodingType();
-               DecodeInfo& GetDecodingInfo();
+               HeatMapInfo& GetHeatMapInfo();
  
                 int ParseLandmark(JsonObject *root);
         };
@@ -366,7 +347,7 @@ namespace inference
                 Edge() = default;
                 ~Edge() = default;
                 int ParseEdge(JsonObject *root);
-               std::vector<std::pair<int, int>>& GetEdgesAll();
+               std::vector<std::pair<int, int>>& GetEdgesAll() { return edges; }
         };
  
         class OutputMetadata
@@ -416,20 +397,38 @@ namespace inference
                  */
                 int Parse(JsonObject *root);
  
-               bool IsParsed();
-               ScoreInfo& GetScore();
-               BoxInfo& GetBox();
-               Label& GetLabel();
-               Number& GetNumber();
-               Landmark& GetLandmark();
-               OffsetVec& GetOffset();
-               std::vector<DispVec>& GetDispVecAll();
-               Edge& GetEdge();
+               bool IsParsed() { return parsed; }
+
+               std::string GetScoreName() { return score.GetName(); }
+               DimInfo GetScoreDimInfo() { return score.GetDimInfo(); }
+               inference_score_type_e GetScoreType() { return score.GetType(); }
+               double GetScoreThreshold() { return score.GetThresHold(); }
+               int GetScoreTopNumber() { return score.GetTopNumber(); }
+               std::shared_ptr<DeQuantization> GetScoreDeQuant() { return score.GetDeQuant(); }
+               std::string GetBoxName() { return box.GetName(); }
+               DimInfo GetBoxDimInfo() { return box.GetDimInfo(); }
+               std::vector<int> GetBoxOrder() { return box.GetOrder(); }
+               DecodeInfo& GetBoxDecodeInfo() { return box.GetDecodeInfo(); }
+               inference_box_type_e GetBoxType() { return box.GetType(); }
+               int GetScoreCoordinate() { return box.GetCoordinate(); }
+               std::string GetLabelName() { return label.GetName(); }
+               std::string GetNumberName() { return number.GetName(); }
+               DimInfo GetNumberDimInfo() { return number.GetDimInfo(); }
+               std::string GetLandmarkName() { return landmark.GetName(); }
+               int GetLandmarkOffset() { return landmark.GetOffset(); }
+               inference_landmark_type_e GetLandmarkType() { return landmark.GetType(); }
+               DimInfo GetLandmarkDimInfo() { return landmark.GetDimInfo(); }
+               HeatMapInfo& GetLandmarkHeatMapInfo() { return landmark.GetHeatMapInfo(); }
+               inference_landmark_coorindate_type_e GetLandmarkCoordinate() { return landmark.GetCoordinate(); }
+               inference_landmark_decoding_type_e GetLandmarkDecodingType() { return landmark.GetDecodingType(); }
+               std::string GetOffsetVecName() { return offsetVec.GetName(); }
+               inference_box_decoding_type_e GetBoxDecodingType() { return box.GetDecodingType(); }
+               std::vector<DispVec>& GetDispVecAll() { return dispVecs; }
+               std::vector<std::pair<int, int>>& GetEdges() { return edgeMap.GetEdgesAll(); }
                 template <typename T>
                 static T GetSupportedType(JsonObject* root, std::string typeName,
                                                                 std::map<std::string, T>& supportedTypes);
         };
-
  } /* Inference */
  } /* MediaVision */
  
diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp

index c25a4a4..05c9c07 100755 (executable)
--- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp
@@ -371,10 +371,9 @@ namespace inference
                 mConfig.mDataType = static_cast<mv_inference_data_type_e>(dataType);
                 mConfig.mInputLayerNames = names;
  
-               const InputMetadata& inputMeta = mMetadata.GetInputMeta();
-               if (inputMeta.parsed) {
+               if (mMetadata.GetInputMeta().IsParsed()) {
                         LOGI("use input meta");
-                       auto& layerInfo = inputMeta.layer.begin()->second;
+                       auto& layerInfo = mMetadata.GetInputMeta().GetLayer().begin()->second;
                         if (layerInfo.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { // NCHW
                                 mConfig.mTensorInfo.ch = layerInfo.dims[1];
                                 mConfig.mTensorInfo.dim = layerInfo.dims[0];
@@ -389,8 +388,8 @@ namespace inference
                                 LOGE("Invalid shape type[%d]", layerInfo.shapeType);
                         }
  
-                       if (!inputMeta.option.empty()) {
-                               auto& option = inputMeta.option.begin()->second;
+                       if (!mMetadata.GetInputMeta().GetOption().empty()) {
+                               auto& option = mMetadata.GetInputMeta().GetOption().begin()->second;
                                 if (option.normalization.use) {
                                         mConfig.mMeanValue = option.normalization.mean[0];
                                         mConfig.mStdValue = option.normalization.std[0];
@@ -399,7 +398,7 @@ namespace inference
  
                         mConfig.mDataType = layerInfo.dataType;
                         mConfig.mInputLayerNames.clear();
-                       for (auto& layer : inputMeta.layer) {
+                       for (auto& layer : mMetadata.GetInputMeta().GetLayer()) {
                                 mConfig.mInputLayerNames.push_back(layer.first);
                         }
                 }
@@ -444,25 +443,25 @@ namespace inference
                 mConfig.mOutputLayerNames = names;
  
                 OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
-               if (outputMeta.IsParsed()) {
+               if (mMetadata.GetOutputMeta().IsParsed()) {
                         mConfig.mOutputLayerNames.clear();
-                       if (!outputMeta.GetScore().GetName().empty())
-                               mConfig.mOutputLayerNames.push_back(outputMeta.GetScore().GetName());
+                       if (!outputMeta.GetScoreName().empty())
+                               mConfig.mOutputLayerNames.push_back(outputMeta.GetScoreName());
  
-                       if (!outputMeta.GetBox().GetName().empty())
-                               mConfig.mOutputLayerNames.push_back(outputMeta.GetBox().GetName());
+                       if (!outputMeta.GetBoxName().empty())
+                               mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxName());
  
-                       if (!outputMeta.GetLabel().GetName().empty())
-                               mConfig.mOutputLayerNames.push_back(outputMeta.GetLabel().GetName());
+                       if (!outputMeta.GetLabelName().empty())
+                               mConfig.mOutputLayerNames.push_back(outputMeta.GetLabelName());
  
-                       if (!outputMeta.GetNumber().GetName().empty())
-                               mConfig.mOutputLayerNames.push_back(outputMeta.GetNumber().GetName());
+                       if (!outputMeta.GetNumberName().empty())
+                               mConfig.mOutputLayerNames.push_back(outputMeta.GetNumberName());
  
-                       if (!outputMeta.GetLandmark().GetName().empty())
-                               mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmark().GetName());
+                       if (!outputMeta.GetLandmarkName().empty())
+                               mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmarkName());
  
-                       if (!outputMeta.GetOffset().GetName().empty())
-                               mConfig.mOutputLayerNames.push_back(outputMeta.GetOffset().GetName());
+                       if (!outputMeta.GetOffsetVecName().empty())
+                               mConfig.mOutputLayerNames.push_back(outputMeta.GetOffsetVecName());
  
                         for (auto& dispVec : outputMeta.GetDispVecAll()) {
                                 mConfig.mOutputLayerNames.push_back(dispVec.GetName());
@@ -1044,12 +1043,11 @@ namespace inference
                         return MEDIA_VISION_ERROR_INVALID_PARAMETER;
                 }
  
-               const InputMetadata& inputMeta = mMetadata.GetInputMeta();
-               if (inputMeta.parsed) {
+               if (mMetadata.GetInputMeta().IsParsed()) {
                         for (auto& buffer : mInputTensorBuffers.getAllTensorBuffer()) {
                                 inference_engine_tensor_buffer& tensor_buffer = buffer.second;
-                               const LayerInfo& layerInfo = inputMeta.layer.at(buffer.first);
-                               const Options& opt = inputMeta.option.empty() ? Options() : inputMeta.option.at(buffer.first);
+                               const LayerInfo& layerInfo = mMetadata.GetInputMeta().GetLayer().at(buffer.first);
+                               const Options& opt = mMetadata.GetInputMeta().GetOption().empty() ? Options() : mMetadata.GetInputMeta().GetOption().at(buffer.first);
  
                                 int data_type = ConvertToCv(tensor_buffer.data_type);
  
@@ -1095,51 +1093,50 @@ namespace inference
  
         int Inference::GetClassficationResults(ImageClassificationResults &results)
         {
-               OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
                 // Will contain top N results in ascending order.
                 std::vector<std::pair<float, int>> topScore;
                 auto threadHold = mConfig.mConfidenceThresHold;
  
                 results.number_of_classes = 0;
  
-               if (outputMeta.IsParsed()) {
-                       auto& info = outputMeta.GetScore();
-                       std::vector<int> indexes = info.GetDimInfo().GetValidIndexAll();
+               if (mMetadata.GetOutputMeta().IsParsed()) {
+                       OutputMetadata outputMetadata = mMetadata.GetOutputMeta();
+                       std::vector<int> indexes = outputMetadata.GetScoreDimInfo().GetValidIndexAll();
  
                         if (indexes.size() != 1) {
                                 LOGE("Invalid dim size. It should be 1");
                                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
                         }
  
-                       int classes = mOutputLayerProperty.layers[info.GetName()].shape[indexes[0]];
+                       int classes = mOutputLayerProperty.layers[outputMetadata.GetScoreName()].shape[indexes[0]];
  
-                       if (!mOutputTensorBuffers.exist(info.GetName())) {
+                       if (!mOutputTensorBuffers.exist(outputMetadata.GetScoreName())) {
                                 LOGE("output buffe is NULL");
                                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
                         }
  
                         PostProcess postProc;
  
-                       postProc.ScoreClear(info.GetTopNumber());
-                       threadHold = info.GetThresHold();
+                       postProc.ScoreClear(outputMetadata.GetScoreTopNumber());
+                       threadHold = outputMetadata.GetScoreThreshold();
  
                         for (int cId = 0; cId < classes; ++cId) {
                                 float value = 0.0f;
  
                                 try {
-                                       value = mOutputTensorBuffers.getValue<float>(info.GetName(), cId);
+                                       value = mOutputTensorBuffers.getValue<float>(outputMetadata.GetScoreName(), cId);
                                 } catch (const std::exception& e) {
                                         LOGE(" Fail to get getValue with %s", e.what());
                                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
                                 }
  
-                               if (info.GetDeQuant()) {
+                               if (outputMetadata.GetScoreDeQuant()) {
                                         value = PostProcess::dequant(value,
-                                                                                       info.GetDeQuant()->GetScale(),
-                                                                                       info.GetDeQuant()->GetZeroPoint());
+                                                                                       outputMetadata.GetScoreDeQuant()->GetScale(),
+                                                                                       outputMetadata.GetScoreDeQuant()->GetZeroPoint());
                                 }
  
-                               if (info.GetType() == INFERENCE_SCORE_TYPE_SIGMOID)
+                               if (outputMetadata.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID)
                                         value = PostProcess::sigmoid(value);
  
                                 if (value < threadHold)
@@ -1212,46 +1209,45 @@ namespace inference
         int Inference::GetObjectDetectionResults(
                         ObjectDetectionResults *detectionResults)
         {
-               OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
-               if (outputMeta.IsParsed()) {
+               if (mMetadata.GetOutputMeta().IsParsed()) {
+                       OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
+
                         // decoding type
-                       auto& boxInfo = outputMeta.GetBox();
-                       auto& scoreInfo = outputMeta.GetScore();
-                       if (!mOutputTensorBuffers.exist(boxInfo.GetName()) ||
-                               !mOutputTensorBuffers.exist(scoreInfo.GetName()) ){
+                       if (!mOutputTensorBuffers.exist(outputMeta.GetBoxName()) ||
+                               !mOutputTensorBuffers.exist(outputMeta.GetScoreName()) ){
                                 LOGE("output buffers named of %s or %s are NULL",
-                                       boxInfo.GetName().c_str(), scoreInfo.GetName().c_str());
+                                       outputMeta.GetBoxName().c_str(), outputMeta.GetScoreName().c_str());
                                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
                         }
  
                         int boxOffset = 0;
                         int numberOfObjects = 0;
-                       if (boxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
-                               std::vector<int> boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll();
+                       if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
+                               std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
                                 if (boxIndexes.size() != 1) {
                                         LOGE("Invalid dim size. It should be 1");
                                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
                                 }
-                               boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]];
+                               boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
                         } else {
-                               std::vector<int> boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll();
+                               std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
                                 if (boxIndexes.size() != 1) {
                                         LOGE("Invalid dim size. It should be 1");
                                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
                                 }
-                               boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]];
+                               boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
  
-                               std::vector<int> scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll();
+                               std::vector<int> scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll();
                                 if (scoreIndexes.size() != 1) {
                                         LOGE("Invalid dim size. It should be 1");
                                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
                                 }
-                               numberOfObjects = mOutputLayerProperty.layers[scoreInfo.GetName()].shape[scoreIndexes[0]];
+                               numberOfObjects = mOutputLayerProperty.layers[outputMeta.GetScoreName()].shape[scoreIndexes[0]];
                         }
  
                         ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset,
-                                               static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()),
-                                               static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()),
+                                               static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()),
+                                               static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()),
                                                 numberOfObjects);
  
                         objDecoder.init();
@@ -1378,46 +1374,45 @@ namespace inference
         int
         Inference::GetFaceDetectionResults(FaceDetectionResults *detectionResults)
         {
-               OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
-               if (outputMeta.IsParsed()) {
+               if (mMetadata.GetOutputMeta().IsParsed()) {
+                       OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
+
                         // decoding type
-                       auto& boxInfo = outputMeta.GetBox();
-                       auto& scoreInfo = outputMeta.GetScore();
-                       if (!mOutputTensorBuffers.exist(boxInfo.GetName()) ||
-                               !mOutputTensorBuffers.exist(scoreInfo.GetName())){
+                       if (!mOutputTensorBuffers.exist(outputMeta.GetBoxName()) ||
+                               !mOutputTensorBuffers.exist(outputMeta.GetScoreName())){
                                 LOGE("output buffers named of %s or %s are NULL",
-                                       boxInfo.GetName().c_str(), scoreInfo.GetName().c_str());
+                                       outputMeta.GetBoxName().c_str(), outputMeta.GetScoreName().c_str());
                                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
                         }
  
                         int boxOffset = 0;
                         int numberOfFaces = 0;
-                       if (boxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
-                               std::vector<int> boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll();
+                       if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
+                               std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
                                 if (boxIndexes.size() != 1) {
                                         LOGE("Invalid dim size. It should be 1");
                                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
                                 }
-                               boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]];
+                               boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
                         } else {
-                               std::vector<int> boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll();
+                               std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
                                 if (boxIndexes.size() != 1) {
                                         LOGE("Invalid dim size. It should be 1");
                                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
                                 }
-                               boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]];
+                               boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
  
-                               std::vector<int> scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll();
+                               std::vector<int> scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll();
                                 if (scoreIndexes.size() != 1) {
                                         LOGE("Invaid dim size. It should be 1");
                                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
                                 }
-                               numberOfFaces = mOutputLayerProperty.layers[scoreInfo.GetName()].shape[scoreIndexes[0]];
+                               numberOfFaces = mOutputLayerProperty.layers[outputMeta.GetScoreName()].shape[scoreIndexes[0]];
                         }
  
                         ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset,
-                                               static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()),
-                                               static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()),
+                                               static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()),
+                                               static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()),
                                                 numberOfFaces);
  
                         objDecoder.init();
@@ -1540,32 +1535,30 @@ namespace inference
         {
                 LOGI("ENTER");
  
-               OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
-               if (outputMeta.IsParsed()) {
-                       auto& landmarkInfo = outputMeta.GetLandmark();
-                       auto& scoreInfo = outputMeta.GetScore();
+               if (mMetadata.GetOutputMeta().IsParsed()) {
+                       OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
  
-                       if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) ||
-                               !mOutputTensorBuffers.exist(scoreInfo.GetName())) {
+                       if (!mOutputTensorBuffers.exist(outputMeta.GetLandmarkName()) ||
+                               !mOutputTensorBuffers.exist(outputMeta.GetScoreName())) {
                                 LOGE("output buffers named of %s or %s are NULL",
-                                       landmarkInfo.GetName().c_str(), scoreInfo.GetName().c_str());
+                                       outputMeta.GetLandmarkName().c_str(), outputMeta.GetScoreName().c_str());
                                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
                         }
  
                         int heatMapWidth = 0;
                         int heatMapHeight = 0;
                         int heatMapChannel = 0;
-                       std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll();
+                       std::vector<int> channelIndexes = outputMeta.GetLandmarkDimInfo().GetValidIndexAll();
                         int number_of_landmarks = heatMapChannel;
  
-                       if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+                       if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
                                 LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]);
-                               number_of_landmarks = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]]
-                                                                       / landmarkInfo.GetOffset();
+                               number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]]
+                                                                       / outputMeta.GetLandmarkOffset();
                         } else {
-                               heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx];
-                               heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx];
-                               heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx];
+                               heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx];
+                               heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx];
+                               heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().cIdx];
                         }
  
                         LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel);
@@ -1585,13 +1578,13 @@ namespace inference
                         float inputW = 1.f;
                         float inputH = 1.f;
  
-                       if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
-                               inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth());
-                               inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight());
+                       if (outputMeta.GetLandmarkCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
+                               inputW = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth());
+                               inputH = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight());
                         }
  
-                       float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
-                                                                                                               outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius;
+                       float thresRadius = outputMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
+                                                                                                               outputMeta.GetLandmarkHeatMapInfo().nmsRadius;
  
                         poseDecoder.decode(inputW, inputH, thresRadius);
  
@@ -1639,17 +1632,15 @@ namespace inference
         {
                 LOGI("ENTER");
  
-               OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
                 auto poseResult = std::make_unique<mv_inference_pose_s>();
  
-               if (outputMeta.IsParsed()) {
-                       auto& landmarkInfo = outputMeta.GetLandmark();
-                       auto& scoreInfo = outputMeta.GetScore();
+               if (mMetadata.GetOutputMeta().IsParsed()) {
+                       OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
  
-                       if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) ||
-                               !mOutputTensorBuffers.exist(scoreInfo.GetName())) {
+                       if (!mOutputTensorBuffers.exist(outputMeta.GetLandmarkName()) ||
+                               !mOutputTensorBuffers.exist(outputMeta.GetScoreName())) {
                                 LOGE("output buffers named of %s or %s are NULL",
-                                       landmarkInfo.GetName().c_str(), scoreInfo.GetName().c_str());
+                                       outputMeta.GetLandmarkName().c_str(), outputMeta.GetScoreName().c_str());
                                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
                         }
  
@@ -1657,23 +1648,23 @@ namespace inference
                         int heatMapHeight = 0;
                         int heatMapChannel = 0;
  
-                       if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
-                               heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx];
-                               heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx];
-                               heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx];
+                       if (outputMeta.GetLandmarkDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+                               heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx];
+                               heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx];
+                               heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().cIdx];
                         }
  
                         LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel);
  
-                       std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll();
+                       std::vector<int> channelIndexes = outputMeta.GetLandmarkDimInfo().GetValidIndexAll();
  
                         // If INFERENCE_LANDMARK_DECODING_TYPE_BYPASS,
                         // the landmarkChannel is guessed from the shape of the landmark output tensor.
                         // Otherwise, it is guessed from the heatMapChannel. (heatMapChannel is used in default).
                         int landmarkChannel = heatMapChannel;
  
-                       if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS)
-                               landmarkChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] / landmarkInfo.GetOffset();
+                       if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS)
+                               landmarkChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]] / outputMeta.GetLandmarkOffset();
  
                         poseResult->number_of_landmarks_per_pose = mUserListName.empty() ? landmarkChannel :
                                                                                                                 static_cast<int>(mUserListName.size());
@@ -1700,11 +1691,11 @@ namespace inference
  
                         float inputW = 1.f;
                         float inputH = 1.f;
-                       float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
-                                                                                                               outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius;
-                       if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
-                               inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth());
-                               inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight());
+                       float thresRadius = outputMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
+                                                                                                               outputMeta.GetLandmarkHeatMapInfo().nmsRadius;
+                       if (outputMeta.GetLandmarkCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
+                               inputW = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth());
+                               inputH = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight());
                         }
  
                         poseDecoder.decode(inputW, inputH, thresRadius);
diff --git a/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp

index 2831edc..4d5e36e 100755 (executable)
--- a/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp
@@ -27,16 +27,16 @@ namespace inference
  {
         int ObjectDecoder::init()
         {
-               if (mBoxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
-                       if (!mTensorBuffer.exist(mMeta.GetLabel().GetName()) ||
-                               !mTensorBuffer.exist(mMeta.GetNumber().GetName()) ) {
+               if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
+                       if (!mTensorBuffer.exist(mMeta.GetLabelName()) ||
+                               !mTensorBuffer.exist(mMeta.GetNumberName()) ) {
                                 LOGE("buffer buffers named of %s or %s are NULL",
-                                       mMeta.GetLabel().GetName().c_str(),
-                                       mMeta.GetNumber().GetName().c_str());
+                                       mMeta.GetLabelName().c_str(), mMeta.GetNumberName().c_str());
+
                                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
                         }
  
-                       std::vector<int> indexes = mMeta.GetNumber().GetDimInfo().GetValidIndexAll();
+                       std::vector<int> indexes = mMeta.GetNumberDimInfo().GetValidIndexAll();
                         if (indexes.size() != 1) {
                                 LOGE("Invalid dim size. It should be 1");
                                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
@@ -45,9 +45,9 @@ namespace inference
                         // mNumberOfObjects is set again if INFERENCE_BOX_DECODING_TYPE_BYPASS.
                         // Otherwise it is set already within ctor.
                         mNumberOfOjects = mTensorBuffer.getValue<int>(
-                                                               mMeta.GetNumber().GetName(), indexes[0]);
+                                                               mMeta.GetNumberName(), indexes[0]);
                 } else {
-                       if (mBoxInfo.GetDecodeInfo().IsAnchorBoxEmpty()) {
+                       if (mMeta.GetBoxDecodeInfo().IsAnchorBoxEmpty()) {
                                 LOGE("Anchor boxes are required but empty.");
                                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
                         }
@@ -58,32 +58,32 @@ namespace inference
  
         float ObjectDecoder::decodeScore(int idx)
         {
-               float score = mTensorBuffer.getValue<float>(mScoreInfo.GetName(), idx);
-               if (mScoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) {
+               float score = mTensorBuffer.getValue<float>(mMeta.GetScoreName(), idx);
+               if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) {
                         score = PostProcess::sigmoid(score);
                 }
  
-               return score < mScoreInfo.GetThresHold() ? 0.0f : score;
+               return score < mMeta.GetScoreThreshold() ? 0.0f : score;
         }
  
         Box ObjectDecoder::decodeBox(int idx, float score, int label)
         {
                 // assume type is (cx,cy,w,h)
                 // left or cx
-               float cx = mTensorBuffer.getValue<float>(mBoxInfo.GetName(),
-                                                                       idx * mBoxOffset + mBoxInfo.GetOrder()[0]);
+               float cx = mTensorBuffer.getValue<float>(mMeta.GetBoxName(),
+                                                                       idx * mBoxOffset + mMeta.GetBoxOrder()[0]);
                 // top or cy
-               float cy = mTensorBuffer.getValue<float>(mBoxInfo.GetName(),
-                                                                       idx * mBoxOffset + mBoxInfo.GetOrder()[1]);
+               float cy = mTensorBuffer.getValue<float>(mMeta.GetBoxName(),
+                                                                       idx * mBoxOffset + mMeta.GetBoxOrder()[1]);
                 // right or width
-               float cWidth = mTensorBuffer.getValue<float>(mBoxInfo.GetName(),
-                                                                       idx * mBoxOffset + mBoxInfo.GetOrder()[2]);
+               float cWidth = mTensorBuffer.getValue<float>(mMeta.GetBoxName(),
+                                                                       idx * mBoxOffset + mMeta.GetBoxOrder()[2]);
                 // bottom or height
-               float cHeight = mTensorBuffer.getValue<float>(mBoxInfo.GetName(),
-                                                                       idx * mBoxOffset + mBoxInfo.GetOrder()[3]);
+               float cHeight = mTensorBuffer.getValue<float>(mMeta.GetBoxName(),
+                                                                       idx * mBoxOffset + mMeta.GetBoxOrder()[3]);
  
                 // convert type to ORIGIN_CENTER if ORIGIN_LEFTTOP
-               if (mBoxInfo.GetType() == INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP) {
+               if (mMeta.GetBoxType() == INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP) {
                         float tmpCx = cx;
                         float tmpCy = cy;
                         cx = (cx + cWidth) * 0.5f; // (left + right)/2
@@ -93,7 +93,7 @@ namespace inference
                 }
  
                 // convert coordinate to RATIO if PIXEL
-               if (mBoxInfo.GetCoordinate() == INFERENCE_BOX_COORDINATE_TYPE_PIXEL) {
+               if (mMeta.GetScoreCoordinate() == INFERENCE_BOX_COORDINATE_TYPE_PIXEL) { // NOTE(review): replaces mBoxInfo.GetCoordinate(); accessor says "Score" but is compared with a BOX coordinate enum - confirm it returns the box coordinate
                         cx /= mScaleW;
                         cy /= mScaleH;
                         cWidth /= mScaleW;
@@ -101,9 +101,9 @@ namespace inference
                 }
  
                 Box box = {
-                       .index = mMeta.GetLabel().GetName().empty() ?
+                       .index = mMeta.GetLabelName().empty() ?
                                                 label :
-                                               mTensorBuffer.getValue<int>(mMeta.GetLabel().GetName(), idx),
+                                               mTensorBuffer.getValue<int>(mMeta.GetLabelName(), idx),
                         .score = score,
                         .location = cv::Rect2f(cx, cy, cWidth, cHeight)
                 };
@@ -116,26 +116,26 @@ namespace inference
                 // location coordinate of box, the output of decodeBox(), is relative between 0 ~ 1
                 Box box = decodeBox(anchorIdx, score, idx);
  
-               if (mBoxInfo.GetDecodeInfo().IsFixedAnchorSize()) {
+               if (mMeta.GetBoxDecodeInfo().IsFixedAnchorSize()) {
                         box.location.x += anchor.x;
                         box.location.y += anchor.y;
                 } else {
-                       box.location.x = box.location.x / mBoxInfo.GetDecodeInfo().GetAnchorXscale() *
+                       box.location.x = box.location.x / mMeta.GetBoxDecodeInfo().GetAnchorXscale() *
                                                          anchor.width + anchor.x;
-                       box.location.y = box.location.y / mBoxInfo.GetDecodeInfo().GetAnchorYscale() *
+                       box.location.y = box.location.y / mMeta.GetBoxDecodeInfo().GetAnchorYscale() *
                                                          anchor.height + anchor.y;
                 }
  
-               if (mBoxInfo.GetDecodeInfo().IsExponentialBoxScale()) {
+               if (mMeta.GetBoxDecodeInfo().IsExponentialBoxScale()) {
                         box.location.width = anchor.width *
-                                               std::exp(box.location.width/mBoxInfo.GetDecodeInfo().GetAnchorWscale());
+                                               std::exp(box.location.width / mMeta.GetBoxDecodeInfo().GetAnchorWscale());
                         box.location.height = anchor.height *
-                                               std::exp(box.location.height/mBoxInfo.GetDecodeInfo().GetAnchorHscale());
+                                               std::exp(box.location.height / mMeta.GetBoxDecodeInfo().GetAnchorHscale());
                 } else {
                         box.location.width = anchor.width *
-                                               box.location.width/mBoxInfo.GetDecodeInfo().GetAnchorWscale();
+                                               box.location.width / mMeta.GetBoxDecodeInfo().GetAnchorWscale();
                         box.location.height = anchor.height *
-                                               box.location.height/mBoxInfo.GetDecodeInfo().GetAnchorHscale();
+                                               box.location.height / mMeta.GetBoxDecodeInfo().GetAnchorHscale();
                 }
  
                 return box;
@@ -147,7 +147,7 @@ namespace inference
                 int ret = MEDIA_VISION_ERROR_NONE;
  
                 for (int idx = 0; idx < mNumberOfOjects; ++idx) {
-                       if (mBoxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
+                       if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
                                 float score = decodeScore(idx);
                                 if (score <= 0.0f)
                                         continue;
@@ -158,7 +158,7 @@ namespace inference
                                 int anchorIdx = -1;
  
                                 Boxes boxes;
-                               for (auto& anchorBox : mBoxInfo.GetDecodeInfo().GetAnchorBoxAll()) {
+                               for (auto& anchorBox : mMeta.GetBoxDecodeInfo().GetAnchorBoxAll()) {
                                         anchorIdx++;
  
                                         float score = decodeScore(anchorIdx * mNumberOfOjects + idx);
@@ -176,8 +176,8 @@ namespace inference
                 if (!boxList.empty()) {
                         PostProcess postProc;
                         ret = postProc.Nms(boxList,
-                                       mBoxInfo.GetDecodeInfo().GetNmsMode(),
-                                       mBoxInfo.GetDecodeInfo().GetNmsIouThreshold(),
+                                       mMeta.GetBoxDecodeInfo().GetNmsMode(),
+                                       mMeta.GetBoxDecodeInfo().GetNmsIouThreshold(),
                                         mResultBoxes);
                         if (ret != MEDIA_VISION_ERROR_NONE) {
                                 LOGE("Fail to non-maximum suppression[%d]", ret);
diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp

index 8dec322..176b0eb 100755 (executable)
--- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
@@ -35,6 +35,7 @@ namespace inference
                         score(),
                         box(),
                         label(),
+                       number(),
                         landmark(),
                         offsetVec(),
                         dispVecs(),
@@ -47,8 +48,8 @@ namespace inference
                         name(),
                         dimInfo(),
                         threshold(0.0),
-                       type(INFERENCE_SCORE_TYPE_NORMAL),
                         topNumber(1),
+                       type(INFERENCE_SCORE_TYPE_NORMAL),
                         deQuantization(nullptr)
         {
                 // Score type
@@ -195,22 +196,22 @@ namespace inference
                 return MEDIA_VISION_ERROR_NONE;
         }
  
-       void BoxInfo::DecodeInfo::AddAnchorBox(cv::Rect2f& anchor)
+       void DecodeInfo::AddAnchorBox(cv::Rect2f& anchor)
         {
                 anchorBoxes.push_back(anchor);
         }
  
-       void BoxInfo::DecodeInfo::ClearAnchorBox()
+       void DecodeInfo::ClearAnchorBox()
         {
                 anchorBoxes.clear();
         }
  
-       std::vector<cv::Rect2f>& BoxInfo::DecodeInfo::GetAnchorBoxAll()
+       std::vector<cv::Rect2f>& DecodeInfo::GetAnchorBoxAll()
         {
                 return anchorBoxes;
         }
  
-       bool BoxInfo::DecodeInfo::IsAnchorBoxEmpty()
+       bool DecodeInfo::IsAnchorBoxEmpty()
         {
                 return anchorBoxes.empty();
         }
@@ -374,7 +375,7 @@ namespace inference
  
         }
  
-       int BoxInfo::DecodeInfo::ParseAnchorParam(JsonObject *root)
+       int DecodeInfo::ParseAnchorParam(JsonObject *root)
         {
                 JsonObject *object = json_object_get_object_member(root, "anchor") ;
  
@@ -420,45 +421,43 @@ namespace inference
                 return MEDIA_VISION_ERROR_NONE;
         }
  
-       float BoxInfo::DecodeInfo::CalculateScale(float min, float max, int index, int maxStride)
+       float DecodeInfo::CalculateScale(float min, float max, int index, int maxStride)
         {
                 return min + (max - min) * 1.0 * index / (maxStride - 1.0f);
         }
  
-       bool BoxInfo::DecodeInfo::IsFixedAnchorSize()
+       bool DecodeInfo::IsFixedAnchorSize()
         {
                 return this->anchorParam.isFixedAnchorSize;;
         }
  
-       bool BoxInfo::DecodeInfo::IsExponentialBoxScale()
+       bool DecodeInfo::IsExponentialBoxScale()
         {
                 return this->anchorParam.isExponentialBoxScale;
         }
  
-       float BoxInfo::DecodeInfo::GetAnchorXscale()
+       float DecodeInfo::GetAnchorXscale()
         {
                 return this->anchorParam.xScale;
         }
  
-       float BoxInfo::DecodeInfo::GetAnchorYscale()
+       float DecodeInfo::GetAnchorYscale()
         {
                 return this->anchorParam.yScale;
         }
  
-       float BoxInfo::DecodeInfo::GetAnchorWscale()
+       float DecodeInfo::GetAnchorWscale()
         {
                 return this->anchorParam.wScale;
         }
  
-       float BoxInfo::DecodeInfo::GetAnchorHscale()
+       float DecodeInfo::GetAnchorHscale()
         {
                 return this->anchorParam.hScale;
         }
  
-       int BoxInfo::DecodeInfo::GenerateAnchor()
+       int DecodeInfo::GenerateAnchor()
         {
-               //BoxInfo::DecodeInfo& decodeInfo = box.GetDecodeInfo();
-
                 if (this->anchorParam.strides.empty() ||
                         this->anchorParam.aspectRatios.empty()) {
                         LOGE("Invalid anchor parameters");
@@ -552,7 +551,7 @@ namespace inference
                 return MEDIA_VISION_ERROR_NONE;
         }
  
-       int BoxInfo::DecodeInfo::ParseNms(JsonObject *root)
+       int DecodeInfo::ParseNms(JsonObject *root)
         {
                 if (!json_object_has_member(root, "nms")) {
                         LOGI("nms is empty. skip it");
@@ -572,17 +571,17 @@ namespace inference
                 return MEDIA_VISION_ERROR_NONE;
         }
  
-       int BoxInfo::DecodeInfo::GetNmsMode()
+       int DecodeInfo::GetNmsMode()
         {
                 return this->nmsParam.mode;
         }
  
-       float BoxInfo::DecodeInfo::GetNmsIouThreshold()
+       float DecodeInfo::GetNmsIouThreshold()
         {
                 return this->nmsParam.iouThreshold;
         }
  
-       int BoxInfo::DecodeInfo::ParseRotate(JsonObject *root)
+       int DecodeInfo::ParseRotate(JsonObject *root)
         {
                 if (!json_object_has_member(root, "rotate")) {
                         LOGI("rotate is empty. skip it");
@@ -597,62 +596,62 @@ namespace inference
                 return MEDIA_VISION_ERROR_NONE;
         }
  
-       int BoxInfo::DecodeInfo::GetRotStartPointIndex()
+       int DecodeInfo::GetRotStartPointIndex()
         {
                 return this->rotParam.startPointIndex;
         }
  
-       int BoxInfo::DecodeInfo::GetRotEndPointIndex()
+       int DecodeInfo::GetRotEndPointIndex()
         {
                 return this->rotParam.endPointIndex;
         }
  
-       float BoxInfo::DecodeInfo::GetBaseAngle()
+       float DecodeInfo::GetBaseAngle()
         {
                 return this->rotParam.baseAngle;
         }
  
-       int BoxInfo::DecodeInfo::GetRoiMode()
+       int DecodeInfo::GetRoiMode()
         {
                 return this->roiOptParam.mode;
         }
  
-       int BoxInfo::DecodeInfo::GetRoiStartPointIndex()
+       int DecodeInfo::GetRoiStartPointIndex()
         {
                 return this->roiOptParam.startPointIndex;
         }
  
-       int BoxInfo::DecodeInfo::GetRoiEndPointIndex()
+       int DecodeInfo::GetRoiEndPointIndex()
         {
                 return this->roiOptParam.endPointIndex;
         }
  
-       int BoxInfo::DecodeInfo::GetRoiCenterPointIndex()
+       int DecodeInfo::GetRoiCenterPointIndex()
         {
                 return this->roiOptParam.centerPointIndex;
         }
  
-       float BoxInfo::DecodeInfo::GetShiftX()
+       float DecodeInfo::GetShiftX()
         {
                 return this->roiOptParam.shiftX;
         }
  
-       float BoxInfo::DecodeInfo::GetShiftY()
+       float DecodeInfo::GetShiftY()
         {
                 return this->roiOptParam.shiftY;
         }
  
-       float BoxInfo::DecodeInfo::GetScaleX()
+       float DecodeInfo::GetScaleX()
         {
                 return this->roiOptParam.scaleX;
         }
  
-       float BoxInfo::DecodeInfo::GetScaleY()
+       float DecodeInfo::GetScaleY()
         {
                 return this->roiOptParam.scaleY;
         }
  
-       int BoxInfo::DecodeInfo::ParseRoiOption(JsonObject *root)
+       int DecodeInfo::ParseRoiOption(JsonObject *root)
         {
                 if (!json_object_has_member(root, "roi")) {
                         LOGI("roi is empty. skip it");
@@ -672,56 +671,6 @@ namespace inference
                 return MEDIA_VISION_ERROR_NONE;
         }
  
-       ScoreInfo& OutputMetadata::GetScore()
-       {
-               return score;
-       }
-
-       BoxInfo& OutputMetadata::GetBox()
-       {
-               return box;
-       }
-
-       Label& OutputMetadata::GetLabel()
-       {
-               return label;
-       }
-
-       Number& OutputMetadata::GetNumber()
-       {
-               return number;
-       }
-
-       Landmark& OutputMetadata::GetLandmark()
-       {
-               return landmark;
-       }
-
-       OffsetVec& OutputMetadata::GetOffset()
-       {
-               return offsetVec;
-       }
-
-       std::vector<DispVec>& OutputMetadata::GetDispVecAll()
-       {
-               return dispVecs;
-       }
-
-       Edge& OutputMetadata::GetEdge()
-       {
-               return edgeMap;
-       }
-
-       std::vector<std::pair<int, int>>& Edge::GetEdgesAll()
-       {
-               return edges;
-       }
-
-       bool OutputMetadata::IsParsed()
-       {
-               return parsed;
-       }
-
         Landmark::Landmark() :
                         name(),
                         dimInfo(),
@@ -729,7 +678,7 @@ namespace inference
                         offset(),
                         coordinate(INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO),
                         decodingType(INFERENCE_LANDMARK_DECODING_TYPE_BYPASS),
-                       decodingInfo()
+                       heatMapInfo()
  
         {
                 supportedLandmarkTypes.insert({"2D_SINGLE", INFERENCE_LANDMARK_TYPE_2D_SINGLE});
@@ -805,9 +754,9 @@ namespace inference
                 return decodingType;
         }
  
-       Landmark::DecodeInfo& Landmark::GetDecodingInfo()
+       HeatMapInfo& Landmark::GetHeatMapInfo()
         {
-               return decodingInfo;
+               return heatMapInfo;
         }
  
         int OutputMetadata::ParseLandmark(JsonObject *root)
@@ -863,26 +812,26 @@ namespace inference
  
                         JsonObject *object = json_object_get_object_member(cObject, "heatmap") ;
                         try {
-                               landmark.GetDecodingInfo().heatMap.shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes);
+                               landmark.GetHeatMapInfo().shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes);
                         } catch (const std::exception& e) {
                                 LOGE("Invalid %s", e.what());
                                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
                         }
  
                         std::vector<int> heatMapIndexes = landmark.GetDimInfo().GetValidIndexAll();
-                       if (landmark.GetDecodingInfo().heatMap.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
-                               landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[0];
-                               landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[1];
-                               landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[2];
+                       if (landmark.GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
+                               landmark.GetHeatMapInfo().cIdx = heatMapIndexes[0];
+                               landmark.GetHeatMapInfo().hIdx = heatMapIndexes[1];
+                               landmark.GetHeatMapInfo().wIdx = heatMapIndexes[2];
                         } else {
-                               landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[0];
-                               landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[1];
-                               landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[2];
+                               landmark.GetHeatMapInfo().hIdx = heatMapIndexes[0];
+                               landmark.GetHeatMapInfo().wIdx = heatMapIndexes[1];
+                               landmark.GetHeatMapInfo().cIdx = heatMapIndexes[2];
                         }
  
                         if (json_object_has_member(object, "nms_radius")) {
-                               landmark.GetDecodingInfo().heatMap.nmsRadius = static_cast<float>(json_object_get_double_member(object, "nms_radius"));
-                               LOGI("nms is enabled with %3.f", landmark.GetDecodingInfo().heatMap.nmsRadius );
+                               landmark.GetHeatMapInfo().nmsRadius = static_cast<float>(json_object_get_double_member(object, "nms_radius"));
+                               LOGI("nms is enabled with %3.f", landmark.GetHeatMapInfo().nmsRadius );
                         }
                 }
  
diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp

index 1ae33a7..a1efd2d 100644 (file)
--- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
@@ -50,15 +50,13 @@ namespace inference
         {
                 LOGI("ENTER");
  
-               Landmark& landmarkInfo = mMeta.GetLandmark();
-
-               if (landmarkInfo.GetType() < INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
-                       landmarkInfo.GetType() > INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
+               if (mMeta.GetLandmarkType() < INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+                       mMeta.GetLandmarkType() > INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
                         LOGE("Not supported landmark type");
                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
                 }
  
-               if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+               if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
                         LOGI("Skip init");
                         return MEDIA_VISION_ERROR_NONE;
                 }
@@ -68,12 +66,11 @@ namespace inference
                 float score, localScore;
                 int idx;
                 bool isLocalMax;
-               ScoreInfo& scoreInfo = mMeta.GetScore();
  
                 mCandidates.clear();
  
-               if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
-                       landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
+               if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+                       mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
                         mCandidates.resize(mHeatMapChannel);
                 }
  
@@ -83,16 +80,16 @@ namespace inference
                                 for (c = 0; c < mHeatMapChannel; ++c, candidate++) {
                                         isLocalMax = true;
                                         idx = convertXYZtoX(x, y, c);
-                                       score = mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx);
-                                       if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) {
+                                       score = mTensorBuffer.getValue<float>(mMeta.GetScoreName(), idx);
+                                       if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) {
                                                 score = PostProcess::sigmoid(score);
                                         }
  
-                                       if (score < scoreInfo.GetThresHold())
+                                       if (score < mMeta.GetScoreThreshold())
                                                 continue;
  
-                                       if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
-                                               landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
+                                       if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+                                               mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
                                                 if (score <= candidate->score)
                                                         continue;
  
@@ -110,8 +107,8 @@ namespace inference
                                                 for (dy = sy; dy < ey; ++dy) {
                                                         for (dx = sx; dx < ex; ++dx) {
                                                                 idx = convertXYZtoX(dx, dy, c);
-                                                               localScore =  mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx);
-                                                               if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) {
+                                                               localScore =  mTensorBuffer.getValue<float>(mMeta.GetScoreName(), idx);
+                                                               if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) {
                                                                         localScore = PostProcess::sigmoid(localScore);
                                                                 }
                                                                 if (localScore > score) {
@@ -159,7 +156,7 @@ namespace inference
  
         int PoseDecoder::getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal)
         {
-               if (!mTensorBuffer.exist(mMeta.GetOffset().GetName())) {
+               if (!mTensorBuffer.exist(mMeta.GetOffsetVecName())) {
                         offsetVal.x = offsetVal.y = 0.f;
                         LOGI("No offset value");
                         LOGI("LEAVE");
@@ -169,11 +166,11 @@ namespace inference
                 cv::Point idx = convertXYZtoXY(landmark.heatMapLoc.x, landmark.heatMapLoc.y, landmark.id);
  
                 try {
-                       offsetVal.x = mTensorBuffer.getValue<float>(mMeta.GetOffset().GetName(), idx.x);
-                       offsetVal.y = mTensorBuffer.getValue<float>(mMeta.GetOffset().GetName(), idx.y);
+                       offsetVal.x = mTensorBuffer.getValue<float>(mMeta.GetOffsetVecName(), idx.x);
+                       offsetVal.y = mTensorBuffer.getValue<float>(mMeta.GetOffsetVecName(), idx.y);
                 } catch (const std::exception& e) {
                         LOGE("Fail to get value at (%d, %d) from %s",
-                                               idx.x, idx.y, mMeta.GetOffset().GetName().c_str());
+                                               idx.x, idx.y, mMeta.GetOffsetVecName().c_str());
                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
                 }
  
@@ -241,30 +238,28 @@ namespace inference
                 }
  
                 mPoseLandmarks.clear();
-               LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false};
  
-               Landmark& landmarkInfo = mMeta.GetLandmark();
-               ScoreInfo& scoreInfo = mMeta.GetScore();
+               LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false};
  
-               if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
-                       landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
+               if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+                       mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
                         mPoseLandmarks.resize(1);
  
-                       if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+                       if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
                                 mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks);
                         } else {
                                 mPoseLandmarks[0].landmarks.resize(mHeatMapChannel);
                         }
                 }
  
-               if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+               if (mMeta.GetLandmarkDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
                         while (!mCandidates.empty()) {
  
                                 LandmarkPoint &root = mCandidates.front();
  
                                 getIndexToPos(root, scaleWidth, scaleHeight);
  
-                               if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE) {
+                               if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE) {
                                         root.valid = true;
                                         mPoseLandmarks[0].landmarks[root.id] = root;
                                         mPoseLandmarks[0].score += root.score;
@@ -318,27 +313,27 @@ namespace inference
                         }
                 } else {
                         // multi pose is not supported
-                       std::vector<int> scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll();
-                       float poseScore = scoreInfo.GetThresHold();
+                       std::vector<int> scoreIndexes = mMeta.GetScoreDimInfo().GetValidIndexAll();
+                       float poseScore = mMeta.GetScoreThreshold();
                         if (!scoreIndexes.empty()) {
-                               poseScore  = mTensorBuffer.getValue<float>(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]);
-                               if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) {
+                               poseScore  = mTensorBuffer.getValue<float>(mMeta.GetScoreName(), scoreIndexes[scoreIndexes[0]]);
+                               if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) {
                                         poseScore = PostProcess::sigmoid(poseScore);
                                 }
-                               if (poseScore < scoreInfo.GetThresHold()) {
-                                       LOGI("pose score %.4f is lower than %.4f\n[LEAVE]", poseScore, scoreInfo.GetThresHold());
+                               if (poseScore < mMeta.GetScoreThreshold()) {
+                                       LOGI("pose score %.4f is lower than %.4f\n[LEAVE]", poseScore, mMeta.GetScoreThreshold());
                                         return MEDIA_VISION_ERROR_NONE;
                                 }
                         }
  
-                       int landmarkOffset = (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
-                                                                 landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 2 : 3;
-                       if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
-                               landmarkOffset = landmarkInfo.GetOffset();
+                       int landmarkOffset = (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+                                                                 mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 2 : 3;
+                       if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+                               landmarkOffset = mMeta.GetLandmarkOffset();
                         }
                         for (int idx = 0; idx < mNumberOfLandmarks; ++idx) {
-                                       float px = mTensorBuffer.getValue<float>(landmarkInfo.GetName(), idx * landmarkOffset);
-                                       float py = mTensorBuffer.getValue<float>(landmarkInfo.GetName(), idx * landmarkOffset + 1);
+                                       float px = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset);
+                                       float py = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 1);
  
                                         mPoseLandmarks[0].landmarks[idx].score = poseScore;
                                         mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1);
@@ -371,9 +366,9 @@ namespace inference
                 LOGI("KeyId: [%d], heatMap: %d, %d", root.id, root.heatMapLoc.x, root.heatMapLoc.y);
                 LOGI("KeyId: [%d], decoded: %.4f, %.4f, score %.3f", root.id, root.decodedLoc.x, root.decodedLoc.y, root.score);
  
-               int index = static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) - 1;
-               for (auto riter = mMeta.GetEdge().GetEdgesAll().rbegin();
-                       riter != mMeta.GetEdge().GetEdgesAll().rend(); ++riter) {
+               int index = static_cast<int>(mMeta.GetEdges().size()) - 1;
+               for (auto riter = mMeta.GetEdges().rbegin();
+                       riter != mMeta.GetEdges().rend(); ++riter) {
                         int fromKeyId = riter->second;
                         int toKeyId = riter->first;
  
@@ -392,8 +387,8 @@ namespace inference
                 }
  
                 index = 0;
-               for (auto iter = mMeta.GetEdge().GetEdgesAll().begin();
-                       iter != mMeta.GetEdge().GetEdgesAll().end(); ++iter) {
+               for (auto iter = mMeta.GetEdges().begin();
+                       iter != mMeta.GetEdges().end(); ++iter) {
                         int fromKeyId = iter->first;
                         int toKeyId = iter->second;
  
@@ -449,8 +444,8 @@ namespace inference
                 }
  
                 int idx  = convertXYZtoX(toLandmark.heatMapLoc.x, toLandmark.heatMapLoc.y, toLandmark.id);
-               toLandmark.score = mTensorBuffer.getValue<float>(mMeta.GetScore().GetName(), idx);
-               if (mMeta.GetScore().GetType() == INFERENCE_SCORE_TYPE_SIGMOID) {
+               toLandmark.score = mTensorBuffer.getValue<float>(mMeta.GetScoreName(), idx);
+               if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) {
                         toLandmark.score = PostProcess::sigmoid(toLandmark.score);
                 }
  
@@ -464,12 +459,14 @@ namespace inference
         {
                 LOGI("ENTER");
  
-               LOGI("edge size: %zd", mMeta.GetEdge().GetEdgesAll().size());
+               LOGI("edge size: %zd", mMeta.GetEdges().size());
+
                 int idxY = index.y * mHeatMapWidth
-                                       * static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) * 2;
-               idxY += index.x * static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) * 2 + edgeId;
+                                       * static_cast<int>(mMeta.GetEdges().size()) * 2;
+
+               idxY += index.x * static_cast<int>(mMeta.GetEdges().size()) * 2 + edgeId;
  
-               int idxX = idxY + static_cast<int>(mMeta.GetEdge().GetEdgesAll().size());
+               int idxX = idxY + static_cast<int>(mMeta.GetEdges().size());
  
                 for(auto& dispVec : mMeta.GetDispVecAll()){
                         if (dispVec.GetType() == type) { // 0: forward
diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec

index ee7e44e..205f377 100644 (file)
--- a/packaging/capi-media-vision.spec
+++ b/packaging/capi-media-vision.spec
@@ -1,6 +1,6 @@
  Name:        capi-media-vision
  Summary:     Media Vision library for Tizen Native API
-Version:     0.8.17
+Version:     0.8.18
  Release:     1
  Group:       Multimedia/Framework
  License:     Apache-2.0 and BSD-3-Clause
author	Inki Dae <inki.dae@samsung.com>
	Thu, 7 Oct 2021 10:03:23 +0000 (19:03 +0900)
committer	Inki Dae <inki.dae@samsung.com>
	Wed, 13 Oct 2021 05:35:35 +0000 (14:35 +0900)
mv_machine_learning/mv_inference/inference/include/InputMetadata.h		patch | blob | history
mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h		patch | blob | history
mv_machine_learning/mv_inference/inference/include/OutputMetadata.h		patch | blob | history
mv_machine_learning/mv_inference/inference/src/Inference.cpp		patch | blob | history
mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp		patch | blob | history
mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp		patch | blob | history
mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp		patch | blob | history
packaging/capi-media-vision.spec		patch | blob | history