mv_machine_learning: code refactoring to meta file approach
authorInki Dae <inki.dae@samsung.com>
Mon, 17 Oct 2022 09:00:46 +0000 (18:00 +0900)
committerInki Dae <inki.dae@samsung.com>
Mon, 7 Nov 2022 03:12:39 +0000 (12:12 +0900)
[Issue type] code refactoring

Refactored the code to the meta file approach by:
- Change the existing meta file format to a more generic one.
  With this change, we can describe all input and output tensors of models.
  The existing meta file didn't describe output tensor information but only
  the decoding way, which made it impossible to use multiple tensor
  descriptions with their own pre/post process ways because tensor and
  pre/post process information was handled separately.
  Now we don't need to think of them separately because each tensor
  may or may not include a pre/post process task according to a given model
  file, and there can be many combinations of them.
- Introduce MetadataType.h header file which includes meta file approach
  common types.
    - Introduce LabelInfo.h/cpp, NumberInfo.h/cpp.

The main purpose of this refactoring is to apply the enhanced meta file format:

[Before]
======

{
"inputmetadata" :
{
"tensor_info" : [
{
"name" : "xxx",
...
}
]
"preprocess" : [
{
"normalization" : [
{
...
}
]
...
}
]
}
"outputmetadata" :
{
"box" :
{
"name" : "tensor1_name",
..
},
"score" :
{
"name" : "tensor2_name",
},
...
}
}

[After]
=======

{
"input" : [
{
"tensor1" : {
"name" : "xxx",
...
"preprocess" : {
"normalization" : {
...
}
}
}
...
}
]

"output" : [
{
"tensor1" : {
"name" : "tensor1_name",
...
"postprocess" : {
"box" : {
...
}
}
},
"tensor2" : {
"name" : "tensor2_name",
...
"postprocess" : {
"score" : {
...
}
}
}
...
}
]
}

Change-Id: I9d9be615dc3dd972d506b807030c745d8a0916a9
Signed-off-by: Inki Dae <inki.dae@samsung.com>
21 files changed:
mv_machine_learning/inference/include/BoxInfo.h
mv_machine_learning/inference/include/DecodeInfo.h
mv_machine_learning/inference/include/DispVec.h
mv_machine_learning/inference/include/InputMetadata.h
mv_machine_learning/inference/include/LabelInfo.h [new file with mode: 0644]
mv_machine_learning/inference/include/Landmark.h
mv_machine_learning/inference/include/MetadataType.h [moved from mv_machine_learning/inference/include/OutputMetadataTypes.h with 64% similarity]
mv_machine_learning/inference/include/NumberInfo.h [new file with mode: 0644]
mv_machine_learning/inference/include/OffsetVec.h
mv_machine_learning/inference/include/OutputMetadata.h
mv_machine_learning/inference/include/ScoreInfo.h
mv_machine_learning/inference/src/BoxInfo.cpp
mv_machine_learning/inference/src/Inference.cpp
mv_machine_learning/inference/src/InputMetadata.cpp
mv_machine_learning/inference/src/LabelInfo.cpp [new file with mode: 0644]
mv_machine_learning/inference/src/Metadata.cpp
mv_machine_learning/inference/src/NumberInfo.cpp [new file with mode: 0644]
mv_machine_learning/inference/src/ObjectDecoder.cpp
mv_machine_learning/inference/src/OutputMetadata.cpp
mv_machine_learning/inference/src/PoseDecoder.cpp
mv_machine_learning/inference/src/ScoreInfo.cpp

index 7813a3d..fb12a92 100644 (file)
@@ -36,29 +36,14 @@ namespace inference
 {
 namespace box
 {
-struct Label
-{
-       std::string name;
-       DimInfo dimInfo;
-};
-
-struct Number
-{
-       std::string name;
-       DimInfo dimInfo;
-};
-
 struct BoxInfo
 {
-       std::vector<std::string> names;
        DimInfo dimInfo;
        inference_box_type_e type = INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP; // 0:L-T-R-B, 1: Cx-Cy-W-H
        std::vector<int> order; // Order based on box type
        inference_box_coordinate_type_e coordinate = INFERENCE_BOX_COORDINATE_TYPE_RATIO; // 0: ratio, 1: pixel
        inference_box_decoding_type_e decodingType = INFERENCE_BOX_DECODING_TYPE_BYPASS; // 0: bypass , 1:ssd with anchor
        DecodeInfo decodingInfo;
-       Label label;
-       Number number;
 
        std::map<std::string, inference_box_type_e> supportedBoxTypes = {
                { "ORIGIN_LEFTTOP", INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP },
@@ -77,8 +62,6 @@ struct BoxInfo
 
        ~BoxInfo() = default;
 
-       std::string GetName();
-
        DimInfo GetDimInfo()
        {
                return dimInfo;
@@ -103,25 +86,8 @@ struct BoxInfo
        {
                return decodingInfo;
        }
-       std::string GetLabelName()
-       {
-               return label.name;
-       }
-       std::string GetNumberName()
-       {
-               return number.name;
-       }
-       DimInfo GetNumberDimInfo()
-       {
-               return number.dimInfo;
-       }
-
        int ParseBox(JsonObject *root);
 
-       int ParseLabel(JsonObject *root);
-
-       int ParseNumber(JsonObject *root);
-
        int ParseDecodeInfo(JsonObject *root);
 };
 } /* box */
index 6a38e54..024ef69 100644 (file)
@@ -22,7 +22,7 @@
 #include <map>
 #include <memory>
 
-#include <OutputMetadataTypes.h>
+#include <MetadataType.h>
 #include <mv_inference_type.h>
 #include <opencv2/core.hpp>
 #include "Utils.h"
index b9685bd..9860be7 100644 (file)
@@ -70,9 +70,6 @@ public:
        {
                LOGI("ENTER");
 
-               name = static_cast<const char *>(json_object_get_string_member(root, "name"));
-               LOGI("layer: %s", name.c_str());
-
                JsonArray *array = json_object_get_array_member(root, "index");
                unsigned int elements2 = json_array_get_length(array);
 
index f1c762c..366be75 100644 (file)
@@ -21,7 +21,7 @@
 #include <vector>
 #include <map>
 
-#include <mv_inference_type.h>
+#include "MetadataType.h"
 #include <inference_engine_type.h>
 #include <json-glib/json-glib.h>
 
@@ -55,42 +55,6 @@ struct Options
        Quantization quantization;
 };
 
-struct LayerInfo
-{
-       std::string name;
-       std::vector<int> dims;
-       mv_colorspace_e colorSpace {};
-       mv_inference_data_type_e dataType {};
-       inference_tensor_shape_type_e shapeType {}; // TODO: define mv_inference_shape_type_e
-
-       int getWidth() const
-       {
-               if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
-                       return dims[3];
-               } else { // INFERENCE_TENSOR_SHAPE_NHWC
-                       return dims[2];
-               }
-       }
-
-       int getHeight() const
-       {
-               if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
-                       return dims[2];
-               } else { // INFERENCE_TENSOR_SHAPE_NHWC
-                       return dims[1];
-               }
-       }
-
-       int getChannel() const
-       {
-               if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
-                       return dims[1];
-               } else { // INFERENCE_TENSOR_SHAPE_NHWC
-                       return dims[3];
-               }
-       }
-};
-
 class InputMetadata
 {
 public:
@@ -114,7 +78,7 @@ public:
                 *
                 * @since_tizen 6.5
                 */
-       int Parse(JsonObject *root);
+       int Parse(JsonObject *root, std::string key_name);
        bool IsParsed(void)
        {
                return parsed;
@@ -136,7 +100,7 @@ private:
        std::map<std::string, LayerInfo> layer;
        std::map<std::string, Options> option;
 
-       int GetTensorInfo(JsonObject *root);
+       int GetTensorInfo(JsonObject *root, std::string key_name);
        int GetPreProcess(JsonObject *root);
 };
 
diff --git a/mv_machine_learning/inference/include/LabelInfo.h b/mv_machine_learning/inference/include/LabelInfo.h
new file mode 100644 (file)
index 0000000..53232fd
--- /dev/null
@@ -0,0 +1,49 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LABEL_INFO_H__
+#define __LABEL_INFO_H__
+
+#include <string>
+#include <vector>
+#include <map>
+#include <memory>
+
+#include <json-glib/json-glib.h>
+#include <mv_inference_type.h>
+#include <inference_engine_type.h>
+
+#include "DecodeInfo.h"
+#include "DimInfo.h"
+#include "Utils.h"
+
+namespace mediavision
+{
+namespace inference
+{
+namespace label
+{
+struct LabelInfo
+{
+       DimInfo dimInfo;
+
+       int Parse(JsonObject *root);
+};
+} /* label */
+} /* Inference */
+} /* MediaVision */
+
+#endif
index f429361..44c38b9 100644 (file)
@@ -64,7 +64,6 @@ typedef struct _HeatMapInfo
 class Landmark
 {
 private:
-       std::string name;
        DimInfo dimInfo;
        inference_landmark_type_e type; /**< 0: 2D_SINGLE, 1: 2D_MULTI, 2: 3D_SINGLE */
        int offset;
@@ -73,7 +72,7 @@ private:
                                                                                                                        1: decoding heatmap,
                                                                                                                        2: decoding heatmap with refinement */
        HeatMapInfo heatMapInfo;
-       std::vector<DispVec> dispVecs;
+       DispVec dispVec;
        Edge edgeMap;
 
        std::map<std::string, inference_landmark_type_e> supportedLandmarkTypes;
@@ -82,13 +81,14 @@ private:
 
 public:
        Landmark()
-                       : name()
-                       , dimInfo()
+                       : dimInfo()
                        , type(INFERENCE_LANDMARK_TYPE_2D_SINGLE)
                        , offset()
                        , coordinate(INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO)
                        , decodingType(INFERENCE_LANDMARK_DECODING_TYPE_BYPASS)
                        , heatMapInfo()
+                       , dispVec()
+                       , edgeMap()
 
        {
                supportedLandmarkTypes.insert({ "2D_SINGLE", INFERENCE_LANDMARK_TYPE_2D_SINGLE });
@@ -110,37 +110,29 @@ public:
        int ParseLandmark(JsonObject *root)
        {
                // box
-               JsonArray *rootArray = json_object_get_array_member(root, "landmark");
-               unsigned int elements = json_array_get_length(rootArray);
-
-               // TODO: handling error
-               for (unsigned int elem = 0; elem < elements; ++elem) {
-                       JsonNode *pNode = json_array_get_element(rootArray, elem);
-                       JsonObject *pObject = json_node_get_object(pNode);
-
-                       name = static_cast<const char *>(json_object_get_string_member(pObject, "name"));
-                       LOGI("layer: %s", name.c_str());
-
-                       JsonArray *array = json_object_get_array_member(pObject, "index");
-                       unsigned int elements2 = json_array_get_length(array);
-                       LOGI("range dim: size[%u]", elements2);
-                       for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
-                               if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
-                                       dimInfo.SetValidIndex(elem2);
-                       }
-
-                       try {
-                               type = GetSupportedType(pObject, "landmark_type", supportedLandmarkTypes);
-                               coordinate = GetSupportedType(pObject, "landmark_coordinate", supportedLandmarkCoordinateTypes);
-                               decodingType = GetSupportedType(pObject, "decoding_type", supportedLandmarkDecodingTypes);
-                       } catch (const std::exception &e) {
-                               LOGE("Invalid %s", e.what());
-                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
-                       }
-                       if (json_object_has_member(pObject, "landmark_offset")) {
-                               offset = static_cast<int>(json_object_get_int_member(pObject, "landmark_offset"));
-                               LOGI("(optional) landmark offset: %d", offset);
-                       }
+               JsonNode *pNode = json_object_get_member(root, "landmark");
+               JsonObject *pObject = json_node_get_object(pNode);
+               JsonArray *array = json_object_get_array_member(pObject, "index");
+               unsigned int elements2 = json_array_get_length(array);
+
+               LOGI("range dim: size[%u]", elements2);
+
+               for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+                       if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
+                               dimInfo.SetValidIndex(elem2);
+               }
+
+               try {
+                       type = GetSupportedType(pObject, "landmark_type", supportedLandmarkTypes);
+                       coordinate = GetSupportedType(pObject, "landmark_coordinate", supportedLandmarkCoordinateTypes);
+                       decodingType = GetSupportedType(pObject, "decoding_type", supportedLandmarkDecodingTypes);
+               } catch (const std::exception &e) {
+                       LOGE("Invalid %s", e.what());
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
+               if (json_object_has_member(pObject, "landmark_offset")) {
+                       offset = static_cast<int>(json_object_get_int_member(pObject, "landmark_offset"));
+                       LOGI("(optional) landmark offset: %d", offset);
                }
 
                LOGI("LEAVE");
@@ -158,17 +150,10 @@ public:
                        return MEDIA_VISION_ERROR_INVALID_OPERATION;
                }
 
-               JsonArray *rootArray = json_object_get_array_member(root, "displacement");
-               unsigned int elements = json_array_get_length(rootArray);
-
-               dispVecs.resize(elements);
-               unsigned int elem = 0;
-               for (auto &disp : dispVecs) {
-                       JsonNode *pNode = json_array_get_element(rootArray, elem++);
-                       JsonObject *pObject = json_node_get_object(pNode);
+               JsonNode *node = json_object_get_member(root, "displacement");
+               JsonObject *pObject = json_node_get_object(node);
 
-                       disp.ParseDisplacement(pObject, supportedShapeType);
-               }
+               dispVec.ParseDisplacement(pObject, supportedShapeType);
 
                LOGI("LEAVE");
                return MEDIA_VISION_ERROR_NONE;
@@ -196,52 +181,46 @@ public:
                LOGI("ENTER");
 
                // box
-               JsonArray *rootArray = json_object_get_array_member(root, "landmark");
-               unsigned int elements = json_array_get_length(rootArray);
-
-               // TODO: handling error
-               for (unsigned int elem = 0; elem < elements; ++elem) {
-                       JsonNode *pNode = json_array_get_element(rootArray, elem);
-                       JsonObject *pObject = json_node_get_object(pNode);
-
-                       if (!json_object_has_member(pObject, "decoding_info")) {
-                               LOGE("decoding_info is mandatory. Invalid metadata");
-                               LOGI("LEAVE");
-
-                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
-                       }
-
-                       JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info");
-                       if (!json_object_has_member(cObject, "heatmap")) {
-                               LOGE("heatmap is mandatory. Invalid metadata");
-                               LOGI("LEAVE");
-
-                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
-                       }
-
-                       JsonObject *object = json_object_get_object_member(cObject, "heatmap");
-                       try {
-                               GetHeatMapInfo().shapeType = GetSupportedType(object, "shape_type", supportedShapeType);
-                       } catch (const std::exception &e) {
-                               LOGE("Invalid %s", e.what());
-                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
-                       }
-
-                       std::vector<int> heatMapIndexes = GetDimInfo().GetValidIndexAll();
-                       if (GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
-                               GetHeatMapInfo().cIdx = heatMapIndexes[0];
-                               GetHeatMapInfo().hIdx = heatMapIndexes[1];
-                               GetHeatMapInfo().wIdx = heatMapIndexes[2];
-                       } else {
-                               GetHeatMapInfo().hIdx = heatMapIndexes[0];
-                               GetHeatMapInfo().wIdx = heatMapIndexes[1];
-                               GetHeatMapInfo().cIdx = heatMapIndexes[2];
-                       }
-
-                       if (json_object_has_member(object, "nms_radius")) {
-                               GetHeatMapInfo().nmsRadius = static_cast<float>(json_object_get_double_member(object, "nms_radius"));
-                               LOGI("nms is enabled with %3.f", GetHeatMapInfo().nmsRadius);
-                       }
+               JsonNode *node = json_object_get_member(root, "landmark");
+               JsonObject *pObject = json_node_get_object(node);
+
+               if (!json_object_has_member(pObject, "decoding_info")) {
+                       LOGE("decoding_info is mandatory. Invalid metadata");
+                       LOGI("LEAVE");
+
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
+
+               JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info");
+               if (!json_object_has_member(cObject, "heatmap")) {
+                       LOGE("heatmap is mandatory. Invalid metadata");
+                       LOGI("LEAVE");
+
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
+
+               JsonObject *object = json_object_get_object_member(cObject, "heatmap");
+               try {
+                       GetHeatMapInfo().shapeType = GetSupportedType(object, "shape_type", supportedShapeType);
+               } catch (const std::exception &e) {
+                       LOGE("Invalid %s", e.what());
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
+
+               std::vector<int> heatMapIndexes = GetDimInfo().GetValidIndexAll();
+               if (GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
+                       GetHeatMapInfo().cIdx = heatMapIndexes[0];
+                       GetHeatMapInfo().hIdx = heatMapIndexes[1];
+                       GetHeatMapInfo().wIdx = heatMapIndexes[2];
+               } else {
+                       GetHeatMapInfo().hIdx = heatMapIndexes[0];
+                       GetHeatMapInfo().wIdx = heatMapIndexes[1];
+                       GetHeatMapInfo().cIdx = heatMapIndexes[2];
+               }
+
+               if (json_object_has_member(object, "nms_radius")) {
+                       GetHeatMapInfo().nmsRadius = static_cast<float>(json_object_get_double_member(object, "nms_radius"));
+                       LOGI("nms is enabled with %3.f", GetHeatMapInfo().nmsRadius);
                }
 
                LOGI("LEAVE");
@@ -273,19 +252,14 @@ public:
                return heatMapInfo;
        }
 
-       std::string GetName()
-       {
-               return name;
-       }
-
        DimInfo GetDimInfo()
        {
                return dimInfo;
        }
 
-       std::vector<DispVec> &GetDispVecAll()
+       DispVec &GetDispVec()
        {
-               return dispVecs;
+               return dispVec;
        }
 
        std::vector<std::pair<int, int> > &GetEdges()
 #ifndef __MEDIA_VISION_OUTPUTMETADATA_TYPES_H__
 #define __MEDIA_VISION_OUTPUTMETADATA_TYPES_H__
 
+#include <mv_common.h>
+#include <mv_inference_type.h>
+#include <inference_engine_type.h>
+
 /**
  * @file OutputMetadataTypes.h
  * @brief This file contains supported output metadata types.
@@ -26,6 +30,18 @@ namespace mediavision
 {
 namespace inference
 {
+// Postprocess type
+typedef enum {
+       POSTPROCESS_DECODING_TYPE_NONE,
+       POSTPROCESS_DECODING_TYPE_SCORE = 1 << 0,
+       POSTPROCESS_DECODING_TYPE_BOX = 1 << 1,
+       POSTPROCESS_DECODING_TYPE_LABEL = 1 << 2,
+       POSTPROCESS_DECODING_TYPE_NUMBER = 1 << 3,
+       POSTPROCESS_DECODING_TYPE_LANDMARK = 1 << 4,
+       POSTPROCESS_DECODING_TYPE_DISPLACEMENT = 1 << 5,
+       POSTPROCESS_DECODING_TYPE_OFFSETVEC = 1 << 6
+} postprocess_decoding_type_e;
+
 // score
 typedef enum
 {
@@ -87,6 +103,42 @@ typedef enum
        INFERENCE_DISPLACEMENT_TYPE_FORWARD,
        INFERENCE_DISPLACEMENT_TYPE_BACKWARD
 } inference_displacement_type_e;
+
+struct LayerInfo
+{
+       std::string name;
+       std::vector<int> dims;
+       mv_colorspace_e colorSpace {};
+       mv_inference_data_type_e dataType {};
+       inference_tensor_shape_type_e shapeType {}; // TODO: define mv_inference_shape_type_e
+       // An output tensor can have one or more decoding types.
+       unsigned int decodingType {};
+
+       int getWidth() const
+       {
+               if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW)
+                       return dims[3];
+
+               return dims[2];
+       }
+
+       int getHeight() const
+       {
+               if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW)
+                       return dims[2];
+
+               return dims[1];
+       }
+
+       int getChannel() const
+       {
+               if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW)
+                       return dims[1];
+
+               return dims[3];
+       }
+};
+
 }
 }
 
diff --git a/mv_machine_learning/inference/include/NumberInfo.h b/mv_machine_learning/inference/include/NumberInfo.h
new file mode 100644 (file)
index 0000000..ed8703c
--- /dev/null
@@ -0,0 +1,49 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NUMBER_INFO_H__
+#define __NUMBER_INFO_H__
+
+#include <string>
+#include <vector>
+#include <map>
+#include <memory>
+
+#include <json-glib/json-glib.h>
+#include <mv_inference_type.h>
+#include <inference_engine_type.h>
+
+#include "DecodeInfo.h"
+#include "DimInfo.h"
+#include "Utils.h"
+
+namespace mediavision
+{
+namespace inference
+{
+namespace number
+{
+struct NumberInfo
+{
+       DimInfo dimInfo;
+
+       int Parse(JsonObject *root);
+};
+} /* number */
+} /* Inference */
+} /* MediaVision */
+
+#endif
index e3504ba..6b3746e 100644 (file)
@@ -32,18 +32,14 @@ namespace inference
 class OffsetVec
 {
 private:
-       std::string name;
        DimInfo dimInfo;
        int shapeType;
 
 public:
-       OffsetVec() : name(), dimInfo(), shapeType()
+       OffsetVec() : dimInfo(), shapeType()
        {}
        ~OffsetVec() = default;
-       std::string GetName()
-       {
-               return name;
-       }
+
        DimInfo GetDimInfo()
        {
                return dimInfo;
@@ -55,31 +51,22 @@ public:
 
        int ParseOffset(JsonObject *root, const std::map<std::string, inference_tensor_shape_type_e> &supportedShapeType)
        {
-               JsonArray *rootArray = json_object_get_array_member(root, "offset");
-               unsigned int elements = json_array_get_length(rootArray);
+               JsonNode *node = json_object_get_member(root, "offset");
+               JsonObject *pObject = json_node_get_object(node);
 
-               // TODO: handling error
-               for (unsigned int elem = 0; elem < elements; ++elem) {
-                       JsonNode *pNode = json_array_get_element(rootArray, elem);
-                       JsonObject *pObject = json_node_get_object(pNode);
-
-                       name = static_cast<const char *>(json_object_get_string_member(pObject, "name"));
-                       LOGI("layer: %s", name.c_str());
-
-                       JsonArray *array = json_object_get_array_member(pObject, "index");
-                       unsigned int elements2 = json_array_get_length(array);
-                       LOGI("range dim: size[%u]", elements2);
-                       for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
-                               if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
-                                       dimInfo.SetValidIndex(elem2);
-                       }
+               JsonArray *array = json_object_get_array_member(pObject, "index");
+               unsigned int elements2 = json_array_get_length(array);
+               LOGI("range dim: size[%u]", elements2);
+               for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+                       if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
+                               dimInfo.SetValidIndex(elem2);
+               }
 
-                       try {
-                               shapeType = GetSupportedType(pObject, "shape_type", supportedShapeType);
-                       } catch (const std::exception &e) {
-                               LOGE("Invalid %s", e.what());
-                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
-                       }
+               try {
+                       shapeType = GetSupportedType(pObject, "shape_type", supportedShapeType);
+               } catch (const std::exception &e) {
+                       LOGE("Invalid %s", e.what());
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
                }
 
                LOGI("LEAVE");
index f2d514a..7aabf4f 100644 (file)
@@ -26,7 +26,7 @@
 #include <inference_engine_type.h>
 #include <json-glib/json-glib.h>
 #include <opencv2/core.hpp>
-#include "OutputMetadataTypes.h"
+#include "MetadataType.h"
 #include "DecodeInfo.h"
 #include "Edge.h"
 #include "DispVec.h"
@@ -34,6 +34,8 @@
 #include "OffsetVec.h"
 #include "Landmark.h"
 #include "BoxInfo.h"
+#include "LabelInfo.h"
+#include "NumberInfo.h"
 #include "ScoreInfo.h"
 
 /**
@@ -51,17 +53,20 @@ struct OutputMetadata
        bool parsed = false;
        ScoreInfo score;
        box::BoxInfo box;
+       label::LabelInfo label;
+       number::NumberInfo number;
        Landmark landmark;
        OffsetVec offsetVec;
        std::map<std::string, inference_tensor_shape_type_e> mSupportedShapeType = {
                { "NCHW", INFERENCE_TENSOR_SHAPE_NCHW },
                { "NHWC", INFERENCE_TENSOR_SHAPE_NHWC }
        };
+       std::map<std::string, LayerInfo> _tensor_info;
 
-       int ParseScore(JsonObject *root);
-       int ParseBox(JsonObject *root);
-       int ParseLandmark(JsonObject *root);
-       int ParseOffset(JsonObject *root);
+       int ParseScore(JsonObject *root, LayerInfo& layer);
+       int ParseBox(JsonObject *root, LayerInfo& layer);
+       int ParseLandmark(JsonObject *root, LayerInfo& layer);
+       int ParseOffset(JsonObject *root, LayerInfo& layer);
 
        /**
                 * @brief   Destroys an OutputMetadata class instance including
@@ -75,7 +80,9 @@ struct OutputMetadata
                 *
                 * @since_tizen 6.5
                 */
-       int Parse(JsonObject *root);
+       int Parse(JsonObject *root, std::string key_name);
+
+       int GetPostProcess(JsonObject *root, LayerInfo& layer);
 
        bool IsParsed()
        {
@@ -84,8 +91,14 @@ struct OutputMetadata
 
        std::string GetScoreName()
        {
-               return score.GetName();
+               for (auto& info : _tensor_info) {
+                       if (info.second.decodingType & POSTPROCESS_DECODING_TYPE_SCORE)
+                               return info.first;
+               }
+
+               return "";
        }
+
        DimInfo GetScoreDimInfo()
        {
                return score.GetDimInfo();
@@ -116,7 +129,12 @@ struct OutputMetadata
        }
        std::string GetBoxName()
        {
-               return box.GetName();
+               for (auto& info : _tensor_info) {
+                       if (info.second.decodingType & POSTPROCESS_DECODING_TYPE_BOX)
+                               return info.first;
+               }
+
+               return "";
        }
        DimInfo GetBoxDimInfo()
        {
@@ -134,17 +152,31 @@ struct OutputMetadata
        {
                return box.GetType();
        }
-       std::string GetBoxLabelName()
+       std::string GetLabelName()
+       {
+               for (auto& info : _tensor_info) {
+                       if (info.second.decodingType & POSTPROCESS_DECODING_TYPE_LABEL)
+                               return info.first;
+               }
+
+               return "";
+       }
+       DimInfo GetLabelDimInfo()
        {
-               return box.GetLabelName();
+               return label.dimInfo;
        }
-       std::string GetBoxNumberName()
+       std::string GetNumberName()
        {
-               return box.GetNumberName();
+               for (auto& info : _tensor_info) {
+                       if (info.second.decodingType & POSTPROCESS_DECODING_TYPE_NUMBER)
+                               return info.first;
+               }
+
+               return "";
        }
-       DimInfo GetBoxNumberDimInfo()
+       DimInfo GetNumberDimInfo()
        {
-               return box.GetNumberDimInfo();
+               return number.dimInfo;
        }
 
        int GetScoreCoordinate()
@@ -153,7 +185,12 @@ struct OutputMetadata
        }
        std::string GetLandmarkName()
        {
-               return landmark.GetName();
+               for (auto& info : _tensor_info) {
+                       if (info.second.decodingType & POSTPROCESS_DECODING_TYPE_LANDMARK)
+                               return info.first;
+               }
+
+               return "";
        }
        int GetLandmarkOffset()
        {
@@ -179,9 +216,9 @@ struct OutputMetadata
        {
                return landmark.GetDecodingType();
        }
-       std::vector<DispVec> &GetLandmarkDispVecAll()
+       DispVec &GetLandmarkDispVec()
        {
-               return landmark.GetDispVecAll();
+               return landmark.GetDispVec();
        }
        std::vector<std::pair<int, int> > &GetLandmarkEdges()
        {
@@ -189,7 +226,12 @@ struct OutputMetadata
        }
        std::string GetOffsetVecName()
        {
-               return offsetVec.GetName();
+               for (auto& info : _tensor_info) {
+                       if (info.second.decodingType & POSTPROCESS_DECODING_TYPE_OFFSETVEC)
+                               return info.first;
+               }
+
+               return "";
        }
        inference_box_decoding_type_e GetBoxDecodingType()
        {
index 4592f7f..b003706 100644 (file)
@@ -24,7 +24,7 @@
 
 #include <mv_inference_type.h>
 #include <mv_private.h>
-#include <OutputMetadataTypes.h>
+#include <MetadataType.h>
 #include "DimInfo.h"
 #include "Utils.h"
 
@@ -43,7 +43,6 @@ struct DeQuantization
 
 struct ScoreInfo
 {
-       std::vector<std::string> names;
        DimInfo dimInfo;
        double threshold = 0.0;
        int topNumber = 1;
@@ -53,10 +52,6 @@ struct ScoreInfo
                                                                                                                                                  { "SIGMOID", INFERENCE_SCORE_TYPE_SIGMOID } };
        ~ScoreInfo() = default;
 
-       std::string GetName()
-       {
-               return names[0];
-       }
        DimInfo GetDimInfo()
        {
                return dimInfo;
index 43f055d..290f5d9 100644 (file)
 
 using namespace mediavision::inference::box;
 
-std::string BoxInfo::GetName()
-{
-       // OutputMetadata needs empty sting
-       if (names.empty())
-               return "";
-
-       return names[0];
-}
-
 int BoxInfo::ParseBox(JsonObject *root)
 {
        LOGI("ENTER");
@@ -40,17 +31,7 @@ int BoxInfo::ParseBox(JsonObject *root)
 
        JsonObject *pObject = json_object_get_object_member(root, "box");
 
-       JsonArray *array = json_object_get_array_member(pObject, "name");
-       MEDIA_VISION_NULL_ARG_CHECK(array);
-
-       unsigned int elements1 = json_array_get_length(array);
-       MEDIA_VISION_CHECK_CONDITION(elements1 > 0, MEDIA_VISION_ERROR_INVALID_PARAMETER, "No name on meta file");
-
-       for (unsigned int elem1 = 0; elem1 < elements1; ++elem1) {
-               names.push_back(json_array_get_string_element(array, elem1));
-       }
-
-       array = json_object_get_array_member(pObject, "index");
+       JsonArray *array = json_object_get_array_member(pObject, "index");
        unsigned int elements2 = json_array_get_length(array);
 
        LOGI("range dim: size[%u]", elements2);
@@ -82,77 +63,6 @@ int BoxInfo::ParseBox(JsonObject *root)
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int BoxInfo::ParseLabel(JsonObject *root)
-{
-       LOGI("ENTER");
-
-       if (!json_object_has_member(root, "label")) {
-               LOGE("No box outputmetadata");
-               LOGI("LEAVE");
-               return MEDIA_VISION_ERROR_INVALID_OPERATION;
-       }
-
-       JsonArray *rootArray = json_object_get_array_member(root, "label");
-       unsigned int elements = json_array_get_length(rootArray);
-
-       // TODO: handling error
-       for (unsigned int elem = 0; elem < elements; ++elem) {
-               JsonNode *pNode = json_array_get_element(rootArray, elem);
-               JsonObject *pObject = json_node_get_object(pNode);
-
-               label.name = json_object_get_string_member(pObject, "name");
-               LOGI("layer: %s", label.name.c_str());
-
-               JsonArray *array = json_object_get_array_member(pObject, "index");
-               unsigned int elements2 = json_array_get_length(array);
-               LOGI("range dim: size[%u]", elements2);
-               for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
-                       if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
-                               label.dimInfo.SetValidIndex(elem2);
-               }
-       }
-
-       LOGI("LEAVE");
-       return MEDIA_VISION_ERROR_NONE;
-}
-
-int BoxInfo::ParseNumber(JsonObject *root)
-{
-       LOGI("ENTER");
-
-       if (!json_object_has_member(root, "number")) {
-               LOGE("No number outputmetadata");
-               LOGI("LEAVE");
-               return MEDIA_VISION_ERROR_INVALID_OPERATION;
-       }
-
-       // box
-       JsonArray *rootArray = json_object_get_array_member(root, "number");
-       unsigned int elements = json_array_get_length(rootArray);
-
-       // TODO: handling error
-       for (unsigned int elem = 0; elem < elements; ++elem) {
-               JsonNode *pNode = json_array_get_element(rootArray, elem);
-               JsonObject *pObject = json_node_get_object(pNode);
-
-               number.name = json_object_get_string_member(pObject, "name");
-
-               LOGI("layer: %s", number.name.c_str());
-
-               JsonArray *array = json_object_get_array_member(pObject, "index");
-               unsigned int elements2 = json_array_get_length(array);
-
-               LOGI("range dim: size[%u]", elements2);
-
-               for (unsigned int elem2 = 0; elem2 < elements2; ++elem2)
-                       if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
-                               number.dimInfo.SetValidIndex(elem2);
-       }
-
-       LOGI("LEAVE");
-       return MEDIA_VISION_ERROR_NONE;
-}
-
 int BoxInfo::ParseDecodeInfo(JsonObject *root)
 {
        LOGI("ENTER");
index 1f6420c..471bc2d 100644 (file)
@@ -408,26 +408,10 @@ int Inference::configureOutputMetaInfo()
 
        mConfig.mOutputLayerNames.clear();
 
-       if (!outputMeta.GetScoreName().empty())
-               mConfig.mOutputLayerNames.push_back(outputMeta.GetScoreName());
-
-       if (!outputMeta.GetBoxName().empty())
-               mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxName());
-
-       if (!outputMeta.GetBoxLabelName().empty())
-               mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxLabelName());
-
-       if (!outputMeta.GetBoxNumberName().empty())
-               mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxNumberName());
-
-       if (!outputMeta.GetLandmarkName().empty())
-               mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmarkName());
-
-       if (!outputMeta.GetOffsetVecName().empty())
-               mConfig.mOutputLayerNames.push_back(outputMeta.GetOffsetVecName());
-
-       for (auto &dispVec : outputMeta.GetLandmarkDispVecAll())
-               mConfig.mOutputLayerNames.push_back(dispVec.GetName());
+       if (!outputMeta._tensor_info.empty()) {
+               for (auto& info : outputMeta._tensor_info)
+                       mConfig.mOutputLayerNames.push_back(info.first);
+       }
 
        inference_engine_tensor_info tensor_info = { std::vector<size_t> { 1 }, INFERENCE_TENSOR_SHAPE_NCHW,
                                                                                                        INFERENCE_TENSOR_DATA_TYPE_FLOAT32, 1 };
index a2bbceb..ae5e660 100644 (file)
@@ -44,155 +44,165 @@ InputMetadata::InputMetadata() : parsed(false), layer(), option()
        mSupportedColorSpace.insert({ "GRAY8", MEDIA_VISION_COLORSPACE_Y800 });
 }
 
-int InputMetadata::GetTensorInfo(JsonObject *root)
+int InputMetadata::GetTensorInfo(JsonObject *root, std::string key_name)
 {
        LOGI("ENTER");
 
-       if (!json_object_has_member(root, "tensor_info")) {
-               LOGE("No tensor_info inputmetadata");
+       if (!json_object_has_member(root, key_name.c_str())) {
+               LOGE("No input.");
                return MEDIA_VISION_ERROR_INVALID_OPERATION;
        }
 
+       LOGI("Parse tensor name : %s", key_name.c_str());
+
        // tensor_info
-       int ret = MEDIA_VISION_ERROR_NONE;
-       JsonArray *rootArray = json_object_get_array_member(root, "tensor_info");
-       unsigned int elements = json_array_get_length(rootArray);
+       JsonNode *node = json_object_get_member(root, key_name.c_str());
+       JsonObject *object = json_node_get_object(node);
 
        std::map<std::string, LayerInfo>().swap(layer);
        // TODO: handling error
        // FIXEME: LayerInfo.set()??
-       for (unsigned int elem = 0; elem < elements; ++elem) {
-               LayerInfo info;
-               JsonNode *pNode = json_array_get_element(rootArray, elem);
-               JsonObject *pObject = json_node_get_object(pNode);
-
-               info.name = static_cast<const char *>(json_object_get_string_member(pObject, "name"));
-               LOGI("layer: %s", info.name.c_str());
-
-               try {
-                       info.shapeType = GetSupportedType(pObject, "shape_type", mSupportedShapeType);
-                       info.dataType = GetSupportedType(pObject, "data_type", mSupportedDataType);
-                       info.colorSpace = GetSupportedType(pObject, "color_space", mSupportedColorSpace);
-               } catch (const std::exception &e) {
-                       LOGE("Invalid %s", e.what());
-                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
-               }
+       LayerInfo info;
+       info.name = static_cast<const char *>(json_object_get_string_member(object, "name"));
+       LOGI("layer: %s", info.name.c_str());
+       gchar *dump = json_to_string(node, 0); // buffer is owned by the caller; released right after logging
+       LOGI("tensor name : %s", dump);
+       g_free(dump);
+       try {
+               info.shapeType = GetSupportedType(object, "shape_type", mSupportedShapeType);
+               info.dataType = GetSupportedType(object, "data_type", mSupportedDataType);
+               info.colorSpace = GetSupportedType(object, "color_space", mSupportedColorSpace);
+       } catch (const std::exception &e) {
+               LOGE("Invalid %s", e.what());
+               return MEDIA_VISION_ERROR_INVALID_OPERATION;
+       }
 
-               // dims
-               JsonArray *array = json_object_get_array_member(pObject, "shape_dims");
-               unsigned int elements2 = json_array_get_length(array);
-               LOGI("shape dim: size[%u]", elements2);
-               for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
-                       auto dim = static_cast<int>(json_array_get_int_element(array, elem2));
-                       info.dims.push_back(dim);
-                       LOGI("%d", dim);
-               }
+       // dims
+       JsonArray *array = json_object_get_array_member(object, "shape_dims");
+       unsigned int elements2 = json_array_get_length(array);
+       LOGI("shape dim: size[%u]", elements2);
+       for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+               auto dim = static_cast<int>(json_array_get_int_element(array, elem2));
+               info.dims.push_back(dim);
+               LOGI("%d", dim);
+       }
+
+       layer.insert(std::make_pair(info.name, info));
 
-               layer.insert(std::make_pair(info.name, info));
+       if (json_object_has_member(object, "preprocess")) {
+               // Parse preprocess.
+               int ret = GetPreProcess(object);
+               if (ret != MEDIA_VISION_ERROR_NONE) {
+                       LOGE("Fail to get preprocess element.");
+                       return ret;
+               }
        }
 
        LOGI("LEAVE");
 
-       return ret;
+       return MEDIA_VISION_ERROR_NONE;
 }
 
 int InputMetadata::GetPreProcess(JsonObject *root)
 {
        LOGI("ENTER");
 
-       if (!json_object_has_member(root, "preprocess")) {
-               LOGI("No preprocess inputmetadata");
-               return MEDIA_VISION_ERROR_NONE;
-       }
-
-       // preprocess
-       JsonArray *rootArray = json_object_get_array_member(root, "preprocess");
-       unsigned int elements = json_array_get_length(rootArray);
+       JsonNode *preprocess_node = json_object_get_member(root, "preprocess");
+       JsonObject *preprocess_object = json_node_get_object(preprocess_node);
 
        std::map<std::string, Options>().swap(option);
        // TODO: iterLayer should be the same with elements.
        auto iterLayer = layer.begin();
        // TODO: handling error
-       for (unsigned int elem = 0; elem < elements; ++elem, ++iterLayer) {
-               Options opt;
-
-               JsonNode *pNode = json_array_get_element(rootArray, elem);
-               JsonObject *pObject = json_node_get_object(pNode);
-
-               // normalization
-               if (json_object_has_member(pObject, "normalization")) {
-                       JsonArray *array = json_object_get_array_member(pObject, "normalization");
-                       JsonNode *node = json_array_get_element(array, 0);
-                       JsonObject *object = json_node_get_object(node);
-
-                       opt.normalization.use = true;
-                       LOGI("use normalization");
-
-                       JsonArray *arrayMean = json_object_get_array_member(object, "mean");
-                       JsonArray *arrayStd = json_object_get_array_member(object, "std");
-                       unsigned int elemMean = json_array_get_length(arrayMean);
-                       unsigned int elemStd = json_array_get_length(arrayStd);
-                       if (elemMean != elemStd) {
-                               LOGE("Invalid mean and std values");
-                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
-                       }
-
-                       for (unsigned int elem = 0; elem < elemMean; ++elem) {
-                               auto m = static_cast<double>(json_array_get_double_element(arrayMean, elem));
-                               auto s = static_cast<double>(json_array_get_double_element(arrayStd, elem));
-                               opt.normalization.mean.push_back(m);
-                               opt.normalization.std.push_back(s);
-                               LOGI("%u: mean[%3.2f], std[%3.2f]", elem, m, s);
-                       }
+       Options opt;
+
+       // normalization
+       if (json_object_has_member(preprocess_object, "normalization")) {
+               JsonNode *node = json_object_get_member(preprocess_object, "normalization");
+               JsonObject *object = json_node_get_object(node);
+
+               opt.normalization.use = true;
+               LOGI("use normalization");
+
+               JsonArray *arrayMean = json_object_get_array_member(object, "mean");
+               JsonArray *arrayStd = json_object_get_array_member(object, "std");
+               unsigned int elemMean = json_array_get_length(arrayMean);
+               unsigned int elemStd = json_array_get_length(arrayStd);
+               if (elemMean != elemStd) {
+                       LOGE("Invalid mean and std values");
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
+
+               for (unsigned int elem = 0; elem < elemMean; ++elem) {
+                       auto m = static_cast<double>(json_array_get_double_element(arrayMean, elem));
+                       auto s = static_cast<double>(json_array_get_double_element(arrayStd, elem));
+                       opt.normalization.mean.push_back(m);
+                       opt.normalization.std.push_back(s);
+                       LOGI("%u: mean[%3.2f], std[%3.2f]", elem, m, s);
+               }
+       }
+
+       if (json_object_has_member(preprocess_object, "quantization")) {
+               JsonNode *node = json_object_get_member(preprocess_object, "quantization");
+               JsonObject *object = json_node_get_object(node);
+
+               opt.quantization.use = true;
+               LOGI("use quantization");
+
+               JsonArray *arrayScale = json_object_get_array_member(object, "scale");
+               JsonArray *arrayZero = json_object_get_array_member(object, "zeropoint");
+               unsigned int elemScale = json_array_get_length(arrayScale);
+               unsigned int elemZero = json_array_get_length(arrayZero);
+               if (elemScale != elemZero) {
+                       LOGE("Invalid scale and zero values");
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
                }
 
-               if (json_object_has_member(pObject, "quantization")) {
-                       JsonArray *array = json_object_get_array_member(pObject, "quantization");
-                       JsonNode *node = json_array_get_element(array, 0);
-                       JsonObject *object = json_node_get_object(node);
-
-                       opt.quantization.use = true;
-                       LOGI("use quantization");
-
-                       JsonArray *arrayScale = json_object_get_array_member(object, "scale");
-                       JsonArray *arrayZero = json_object_get_array_member(object, "zeropoint");
-                       unsigned int elemScale = json_array_get_length(arrayScale);
-                       unsigned int elemZero = json_array_get_length(arrayZero);
-                       if (elemScale != elemZero) {
-                               LOGE("Invalid scale and zero values");
-                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
-                       }
-
-                       for (unsigned int elem = 0; elem < elemScale; ++elem) {
-                               auto s = static_cast<double>(json_array_get_double_element(arrayScale, elem));
-                               auto z = static_cast<double>(json_array_get_double_element(arrayZero, elem));
-                               opt.quantization.scale.push_back(s);
-                               opt.quantization.zeropoint.push_back(z);
-                               LOGI("%u: scale[%3.2f], zeropoint[%3.2f]", elem, s, z);
-                       }
+               for (unsigned int elem = 0; elem < elemScale; ++elem) {
+                       auto s = static_cast<double>(json_array_get_double_element(arrayScale, elem));
+                       auto z = static_cast<double>(json_array_get_double_element(arrayZero, elem));
+                       opt.quantization.scale.push_back(s);
+                       opt.quantization.zeropoint.push_back(z);
+                       LOGI("%u: scale[%3.2f], zeropoint[%3.2f]", elem, s, z);
                }
-               option.insert(std::make_pair(iterLayer->first, opt));
        }
 
+       option.insert(std::make_pair(iterLayer->first, opt));
+
        LOGI("LEAVE");
 
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int InputMetadata::Parse(JsonObject *root)
+int InputMetadata::Parse(JsonObject *root, std::string key_name)
 {
        LOGI("ENTER");
 
-       int ret = GetTensorInfo(root);
-       if (ret != MEDIA_VISION_ERROR_NONE) {
-               LOGE("Fail to GetTensorInfo[%d]", ret);
-               return ret;
-       }
+       JsonArray *inputList = json_object_get_array_member(root, key_name.c_str());
+       LOGI("input tensor count : %u", json_array_get_length(inputList));
 
-       ret = GetPreProcess(root);
-       if (ret != MEDIA_VISION_ERROR_NONE) {
-               LOGE("Fail to GetPreProcess[%d]", ret);
-               return ret;
+       for (unsigned int idx = 0; idx < json_array_get_length(inputList); ++idx) {
+               JsonNode *node = json_array_get_element(inputList, idx);
+               gchar *serialized = json_to_string(node, 1); // caller owns this buffer: copy it, then free it
+               std::string token(serialized ? serialized : "");
+               g_free(serialized);
+               std::string tensor_name = token.substr(0, token.find(":")); // text before the first ':' holds the first key
+               const std::vector<char> delimiters = {'{', ' ', ':', '\n', '\"'};
+
+               for (auto& delimiter : delimiters)
+                       tensor_name.erase(std::remove(tensor_name.begin(), tensor_name.end(), delimiter), tensor_name.end());
+
+               if (tensor_name.compare((std::string("tensor") + std::to_string(idx + 1))) != 0) {
+                       LOGE("Invalid tensor element. A tensor element form should be `tensorN`.");
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
+
+               JsonObject *object = json_node_get_object(node);
+               int ret = GetTensorInfo(object, tensor_name);
+               if (ret != MEDIA_VISION_ERROR_NONE) {
+                       LOGE("Fail to GetTensorInfo[%d]", ret);
+                       return ret;
+               }
        }
 
        parsed = true;
diff --git a/mv_machine_learning/inference/src/LabelInfo.cpp b/mv_machine_learning/inference/src/LabelInfo.cpp
new file mode 100644 (file)
index 0000000..57b31c4
--- /dev/null
@@ -0,0 +1,46 @@
+
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <mv_private.h>
+#include "LabelInfo.h"
+
+using namespace mediavision::inference::label;
+
+int LabelInfo::Parse(JsonObject *root)
+{
+       LOGI("ENTER");
+       // Requires a "label" member in root; its absence is reported as INVALID_OPERATION.
+       if (!json_object_has_member(root, "label")) {
+               LOGE("No label outputmetadata");
+               LOGI("LEAVE");
+               return MEDIA_VISION_ERROR_INVALID_OPERATION;
+       }
+
+       JsonNode *node = json_object_get_member(root, "label");
+       JsonObject *object = json_node_get_object(node);
+       // Every position of the "index" array that holds 1 is passed to dimInfo.SetValidIndex().
+       JsonArray *array = json_object_get_array_member(object, "index");
+       unsigned int elements2 = json_array_get_length(array);
+       LOGI("range dim: size[%u]", elements2);
+       for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+               if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
+                       dimInfo.SetValidIndex(elem2);
+       }
+
+       LOGI("LEAVE");
+       return MEDIA_VISION_ERROR_NONE;
+}
index 34f83a6..74c6ae5 100644 (file)
@@ -25,6 +25,8 @@
 #include <queue>
 #include <algorithm>
 
+using namespace std;
+
 namespace mediavision
 {
 namespace inference
@@ -87,13 +89,23 @@ int Metadata::Parse()
                goto _ERROR_;
        }
 
-       ret = mInputMeta.Parse(json_object_get_object_member(object, "inputmetadata"));
+       if (!json_object_has_member(object, "input")) {
+               LOGE("No input node.");
+               ret = MEDIA_VISION_ERROR_INVALID_OPERATION;
+               goto _ERROR_; // leave through the shared exit label, like the other failures in this function
+       }
+       ret = mInputMeta.Parse(object, "input");
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to parse input Meta[%d]", ret);
                goto _ERROR_;
        }
 
-       ret = mOutputMeta.Parse(json_object_get_object_member(object, "outputmetadata"));
+       if (!json_object_has_member(object, "output")) {
+               LOGE("No output node.");
+               ret = MEDIA_VISION_ERROR_INVALID_OPERATION;
+               goto _ERROR_; // leave through the shared exit label, like the other failures in this function
+       }
+       ret = mOutputMeta.Parse(object, "output");
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to parse output meta[%d]", ret);
                goto _ERROR_;
diff --git a/mv_machine_learning/inference/src/NumberInfo.cpp b/mv_machine_learning/inference/src/NumberInfo.cpp
new file mode 100644 (file)
index 0000000..cdc3fcb
--- /dev/null
@@ -0,0 +1,48 @@
+
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <mv_private.h>
+#include "NumberInfo.h"
+
+using namespace mediavision::inference::number;
+
+int NumberInfo::Parse(JsonObject *root)
+{
+       LOGI("ENTER");
+       // Requires a "number" member in root; its absence is reported as INVALID_OPERATION.
+       if (!json_object_has_member(root, "number")) {
+               LOGE("No number outputmetadata");
+               LOGI("LEAVE");
+               return MEDIA_VISION_ERROR_INVALID_OPERATION;
+       }
+
+       // number
+       JsonNode *node = json_object_get_member(root, "number");
+       JsonObject *object = json_node_get_object(node);
+       // Every position of the "index" array that holds 1 is passed to dimInfo.SetValidIndex().
+       JsonArray *array = json_object_get_array_member(object, "index");
+       unsigned int elements2 = json_array_get_length(array);
+
+       LOGI("range dim: size[%u]", elements2);
+
+       for (unsigned int elem2 = 0; elem2 < elements2; ++elem2)
+               if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
+                       dimInfo.SetValidIndex(elem2);
+
+       LOGI("LEAVE");
+       return MEDIA_VISION_ERROR_NONE;
+}
index 82939f2..15166d3 100644 (file)
@@ -28,14 +28,14 @@ namespace inference
 int ObjectDecoder::init()
 {
        if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
-               if (!mTensorBuffer.exist(mMeta.GetBoxLabelName()) || !mTensorBuffer.exist(mMeta.GetBoxNumberName())) {
-                       LOGE("buffer buffers named of %s or %s are NULL", mMeta.GetBoxLabelName().c_str(),
-                                mMeta.GetBoxNumberName().c_str());
+               if (!mTensorBuffer.exist(mMeta.GetLabelName()) || !mTensorBuffer.exist(mMeta.GetNumberName())) {
+                       LOGE("buffer buffers named of %s or %s are NULL", mMeta.GetLabelName().c_str(),
+                                mMeta.GetNumberName().c_str());
 
                        return MEDIA_VISION_ERROR_INVALID_OPERATION;
                }
 
-               std::vector<int> indexes = mMeta.GetBoxNumberDimInfo().GetValidIndexAll();
+               std::vector<int> indexes = mMeta.GetNumberDimInfo().GetValidIndexAll();
                if (indexes.size() != 1) {
                        LOGE("Invalid dim size. It should be 1");
                        return MEDIA_VISION_ERROR_INVALID_OPERATION;
@@ -43,7 +43,7 @@ int ObjectDecoder::init()
 
                // mNumberOfObjects is set again if INFERENCE_BOX_DECODING_TYPE_BYPASS.
                // Otherwise it is set already within ctor.
-               mNumberOfOjects = mTensorBuffer.getValue<int>(mMeta.GetBoxNumberName(), indexes[0]);
+               mNumberOfOjects = mTensorBuffer.getValue<int>(mMeta.GetNumberName(), indexes[0]);
        } else if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
                if (mMeta.GetBoxDecodeInfo().IsAnchorBoxEmpty()) {
                        LOGE("Anchor boxes are required but empty.");
@@ -99,8 +99,8 @@ Box ObjectDecoder::decodeBox(int idx, float score, int label, int offset)
                cHeight /= mScaleH;
        }
 
-       Box box = { .index = mMeta.GetBoxLabelName().empty() ? label :
-                                                                                                                  mTensorBuffer.getValue<int>(mMeta.GetBoxLabelName(), idx),
+       Box box = { .index = mMeta.GetLabelName().empty() ? label :
+                                                                                                                  mTensorBuffer.getValue<int>(mMeta.GetLabelName(), idx),
                                .score = score,
                                .location = cv::Rect2f(cx, cy, cWidth, cHeight) };
 
index 7ac595f..4508856 100644 (file)
@@ -31,27 +31,41 @@ namespace mediavision
 {
 namespace inference
 {
-int OutputMetadata::ParseScore(JsonObject *root)
+int OutputMetadata::ParseScore(JsonObject *root, LayerInfo& layer)
 {
        if (!json_object_has_member(root, "score")) {
                LOGI("No score outputmetadata");
                return MEDIA_VISION_ERROR_NONE;
        }
 
-       return score.ParseScore(root);
+       int ret = score.ParseScore(root);
+       if (ret != MEDIA_VISION_ERROR_NONE) {
+               LOGE("Fail to parse score.");
+               return ret;
+       }
+
+       layer.decodingType |= POSTPROCESS_DECODING_TYPE_SCORE;
+       return ret;
 }
 
-int OutputMetadata::ParseBox(JsonObject *root)
+int OutputMetadata::ParseBox(JsonObject *root, LayerInfo& layer)
 {
        if (!json_object_has_member(root, "box")) {
                LOGI("No box outputmetadata");
                return MEDIA_VISION_ERROR_NONE;
        }
 
-       return box.ParseBox(root);
+       int ret = box.ParseBox(root);
+       if (ret != MEDIA_VISION_ERROR_NONE) {
+               LOGE("Fail to parse box.");
+               return ret;
+       }
+
+       layer.decodingType |= POSTPROCESS_DECODING_TYPE_BOX;
+       return ret;
 }
 
-int OutputMetadata::ParseLandmark(JsonObject *root)
+int OutputMetadata::ParseLandmark(JsonObject *root, LayerInfo& layer)
 {
        LOGI("ENTER");
 
@@ -62,12 +76,13 @@ int OutputMetadata::ParseLandmark(JsonObject *root)
        }
 
        landmark.ParseLandmark(root);
+       layer.decodingType |= POSTPROCESS_DECODING_TYPE_LANDMARK;
 
        LOGI("LEAVE");
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int OutputMetadata::ParseOffset(JsonObject *root)
+int OutputMetadata::ParseOffset(JsonObject *root, LayerInfo& layer)
 {
        LOGI("ENTER");
 
@@ -78,49 +93,37 @@ int OutputMetadata::ParseOffset(JsonObject *root)
        }
 
        offsetVec.ParseOffset(root, mSupportedShapeType);
+       layer.decodingType |= POSTPROCESS_DECODING_TYPE_OFFSETVEC;
 
        LOGI("LEAVE");
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int OutputMetadata::Parse(JsonObject *root)
+int OutputMetadata::GetPostProcess(JsonObject *root, LayerInfo& layer)
 {
        LOGI("ENTER");
 
-       int ret = ParseScore(root);
-       if (ret != MEDIA_VISION_ERROR_NONE) {
-               LOGE("Fail to GetScore[%d]", ret);
-               return ret;
-       }
+       JsonNode *node = json_object_get_member(root, "postprocess");
+       JsonObject *object = json_node_get_object(node);
 
-       ret = ParseBox(root);
-       if (ret != MEDIA_VISION_ERROR_NONE) {
-               LOGE("Fail to GetBox[%d]", ret);
-               return ret;
+       if (json_object_has_member(object, "score")) {
+               int ret = ParseScore(object, layer);
+               if (ret != MEDIA_VISION_ERROR_NONE) {
+                       LOGE("Fail to GetScore[%d]", ret);
+                       return ret;
+               }
        }
 
-       if (!box.GetName().empty()) {
-               // In case of object detection 3d for single object, other property isn't needed.
-               if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_SINGLE_3D) {
-                       parsed = true;
-                       return MEDIA_VISION_ERROR_NONE;
+       if (json_object_has_member(object, "box")) {
+               int ret = ParseBox(object, layer);
+               if (ret != MEDIA_VISION_ERROR_NONE) {
+                       LOGE("Fail to GetBox[%d]", ret);
+                       return ret;
                }
 
                // addtional parsing is required according to decoding type
-               if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
-                       ret = box.ParseLabel(root);
-                       if (ret != MEDIA_VISION_ERROR_NONE) {
-                               LOGE("Fail to GetLabel[%d]", ret);
-                               return ret;
-                       }
-
-                       ret = box.ParseNumber(root);
-                       if (ret != MEDIA_VISION_ERROR_NONE) {
-                               LOGE("Fail to GetNumber[%d]", ret);
-                               return ret;
-                       }
-               } else {
-                       ret = box.ParseDecodeInfo(root);
+               if (box.GetDecodingType() != INFERENCE_BOX_DECODING_TYPE_BYPASS) {
+                       int ret = box.ParseDecodeInfo(object);
                        if (ret != MEDIA_VISION_ERROR_NONE) {
                                LOGE("Fail to GetBoxDecodeInfo[%d]", ret);
                                return ret;
@@ -134,43 +137,133 @@ int OutputMetadata::Parse(JsonObject *root)
                                }
                        }
                }
+
        }
 
-       ret = ParseLandmark(root);
-       if (ret != MEDIA_VISION_ERROR_NONE) {
-               LOGE("Fail to GetLandmark[%d]", ret);
-               return ret;
+       if (json_object_has_member(object, "label")) {
+               int ret = label.Parse(object);
+               if (ret != MEDIA_VISION_ERROR_NONE) {
+                       LOGE("Fail to GetLabel[%d]", ret);
+                       return ret;
+               }
+
+               layer.decodingType |= POSTPROCESS_DECODING_TYPE_LABEL;
+       }
+
+       if (json_object_has_member(object, "number")) {
+               int ret = number.Parse(object);
+               if (ret != MEDIA_VISION_ERROR_NONE) {
+                       LOGE("Fail to GetNumber[%d]", ret);
+                       return ret;
+               }
+
+               layer.decodingType |= POSTPROCESS_DECODING_TYPE_NUMBER;
        }
 
-       if (!landmark.GetName().empty()) {
+       if (json_object_has_member(object, "offset")) {
+               int ret = ParseOffset(object, layer);
+               if (ret != MEDIA_VISION_ERROR_NONE) {
+                       LOGE("Fail to GetOffsetVector[%d]", ret);
+                       return ret;
+               }
+       }
+
+       if (json_object_has_member(object, "landmark")) {
+               int ret = ParseLandmark(object, layer);
+               if (ret != MEDIA_VISION_ERROR_NONE) {
+                       LOGE("Fail to GetLandmark[%d]", ret);
+                       return ret;
+               }
+
                if (landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP ||
                        landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) {
-                       ret = landmark.ParseDecodeInfo(root, mSupportedShapeType);
+                       ret = landmark.ParseDecodeInfo(object, mSupportedShapeType);
                        if (ret != MEDIA_VISION_ERROR_NONE) {
                                LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret);
                                return ret;
                        }
                }
+       }
 
-               if (landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) {
-                       ret = ParseOffset(root);
-                       if (ret != MEDIA_VISION_ERROR_NONE) {
-                               LOGE("Fail to GetOffsetVector[%d]", ret);
-                               return ret;
-                       }
+       if (json_object_has_member(object, "displacement")) {
+               int ret = landmark.ParseDisplacement(object, mSupportedShapeType);
+               if (ret != MEDIA_VISION_ERROR_NONE) {
+                       LOGE("Fail to GetDispVector[%d]", ret);
+                       return ret;
+               }
+
+               layer.decodingType |= POSTPROCESS_DECODING_TYPE_DISPLACEMENT;
 
-                       ret = landmark.ParseDisplacement(root, mSupportedShapeType);
+               // edgemap node is needed by posenet model(multi pose model) which has "displacement" node and
+               // decoding type of the multi pose model is INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE.
+               if (json_object_has_member(object, "edgemap")) {
+                       ret = landmark.ParseEdgeMap(object);
                        if (ret != MEDIA_VISION_ERROR_NONE) {
-                               LOGE("Fail to GetDispVector[%d]", ret);
+                               LOGE("Fail to GetEdgeConnection[%d]", ret);
                                return ret;
                        }
+               }
+       }
+
+       parsed = true;
+
+       LOGI("LEAVE");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int OutputMetadata::Parse(JsonObject *root, std::string key_name)
+{
+       LOGI("ENTER");
 
-                       ret = landmark.ParseEdgeMap(root);
+       JsonArray *outputList = json_object_get_array_member(root, key_name.c_str());
+
+       LOGI("output tensor count = %d", json_array_get_length(outputList));
+
+       for (auto idx = 0; idx < json_array_get_length(outputList); ++idx) {
+               JsonNode *output_node = json_array_get_element(outputList, idx);
+           std::string token(json_to_string(output_node, 1));
+               int pos = token.find(":");
+               std::string tensor_name = token.substr(0, pos);
+               const std::vector<char> delimiters = {'{', ' ', ':', '\n', '\"'};
+
+               for (auto& delimiter : delimiters)
+                       tensor_name.erase(std::remove(tensor_name.begin(), tensor_name.end(), delimiter), tensor_name.end());
+
+               if (tensor_name.compare((std::string("tensor") + std::to_string(idx + 1))) != 0) {
+                       LOGE("Invalid tensor element. A tensor element form should be `tensorN`.");
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
+
+               LOGI("Parse tensor name : %s", tensor_name.c_str());
+
+               JsonObject *output_object = json_node_get_object(output_node);
+
+               if (!json_object_has_member(output_object, tensor_name.c_str())) {
+                       LOGE("No tensor member.");
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
+
+               JsonNode *tensor_node = json_object_get_member(output_object, tensor_name.c_str());
+               JsonObject *tensor_object = json_node_get_object(tensor_node);
+               std::string name_value = json_object_get_string_member(tensor_object, "name");
+
+               LOGI("name = %s", name_value.c_str());
+
+               // TODO. add tensor information here.
+
+               LayerInfo layer = { name_value, };
+
+               if (json_object_has_member(tensor_object, "postprocess")) {
+                       int ret = GetPostProcess(tensor_object, layer);
                        if (ret != MEDIA_VISION_ERROR_NONE) {
-                               LOGE("Fail to GetEdgeConnection[%d]", ret);
+                               LOGE("Fail to GetPostProcess.");
                                return ret;
                        }
                }
+
+               _tensor_info.insert(std::make_pair(name_value, layer));
+
        }
 
        parsed = true;
index 99ad700..5a0a932 100644 (file)
@@ -461,12 +461,11 @@ int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, inference_displaceme
 
        int idxX = idxY + static_cast<int>(mMeta.GetLandmarkEdges().size());
 
-       for (auto &dispVec : mMeta.GetLandmarkDispVecAll()) {
-               if (dispVec.GetType() == type) { // 0: forward
-                       LOGI("%s", dispVec.GetName().c_str());
-                       vector.x = mTensorBuffer.getValue<float>(dispVec.GetName(), idxX);
-                       vector.y = mTensorBuffer.getValue<float>(dispVec.GetName(), idxY);
-               }
+       DispVec& dispVec = mMeta.GetLandmarkDispVec();
+       if (dispVec.GetType() == type) { // 0: forward
+               LOGI("%s", dispVec.GetName().c_str());
+               vector.x = mTensorBuffer.getValue<float>(dispVec.GetName(), idxX);
+               vector.y = mTensorBuffer.getValue<float>(dispVec.GetName(), idxY);
        }
 
        LOGI("LEAVE");
index 0560218..bcfcf7f 100644 (file)
@@ -27,17 +27,7 @@ int ScoreInfo::ParseScore(JsonObject *root)
        }
        JsonObject *pObject = json_object_get_object_member(root, "score");
 
-       JsonArray *array = json_object_get_array_member(pObject, "name");
-       MEDIA_VISION_NULL_ARG_CHECK(array);
-
-       unsigned int elements1 = json_array_get_length(array);
-       MEDIA_VISION_CHECK_CONDITION(elements1 > 0, MEDIA_VISION_ERROR_INVALID_PARAMETER, "No name on meta file");
-
-       for (unsigned int elem1 = 0; elem1 < elements1; ++elem1) {
-               names.push_back(json_array_get_string_element(array, elem1));
-       }
-
-       array = json_object_get_array_member(pObject, "index");
+       JsonArray *array = json_object_get_array_member(pObject, "index");
        unsigned int elements2 = json_array_get_length(array);
        LOGI("range dim: size[%u]", elements2);
        for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
@@ -60,8 +50,7 @@ int ScoreInfo::ParseScore(JsonObject *root)
        }
 
        if (json_object_has_member(pObject, "dequantization")) {
-               array = json_object_get_array_member(pObject, "dequantization");
-               JsonNode *node = json_array_get_element(array, 0);
+               JsonNode *node = json_object_get_member(pObject, "dequantization");
                JsonObject *object = json_node_get_object(node);
 
                deQuantization = std::make_shared<DeQuantization>(json_object_get_double_member(object, "scale"),