Change BoxInfo's box_coordinate, decoding_type and NMSParam's mode to string 89/262889/2
author Tae-Young Chung <ty83.chung@samsung.com>
Mon, 23 Aug 2021 06:38:13 +0000 (15:38 +0900)
committer Tae-Young Chung <ty83.chung@samsung.com>
Mon, 23 Aug 2021 07:13:40 +0000 (16:13 +0900)
Users can now specify the box_coordinate, decoding_type, and mode
metadata as descriptive strings, which makes the purpose of each field clear.
The string values are parsed and converted to the enumeration types
inference_box_coordinate_type_e, inference_box_decoding_type_e,
and inference_box_nms_type_e, respectively.

Change-Id: I4419a288f070456c9f8d411feed7706cd0f806f0
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
meta-template/fd_blazeface_front_128x128.json
meta-template/fd_mobilenet_v1_ssd_postop_300x300.json
meta-template/od_mobilenet_v1_ssd_postop_300x300.json
meta-template/od_mobilenet_v2_ssd_320x320.json
mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h
mv_machine_learning/mv_inference/inference/src/Inference.cpp
mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp
mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
packaging/capi-media-vision.spec

index 2cf11b7..545e101 100644 (file)
@@ -38,8 +38,8 @@
                 "index" : [-1, -1, 1],
                 "box_type" : "ORIGIN_CENTER",
                 "box_order" : [1, 0, 3, 2],
-                "box_coordinate" : 1,
-                "decoding_type" : 1,
+                "box_coordinate" : "PIXEL",
+                "decoding_type" : "SSD_ANCHOR",
                 "decoding_info" :
                 {
                     "anchor" :
@@ -65,8 +65,8 @@
                     },
                     "nms" :
                     {
-                        "mode": 0,
-                        "threshold": 0.25
+                        "mode": "STANDARD",
+                        "iou_threshold": 0.25
                     }
                 }
             }
index 734f5af..887071e 100644 (file)
@@ -38,8 +38,8 @@
                "index" : [-1, -1, 1],
                "box_type" : "ORIGIN_LEFTTOP",
                "box_order" : [1, 0, 3, 2],
-               "box_coordinate" : 0,
-               "decoding_type": 0
+               "box_coordinate" : "RATIO",
+               "decoding_type": "BYPASS"
             }
         ],
         "label" : [
index 734f5af..887071e 100644 (file)
@@ -38,8 +38,8 @@
                "index" : [-1, -1, 1],
                "box_type" : "ORIGIN_LEFTTOP",
                "box_order" : [1, 0, 3, 2],
-               "box_coordinate" : 0,
-               "decoding_type": 0
+               "box_coordinate" : "RATIO",
+               "decoding_type": "BYPASS"
             }
         ],
         "label" : [
index 0429a46..281e7fc 100644 (file)
@@ -38,8 +38,8 @@
                 "index" : [-1, -1, 1],
                 "box_type" : "ORIGIN_CENTER",
                 "box_order" : [1, 0, 3, 2],
-                "box_coordinate" : 0,
-                "decoding_type" : 1,
+                "box_coordinate" : "RATIO",
+                "decoding_type" : "SSD_ANCHOR",
                 "decoding_info" :
                 {
                     "anchor" :
@@ -65,8 +65,8 @@
                     },
                     "nms" :
                     {
-                        "mode": 0,
-                        "threshold": 0.4
+                        "mode": "STANDARD",
+                        "iou_threshold": 0.4
                     }
                 }
             }
index 1931425..07488ec 100644 (file)
@@ -118,10 +118,15 @@ namespace inference
 
                        class NMSParam {
                        public:
-                               int mode; /**< 0: IOU */
-                               float threshold;
+                               inference_box_nms_type_e mode; /**< 0: standard */
+                               float iouThreshold;
+
+                               std::map<std::string, inference_box_nms_type_e> supportedBoxNmsTypes;
+
+                               NMSParam() : mode(INFERENCE_BOX_NMS_TYPE_NONE), iouThreshold(0.2f) {
+                                       supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD});
+                               };
 
-                               NMSParam() : mode(-1), threshold(0.2f) {};
                                ~NMSParam() = default;
                        };
 
@@ -192,7 +197,7 @@ namespace inference
                        // Nms param
                        int ParseNms(JsonObject *root);
                        int GetNmsMode();
-                       float GetNmsThreshold();
+                       float GetNmsIouThreshold();
 
                        // Rotate param
                        int ParseRotate(JsonObject *root);
@@ -217,11 +222,13 @@ namespace inference
                DimInfo dimInfo;
                inference_box_type_e type; // 0:L-T-R-B, 1: Cx-Cy-W-H
                std::vector<int> order; // Order based on box type
-               int coordinate; // 0: ratio, 1: pixel
-               int decodingType; // 0: post-op, 1: achorbox(ssd), 2:yolo(?)
+               inference_box_coordinate_type_e coordinate; // 0: ratio, 1: pixel
+               inference_box_decoding_type_e decodingType; // 0: bypass , 1:ssd with anchor
                DecodeInfo decodingInfo;
 
                std::map<std::string, inference_box_type_e> supportedBoxTypes;
+               std::map<std::string, inference_box_coordinate_type_e> supportedBoxCoordinateTypes;
+               std::map<std::string, inference_box_decoding_type_e> supportedBoxDecodingTypes;
 
        public:
                BoxInfo();
@@ -230,7 +237,7 @@ namespace inference
                std::string GetName() { return name; }
                DimInfo GetDimInfo() { return dimInfo; }
                inference_box_type_e GetType() { return type; }
-               int GetDecodingType() { return decodingType; }
+               inference_box_decoding_type_e GetDecodingType() { return decodingType; }
                std::vector<int> GetOrder() { return order; }
                int GetCoordinate() { return coordinate; }
                DecodeInfo& GetDecodeInfo() {return decodingInfo; }
index f82dbbc..523d0cb 100644 (file)
@@ -37,6 +37,21 @@ namespace inference
                INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP,
                INFERENCE_BOX_TYPE_ORIGIN_CENTER
        } inference_box_type_e;
+
+       typedef enum {
+               INFERENCE_BOX_COORDINATE_TYPE_RATIO,
+               INFERENCE_BOX_COORDINATE_TYPE_PIXEL
+       } inference_box_coordinate_type_e;
+
+       typedef enum {
+               INFERENCE_BOX_DECODING_TYPE_BYPASS,
+               INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR,
+       } inference_box_decoding_type_e;
+
+       typedef enum {
+               INFERENCE_BOX_NMS_TYPE_NONE = -1,
+               INFERENCE_BOX_NMS_TYPE_STANDARD
+       } inference_box_nms_type_e;
 }
 }
 
index b0380c6..e58a675 100755 (executable)
@@ -1305,7 +1305,7 @@ namespace inference
 
                        int boxOffset = 0;
                        int numberOfObjects = 0;
-                       if (boxInfo.GetDecodingType() == 0) {
+                       if (boxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
                                std::vector<int> boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll();
                                if (boxIndexes.size() != 1) {
                                        LOGE("Invalid dim size. It should be 1");
@@ -1471,7 +1471,7 @@ namespace inference
 
                        int boxOffset = 0;
                        int numberOfFaces = 0;
-                       if (boxInfo.GetDecodingType() == 0) {
+                       if (boxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
                                std::vector<int> boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll();
                                if (boxIndexes.size() != 1) {
                                        LOGE("Invalid dim size. It should be 1");
index a1b006b..2831edc 100755 (executable)
@@ -27,7 +27,7 @@ namespace inference
 {
        int ObjectDecoder::init()
        {
-               if (mBoxInfo.GetDecodingType() == 0) {
+               if (mBoxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
                        if (!mTensorBuffer.exist(mMeta.GetLabel().GetName()) ||
                                !mTensorBuffer.exist(mMeta.GetNumber().GetName()) ) {
                                LOGE("buffer buffers named of %s or %s are NULL",
@@ -42,8 +42,8 @@ namespace inference
                                return MEDIA_VISION_ERROR_INVALID_OPERATION;
                        }
 
-                       // When GetDecodingType() == 0, the mNumberOfObjects should be set again.
-                       // otherwise, it is set already within ctor.
+                       // mNumberOfObjects is set again if INFERENCE_BOX_DECODING_TYPE_BYPASS.
+                       // Otherwise it is set already within ctor.
                        mNumberOfOjects = mTensorBuffer.getValue<int>(
                                                                mMeta.GetNumber().GetName(), indexes[0]);
                } else {
@@ -92,8 +92,8 @@ namespace inference
                        cHeight = cHeight - tmpCy; // bottom - top
                }
 
-               // normalize if GetCoordinate() == 1 which is pixel coordinate (absolute)
-               if (mBoxInfo.GetCoordinate() == 1) {
+               // convert coordinate to RATIO if PIXEL
+               if (mBoxInfo.GetCoordinate() == INFERENCE_BOX_COORDINATE_TYPE_PIXEL) {
                        cx /= mScaleW;
                        cy /= mScaleH;
                        cWidth /= mScaleW;
@@ -147,7 +147,7 @@ namespace inference
                int ret = MEDIA_VISION_ERROR_NONE;
 
                for (int idx = 0; idx < mNumberOfOjects; ++idx) {
-                       if (mBoxInfo.GetDecodingType() == 0) {
+                       if (mBoxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
                                float score = decodeScore(idx);
                                if (score <= 0.0f)
                                        continue;
@@ -177,7 +177,7 @@ namespace inference
                        PostProcess postProc;
                        ret = postProc.Nms(boxList,
                                        mBoxInfo.GetDecodeInfo().GetNmsMode(),
-                                       mBoxInfo.GetDecodeInfo().GetNmsThreshold(),
+                                       mBoxInfo.GetDecodeInfo().GetNmsIouThreshold(),
                                        mResultBoxes);
                        if (ret != MEDIA_VISION_ERROR_NONE) {
                                LOGE("Fail to non-maximum suppression[%d]", ret);
index 77f841d..edd1596 100755 (executable)
@@ -124,13 +124,19 @@ namespace inference
                        dimInfo(),
                        type(INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP),
                        order(),
-                       coordinate(0),
-                       decodingType(0),
+                       coordinate(INFERENCE_BOX_COORDINATE_TYPE_RATIO),
+                       decodingType(INFERENCE_BOX_DECODING_TYPE_BYPASS),
                        decodingInfo()
 
        {
                supportedBoxTypes.insert({"ORIGIN_LEFTTOP", INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP});
                supportedBoxTypes.insert({"ORIGIN_CENTER", INFERENCE_BOX_TYPE_ORIGIN_CENTER});
+
+               supportedBoxCoordinateTypes.insert({"RATIO", INFERENCE_BOX_COORDINATE_TYPE_RATIO});
+               supportedBoxCoordinateTypes.insert({"PIXEL", INFERENCE_BOX_COORDINATE_TYPE_PIXEL});
+
+               supportedBoxDecodingTypes.insert({"BYPASS", INFERENCE_BOX_DECODING_TYPE_BYPASS});
+               supportedBoxDecodingTypes.insert({"SSD_ANCHOR", INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR});
        }
 
        int BoxInfo::ParseBox(JsonObject *root)
@@ -157,6 +163,8 @@ namespace inference
 
                        try {
                                type = OutputMetadata::GetSupportedType(pObject, "box_type", supportedBoxTypes);
+                               coordinate = OutputMetadata::GetSupportedType(pObject, "box_coordinate", supportedBoxCoordinateTypes);
+                               decodingType = OutputMetadata::GetSupportedType(pObject, "decoding_type", supportedBoxDecodingTypes);
                        } catch (const std::exception& e) {
                                LOGE("Invalid %s", e.what());
                        }
@@ -169,12 +177,6 @@ namespace inference
                                order.push_back(val);
                                LOGI("%d", val);
                        }
-
-                       coordinate = static_cast<int>(json_object_get_int_member(pObject, "box_coordinate"));
-                       LOGI("box coordinate: %d", coordinate);
-
-                       decodingType = static_cast<int>(json_object_get_int_member(pObject, "decoding_type"));
-                       LOGI("box decodeing type: %d", decodingType);
                }
 
                LOGI("LEAVE");
@@ -552,8 +554,13 @@ namespace inference
                }
 
                JsonObject *object = json_object_get_object_member(root, "nms");
-               this->nmsParam.mode = static_cast<int>(json_object_get_int_member(object, "mode"));
-               this->nmsParam.threshold = static_cast<float>(json_object_get_double_member(object,"threshold"));
+               try {
+                       this->nmsParam.mode = OutputMetadata::GetSupportedType(object, "mode", this->nmsParam.supportedBoxNmsTypes);
+               } catch (const std::exception& e) {
+                       LOGE("Invalid %s", e.what());
+               }
+
+               this->nmsParam.iouThreshold = static_cast<float>(json_object_get_double_member(object,"iou_threshold"));
 
                return MEDIA_VISION_ERROR_NONE;
        }
@@ -563,9 +570,9 @@ namespace inference
                return this->nmsParam.mode;
        }
 
-       float BoxInfo::DecodeInfo::GetNmsThreshold()
+       float BoxInfo::DecodeInfo::GetNmsIouThreshold()
        {
-               return this->nmsParam.threshold;
+               return this->nmsParam.iouThreshold;
        }
 
        int BoxInfo::DecodeInfo::ParseRotate(JsonObject *root)
@@ -1009,7 +1016,7 @@ namespace inference
 
                if (!box.GetName().empty()) {
                        // addtional parsing is required according to decoding type
-                       if (box.GetDecodingType() == 0) {
+                       if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
 
                                ret = ParseLabel(root);
                                if (ret != MEDIA_VISION_ERROR_NONE) {
@@ -1023,7 +1030,7 @@ namespace inference
                                        return ret;
                                }
 
-                       } else if (box.GetDecodingType() == 1) {
+                       } else if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
                                ret = ParseBoxDecodeInfo(root);
                                if (ret != MEDIA_VISION_ERROR_NONE) {
                                        LOGE("Fail to GetBoxDecodeInfo[%d]", ret);
index 1f05fba..be18794 100644 (file)
@@ -1,7 +1,7 @@
 Name:        capi-media-vision
 Summary:     Media Vision library for Tizen Native API
-Version:     0.8.9
-Release:     1
+Version:     0.8.10
+Release:     0
 Group:       Multimedia/Framework
 License:     Apache-2.0 and BSD-3-Clause
 Source0:     %{name}-%{version}.tar.gz