Change offset's shape_type and displacement's shape_type and type to string 35/263435/1
author: Tae-Young Chung <ty83.chung@samsung.com>
Thu, 2 Sep 2021 07:04:05 +0000 (16:04 +0900)
committer: Tae-Young Chung <ty83.chung@samsung.com>
Thu, 2 Sep 2021 07:04:05 +0000 (16:04 +0900)
Change-Id: Ide16c57e44532d10e9633a1399c6d787991baf33
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
meta-template/pld_cpm_192x192.json
meta-template/pld_mobilenet_v1_posenet_multi_257x257.json
mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h
mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
packaging/capi-media-vision.spec

index a0bb6e6..1b24378 100644 (file)
@@ -43,7 +43,7 @@
                 {
                     "heatmap" :
                      {
-                         "shape_type": 1
+                         "shape_type": "NHWC"
                      }
                 }
             }
index 2ef057a..a65951a 100644 (file)
@@ -43,7 +43,7 @@
                 {
                     "heatmap" :
                      {
-                         "shape_type" : 1,
+                         "shape_type" : "NHWC",
                          "nms_radius" : 50.0
                      }
                 }
@@ -53,7 +53,7 @@
             {
                 "name" : "MobilenetV1/offset_2/BiasAdd",
                 "index" : [-1, 1, 1, 1],
-                "shape_type" : 1,
+                "shape_type" : "NHWC",
                 "dim_type" : 2
             }
         ],
             {
                 "name" : "MobilenetV1/displacement_fwd_2/BiasAdd",
                 "index" : [-1, 1, 1, 1],
-                "shape_type" : 1,
+                "shape_type" : "NHWC",
                 "dim_type" : 2,
-                "type" : 0
+                "type" : "FORWARD"
             },
             {
                 "name" : "MobilenetV1/displacement_bwd_2/BiasAdd",
                 "index" : [-1, 1, 1, 1],
-                "shape_type" : 1,
+                "shape_type" : "NHWC",
                 "dim_type" : 2,
-                "type" : 1
+                "type" : "BACKWARD"
             }
         ],
         "edgemap" : [
index fe917ad..97c8821 100644 (file)
@@ -346,15 +346,16 @@ namespace inference
        private:
                std::string name;
                DimInfo dimInfo;
-               int type;
+               inference_displacement_type_e type;
                int shapeType;
                int dimType;
+               std::map<std::string, inference_displacement_type_e> supportedDispTypes;
        public:
-               DispVec() = default;
+               DispVec();
                ~DispVec() = default;
                std::string GetName() { return name; }
                DimInfo GetDimInfo() { return dimInfo; }
-               int GetType() { return type; }
+               inference_displacement_type_e GetType() { return type; }
                int GetShapeType() { return shapeType; }
                int GetDimType() { return dimType; }
 
@@ -397,12 +398,13 @@ namespace inference
                int ParseEdgeMap(JsonObject * root);
 
        public:
+               static std::map<std::string, inference_tensor_shape_type_e> supportedTensorShapes;
                /**
                 * @brief   Creates an OutputMetadata class instance.
                 *
                 * @since_tizen 6.5
                 */
-               OutputMetadata() : parsed(false) {};
+               OutputMetadata();
 
                /**
                 * @brief   Destroys an OutputMetadata class instance including
index 0a0aadc..440fa76 100644 (file)
@@ -70,6 +70,11 @@ namespace inference
                INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP,
                INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE
        } inference_landmark_decoding_type_e;
+
+       typedef enum {
+               INFERENCE_DISPLACEMENT_TYPE_FORWARD,
+               INFERENCE_DISPLACEMENT_TYPE_BACKWARD
+       } inference_displacement_type_e;
 }
 }
 
index 545c385..11289ac 100644 (file)
@@ -56,10 +56,10 @@ namespace inference
                int getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal);
                int findPose(LandmarkPoint& root, std::vector<LandmarkPoint>& decodedLandmarks,
                                                        float scaleW, float scaleH);
-               int traverseToNeighbor(int edgeId, int toId, int dir,
+               int traverseToNeighbor(int edgeId, int toId, inference_displacement_type_e dir,
                                                                LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
                                                                float scaleW, float scaleH);
-               int getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector);
+               int getEdgeVector(cv::Point index, int edgeId, inference_displacement_type_e dir, cv::Point2f& vector);
 
                int convertXYZtoX(int x, int y, int c);
 
index 968bea3..d04daff 100755 (executable)
@@ -27,6 +27,22 @@ namespace mediavision
 {
 namespace inference
 {
+       std::map<std::string, inference_tensor_shape_type_e> OutputMetadata::supportedTensorShapes =
+               {{"NCHW", INFERENCE_TENSOR_SHAPE_NCHW}, {"NHWC", INFERENCE_TENSOR_SHAPE_NHWC}};
+
+       OutputMetadata::OutputMetadata() :
+                       parsed(false),
+                       score(),
+                       box(),
+                       label(),
+                       landmark(),
+                       offsetVec(),
+                       dispVecs(),
+                       edgeMap()
+       {
+
+       }
+
        ScoreInfo::ScoreInfo() :
                        name(),
                        dimInfo(),
@@ -846,10 +862,15 @@ namespace inference
                        }
 
                        JsonObject *object = json_object_get_object_member(cObject, "heatmap") ;
-                       landmark.GetDecodingInfo().heatMap.shapeType =
-                                                       static_cast<inference_tensor_shape_type_e>(json_object_get_int_member(object, "shape_type"));
+                       try {
+                               landmark.GetDecodingInfo().heatMap.shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes);
+                       } catch (const std::exception& e) {
+                               LOGE("Invalid %s", e.what());
+                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
+                       }
+
                        std::vector<int> heatMapIndexes = landmark.GetDimInfo().GetValidIndexAll();
-                       if (landmark.GetDecodingInfo().heatMap.shapeType == 0) {
+                       if (landmark.GetDecodingInfo().heatMap.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
                                landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[0];
                                landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[1];
                                landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[2];
@@ -892,8 +913,12 @@ namespace inference
                                        dimInfo.SetValidIndex(elem2);
                        }
 
-                       shapeType = static_cast<int>(json_object_get_int_member(pObject, "shape_type"));
-                       LOGI("shape type: %d", shapeType);
+                       try {
+                               shapeType = OutputMetadata::GetSupportedType(pObject, "shape_type", OutputMetadata::supportedTensorShapes);
+                       } catch (const std::exception& e) {
+                               LOGE("Invalid %s", e.what());
+                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
+                       }
 
                        dimType = static_cast<int>(json_object_get_int_member(pObject, "dim_type"));
                        LOGI("dim type: %d", dimType);
@@ -919,6 +944,17 @@ namespace inference
                return MEDIA_VISION_ERROR_NONE;
        }
 
+       DispVec::DispVec() :
+                       name(),
+                       dimInfo(),
+                       type(INFERENCE_DISPLACEMENT_TYPE_FORWARD),
+                       shapeType(INFERENCE_TENSOR_SHAPE_NCHW),
+                       dimType(2)
+       {
+               supportedDispTypes.insert({"FORWARD", INFERENCE_DISPLACEMENT_TYPE_FORWARD});
+               supportedDispTypes.insert({"BACKWARD", INFERENCE_DISPLACEMENT_TYPE_BACKWARD});
+       }
+
        int DispVec::ParseDisplacement(JsonObject *root)
        {
                LOGI("ENTER");
@@ -934,15 +970,17 @@ namespace inference
                                dimInfo.SetValidIndex(elem2);
                }
 
-               shapeType = static_cast<int>(json_object_get_int_member(root, "shape_type"));
-               LOGI("shape type: %d", shapeType);
+               try {
+                       shapeType = OutputMetadata::GetSupportedType(root, "shape_type", OutputMetadata::supportedTensorShapes);
+                       type = OutputMetadata::GetSupportedType(root, "type", supportedDispTypes);
+               } catch (const std::exception& e) {
+                       LOGE("Invalid %s", e.what());
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
 
                dimType = static_cast<int>(json_object_get_int_member(root, "dim_type"));
                LOGI("dim type: %d", dimType);
 
-               type = static_cast<int>(json_object_get_int_member(root, "type"));
-               LOGI("type: %d", type);
-
                LOGI("LEAVE");
                return MEDIA_VISION_ERROR_NONE;
        }
index 9798dfc..1ae33a7 100644 (file)
@@ -380,7 +380,7 @@ namespace inference
                        if (decodedLandmarks[fromKeyId].valid == true &&
                                decodedLandmarks[toKeyId].valid == false) {
                                LOGI("BackTravers: from %d to %d", fromKeyId, toKeyId);
-                               traverseToNeighbor(index, toKeyId,  1,
+                               traverseToNeighbor(index, toKeyId,  INFERENCE_DISPLACEMENT_TYPE_BACKWARD,
                                                        decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
                                                        scaleW, scaleH);
                                LOGI("tgt_key_id[%d]: %.4f, %.4f, %.4f", toKeyId,
@@ -400,7 +400,7 @@ namespace inference
                        if (decodedLandmarks[fromKeyId].valid == true &&
                                decodedLandmarks[toKeyId].valid == false) {
                                LOGI("FrwdTravers: form %d to %d", fromKeyId, toKeyId);
-                               traverseToNeighbor(index, toKeyId,  0,
+                               traverseToNeighbor(index, toKeyId,  INFERENCE_DISPLACEMENT_TYPE_FORWARD,
                                                        decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
                                                        scaleW, scaleH);
                        }
@@ -410,7 +410,7 @@ namespace inference
                return MEDIA_VISION_ERROR_NONE;
        }
 
-       int PoseDecoder::traverseToNeighbor(int edgeId, int toId, int dir,
+       int PoseDecoder::traverseToNeighbor(int edgeId, int toId, inference_displacement_type_e dir,
                                                                LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
                                                                float scaleW, float scaleH)
        {
@@ -460,7 +460,7 @@ namespace inference
                return MEDIA_VISION_ERROR_NONE;
        }
 
-       int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector)
+       int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, inference_displacement_type_e type, cv::Point2f& vector)
        {
                LOGI("ENTER");
 
@@ -472,7 +472,7 @@ namespace inference
                int idxX = idxY + static_cast<int>(mMeta.GetEdge().GetEdgesAll().size());
 
                for(auto& dispVec : mMeta.GetDispVecAll()){
-                       if (dispVec.GetType() == dir) { // 0: forward
+                       if (dispVec.GetType() == type) { // 0: forward
                                LOGI("%s", dispVec.GetName().c_str());
                                vector.x = mTensorBuffer.getValue<float>(dispVec.GetName(), idxX);
                                vector.y = mTensorBuffer.getValue<float>(dispVec.GetName(), idxY);
index a1ca707..cf01750 100644 (file)
@@ -1,6 +1,6 @@
 Name:        capi-media-vision
 Summary:     Media Vision library for Tizen Native API
-Version:     0.8.13
+Version:     0.8.14
 Release:     0
 Group:       Multimedia/Framework
 License:     Apache-2.0 and BSD-3-Clause