Change heatmap's, offset's and displacement's shape_type and displacement's type from integer to string

author Tae-Young Chung <ty83.chung@samsung.com>

Thu, 2 Sep 2021 07:04:05 +0000 (16:04 +0900)

committer Tae-Young Chung <ty83.chung@samsung.com>

Thu, 2 Sep 2021 07:04:05 +0000 (16:04 +0900)
author Tae-Young Chung <ty83.chung@samsung.com>
Thu, 2 Sep 2021 07:04:05 +0000 (16:04 +0900)
committer Tae-Young Chung <ty83.chung@samsung.com>
Thu, 2 Sep 2021 07:04:05 +0000 (16:04 +0900)
diff --git a/meta-template/pld_cpm_192x192.json b/meta-template/pld_cpm_192x192.json

index a0bb6e6e67889d70360f0001542a484454aa3749..1b243784bbdff5651995d0e111fb2282c9b0b8c7 100644 (file)
--- a/meta-template/pld_cpm_192x192.json
+++ b/meta-template/pld_cpm_192x192.json
@@ -43,7 +43,7 @@
                  {
                      "heatmap" :
                       {
-                         "shape_type": 1
+                         "shape_type": "NHWC"
                       }
                  }
              }
diff --git a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json

index 2ef057abb26f64d57bf5ca60384afcb88fc71298..a65951a6bb01987ffc9912602c5895dcb2e07968 100644 (file)
--- a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json
+++ b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json
@@ -43,7 +43,7 @@
                  {
                      "heatmap" :
                       {
-                         "shape_type" : 1,
+                         "shape_type" : "NHWC",
                           "nms_radius" : 50.0
                       }
                  }
@@ -53,7 +53,7 @@
              {
                  "name" : "MobilenetV1/offset_2/BiasAdd",
                  "index" : [-1, 1, 1, 1],
-                "shape_type" : 1,
+                "shape_type" : "NHWC",
                  "dim_type" : 2
              }
          ],
@@ -61,16 +61,16 @@
              {
                  "name" : "MobilenetV1/displacement_fwd_2/BiasAdd",
                  "index" : [-1, 1, 1, 1],
-                "shape_type" : 1,
+                "shape_type" : "NHWC",
                  "dim_type" : 2,
-                "type" : 0
+                "type" : "FORWARD"
              },
              {
                  "name" : "MobilenetV1/displacement_bwd_2/BiasAdd",
                  "index" : [-1, 1, 1, 1],
-                "shape_type" : 1,
+                "shape_type" : "NHWC",
                  "dim_type" : 2,
-                "type" : 1
+                "type" : "BACKWARD"
              }
          ],
          "edgemap" : [
diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h

index fe917ad4c96bd68c5f27cc4ac99ac68ad57dcbaa..97c8821e23659d6419592561db7a1dd7bd7931d9 100644 (file)
--- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
+++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
@@ -346,15 +346,16 @@ namespace inference
         private:
                 std::string name;
                 DimInfo dimInfo;
-               int type;
+               inference_displacement_type_e type;
                 int shapeType;
                 int dimType;
+               std::map<std::string, inference_displacement_type_e> supportedDispTypes;
         public:
-               DispVec() = default;
+               DispVec();
                 ~DispVec() = default;
                 std::string GetName() { return name; }
                 DimInfo GetDimInfo() { return dimInfo; }
-               int GetType() { return type; }
+               inference_displacement_type_e GetType() { return type; }
                 int GetShapeType() { return shapeType; }
                 int GetDimType() { return dimType; }
  
@@ -397,12 +398,13 @@ namespace inference
                 int ParseEdgeMap(JsonObject * root);
  
         public:
+               static std::map<std::string, inference_tensor_shape_type_e> supportedTensorShapes;
                 /**
                  * @brief   Creates an OutputMetadata class instance.
                  *
                  * @since_tizen 6.5
                  */
-               OutputMetadata() : parsed(false) {};
+               OutputMetadata();
  
                 /**
                  * @brief   Destroys an OutputMetadata class instance including
diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h

index 0a0aadce5360cc7fd3c52eb36195d6d70de97c88..440fa76a83be751b402b46255ff2818ca42c2d21 100644 (file)
--- a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h
+++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h
@@ -70,6 +70,11 @@ namespace inference
                 INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP,
                 INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE
         } inference_landmark_decoding_type_e;
+
+       typedef enum { /* direction of a displacement vector output */
+               INFERENCE_DISPLACEMENT_TYPE_FORWARD, /**< selected by the "FORWARD" string of a displacement's "type" metadata field */
+               INFERENCE_DISPLACEMENT_TYPE_BACKWARD /**< selected by the "BACKWARD" string of a displacement's "type" metadata field */
+       } inference_displacement_type_e;
  }
  }
  
diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h

index 545c385de8cd240274f616b64efd0921137a3db0..11289ac1d9a4ef4fad90031bc36c21098616371a 100644 (file)
--- a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
+++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
@@ -56,10 +56,10 @@ namespace inference
                 int getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal);
                 int findPose(LandmarkPoint& root, std::vector<LandmarkPoint>& decodedLandmarks,
                                                         float scaleW, float scaleH);
-               int traverseToNeighbor(int edgeId, int toId, int dir,
+               int traverseToNeighbor(int edgeId, int toId, inference_displacement_type_e dir,
                                                                 LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
                                                                 float scaleW, float scaleH);
-               int getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector);
+               int getEdgeVector(cv::Point index, int edgeId, inference_displacement_type_e dir, cv::Point2f& vector);
  
                 int convertXYZtoX(int x, int y, int c);
  
diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp

index 968bea384cb74a234ad5c19f5d115ebb021ddc89..d04daff7344d2b0b6df594d5aba4ddd12f7ab5e4 100755 (executable)
--- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
@@ -27,6 +27,22 @@ namespace mediavision
  {
  namespace inference
  {
+       std::map<std::string, inference_tensor_shape_type_e> OutputMetadata::supportedTensorShapes = // lookup table: "shape_type" string from the metadata -> tensor shape enum
+               {{"NCHW", INFERENCE_TENSOR_SHAPE_NCHW}, {"NHWC", INFERENCE_TENSOR_SHAPE_NHWC}};
+
+       OutputMetadata::OutputMetadata() : // builds an instance with the parsed flag cleared
+                       parsed(false),
+                       score(),
+                       box(),
+                       label(),
+                       landmark(),
+                       offsetVec(),
+                       dispVecs(),
+                       edgeMap()
+       {
+               // Body left empty: every member is set in the initializer list above.
+       }
+
         ScoreInfo::ScoreInfo() :
                         name(),
                         dimInfo(),
@@ -846,10 +862,15 @@ namespace inference
                         }
  
                         JsonObject *object = json_object_get_object_member(cObject, "heatmap") ;
-                       landmark.GetDecodingInfo().heatMap.shapeType =
-                                                       static_cast<inference_tensor_shape_type_e>(json_object_get_int_member(object, "shape_type"));
+                       try {
+                               landmark.GetDecodingInfo().heatMap.shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes);
+                       } catch (const std::exception& e) {
+                               LOGE("Invalid %s", e.what());
+                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
+                       }
+
                         std::vector<int> heatMapIndexes = landmark.GetDimInfo().GetValidIndexAll();
-                       if (landmark.GetDecodingInfo().heatMap.shapeType == 0) {
+                       if (landmark.GetDecodingInfo().heatMap.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
                                 landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[0];
                                 landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[1];
                                 landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[2];
@@ -892,8 +913,12 @@ namespace inference
                                         dimInfo.SetValidIndex(elem2);
                         }
  
-                       shapeType = static_cast<int>(json_object_get_int_member(pObject, "shape_type"));
-                       LOGI("shape type: %d", shapeType);
+                       try {
+                               shapeType = OutputMetadata::GetSupportedType(pObject, "shape_type", OutputMetadata::supportedTensorShapes);
+                       } catch (const std::exception& e) {
+                               LOGE("Invalid %s", e.what());
+                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
+                       }
  
                         dimType = static_cast<int>(json_object_get_int_member(pObject, "dim_type"));
                         LOGI("dim type: %d", dimType);
@@ -919,6 +944,17 @@ namespace inference
                 return MEDIA_VISION_ERROR_NONE;
         }
  
+       DispVec::DispVec() : // sets defaults and registers the accepted "type" strings
+                       name(),
+                       dimInfo(),
+                       type(INFERENCE_DISPLACEMENT_TYPE_FORWARD), // default; ParseDisplacement() assigns the value read from "type"
+                       shapeType(INFERENCE_TENSOR_SHAPE_NCHW), // default; ParseDisplacement() assigns the value read from "shape_type"
+                       dimType(2) // default; ParseDisplacement() assigns the value read from "dim_type"
+       {
+               supportedDispTypes.insert({"FORWARD", INFERENCE_DISPLACEMENT_TYPE_FORWARD}); // metadata string "FORWARD"
+               supportedDispTypes.insert({"BACKWARD", INFERENCE_DISPLACEMENT_TYPE_BACKWARD}); // metadata string "BACKWARD"
+       }
+
         int DispVec::ParseDisplacement(JsonObject *root)
         {
                 LOGI("ENTER");
@@ -934,15 +970,17 @@ namespace inference
                                 dimInfo.SetValidIndex(elem2);
                 }
  
-               shapeType = static_cast<int>(json_object_get_int_member(root, "shape_type"));
-               LOGI("shape type: %d", shapeType);
+               try {
+                       shapeType = OutputMetadata::GetSupportedType(root, "shape_type", OutputMetadata::supportedTensorShapes);
+                       type = OutputMetadata::GetSupportedType(root, "type", supportedDispTypes);
+               } catch (const std::exception& e) {
+                       LOGE("Invalid %s", e.what());
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
  
                 dimType = static_cast<int>(json_object_get_int_member(root, "dim_type"));
                 LOGI("dim type: %d", dimType);
  
-               type = static_cast<int>(json_object_get_int_member(root, "type"));
-               LOGI("type: %d", type);
-
                 LOGI("LEAVE");
                 return MEDIA_VISION_ERROR_NONE;
         }
diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp

index 9798dfcfab3468059b9ab4531be73fb5cee4048a..1ae33a77a367b73d2f3e3a13050462fa17a392fc 100644 (file)
--- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
@@ -380,7 +380,7 @@ namespace inference
                         if (decodedLandmarks[fromKeyId].valid == true &&
                                 decodedLandmarks[toKeyId].valid == false) {
                                 LOGI("BackTravers: from %d to %d", fromKeyId, toKeyId);
-                               traverseToNeighbor(index, toKeyId,  1,
+                               traverseToNeighbor(index, toKeyId,  INFERENCE_DISPLACEMENT_TYPE_BACKWARD,
                                                         decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
                                                         scaleW, scaleH);
                                 LOGI("tgt_key_id[%d]: %.4f, %.4f, %.4f", toKeyId,
@@ -400,7 +400,7 @@ namespace inference
                         if (decodedLandmarks[fromKeyId].valid == true &&
                                 decodedLandmarks[toKeyId].valid == false) {
                                 LOGI("FrwdTravers: form %d to %d", fromKeyId, toKeyId);
-                               traverseToNeighbor(index, toKeyId,  0,
+                               traverseToNeighbor(index, toKeyId,  INFERENCE_DISPLACEMENT_TYPE_FORWARD,
                                                         decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
                                                         scaleW, scaleH);
                         }
@@ -410,7 +410,7 @@ namespace inference
                 return MEDIA_VISION_ERROR_NONE;
         }
  
-       int PoseDecoder::traverseToNeighbor(int edgeId, int toId, int dir,
+       int PoseDecoder::traverseToNeighbor(int edgeId, int toId, inference_displacement_type_e dir,
                                                                 LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
                                                                 float scaleW, float scaleH)
         {
@@ -460,7 +460,7 @@ namespace inference
                 return MEDIA_VISION_ERROR_NONE;
         }
  
-       int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector)
+       int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, inference_displacement_type_e type, cv::Point2f& vector)
         {
                 LOGI("ENTER");
  
@@ -472,7 +472,7 @@ namespace inference
                 int idxX = idxY + static_cast<int>(mMeta.GetEdge().GetEdgesAll().size());
  
                 for(auto& dispVec : mMeta.GetDispVecAll()){
-                       if (dispVec.GetType() == dir) { // 0: forward
+                       if (dispVec.GetType() == type) { // 0: forward
                                 LOGI("%s", dispVec.GetName().c_str());
                                 vector.x = mTensorBuffer.getValue<float>(dispVec.GetName(), idxX);
                                 vector.y = mTensorBuffer.getValue<float>(dispVec.GetName(), idxY);
diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec

index a1ca7071019de29bf01a3bc382466bb638aecfec..cf01750fc78c60548debc63c9ba989cf3b75fe23 100644 (file)
--- a/packaging/capi-media-vision.spec
+++ b/packaging/capi-media-vision.spec
@@ -1,6 +1,6 @@
  Name:        capi-media-vision
  Summary:     Media Vision library for Tizen Native API
-Version:     0.8.13
+Version:     0.8.14
  Release:     0
  Group:       Multimedia/Framework
  License:     Apache-2.0 and BSD-3-Clause
author	Tae-Young Chung <ty83.chung@samsung.com>
	Thu, 2 Sep 2021 07:04:05 +0000 (16:04 +0900)
committer	Tae-Young Chung <ty83.chung@samsung.com>
	Thu, 2 Sep 2021 07:04:05 +0000 (16:04 +0900)
meta-template/pld_cpm_192x192.json		patch \| blob \| history
meta-template/pld_mobilenet_v1_posenet_multi_257x257.json		patch \| blob \| history
mv_machine_learning/mv_inference/inference/include/OutputMetadata.h		patch \| blob \| history
mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h		patch \| blob \| history
mv_machine_learning/mv_inference/inference/include/PoseDecoder.h		patch \| blob \| history
mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp		patch \| blob \| history
mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp		patch \| blob \| history
packaging/capi-media-vision.spec		patch \| blob \| history