From 4d6ec3881bfec8d16f68644c2029b575ba590865 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Thu, 2 Sep 2021 16:04:05 +0900 Subject: [PATCH] Change offset's shape_type and displacement's shape_type and type to string Change-Id: Ide16c57e44532d10e9633a1399c6d787991baf33 Signed-off-by: Tae-Young Chung --- meta-template/pld_cpm_192x192.json | 2 +- .../pld_mobilenet_v1_posenet_multi_257x257.json | 12 ++--- .../inference/include/OutputMetadata.h | 10 ++-- .../inference/include/OutputMetadataTypes.h | 5 ++ .../mv_inference/inference/include/PoseDecoder.h | 4 +- .../mv_inference/inference/src/OutputMetadata.cpp | 58 ++++++++++++++++++---- .../mv_inference/inference/src/PoseDecoder.cpp | 10 ++-- packaging/capi-media-vision.spec | 2 +- 8 files changed, 74 insertions(+), 29 deletions(-) diff --git a/meta-template/pld_cpm_192x192.json b/meta-template/pld_cpm_192x192.json index a0bb6e6..1b24378 100644 --- a/meta-template/pld_cpm_192x192.json +++ b/meta-template/pld_cpm_192x192.json @@ -43,7 +43,7 @@ { "heatmap" : { - "shape_type": 1 + "shape_type": "NHWC" } } } diff --git a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json index 2ef057a..a65951a 100644 --- a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json +++ b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json @@ -43,7 +43,7 @@ { "heatmap" : { - "shape_type" : 1, + "shape_type" : "NHWC", "nms_radius" : 50.0 } } @@ -53,7 +53,7 @@ { "name" : "MobilenetV1/offset_2/BiasAdd", "index" : [-1, 1, 1, 1], - "shape_type" : 1, + "shape_type" : "NHWC", "dim_type" : 2 } ], @@ -61,16 +61,16 @@ { "name" : "MobilenetV1/displacement_fwd_2/BiasAdd", "index" : [-1, 1, 1, 1], - "shape_type" : 1, + "shape_type" : "NHWC", "dim_type" : 2, - "type" : 0 + "type" : "FORWARD" }, { "name" : "MobilenetV1/displacement_bwd_2/BiasAdd", "index" : [-1, 1, 1, 1], - "shape_type" : 1, + "shape_type" : "NHWC", "dim_type" : 2, - "type" : 1 + "type" : "BACKWARD" } ], "edgemap" : [ diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h index fe917ad..97c8821 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -346,15 +346,16 @@ namespace inference private: std::string name; DimInfo dimInfo; - int type; + inference_displacement_type_e type; int shapeType; int dimType; + std::map supportedDispTypes; public: - DispVec() = default; + DispVec(); ~DispVec() = default; std::string GetName() { return name; } DimInfo GetDimInfo() { return dimInfo; } - int GetType() { return type; } + inference_displacement_type_e GetType() { return type; } int GetShapeType() { return shapeType; } int GetDimType() { return dimType; } @@ -397,12 +398,13 @@ namespace inference int ParseEdgeMap(JsonObject * root); public: + static std::map supportedTensorShapes; /** * @brief Creates an OutputMetadata class instance. * * @since_tizen 6.5 */ - OutputMetadata() : parsed(false) {}; + OutputMetadata(); /** * @brief Destroys an OutputMetadata class instance including diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h index 0a0aadc..440fa76 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h @@ -70,6 +70,11 @@ namespace inference INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP, INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE } inference_landmark_decoding_type_e; + + typedef enum { + INFERENCE_DISPLACEMENT_TYPE_FORWARD, + INFERENCE_DISPLACEMENT_TYPE_BACKWARD + } inference_displacement_type_e; } } diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h index 545c385..11289ac 100644 --- a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h +++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h @@ -56,10 +56,10 @@ namespace inference int getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal); int findPose(LandmarkPoint& root, std::vector& decodedLandmarks, float scaleW, float scaleH); - int traverseToNeighbor(int edgeId, int toId, int dir, + int traverseToNeighbor(int edgeId, int toId, inference_displacement_type_e dir, LandmarkPoint fromLandmark, LandmarkPoint& toLandmark, float scaleW, float scaleH); - int getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector); + int getEdgeVector(cv::Point index, int edgeId, inference_displacement_type_e dir, cv::Point2f& vector); int convertXYZtoX(int x, int y, int c); diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp index 968bea3..d04daff 100755 --- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp @@ -27,6 +27,22 @@ namespace mediavision { namespace inference { + std::map OutputMetadata::supportedTensorShapes = + {{"NCHW", INFERENCE_TENSOR_SHAPE_NCHW}, {"NHWC", INFERENCE_TENSOR_SHAPE_NHWC}}; + + OutputMetadata::OutputMetadata() : + parsed(false), + score(), + box(), + label(), + landmark(), + offsetVec(), + dispVecs(), + edgeMap() + { + + } + ScoreInfo::ScoreInfo() : name(), dimInfo(), @@ -846,10 +862,15 @@ namespace inference } JsonObject *object = json_object_get_object_member(cObject, "heatmap") ; - landmark.GetDecodingInfo().heatMap.shapeType = - static_cast(json_object_get_int_member(object, "shape_type")); + try { + landmark.GetDecodingInfo().heatMap.shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + std::vector heatMapIndexes = landmark.GetDimInfo().GetValidIndexAll(); - if (landmark.GetDecodingInfo().heatMap.shapeType == 0) { + if (landmark.GetDecodingInfo().heatMap.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[0]; landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[1]; landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[2]; @@ -892,8 +913,12 @@ namespace inference dimInfo.SetValidIndex(elem2); } - shapeType = static_cast(json_object_get_int_member(pObject, "shape_type")); - LOGI("shape type: %d", shapeType); + try { + shapeType = OutputMetadata::GetSupportedType(pObject, "shape_type", OutputMetadata::supportedTensorShapes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } dimType = static_cast(json_object_get_int_member(pObject, "dim_type")); LOGI("dim type: %d", dimType); @@ -919,6 +944,17 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } + DispVec::DispVec() : + name(), + dimInfo(), + type(INFERENCE_DISPLACEMENT_TYPE_FORWARD), + shapeType(INFERENCE_TENSOR_SHAPE_NCHW), + dimType(2) + { + supportedDispTypes.insert({"FORWARD", INFERENCE_DISPLACEMENT_TYPE_FORWARD}); + supportedDispTypes.insert({"BACKWARD", INFERENCE_DISPLACEMENT_TYPE_BACKWARD}); + } + int DispVec::ParseDisplacement(JsonObject *root) { LOGI("ENTER"); @@ -934,15 +970,17 @@ namespace inference dimInfo.SetValidIndex(elem2); } - shapeType = static_cast(json_object_get_int_member(root, "shape_type")); - LOGI("shape type: %d", shapeType); + try { + shapeType = OutputMetadata::GetSupportedType(root, "shape_type", OutputMetadata::supportedTensorShapes); + type = OutputMetadata::GetSupportedType(root, "type", supportedDispTypes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } dimType = static_cast(json_object_get_int_member(root, "dim_type")); LOGI("dim type: %d", dimType); - type = static_cast(json_object_get_int_member(root, "type")); - LOGI("type: %d", type); - LOGI("LEAVE"); return MEDIA_VISION_ERROR_NONE; } diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index 9798dfc..1ae33a7 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -380,7 +380,7 @@ namespace inference if (decodedLandmarks[fromKeyId].valid == true && decodedLandmarks[toKeyId].valid == false) { LOGI("BackTravers: from %d to %d", fromKeyId, toKeyId); - traverseToNeighbor(index, toKeyId, 1, + traverseToNeighbor(index, toKeyId, INFERENCE_DISPLACEMENT_TYPE_BACKWARD, decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId], scaleW, scaleH); LOGI("tgt_key_id[%d]: %.4f, %.4f, %.4f", toKeyId, @@ -400,7 +400,7 @@ namespace inference if (decodedLandmarks[fromKeyId].valid == true && decodedLandmarks[toKeyId].valid == false) { LOGI("FrwdTravers: form %d to %d", fromKeyId, toKeyId); - traverseToNeighbor(index, toKeyId, 0, + traverseToNeighbor(index, toKeyId, INFERENCE_DISPLACEMENT_TYPE_FORWARD, decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId], scaleW, scaleH); } @@ -410,7 +410,7 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int PoseDecoder::traverseToNeighbor(int edgeId, int toId, int dir, + int PoseDecoder::traverseToNeighbor(int edgeId, int toId, inference_displacement_type_e dir, LandmarkPoint fromLandmark, LandmarkPoint& toLandmark, float scaleW, float scaleH) { @@ -460,7 +460,7 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector) + int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, inference_displacement_type_e type, cv::Point2f& vector) { LOGI("ENTER"); @@ -472,7 +472,7 @@ namespace inference int idxX = idxY + static_cast(mMeta.GetEdge().GetEdgesAll().size()); for(auto& dispVec : mMeta.GetDispVecAll()){ - if (dispVec.GetType() == dir) { // 0: forward + if (dispVec.GetType() == type) { // 0: forward LOGI("%s", dispVec.GetName().c_str()); vector.x = mTensorBuffer.getValue(dispVec.GetName(), idxX); vector.y = mTensorBuffer.getValue(dispVec.GetName(), idxY); diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index a1ca707..cf01750 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.8.13 +Version: 0.8.14 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause -- 2.7.4