From ab38ff259fcd0dfa44f0711f0e18cb1183d16da4 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Tue, 18 May 2021 19:18:35 +0900 Subject: [PATCH] Add new Landmark, OffsetVec, DispVec, Edge outputmetadata for pose landmark detection Change-Id: I7a2799005ebb887cf273c672132b3d9eb5a7ce40 Signed-off-by: Tae-Young Chung --- .../inference/include/OutputMetadata.h | 160 +++++++- .../mv_inference/inference/src/OutputMetadata.cpp | 440 ++++++++++++++++++++- 2 files changed, 598 insertions(+), 2 deletions(-) diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h index f311ee4..45c6311 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -121,10 +122,50 @@ namespace inference ~NMSParam() = default; }; + class RotateParam { + public: + int startPointIndex; + int endPointIndex; + cv::Point2f startPoint; + cv::Point2f endPoint; + float baseAngle; + + RotateParam() : startPointIndex(-1), + endPointIndex(-1), + startPoint(cv::Point2f(0.f,0.f)), + endPoint(cv::Point2f(0.f,0.f)), + baseAngle(0.f){}; + ~RotateParam() = default; + }; + + class RoiOptionParam { + public: + int startPointIndex; + int endPointIndex; + int centerPointIndex; + cv::Point2f centerPoint; + float shiftX; + float shiftY; + float scaleX; + float scaleY; + int mode; + + RoiOptionParam() : startPointIndex(-1), + endPointIndex(-1), + centerPointIndex(-1), + centerPoint(cv::Point2f(0.f, 0.f)), + shiftX(0.f), shiftY(0.f), + scaleX(1.f), scaleY(1.f), + mode(-1) {}; + ~RoiOptionParam() = default; + }; + private: AnchorParam anchorParam; std::vector anchorBoxes; NMSParam nmsParam; + RotateParam rotParam; + RoiOptionParam roiOptParam; public: DecodeInfo() = default; @@ -149,6 +190,23 @@ namespace inference int ParseNms(JsonObject *root); int GetNmsMode(); float GetNmsThreshold(); + + // Rotate param + int ParseRotate(JsonObject *root); + int GetRotStartPointIndex(); + int GetRotEndPointIndex(); + float GetBaseAngle(); + + // Roi option param + int ParseRoiOption(JsonObject *root); + int GetRoiMode(); + int GetRoiCenterPointIndex(); + int GetRoiStartPointIndex(); + int GetRoiEndPointIndex(); + float GetShiftX(); + float GetShiftY(); + float GetScaleX(); + float GetScaleY(); }; private: @@ -167,9 +225,9 @@ namespace inference std::string GetName() { return name; } DimInfo GetDimInfo() { return dimInfo; } int GetType() { return type; } + int GetDecodingType() { return decodingType; } std::vector GetOrder() { return order; } int GetCoordinate() { return coordinate; } - int GetDecodingType() { return decodingType; } DecodeInfo& GetDecodeInfo() {return decodingInfo; } int ParseBox(JsonObject *root); @@ -205,6 +263,93 @@ namespace inference int ParseNumber(JsonObject *root); }; + class Landmark + { + public: + class DecodeInfo { + public: + class HeatMapInfo { + public: + int wIdx; + int hIdx; + int cIdx; + inference_tensor_shape_type_e shapeType; + float nmsRadius; + }; + HeatMapInfo heatMap; + }; + private: + std::string name; + DimInfo dimInfo; + int type; /**< 0: 2d-single, 1: 2d-multi, 2: 3-single */ + int offset; + int decodingType; /**< 0: decoding unnecessary, + 1: decoding heatmap, + 2: decoding heatmap with additional refine data */ + DecodeInfo decodingInfo; + + public: + Landmark() = default; + ~Landmark() = default; + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + int GetType(); + int GetOffset(); + int GetDecodingType(); + DecodeInfo& GetDecodingInfo(); + + int ParseLandmark(JsonObject *root); + }; + + class OffsetVec + { + private: + std::string name; + DimInfo dimInfo; + int shapeType; + int dimType; + public: + OffsetVec() = default; + ~OffsetVec() = default; + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + int GetShapeType() { return shapeType; } + int GetDimType() { return dimType; } + + int ParseOffset(JsonObject *root); + }; + + class DispVec + { + private: + std::string name; + DimInfo dimInfo; + int type; + int shapeType; + int dimType; + public: + DispVec() = default; + ~DispVec() = default; + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + int GetType() { return type; } + int GetShapeType() { return shapeType; } + int GetDimType() { return dimType; } + + int ParseDisplacement(JsonObject *root); + }; + + class Edge + { + private: + std::vector> edges; + public: + Edge() = default; + ~Edge() = default; + int ParseEdge(JsonObject *root); + std::vector>& GetEdgesAll(); + }; + class OutputMetadata { private: @@ -213,12 +358,21 @@ namespace inference BoxInfo box; Label label; Number number; + Landmark landmark; + OffsetVec offsetVec; + std::vector dispVecs; + Edge edgeMap; int ParseScore(JsonObject *root); int ParseBox(JsonObject *root); int ParseLabel(JsonObject *root); int ParseNumber(JsonObject *root); int ParseBoxDecodeInfo(JsonObject *root); + int ParseLandmark(JsonObject *root); + int ParseLandmarkDecodeInfo(JsonObject *root); + int ParseOffset(JsonObject *root); + int ParseDisplacement(JsonObject *root); + int ParseEdgeMap(JsonObject * root); public: /** @@ -247,6 +401,10 @@ namespace inference BoxInfo& GetBox(); Label& GetLabel(); Number& GetNumber(); + Landmark& GetLandmark(); + OffsetVec& GetOffset(); + std::vector& GetDispVecAll(); + Edge& GetEdge(); }; } /* Inference */ diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp index cbfe6ad..6c396f5 100755 --- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp @@ -300,6 +300,18 @@ namespace inference LOGE("Fail to ParseNms[%d]", ret); return ret; } + + ret = box.GetDecodeInfo().ParseRotate(cObject); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to ParseRotate[%d]", ret); + return ret; + } + + ret = box.GetDecodeInfo().ParseRoiOption(cObject); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to ParseRoiOption[%d]", ret); + return ret; + } } LOGI("LEAVE"); @@ -485,7 +497,6 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int BoxInfo::DecodeInfo::ParseNms(JsonObject *root) { if (json_object_has_member(root, "nms") == false) { @@ -510,6 +521,96 @@ namespace inference return this->nmsParam.threshold; } + int BoxInfo::DecodeInfo::ParseRotate(JsonObject *root) + { + if (json_object_has_member(root, "rotate") == false) { + LOGI("rotate is empty. skip it"); + return MEDIA_VISION_ERROR_NONE; + } + + JsonObject *object = json_object_get_object_member(root, "rotate"); + this->rotParam.baseAngle = static_cast(json_object_get_double_member(object, "base_angle")); + this->rotParam.startPointIndex = static_cast(json_object_get_int_member(object, "start_point_index")); + this->rotParam.endPointIndex = static_cast(json_object_get_int_member(object, "end_point_index")); + + return MEDIA_VISION_ERROR_NONE; + } + + int BoxInfo::DecodeInfo::GetRotStartPointIndex() + { + return this->rotParam.startPointIndex; + } + + int BoxInfo::DecodeInfo::GetRotEndPointIndex() + { + return this->rotParam.endPointIndex; + } + + float BoxInfo::DecodeInfo::GetBaseAngle() + { + return this->rotParam.baseAngle; + } + + int BoxInfo::DecodeInfo::GetRoiMode() + { + return this->roiOptParam.mode; + } + + int BoxInfo::DecodeInfo::GetRoiStartPointIndex() + { + return this->roiOptParam.startPointIndex; + } + + int BoxInfo::DecodeInfo::GetRoiEndPointIndex() + { + return this->roiOptParam.endPointIndex; + } + + int BoxInfo::DecodeInfo::GetRoiCenterPointIndex() + { + return this->roiOptParam.centerPointIndex; + } + + float BoxInfo::DecodeInfo::GetShiftX() + { + return this->roiOptParam.shiftX; + } + + float BoxInfo::DecodeInfo::GetShiftY() + { + return this->roiOptParam.shiftY; + } + + float BoxInfo::DecodeInfo::GetScaleX() + { + return this->roiOptParam.scaleX; + } + + float BoxInfo::DecodeInfo::GetScaleY() + { + return this->roiOptParam.scaleY; + } + + int BoxInfo::DecodeInfo::ParseRoiOption(JsonObject *root) + { + if (json_object_has_member(root, "roi") == false) { + LOGI("roi is empty. skip it"); + return MEDIA_VISION_ERROR_NONE; + } + + JsonObject *object = json_object_get_object_member(root, "roi"); + this->roiOptParam.startPointIndex = static_cast(json_object_get_int_member(object, "start_point_index")); + this->roiOptParam.endPointIndex = static_cast(json_object_get_int_member(object, "end_point_index")); + this->roiOptParam.centerPointIndex = static_cast(json_object_get_int_member(object, "center_point_index")); + this->roiOptParam.shiftX = static_cast(json_object_get_double_member(object, "shift_x")); + this->roiOptParam.shiftY = static_cast(json_object_get_double_member(object, "shift_y")); + this->roiOptParam.scaleX = static_cast(json_object_get_double_member(object, "scale_x")); + this->roiOptParam.scaleY = static_cast(json_object_get_double_member(object, "scale_y")); + this->roiOptParam.mode = static_cast(json_object_get_int_member(object, "scale_mode")); + + return MEDIA_VISION_ERROR_NONE; + } + ScoreInfo& OutputMetadata::GetScore() { return score; @@ -530,11 +631,311 @@ namespace inference return number; } + Landmark& OutputMetadata::GetLandmark() + { + return landmark; + } + + OffsetVec& OutputMetadata::GetOffset() + { + return offsetVec; + } + + std::vector& OutputMetadata::GetDispVecAll() + { + return dispVecs; + } + + Edge& OutputMetadata::GetEdge() + { + return edgeMap; + } + + std::vector>& Edge::GetEdgesAll() + { + return edges; + } + bool OutputMetadata::IsParsed() { return parsed; } + int Landmark::ParseLandmark(JsonObject *root) + { + // box + JsonArray * rootArray = json_object_get_array_member(root, "landmark"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + name = + static_cast(json_object_get_string_member(pObject,"name")); + LOGI("layer: %s", name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + dimInfo.SetValidIndex(elem2); + } + + type = static_cast(json_object_get_int_member(pObject, "landmark_type")); + LOGI("landmark type: %d", type); + + offset = static_cast(json_object_get_int_member(pObject, "landmark_offset")); + LOGI("landmark offset: %d", offset); + + decodingType = static_cast(json_object_get_int_member(pObject, "decoding_type")); + LOGI("landmark decodeing type: %d", decodingType); + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int Landmark::GetType() + { + return type; + } + + int Landmark::GetOffset() + { + return offset; + } + + int Landmark::GetDecodingType() + { + return decodingType; + } + + Landmark::DecodeInfo& Landmark::GetDecodingInfo() + { + return decodingInfo; + } + + int OutputMetadata::ParseLandmark(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "landmark") == false) { + LOGI("No landmark outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + landmark.ParseLandmark(root); + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int OutputMetadata::ParseLandmarkDecodeInfo(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "landmark") == false) { + LOGI("No landmark outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + // box + JsonArray * rootArray = json_object_get_array_member(root, "landmark"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + if (json_object_has_member(pObject, "decoding_info") == false) { + LOGE("decoding_info is mandatory. Invalid metadata"); + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info"); + if (json_object_has_member(cObject, "heatmap") == false) { + LOGE("heatmap is mandatory. Invalid metadata"); + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonObject *object = json_object_get_object_member(cObject, "heatmap") ; + landmark.GetDecodingInfo().heatMap.shapeType = + static_cast(json_object_get_int_member(object, "shape_type")); + std::vector heatMapIndexes = landmark.GetDimInfo().GetValidIndexAll(); + if (landmark.GetDecodingInfo().heatMap.shapeType == 0) { + landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[0]; + landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[1]; + landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[2]; + } else { + landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[0]; + landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[1]; + landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[2]; + } + + if (json_object_has_member(object, "nms_radius")) { + landmark.GetDecodingInfo().heatMap.nmsRadius = static_cast(json_object_get_double_member(object, "nms_radius")); + LOGI("nms is enabled with %3.f", landmark.GetDecodingInfo().heatMap.nmsRadius ); + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int OffsetVec::ParseOffset(JsonObject *root) + { + JsonArray * rootArray = json_object_get_array_member(root, "offset"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + name = + static_cast(json_object_get_string_member(pObject,"name")); + LOGI("layer: %s", name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + dimInfo.SetValidIndex(elem2); + } + + shapeType = static_cast(json_object_get_int_member(pObject, "shape_type")); + LOGI("shape type: %d", shapeType); + + dimType = static_cast(json_object_get_int_member(pObject, "dim_type")); + LOGI("dim type: %d", dimType); + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int OutputMetadata::ParseOffset(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "offset") == false) { + LOGI("No offset outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + offsetVec.ParseOffset(root); + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int DispVec::ParseDisplacement(JsonObject *root) + { + LOGI("ENTER"); + name = + static_cast(json_object_get_string_member(root,"name")); + LOGI("layer: %s", name.c_str()); + + JsonArray * array = json_object_get_array_member(root, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if(static_cast(json_array_get_int_element(array, elem2)) == 1) + dimInfo.SetValidIndex(elem2); + } + + shapeType = static_cast(json_object_get_int_member(root, "shape_type")); + LOGI("shape type: %d", shapeType); + + dimType = static_cast(json_object_get_int_member(root, "dim_type")); + LOGI("dim type: %d", dimType); + + type = static_cast(json_object_get_int_member(root, "type")); + LOGI("type: %d", type); + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int OutputMetadata::ParseDisplacement(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "displacement") == false) { + LOGI("No displacement outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonArray * rootArray = json_object_get_array_member(root, "displacement"); + unsigned int elements = json_array_get_length(rootArray); + + dispVecs.resize(elements); + unsigned int elem = 0; + for (auto& disp : dispVecs) { + JsonNode *pNode = json_array_get_element(rootArray, elem++); + JsonObject *pObject = json_node_get_object(pNode); + disp.ParseDisplacement(pObject); + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int Edge::ParseEdge(JsonObject *root) + { + LOGI("ENTER"); + JsonArray * rootArray = json_object_get_array_member(root, "edgemap"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + int pEdgeNode, cEdgeNode; + for (unsigned int elem = 0; elem < elements; ++elem) { + + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + pEdgeNode = json_object_get_int_member(pObject, "parent"); + cEdgeNode = json_object_get_int_member(pObject, "child"); + + edges.push_back(std::make_pair(pEdgeNode, cEdgeNode)); + LOGI("%ud: parent - child: %d - %d", elem, pEdgeNode, cEdgeNode); + } + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int OutputMetadata::ParseEdgeMap(JsonObject * root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "edgemap") == false) { + LOGI("No edgemap outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + edgeMap.ParseEdge(root); + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + int OutputMetadata::Parse(JsonObject *root) { LOGI("ENTER"); @@ -585,6 +986,43 @@ namespace inference } } + ret = ParseLandmark(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetLandmark[%d]", ret); + return ret; + } + + if (!landmark.GetName().empty()) { + if (landmark.GetDecodingType() == 1 || + landmark.GetDecodingType() == 2) { + ret = ParseLandmarkDecodeInfo(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret); + return ret; + } + } + + if (landmark.GetDecodingType() == 2) {// landmark.decodingType == 2 + ret = ParseOffset(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetOffsetVector[%d]", ret); + return ret; + } + + ret = ParseDisplacement(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetDispVector[%d]", ret); + return ret; + } + + ret = ParseEdgeMap(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetEdgeConnection[%d]", ret); + return ret; + } + } + } + parsed = true; LOGI("LEAVE"); -- 2.7.4