{
"name" : "conv2d_20",
"index" : [-1, -1, -1, 1],
- "landmark_type" : 0,
- "landmark_coordinate" : 1,
- "decoding_type" : 0,
+ "landmark_type" : "2D_SINGLE",
+ "landmark_coordinate" : "PIXEL",
+ "decoding_type" : "BYPASS",
"landmark_offset" : 3
}
]
{
"name" : "fanet8ss_inference/fully_connected_1/Sigmoid",
"index" : [-1, 1],
- "landmark_type" : 0,
- "landmark_coordinate" : 0,
+ "landmark_type" : "2D_SINGLE",
+ "landmark_coordinate" : "RATIO",
"landmark_offset" : 2,
- "decoding_type" : 0
+ "decoding_type" : "BYPASS"
}
]
}
{
"name" : "Convolutional_Pose_Machine/stage_5_out",
"index" : [-1, 1, 1, 1],
- "landmark_type" : 0,
- "landmark_coordinate" : 1,
- "decoding_type" : 1,
+ "landmark_type" : "2D_SINGLE",
+ "landmark_coordinate" : "PIXEL",
+ "decoding_type" : "HEATMAP",
"decoding_info" :
{
"heatmap" :
{
"name" : "MobilenetV1/heatmap_2/BiasAdd",
"index" : [-1, 1, 1, 1],
- "landmark_type" : 1,
- "landmark_coordinate" : 1,
- "decoding_type" : 2,
+ "landmark_type" : "2D_MULTI",
+ "landmark_coordinate" : "PIXEL",
+ "decoding_type" : "HEATMAP_REFINE",
"decoding_info" :
{
"heatmap" :
int cIdx;
inference_tensor_shape_type_e shapeType;
float nmsRadius;
+ HeatMapInfo() = default;
+ ~HeatMapInfo() = default;
};
HeatMapInfo heatMap;
+ DecodeInfo() = default;
+ ~DecodeInfo() = default;
};
private:
std::string name;
DimInfo dimInfo;
- int type; /**< 0: 2d-single, 1: 2d-multi, 2: 3-single */
+ inference_landmark_type_e type; /**< 0: 2D_SINGLE, 1: 2D_MULTI, 2: 3D_SINGLE */
int offset;
- int coordinate; /**< 0: ratio, 1: pixel */
- int decodingType; /**< 0: decoding unnecessary,
- 1: decoding heatmap,
- 2: decoding heatmap with additional refine data */
+ inference_landmark_coorindate_type_e coordinate; /**< 0: RATIO, 1: PIXEL */
+ inference_landmark_decoding_type_e decodingType; /**< 0: BYPASS (decoding unnecessary),
+ 1: HEATMAP (decoding heatmap),
+ 2: HEATMAP_REFINE (decoding heatmap with refinement) */
DecodeInfo decodingInfo;
+ std::map<std::string, inference_landmark_type_e> supportedLandmarkTypes; /**< string -> enum table; filled in Landmark() and handed to OutputMetadata::GetSupportedType() for "landmark_type" */
+ std::map<std::string, inference_landmark_coorindate_type_e> supportedLandmarkCoordinateTypes; /**< string -> enum table; filled in Landmark() and handed to OutputMetadata::GetSupportedType() for "landmark_coordinate" */
+ std::map<std::string, inference_landmark_decoding_type_e> supportedLandmarkDecodingTypes; /**< string -> enum table; filled in Landmark() and handed to OutputMetadata::GetSupportedType() for "decoding_type" */
+
public:
- Landmark() = default;
+ Landmark(); /**< defined out of line: sets 2D_SINGLE / RATIO / BYPASS defaults and fills the three lookup tables */
~Landmark() = default;
std::string GetName() { return name; }
DimInfo GetDimInfo() { return dimInfo; }
- int GetType();
+ inference_landmark_type_e GetType(); /**< returns the stored landmark type */
int GetOffset();
- int GetCoordinate();
- int GetDecodingType();
+ inference_landmark_coorindate_type_e GetCoordinate(); /**< returns the stored coordinate type */
+ inference_landmark_decoding_type_e GetDecodingType(); /**< returns the stored decoding type */
DecodeInfo& GetDecodingInfo();
int ParseLandmark(JsonObject *root);
INFERENCE_BOX_NMS_TYPE_NONE = -1,
INFERENCE_BOX_NMS_TYPE_STANDARD
} inference_box_nms_type_e;
+
+ // landmark: enums selected by the "landmark_type", "landmark_coordinate" and "decoding_type" metadata strings
+ typedef enum {
+ INFERENCE_LANDMARK_TYPE_2D_SINGLE, /**< 0: metadata string "2D_SINGLE"; treated as a single pose */
+ INFERENCE_LANDMARK_TYPE_2D_MULTI, /**< 1: metadata string "2D_MULTI"; up to MAX_NUMBER_OF_POSE poses */
+ INFERENCE_LANDMARK_TYPE_3D_SINGLE /**< 2: metadata string "3D_SINGLE"; treated as a single pose */
+ } inference_landmark_type_e;
+
+ typedef enum {
+ INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO, /**< 0: metadata string "RATIO"; decoder is called with width/height of 1.0 */
+ INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL /**< 1: metadata string "PIXEL"; decoder is called with the input layer width/height */
+ } inference_landmark_coorindate_type_e; /* NOTE(review): "coorindate" looks like a typo of "coordinate"; a rename must touch every use */
+
+ typedef enum {
+ INFERENCE_LANDMARK_DECODING_TYPE_BYPASS, /**< 0: metadata string "BYPASS"; no heatmap decode info is parsed */
+ INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP, /**< 1: metadata string "HEATMAP"; landmark decode info is parsed */
+ INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE /**< 2: metadata string "HEATMAP_REFINE"; decode info and the offset are parsed */
+ } inference_landmark_decoding_type_e;
}
}
int heatMapWidth = 0;
int heatMapHeight = 0;
int heatMapChannel = 0;
- if (landmarkInfo.GetDecodingType() != 0) {
+ if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx];
heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx];
heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx];
int number_of_landmarks = 0;
std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll();
- if (landmarkInfo.GetDecodingType() == 0) {
+ if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]);
number_of_landmarks = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]]
/ landmarkInfo.GetOffset();
float inputW = 1.f;
float inputH = 1.f;
- if (landmarkInfo.GetCoordinate() == 1) {
+ if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth());
inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight());
}
- float thresRadius = landmarkInfo.GetType() == 0 ? 0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius;
+ float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
+ outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius;
poseDecoder.decode(inputW, inputH, thresRadius);
for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) {
int heatMapWidth = 0;
int heatMapHeight = 0;
int heatMapChannel = 0;
- if (landmarkInfo.GetDecodingType() != 0) {
+ if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx];
heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx];
heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx];
return MEDIA_VISION_ERROR_INTERNAL;
}
// 2d+single or 2d+multi or 3d+single or 3d+multi
- int defaultNumberOfPose = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 2) ? 1 : MAX_NUMBER_OF_POSE;
+ int defaultNumberOfPose = (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) ? 1 : MAX_NUMBER_OF_POSE;
std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll();
- // In case of DecodingType == 0,
+ // If INFERENCE_LANDMARK_DECODING_TYPE_BYPASS,
// the landmarkChannel is guessed from the shape of the landmark output tensor.
- // Otherwise, decoding heatmap, it is guessed from the heatMapChannel.
+ // Otherwise, it is guessed from the heatMapChannel.
int landmarkChannel = 0;
- if (landmarkInfo.GetDecodingType() == 0) {
+ if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
landmarkChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]]
/ landmarkInfo.GetOffset();
} else {
float inputW = 1.f;
float inputH = 1.f;
- float thresRadius = landmarkInfo.GetType() == 0 ? 0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius;
- if (landmarkInfo.GetCoordinate() == 1) {
+ float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
+ outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius;
+ if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth());
inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight());
}
return parsed;
}
+ Landmark::Landmark() :
+ name(),
+ dimInfo(),
+ type(INFERENCE_LANDMARK_TYPE_2D_SINGLE),
+ offset(),
+ coordinate(INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO),
+ decodingType(INFERENCE_LANDMARK_DECODING_TYPE_BYPASS),
+ decodingInfo()
+
+ {
+ supportedLandmarkTypes.insert({"2D_SINGLE", INFERENCE_LANDMARK_TYPE_2D_SINGLE});
+ supportedLandmarkTypes.insert({"2D_MULTI", INFERENCE_LANDMARK_TYPE_2D_MULTI});
+ supportedLandmarkTypes.insert({"3D_SINGLE", INFERENCE_LANDMARK_TYPE_3D_SINGLE});
+
+ supportedLandmarkCoordinateTypes.insert({"RATIO", INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO});
+ supportedLandmarkCoordinateTypes.insert({"PIXEL", INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL});
+
+ supportedLandmarkDecodingTypes.insert({"BYPASS", INFERENCE_LANDMARK_DECODING_TYPE_BYPASS});
+ supportedLandmarkDecodingTypes.insert({"HEATMAP", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP});
+ supportedLandmarkDecodingTypes.insert({"HEATMAP_REFINE", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE});
+ }
+
int Landmark::ParseLandmark(JsonObject *root)
{
// box
dimInfo.SetValidIndex(elem2);
}
- type = static_cast<int>(json_object_get_int_member(pObject, "landmark_type"));
- LOGI("landmark type: %d", type);
+ try {
+ type = OutputMetadata::GetSupportedType(pObject, "landmark_type", supportedLandmarkTypes);
+ coordinate = OutputMetadata::GetSupportedType(pObject, "landmark_coordinate", supportedLandmarkCoordinateTypes);
+ decodingType = OutputMetadata::GetSupportedType(pObject, "decoding_type", supportedLandmarkDecodingTypes);
+ } catch (const std::exception& e) {
+ LOGE("Invalid %s", e.what());
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
offset = static_cast<int>(json_object_get_int_member(pObject, "landmark_offset"));
LOGI("landmark offset: %d", offset);
-
- coordinate = static_cast<int>(json_object_get_int_member(pObject, "landmark_coordinate"));
- LOGI("landmark coordinate: %d", coordinate);
-
- decodingType = static_cast<int>(json_object_get_int_member(pObject, "decoding_type"));
- LOGI("landmark decodeing type: %d", decodingType);
}
LOGI("LEAVE");
return MEDIA_VISION_ERROR_NONE;
}
- int Landmark::GetType()
+ inference_landmark_type_e Landmark::GetType() /* returns the stored landmark type (constructor default or value set by ParseLandmark) */
{
return type;
}
return offset;
}
- int Landmark::GetCoordinate()
+ inference_landmark_coorindate_type_e Landmark::GetCoordinate() /* returns the stored coordinate type (constructor default or value set by ParseLandmark) */
{
return coordinate;
}
- int Landmark::GetDecodingType()
+ inference_landmark_decoding_type_e Landmark::GetDecodingType() /* returns the stored decoding type (constructor default or value set by ParseLandmark) */
{
return decodingType;
}
}
if (!landmark.GetName().empty()) {
- if (landmark.GetDecodingType() == 1 ||
- landmark.GetDecodingType() == 2) {
+ if (landmark.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
ret = ParseLandmarkDecodeInfo(root);
if (ret != MEDIA_VISION_ERROR_NONE) {
LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret);
}
}
- if (landmark.GetDecodingType() == 2) {// landmark.decodingType == 2
+ if (landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) {
ret = ParseOffset(root);
if (ret != MEDIA_VISION_ERROR_NONE) {
LOGE("Fail to GetOffsetVector[%d]", ret);
Landmark& landmarkInfo = mMeta.GetLandmark();
- if (landmarkInfo.GetType() < 0 || landmarkInfo.GetType() >= 3) {
+ if (landmarkInfo.GetType() < INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ landmarkInfo.GetType() > INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
LOGE("Not supported landmark type");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- if (landmarkInfo.GetDecodingType() == 0) {
+ if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
LOGI("Skip init");
return MEDIA_VISION_ERROR_NONE;
}
mCandidates.clear();
- if (landmarkInfo.GetType() == 0 ||
- landmarkInfo.GetType() == 2) {
+ if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
mCandidates.resize(mHeatMapChannel);
}
if (score < scoreInfo.GetThresHold())
continue;
- if (landmarkInfo.GetType() == 0 ||
- landmarkInfo.GetType() == 2) {
+ if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
if (score <= candidate->score)
continue;
Landmark& landmarkInfo = mMeta.GetLandmark();
ScoreInfo& scoreInfo = mMeta.GetScore();
- if (landmarkInfo.GetType() == 0 ||
- landmarkInfo.GetType() == 2) { // single pose
+ if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
mPoseLandmarks.resize(1);
- if (landmarkInfo.GetDecodingType() == 0) { // direct decoding
+ if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks);
- } else { // heatmap decoding
+ } else {
mPoseLandmarks[0].landmarks.resize(mHeatMapChannel);
}
}
- if (landmarkInfo.GetDecodingType() != 0) { // heatmap decoding
+ if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
while (!mCandidates.empty()) {
LandmarkPoint &root = mCandidates.front();
getIndexToPos(root, scaleWidth, scaleHeight);
- if (landmarkInfo.GetType() == 0) {
+ if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE) {
root.valid = true;
mPoseLandmarks[0].landmarks[root.id] = root;
mPoseLandmarks[0].score += root.score;
}
}
- int landmarkOffset = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 1) ? 2 : 3;
- if (landmarkInfo.GetDecodingType() == 0) {
+ int landmarkOffset = (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 2 : 3;
+ if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
landmarkOffset = landmarkInfo.GetOffset();
}
for (int idx = 0; idx < mNumberOfLandmarks; ++idx) {
Name: capi-media-vision
Summary: Media Vision library for Tizen Native API
-Version: 0.8.12
-Release: 2
+Version: 0.8.13
+Release: 0
Group: Multimedia/Framework
License: Apache-2.0 and BSD-3-Clause
Source0: %{name}-%{version}.tar.gz