"index" : [-1, -1, 1],
"box_type" : "ORIGIN_CENTER",
"box_order" : [1, 0, 3, 2],
- "box_coordinate" : 1,
- "decoding_type" : 1,
+ "box_coordinate" : "PIXEL",
+ "decoding_type" : "SSD_ANCHOR",
"decoding_info" :
{
"anchor" :
},
"nms" :
{
- "mode": 0,
- "threshold": 0.25
+ "mode": "STANDARD",
+ "iou_threshold": 0.25
}
}
}
"index" : [-1, -1, 1],
"box_type" : "ORIGIN_LEFTTOP",
"box_order" : [1, 0, 3, 2],
- "box_coordinate" : 0,
- "decoding_type": 0
+ "box_coordinate" : "RATIO",
+ "decoding_type": "BYPASS"
}
],
"label" : [
"index" : [-1, -1, 1],
"box_type" : "ORIGIN_LEFTTOP",
"box_order" : [1, 0, 3, 2],
- "box_coordinate" : 0,
- "decoding_type": 0
+ "box_coordinate" : "RATIO",
+ "decoding_type": "BYPASS"
}
],
"label" : [
"index" : [-1, -1, 1],
"box_type" : "ORIGIN_CENTER",
"box_order" : [1, 0, 3, 2],
- "box_coordinate" : 0,
- "decoding_type" : 1,
+ "box_coordinate" : "RATIO",
+ "decoding_type" : "SSD_ANCHOR",
"decoding_info" :
{
"anchor" :
},
"nms" :
{
- "mode": 0,
- "threshold": 0.4
+ "mode": "STANDARD",
+ "iou_threshold": 0.4
}
}
}
/**
 * Non-maximum suppression settings read from the "nms" JSON object:
 * the suppression mode and the IoU threshold.
 */
class NMSParam {
public:
- int mode; /**< 0: IOU */
- float threshold;
+ inference_box_nms_type_e mode; /**< 0: standard; starts as INFERENCE_BOX_NMS_TYPE_NONE */
+ float iouThreshold; /**< filled from the "iou_threshold" member; 0.2f by default */
+
+ std::map<std::string, inference_box_nms_type_e> supportedBoxNmsTypes; /**< "mode" string -> enum value */
+
+ NMSParam() : mode(INFERENCE_BOX_NMS_TYPE_NONE), iouThreshold(0.2f) {
+ supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD}); // only "STANDARD" is registered
+ };
- NMSParam() : mode(-1), threshold(0.2f) {};
~NMSParam() = default;
};
// Nms param
int ParseNms(JsonObject *root);
int GetNmsMode();
- float GetNmsThreshold();
+ float GetNmsIouThreshold();
// Rotate param
int ParseRotate(JsonObject *root);
DimInfo dimInfo;
inference_box_type_e type; // 0:L-T-R-B, 1: Cx-Cy-W-H
std::vector<int> order; // Order based on box type
- int coordinate; // 0: ratio, 1: pixel
- int decodingType; // 0: post-op, 1: achorbox(ssd), 2:yolo(?)
+ inference_box_coordinate_type_e coordinate; // 0: ratio, 1: pixel
+ inference_box_decoding_type_e decodingType; // 0: bypass , 1:ssd with anchor
DecodeInfo decodingInfo;
std::map<std::string, inference_box_type_e> supportedBoxTypes;
+ std::map<std::string, inference_box_coordinate_type_e> supportedBoxCoordinateTypes;
+ std::map<std::string, inference_box_decoding_type_e> supportedBoxDecodingTypes;
public:
BoxInfo();
std::string GetName() { return name; }
DimInfo GetDimInfo() { return dimInfo; }
inference_box_type_e GetType() { return type; }
- int GetDecodingType() { return decodingType; }
+ inference_box_decoding_type_e GetDecodingType() { return decodingType; }
std::vector<int> GetOrder() { return order; }
// NOTE(review): `coordinate` is declared as inference_box_coordinate_type_e, but this getter
// still returns int (implicit enum-to-int conversion), unlike GetDecodingType() which returns
// its enum type. Consider returning inference_box_coordinate_type_e here as well.
int GetCoordinate() { return coordinate; }
DecodeInfo& GetDecodeInfo() {return decodingInfo; }
INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP,
INFERENCE_BOX_TYPE_ORIGIN_CENTER
} inference_box_type_e;
+
// Coordinate system of box values; selected by the "box_coordinate" string.
+ typedef enum {
+ INFERENCE_BOX_COORDINATE_TYPE_RATIO, /**< "RATIO" (0) */
+ INFERENCE_BOX_COORDINATE_TYPE_PIXEL /**< "PIXEL" (1) */
+ } inference_box_coordinate_type_e;
+
// How box values are decoded; selected by the "decoding_type" string.
+ typedef enum {
+ INFERENCE_BOX_DECODING_TYPE_BYPASS, /**< "BYPASS" (0) */
+ INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR, /**< "SSD_ANCHOR" (1): ssd with anchor */
+ } inference_box_decoding_type_e;
+
// NMS mode; selected by the "mode" string of the "nms" object.
+ typedef enum {
+ INFERENCE_BOX_NMS_TYPE_NONE = -1, /**< initial value of NMSParam::mode */
+ INFERENCE_BOX_NMS_TYPE_STANDARD /**< "STANDARD" (0) */
+ } inference_box_nms_type_e;
}
}
int boxOffset = 0;
int numberOfObjects = 0;
- if (boxInfo.GetDecodingType() == 0) {
+ if (boxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
std::vector<int> boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll();
if (boxIndexes.size() != 1) {
LOGE("Invalid dim size. It should be 1");
int boxOffset = 0;
int numberOfFaces = 0;
- if (boxInfo.GetDecodingType() == 0) {
+ if (boxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
std::vector<int> boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll();
if (boxIndexes.size() != 1) {
LOGE("Invalid dim size. It should be 1");
{
int ObjectDecoder::init()
{
- if (mBoxInfo.GetDecodingType() == 0) {
+ if (mBoxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
if (!mTensorBuffer.exist(mMeta.GetLabel().GetName()) ||
!mTensorBuffer.exist(mMeta.GetNumber().GetName()) ) {
LOGE("buffer buffers named of %s or %s are NULL",
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- // When GetDecodingType() == 0, the mNumberOfObjects should be set again.
- // otherwise, it is set already within ctor.
+ // mNumberOfObjects is set again if INFERENCE_BOX_DECODING_TYPE_BYPASS.
+ // Otherwise it is set already within ctor.
mNumberOfOjects = mTensorBuffer.getValue<int>(
mMeta.GetNumber().GetName(), indexes[0]);
} else {
cHeight = cHeight - tmpCy; // bottom - top
}
- // normalize if GetCoordinate() == 1 which is pixel coordinate (absolute)
- if (mBoxInfo.GetCoordinate() == 1) {
+ // convert coordinate to RATIO if PIXEL
+ if (mBoxInfo.GetCoordinate() == INFERENCE_BOX_COORDINATE_TYPE_PIXEL) {
cx /= mScaleW;
cy /= mScaleH;
cWidth /= mScaleW;
int ret = MEDIA_VISION_ERROR_NONE;
for (int idx = 0; idx < mNumberOfOjects; ++idx) {
- if (mBoxInfo.GetDecodingType() == 0) {
+ if (mBoxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
float score = decodeScore(idx);
if (score <= 0.0f)
continue;
PostProcess postProc;
ret = postProc.Nms(boxList,
mBoxInfo.GetDecodeInfo().GetNmsMode(),
- mBoxInfo.GetDecodeInfo().GetNmsThreshold(),
+ mBoxInfo.GetDecodeInfo().GetNmsIouThreshold(),
mResultBoxes);
if (ret != MEDIA_VISION_ERROR_NONE) {
LOGE("Fail to non-maximum suppression[%d]", ret);
dimInfo(),
type(INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP),
order(),
- coordinate(0),
- decodingType(0),
+ coordinate(INFERENCE_BOX_COORDINATE_TYPE_RATIO),
+ decodingType(INFERENCE_BOX_DECODING_TYPE_BYPASS),
decodingInfo()
{
supportedBoxTypes.insert({"ORIGIN_LEFTTOP", INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP});
supportedBoxTypes.insert({"ORIGIN_CENTER", INFERENCE_BOX_TYPE_ORIGIN_CENTER});
+
+ supportedBoxCoordinateTypes.insert({"RATIO", INFERENCE_BOX_COORDINATE_TYPE_RATIO});
+ supportedBoxCoordinateTypes.insert({"PIXEL", INFERENCE_BOX_COORDINATE_TYPE_PIXEL});
+
+ supportedBoxDecodingTypes.insert({"BYPASS", INFERENCE_BOX_DECODING_TYPE_BYPASS});
+ supportedBoxDecodingTypes.insert({"SSD_ANCHOR", INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR});
}
int BoxInfo::ParseBox(JsonObject *root)
try {
type = OutputMetadata::GetSupportedType(pObject, "box_type", supportedBoxTypes);
+ coordinate = OutputMetadata::GetSupportedType(pObject, "box_coordinate", supportedBoxCoordinateTypes);
+ decodingType = OutputMetadata::GetSupportedType(pObject, "decoding_type", supportedBoxDecodingTypes);
} catch (const std::exception& e) {
LOGE("Invalid %s", e.what());
}
order.push_back(val);
LOGI("%d", val);
}
-
- coordinate = static_cast<int>(json_object_get_int_member(pObject, "box_coordinate"));
- LOGI("box coordinate: %d", coordinate);
-
- decodingType = static_cast<int>(json_object_get_int_member(pObject, "decoding_type"));
- LOGI("box decodeing type: %d", decodingType);
}
LOGI("LEAVE");
}
JsonObject *object = json_object_get_object_member(root, "nms");
- this->nmsParam.mode = static_cast<int>(json_object_get_int_member(object, "mode"));
- this->nmsParam.threshold = static_cast<float>(json_object_get_double_member(object,"threshold"));
+ try {
+ this->nmsParam.mode = OutputMetadata::GetSupportedType(object, "mode", this->nmsParam.supportedBoxNmsTypes);
+ } catch (const std::exception& e) {
+ LOGE("Invalid %s", e.what());
+ }
+
+ this->nmsParam.iouThreshold = static_cast<float>(json_object_get_double_member(object,"iou_threshold"));
return MEDIA_VISION_ERROR_NONE;
}
return this->nmsParam.mode;
}
// Returns the NMS IoU threshold stored in nmsParam; the object decoder forwards this
// value to PostProcess::Nms().
- float BoxInfo::DecodeInfo::GetNmsThreshold()
+ float BoxInfo::DecodeInfo::GetNmsIouThreshold()
{
- return this->nmsParam.threshold;
+ return this->nmsParam.iouThreshold;
}
int BoxInfo::DecodeInfo::ParseRotate(JsonObject *root)
if (!box.GetName().empty()) {
// additional parsing is required according to decoding type
- if (box.GetDecodingType() == 0) {
+ if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
ret = ParseLabel(root);
if (ret != MEDIA_VISION_ERROR_NONE) {
return ret;
}
- } else if (box.GetDecodingType() == 1) {
+ } else if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
ret = ParseBoxDecodeInfo(root);
if (ret != MEDIA_VISION_ERROR_NONE) {
LOGE("Fail to GetBoxDecodeInfo[%d]", ret);
Name: capi-media-vision
Summary: Media Vision library for Tizen Native API
-Version: 0.8.9
-Release: 1
+Version: 0.8.10
+Release: 0
Group: Multimedia/Framework
License: Apache-2.0 and BSD-3-Clause
Source0: %{name}-%{version}.tar.gz