{
namespace inference
{
- int OutputMetadata::GetScore(JsonObject *root)
+ /**
+ * @brief Fills this ScoreInfo from the "score" array member of @a root.
+ *
+ * For every element of the array the members "name", "index", "top_number",
+ * "threshold" and "score_type" are read; each position of "index" whose value
+ * is 1 is handed to dimInfo.SetValidIndex(). When "dequantization" is present,
+ * "scale" and "zeropoint" of its first entry are used to create deQuantization.
+ * Scalar fields are assigned on every iteration, so the last element wins.
+ *
+ * @param root JSON object that holds the "score" array member.
+ * @return MEDIA_VISION_ERROR_NONE on success,
+ *         MEDIA_VISION_ERROR_INVALID_OPERATION when "dequantization" is present
+ *         but has no first entry to read.
+ */
+ int ScoreInfo::ParseScore(JsonObject *root)
+ {
+ LOGI("ENTER");
+
+ JsonArray * rootArray = json_object_get_array_member(root, "score");
+ unsigned int elements = json_array_get_length(rootArray);
+
+ for (unsigned int elem = 0; elem < elements; ++elem) {
+ JsonNode *pNode = json_array_get_element(rootArray, elem);
+ JsonObject *pObject = json_node_get_object(pNode);
+
+ name = json_object_get_string_member(pObject,"name");
+ LOGI("layer: %s", name.c_str());
+
+ JsonArray * array = json_object_get_array_member(pObject, "index");
+ unsigned int elements2 = json_array_get_length(array);
+ LOGI("range dim: size[%u]", elements2);
+ // only positions flagged with 1 are recorded as valid dimensions
+ for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+ if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
+ dimInfo.SetValidIndex(elem2);
+ }
+
+ topNumber = static_cast<int>(json_object_get_int_member(pObject, "top_number"));
+ LOGI("top number: %d", topNumber);
+
+ threshold = static_cast<double>(json_object_get_double_member(pObject, "threshold"));
+ LOGI("threshold: %1.3f", threshold);
+
+ type = static_cast<int>(json_object_get_int_member(pObject, "score_type"));
+ LOGI("score type: %d", type);
+
+ if (json_object_has_member(pObject, "dequantization")) {
+ array = json_object_get_array_member(pObject, "dequantization");
+ // Element 0 is read unconditionally below; an empty (or non-array)
+ // member has no such element, so scale/zeropoint cannot be read.
+ if (array == nullptr || json_array_get_length(array) == 0) {
+ LOGE("dequantization is empty. Invalid metadata");
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ JsonNode *node = json_array_get_element(array, 0);
+ JsonObject *object = json_node_get_object(node);
+
+ deQuantization = std::make_shared<DeQuantization>(
+ json_object_get_double_member(object, "scale"),
+ json_object_get_double_member(object, "zeropoint"));
+ }
+ }
+
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int OutputMetadata::ParseScore(JsonObject *root)
{
LOGI("ENTER");
return MEDIA_VISION_ERROR_NONE;
}
- // score
- JsonArray * rootArray = json_object_get_array_member(root, "score");
+ score.ParseScore(root);
+
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ /**
+ * @brief Fills this BoxInfo from the "box" array member of @a root.
+ *
+ * For every element of the array the members "name", "index", "box_type",
+ * "box_order", "box_coordinate" and "decoding_type" are read. Each position of
+ * "index" whose value is 1 is handed to dimInfo.SetValidIndex(), and every
+ * value of "box_order" is appended to order. Scalar fields are assigned on
+ * every iteration, so the last element wins.
+ *
+ * @param root JSON object that holds the "box" array member.
+ * @return MEDIA_VISION_ERROR_NONE.
+ */
+ int BoxInfo::ParseBox(JsonObject *root)
+ {
+ LOGI("ENTER");
+
+ JsonArray * rootArray = json_object_get_array_member(root, "box");
+ unsigned int elements = json_array_get_length(rootArray);
+
+ for (unsigned int elem = 0; elem < elements; ++elem) {
+ JsonNode *pNode = json_array_get_element(rootArray, elem);
+ JsonObject *pObject = json_node_get_object(pNode);
+
+ name = json_object_get_string_member(pObject,"name");
+ LOGI("layer: %s", name.c_str());
+
+ JsonArray * array = json_object_get_array_member(pObject, "index");
+ unsigned int elements2 = json_array_get_length(array);
+ LOGI("range dim: size[%u]", elements2);
+ // only positions flagged with 1 are recorded as valid dimensions
+ for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+ if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
+ dimInfo.SetValidIndex(elem2);
+ }
+
+ type = static_cast<int>(json_object_get_int_member(pObject, "box_type"));
+ LOGI("box type: %d", type);
+
+ array = json_object_get_array_member(pObject, "box_order");
+ elements2 = json_array_get_length(array);
+ // the element count is only logged here, it is not enforced
+ LOGI("box order should have 4 elements and it has [%u]", elements2);
+ for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+ auto val = static_cast<int>(json_array_get_int_element(array, elem2));
+ order.push_back(val);
+ LOGI("%d", val);
+ }
+
+ coordinate = static_cast<int>(json_object_get_int_member(pObject, "box_coordinate"));
+ LOGI("box coordinate: %d", coordinate);
+
+ decodingType = static_cast<int>(json_object_get_int_member(pObject, "decoding_type"));
+ LOGI("box decoding type: %d", decodingType);
+ }
+
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ /**
+ * @brief Parses the optional "box" section of the output metadata.
+ *
+ * A missing "box" member is not treated as a failure: the function logs it
+ * and returns MEDIA_VISION_ERROR_NONE without touching box.
+ *
+ * @param root JSON object of the output metadata.
+ * @return MEDIA_VISION_ERROR_NONE when "box" is absent, otherwise the result
+ *         of BoxInfo::ParseBox().
+ */
+ int OutputMetadata::ParseBox(JsonObject *root)
+ {
+ LOGI("ENTER");
+
+ if (json_object_has_member(root, "box") == false) {
+ LOGE("No box outputmetadata");
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ // hand the parser's result to the caller instead of dropping it
+ const int ret = box.ParseBox(root);
+
+ LOGI("LEAVE");
+ return ret;
+ }
+
+ /**
+ * @brief Fills this Label from the "label" array member of @a root.
+ *
+ * For every element of the array the members "name" and "index" are read;
+ * each position of "index" whose value is 1 is handed to
+ * dimInfo.SetValidIndex(). name is assigned on every iteration, so the last
+ * element wins.
+ *
+ * @param root JSON object that holds the "label" array member.
+ * @return MEDIA_VISION_ERROR_NONE.
+ */
+ int Label::ParseLabel(JsonObject *root)
+ {
+ LOGI("ENTER");
+
+ JsonArray * rootArray = json_object_get_array_member(root, "label");
unsigned int elements = json_array_get_length(rootArray);
// TODO: handling error
- // FIXEME: ScoreInfo.set()??
for (unsigned int elem = 0; elem < elements; ++elem) {
+ JsonNode *pNode = json_array_get_element(rootArray, elem);
+ JsonObject *pObject = json_node_get_object(pNode);
+
+ name = json_object_get_string_member(pObject,"name");
+ LOGI("layer: %s", name.c_str());
+
+ JsonArray * array = json_object_get_array_member(pObject, "index");
+ unsigned int elements2 = json_array_get_length(array);
+ LOGI("range dim: size[%u]", elements2);
+ // only positions flagged with 1 are recorded as valid dimensions
+ for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+ if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
+ dimInfo.SetValidIndex(elem2);
+ }
+ }
+
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ /**
+ * @brief Parses the "label" section of the output metadata.
+ *
+ * @param root JSON object of the output metadata.
+ * @return MEDIA_VISION_ERROR_INVALID_OPERATION when "label" is absent,
+ *         otherwise the result of Label::ParseLabel().
+ */
+ int OutputMetadata::ParseLabel(JsonObject *root)
+ {
+ LOGI("ENTER");
+
+ if (json_object_has_member(root, "label") == false) {
+ LOGE("No label outputmetadata");
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ // hand the parser's result to the caller instead of dropping it
+ const int ret = label.ParseLabel(root);
+
+ LOGI("LEAVE");
+ return ret;
+ }
+
+ /**
+ * @brief Fills this Number from the "number" array member of @a root.
+ *
+ * For every element of the array the members "name" and "index" are read;
+ * each position of "index" whose value is 1 is handed to
+ * dimInfo.SetValidIndex().
+ *
+ * @param root JSON object that holds the "number" array member.
+ * @return MEDIA_VISION_ERROR_NONE.
+ */
+ int Number::ParseNumber(JsonObject *root)
+ {
+ // number
+ JsonArray * rootArray = json_object_get_array_member(root, "number");
+ unsigned int elements = json_array_get_length(rootArray);
+ // TODO: handling error
+ for (unsigned int elem = 0; elem < elements; ++elem) {
JsonNode *pNode = json_array_get_element(rootArray, elem);
JsonObject *pObject = json_node_get_object(pNode);
- score.name =
- static_cast<const char*>(json_object_get_string_member(pObject,"name"));
- LOGI("layer: %s", score.name.c_str());
+ name = json_object_get_string_member(pObject,"name");
+ LOGI("layer: %s", name.c_str());
JsonArray * array = json_object_get_array_member(pObject, "index");
unsigned int elements2 = json_array_get_length(array);
LOGI("range dim: size[%u]", elements2);
for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
- auto index = static_cast<int>(json_array_get_int_element(array, elem2));
- score.dimInfo.index.push_back(index);
- LOGI("%d", index);
+ if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
+ dimInfo.SetValidIndex(elem2);
}
+ }
- score.topNumber = static_cast<int>(json_object_get_int_member(pObject, "top_number"));
- LOGI("top number: %d", score.topNumber);
+ return MEDIA_VISION_ERROR_NONE;
+ }
- score.threshold = static_cast<double>(json_object_get_double_member(pObject, "threshold"));
- LOGI("threshold: %1.3f", score.threshold);
+ /**
+ * @brief Parses the "number" section of the output metadata.
+ *
+ * @param root JSON object of the output metadata.
+ * @return MEDIA_VISION_ERROR_INVALID_OPERATION when "number" is absent,
+ *         otherwise the result of Number::ParseNumber().
+ */
+ int OutputMetadata::ParseNumber(JsonObject *root)
+ {
+ LOGI("ENTER");
- score.type = static_cast<int>(json_object_get_int_member(pObject, "score_type"));
- LOGI("score type: %d", score.type);
+ if (json_object_has_member(root, "number") == false) {
+ LOGE("No number outputmetadata");
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+ // hand the parser's result to the caller instead of dropping it
+ const int ret = number.ParseNumber(root);
- if (json_object_has_member(pObject, "dequantization")) {
- array = json_object_get_array_member(pObject, "dequantization");
- JsonNode *node = json_array_get_element(array, 0);
- JsonObject *object = json_node_get_object(node);
+ LOGI("LEAVE");
+ return ret;
+ }
- score.deQuantization = std::make_unique<DeQuantization>(
- json_object_get_double_member(object, "scale"),
- json_object_get_double_member(object, "zeropoint"));
+ /**
+ * @brief Reads the "decoding_info" of every "box" element into the box decode info.
+ *
+ * A missing "box" member is logged and reported as MEDIA_VISION_ERROR_NONE.
+ * For each box element "decoding_info" and its "anchor" object are mandatory;
+ * the anchor parameters, "strides" and "aspect_ratios" are copied into
+ * decodeInfo.anchorParam. The "nms" object is optional; when present its
+ * "mode" and "threshold" are copied into decodeInfo.nmsParam.
+ *
+ * @param root JSON object of the output metadata.
+ * @return MEDIA_VISION_ERROR_NONE on success or when "box" is absent,
+ *         MEDIA_VISION_ERROR_INVALID_OPERATION when "decoding_info" or
+ *         "anchor" is missing.
+ */
+ int OutputMetadata::ParseBoxDecodeInfo(JsonObject *root)
+ {
+ LOGI("ENTER");
+
+ if (json_object_has_member(root, "box") == false) {
+ LOGE("No box outputmetadata");
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ // box
+ JsonArray * rootArray = json_object_get_array_member(root, "box");
+ unsigned int elements = json_array_get_length(rootArray);
+
+ // TODO: handling error
+ for (unsigned int elem = 0; elem < elements; ++elem) {
+ JsonNode *pNode = json_array_get_element(rootArray, elem);
+ JsonObject *pObject = json_node_get_object(pNode);
+
+ if (json_object_has_member(pObject, "decoding_info") == false) {
+ LOGE("decoding_info is mandatory. Invalid metadata");
+ LOGI("LEAVE");
+
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info");
+ if (json_object_has_member(cObject, "anchor") == false) {
+ LOGE("anchor is mandatory. Invalid metadata");
+ LOGI("LEAVE");
+
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
+
+ // anchor parameters: the individual members are read without a presence check
+ JsonObject *object = json_object_get_object_member(cObject, "anchor") ;
+ BoxInfo::DecodeInfo& decodeInfo = box.GetDecodeInfo();
+ decodeInfo.anchorParam.mode = static_cast<int>(json_object_get_int_member(object, "mode"));
+ decodeInfo.anchorParam.numLayers = static_cast<int>(json_object_get_int_member(object, "num_layers"));
+ decodeInfo.anchorParam.minScale = static_cast<float>(json_object_get_double_member(object, "min_scale"));
+ decodeInfo.anchorParam.maxScale = static_cast<float>(json_object_get_double_member(object, "max_scale"));
+ decodeInfo.anchorParam.inputSizeHeight = static_cast<int>(json_object_get_int_member(object, "input_size_height"));
+ decodeInfo.anchorParam.inputSizeWidth = static_cast<int>(json_object_get_int_member(object, "input_size_width"));
+ decodeInfo.anchorParam.anchorOffsetX = static_cast<float>(json_object_get_double_member(object, "anchor_offset_x"));
+ decodeInfo.anchorParam.anchorOffsetY = static_cast<float>(json_object_get_double_member(object, "anchor_offset_y"));
+ decodeInfo.anchorParam.isReduceBoxedInLowestLayer =
+ static_cast<bool>(json_object_get_boolean_member(object, "reduce_boxed_in_lowest_layer"));
+ decodeInfo.anchorParam.interpolatedScaleAspectRatio =
+ static_cast<float>(json_object_get_double_member(object, "interpolated_scale_aspect_ratio"));
+ decodeInfo.anchorParam.isFixedAnchorSize =
+ static_cast<bool>(json_object_get_boolean_member(object, "fixed_anchor_size"));
+ decodeInfo.anchorParam.isExponentialBoxScale =
+ static_cast<bool>(json_object_get_boolean_member(object, "exponential_box_scale"));
+
+ decodeInfo.anchorParam.xScale = static_cast<float>(json_object_get_double_member(object, "x_scale"));
+ decodeInfo.anchorParam.yScale = static_cast<float>(json_object_get_double_member(object, "y_scale"));
+ decodeInfo.anchorParam.wScale = static_cast<float>(json_object_get_double_member(object, "w_scale"));
+ decodeInfo.anchorParam.hScale = static_cast<float>(json_object_get_double_member(object, "h_scale"));
+
+ // strides are appended; nothing here clears values pushed by an earlier box element
+ JsonArray * array = json_object_get_array_member(object, "strides");
+ unsigned int elements2 = json_array_get_length(array);
+ for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+ auto stride = static_cast<int>(json_array_get_int_element(array, elem2));
+ decodeInfo.anchorParam.strides.push_back(stride);
+ LOGI("stride: %d", stride);
+ }
+
+ // aspect ratios are appended in the same way
+ array = json_object_get_array_member(object, "aspect_ratios");
+ elements2 = json_array_get_length(array);
+ for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+ auto aspectRatio = static_cast<float>(json_array_get_double_element(array, elem2));
+ decodeInfo.anchorParam.aspectRatios.push_back(aspectRatio);
+ LOGI("aspectRatio: %.4f", aspectRatio);
+ }
+
+ // nms is optional: without it move on to the next box element
+ if (json_object_has_member(cObject, "nms") == false) {
+ LOGI("nms is empty. skip it");
+ continue;
+ }
+
+ object = json_object_get_object_member(cObject, "nms");
+ decodeInfo.nmsParam.mode = static_cast<int>(json_object_get_int_member(object, "mode"));
+ decodeInfo.nmsParam.threshold = static_cast<float>(json_object_get_double_member(object,"threshold"));
}
LOGI("LEAVE");
return MEDIA_VISION_ERROR_NONE;
+
+ }
+
+ /**
+ * @brief Linearly interpolates an anchor scale between @a min and @a max.
+ *
+ * @param min       scale returned for index 0.
+ * @param max       scale returned for index maxStride - 1.
+ * @param index     position to interpolate for.
+ * @param maxStride number of positions the [min, max] range is spread over.
+ * @return min + (max - min) * index / (maxStride - 1), or the midpoint of
+ *         min and max when maxStride is 1.
+ */
+ float OutputMetadata::CalculateScale(float min, float max, int index, int maxStride)
+ {
+ // With a single position there is no range to interpolate over and the
+ // divisor (maxStride - 1) would be zero, so use the midpoint instead.
+ if (maxStride == 1)
+ return (min + max) * 0.5f;
+
+ return min + (max - min) * 1.0 * index / (maxStride - 1.0f);
+ }
+
+ /**
+ * @brief Generates the anchor boxes described by the box decode info.
+ *
+ * Consecutive layers that share the same stride are handled as one group.
+ * For each group a list of (aspect ratio, scale) pairs is collected and turned
+ * into anchor widths and heights; then one anchor per pair is appended to
+ * decodeInfo.anchorBoxes for every cell of the group's feature map. The anchor
+ * position is the cell index plus the anchor offset, divided by the feature
+ * map size. The anchor size is 1x1 when isFixedAnchorSize is set, otherwise the
+ * computed width and height.
+ *
+ * @return MEDIA_VISION_ERROR_NONE on success,
+ *         MEDIA_VISION_ERROR_INVALID_OPERATION when the anchor parameters are
+ *         unusable (no strides, no aspect ratios, fewer strides than layers,
+ *         a non-positive stride) or when no anchor box was produced.
+ */
+ int OutputMetadata::GenerateAnchor()
+ {
+ BoxInfo::DecodeInfo& decodeInfo = box.GetDecodeInfo();
+
+ if (decodeInfo.anchorParam.strides.empty() ||
+ decodeInfo.anchorParam.aspectRatios.empty()) {
+ LOGE("Invalid anchor parameters");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ // strides is indexed by layer id below, so every layer needs its own entry
+ if (decodeInfo.anchorParam.numLayers >
+ static_cast<int>(decodeInfo.anchorParam.strides.size())) {
+ LOGE("Invalid anchor parameters");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ // the input size is divided by the stride, which therefore must be positive
+ for (int layer = 0; layer < decodeInfo.anchorParam.numLayers; ++layer) {
+ if (decodeInfo.anchorParam.strides[layer] <= 0) {
+ LOGE("Invalid anchor parameters");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+ }
+
+ int layerId = 0;
+ while (layerId < decodeInfo.anchorParam.numLayers) {
+ std::vector<float> anchorHeight;
+ std::vector<float> anchorWidth;
+ std::vector<float> aspectRatios;
+ std::vector<float> scales;
+
+ // collect (aspect ratio, scale) pairs of all layers sharing this stride
+ int lastSameStrideLayer = layerId;
+ std::vector<float>::iterator iter1, iter2;
+ while ((lastSameStrideLayer < decodeInfo.anchorParam.numLayers) &&
+ (decodeInfo.anchorParam.strides[lastSameStrideLayer] ==
+ decodeInfo.anchorParam.strides[layerId])) {
+ const float scale = CalculateScale( decodeInfo.anchorParam.minScale,
+ decodeInfo.anchorParam.maxScale,
+ lastSameStrideLayer,
+ decodeInfo.anchorParam.strides.size());
+
+ if (lastSameStrideLayer == 0 &&
+ decodeInfo.anchorParam.isReduceBoxedInLowestLayer) {
+ // the first layer gets a fixed set of three anchors
+ aspectRatios.push_back(1.0);
+ aspectRatios.push_back(2.0);
+ aspectRatios.push_back(0.5);
+ scales.push_back(0.1);
+ scales.push_back(scale);
+ scales.push_back(scale);
+ } else {
+ for (iter1 = decodeInfo.anchorParam.aspectRatios.begin();
+ iter1 != decodeInfo.anchorParam.aspectRatios.end();
+ ++iter1) {
+ aspectRatios.push_back((*iter1));
+ scales.push_back(scale);
+ }
+ if (decodeInfo.anchorParam.interpolatedScaleAspectRatio > 0.0f) {
+ // extra anchor whose scale is the geometric mean of this
+ // layer's scale and the next one (1.0 after the last stride)
+ const float scaleNext =
+ lastSameStrideLayer == static_cast<int>(decodeInfo.anchorParam.strides.size()) - 1
+ ? 1.0f
+ : CalculateScale(decodeInfo.anchorParam.minScale,
+ decodeInfo.anchorParam.maxScale,
+ lastSameStrideLayer + 1,
+ decodeInfo.anchorParam.strides.size());
+ scales.push_back(std::sqrt(scale * scaleNext));
+ aspectRatios.push_back(decodeInfo.anchorParam.interpolatedScaleAspectRatio);
+ }
+ }
+ lastSameStrideLayer++;
+ }
+
+ // height = scale / sqrt(ratio), width = scale * sqrt(ratio)
+ for (iter1 = aspectRatios.begin(), iter2 = scales.begin();
+ (iter1 != aspectRatios.end() && iter2 != scales.end());
+ ++iter1, ++iter2) {
+ const float ratioSqrts = std::sqrt((*iter1));
+ anchorHeight.push_back((*iter2) / ratioSqrts);
+ anchorWidth.push_back((*iter2) * ratioSqrts);
+ }
+
+ const int stride = decodeInfo.anchorParam.strides[layerId];
+ int featureMapHeight = std::ceil(1.0f * decodeInfo.anchorParam.inputSizeHeight / stride);
+ int featureMapWidth = std::ceil(1.0f * decodeInfo.anchorParam.inputSizeWidth / stride);
+
+ for (int y = 0; y < featureMapHeight; ++y) {
+ for (int x = 0; x < featureMapWidth; ++x) {
+ for (int anchorId = 0; anchorId < static_cast<int>(anchorHeight.size()); ++anchorId) {
+ cv::Rect2f anchor = {
+ cv::Point2f {
+ (x + decodeInfo.anchorParam.anchorOffsetX) * 1.0f / featureMapWidth,
+ (y + decodeInfo.anchorParam.anchorOffsetY) * 1.0f / featureMapHeight
+ },
+ // Size2f takes (width, height): the height must come from anchorHeight
+ decodeInfo.anchorParam.isFixedAnchorSize ?
+ cv::Size2f {1.0f, 1.0f} :
+ cv::Size2f {anchorWidth[anchorId], anchorHeight[anchorId]}
+ };
+ decodeInfo.anchorBoxes.push_back(anchor);
+ }
+ }
+ }
+ layerId = lastSameStrideLayer;
+ }
+
+ if (decodeInfo.anchorBoxes.empty()) {
+ LOGE("Anchor boxes are empty");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ /** @brief Returns a mutable reference to the score member. */
+ ScoreInfo& OutputMetadata::GetScore()
+ {
+ return score;
+ }
+
+ /** @brief Returns a mutable reference to the box member. */
+ BoxInfo& OutputMetadata::GetBox()
+ {
+ return box;
+ }
+
+ /** @brief Returns a mutable reference to the label member. */
+ Label& OutputMetadata::GetLabel()
+ {
+ return label;
+ }
+
+ /** @brief Returns a mutable reference to the number member. */
+ Number& OutputMetadata::GetNumber()
+ {
+ return number;
+ }
+
+ /** @brief Returns the parsed flag, which Parse() sets to true once it completes without error. */
+ bool OutputMetadata::IsParsed()
+ {
+ return parsed;
}
+ /**
+ * @brief Parses the whole output metadata and marks this object as parsed.
+ *
+ * The score section is parsed first, then the box section. When the box has a
+ * name, further parsing depends on its decoding type: type 0 additionally
+ * parses the label and number sections, type 1 parses the box decode info and
+ * generates the anchors, any other value is only logged as a warning.
+ *
+ * @param root JSON object of the output metadata.
+ * @return MEDIA_VISION_ERROR_NONE on success, otherwise the first error
+ *         returned by one of the parsing steps (parsed is then left unchanged).
+ */
int OutputMetadata::Parse(JsonObject *root)
{
LOGI("ENTER");
- int ret = GetScore(root);
+ int ret = ParseScore(root);
if (ret != MEDIA_VISION_ERROR_NONE) {
LOGE("Fail to GetScore[%d]", ret);
return ret;
}
+ ret = ParseBox(root);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to GetBox[%d]", ret);
+ return ret;
+ }
+
+ if (!box.GetName().empty()) {
+ // additional parsing is required according to decoding type
+ if (box.GetDecoddingType() == 0) {
+
+ ret = ParseLabel(root);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to GetLabel[%d]", ret);
+ return ret;
+ }
+
+ ret = ParseNumber(root);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to GetNumber[%d]", ret);
+ return ret;
+ }
+
+ } else if (box.GetDecoddingType() == 1) {
+ ret = ParseBoxDecodeInfo(root);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to GetBoxDecodeInfo[%d]", ret);
+ return ret;
+ }
+
+ ret = GenerateAnchor();
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to GenerateAnchor[%d]", ret);
+ return ret;
+ }
+
+ } else {
+ LOGW("Unknow box decoding type. Ignore");
+ }
+ }
+
parsed = true;
LOGI("LEAVE");
return MEDIA_VISION_ERROR_NONE;
}
- int ScoreInfo::GetIndex() const
+ /**
+ * @brief Appends @a index to the list of valid dimension indices (dims).
+ *
+ * @param index dimension index to record.
+ */
+ void DimInfo::SetValidIndex(int index)
{
LOGI("ENTER");
- int ret = 0;
- for (auto& index : dimInfo.index) {
- if (index > 0) {
- break;
- }
- ret++;
- }
+ dims.push_back(index);
LOGI("LEAVE");
+ }
+
+ /**
+ * @brief Returns a copy of all indices recorded so far in dims.
+ *
+ * @return the stored indices, in insertion order.
+ */
+ std::vector<int> DimInfo::GetValidIndexAll() const
+ {
+ LOGI("ENTER");
- return ret;
+ LOGI("LEAVE");
+ return dims;
}
} /* Inference */
} /* MediaVision */