--- /dev/null
+{
+ "inputmetadata" :
+ {
+ "tensor_info" : [
+ {
+ "name" : "input_1:0",
+ "shape_type" : "NHWC",
+ "shape_dims" : [ 1, 320, 320, 3],
+ "data_type" : "FLOAT32",
+ "color_space" : "RGB888"
+ }
+ ],
+ "preprocess" : [
+ {
+ "normalization" : [
+ {
+ "mean" : [0.0, 0.0, 0.0],
+ "std" : [255.0, 255.0, 255.0]
+ }
+ ]
+ }
+ ]
+ },
+ "outputmetadata" :
+ {
+ "score" : [
+ {
+ "name" : "Identity:0",
+ "index" : [-1, -1, 1],
+ "top_number" : 5,
+ "threshold" : 0.4,
+ "score_type" : "NORMAL"
+ }
+ ],
+ "box" : [
+ {
+ "name" : "Identity:0",
+ "index" : [-1, -1, 1],
+ "box_type" : "ORIGIN_CENTER",
+ "box_order" : [0, 1, 2, 3],
+ "box_coordinate" : "RATIO",
+ "decoding_type" : "YOLO_ANCHOR",
+ "decoding_info" :
+ {
+ "cell" :
+ {
+ "num_scales" : 3,
+ "scales": [8, 16, 32]
+ },
+ "nms" :
+ {
+ "mode": "STANDARD",
+ "iou_threshold": 0.2
+ }
+ }
+ }
+ ]
+ }
+}
supportedBoxDecodingTypes.insert({"BYPASS", INFERENCE_BOX_DECODING_TYPE_BYPASS});
supportedBoxDecodingTypes.insert({"SSD_ANCHOR", INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR});
+ supportedBoxDecodingTypes.insert({"YOLO_ANCHOR", INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR});
}
~BoxInfo() = default;
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
+ int ret = MEDIA_VISION_ERROR_NONE;
JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info");
- if (!json_object_has_member(cObject, "anchor")) {
+ if (json_object_has_member(cObject, "anchor")) {
+ ret = GetDecodeInfo().ParseAnchorParam(cObject);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to ParseAnchorParam[%d]", ret);
+ return ret;
+ }
+ } else if (json_object_has_member(cObject, "cell")) {
+ ret = GetDecodeInfo().ParseCellParam(cObject);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to ParseCellParam[%d]", ret);
+ return ret;
+ }
+ } else {
- LOGE("anchor is mandatory. Invalid metadata");
+ LOGE("anchor or cell is mandatory. Invalid metadata");
LOGI("LEAVE");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- int ret = GetDecodeInfo().ParseAnchorParam(cObject);
- if (ret != MEDIA_VISION_ERROR_NONE) {
- LOGE("Fail to ParseAnchorParam[%d]", ret);
- return ret;
- }
-
ret = GetDecodeInfo().ParseNms(cObject);
if (ret != MEDIA_VISION_ERROR_NONE) {
LOGE("Fail to ParseNms[%d]", ret);
float hScale;
};
+ struct CellParam {
+ int numScales; /**< number of detection scales */
+ std::vector<int> scales; /**< stride of each detection grid, e.g. 8, 16, 32 */
+ };
+
struct NMSParam {
inference_box_nms_type_e mode; /**< 0: standard */
float iouThreshold;
private:
AnchorParam anchorParam;
std::vector<cv::Rect2f> anchorBoxes;
+ CellParam cellParam;
NMSParam nmsParam;
RotateParam rotParam;
RoiOptionParam roiOptParam;
float GetAnchorHscale();
float CalculateScale(float min, float max, int index, int maxStride);
+ // Cell param
+ int ParseCellParam(JsonObject *root);
+ std::vector<int>& GetCellScalesAll();
+ int GetCellNumScales();
+
// Nms param
int ParseNms(JsonObject *root);
int GetNmsMode();
typedef enum {
INFERENCE_BOX_DECODING_TYPE_BYPASS,
INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR,
+ INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR
} inference_box_decoding_type_e;
typedef enum {
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
- } else {
+ } else if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
if (boxIndexes.size() != 1) {
LOGE("Invalid dim size. It should be 1");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
numberOfObjects = mOutputLayerProperty.layers[outputMeta.GetScoreName()].shape[scoreIndexes[0]];
+ } else { // INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR
+ std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
+ if (boxIndexes.size() != 1) {
+ LOGE("Invalid dim size. It should be 1");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+ boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
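+ // YOLO output packs 4 box coordinates and 1 objectness score before the per-class scores.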
+ numberOfObjects = boxOffset - 5;
}
ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset,
// Otherwise it is set already within ctor.
mNumberOfOjects = mTensorBuffer.getValue<int>(
mMeta.GetBoxNumberName(), indexes[0]);
- } else {
+ } else if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
if (mMeta.GetBoxDecodeInfo().IsAnchorBoxEmpty()) {
LOGE("Anchor boxes are required but empty.");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
+ } else {
+ LOGI("YOLO_ANCHOR does nothing");
}
return MEDIA_VISION_ERROR_NONE;
int ObjectDecoder::decode()
{
+ LOGI("ENTER");
+
BoxesList boxList;
+ Boxes boxes;
int ret = MEDIA_VISION_ERROR_NONE;
+ int totalIdx = mNumberOfOjects;
+
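+ // For YOLO_ANCHOR every grid cell at every scale yields candidate boxes, so the
+ // loop bound is the total number of predictions rather than the number of classes.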
+ if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR) {
+ totalIdx = 0;
+ for (auto& scale : mMeta.GetBoxDecodeInfo().GetCellScalesAll()) {
+ totalIdx += (static_cast<int>(mScaleW) / scale
+ * static_cast<int>(mScaleH) / scale)
+ * mMeta.GetBoxDecodeInfo().GetCellNumScales();
+ }
+ boxes.reserve(totalIdx);
+ }
- for (int idx = 0; idx < mNumberOfOjects; ++idx) {
+ for (int idx = 0; idx < totalIdx; ++idx) {
if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
float score = decodeScore(idx);
if (score <= 0.0f)
Box box = decodeBox(idx, score);
mResultBoxes.push_back(box);
- } else {
+ } else if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
int anchorIdx = -1;
- Boxes boxes;
+ boxes.clear();
for (auto& anchorBox : mMeta.GetBoxDecodeInfo().GetAnchorBoxAll()) {
anchorIdx++;
boxes.push_back(box);
}
boxList.push_back(boxes);
+ } else { // INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR
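+ // Each prediction occupies mBoxOffset values: x, y, w, h, objectness, then the class scores.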
+ int cellIdx = idx * mBoxOffset;
+ float score = decodeScore(cellIdx + 4);
+ if (score <= 0.0f) {
+ continue;
+ }
+ // Find the class with the highest score for this candidate.
+ float objScore = 0.0f;
+ int objIdx = 0;
+ for (int objIdx_ = 0; objIdx_ < mNumberOfOjects; ++objIdx_) {
+ float objScore_ = decodeScore(cellIdx + 5 + objIdx_);
+ if (objScore_ > objScore) {
+ objScore = objScore_;
+ objIdx = objIdx_;
+ }
+ }
+
+ if (objScore < mMeta.GetScoreThreshold())
+ continue;
+
+ Box box = decodeBox(idx, objScore, objIdx);
+ boxes.push_back(box);
}
}
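+ // YOLO candidates are collected into a single list for NMS; the SSD path already pushed per class inside the loop.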
+ if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR)
+ boxList.push_back(boxes);
+
if (!boxList.empty()) {
PostProcess postProc;
ret = postProc.Nms(boxList,
LOGE("Fail to non-maximum suppression[%d]", ret);
return ret;
}
+ } else {
+ LOGW("boxlist empty!");
}
+ LOGI("LEAVE");
+
return ret;
}
return MEDIA_VISION_ERROR_NONE;
}
+ int DecodeInfo::ParseCellParam(JsonObject *root)
+ {
+ JsonObject *object = json_object_get_object_member(root, "cell");
+
+ this->cellParam.numScales = static_cast<int>(json_object_get_int_member(object, "num_scales"));
+
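+ // "scales" holds the stride of each detection grid (8, 16, 32 in the sample metadata).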
+ JsonArray *array = json_object_get_array_member(object, "scales");
+ unsigned int elements = json_array_get_length(array);
+ for (unsigned int elem = 0; elem < elements; ++elem) {
+ auto scale = static_cast<int>(json_array_get_int_element(array, elem));
+ this->cellParam.scales.push_back(scale);
+ LOGI("scale: %d", scale);
+ }
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ std::vector<int>& DecodeInfo::GetCellScalesAll()
+ {
+ return this->cellParam.scales;
+ }
+
+ int DecodeInfo::GetCellNumScales()
+ {
+ return this->cellParam.numScales;
+ }
+
float DecodeInfo::CalculateScale(float min, float max, int index, int maxStride)
{
return min + (max - min) * 1.0 * index / (maxStride - 1.0f);
return ret;
}
- } else if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
+ } else {
ret = box.ParseDecodeInfo(root);
if (ret != MEDIA_VISION_ERROR_NONE) {
LOGE("Fail to GetBoxDecodeInfo[%d]", ret);
return ret;
}
- ret = box.GetDecodeInfo().GenerateAnchor();
- if (ret != MEDIA_VISION_ERROR_NONE) {
- LOGE("Fail to GenerateAnchor[%d]", ret);
- return ret;
+ if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
+ ret = box.GetDecodeInfo().GenerateAnchor();
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to GenerateAnchor[%d]", ret);
+ return ret;
+ }
}
-
- } else {
- LOGW("Unknow box decoding type. Ignore");
}
}
Name: capi-media-vision
Summary: Media Vision library for Tizen Native API
-Version: 0.12.3
+Version: 0.13.0
Release: 0
Group: Multimedia/Framework
License: Apache-2.0 and BSD-3-Clause
"/usr/share/capi-media-vision/models/OD/tflite/od_efficientdet.tflite"
#define OD_TFLITE_META_QUANT_EFFICIENT_PATH \
"/usr/share/capi-media-vision/models/OD/tflite/od_efficientdet.json"
+#define OD_TFLITE_WEIGHT_YOLO_V5_320_PATH \
+ "/usr/share/capi-media-vision/models/OD/tflite/od_yolo_v5_320x320.tflite"
+#define OD_TFLITE_META_YOLO_V5_320_PATH \
+ "/usr/share/capi-media-vision/models/OD/tflite/od_yolo_v5_320x320.json"
+#define OD_LABEL_YOLO_V5_320_PATH \
+ "/usr/share/capi-media-vision/models/OD/tflite/od_yolo_v5_label.txt"
//Face Detection
#define FD_TFLITE_WEIGHT_PATH \
"Hosted[o]: TFLite(cpu + MobilenetV1+SSD)",
"Hosted[o]: TFLite(cpu + MobilenetV2+SSD)",
"Hosted[o]: TFLite(Quant + EfficientDet)",
+ "Hosted[o]: TFLite(cpu + YoloV5)"
};
int sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names));
OD_LABEL_QUANT_EFFICIENT_PATH,
OD_TFLITE_META_QUANT_EFFICIENT_PATH);
} break;
+ case 7: {
+ err = engine_config_user_hosted_tflite_cpu(
+ engine_cfg, OD_TFLITE_WEIGHT_YOLO_V5_320_PATH,
+ OD_LABEL_YOLO_V5_320_PATH,
+ OD_TFLITE_META_YOLO_V5_320_PATH);
+ } break;
}
if (err != MEDIA_VISION_ERROR_NONE) {
printf("Fail to perform config [err:%i]\n", err);