Drop Yolo CellParam 04/281704/1
author Kwanghoon Son <k.son@samsung.com>
Wed, 21 Sep 2022 02:37:37 +0000 (22:37 -0400)
committer Kwanghoon Son <k.son@samsung.com>
Wed, 21 Sep 2022 02:37:37 +0000 (22:37 -0400)
Yolo object detection does not use CellParam anymore

Change-Id: Id9842754634b6f134b02f417c96ea24f5fc69096
Signed-off-by: Kwanghoon Son <k.son@samsung.com>
mv_machine_learning/inference/include/DecodeInfo.h
mv_machine_learning/inference/src/BoxInfo.cpp
mv_machine_learning/inference/src/DecodeInfo.cpp
mv_machine_learning/inference/src/Inference.cpp
mv_machine_learning/inference/src/ObjectDecoder.cpp

index 67c7e32..6a38e54 100644 (file)
@@ -60,16 +60,6 @@ struct AnchorParam
        std::map<std::string, inference_score_type_e> supportedCellType;
        std::vector<std::vector<double> > vxScales;
        std::vector<std::vector<double> > vyScales;
-       int numAnchorsPerCell;
-};
-
-struct CellParam
-{
-       int numScales;
-       std::vector<int> scales;
-       int offsetScales;
-       inference_score_type_e type;
-       std::map<std::string, inference_score_type_e> supportedCellType;
 };
 
 struct NMSParam
@@ -105,17 +95,12 @@ struct DecodeInfo
 {
        AnchorParam anchorParam;
        std::vector<cv::Rect2f> anchorBoxes;
-       CellParam cellParam;
        NMSParam nmsParam;
        RotateParam rotParam;
        RoiOptionParam roiOptParam;
 
        DecodeInfo()
        {
-               cellParam.type = INFERENCE_SCORE_TYPE_NORMAL;
-               cellParam.supportedCellType.insert({ "NORMAL", INFERENCE_SCORE_TYPE_NORMAL });
-               cellParam.supportedCellType.insert({ "SIGMOID", INFERENCE_SCORE_TYPE_SIGMOID });
-
                nmsParam.mode = INFERENCE_BOX_NMS_TYPE_NONE;
                nmsParam.iouThreshold = 0.2f;
                nmsParam.supportedBoxNmsTypes.insert({ "STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD });
@@ -155,13 +140,6 @@ struct DecodeInfo
        float GetAnchorHscale();
        float CalculateScale(float min, float max, int index, int maxStride);
 
-       // Cell param
-       int ParseCellParam(JsonObject *root);
-       std::vector<int> &GetCellScalesAll();
-       int GetCellNumScales();
-       int GetCellOffsetScales();
-       inference_score_type_e GetCellType();
-
        // Nms param
        int ParseNms(JsonObject *root);
        int GetNmsMode();
index 5091db2..43f055d 100644 (file)
@@ -179,12 +179,6 @@ int BoxInfo::ParseDecodeInfo(JsonObject *root)
                        LOGE("Fail to ParseAnchorParam[%d]", ret);
                        return ret;
                }
-       } else if (json_object_has_member(cObject, "cell")) {
-               ret = GetDecodeInfo().ParseCellParam(cObject);
-               if (ret != MEDIA_VISION_ERROR_NONE) {
-                       LOGE("Fail to ParseCellParam[%d]", ret);
-                       return ret;
-               }
        } else {
                LOGE("anchor is mandatory. Invalid metadata");
                LOGI("LEAVE");
index e49245a..f33f062 100644 (file)
@@ -73,8 +73,6 @@ int DecodeInfo::ParseAnchorParam(JsonObject *root)
                        LOGI("aspectRatio: %.4f", aspectRatio);
                }
        } else if (anchorParam.mode == 1) { // Yolo
-               anchorParam.numAnchorsPerCell = static_cast<int>(json_object_get_int_member(object, "num_anchors"));
-
                anchorParam.offsetAnchors = static_cast<int>(json_object_get_int_member(object, "offset_anchors"));
                JsonArray *xScales = json_object_get_array_member(object, "x_scales");
                JsonArray *yScales = json_object_get_array_member(object, "y_scales");
@@ -127,51 +125,6 @@ int DecodeInfo::ParseAnchorParam(JsonObject *root)
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int DecodeInfo::ParseCellParam(JsonObject *root)
-{
-       JsonObject *object = json_object_get_object_member(root, "cell");
-
-       cellParam.numScales = static_cast<int>(json_object_get_int_member(object, "num_scales"));
-
-       JsonArray *array = json_object_get_array_member(object, "scales");
-       unsigned int elements2 = json_array_get_length(array);
-       for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
-               auto scale = static_cast<int>(json_array_get_int_element(array, elem2));
-               cellParam.scales.push_back(scale);
-               LOGI("scale: %d", scale);
-       }
-
-       cellParam.offsetScales = static_cast<int>(json_object_get_int_member(object, "offset_scales"));
-       try {
-               cellParam.type = GetSupportedType(object, "type", cellParam.supportedCellType);
-       } catch (const std::exception &e) {
-               LOGE("Invalid %s", e.what());
-               return MEDIA_VISION_ERROR_INVALID_OPERATION;
-       }
-
-       return MEDIA_VISION_ERROR_NONE;
-}
-
-std::vector<int> &DecodeInfo::GetCellScalesAll()
-{
-       return cellParam.scales;
-}
-
-int DecodeInfo::GetCellNumScales()
-{
-       return cellParam.numScales;
-}
-
-int DecodeInfo::GetCellOffsetScales()
-{
-       return cellParam.offsetScales;
-}
-
-inference_score_type_e DecodeInfo::GetCellType()
-{
-       return cellParam.type;
-}
-
 float DecodeInfo::CalculateScale(float min, float max, int index, int maxStride)
 {
        return min + (max - min) * 1.0 * index / (maxStride - 1.0f);
index 41a446d..9f3b25c 100644 (file)
@@ -1073,8 +1073,6 @@ int Inference::GetObjectDetectionResults(ObjectDetectionResults *results)
                                return MEDIA_VISION_ERROR_INVALID_OPERATION;
                        }
                        numberOfObjects = mOutputLayerProperty.layers[outputMeta.GetScoreName()].shape[scoreIndexes[0]];
-               } else if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR) {
-                       numberOfObjects = boxOffset / outputMeta.GetBoxDecodeInfo().GetCellNumScales() - 5;
                }
 
                ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset,
index c617f2d..06e8653 100644 (file)
@@ -80,13 +80,6 @@ Box ObjectDecoder::decodeBox(int idx, float score, int label, int offset)
        float cHeight =
                        mTensorBuffer.getValue<float>(mMeta.GetBoxName(), idx * mBoxOffset + offset + mMeta.GetBoxOrder()[3]);
 
-       if (mMeta.GetBoxDecodeInfo().GetCellType() == INFERENCE_SCORE_TYPE_SIGMOID) {
-               cx = PostProcess::sigmoid(cx);
-               cy = PostProcess::sigmoid(cy);
-               cWidth = PostProcess::sigmoid(cWidth);
-               cHeight = PostProcess::sigmoid(cHeight);
-       }
-
        LOGI("cx:%.2f, cy:%.2f, cW:%.2f, cH:%.2f", cx, cy, cWidth, cHeight);
        // convert type to ORIGIN_CENTER if ORIGIN_LEFTTOP
        if (mMeta.GetBoxType() == INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP) {
@@ -148,15 +141,6 @@ int ObjectDecoder::decode()
        int ret = MEDIA_VISION_ERROR_NONE;
        int totalIdx = mNumberOfOjects;
 
-       if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR) {
-               totalIdx = 0;
-               for (auto &scale : mMeta.GetBoxDecodeInfo().GetCellScalesAll()) {
-                       totalIdx += (static_cast<int>(mScaleW) / scale * static_cast<int>(mScaleH) / scale) *
-                                               mMeta.GetBoxDecodeInfo().GetCellNumScales() / mMeta.GetBoxDecodeInfo().GetCellOffsetScales();
-               }
-               boxList.reserve(mNumberOfOjects);
-       }
-
        for (int idx = 0; idx < totalIdx; ++idx) {
                if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
                        float score = decodeScore(idx);
@@ -183,29 +167,6 @@ int ObjectDecoder::decode()
                        boxList.push_back(boxes);
                } else { // INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR
                        int cellIdx = idx * mBoxOffset;
-                       for (int j = 0; j < mMeta.GetBoxDecodeInfo().GetCellOffsetScales(); ++j) {
-                               float score = decodeScore(cellIdx + (mNumberOfOjects + 5) * j + 4);
-                               if (score <= 0.0f) {
-                                       continue;
-                               }
-                               LOGI("score[%d]: %.2f", j, score);
-                               // need to check the score
-                               float topObjScore = 0.0f;
-                               int topObjIdx = 0;
-                               for (int objIdx_ = 0; objIdx_ < mNumberOfOjects; ++objIdx_) {
-                                       float objScore_ = decodeScore(cellIdx + (mNumberOfOjects + 5) * j + 5 + objIdx_);
-                                       if (objScore_ > topObjScore) {
-                                               topObjScore = objScore_;
-                                               topObjIdx = objIdx_;
-                                       }
-                               }
-
-                               if (topObjScore < mMeta.GetScoreThreshold())
-                                       continue;
-
-                               Box box = decodeBox(idx, topObjScore, topObjIdx, (mNumberOfOjects + 5) * j);
-                               boxes.push_back(box);
-                       }
                }
        }