Add Yolo anchor parser and test 20/281120/3
author Kwanghoon Son <k.son@samsung.com>
Tue, 13 Sep 2022 02:03:32 +0000 (22:03 -0400)
committer Kwanghoon Son <k.son@samsung.com>
Tue, 13 Sep 2022 08:13:42 +0000 (04:13 -0400)
[Issue type] New feature

commit from https://review.tizen.org/gerrit/c/platform/core/api/mediavision/+/272777

Change-Id: I3652cd0adb4a0c1b34d8b3b6ba5ff9c61838ae1e
Signed-off-by: Kwanghoon Son <k.son@samsung.com>
mv_machine_learning/inference/include/DecodeInfo.h
mv_machine_learning/inference/src/DecodeInfo.cpp
test/testsuites/machine_learning/inference/test_object_detection.cpp

index 83c53db..3a5fef8 100644 (file)
@@ -35,7 +35,7 @@ namespace box
 {
 struct AnchorParam
 {
-       int mode; /**< 0: generate anchor, 1:load pre-anchor*/
+       int mode; /**< 0: SSD, 1: Yolo; any other value is rejected by DecodeInfo::ParseAnchorParam */
        int numLayers;
        float minScale;
        float maxScale;
@@ -53,6 +53,14 @@ struct AnchorParam
        float yScale;
        float wScale;
        float hScale;
+
+       // Yolo: the fields below are filled by DecodeInfo::ParseAnchorParam when mode == 1
+       int offsetAnchors; /**< read from the "offset_anchors" JSON member */
+       inference_score_type_e type; /**< NOTE(review): not assigned in the visible part of ParseAnchorParam - confirm where it is set */
+       std::map<std::string, inference_score_type_e> supportedCellType;
+       std::vector<std::vector<double> > vxScales; /**< one inner vector per entry of the "x_scales" JSON array */
+       std::vector<std::vector<double> > vyScales; /**< one inner vector per entry of "y_scales"; the parser rejects lengths that differ from "x_scales" */
+       int numAnchorsPerCell; /**< read from the "num_anchors" JSON member */
 };
 
 struct CellParam
index fd5ed84..e49245a 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include <DecodeInfo.h>
+#include <Utils.h>
 
 using namespace mediavision::inference;
 using namespace mediavision::inference::box;
@@ -45,25 +46,75 @@ int DecodeInfo::ParseAnchorParam(JsonObject *root)
 
        anchorParam.mode = static_cast<int>(json_object_get_int_member(object, "mode"));
 
-       anchorParam.numLayers = static_cast<int>(json_object_get_int_member(object, "num_layers"));
-       anchorParam.minScale = static_cast<float>(json_object_get_double_member(object, "min_scale"));
-       anchorParam.maxScale = static_cast<float>(json_object_get_double_member(object, "max_scale"));
+       if (anchorParam.mode == 0) { // SSD
+               anchorParam.numLayers = static_cast<int>(json_object_get_int_member(object, "num_layers"));
+
+               anchorParam.minScale = static_cast<float>(json_object_get_double_member(object, "min_scale"));
+               anchorParam.maxScale = static_cast<float>(json_object_get_double_member(object, "max_scale"));
+
+               anchorParam.isReduceBoxedInLowestLayer =
+                               static_cast<bool>(json_object_get_boolean_member(object, "reduce_boxed_in_lowest_layer"));
+               anchorParam.interpolatedScaleAspectRatio =
+                               static_cast<float>(json_object_get_double_member(object, "interpolated_scale_aspect_ratio"));
+               anchorParam.isFixedAnchorSize = static_cast<bool>(json_object_get_boolean_member(object, "fixed_anchor_size"));
+               anchorParam.isExponentialBoxScale =
+                               static_cast<bool>(json_object_get_boolean_member(object, "exponential_box_scale"));
+
+               anchorParam.xScale = static_cast<float>(json_object_get_double_member(object, "x_scale"));
+               anchorParam.yScale = static_cast<float>(json_object_get_double_member(object, "y_scale"));
+               anchorParam.wScale = static_cast<float>(json_object_get_double_member(object, "w_scale"));
+               anchorParam.hScale = static_cast<float>(json_object_get_double_member(object, "h_scale"));
+
+               JsonArray *array = json_object_get_array_member(object, "aspect_ratios");
+               auto elements = json_array_get_length(array);
+               for (unsigned int elem2 = 0; elem2 < elements; ++elem2) {
+                       auto aspectRatio = static_cast<float>(json_array_get_double_element(array, elem2));
+                       anchorParam.aspectRatios.push_back(aspectRatio);
+                       LOGI("aspectRatio: %.4f", aspectRatio);
+               }
+       } else if (anchorParam.mode == 1) { // Yolo
+               anchorParam.numAnchorsPerCell = static_cast<int>(json_object_get_int_member(object, "num_anchors"));
+
+               anchorParam.offsetAnchors = static_cast<int>(json_object_get_int_member(object, "offset_anchors"));
+               JsonArray *xScales = json_object_get_array_member(object, "x_scales");
+               JsonArray *yScales = json_object_get_array_member(object, "y_scales");
+               unsigned int xElements2 = json_array_get_length(xScales);
+               unsigned int yElements2 = json_array_get_length(yScales);
+               if (xElements2 != yElements2) {
+                       LOGE("Invalid x and y scales. They should be the same size");
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
+
+               for (unsigned int elem2 = 0; elem2 < xElements2; ++elem2) {
+                       JsonArray *xArray = json_array_get_array_element(xScales, elem2);
+                       JsonArray *yArray = json_array_get_array_element(yScales, elem2);
+                       unsigned int xArrayElements2 = json_array_get_length(xArray);
+                       unsigned int yArrayElements2 = json_array_get_length(yArray);
+                       if (xArrayElements2 != yArrayElements2) {
+                               LOGE("Invalid x and y scales. They should be the same size");
+                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
+                       }
+                       std::vector<double> xScale_;
+                       std::vector<double> yScale_;
+                       for (unsigned int arrayElem2 = 0; arrayElem2 < xArrayElements2; ++arrayElem2) {
+                               auto xScale = static_cast<double>(json_array_get_double_element(xArray, arrayElem2));
+                               auto yScale = static_cast<double>(json_array_get_double_element(yArray, arrayElem2));
+                               LOGI("xScale:%lf, yScale:%lf", xScale, yScale);
+                               xScale_.push_back(xScale);
+                               yScale_.push_back(yScale);
+                       }
+                       anchorParam.vxScales.push_back(xScale_);
+                       anchorParam.vyScales.push_back(yScale_);
+               }
+       } else {
+               LOGE("Invalid anchor mode [%d]", anchorParam.mode);
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
        anchorParam.inputSizeHeight = static_cast<int>(json_object_get_int_member(object, "input_size_height"));
        anchorParam.inputSizeWidth = static_cast<int>(json_object_get_int_member(object, "input_size_width"));
        anchorParam.anchorOffsetX = static_cast<float>(json_object_get_double_member(object, "anchor_offset_x"));
        anchorParam.anchorOffsetY = static_cast<float>(json_object_get_double_member(object, "anchor_offset_y"));
-       anchorParam.isReduceBoxedInLowestLayer =
-                       static_cast<bool>(json_object_get_boolean_member(object, "reduce_boxed_in_lowest_layer"));
-       anchorParam.interpolatedScaleAspectRatio =
-                       static_cast<float>(json_object_get_double_member(object, "interpolated_scale_aspect_ratio"));
-       anchorParam.isFixedAnchorSize = static_cast<bool>(json_object_get_boolean_member(object, "fixed_anchor_size"));
-       anchorParam.isExponentialBoxScale =
-                       static_cast<bool>(json_object_get_boolean_member(object, "exponential_box_scale"));
-
-       anchorParam.xScale = static_cast<float>(json_object_get_double_member(object, "x_scale"));
-       anchorParam.yScale = static_cast<float>(json_object_get_double_member(object, "y_scale"));
-       anchorParam.wScale = static_cast<float>(json_object_get_double_member(object, "w_scale"));
-       anchorParam.hScale = static_cast<float>(json_object_get_double_member(object, "h_scale"));
 
        JsonArray *array = json_object_get_array_member(object, "strides");
        unsigned int elements2 = json_array_get_length(array);
@@ -73,14 +124,6 @@ int DecodeInfo::ParseAnchorParam(JsonObject *root)
                LOGI("stride: %d", stride);
        }
 
-       array = json_object_get_array_member(object, "aspect_ratios");
-       elements2 = json_array_get_length(array);
-       for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
-               auto aspectRatio = static_cast<float>(json_array_get_double_element(array, elem2));
-               anchorParam.aspectRatios.push_back(aspectRatio);
-               LOGI("aspectRatio: %.4f", aspectRatio);
-       }
-
        return MEDIA_VISION_ERROR_NONE;
 }
 
index 3eb039b..3132a31 100644 (file)
@@ -24,6 +24,9 @@
 #define OD_TFLITE_WEIGHT_MOBILENET_V2_SSD_320_PATH \
        MV_CONFIG_PATH                                 \
        "/models/OD/tflite/od_mobilenet_v2_ssd_320x320.tflite"
+#define OD_SNPE_WEIGHT_QC_YOLO_V5_MULTIANCHOR_PATH \
+       MV_CONFIG_PATH                                 \
+       "/models/OD/snpe/yolov5s_quantize.dlc"
 
 void _object_detected_cb(mv_source_h source, const int number_of_objects, const int *indices, const char **names,
                                                 const float *confidences, const mv_rectangle_s *locations, void *user_data)
@@ -122,5 +125,15 @@ TEST_P(TestObjectDetectionSnpe, DISABLED_EFDLite2QC)
        inferenceDog();
 }
 
+TEST_P(TestObjectDetectionSnpe, DISABLED_YoloV5MultiAnchor) // DISABLED_ prefix: GoogleTest skips this unless --gtest_also_run_disabled_tests is given
+
+{
+       ASSERT_TRUE(_use_json_parser); // stop here unless the test parameter enables the JSON parser
+       engine_config_hosted_snpe_model(engine_cfg, OD_SNPE_WEIGHT_QC_YOLO_V5_MULTIANCHOR_PATH, // yolov5s_quantize.dlc under MV_CONFIG_PATH
+                                                                       OD_LABEL_EFFICIENTDET_LITE2_448_PATH, _use_json_parser, _target_device_type); // NOTE(review): reuses the EfficientDet-Lite2 label file - confirm it matches this model's classes
+
+       inferenceDog();
+}
+
 INSTANTIATE_TEST_CASE_P(Prefix, TestObjectDetectionSnpe,
                                                ::testing::Values(ParamTypes(true, MV_INFERENCE_TARGET_DEVICE_CUSTOM)));
\ No newline at end of file