inference testsuite: add opencv with caffe model
author Tae-Young Chung <ty83.chung@samsung.com>
Fri, 3 Apr 2020 03:05:54 +0000 (12:05 +0900)
committer Inki Dae <inki.dae@samsung.com>
Tue, 14 Apr 2020 00:42:19 +0000 (09:42 +0900)
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
mv_inference/inference/src/Inference.cpp
test/testsuites/inference/inference_test_suite.c

index 3585300eb08f556794cd4f6b4e8c88cf87d4bfc2..375852ce3dfd1d95c8ce3c8d6bb89f945ef4961d 100755 (executable)
@@ -1048,9 +1048,6 @@ int Inference::GetObjectDetectionResults(ObjectDetectionResults *detectionResult
                cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight };
                cv::hconcat(cvBoxElems, 4, cvBoxes);
 
-               LOGI("cvBoxes size: %d x %d", cvBoxes.size[0], cvBoxes.size[1]);
-               LOGI("0: %f, %f, %f, %f", cvBoxes.at<float>(0,0), cvBoxes.at<float>(0,1), cvBoxes.at<float>(0,2),cvBoxes.at<float>(0,3));
-
                // classes
                cvClasses = cvOutputData.col(1).clone();
 
@@ -1133,22 +1130,21 @@ int Inference::GetFaceDetectionResults(FaceDetectionResults *detectionResults)
 
        cv::Mat cvScores, cvClasses, cvBoxes;
        if (outputData.dimInfo.size() == 1) {
-
                // there is no way to know how many objects are detect unless the number of objects aren't
                // provided. In the case, each backend should provide the number of results manually.
                // For example, in OpenCV, MobilenetV1-SSD doesn't provide it so the number of objects are
-               // written to the 1st element i.e., outputData.data[0] (the shape is 1x1xNx7 and the 1st of 7 
-               // indicats the image id. But it is useless if a batch mode isn't supported. 
+               // written to the 1st element i.e., outputData.data[0] (the shape is 1x1xNx7 and the 1st of 7
+               // indicates the image id). But it is useless if a batch mode isn't supported.
                // So, use the 1st of 7.
 
                number_of_detections = (int)(*reinterpret_cast<float*>(outputData.data[0]));
                cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3], CV_32F, outputData.data[0]);
 
                // boxes
-               cv::Mat cvTop = cvOutputData.col(6).clone();
                cv::Mat cvLeft = cvOutputData.col(3).clone();
-               cv::Mat cvBottom = cvOutputData.col(4).clone();
+               cv::Mat cvTop = cvOutputData.col(4).clone();
                cv::Mat cvRight = cvOutputData.col(5).clone();
+               cv::Mat cvBottom = cvOutputData.col(6).clone();
 
                cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight };
                cv::hconcat(cvBoxElems, 4, cvBoxes);
index d984e38ab434a1d2c425e0f407809233cd6bc492..4c2885f7afd44cde9708b57a3867f9a4e0f86698 100644 (file)
 //Face Detection
 #define FD_TFLITE_WEIGHT_PATH "/usr/share/capi-media-vision/models/FD/tflite/fd_tflite_model1.tflite"
 
+#define FD_OPENCV_WEIGHT_CAFFE_PATH "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.caffemodel"
+#define FD_OPENCV_CONFIG_CAFFE_PATH "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.prototxt"
+
 //Facial Landmark Detection
 #define FLD_TFLITE_WEIGHT_PATH "/usr/share/capi-media-vision/models/FLD/tflite/fld_tflite_model1.tflite"
 
+#define FLD_OPENCV_WEIGHT_CAFFE_PATH "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.caffemodel"
+#define FLD_OPENCV_CONFIG_CAFFE_PATH "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.prototxt"
+
 //Pose Estimation
 #define PE_TFLITE_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model.tflite"
 
@@ -767,7 +773,7 @@ int perform_opencv_caffe_squeezenet_config(mv_engine_config_h *engine_cfg)
 
     mv_engine_config_set_double_attribute(handle,
                         MV_INFERENCE_CONFIDENCE_THRESHOLD,
-                        0.6);
+                        0.3);
 
     mv_engine_config_set_int_attribute(handle,
                         MV_INFERENCE_BACKEND_TYPE,
@@ -1597,6 +1603,83 @@ int perform_tflite_mobilenetv1ssd_face(mv_engine_config_h *engine_cfg)
     return err;
 }
 
+/**
+ * Builds an engine configuration for face detection that selects the OpenCV
+ * backend on the CPU target and points at the Caffe ResNet10-SSD model files
+ * (FD_OPENCV_WEIGHT_CAFFE_PATH / FD_OPENCV_CONFIG_CAFFE_PATH).
+ *
+ * @param engine_cfg  [out] Receives the newly created configuration handle.
+ *                    It is written only after the handle has been created;
+ *                    when creation fails it is left untouched.
+ * @return MEDIA_VISION_ERROR_NONE on success, otherwise the error code
+ *         reported by mv_create_engine_config().
+ */
+int perform_opencv_resnet10ssd_face(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        // Release a handle that may have been produced despite the failure.
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine cofniguration.\n");
+            }
+        }
+        return err;
+    }
+
+    // String literals are read-only, so they are bound through const pointers.
+    const char *inputNodeName = "data";
+    const char *outputNodeName[1] = {"detection_out"};
+
+    // NOTE(review): the return codes of the setters below are not checked, so
+    // a rejected attribute goes unreported - consider surfacing them.
+
+    // Model weight file, input data type and model configuration file.
+    mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        FD_OPENCV_WEIGHT_CAFFE_PATH);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_FLOAT32);
+
+    mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH,
+                        FD_OPENCV_CONFIG_CAFFE_PATH);
+
+    // Mean / std values and the confidence threshold used with this model.
+    mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        135.7);
+
+    mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        1.0);
+
+    mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_CONFIDENCE_THRESHOLD,
+                        0.3);
+
+    // Backend and target selection: OpenCV on CPU.
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_OPENCV);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_TARGET_TYPE,
+                        MV_INFERENCE_TARGET_CPU);
+
+    // Input tensor geometry: 300 x 300 with 3 channels.
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        300);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        300);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        3);
+
+    // Names of the input node and of the single output node.
+    mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+
+    mv_engine_config_set_array_string_attribute(handle,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeName,
+                        1);
+
+    // Hand the configured handle to the caller.
+    *engine_cfg = handle;
+    return err;
+}
+
 int perform_armnn_mobilenetv1ssd_face(mv_engine_config_h *engine_cfg)
 {
     int err = MEDIA_VISION_ERROR_NONE;
@@ -1678,9 +1761,10 @@ int perform_face_detection()
     int err = MEDIA_VISION_ERROR_NONE;
 
     int sel_opt = 0;
-    const int options[5] = {1, 2, 3, 4, 5, 6};
-    const *names[5] = { "Configuration",
+    const int options[7] = {1, 2, 3, 4, 5, 6, 7};
+    const char *names[7] = { "Configuration",
                         "TFLite(CPU) + MobileNetV1 + SSD",
+                        "OPENCV(CPU) + Resnet10 + SSD",
                         "ARMNN(CPU) + MobileNetV1 + SSD",
                         "Prepare",
                         "Run",
@@ -1691,7 +1775,7 @@ int perform_face_detection()
     mv_source_h mvSource = NULL;
 
     while(sel_opt == 0) {
-        sel_opt = show_menu("Select Action:", options, names, 5);
+        sel_opt = show_menu("Select Action:", options, names, 7);
         switch (sel_opt) {
         case 1:
         {
@@ -1728,10 +1812,22 @@ int perform_face_detection()
                     printf("Fail to destroy engine_cfg [err:%i]\n", err2);
             }
 
-            err = perform_armnn_mobilenetv1ssd_face(&engine_cfg);
+            err = perform_opencv_resnet10ssd_face(&engine_cfg);
         }
             break;
         case 4:
+        {
+            //perform ARMNN Mobilenetssd config
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+            }
+
+            err = perform_armnn_mobilenetv1ssd_face(&engine_cfg);
+        }
+            break;
+        case 5:
         {
             // create - configure - prepare
             if (infer) {
@@ -1764,7 +1860,7 @@ int perform_face_detection()
             }
         }
             break;
-        case 5:
+        case 6:
         {
             if (mvSource) {
                 int err2 = mv_destroy_source(mvSource);
@@ -1811,7 +1907,7 @@ int perform_face_detection()
             printf("elased time : %lu(ms)\n", timeDiff);
         }
             break;
-        case 6:
+        case 7:
         {
             //perform destroy
             if (engine_cfg) {
@@ -1962,14 +2058,88 @@ int perform_tflite_TweakCNN(mv_engine_config_h *engine_cfg)
     return err;
 }
 
+/**
+ * Builds an engine configuration for facial landmark detection that selects
+ * the OpenCV backend on the CPU target and points at the Caffe model files
+ * FLD_OPENCV_WEIGHT_CAFFE_PATH / FLD_OPENCV_CONFIG_CAFFE_PATH.
+ *
+ * @param engine_cfg  [out] Receives the newly created configuration handle.
+ *                    It is written only after the handle has been created;
+ *                    when creation fails it is left untouched.
+ * @return MEDIA_VISION_ERROR_NONE on success, otherwise the error code
+ *         reported by mv_create_engine_config().
+ */
+int perform_opencv_cnncascade(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        // Release a handle that may have been produced despite the failure.
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine cofniguration.\n");
+            }
+        }
+        return err;
+    }
+
+    // String literals are read-only, so they are bound through const pointers.
+    const char *inputNodeName = "data";
+    const char *outputNodeName[1] = {"Sigmoid_fc2"};
+
+    // NOTE(review): the return codes of the setters below are not checked, so
+    // a rejected attribute goes unreported - consider surfacing them.
+
+    // Model weight file, input data type and model configuration file.
+    mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        FLD_OPENCV_WEIGHT_CAFFE_PATH);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_FLOAT32);
+
+    mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH,
+                        FLD_OPENCV_CONFIG_CAFFE_PATH);
+
+    // Mean / std values used with this model.
+    mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        127.5);
+
+    mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        127.5);
+
+    // Backend and target selection: OpenCV on CPU.
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_OPENCV);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_TARGET_TYPE,
+                        MV_INFERENCE_TARGET_CPU);
+
+    // Input tensor geometry: 128 x 128 with 3 channels.
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        128);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        128);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        3);
+
+    // Names of the input node and of the single output node.
+    mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+
+    mv_engine_config_set_array_string_attribute(handle,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeName,
+                        1);
+
+    // Hand the configured handle to the caller.
+    *engine_cfg = handle;
+    return err;
+}
+
 int perform_facial_landmark_detection()
 {
     int err = MEDIA_VISION_ERROR_NONE;
 
     int sel_opt = 0;
-    const int options[5] = {1, 2, 3, 4, 5};
-    const *names[5] = { "Configuration",
-                        "tflite(CPU) + TweakCNN",
+    const int options[6] = {1, 2, 3, 4, 5, 6};
+    // Menu labels handed to show_menu() together with options[]; the element
+    // type is spelled out because string literals need a const char * array.
+    const char *names[6] = { "Configuration",
+                        "Tflite(CPU) + TweakCNN",
+                        "OPENCV(CPU) + TweakCNN",
                         "Prepare",
                         "Run",
                         "Back"};
@@ -1979,7 +2149,7 @@ int perform_facial_landmark_detection()
     mv_source_h mvSource = NULL;
 
     while(sel_opt == 0) {
-        sel_opt = show_menu("Select Action:", options, names, 5);
+        sel_opt = show_menu("Select Action:", options, names, 6);
         switch (sel_opt) {
         case 1:
         {
@@ -2007,6 +2177,17 @@ int perform_facial_landmark_detection()
         }
             break;
         case 3:
+        {
+            //perform CNN cascade
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+            }
+            err = perform_opencv_cnncascade(&engine_cfg);
+        }
+            break;
+        case 4:
         {
             // create - configure - prepare
             if (infer) {
@@ -2039,7 +2220,7 @@ int perform_facial_landmark_detection()
             }
         }
             break;
-        case 4:
+        case 5:
         {
             if (mvSource) {
                 int err2 = mv_destroy_source(mvSource);
@@ -2086,7 +2267,7 @@ int perform_facial_landmark_detection()
             printf("elased time : %lu(ms)\n", timeDiff);
         }
             break;
-        case 5:
+        case 6:
         {
             //perform destroy
             if (engine_cfg) {