test: Add hand gesture model test for ARMNN
author Inki Dae <inki.dae@samsung.com>
Fri, 29 May 2020 05:51:39 +0000 (14:51 +0900)
committer Inki Dae <inki.dae@samsung.com>
Fri, 29 May 2020 05:51:39 +0000 (14:51 +0900)
Change-Id: Id0e5adcbaf952ecda44ef300957fd8a3d416b4ac
Signed-off-by: Inki Dae <inki.dae@samsung.com>
test/testsuites/inference/inference_test_suite.c

index 4c2885f7afd44cde9708b57a3867f9a4e0f86698..ce505cd63fd618bc0b2647630abf64056b504408 100644 (file)
@@ -52,6 +52,9 @@
 //Face Detection
 #define FD_TFLITE_WEIGHT_PATH "/usr/share/capi-media-vision/models/FD/tflite/fd_tflite_model1.tflite"
 
+//Hand Detection: TFLite model file that perform_armnn_hand_detection() sets
+//as MV_INFERENCE_MODEL_WEIGHT_FILE_PATH
+#define HD_TFLITE_WEIGHT_PATH "/usr/share/capi-media-vision/models/HD/tflite/hd_tflite_model.tflite"
+
 #define FD_OPENCV_WEIGHT_CAFFE_PATH "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.caffemodel"
 #define FD_OPENCV_CONFIG_CAFFE_PATH "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.prototxt"
 
@@ -163,6 +166,25 @@ void _pose_estimation_detected_cb (
     }
 }
 
+/*
+ * Result callback passed to mv_inference_hand_detect().
+ *
+ * Prints how many hands were reported and then, for every hand, its
+ * confidence followed by the rectangle as "x,y,width,height".
+ * source and user_data are not used here.
+ */
+void _hand_detected_cb (
+        mv_source_h source,
+        const int number_of_hands,
+        const float *confidences,
+        const mv_rectangle_s *locations,
+        void *user_data)
+{
+    int idx = 0;
+
+    printf("In callback: %d hands\n", number_of_hands);
+
+    while (idx < number_of_hands) {
+        const mv_rectangle_s *box = &locations[idx];
+
+        printf("%.3f\n", confidences[idx]);
+        printf("%d,%d,%d,%d\n", box->point.x,
+                                box->point.y,
+                                box->width,
+                                box->height);
+        idx++;
+    }
+}
+
 void _image_classified_cb (
            mv_source_h source,
            const int number_of_classes,
@@ -2594,21 +2616,267 @@ int perform_pose_estimation_detection()
     return MEDIA_VISION_ERROR_NONE;
 }
 
+/*
+ * Creates an engine configuration for the hand detection model on the
+ * ARMNN backend with the CPU as target device.
+ *
+ * The configuration uses HD_TFLITE_WEIGHT_PATH as model file, FLOAT32 input
+ * data, mean 0.0 / std 1.0, a 224x224x3 input tensor, the input node
+ * "input" and the two output nodes listed in outputNodeNames.
+ *
+ * On success the new handle is stored in *engine_cfg and
+ * MEDIA_VISION_ERROR_NONE is returned; the caller is expected to release it
+ * with mv_destroy_engine_config(). If the handle cannot be created, the
+ * error from mv_create_engine_config() is returned and *engine_cfg is left
+ * untouched.
+ *
+ * NOTE(review): the return codes of the attribute setters are not checked.
+ */
+int perform_armnn_hand_detection(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        // Release a handle that may have been set despite the failure
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    // String literals must not be written through, so keep them const
+    const char *inputNodeName = "input";
+    const char *outputNodeNames[2] = {"mobilenetv2/boundingbox2", "mobilenetv2/heatmap"};
+
+    mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        HD_TFLITE_WEIGHT_PATH);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_FLOAT32);
+
+    mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        0.0);
+
+    mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        1.0);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_ARMNN);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_TARGET_DEVICE_TYPE,
+                        MV_INFERENCE_TARGET_DEVICE_CPU);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        224);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        224);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        3);
+
+    mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+
+    mv_engine_config_set_array_string_attribute(handle,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeNames,
+                        2);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+/*
+ * Interactive menu loop for the hand detection test.
+ *
+ * Menu actions:
+ *   1 - build an engine configuration via perform_configuration()
+ *   2 - build the ARMNN(CPU) hand detection configuration
+ *   3 - create, configure and prepare the inference handle
+ *   4 - ask for an image file, load it and run mv_inference_hand_detect()
+ *       with _hand_detected_cb as result callback, printing the elapsed time
+ *   5 - destroy the engine configuration and the inference handle
+ *
+ * After every action the user is asked whether to run another one. Every
+ * handle is reset to NULL once destroyed so that a later action never
+ * destroys or uses it a second time, and whatever is still alive is
+ * released before returning.
+ *
+ * Always returns MEDIA_VISION_ERROR_NONE; failures of individual actions are
+ * only reported on stdout.
+ */
+int perform_hand_detection()
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    int sel_opt = 0;
+    const int options[5] = {1, 2, 3, 4, 5};
+    const char *names[5] = { "Configuration",
+                             "ARMNN(CPU) + HandDetection",
+                             "Prepare",
+                             "Run",
+                             "Back"};
+
+    mv_engine_config_h engine_cfg = NULL;
+    mv_inference_h infer = NULL;
+    mv_source_h mvSource = NULL;
+
+    while(sel_opt == 0) {
+        // Do not report the error of a previous action again
+        err = MEDIA_VISION_ERROR_NONE;
+
+        sel_opt = show_menu("Select Action:", options, names, 5);
+        switch (sel_opt) {
+        case 1:
+        {
+            //perform configuration
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                // Drop the stale handle in case the call below fails
+                engine_cfg = NULL;
+            }
+
+            err = perform_configuration(&engine_cfg);
+        }
+            break;
+        case 2:
+        {
+            //perform hand detection config
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                // Drop the stale handle in case the call below fails
+                engine_cfg = NULL;
+            }
+            err = perform_armnn_hand_detection(&engine_cfg);
+        }
+            break;
+        case 3:
+        {
+            // create - configure - prepare
+            if (infer) {
+                int err2 = mv_inference_destroy(infer);
+                if (err2 != MEDIA_VISION_ERROR_NONE) {
+                    printf("Fail to destroy inference handle [err:%i]\n", err2);
+                }
+                infer = NULL;
+            }
+
+            // inference
+            // create handle
+            err = mv_inference_create(&infer);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to create inference handle [err:%i]\n", err);
+                infer = NULL;
+                break;
+            }
+
+            //configure
+            err = mv_inference_configure(infer, engine_cfg);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to configure inference handle [err:%i]\n", err);
+                break;
+            }
+
+            //prepare
+            err = mv_inference_prepare(infer);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to prepare inference handle");
+                break;
+            }
+        }
+            break;
+        case 4:
+        {
+            // Release the source of a previous run before loading a new one
+            if (mvSource) {
+                int err2 = mv_destroy_source(mvSource);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy mvSource\n");
+                mvSource = NULL;
+            }
+
+            char *in_file_name = NULL;
+            /* Load media source */
+            while (input_string("Input file name to be inferred:", 1024, &(in_file_name)) == -1)
+                printf("Incorrect input! Try again.\n");
+
+            err = mv_create_source(&mvSource);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to create mvSource.\n");
+                mvSource = NULL;
+                free(in_file_name);
+                break;
+            }
+
+            err = load_mv_source_from_file(in_file_name, mvSource);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                int err2 = mv_destroy_source(mvSource);
+                if (err2 != MEDIA_VISION_ERROR_NONE) {
+                    printf("Fail to destroy mvSource.\n");
+                }
+                // Keep the next run from destroying this source again
+                mvSource = NULL;
+                free(in_file_name);
+                break;
+            }
+            free(in_file_name);
+
+            struct timespec s_tspec;
+            struct timespec e_tspec;
+
+            clock_gettime(CLOCK_MONOTONIC, &s_tspec);
+
+            // Hand detection
+            err = mv_inference_hand_detect(mvSource, infer, _hand_detected_cb, NULL);
+
+            clock_gettime(CLOCK_MONOTONIC, &e_tspec);
+
+            struct timespec diffspec = diff(s_tspec, e_tspec);
+            unsigned long timeDiff = gettotalmillisec(diffspec);
+            printf("elased time : %lu(ms)\n", timeDiff);
+
+            break;
+        }
+        case 5:
+        {
+            //perform destroy
+            if (engine_cfg) {
+                err = mv_destroy_engine_config(engine_cfg);
+                if (err != MEDIA_VISION_ERROR_NONE) {
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err);
+                }
+                engine_cfg = NULL;
+            }
+
+            if (infer) {
+                err = mv_inference_destroy(infer);
+                if (err != MEDIA_VISION_ERROR_NONE) {
+                    printf("Fail to destroy inference handle [err:%i]\n", err);
+                }
+                infer = NULL;
+            }
+        }
+            break;
+        default:
+            printf("Invalid option.\n");
+            sel_opt = 0;
+            continue;
+        }
+
+        int do_another = 0;
+        if (err != MEDIA_VISION_ERROR_NONE) {
+            printf("ERROR: Action is finished with error code: %i\n", err);
+        }
+
+        sel_opt = 0;
+        const int options_last[2] = {1, 2};
+        const char *names_last[2] = { "Yes", "No" };
+
+        while (sel_opt == 0) {
+            sel_opt = show_menu("Run Hand Detection again?:", options_last, names_last, 2);
+            switch(sel_opt) {
+            case 1:
+                do_another = 1;
+                break;
+            case 2:
+                do_another = 0;
+                break;
+            default:
+                printf("Invalid option.\n");
+                sel_opt = 0;
+            }
+        }
+
+        sel_opt = (do_another == 1) ? 0 : 1;
+    }
+
+    // Release whatever is still alive when the menu is left
+    if (mvSource) {
+        int err2 = mv_destroy_source(mvSource);
+        if (err2 != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy mvSource\n");
+        mvSource = NULL;
+    }
+
+    if (engine_cfg) {
+        int err2 = mv_destroy_engine_config(engine_cfg);
+        if (err2 != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+        engine_cfg = NULL;
+    }
+
+    if (infer) {
+        int err2 = mv_inference_destroy(infer);
+        if (err2 != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy inference handle [err:%i]\n", err2);
+        infer = NULL;
+    }
+
+    return MEDIA_VISION_ERROR_NONE;
+}
+
 int main()
 {
     int sel_opt = 0;
 
-    const int options[6] = {1, 2, 3, 4, 5, 6};
-    const char *names[6] = { "Image Classification",
+    const int options[7] = {1, 2, 3, 4, 5, 6, 7};
+    const char *names[7] = { "Image Classification",
                              "Object Detection",
                              "Face Detection",
                              "Facial LandmarkDetection",
                              "Pose Estimation",
+                                                        "Hand Detection",
                              "Exit"};
 
     int err = MEDIA_VISION_ERROR_NONE;
     while (sel_opt == 0) {
-        sel_opt = show_menu("Select Action:", options, names, 5);
+        sel_opt = show_menu("Select Action:", options, names, 6);
         switch (sel_opt) {
         case 1:
         {
@@ -2651,6 +2919,14 @@ int main()
             break;
         }
         case 6:
+        {
+            err = perform_hand_detection();
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to perform hand detection");
+            }
+            break;
+        }
+        case 7:
         {
             printf("Exit");
         }