[WIP #4] add hand models

author Tae-Young Chung <ty83.chung@samsung.com>

Tue, 14 Jul 2020 01:19:25 +0000 (10:19 +0900)

committer Tae-Young Chung <ty83.chung@samsung.com>

Tue, 14 Jul 2020 01:19:25 +0000 (10:19 +0900)
author Tae-Young Chung <ty83.chung@samsung.com>
Tue, 14 Jul 2020 01:19:25 +0000 (10:19 +0900)
committer Tae-Young Chung <ty83.chung@samsung.com>
Tue, 14 Jul 2020 01:19:25 +0000 (10:19 +0900)
diff --git a/test/testsuites/inference/inference_test_suite.c b/test/testsuites/inference/inference_test_suite.c

index 7a63c5f92ec468b630c6b26c273206549418ede5..af11b9ea5ed88c730bbe9ece48849fb8ff381cbb 100644 (file)
--- a/test/testsuites/inference/inference_test_suite.c
+++ b/test/testsuites/inference/inference_test_suite.c
@@ -110,10 +110,16 @@
         "/usr/share/capi-media-vision/models/PLD/mocap/example-mocap-mapping.txt"
  
  //Hand Pose Detection
-#define PE_TFLITE_AIC_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1.tflite"
-#define PE_TFLITE_AIC_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_b_1.tflite"
-//#define PE_TFLITE_AIC_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1_tf113_int32.tflite"
-//#define PE_TFLITE_AIC_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_b_1_tf113.tflite"
+// Hand model weight files. Each variant comes as a pair: the "_1" path is
+// loaded by perform_tflite_<variant>_hand_detection() and the "_2" path by
+// perform_tflite_<variant>_hand_detection2().
+
+// AIC-Lite
+#define PE_TFLITE_AICLite_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1_lite_224.tflite"
+#define PE_TFLITE_AICLite_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_lite_224.tflite"
+
+// AIC-Lite2
+#define PE_TFLITE_AICLite2_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1_0709_f.tflite"
+#define PE_TFLITE_AICLite2_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_0709_f.tflite"
+
+// AIC-Lite2Q
+// NOTE(review): the "_2" path below is the same posenet2_0709_f.tflite file
+// that AIC-Lite2 uses, while the "_1" path is the "_dq" file. Confirm that
+// this is intentional and not a copy/paste slip.
+#define PE_TFLITE_AICLite2Q_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1_0709_dq.tflite"
+#define PE_TFLITE_AICLite2Q_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_0709_f.tflite"
+
+// Hand model selected in the perform_hand_detection() menu (set to 2, 3 or 4
+// there). _hand_detected_cb() reads it to pick the matching *_hand_detection2()
+// configuration; any other value makes it print "Invalid operation".
+int gHandModelOption = 0;
  
  /******
   * Public model:
@@ -236,7 +242,145 @@ void _pose_landmark_detected_cb(mv_source_h source,
         return;
  }
  
-int perform_tflite_hand_detection2(mv_engine_config_h *engine_cfg)
+/**
+ * Creates the engine configuration for the second-stage AIC-Lite hand model.
+ *
+ * The configuration points at PE_TFLITE_AICLite_2_WEIGHT_PATH and selects the
+ * TFLite backend on the CPU with a FLOAT32 input tensor of 56x56x21
+ * (mean 0.0, std 1.0). The input node is "input"; the output nodes are
+ * "mobilenetv2/coord_refine" and "mobilenetv2/gesture".
+ *
+ * @param engine_cfg  Out parameter: receives the new handle on success and is
+ *                    left untouched on failure. The handle is released with
+ *                    mv_destroy_engine_config().
+ * @return MEDIA_VISION_ERROR_NONE on success, otherwise the first error code
+ *         reported while creating or filling the configuration.
+ */
+int perform_tflite_AICLite_hand_detection2(mv_engine_config_h *engine_cfg)
+{
+    const char *inputNodeName = "input";
+    const char *outputNodeNames[2] = {"mobilenetv2/coord_refine", "mobilenetv2/gesture"};
+
+    mv_engine_config_h handle = NULL;
+    int err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    /* Apply the attributes in order. Once one call fails the remaining ones
+     * are skipped, so err still holds the first failure when we get below. */
+    err = mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        PE_TFLITE_AICLite_2_WEIGHT_PATH);
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_FLOAT32);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        0.0);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        1.0);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_TFLITE);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_TARGET_DEVICE_TYPE,
+                        MV_INFERENCE_TARGET_DEVICE_CPU);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        56);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        56);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        21);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_array_string_attribute(handle,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeNames,
+                        2);
+    }
+
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        /* Do not hand out a partially filled configuration. */
+        printf("Fail to set engine configuration attribute [err:%i]\n", err);
+        int err2 = mv_destroy_engine_config(handle);
+        if (err2 != MEDIA_VISION_ERROR_NONE) {
+            printf("Fail to destroy engine configuration.\n");
+        }
+        return err;
+    }
+
+    *engine_cfg = handle;
+    return MEDIA_VISION_ERROR_NONE;
+}
+
+/**
+ * Creates the engine configuration for the second-stage AIC-Lite2 hand model.
+ *
+ * The configuration points at PE_TFLITE_AICLite2_2_WEIGHT_PATH and selects the
+ * TFLite backend on the CPU with a FLOAT32 input tensor of 56x56x21
+ * (mean 0.0, std 1.0). The input node is "input"; the output nodes are
+ * "mobilenetv2/coord_refine" and "mobilenetv2/gesture".
+ *
+ * @param engine_cfg  Out parameter: receives the new handle on success and is
+ *                    left untouched on failure. The handle is released with
+ *                    mv_destroy_engine_config().
+ * @return MEDIA_VISION_ERROR_NONE on success, otherwise the first error code
+ *         reported while creating or filling the configuration.
+ */
+int perform_tflite_AICLite2_hand_detection2(mv_engine_config_h *engine_cfg)
+{
+    const char *inputNodeName = "input";
+    const char *outputNodeNames[2] = {"mobilenetv2/coord_refine", "mobilenetv2/gesture"};
+
+    mv_engine_config_h handle = NULL;
+    int err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    /* Apply the attributes in order. Once one call fails the remaining ones
+     * are skipped, so err still holds the first failure when we get below. */
+    err = mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        PE_TFLITE_AICLite2_2_WEIGHT_PATH);
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_FLOAT32);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        0.0);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        1.0);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_TFLITE);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_TARGET_DEVICE_TYPE,
+                        MV_INFERENCE_TARGET_DEVICE_CPU);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        56);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        56);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        21);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_array_string_attribute(handle,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeNames,
+                        2);
+    }
+
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        /* Do not hand out a partially filled configuration. */
+        printf("Fail to set engine configuration attribute [err:%i]\n", err);
+        int err2 = mv_destroy_engine_config(handle);
+        if (err2 != MEDIA_VISION_ERROR_NONE) {
+            printf("Fail to destroy engine configuration.\n");
+        }
+        return err;
+    }
+
+    *engine_cfg = handle;
+    return MEDIA_VISION_ERROR_NONE;
+}
+
+int perform_tflite_AICLite2Q_hand_detection2(mv_engine_config_h *engine_cfg)
  {
      int err = MEDIA_VISION_ERROR_NONE;
  
@@ -258,7 +402,7 @@ int perform_tflite_hand_detection2(mv_engine_config_h *engine_cfg)
  
      mv_engine_config_set_string_attribute(handle,
                          MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
-                        PE_TFLITE_AIC_2_WEIGHT_PATH);
+                        PE_TFLITE_AICLite2Q_2_WEIGHT_PATH);
  
         mv_engine_config_set_int_attribute(handle,
                          MV_INFERENCE_INPUT_DATA_TYPE,
@@ -308,13 +452,13 @@ int perform_tflite_hand_detection2(mv_engine_config_h *engine_cfg)
  void _hand_pose_cb (
          mv_source_h source,
          const int number_of_pose_estimation,
-        const mv_point_s *locations,
+        const mv_inference_pose_s *locations,
          void *user_data)
  {
      printf("In callback, %d pose estimation\n", number_of_pose_estimation);
      if (!user_data) {
          for (int n = 0; n < number_of_pose_estimation; n++) {
-            printf("%d: x[%d], y[%d]\n", n, locations[n].x, locations[n].y);
+            printf("%d: x[%d], y[%d]\n", n, locations->landmarks[n].point.x, locations->landmarks[n].point.y);
          }
      } else {
          printf("%p\n", user_data);
@@ -350,31 +494,34 @@ void _hand_detected_cb (
          mv_engine_config_h engine_cfg2;
          mv_create_engine_config(&engine_cfg2);
  
-        perform_tflite_hand_detection2(&engine_cfg2);
+               if (gHandModelOption == 2) {
+                       perform_tflite_AICLite_hand_detection2(&engine_cfg2);
+               } else if (gHandModelOption == 3) {
+                       perform_tflite_AICLite2_hand_detection2(&engine_cfg2);
+               } else if (gHandModelOption == 4) {
+                       perform_tflite_AICLite2Q_hand_detection2(&engine_cfg2);
+               } else {
+                       printf("Invalid operation\n");
+               }
  
          mv_inference_h infer2;
          int err = mv_inference_create(&infer2);
          if (err != MEDIA_VISION_ERROR_NONE) {
              printf("Fail to create inference handle [err:%i]\n", err);
          }
-        printf("infer2 created\n");
  
          //configure
          err = mv_inference_configure(infer2, engine_cfg2);
          if (err != MEDIA_VISION_ERROR_NONE) {
              printf("Fail to configure inference handle [err:%i]\n", err);
          }
-        printf("engine_cfg2 configured\n");
  
          //prepare
          err = mv_inference_prepare(infer2);
-        printf("infer2 prepared\n");
  
          err = mv_inference_pose_landmark_detect(source2, infer2, NULL, _hand_pose_cb, (&source));
-        printf("pose estimated\n");
  
          mv_destroy_source(source2);
-        printf("destroy source2");
  
          mv_inference_destroy(infer2);
          mv_destroy_engine_config(engine_cfg2);
@@ -2753,7 +2900,77 @@ int perform_pose_landmark_detection()
  }
  
  
-int perform_tflite_hand_detection(mv_engine_config_h *engine_cfg)
+/**
+ * Creates the engine configuration for the first-stage AIC-Lite hand model
+ * and allocates the buffer referenced by the global outputTensorData.
+ *
+ * The configuration points at PE_TFLITE_AICLite_1_WEIGHT_PATH and selects the
+ * TFLite backend on the CPU with a FLOAT32 input tensor of 224x224x3
+ * (mean 0.0, std 1.0). The input node is "input"; the output nodes are
+ * "mobilenetv2/boundingbox2" and "mobilenetv2/heatmap".
+ *
+ * On success outputTensorData is pointed at a fresh zero-filled buffer of
+ * 56*56*21 floats. On failure outputTensorData is not modified.
+ *
+ * @param engine_cfg  Out parameter: receives the new handle on success and is
+ *                    left untouched on failure. The handle is released with
+ *                    mv_destroy_engine_config().
+ * @return MEDIA_VISION_ERROR_NONE on success, otherwise the first error code
+ *         reported while creating or filling the configuration.
+ */
+int perform_tflite_AICLite_hand_detection(mv_engine_config_h *engine_cfg)
+{
+    const char *inputNodeName = "input";
+    const char *outputNodeNames[2] = {"mobilenetv2/boundingbox2", "mobilenetv2/heatmap"};
+
+    mv_engine_config_h handle = NULL;
+    int err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    /* Apply the attributes in order. Once one call fails the remaining ones
+     * are skipped, so err still holds the first failure when we get below. */
+    err = mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        PE_TFLITE_AICLite_1_WEIGHT_PATH);
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_FLOAT32);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        0.0);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        1.0);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_TFLITE);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_TARGET_DEVICE_TYPE,
+                        MV_INFERENCE_TARGET_DEVICE_CPU);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        224);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        224);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        3);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_array_string_attribute(handle,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeNames,
+                        2);
+    }
+
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        /* Do not hand out a partially filled configuration. */
+        printf("Fail to set engine configuration attribute [err:%i]\n", err);
+        int err2 = mv_destroy_engine_config(handle);
+        if (err2 != MEDIA_VISION_ERROR_NONE) {
+            printf("Fail to destroy engine configuration.\n");
+        }
+        return err;
+    }
+
+    /* Allocate only once the configuration is known to be valid, so a failed
+     * configuration does not leave a stray buffer behind.
+     * NOTE(review): a buffer from an earlier call is not released here and the
+     * allocation result is not checked; who owns outputTensorData is not
+     * visible from this function - confirm before adding free()/NULL handling. */
+    outputTensorData = calloc(56 * 56 * 21, sizeof(float));
+
+    *engine_cfg = handle;
+    return MEDIA_VISION_ERROR_NONE;
+}
+
+int perform_tflite_AICLite2_hand_detection(mv_engine_config_h *engine_cfg)
  {
      int err = MEDIA_VISION_ERROR_NONE;
  
@@ -2776,7 +2993,7 @@ int perform_tflite_hand_detection(mv_engine_config_h *engine_cfg)
      outputTensorData = (void*)calloc(56*56*21, sizeof(float));
      mv_engine_config_set_string_attribute(handle,
                          MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
-                        PE_TFLITE_AIC_1_WEIGHT_PATH);
+                        PE_TFLITE_AICLite2_1_WEIGHT_PATH);
  
         mv_engine_config_set_int_attribute(handle,
                          MV_INFERENCE_INPUT_DATA_TYPE,
@@ -2823,14 +3040,86 @@ int perform_tflite_hand_detection(mv_engine_config_h *engine_cfg)
      return err;
  }
  
+/**
+ * Creates the engine configuration for the first-stage AIC-Lite2Q hand model
+ * and allocates the buffer referenced by the global outputTensorData.
+ *
+ * The configuration points at PE_TFLITE_AICLite2Q_1_WEIGHT_PATH and selects
+ * the TFLite backend on the CPU with a UINT8 input tensor of 224x224x3
+ * (mean 0.0, std 1.0). The input node is "input"; the output nodes are
+ * "mobilenetv2/boundingbox2" and "mobilenetv2/heatmap".
+ *
+ * On success outputTensorData is pointed at a fresh zero-filled buffer of
+ * 56*56*21 floats. On failure outputTensorData is not modified.
+ *
+ * @param engine_cfg  Out parameter: receives the new handle on success and is
+ *                    left untouched on failure. The handle is released with
+ *                    mv_destroy_engine_config().
+ * @return MEDIA_VISION_ERROR_NONE on success, otherwise the first error code
+ *         reported while creating or filling the configuration.
+ */
+int perform_tflite_AICLite2Q_hand_detection(mv_engine_config_h *engine_cfg)
+{
+    const char *inputNodeName = "input";
+    const char *outputNodeNames[2] = {"mobilenetv2/boundingbox2", "mobilenetv2/heatmap"};
+
+    mv_engine_config_h handle = NULL;
+    int err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    /* Apply the attributes in order. Once one call fails the remaining ones
+     * are skipped, so err still holds the first failure when we get below. */
+    err = mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        PE_TFLITE_AICLite2Q_1_WEIGHT_PATH);
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        /* Unlike the other hand models in this file, this one takes UINT8 input. */
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_UINT8);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        0.0);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        1.0);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_TFLITE);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_TARGET_DEVICE_TYPE,
+                        MV_INFERENCE_TARGET_DEVICE_CPU);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        224);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        224);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        3);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+    }
+    if (err == MEDIA_VISION_ERROR_NONE) {
+        err = mv_engine_config_set_array_string_attribute(handle,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeNames,
+                        2);
+    }
+
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        /* Do not hand out a partially filled configuration. */
+        printf("Fail to set engine configuration attribute [err:%i]\n", err);
+        int err2 = mv_destroy_engine_config(handle);
+        if (err2 != MEDIA_VISION_ERROR_NONE) {
+            printf("Fail to destroy engine configuration.\n");
+        }
+        return err;
+    }
+
+    /* Allocate only once the configuration is known to be valid, so a failed
+     * configuration does not leave a stray buffer behind.
+     * NOTE(review): a buffer from an earlier call is not released here and the
+     * allocation result is not checked; who owns outputTensorData is not
+     * visible from this function - confirm before adding free()/NULL handling. */
+    outputTensorData = calloc(56 * 56 * 21, sizeof(float));
+
+    *engine_cfg = handle;
+    return MEDIA_VISION_ERROR_NONE;
+}
+
  int perform_hand_detection()
  {
      int err = MEDIA_VISION_ERROR_NONE;
  
      int sel_opt = 0;
-    const int options[] = {1, 2, 3, 4, 5};
+    const int options[] = {1, 2, 3, 4, 5, 6, 7};
      const *names[] = { "Configuration",
-                                               "TFLITE(CPU) + HandDetection",
+                                               "TFLITE(CPU) + AIC-Lite",
+                                               "TFLITE(CPU) + AIC-Lite2",
+                                               "TFLITE(CPU) + AIC-Lite2Q",
                          "Prepare",
                          "Run",
                          "Back"};
@@ -2862,10 +3151,35 @@ int perform_hand_detection()
                  if (err2 != MEDIA_VISION_ERROR_NONE)
                      printf("Fail to destroy engine_cfg [err:%i]\n", err2);
              }
-            err = perform_tflite_hand_detection(&engine_cfg);
+            err = perform_tflite_AICLite_hand_detection(&engine_cfg);
+                       gHandModelOption = 2;
          }
              break;
          case 3:
+        {
+            //perform pose estimation config
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+            }
+            err = perform_tflite_AICLite2_hand_detection(&engine_cfg);
+                       gHandModelOption = 3;
+        }
+            break;
+        case 4:
+        {
+            //perform pose estimation config
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+            }
+            err = perform_tflite_AICLite2Q_hand_detection(&engine_cfg);
+                       gHandModelOption = 4;
+        }
+            break;
+        case 5:
          {
              // create - configure - prepare
              if (infer) {
@@ -2898,7 +3212,7 @@ int perform_hand_detection()
              }
          }
              break;
-        case 4:
+        case 6:
          {
              if (mvSource) {
                  int err2 = mv_destroy_source(mvSource);
@@ -2935,10 +3249,7 @@ int perform_hand_detection()
              clock_gettime(CLOCK_MONOTONIC, &s_tspec);
  
              // Hand detection
-                       //err = mv_inference_hand_detect(mvSource, infer, _hand_detected_cb, NULL);
-
              printf("mem: %p\n", outputTensorData);
-            //err = mv_inference_pose_estimation_detect(mvSource, infer, NULL, _hand_pose_cb, outputTensorData);
              err = mv_inference_hand_detect(mvSource, infer, _hand_detected_cb, outputTensorData);
  
              clock_gettime(CLOCK_MONOTONIC, &e_tspec);
@@ -2949,7 +3260,7 @@ int perform_hand_detection()
  
              break;
          }
-        case 5:
+        case 7:
          {
              //perform destroy
              if (engine_cfg) {
author	Tae-Young Chung <ty83.chung@samsung.com>
	Tue, 14 Jul 2020 01:19:25 +0000 (10:19 +0900)
committer	Tae-Young Chung <ty83.chung@samsung.com>
	Tue, 14 Jul 2020 01:19:25 +0000 (10:19 +0900)