From: Kwang Son
Date: Tue, 15 Sep 2020 01:29:36 +0000 (+0900)
Subject: Add DLDT(Openvino) backend support
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=89b93c9dc88b34a072f9d49b7e230645ca42383c;p=platform%2Fcore%2Fapi%2Fmediavision.git

Add DLDT(Openvino) backend support

Change-Id: Ib77f33843268e00a8bbc3ea56a234a98c5b5ba89
Signed-off-by: Kwang Son
---

diff --git a/include/mv_inference_type.h b/include/mv_inference_type.h
index 156e99e0..26788647 100644
--- a/include/mv_inference_type.h
+++ b/include/mv_inference_type.h
@@ -48,6 +48,7 @@ typedef enum {
 	MV_INFERENCE_BACKEND_ARMNN, /**< ARMNN (Since 6.0) */
 	MV_INFERENCE_BACKEND_MLAPI, /**< ML Single API of NNStreamer (Since 6.0) */
 	MV_INFERENCE_BACKEND_ONE, /**< On-device Neural Engine (Since 6.0) */
+	MV_INFERENCE_BACKEND_DLDT, /**< Openvino (Since 6.0) */
 	MV_INFERENCE_BACKEND_MAX /**< Backend MAX */
 } mv_inference_backend_type_e;
 
diff --git a/mv_inference/inference/src/Inference.cpp b/mv_inference/inference/src/Inference.cpp
index 359bca34..419d60be 100755
--- a/mv_inference/inference/src/Inference.cpp
+++ b/mv_inference/inference/src/Inference.cpp
@@ -92,6 +92,8 @@ namespace inference
 	mSupportedInferenceBackend.insert(std::make_pair(
 			MV_INFERENCE_BACKEND_MLAPI, std::make_pair("mlapi", false)));
 	mSupportedInferenceBackend.insert(std::make_pair(
 			MV_INFERENCE_BACKEND_ONE, std::make_pair("mlapi", false)));
+	mSupportedInferenceBackend.insert(std::make_pair(
+			MV_INFERENCE_BACKEND_DLDT, std::make_pair("dldt", false)));
 
 	CheckSupportedInferenceBackend();
 
diff --git a/test/testsuites/stream_handpose_browser/stream_handpose_browser.c b/test/testsuites/stream_handpose_browser/stream_handpose_browser.c
index f002d2d9..0db7469c 100644
--- a/test/testsuites/stream_handpose_browser/stream_handpose_browser.c
+++ b/test/testsuites/stream_handpose_browser/stream_handpose_browser.c
@@ -91,7 +91,8 @@ typedef enum {
 	MODEL_TYPE_POSE_CPM = 0,
 	MODEL_TYPE_POSE_HAND_AICLite,	// posenet lite 224
 	MODEL_TYPE_POSE_HAND_AICLite2,	// posenet 0709 f
-	MODEL_TYPE_POSE_HAND_AICLite2Q	// posenet 0709 q
+	MODEL_TYPE_POSE_HAND_AICLite2Q,	// posenet 0709 q
+	MODEL_TYPE_POSE_HAND_DLDT	// posenet dldt (OpenVINO)
 };
 
 typedef struct {
@@ -130,6 +131,11 @@ static efl_util_inputgen_h my_cursor = NULL;
 #define PE_TFLITE_AICLiteQ_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1_0709_dq.tflite"
 #define PE_TFLITE_AICLiteQ_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_0709_f.tflite"
 
+#define PE_DLDT_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/DLDT/0831_posenet1.bin"
+#define PE_DLDT_1_XML_PATH "/usr/share/capi-media-vision/models/PE/DLDT/0831_posenet1.xml"
+#define PE_DLDT_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/DLDT/0831_posenet2.bin"
+#define PE_DLDT_2_XML_PATH "/usr/share/capi-media-vision/models/PE/DLDT/0831_posenet2.xml"
+
 static float thValNeck = 0.3f; // 15%
 static float thValArm = 0.1f; // 10 %
 static float thValLeg = 0.2f; // 5%
@@ -1351,6 +1357,120 @@ int perform_tflite_hand_detection_AICLite2(mv_engine_config_h mv_engine_cfg)
 	return MEDIA_VISION_ERROR_NONE;
 }
 
+int perform_dldt_hand_detection(mv_engine_config_h mv_engine_cfg)
+{
+	if (mv_engine_cfg == NULL) {
+		printf("mv_engine_cfg is null\n");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	char *inputNodeName = "input";
+	char *outputNodeNames[2] = {"ban_1_6/add_4", "ban_1_6/boundingbox/Squeeze"};
+
+	//outputTensorData = (void*)calloc(56*56*21, sizeof(char));
+	mv_engine_config_set_string_attribute(mv_engine_cfg,
+						MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+						PE_DLDT_1_WEIGHT_PATH);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+						MV_INFERENCE_INPUT_DATA_TYPE,
+						MV_INFERENCE_DATA_FLOAT32);
+
+	mv_engine_config_set_double_attribute(mv_engine_cfg,
+						MV_INFERENCE_MODEL_MEAN_VALUE,
+						0.0);
+
+	mv_engine_config_set_double_attribute(mv_engine_cfg,
+						MV_INFERENCE_MODEL_STD_VALUE,
+						1.0);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+						MV_INFERENCE_BACKEND_TYPE,
+						MV_INFERENCE_BACKEND_DLDT);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+						MV_INFERENCE_TARGET_DEVICE_TYPE,
+						MV_INFERENCE_TARGET_DEVICE_CUSTOM);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+						MV_INFERENCE_INPUT_TENSOR_WIDTH,
+						224);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+						MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+						224);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+						MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+						3);
+
+	mv_engine_config_set_string_attribute(mv_engine_cfg,
+						MV_INFERENCE_INPUT_NODE_NAME,
+						inputNodeName);
+
+	mv_engine_config_set_array_string_attribute(mv_engine_cfg,
+						MV_INFERENCE_OUTPUT_NODE_NAMES,
+						outputNodeNames,
+						2);
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+
+int perform_dldt_hand_detection2(mv_engine_config_h mv_engine_cfg)
+{
+	char *inputNodeName = "input";
+	char *outputNodeNames[2] = {"ban_1_6/gesture/Squeeze", "ban_1_6/stack/Concat_"};
+
+	mv_engine_config_set_string_attribute(mv_engine_cfg,
+						MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+						PE_DLDT_2_WEIGHT_PATH);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+						MV_INFERENCE_INPUT_DATA_TYPE,
+						MV_INFERENCE_DATA_FLOAT32);
+
+	mv_engine_config_set_double_attribute(mv_engine_cfg,
+						MV_INFERENCE_MODEL_MEAN_VALUE,
+						0.0);
+
+	mv_engine_config_set_double_attribute(mv_engine_cfg,
+						MV_INFERENCE_MODEL_STD_VALUE,
+						1.0);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+						MV_INFERENCE_BACKEND_TYPE,
+						MV_INFERENCE_BACKEND_DLDT);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+						MV_INFERENCE_TARGET_DEVICE_TYPE,
+						MV_INFERENCE_TARGET_DEVICE_CUSTOM);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+						MV_INFERENCE_INPUT_TENSOR_WIDTH,
+						56);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+						MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+						56);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+						MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+						21);
+
+	mv_engine_config_set_string_attribute(mv_engine_cfg,
+						MV_INFERENCE_INPUT_NODE_NAME,
+						inputNodeName);
+
+	mv_engine_config_set_array_string_attribute(mv_engine_cfg,
+						MV_INFERENCE_OUTPUT_NODE_NAMES,
+						outputNodeNames,
+						2);
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+
 int perform_tflite_hand_detection_AICLiteQ(mv_engine_config_h mv_engine_cfg)
 {
 	if (mv_engine_cfg == NULL) {
@@ -1714,6 +1834,13 @@ static int app_create(void *data)
 		err = perform_tflite_hand_detection_AICLite(hp_mv_engine_cfg);
 
 		err = perform_tflite_hand_detection_AICLite2(hp_mv_engine_cfg2);
+	} else if (ad->modelType == MODEL_TYPE_POSE_HAND_DLDT) {
+		outputTensorData = (void*)calloc(56*56*21, sizeof(float));
+
+		err = perform_dldt_hand_detection(hp_mv_engine_cfg);
+
+		err = perform_dldt_hand_detection2(hp_mv_engine_cfg2);
+
 	} else {
 		outputTensorData = (void*)calloc(56*56*21, sizeof(float));
 		err = perform_tflite_hand_detection_AICLiteQ(hp_mv_engine_cfg);
@@ -1882,6 +2009,7 @@ static int app_create(void *data)
 
 	if (ad->modelType == MODEL_TYPE_POSE_HAND_AICLite ||
 		ad->modelType == MODEL_TYPE_POSE_HAND_AICLite2 ||
+		ad->modelType == MODEL_TYPE_POSE_HAND_DLDT ||
 		ad->modelType == MODEL_TYPE_POSE_HAND_AICLite2Q) {
 		g_object_set(G_OBJECT(vsfilter), "caps", gst_caps_from_string("video/x-raw, width=224, height=224"), NULL);
 		poseCropSize = 224;
@@ -2074,18 +2202,19 @@ int main (int argc, char *argv[])
 	if (argc < 2) {
 		printf("usage: mv_stream_infer model [NeckThresVal, ArmThresVal, LegThresVal, [filename]]");
-		printf("model: 0(CPM), 1(AIC Hand), 2(AIC Lite Hand), 3(AIC Lite Q Hand)\n");
+		printf("model: 0(CPM), 1(AIC Hand), 2(AIC Lite Hand), 3(AIC Lite Q Hand) 4(DLDT)\n");
 		return -1;
 	}
 
 	ad.modelType = atoi(argv[1]);
-	if (ad.modelType < 0 || ad.modelType > 3) {
+	if (ad.modelType < 0 || ad.modelType > 4) {
 		printf("not supported model type [%d]\n", ad.modelType);
 		return -1;
 	}
 
 	if (ad.modelType != MODEL_TYPE_POSE_HAND_AICLite &&
 		ad.modelType != MODEL_TYPE_POSE_HAND_AICLite2 &&
+		ad.modelType != MODEL_TYPE_POSE_HAND_DLDT &&
 		ad.modelType != MODEL_TYPE_POSE_HAND_AICLite2Q) {
 		thValNeck = (float)atoi(argv[2])/100.f;
 		thValArm = (float)atoi(argv[3])/100.f;