Add DLDT(OpenVINO) backend support 78/244078/1
authorKwang Son <k.son@samsung.com>
Tue, 15 Sep 2020 01:29:36 +0000 (10:29 +0900)
committerKwang Son <k.son@samsung.com>
Tue, 15 Sep 2020 01:31:28 +0000 (10:31 +0900)
Change-Id: Ib77f33843268e00a8bbc3ea56a234a98c5b5ba89
Signed-off-by: Kwang Son <k.son@samsung.com>
include/mv_inference_type.h
mv_inference/inference/src/Inference.cpp
test/testsuites/stream_handpose_browser/stream_handpose_browser.c

index 156e99e0e9f65c487b01ae082de8998cd171b6a7..26788647bcfab1dcdd56da2812acc66e568cfa09 100644 (file)
@@ -48,6 +48,7 @@ typedef enum {
     MV_INFERENCE_BACKEND_ARMNN,     /**< ARMNN (Since 6.0) */
     MV_INFERENCE_BACKEND_MLAPI,     /**< ML Single API of NNStreamer (Since 6.0) */
     MV_INFERENCE_BACKEND_ONE,       /**< On-device Neural Engine (Since 6.0) */
+    MV_INFERENCE_BACKEND_DLDT,       /**< Openvino (Since 6.0) */
     MV_INFERENCE_BACKEND_MAX        /**< Backend MAX */
 } mv_inference_backend_type_e;
 
index 359bca34ac64204959dd9fb6fc87e99bb93a6d78..419d60be5a1c200c639d48bbd79a9a7e998eb512 100755 (executable)
@@ -92,6 +92,8 @@ namespace inference
                                MV_INFERENCE_BACKEND_MLAPI, std::make_pair("mlapi", false)));
                mSupportedInferenceBackend.insert(std::make_pair(
                                MV_INFERENCE_BACKEND_ONE, std::make_pair("mlapi", false)));
+               mSupportedInferenceBackend.insert(std::make_pair(
+                               MV_INFERENCE_BACKEND_DLDT, std::make_pair("dldt", false)));
 
                CheckSupportedInferenceBackend();
 
index f002d2d97697d9cc4105fb65a8cad098cb216ad9..0db7469ccd8e2fd4c9336085873cabeb65da12b5 100644 (file)
@@ -91,7 +91,8 @@ typedef enum {
        MODEL_TYPE_POSE_CPM = 0,
        MODEL_TYPE_POSE_HAND_AICLite, // posenet lite 224
        MODEL_TYPE_POSE_HAND_AICLite2, // posenet 0709 f
-       MODEL_TYPE_POSE_HAND_AICLite2Q // posenet 0709 q
+       MODEL_TYPE_POSE_HAND_AICLite2Q, // posenet 0709 q
+	MODEL_TYPE_POSE_HAND_DLDT // posenet dldt (OpenVINO)
 };
 
 typedef struct {
@@ -130,6 +131,11 @@ static efl_util_inputgen_h my_cursor = NULL;
 #define PE_TFLITE_AICLiteQ_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1_0709_dq.tflite"
 #define PE_TFLITE_AICLiteQ_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_0709_f.tflite"
 
+#define PE_DLDT_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/DLDT/0831_posenet1.bin"
+#define PE_DLDT_1_XML_PATH "/usr/share/capi-media-vision/models/PE/DLDT/0831_posenet1.xml"
+#define PE_DLDT_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/DLDT/0831_posenet2.bin"
+#define PE_DLDT_2_XML_PATH "/usr/share/capi-media-vision/models/PE/DLDT/0831_posenet2.xml"
+
 static float thValNeck = 0.3f; // 15%
 static float thValArm = 0.1f; // 10 %
 static float thValLeg = 0.2f; // 5%
@@ -1351,6 +1357,120 @@ int perform_tflite_hand_detection_AICLite2(mv_engine_config_h mv_engine_cfg)
     return MEDIA_VISION_ERROR_NONE;
 }
 
+/*
+ * Configure @mv_engine_cfg for the first DLDT (OpenVINO) hand-pose model:
+ * FLOAT32 input tensor "input" of 224x224x3, two output tensors, running
+ * on the DLDT backend with a custom target device.
+ *
+ * Returns MEDIA_VISION_ERROR_NONE on success,
+ * MEDIA_VISION_ERROR_INVALID_PARAMETER if @mv_engine_cfg is NULL.
+ */
+int perform_dldt_hand_detection(mv_engine_config_h mv_engine_cfg)
+{
+	if (mv_engine_cfg == NULL) {
+		printf("mv_engine_cfg is null\n");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	const char *inputNodeName = "input";
+	const char *outputNodeNames[2] = { "ban_1_6/add_4",
+					   "ban_1_6/boundingbox/Squeeze" };
+
+	/* NOTE(review): PE_DLDT_1_XML_PATH is defined but never handed to the
+	 * engine config; OpenVINO IR models normally need the .xml network
+	 * description alongside the .bin weights — confirm the backend derives
+	 * the .xml path from the weight path. */
+	mv_engine_config_set_string_attribute(mv_engine_cfg,
+			MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+			PE_DLDT_1_WEIGHT_PATH);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+			MV_INFERENCE_INPUT_DATA_TYPE,
+			MV_INFERENCE_DATA_FLOAT32);
+
+	/* Input is fed as-is: zero mean, unit scale. */
+	mv_engine_config_set_double_attribute(mv_engine_cfg,
+			MV_INFERENCE_MODEL_MEAN_VALUE,
+			0.0);
+
+	mv_engine_config_set_double_attribute(mv_engine_cfg,
+			MV_INFERENCE_MODEL_STD_VALUE,
+			1.0);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+			MV_INFERENCE_BACKEND_TYPE,
+			MV_INFERENCE_BACKEND_DLDT);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+			MV_INFERENCE_TARGET_DEVICE_TYPE,
+			MV_INFERENCE_TARGET_DEVICE_CUSTOM);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+			MV_INFERENCE_INPUT_TENSOR_WIDTH,
+			224);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+			MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+			224);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+			MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+			3);
+
+	mv_engine_config_set_string_attribute(mv_engine_cfg,
+			MV_INFERENCE_INPUT_NODE_NAME,
+			inputNodeName);
+
+	mv_engine_config_set_array_string_attribute(mv_engine_cfg,
+			MV_INFERENCE_OUTPUT_NODE_NAMES,
+			outputNodeNames,
+			2);
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+
+/*
+ * Configure @mv_engine_cfg for the second DLDT (OpenVINO) hand-pose model:
+ * FLOAT32 input tensor "input" of 56x56x21 (the heatmap output of the first
+ * stage), two output tensors, running on the DLDT backend with a custom
+ * target device.
+ *
+ * Returns MEDIA_VISION_ERROR_NONE on success,
+ * MEDIA_VISION_ERROR_INVALID_PARAMETER if @mv_engine_cfg is NULL.
+ */
+int perform_dldt_hand_detection2(mv_engine_config_h mv_engine_cfg)
+{
+	/* Guard added for consistency with perform_dldt_hand_detection(). */
+	if (mv_engine_cfg == NULL) {
+		printf("mv_engine_cfg is null\n");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	const char *inputNodeName = "input";
+	const char *outputNodeNames[2] = { "ban_1_6/gesture/Squeeze",
+					   "ban_1_6/stack/Concat_" };
+
+	/* NOTE(review): PE_DLDT_2_XML_PATH is defined but never handed to the
+	 * engine config; confirm the backend locates the .xml IR description
+	 * from the .bin weight path. */
+	mv_engine_config_set_string_attribute(mv_engine_cfg,
+			MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+			PE_DLDT_2_WEIGHT_PATH);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+			MV_INFERENCE_INPUT_DATA_TYPE,
+			MV_INFERENCE_DATA_FLOAT32);
+
+	/* Input is fed as-is: zero mean, unit scale. */
+	mv_engine_config_set_double_attribute(mv_engine_cfg,
+			MV_INFERENCE_MODEL_MEAN_VALUE,
+			0.0);
+
+	mv_engine_config_set_double_attribute(mv_engine_cfg,
+			MV_INFERENCE_MODEL_STD_VALUE,
+			1.0);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+			MV_INFERENCE_BACKEND_TYPE,
+			MV_INFERENCE_BACKEND_DLDT);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+			MV_INFERENCE_TARGET_DEVICE_TYPE,
+			MV_INFERENCE_TARGET_DEVICE_CUSTOM);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+			MV_INFERENCE_INPUT_TENSOR_WIDTH,
+			56);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+			MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+			56);
+
+	mv_engine_config_set_int_attribute(mv_engine_cfg,
+			MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+			21);
+
+	mv_engine_config_set_string_attribute(mv_engine_cfg,
+			MV_INFERENCE_INPUT_NODE_NAME,
+			inputNodeName);
+
+	mv_engine_config_set_array_string_attribute(mv_engine_cfg,
+			MV_INFERENCE_OUTPUT_NODE_NAMES,
+			outputNodeNames,
+			2);
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+
 int perform_tflite_hand_detection_AICLiteQ(mv_engine_config_h mv_engine_cfg)
 {
        if (mv_engine_cfg == NULL) {
@@ -1714,6 +1834,13 @@ static int app_create(void *data)
                err = perform_tflite_hand_detection_AICLite(hp_mv_engine_cfg);
 
                err = perform_tflite_hand_detection_AICLite2(hp_mv_engine_cfg2);
+       } else if (ad->modelType == MODEL_TYPE_POSE_HAND_DLDT) {
+               outputTensorData = (void*)calloc(56*56*21, sizeof(float));
+
+               err = perform_dldt_hand_detection(hp_mv_engine_cfg);
+
+               err = perform_dldt_hand_detection2(hp_mv_engine_cfg2);
+
        } else {
                outputTensorData = (void*)calloc(56*56*21, sizeof(float));
                err = perform_tflite_hand_detection_AICLiteQ(hp_mv_engine_cfg);
@@ -1882,6 +2009,7 @@ static int app_create(void *data)
 
        if (ad->modelType == MODEL_TYPE_POSE_HAND_AICLite ||
                ad->modelType == MODEL_TYPE_POSE_HAND_AICLite2 ||
+               ad->modelType == MODEL_TYPE_POSE_HAND_DLDT ||
                ad->modelType == MODEL_TYPE_POSE_HAND_AICLite2Q) {
                g_object_set(G_OBJECT(vsfilter), "caps", gst_caps_from_string("video/x-raw, width=224, height=224"), NULL);
                poseCropSize = 224;
@@ -2074,18 +2202,19 @@ int main (int argc, char *argv[])
 
        if (argc < 2) {
                printf("usage: mv_stream_infer model [NeckThresVal, ArmThresVal, LegThresVal, [filename]]");
-               printf("model: 0(CPM), 1(AIC Hand), 2(AIC Lite Hand), 3(AIC Lite Q Hand)\n");
+               printf("model: 0(CPM), 1(AIC Hand), 2(AIC Lite Hand), 3(AIC Lite Q Hand) 4(DLDT)\n");
                return -1;
        }
 
        ad.modelType = atoi(argv[1]);
-       if (ad.modelType < 0 || ad.modelType > 3) {
+       if (ad.modelType < 0 || ad.modelType > 4) {
                printf("not supported model type [%d]\n", ad.modelType);
                return -1;
        }
 
        if (ad.modelType != MODEL_TYPE_POSE_HAND_AICLite &&
                ad.modelType != MODEL_TYPE_POSE_HAND_AICLite2 &&
+               ad.modelType != MODEL_TYPE_POSE_HAND_DLDT &&
                ad.modelType != MODEL_TYPE_POSE_HAND_AICLite2Q) {
                thValNeck = (float)atoi(argv[2])/100.f;
                thValArm = (float)atoi(argv[3])/100.f;