test: Add AIC's hand gesture model verification support 38/246638/1
author Inki Dae <inki.dae@samsung.com>
Mon, 2 Nov 2020 08:20:35 +0000 (17:20 +0900)
committer Inki Dae <inki.dae@samsung.com>
Tue, 3 Nov 2020 02:00:41 +0000 (11:00 +0900)
Change-Id: I2beab7041e7b57f93f4fef22e2e7e8178470a3b2
Signed-off-by: Inki Dae <inki.dae@samsung.com>
start_profiler.sh
test/res/boundingbox.answer [new file with mode: 0644]
test/res/hand.bin [new file with mode: 0644]
test/res/heatmap.answer [new file with mode: 0644]
test/src/inference_engine_profiler.cpp
test/src/inference_engine_test_common.cpp
test/src/inference_engine_test_common.h

diff --git a/start_profiler.sh b/start_profiler.sh
index d8dbbd5..5bb9253 100644
@@ -11,17 +11,28 @@ do
   /usr/bin/inference_engine_profiler --gtest_filter=Prefix/InferenceEngineTfliteTest.Inference/$i
 done
 
-# Caffe model
-CNT=7
+# Hand gesture model from AIC
+CNT=9
 
-echo "Caffe model test case count = $CNT"
+echo "Hand gesture model from AIC test case count = $CNT"
 
 LIST=$(seq 0 $CNT)
 for i in $LIST
 do
-  /usr/bin/inference_engine_profiler --gtest_filter=Prefix/InferenceEngineCaffeTest.Inference/$i
+  /usr/bin/inference_engine_profiler --gtest_filter=Prefix/InferenceEngineHandGestureTest.Inference/$i
 done
 
+# Caffe model
+#CNT=7
+
+#echo "Caffe model test case count = $CNT"
+
+#LIST=$(seq 0 $CNT)
+#for i in $LIST
+#do
+#  /usr/bin/inference_engine_profiler --gtest_filter=Prefix/InferenceEngineCaffeTest.Inference/$i
+#done
+
 # If you want to add new model tests then add script for it below
 #
 # CNT=[a number of test cases]
diff --git a/test/res/boundingbox.answer b/test/res/boundingbox.answer
new file mode 100644
index 0000000..9cdc845
Binary files /dev/null and b/test/res/boundingbox.answer differ
diff --git a/test/res/hand.bin b/test/res/hand.bin
new file mode 100644
index 0000000..91e97ea
Binary files /dev/null and b/test/res/hand.bin differ
diff --git a/test/res/heatmap.answer b/test/res/heatmap.answer
new file mode 100644
index 0000000..a4c72db
Binary files /dev/null and b/test/res/heatmap.answer differ
diff --git a/test/src/inference_engine_profiler.cpp b/test/src/inference_engine_profiler.cpp
index 1101448..7b34a37 100644
@@ -40,6 +40,8 @@ class InferenceEngineCaffeTest : public testing::TestWithParam<ParamType_Infer>
 {};
 class InferenceEngineDldtTest : public testing::TestWithParam<ParamType_Infer>
 {};
+class InferenceEngineHandGestureTest : public testing::TestWithParam<ParamType_Infer>
+{};
 
 TEST_P(InferenceEngineTfliteTest, Inference)
 {
@@ -652,6 +654,191 @@ TEST_P(InferenceEngineDldtTest, Inference)
        models.clear();
 }
 
+TEST_P(InferenceEngineHandGestureTest, Inference)
+{
+       std::string backend_name;
+       int target_devices;
+       int test_type;
+       int iteration;
+       int tensor_type;
+       std::vector<std::string> image_paths;
+       size_t height;
+       size_t width;
+       size_t ch;
+       std::vector<std::string> input_layers;
+       std::vector<std::string> output_layers;
+       std::vector<std::string> model_paths;
+       std::vector<int> answers;
+
+       std::tie(backend_name, target_devices, test_type, iteration, tensor_type, image_paths, height, width, ch, input_layers, output_layers, model_paths, answers) = GetParam();
+
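+       // Make sure at least one inference iteration is executed.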
+       if (iteration < 1) {
+               iteration = 1;
+       }
+
+       MachineCapacity *Cap = GetMachineCapacity();
+       if (Cap == NULL) {
+               std::cout << "Failed to get machine capacity" << std::endl;
+               return;
+       }
+
+       // If current machine doesn't support inference engine then skip this test.
+       if (Cap->available == false) {
+               return;
+       }
+
+       // If current machine doesn't support OpenCL then skip the inference on GPU.
+       if (target_devices == INFERENCE_TARGET_GPU && Cap->has_gpu == false) {
+               return;
+       }
+
+       std::string test_name;
+       switch (test_type) {
+       case TEST_AIC_HAND_GESTURE_1:
+               test_name.append("AIC Hand Gesture detection 1");
+               break;
+       case TEST_AIC_HAND_GESTURE_2:
+               test_name.append("AIC Hand Gesture detection 2");
+               break;
+       }
+
+       std::cout << test_name << " inference test : backend = " << backend_name << ", target device = " << Target_Formats[target_devices] << std::endl;
+
+       int backend_type = -1;
+
+       // If backend name is "one" then change it to "mlapi"
+       // and set backend_type to INFERENCE_BACKEND_ONE.
+       if (backend_name.compare("one") == 0) {
+               backend_name = "mlapi";
+               backend_type = INFERENCE_BACKEND_ONE;
+       }
+
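+       // Build the engine configuration from the requested backend and target device.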
+       inference_engine_config config = {
+               .backend_name = backend_name,
+               .backend_type = backend_type,
+               .target_devices = target_devices
+       };
+
+       auto engine = std::make_unique<InferenceEngineCommon>();
+       if (engine == nullptr) {
+               ASSERT_TRUE(engine);
+               return;
+       }
+
+       int ret = engine->EnableProfiler(true);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
+       if (backend_type == INFERENCE_BACKEND_ONE)
+               backend_name = "one";
+
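+       // Dump the collected profile data to a text file named after the requested backend.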
+       ret = engine->DumpProfileToFile("profile_data_" + backend_name +
+                                                                       "_hand_gesture_model.txt");
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
+       ret = engine->LoadConfigFile();
+       ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+
+       ret = engine->BindBackend(&config);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
+       inference_engine_capacity capacity;
+       ret = engine->GetBackendCapacity(&capacity);
+       EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+
+       ret = engine->SetTargetDevices(target_devices);
+       EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+
+       std::vector <std::string> models;
+       int model_type = GetModelInfo(model_paths, models);
+       if (model_type == -1) {
+               ASSERT_NE(model_type, -1);
+               return;
+       }
+
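+       // Describe each input layer as an NCHW tensor with the given data type, channel count and resolution.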
+       inference_engine_layer_property input_property;
+       std::vector<std::string>::iterator iter;
+
+       for (iter = input_layers.begin(); iter != input_layers.end(); iter++) {
+               inference_engine_tensor_info tensor_info = {
+                       { 1, ch, height, width },
+                       (inference_tensor_shape_type_e)INFERENCE_TENSOR_SHAPE_NCHW,
+                       (inference_tensor_data_type_e)tensor_type,
+                       (size_t)(1 * ch * height * width)
+               };
+
+               input_property.layer_names.push_back(*iter);
+               input_property.tensor_infos.push_back(tensor_info);
+       }
+
+       ret = engine->SetInputLayerProperty(input_property);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
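+       // Register the names of the output layers to fetch after inference.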
+       inference_engine_layer_property output_property;
+
+       for (iter = output_layers.begin(); iter != output_layers.end(); iter++) {
+               output_property.layer_names.push_back(*iter);
+       }
+
+       ret = engine->SetOutputLayerProperty(output_property);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
+       ret = engine->Load(models, (inference_model_format_e)model_type);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
+       std::vector<inference_engine_tensor_buffer> inputs, outputs;
+       ret = PrepareTensorBuffers(engine.get(), inputs, outputs);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
+       // Copy input image tensor data from a given file to input tensor buffer.
+       for (int i = 0; i < (int)image_paths.size(); ++i) {
+               CopyFileToMemory(image_paths[i].c_str(), inputs[i], inputs[i].size);
+       }
+
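+       // Run inference for the requested number of iterations.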
+       for (int repeat = 0; repeat < iteration; ++repeat) {
+               ret = engine->Run(inputs, outputs);
+               EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+       }
+
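+       // Verify the inference results against the expected answers for the selected test case.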
+       switch (test_type) {
+       case TEST_AIC_HAND_GESTURE_1:
+               ret = VerifyAICHandGesture1Results(outputs);
+               EXPECT_EQ(ret, 1);
+               break;
+       case TEST_AIC_HAND_GESTURE_2:
+               ret = VerifyAICHandGesture2Results(outputs, answers);
+               EXPECT_EQ(ret, 1);
+               break;
+       }
+
+       CleanupTensorBuffers(inputs, outputs);
+
+       engine->UnbindBackend();
+       models.clear();
+}
+
 INSTANTIATE_TEST_CASE_P(
                Prefix, InferenceEngineTfliteTest,
                testing::Values(
@@ -1172,3 +1359,67 @@ INSTANTIATE_TEST_CASE_P(
                                                { "/usr/share/capi-media-vision/models/IC/dldt/googlenet-v1.xml",
                                                  "/usr/share/capi-media-vision/models/IC/dldt/googlenet-v1.bin" },
                                                { 954 })));
+
+INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineHandGestureTest,
+               testing::Values(
+                               // TFLITE
+                               ParamType_Infer("tflite", INFERENCE_TARGET_CPU, TEST_AIC_HAND_GESTURE_1, 100, INFERENCE_TENSOR_DATA_TYPE_FLOAT32,
+                                               { "/opt/usr/images/hand.bin" }, 224, 224, 3, { "input" }, { "mobilenetv2/boundingbox", "mobilenetv2/heatmap" },
+                                               { "/usr/share/capi-media-vision/models/PE_1/tflite/posenet1_lite_224.tflite" }, { 0 }),
+                               ParamType_Infer("tflite", INFERENCE_TARGET_CPU, TEST_AIC_HAND_GESTURE_2, 100, INFERENCE_TENSOR_DATA_TYPE_FLOAT32,
+                                               { "/opt/usr/images/hand.bin" }, 56, 56, 21, { "input" }, { "mobilenetv2/coord_refine", "mobilenetv2/gesture" },
+                                               { "/usr/share/capi-media-vision/models/PE_2/tflite/posenet2_lite_224.tflite" },
+                                               { 55, 39, 51, 40, 50, 42, 61, 43, 71, 39,
+                                                 78, 36, 82, 42, 82, 44, 83, 45, 35, 37,
+                                                 61, 36, 59, 36, 52, 39, 35, 32, 40, 34,
+                                                 62, 39, 70, 40, 58, 41, 34, 42, 34, 41,
+                                                 38, 38, 12 }),
+                               // TFLITE via MLAPI
+                               ParamType_Infer("mlapi", INFERENCE_TARGET_CPU, TEST_AIC_HAND_GESTURE_1, 100, INFERENCE_TENSOR_DATA_TYPE_FLOAT32,
+                                               { "/opt/usr/images/hand.bin" }, 224, 224, 3, { "input" }, { "mobilenetv2/boundingbox", "mobilenetv2/heatmap" },
+                                               { "/usr/share/capi-media-vision/models/PE_1/tflite/posenet1_lite_224.tflite" }, { 0 }),
+                               ParamType_Infer("mlapi", INFERENCE_TARGET_CPU, TEST_AIC_HAND_GESTURE_2, 100, INFERENCE_TENSOR_DATA_TYPE_FLOAT32,
+                                               { "/opt/usr/images/hand.bin" }, 56, 56, 21, { "input" }, { "mobilenetv2/coord_refine", "mobilenetv2/gesture" },
+                                               { "/usr/share/capi-media-vision/models/PE_2/tflite/posenet2_lite_224.tflite" },
+                                               { 55, 39, 51, 40, 50, 42, 61, 43, 71, 39,
+                                                 78, 36, 82, 42, 82, 44, 83, 45, 35, 37,
+                                                 61, 36, 59, 36, 52, 39, 35, 32, 40, 34,
+                                                 62, 39, 70, 40, 58, 41, 34, 42, 34, 41,
+                                                 38, 38, 12 }),
+                               // ARMNN
+                               ParamType_Infer("armnn", INFERENCE_TARGET_CPU, TEST_AIC_HAND_GESTURE_1, 100, INFERENCE_TENSOR_DATA_TYPE_FLOAT32,
+                                               { "/opt/usr/images/hand.bin" }, 224, 224, 3, { "input" }, { "mobilenetv2/boundingbox", "mobilenetv2/heatmap" },
+                                               { "/usr/share/capi-media-vision/models/PE_1/tflite/posenet1_lite_224.tflite" }, { 0 }),
+                               ParamType_Infer("armnn", INFERENCE_TARGET_CPU, TEST_AIC_HAND_GESTURE_2, 100, INFERENCE_TENSOR_DATA_TYPE_FLOAT32,
+                                               { "/opt/usr/images/hand.bin" }, 56, 56, 21, { "input" }, { "mobilenetv2/coord_refine", "mobilenetv2/gesture" },
+                                               { "/usr/share/capi-media-vision/models/PE_2/tflite/posenet2_lite_224.tflite" },
+                                               { 55, 39, 51, 40, 50, 42, 61, 43, 71, 39,
+                                                 78, 36, 82, 42, 82, 44, 83, 45, 35, 37,
+                                                 61, 36, 59, 36, 52, 39, 35, 32, 40, 34,
+                                                 62, 39, 70, 40, 58, 41, 34, 42, 34, 41,
+                                                 38, 38, 12 }),
+                               ParamType_Infer("armnn", INFERENCE_TARGET_GPU, TEST_AIC_HAND_GESTURE_1, 100, INFERENCE_TENSOR_DATA_TYPE_FLOAT32,
+                                               { "/opt/usr/images/hand.bin" }, 224, 224, 3, { "input" }, { "mobilenetv2/boundingbox", "mobilenetv2/heatmap" },
+                                               { "/usr/share/capi-media-vision/models/PE_1/tflite/posenet1_lite_224.tflite" }, { 0 }),
+                               ParamType_Infer("armnn", INFERENCE_TARGET_GPU, TEST_AIC_HAND_GESTURE_2, 100, INFERENCE_TENSOR_DATA_TYPE_FLOAT32,
+                                               { "/opt/usr/images/hand.bin" }, 56, 56, 21, { "input" }, { "mobilenetv2/coord_refine", "mobilenetv2/gesture" },
+                                               { "/usr/share/capi-media-vision/models/PE_2/tflite/posenet2_lite_224.tflite" },
+                                               { 55, 39, 51, 40, 50, 42, 61, 43, 71, 39,
+                                                 78, 36, 82, 42, 82, 44, 83, 45, 35, 37,
+                                                 61, 36, 59, 36, 52, 39, 35, 32, 40, 34,
+                                                 62, 39, 70, 40, 58, 41, 34, 42, 34, 41,
+                                                 38, 38, 12 }),
+                               // ONE via MLAPI
+                               ParamType_Infer("one", INFERENCE_TARGET_GPU, TEST_AIC_HAND_GESTURE_1, 100, INFERENCE_TENSOR_DATA_TYPE_FLOAT32,
+                                               { "/opt/usr/images/hand.bin" }, 224, 224, 3, { "input" }, { "mobilenetv2/boundingbox", "mobilenetv2/heatmap" },
+                                               { "/usr/share/capi-media-vision/models/PE_1/tflite/posenet1_lite_224.tflite" }, { 0 }),
+                               ParamType_Infer("one", INFERENCE_TARGET_GPU, TEST_AIC_HAND_GESTURE_2, 100, INFERENCE_TENSOR_DATA_TYPE_FLOAT32,
+                                               { "/opt/usr/images/hand.bin" }, 56, 56, 21, { "input" }, { "mobilenetv2/coord_refine", "mobilenetv2/gesture" },
+                                               { "/usr/share/capi-media-vision/models/PE_2/tflite/posenet2_lite_224.tflite" },
+                                               { 55, 39, 51, 40, 50, 42, 61, 43, 71, 39,
+                                                 78, 36, 82, 42, 82, 44, 83, 45, 35, 37,
+                                                 61, 36, 59, 36, 52, 39, 35, 32, 40, 34,
+                                                 62, 39, 70, 40, 58, 41, 34, 42, 34, 41,
+                                                 38, 38, 12 })
+                               /* TODO */
+                               ));
diff --git a/test/src/inference_engine_test_common.cpp b/test/src/inference_engine_test_common.cpp
index ad0a1e1..45770b9 100644
@@ -185,6 +185,12 @@ int PrepareTensorBuffers(InferenceEngineCommon *engine,
                                tensor_buffer.buffer =
                                                (void *) (new unsigned char[tensor_info.size]);
                                tensor_buffer.size = tensor_info.size;
+                       } else if (tensor_info.data_type == INFERENCE_TENSOR_DATA_TYPE_INT64) {
+                               tensor_buffer.buffer = (void *)(new long long[tensor_info.size]);
+                               tensor_buffer.size = tensor_info.size * 8;
+                       } else if (tensor_info.data_type == INFERENCE_TENSOR_DATA_TYPE_UINT16) {
+                               tensor_buffer.buffer = (void *)(new unsigned short[tensor_info.size]);
+                               tensor_buffer.size = tensor_info.size * 2;
                        }
 
                        EXPECT_TRUE(tensor_buffer.buffer);
@@ -468,3 +474,98 @@ int VerifyPoseEstimationResults(tensor_t &outputData, std::vector<int> &answers,
 
        return ret;
 }
+
+int VerifyAICHandGesture1Results(std::vector<inference_engine_tensor_buffer> &output)
+{
+       // ### output[0] ###
+       // output name : "mobilenetv2/boundingbox"
+       // data type   : int64
+       // tensor shape : 1 * 56 * 56
+       std::ifstream fin("/opt/usr/images/boundingbox.answer", std::ios_base::in | std::ios_base::binary);
+       char *o_buffer = new (std::nothrow) char[output[0].size];
+       if (!o_buffer) {
+               std::cout << "failed to alloc o_buffer." << std::endl;
+               return 0;
+       }
+
+       fin.read(o_buffer, output[0].size);
+       fin.close();
+
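+       // Compare every int64 element of the output with the golden answer.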
+       const long long *f_answer = (const long long *)o_buffer;
+       const unsigned int output_size = output[0].size / 8;
+       for (unsigned int i = 0; i < output_size; ++i) {
+               if (static_cast<long long *>(output[0].buffer)[i] != f_answer[i]) {
+                       std::cout << "boundingbox wrong answer at index[" << i << "]" << std::endl;
+                       std::cout << static_cast<long long *>(output[0].buffer)[i] << " vs " << f_answer[i] << std::endl;
+                       delete[] o_buffer;
+                       return 0;
+               }
+       }
+
+       delete[] o_buffer;
+
+       // ### output[1] ###
+       // output name : "mobilenetv2/heatmap"
+       // data type   : float
+       // tensor shape : 1 * 56 * 56 * 21
+       std::ifstream fin_2("/opt/usr/images/heatmap.answer", std::ios_base::in | std::ios_base::binary);
+       char *o_buffer_2 = new (std::nothrow) char[output[1].size];
+       if (!o_buffer_2) {
+               std::cout << "failed to alloc o_buffer_2." << std::endl;
+               return 0;
+       }
+
+       fin_2.read(o_buffer_2, output[1].size);
+       fin_2.close();
+
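+       // Compare the float heatmap element-wise, allowing a small integer margin of error.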
+       const float *f_answer_2 = (const float *)o_buffer_2;
+       const unsigned int output_size_2 = output[1].size / 4;
+       const int margin = 2;
+       for (unsigned int i = 0; i < output_size_2; ++i) {
+               const int value_left = static_cast<int>((static_cast<float *>(output[1].buffer)[i]));
+               const int value_right = static_cast<int>(f_answer_2[i]);
+               int diff = value_left - value_right;
+               diff = diff < 0 ? diff * -1 : diff;
+               if (diff > margin) {
+                       std::cout << "heatmap wrong answer at index[" << i << "]" << std::endl;
+                       std::cout << value_left << " vs " << value_right << std::endl;
+                       delete[] o_buffer_2;
+                       return 0;
+               }
+       }
+
+       delete[] o_buffer_2;
+
+       return 1;
+}
+
+int VerifyAICHandGesture2Results(std::vector<inference_engine_tensor_buffer> &output,
+                                                                std::vector<int> &answers)
+{
+       // ### output[0] ###
+       // output name : "mobilenetv2/coord_refine"
+       // data type   : float
+       // tensor shape : 1 * 21 * 2
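+       // Each expected answer stores the refined coordinate scaled by 100 and truncated to an integer.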
+       unsigned int size = output[0].size / 4;
+       for (unsigned int i = 0; i < size; ++i) {
+               unsigned int value = static_cast<unsigned int>(static_cast<float *>(output[0].buffer)[i] * 100.0f);
+               if (value != static_cast<unsigned int>(answers[i])) {
+                       std::cout << "coord_refine wrong answer at index[" << i << "]" << std::endl;
+                       std::cout << value << " vs " << answers[i] << std::endl;
+                       return 0;
+               }
+       }
+
+       // ### output[1] ###
+       // output name : "mobilenetv2/gesture"
+       // data type   : int64
+       // tensor shape : 1 * 1 * 1
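+       // The last element of 'answers' holds the expected gesture class id.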
+       unsigned int value = static_cast<unsigned int>(static_cast<long long *>(output[1].buffer)[0]);
+       if (value != static_cast<unsigned int>(answers[answers.size() - 1])) {
+               std::cout << "gesture wrong answer at index[0]" << std::endl;
+               std::cout << value << " vs " << answers[answers.size() - 1] << std::endl;
+               return 0;
+       }
+
+       return 1;
+}
diff --git a/test/src/inference_engine_test_common.h b/test/src/inference_engine_test_common.h
index 17f4116..b95e164 100644
@@ -42,6 +42,8 @@ enum {
        TEST_IMAGE_CLASSIFICATION = 0,
        TEST_OBJECT_DETECTION,
        TEST_FACE_DETECTION,
+       TEST_AIC_HAND_GESTURE_1,
+       TEST_AIC_HAND_GESTURE_2,
        TEST_FACIAL_LANDMARK_DETECTION,
        TEST_POSE_ESTIMATION
 };
@@ -85,3 +87,8 @@ int VerifyFacialLandmarkDetectionResults(tensor_t &outputData,
 
 int VerifyPoseEstimationResults(tensor_t &outputData, std::vector<int> &answers,
                                                                int height, int width);
+
+int VerifyAICHandGesture1Results(std::vector<inference_engine_tensor_buffer> &output);
+
+int VerifyAICHandGesture2Results(std::vector<inference_engine_tensor_buffer> &output,
+                                                                std::vector<int> &answers);