test: Add OpenCV backend test cases
author    Tae-Young Chung <ty83.chung@samsung.com>
          Wed, 8 Apr 2020 04:09:26 +0000 (13:09 +0900)
committer Inki Dae <inki.dae@samsung.com>
          Tue, 14 Apr 2020 00:42:53 +0000 (09:42 +0900)
Change-Id: Ie39cad370751d89adba608ca3c9e781369c52bba
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
test/res/face_detection_caffe.bin [new file with mode: 0644]
test/res/faciallandmark_detection_caffe.bin [new file with mode: 0644]
test/res/image_classification_caffe.bin [new file with mode: 0644]
test/res/object_detection_caffe.bin [new file with mode: 0644]
test/src/inference_engine_test.cpp

diff --git a/test/res/face_detection_caffe.bin b/test/res/face_detection_caffe.bin
new file mode 100644
index 0000000..cca305a
Binary files /dev/null and b/test/res/face_detection_caffe.bin differ
diff --git a/test/res/faciallandmark_detection_caffe.bin b/test/res/faciallandmark_detection_caffe.bin
new file mode 100644
index 0000000..d777b57
Binary files /dev/null and b/test/res/faciallandmark_detection_caffe.bin differ
diff --git a/test/res/image_classification_caffe.bin b/test/res/image_classification_caffe.bin
new file mode 100644
index 0000000..3bab81b
Binary files /dev/null and b/test/res/image_classification_caffe.bin differ
diff --git a/test/res/object_detection_caffe.bin b/test/res/object_detection_caffe.bin
new file mode 100644
index 0000000..7749b5a
Binary files /dev/null and b/test/res/object_detection_caffe.bin differ
diff --git a/test/src/inference_engine_test.cpp b/test/src/inference_engine_test.cpp
index 48b194cdd1cc27b41068f7a2502e996ca1f48e81..82c386f08d95344fd141c4b29c0485952cb2b675 100644
@@ -36,6 +36,7 @@ typedef std::tuple<std::string, int, int, int, int, std::vector<std::string>, in
 class InferenceEngineCommonTest : public testing::TestWithParam<ParamType> { };
 class InferenceEngineCommonTest_2 : public testing::TestWithParam<ParamType_Load> { };
 class InferenceEngineTfliteTest : public testing::TestWithParam<ParamType_Infer> { };
+class InferenceEngineCaffeTest : public testing::TestWithParam<ParamType_Infer> { };
 
 std::map<std::string, int> Model_Formats = {
        { "caffemodel", INFERENCE_MODEL_CAFFE },
@@ -51,7 +52,7 @@ enum {
        TEST_IMAGE_CLASSIFICATION = 0,
        TEST_OBJECT_DETECTION,
        TEST_FACE_DETECTION,
-       TEST_FACILA_LANDMARK_DETECTION,
+       TEST_FACIAL_LANDMARK_DETECTION,
        TEST_POSE_ESTIMATION
 };
 
@@ -356,10 +357,45 @@ int VerifyObjectDetectionResults(tensor_t &outputData, std::vector<int> &answers
 {
        std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
        std::vector<void*> inferResults(outputData.data.begin(), outputData.data.end());
-       float *boxes = reinterpret_cast<float *>(inferResults[0]);
-       float *scores = reinterpret_cast<float *>(inferResults[2]);
 
-       int num_of_detections = (int)(*reinterpret_cast<float *>(inferResults[3]));
+       float* boxes = nullptr;
+       float* classes = nullptr;
+       float* scores = nullptr;
+       int num_of_detections = 0;
+
+       if (outputData.dimInfo.size() == 1) {
+               // There is no way to know how many objects were detected unless the backend
+               // provides that number explicitly, so in this case each backend has to report
+               // the number of results manually. For example, OpenCV's MobilenetV1-SSD output
+               // has shape 1x1xNx7, and the 1st of the 7 values is the image id, which is
+               // useless unless a batch mode is supported. So the number of detections is
+               // written into that 1st element, i.e., outputData.data[0].
+
+               num_of_detections = (int)(*reinterpret_cast<float*>(outputData.data[0]));
+
+               boxes = new float[num_of_detections * 4];
+               classes = new float[num_of_detections];
+               scores = new float[num_of_detections];
+
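+               // Each 7-value record follows OpenCV's SSD detection_out layout,
+               // [image_id, label, score, left, top, right, bottom]; the box is
+               // repacked as (top, left, bottom, right) for the shared verification loop below.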
+               for (int idx = 0; idx < num_of_detections; ++idx) {
+                       classes[idx] = (reinterpret_cast<float*>(outputData.data[0]))[idx*inferDimInfo[0][3] + 1];
+                       scores[idx] = (reinterpret_cast<float*>(outputData.data[0]))[idx*inferDimInfo[0][3] + 2];
+
+                       boxes[idx*4] = (reinterpret_cast<float*>(outputData.data[0]))[idx*inferDimInfo[0][3] + 4];
+                       boxes[idx*4  + 1] = (reinterpret_cast<float*>(outputData.data[0]))[idx*inferDimInfo[0][3] + 3];
+                       boxes[idx*4  + 2] = (reinterpret_cast<float*>(outputData.data[0]))[idx*inferDimInfo[0][3] + 6];
+                       boxes[idx*4  + 3] = (reinterpret_cast<float*>(outputData.data[0]))[idx*inferDimInfo[0][3] + 5];
+               }
+       } else {
+               boxes = reinterpret_cast<float*>(inferResults[0]);
+               classes = reinterpret_cast<float*>(inferResults[1]);
+               scores = reinterpret_cast<float*>(inferResults[2]);
+               num_of_detections = (int)(*reinterpret_cast<float*>(inferResults[3]));
+       }
+
        int left = 0, top = 0, right = 0, bottom = 0;
        float max_score = 0.0f;
 
@@ -374,9 +410,42 @@ int VerifyObjectDetectionResults(tensor_t &outputData, std::vector<int> &answers
                }
        }
 
+       if (outputData.dimInfo.size() == 1) {
+               delete [] boxes;
+               delete [] classes;
+               delete [] scores;
+       }
+
        return (answers[0] == left && answers[1] == top && answers[2] == right && answers[3] == bottom);
 }
 
+int VerifyFacialLandmarkDetectionResults(tensor_t &outputData, std::vector<int> &answers, int height, int width)
+{
+       std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
+       std::vector<void*> inferResults(outputData.data.begin(), outputData.data.end());
+       std::vector<int> result_x, result_y;
+
+       long number_of_detections = inferDimInfo[0][1];
+       float* loc = reinterpret_cast<float*>(inferResults[0]);
+
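+       // The landmark output is a flat list of normalized coordinates interleaved
+       // as (x, y) pairs, so inferDimInfo[0][1] is twice the number of landmark points.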
+       for (int idx = 0; idx < number_of_detections; idx+=2) {
+               result_x.push_back((int)(loc[idx] * width));
+               result_y.push_back((int)(loc[idx+1] * height));
+       }
+
+       int ret = 1;
+       for (int i = 0; i < (number_of_detections>>1); i++) {
+               if (result_x[i] != answers[i*2] || result_y[i] != answers[i*2 + 1]) {
+                       ret = 0;
+                       break;
+               }
+       }
+
+       return ret;
+}
+
 int VerifyPoseEstimationResults(tensor_t &outputData, std::vector<int> &answers, int height, int width)
 {
        std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
@@ -454,8 +523,8 @@ TEST_P(InferenceEngineTfliteTest, Inference)
        case TEST_FACE_DETECTION:
                test_name.append("Face detection");
                break;
-       case TEST_FACILA_LANDMARK_DETECTION:
-               test_name.append("Facila landmark detection");
+       case TEST_FACIAL_LANDMARK_DETECTION:
+               test_name.append("Facial landmark detection");
                break;
        case TEST_POSE_ESTIMATION:
                test_name.append("Pose estimation");
@@ -589,7 +658,7 @@ TEST_P(InferenceEngineTfliteTest, Inference)
                ret = VerifyObjectDetectionResults(result, answers, 1152, 1536);
                EXPECT_EQ(ret, 1);
                break;
-       case TEST_FACILA_LANDMARK_DETECTION:
+       case TEST_FACIAL_LANDMARK_DETECTION:
                // TODO.
                break;
        case TEST_POSE_ESTIMATION:
@@ -607,6 +676,198 @@ TEST_P(InferenceEngineTfliteTest, Inference)
        delete engine;
 }
 
+TEST_P(InferenceEngineCaffeTest, Inference)
+{
+       std::string backend_name;
+       int target_devices;
+       int test_type;
+       int iteration;
+       int tensor_type;
+       std::vector<std::string> image_paths;
+       int height;
+       int width;
+       int ch;
+       std::vector<std::string> input_layers;
+       std::vector<std::string> output_layers;
+       std::vector<std::string> model_paths;
+       std::vector<int> answers;
+
+       std::tie(backend_name, target_devices, test_type, iteration, tensor_type, image_paths, height, width, ch, input_layers, output_layers, model_paths, answers) = GetParam();
+
+       if (iteration < 1) {
+               iteration = 1;
+       }
+
+       std::string test_name;
+       switch (test_type) {
+       case TEST_IMAGE_CLASSIFICATION:
+               test_name.append("Image classification");
+               break;
+       case TEST_OBJECT_DETECTION:
+               test_name.append("Object detection");
+               break;
+       case TEST_FACE_DETECTION:
+               test_name.append("Face detection");
+               break;
+       case TEST_FACIAL_LANDMARK_DETECTION:
+               test_name.append("Facial landmark detection");
+               break;
+       case TEST_POSE_ESTIMATION:
+               test_name.append("Pose estimation");
+               break;
+       }
+
+       std::cout << test_name << " inference test : backend = " << backend_name << ", target device = " << (target_devices == INFERENCE_TARGET_CPU ? "CPU" : "GPU")  << "\n";
+
+       inference_engine_config config = {
+               .backend_name = backend_name,
+               .target_devices = target_devices
+       };
+
+       InferenceEngineCommon *engine = new InferenceEngineCommon(&config);
+       if (engine == nullptr) {
+               ASSERT_TRUE(engine);
+               return;
+       }
+
+       int ret = engine->EnableProfiler(true);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               delete engine;
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
+       ret = engine->DumpProfileToFile("dump.txt");
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               delete engine;
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
+       ret = engine->BindBackend(&config);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               delete engine;
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
+       inference_engine_capacity capacity;
+       ret = engine->GetBackendCapacity(&capacity);
+       EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+
+       ret = engine->SetTargetDevices(target_devices);
+       EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+
+       std::vector <std::string> models;
+       int model_type = GetModelInfo(model_paths, models);
+       if (model_type == -1) {
+               delete engine;
+               ASSERT_NE(model_type, -1);
+               return;
+       }
+
+       inference_engine_layer_property input_property;
+       std::vector<std::string>::iterator iter;
+
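+       // Every input layer is described with the same NCHW shape,
+       // 1 x ch x height x width, using the tensor type from the test parameter.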
+       for (iter = input_layers.begin(); iter != input_layers.end(); iter++) {
+               inference_engine_tensor_info tensor_info = {
+                       { 1, ch, height, width },
+                       (inference_tensor_shape_type_e)TENSOR_SHAPE_NCHW,
+                       (inference_tensor_data_type_e)tensor_type,
+                       (size_t)(1 * ch * height * width)
+               };
+
+               input_property.layer_names.push_back(*iter);
+               input_property.tensor_infos.push_back(tensor_info);
+       }
+
+       ret = engine->SetInputLayerProperty(input_property);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               delete engine;
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
+       inference_engine_layer_property output_property;
+
+       for (iter = output_layers.begin(); iter != output_layers.end(); iter++) {
+               output_property.layer_names.push_back(*iter);
+       }
+
+       ret = engine->SetOutputLayerProperty(output_property);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               delete engine;
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
+       ret = engine->Load(models, (inference_model_format_e)model_type);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               delete engine;
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
+       std::vector<inference_engine_tensor_buffer> inputs, outputs;
+       ret = PrepareTensorBuffers(engine, inputs, outputs);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               delete engine;
+               ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+               return;
+       }
+
+       // Copy input image tensor data from a given file to input tensor buffer.
+       for (int i = 0; i < (int)image_paths.size(); ++i) {
+               CopyFileToMemory(image_paths[i].c_str(), inputs[i], inputs[i].size);
+       }
+
+       for (int repeat = 0; repeat < iteration; ++repeat) {
+               ret = engine->Run(inputs, outputs);
+               EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+       }
+
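+       // Gather every output tensor into a single tensor_t so the per-test
+       // verification helpers below can consume it.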
+       tensor_t result;
+       FillOutputResult(engine, outputs, result);
+
+       switch (test_type) {
+       case TEST_IMAGE_CLASSIFICATION:
+               ret = VerifyImageClassificationResults(result, answers[0]);
+               EXPECT_EQ(ret, 1);
+               break;
+       case TEST_OBJECT_DETECTION:
+               // 636 : fixed height size of dumped image, 1024 : fixed width size of dumped image.
+               ret = VerifyObjectDetectionResults(result, answers, 636, 1024);
+               EXPECT_EQ(ret, 1);
+               break;
+       case TEST_FACE_DETECTION:
+               // 1152 : fixed height size of dumped image, 1536 : fixed width size of dumped image.
+               ret = VerifyObjectDetectionResults(result, answers, 1152, 1536);
+               EXPECT_EQ(ret, 1);
+               break;
+       case TEST_FACIAL_LANDMARK_DETECTION:
+               // 128 : fixed height size of dumped image, 128 : fixed width size of dumped image.
+               ret = VerifyFacialLandmarkDetectionResults(result, answers, 128, 128);
+               EXPECT_EQ(ret, 1);
+               break;
+       case TEST_POSE_ESTIMATION:
+               // 563 : fixed height size of dumped image, 750 : fixed width size of dumped image.
+               ret = VerifyPoseEstimationResults(result, answers, 563, 750);
+               EXPECT_EQ(ret, 1);
+               break;
+       }
+
+       CleanupTensorBuffers(inputs, outputs);
+
+       engine->UnbindBackend();
+       models.clear();
+
+       delete engine;
+}
+
 INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineCommonTest,
                testing::Values(
                        // parameter order : backend name, target device
@@ -618,6 +879,10 @@ INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineCommonTest,
                        ParamType("tflite", INFERENCE_TARGET_GPU),
                        // DLDT.
                        ParamType("dldt", INFERENCE_TARGET_CUSTOM)
+                       // OPENCV.
+                       ParamType("opencv", INFERENCE_TARGET_CPU),
+                       ParamType("opencv", INFERENCE_TARGET_GPU)
+                       /* TODO */
+
                )
 );
 
@@ -630,7 +895,10 @@ INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineCommonTest_2,
                        ParamType_Load("armnn", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }),
                        // TFLITE.
                        ParamType_Load("tflite", INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }),
-                       ParamType_Load("tflite", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" })
+                       ParamType_Load("tflite", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }),
+                       // OPENCV.
+                       ParamType_Load("opencv", INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel", "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt" }),
+                       ParamType_Load("opencv", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel", "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt" })
                        /* TODO */
                )
 );
@@ -682,3 +950,28 @@ INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineTfliteTest,
                        /* TODO */
                )
 );
+
+INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineCaffeTest,
+               testing::Values(
+                       // parameter order : backend_name, target_devices, test_type, iteration, tensor_type, image_paths, height, width, ch, input_layers, output_layers, model_paths, answers
+                       // OPENCV
+                       // squeezenet based image classification test
+                       ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_IMAGE_CLASSIFICATION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/image_classification_caffe.bin" }, 227, 227, 3, { "data" }, { "prob" }, { "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel", "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt" }, { 281 }),
+                       ParamType_Infer("opencv", INFERENCE_TARGET_GPU, TEST_IMAGE_CLASSIFICATION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/image_classification_caffe.bin" }, 227, 227, 3, { "data" }, { "prob" }, { "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel", "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt" }, { 281 }),
+
+                       // mobilenet-ssd based object detection test
+                       ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_OBJECT_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/object_detection_caffe.bin" }, 300, 300, 3, { "data" }, { "detection_out" }, { "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.caffemodel", "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.prototxt" }, { 15, 19, 335, 557 }),
+                       ParamType_Infer("opencv", INFERENCE_TARGET_GPU, TEST_OBJECT_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/object_detection_caffe.bin" }, 300, 300, 3, { "data" }, { "detection_out" }, { "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.caffemodel", "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.prototxt" }, { 15, 19, 335, 557 }),
+
+                       // resnet10-ssd based face detection test
+                       ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_FACE_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/face_detection_caffe.bin" }, 300, 300, 3, { "data" }, { "detection_out" }, { "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.caffemodel", "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.prototxt" }, { 733, 233, 965, 539 }),
+                       ParamType_Infer("opencv", INFERENCE_TARGET_GPU, TEST_FACE_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/face_detection_caffe.bin" }, 300, 300, 3, { "data" }, { "detection_out" }, { "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.caffemodel", "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.prototxt" }, { 733, 233, 965, 539 }),
+
+                       // tweakcnn based facial landmark detection test
+                       ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_FACIAL_LANDMARK_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/faciallandmark_detection_caffe.bin" }, 128, 128, 3, { "data" }, { "Sigmoid_fc2" }, { "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.caffemodel", "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.prototxt" },
+                                                       { 53, 45, 85, 46, 66, 64, 54, 78, 82, 79}),
+                       ParamType_Infer("opencv", INFERENCE_TARGET_GPU, TEST_FACIAL_LANDMARK_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/faciallandmark_detection_caffe.bin" }, 128, 128, 3, { "data" }, { "Sigmoid_fc2" }, { "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.caffemodel", "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.prototxt" },
+                                                       { 53, 45, 85, 46, 66, 64, 54, 78, 82, 79})
+                       /* TODO */
+               )
+);
\ No newline at end of file