From: Tae-Young Chung Date: Wed, 8 Apr 2020 04:09:26 +0000 (+0900) Subject: test: Add OPENCV backend test cases X-Git-Tag: submit/tizen/20200423.063253~15 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d88a87d197b42429accd58d00613e0eba9a923d8;p=platform%2Fcore%2Fmultimedia%2Finference-engine-interface.git test: Add OPENCV backend test cases Change-Id: Ie39cad370751d89adba608ca3c9e781369c52bba Signed-off-by: Tae-Young Chung --- diff --git a/test/res/face_detection_caffe.bin b/test/res/face_detection_caffe.bin new file mode 100644 index 0000000..cca305a Binary files /dev/null and b/test/res/face_detection_caffe.bin differ diff --git a/test/res/faciallandmark_detection_caffe.bin b/test/res/faciallandmark_detection_caffe.bin new file mode 100644 index 0000000..d777b57 Binary files /dev/null and b/test/res/faciallandmark_detection_caffe.bin differ diff --git a/test/res/image_classification_caffe.bin b/test/res/image_classification_caffe.bin new file mode 100644 index 0000000..3bab81b Binary files /dev/null and b/test/res/image_classification_caffe.bin differ diff --git a/test/res/object_detection_caffe.bin b/test/res/object_detection_caffe.bin new file mode 100644 index 0000000..7749b5a Binary files /dev/null and b/test/res/object_detection_caffe.bin differ diff --git a/test/src/inference_engine_test.cpp b/test/src/inference_engine_test.cpp index 48b194c..82c386f 100644 --- a/test/src/inference_engine_test.cpp +++ b/test/src/inference_engine_test.cpp @@ -36,6 +36,7 @@ typedef std::tuple, in class InferenceEngineCommonTest : public testing::TestWithParam { }; class InferenceEngineCommonTest_2 : public testing::TestWithParam { }; class InferenceEngineTfliteTest : public testing::TestWithParam { }; +class InferenceEngineCaffeTest : public testing::TestWithParam { }; std::map Model_Formats = { { "caffemodel", INFERENCE_MODEL_CAFFE }, @@ -51,7 +52,7 @@ enum { TEST_IMAGE_CLASSIFICATION = 0, TEST_OBJECT_DETECTION, TEST_FACE_DETECTION, - 
TEST_FACILA_LANDMARK_DETECTION, + TEST_FACIAL_LANDMARK_DETECTION, TEST_POSE_ESTIMATION }; @@ -356,10 +357,42 @@ int VerifyObjectDetectionResults(tensor_t &outputData, std::vector &answers { std::vector> inferDimInfo(outputData.dimInfo); std::vector inferResults(outputData.data.begin(), outputData.data.end()); - float *boxes = reinterpret_cast(inferResults[0]); - float *scores = reinterpret_cast(inferResults[2]); - int num_of_detections = (int)(*reinterpret_cast(inferResults[3])); + float* boxes = nullptr; + float* classes = nullptr; + float* scores = nullptr; + int num_of_detections = 0; + + if (outputData.dimInfo.size() == 1) { + // There is no way to know how many objects are detected unless the number of objects is + // provided. In that case, each backend should provide the number of results manually. + // For example, in OpenCV, MobilenetV1-SSD doesn't provide it, so the number of objects is + // written to the 1st element, i.e., outputData.data[0] (the shape is 1x1xNx7 and the 1st of 7 + // indicates the image id, which is useless if a batch mode isn't supported). + // So, use the 1st of 7. 
+ + num_of_detections = (int)(*reinterpret_cast(outputData.data[0])); + + boxes = new float[num_of_detections * 4]; + classes = new float[num_of_detections]; + scores = new float[num_of_detections]; + + for (int idx = 0; idx < num_of_detections; ++idx) { + classes[idx] = (reinterpret_cast(outputData.data[0]))[idx*inferDimInfo[0][3] + 1]; + scores[idx] = (reinterpret_cast(outputData.data[0]))[idx*inferDimInfo[0][3] + 2]; + + boxes[idx*4] = (reinterpret_cast(outputData.data[0]))[idx*inferDimInfo[0][3] + 4]; + boxes[idx*4 + 1] = (reinterpret_cast(outputData.data[0]))[idx*inferDimInfo[0][3] + 3]; + boxes[idx*4 + 2] = (reinterpret_cast(outputData.data[0]))[idx*inferDimInfo[0][3] + 6]; + boxes[idx*4 + 3] = (reinterpret_cast(outputData.data[0]))[idx*inferDimInfo[0][3] + 5]; + } + } else { + boxes = reinterpret_cast(inferResults[0]); + classes = reinterpret_cast(inferResults[1]); + scores = reinterpret_cast(inferResults[2]); + num_of_detections = (int)(*reinterpret_cast(inferResults[3])); + } + int left = 0, top = 0, right = 0, bottom = 0; float max_score = 0.0f; @@ -374,9 +407,40 @@ int VerifyObjectDetectionResults(tensor_t &outputData, std::vector &answers } } + if (outputData.dimInfo.size() == 1) { + delete [] boxes; + delete [] classes; + delete [] scores; + } + return (answers[0] == left && answers[1] == top && answers[2] == right && answers[3] == bottom); } +int VerifyFacialLandmarkDetectionResults(tensor_t &outputData, std::vector &answers, int height, int width) +{ + std::vector> inferDimInfo(outputData.dimInfo); + std::vector inferResults(outputData.data.begin(), outputData.data.end()); + std::vector result_x, result_y; + + long number_of_detections = inferDimInfo[0][1]; + float* loc = reinterpret_cast(inferResults[0]); + + for (int idx = 0; idx < number_of_detections; idx+=2) { + result_x.push_back((int)(loc[idx] * width)); + result_y.push_back((int)(loc[idx+1] * height)); + } + + int ret = 1; + for (int i = 0; i < (number_of_detections>>1); i++) { + if 
(result_x[i] != answers[i*2] || result_y[i] != answers[i*2 + 1]) { + ret = 0; + break; + } + } + + return ret; +} + int VerifyPoseEstimationResults(tensor_t &outputData, std::vector &answers, int height, int width) { std::vector> inferDimInfo(outputData.dimInfo); @@ -454,8 +518,8 @@ TEST_P(InferenceEngineTfliteTest, Inference) case TEST_FACE_DETECTION: test_name.append("Face detection"); break; - case TEST_FACILA_LANDMARK_DETECTION: - test_name.append("Facila landmark detection"); + case TEST_FACIAL_LANDMARK_DETECTION: + test_name.append("Facial landmark detection"); break; case TEST_POSE_ESTIMATION: test_name.append("Pose estimation"); @@ -589,7 +653,7 @@ TEST_P(InferenceEngineTfliteTest, Inference) ret = VerifyObjectDetectionResults(result, answers, 1152, 1536); EXPECT_EQ(ret, 1); break; - case TEST_FACILA_LANDMARK_DETECTION: + case TEST_FACIAL_LANDMARK_DETECTION: // TODO. break; case TEST_POSE_ESTIMATION: @@ -607,6 +671,194 @@ TEST_P(InferenceEngineTfliteTest, Inference) delete engine; } +TEST_P(InferenceEngineCaffeTest, Inference) +{ + std::string backend_name; + int target_devices; + int test_type; + int iteration; + int tensor_type; + std::vector image_paths; + int height; + int width; + int ch; + std::vector input_layers; + std::vector output_layers; + std::vector model_paths; + std::vector answers; + + std::tie(backend_name, target_devices, test_type, iteration, tensor_type, image_paths, height, width, ch, input_layers, output_layers, model_paths, answers) = GetParam(); + + if (iteration < 1) { + iteration = 1; + } + + std::string test_name; + switch (test_type) { + case TEST_IMAGE_CLASSIFICATION: + test_name.append("Image classification"); + break; + case TEST_OBJECT_DETECTION: + test_name.append("Object detection"); + break; + case TEST_FACE_DETECTION: + test_name.append("Face detection"); + break; + case TEST_FACIAL_LANDMARK_DETECTION: + test_name.append("Facial landmark detection"); + break; + case TEST_POSE_ESTIMATION: + test_name.append("Pose 
estimation"); + break; + } + + std::cout << test_name << " inference test : backend = " << backend_name << ", target device = " << (target_devices == INFERENCE_TARGET_CPU ? "CPU" : "GPU") << "\n"; + + inference_engine_config config = { + .backend_name = backend_name, + .target_devices = target_devices + }; + + InferenceEngineCommon *engine = new InferenceEngineCommon(&config); + if (engine == nullptr) { + ASSERT_TRUE(engine); + return; + } + + int ret = engine->EnableProfiler(true); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + delete engine; + ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE); + return; + } + + ret = engine->DumpProfileToFile("dump.txt"); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + delete engine; + ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE); + return; + } + + ret = engine->BindBackend(&config); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + delete engine; + ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE); + return; + } + + inference_engine_capacity capacity; + ret = engine->GetBackendCapacity(&capacity); + EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE); + + ret = engine->SetTargetDevices(target_devices); + EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE); + + std::vector models; + int model_type = GetModelInfo(model_paths, models); + if (model_type == -1) { + delete engine; + ASSERT_NE(model_type, -1); + return; + } + + inference_engine_layer_property input_property; + std::vector::iterator iter; + + for (iter = input_layers.begin(); iter != input_layers.end(); iter++) { + inference_engine_tensor_info tensor_info = { + { 1, ch, height, width }, + (inference_tensor_shape_type_e)TENSOR_SHAPE_NCHW, + (inference_tensor_data_type_e)tensor_type, + (size_t)(1 * ch * height * width) + }; + + input_property.layer_names.push_back(*iter); + input_property.tensor_infos.push_back(tensor_info); + } + + ret = engine->SetInputLayerProperty(input_property); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + delete engine; + ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE); + return; + } + 
+ inference_engine_layer_property output_property; + + for (iter = output_layers.begin(); iter != output_layers.end(); iter++) { + output_property.layer_names.push_back(*iter); + } + + ret = engine->SetOutputLayerProperty(output_property); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + delete engine; + ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE); + return; + } + + ret = engine->Load(models, (inference_model_format_e)model_type); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + delete engine; + ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE); + return; + } + + std::vector inputs, outputs; + ret = PrepareTensorBuffers(engine, inputs, outputs); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + delete engine; + ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE); + return; + } + + // Copy input image tensor data from a given file to input tensor buffer. + for (int i = 0; i < (int)image_paths.size(); ++i) { + CopyFileToMemory(image_paths[i].c_str(), inputs[i], inputs[i].size); + } + + for (int repeat = 0; repeat < iteration; ++repeat) { + ret = engine->Run(inputs, outputs); + EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE); + } + + tensor_t result; + FillOutputResult(engine, outputs, result); + + switch (test_type) { + case TEST_IMAGE_CLASSIFICATION: + ret = VerifyImageClassificationResults(result, answers[0]); + EXPECT_EQ(ret, 1); + break; + case TEST_OBJECT_DETECTION: + // 1024 : fixed height size of dumped image, 636 : fixed width size of dumped image. + ret = VerifyObjectDetectionResults(result, answers, 636, 1024); + EXPECT_EQ(ret, 1); + break; + case TEST_FACE_DETECTION: + // 1152 : fixed height size of dumped image, 1536 : fixed width size of dumped image. + ret = VerifyObjectDetectionResults(result, answers, 1152, 1536); + EXPECT_EQ(ret, 1); + break; + case TEST_FACIAL_LANDMARK_DETECTION: + // 128 : fixed height size of dumped image, 128 : fixed width size of dumped image. 
+ ret = VerifyFacialLandmarkDetectionResults(result, answers, 128, 128); + EXPECT_EQ(ret, 1); + break; + case TEST_POSE_ESTIMATION: + // 563 : fixed height size of dumped image, 750 : fixed width size of dumped image. + ret = VerifyPoseEstimationResults(result, answers, 563, 750); + EXPECT_EQ(ret, 1); + break; + } + + CleanupTensorBuffers(inputs, outputs); + + engine->UnbindBackend(); + models.clear(); + + delete engine; +} + INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineCommonTest, testing::Values( // parameter order : backend name, target device @@ -618,6 +870,11 @@ INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineCommonTest, ParamType("tflite", INFERENCE_TARGET_GPU), // DLDT. ParamType("dldt", INFERENCE_TARGET_CUSTOM) + // OPENCV. + ParamType("opencv", INFERENCE_TARGET_CPU), + ParamType("opencv", INFERENCE_TARGET_GPU) + /* TODO */ + ) ); @@ -630,7 +887,10 @@ INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineCommonTest_2, ParamType_Load("armnn", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }), // TFLITE. ParamType_Load("tflite", INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }), - ParamType_Load("tflite", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }) + ParamType_Load("tflite", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }), + // OPENCV. 
+ ParamType_Load("opencv", INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel", "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt" }), + ParamType_Load("opencv", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel", "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt" }) /* TODO */ ) ); @@ -682,3 +942,28 @@ INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineTfliteTest, /* TODO */ ) ); + +INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineCaffeTest, + testing::Values( + // parameter order : backend_name, target_devices, test_type, iteration, tensor_type, image_paths, height, width, ch, input_layers, output_layers, model_paths, answers + // OPENCV + // squeezenet based image classification test + ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_IMAGE_CLASSIFICATION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/image_classification_caffe.bin" }, 227, 227, 3, { "data" }, { "prob" }, { "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel", "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt" }, { 281 }), + ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_IMAGE_CLASSIFICATION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/image_classification_caffe.bin" }, 227, 227, 3, { "data" }, { "prob" }, { "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel", "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt" }, { 281 }), + + // mobilenet-ssd based object detection test + ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_OBJECT_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/object_detection_caffe.bin" }, 300, 300, 3, { "data" }, { "detection_out" }, { "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.caffemodel", 
"/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.prototxt" }, { 15, 19, 335, 557 }), + ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_OBJECT_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/object_detection_caffe.bin" }, 300, 300, 3, { "data" }, { "detection_out" }, { "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.caffemodel", "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.prototxt" }, { 15, 19, 335, 557 }), + + // resnet10-ssd based face detection test + ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_FACE_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/face_detection_caffe.bin" }, 300, 300, 3, { "data" }, { "detection_out" }, { "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.caffemodel", "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.prototxt" }, { 733, 233, 965, 539 }), + ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_FACE_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/face_detection_caffe.bin" }, 300, 300, 3, { "data" }, { "detection_out" }, { "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.caffemodel", "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.prototxt" }, { 733, 233, 965, 539 }), + + // tweakcnn based facial landmark detection test + ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_FACIAL_LANDMARK_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/faciallandmark_detection_caffe.bin" }, 128, 128, 3, { "data" }, { "Sigmoid_fc2" }, { "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.caffemodel", "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.prototxt" }, + { 53, 45, 85, 46, 66, 64, 54, 78, 82, 79}), + ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_FACIAL_LANDMARK_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/faciallandmark_detection_caffe.bin" }, 
128, 128, 3, { "data" }, { "Sigmoid_fc2" }, { "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.caffemodel", "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.prototxt" }, + { 53, 45, 85, 46, 66, 64, 54, 78, 82, 79}) + /* TODO */ + ) +); \ No newline at end of file