From: Inki Dae <inki.dae@samsung.com>
Date: Wed, 4 Mar 2020 10:00:42 +0000 (+0900)
Subject: test: Add inference result verification support
X-Git-Tag: submit/tizen/20200423.063253~42
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c4c528005e24c8f18dcffd9fdaf4016950495f57;p=platform%2Fcore%2Fmultimedia%2Finference-engine-interface.git

test: Add inference result verification support

This patch adds inference result verification to each inference test:
image classification, object/face detection and pose estimation.

For the verification, each test feeds the engine a pre-dumped file that
contains the raw in-memory tensor buffer of a given sample image, and
compares the inference output against a pre-calculated result. A
standalone sketch of this flow is shown after the diff.

Change-Id: I46342f832a59361165b54d5357c4653c5acad566
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---

diff --git a/test/src/inference_engine_test.cpp b/test/src/inference_engine_test.cpp
index df8d958..27125d1 100644
--- a/test/src/inference_engine_test.cpp
+++ b/test/src/inference_engine_test.cpp
@@ -17,6 +17,10 @@
 #include
 #include
 #include
+#include
+#include
+#include
+#include

 #include "gtest/gtest.h"

@@ -27,7 +31,7 @@ using namespace InferenceEngineInterface::Common;

 typedef std::tuple<std::string, int> ParamType;
 typedef std::tuple<std::string, int, std::vector<std::string>> ParamType_Load;
-typedef std::tuple<std::string, int, int, int, int, std::vector<std::string>, std::vector<std::string>, std::vector<std::string>> ParamType_Infer;
+typedef std::tuple<std::string, int, int, std::vector<std::string>, int, int, int, std::vector<std::string>, std::vector<std::string>, std::vector<std::string>, std::vector<int>> ParamType_Infer;

 class InferenceEngineCommonTest : public testing::TestWithParam<ParamType> { };
 class InferenceEngineCommonTest_2 : public testing::TestWithParam<ParamType_Load> { };

@@ -43,6 +47,14 @@ std::map<std::string, int> Model_Formats = {
 	{ "onnx", INFERENCE_MODEL_ONNX }
 };

+enum {
+	TEST_IMAGE_CLASSIFICATION = 0,
+	TEST_OBJECT_DETECTION,
+	TEST_FACE_DETECTION,
+	TEST_FACIAL_LANDMARK_DETECTION,
+	TEST_POSE_ESTIMATION
+};
+
 TEST_P(InferenceEngineCommonTest, Bind)
 {
 	std::string backend_name;

@@ -123,11 +135,12 @@ int PrepareTensorBuffers(InferenceEngineCommon *engine, std::vector
+void FillOutputResult(InferenceEngineCommon *engine, std::vector<inference_engine_tensor_buffer> &outputs, tensor_t &outputData)
+{
+	inference_engine_layer_property property;
+	int ret = engine->GetOutputLayerProperty(property);
+	EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+
+	for (int i = 0; i < (int)property.tensor_infos.size(); ++i) {
+		inference_engine_tensor_info tensor_info = property.tensor_infos[i];
+
+		std::vector<int> tmpDimInfo;
+		for (int j = 0; j < (int)tensor_info.shape.size(); j++) {
+			tmpDimInfo.push_back(tensor_info.shape[j]);
+		}
+
+		outputData.dimInfo.push_back(tmpDimInfo);
+
+		// Normalize the output tensor data, converting it to float type, in case of a quantized model.
+		if (tensor_info.data_type == TENSOR_DATA_TYPE_UINT8) {
+			unsigned char *ori_buf = (unsigned char *)outputs[i].buffer;
+			float *new_buf = new float[tensor_info.size];
+			ASSERT_TRUE(new_buf);
+
+			for (int j = 0; j < (int)tensor_info.size; j++) {
+				new_buf[j] = (float)ori_buf[j] / 255.0f;
+			}
+
+			// Replace the original buffer with the new one, and release the original one.
+			outputs[i].buffer = new_buf;
+			delete[] ori_buf;
+		}
+
+		outputData.data.push_back((void *)outputs[i].buffer);
+	}
+}
+
+int VerifyImageClassificationResults(tensor_t &outputData, int answer)
+{
+	std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
+	std::vector<void *> inferResults(outputData.data.begin(), outputData.data.end());
+
+	int idx = -1;
+	int count = inferDimInfo[0][1];
+	float value = 0.0f;
+
+	float *prediction = reinterpret_cast<float *>(inferResults[0]);
+	for (int i = 0; i < count; ++i) {
+		if (value < prediction[i]) {
+			value = prediction[i];
+			idx = i;
+		}
+	}
+
+	return idx == answer;
+}
+
+int VerifyObjectDetectionResults(tensor_t &outputData, std::vector<int> &answers, int height, int width)
+{
+	std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
+	std::vector<void *> inferResults(outputData.data.begin(), outputData.data.end());
+	float *boxes = reinterpret_cast<float *>(inferResults[0]);
+	float *scores = reinterpret_cast<float *>(inferResults[2]);
+
+	int num_of_detections = (int)(*reinterpret_cast<float *>(inferResults[3]));
+	int left = 0, top = 0, right = 0, bottom = 0;
+	float max_score = 0.0f;
+
+	for (int i = 0; i < num_of_detections; ++i) {
+		if (max_score < scores[i]) {
+			max_score = scores[i];
+
+			left = (int)(boxes[i * 4 + 1] * width);
+			top = (int)(boxes[i * 4 + 0] * height);
+			right = (int)(boxes[i * 4 + 3] * width);
+			bottom = (int)(boxes[i * 4 + 2] * height);
+		}
+	}
+
+	return (answers[0] == left && answers[1] == top && answers[2] == right && answers[3] == bottom);
+}
+
+int VerifyPoseEstimationResults(tensor_t &outputData, std::vector<int> &answers, int height, int width)
+{
+	std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
+	std::vector<void *> inferResults(outputData.data.begin(), outputData.data.end());
+	std::vector<int> result_x, result_y;
+
+	const int heat_map_width = 96, heat_map_height = 96;
+	int num_of_pose = inferDimInfo[0][3];
+	float *data = static_cast<float *>(inferResults[0]);
+
+	float ratio_x = (float)width / (float)inferDimInfo[0][2];
+	float ratio_y = (float)height / (float)inferDimInfo[0][1];
+
+	for (int idx = 0; idx < num_of_pose; ++idx) {
+		float max_score = 0.0f;
+		int max_x = 0, max_y = 0;
+
+		for (int y = 0; y < heat_map_height; ++y) {
+			for (int x = 0; x < heat_map_width; ++x) {
+				// heat_map[Yn][Xn][Kn] = (Yn * heat_map_width * num_of_pose) + (Xn * num_of_pose) + Kn
+				float score = data[(y * heat_map_width * num_of_pose) + (x * num_of_pose) + idx];
+				if (score > max_score) {
+					max_score = score;
+					max_x = x;
+					max_y = y;
+				}
+			}
+		}
+
+		result_x.push_back((int)((float)(max_x + 1) * ratio_x));
+		result_y.push_back((int)((float)(max_y + 1) * ratio_y));
+	}
+
+	int ret = 1;
+	for (int i = 0; i < num_of_pose; ++i) {
+		if (result_x[i] != answers[i] || result_y[i] != answers[num_of_pose + i]) {
+			ret = 0;
+			break;
+		}
+	}
+
+	return ret;
+}
+
 TEST_P(InferenceEngineCommonTest_3, Inference)
 {
 	std::string backend_name;
 	int target_devices;
+	int test_type;
+	std::vector<std::string> image_paths;
 	int height;
 	int width;
 	int ch;
 	std::vector<std::string> input_layers;
 	std::vector<std::string> output_layers;
 	std::vector<std::string> model_paths;
+	std::vector<int> answers;
+
+	std::tie(backend_name, target_devices, test_type, image_paths, height, width, ch, input_layers, output_layers, model_paths, answers) = GetParam();
+
+	std::string test_name;
+	switch (test_type) {
+	case TEST_IMAGE_CLASSIFICATION:
+		test_name.append("Image classification");
+		break;
+	case TEST_OBJECT_DETECTION:
+		test_name.append("Object detection");
+		break;
+	case TEST_FACE_DETECTION:
+		test_name.append("Face detection");
+		break;
+	case TEST_FACIAL_LANDMARK_DETECTION:
+		test_name.append("Facial landmark detection");
detection"); + break; + case TEST_POSE_ESTIMATION: + test_name.append("Pose estimation"); + break; + } - std::tie(backend_name, target_devices, height, width, ch, input_layers, output_layers, model_paths) = GetParam(); - - std::cout <<"Inference test : backend = " << backend_name << ", target device = " << target_devices << "\n"; + std::cout << test_name << " inference test : backend = " << backend_name << ", target device = " << (target_devices == INFERENCE_TARGET_CPU ? "CPU" : "GPU") << "\n"; inference_engine_config config = { .backend_name = backend_name, @@ -250,6 +421,7 @@ TEST_P(InferenceEngineCommonTest_3, Inference) for (iter = input_layers.begin(); iter != input_layers.end(); iter++) { input_property.layer_names.push_back(*iter); + // TODO. Update tensor_info too at here. } ret = engine->SetInputLayerProperty(input_property); @@ -267,28 +439,64 @@ TEST_P(InferenceEngineCommonTest_3, Inference) ret = engine->Load(models, (inference_model_format_e)model_type); ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE); - std::vector inputs, outputs; + std::vector inputs, outputs; ret = PrepareTensorBuffers(engine, inputs, outputs); ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE); // If backend is OpenCV then allocate input tensor buffer at here. if (inputs.empty()) { - inference_engine_tensor_buffer tensor_buffer; - unsigned int tensor_size; - if (ch == 3) { - tensor_size = height * width * 4; - tensor_buffer.buffer = (void *)(new float[tensor_size]); - } else { - tensor_size = height * width; - tensor_buffer.buffer = (void *)(new unsigned char[tensor_size]); - } + for (int i = 0; i < (int)input_property.tensor_infos.size(); ++i) { + inference_engine_tensor_info tensor_info = input_property.tensor_infos[i]; + inference_engine_tensor_buffer tensor_buffer; + unsigned int tensor_size = height * width * ch; + if (tensor_info.data_type == TENSOR_DATA_TYPE_FLOAT32) { + tensor_buffer.buffer = (void *)(new float[tensor_size]); + tensor_buffer.size = tensor_size * 4; + } else { + tensor_buffer.buffer = (void *)(new unsigned char[tensor_size]); + tensor_buffer.size = tensor_size; + } + + inputs.push_back(tensor_buffer); + } + } - inputs.push_back(tensor_buffer); + // Copy input image tensor data from a given file to input tensor buffer. + for (int i = 0; i < (int)image_paths.size(); ++i) { + CopyFileToMemory(image_paths[i].c_str(), inputs[i], inputs[i].size); } ret = engine->Run(inputs, outputs); EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE); + tensor_t result; + FillOutputResult(engine, outputs, result); + + switch (test_type) { + case TEST_IMAGE_CLASSIFICATION: + ret = VerifyImageClassificationResults(result, answers[0]); + EXPECT_EQ(ret, 1); + break; + case TEST_OBJECT_DETECTION: + // 1072 : fixed height size of dumped image, 1608 : fixed width size of dumped image. + ret = VerifyObjectDetectionResults(result, answers, 1072, 1608); + EXPECT_EQ(ret, 1); + break; + case TEST_FACE_DETECTION: + // 1152 : fixed height size of dumped image, 1536 : fixed width size of dumped image. + ret = VerifyObjectDetectionResults(result, answers, 1152, 1536); + EXPECT_EQ(ret, 1); + break; + case TEST_FACILA_LANDMARK_DETECTION: + // TODO. + break; + case TEST_POSE_ESTIMATION: + // 563 : fixed height size of dumped image, 750 : fixed width size of dumped image. 
+		ret = VerifyPoseEstimationResults(result, answers, 563, 750);
+		EXPECT_EQ(ret, 1);
+		break;
+	}
+
 	engine->UnbindBackend();

 	delete engine;
@@ -296,52 +504,51 @@ TEST_P(InferenceEngineCommonTest_3, Inference)

 INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineCommonTest,
 		testing::Values(
-			// backend name, target device
+			// parameter order : backend name, target device
 			ParamType("armnn", INFERENCE_TARGET_CPU),
-			ParamType("armnn", INFERENCE_TARGET_GPU),
-			ParamType("armnn", INFERENCE_TARGET_GPU | INFERENCE_TARGET_CPU)
+			ParamType("armnn", INFERENCE_TARGET_GPU)
 			/* TODO */
 		)
 );

 INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineCommonTest_2,
 		testing::Values(
-			// backend name, target device, model path/s
+			// parameter order : backend name, target device, model path/s
 			// mobilenet based image classification model loading test
 			ParamType_Load("armnn", INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }),
 			ParamType_Load("armnn", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }),
-			ParamType_Load("armnn", INFERENCE_TARGET_GPU | INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }),
 			// object detection model loading test
 			ParamType_Load("armnn", INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/OD/tflite/od_tflite_model.tflite" }),
 			ParamType_Load("armnn", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/OD/tflite/od_tflite_model.tflite" }),
-			ParamType_Load("armnn", INFERENCE_TARGET_GPU | INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/OD/tflite/od_tflite_model.tflite" }),
 			// face detection model loading test
 			ParamType_Load("armnn", INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/FD/tflite/fd_tflite_model1.tflite" }),
 			ParamType_Load("armnn", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/FD/tflite/fd_tflite_model1.tflite" }),
-			ParamType_Load("armnn", INFERENCE_TARGET_GPU | INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/FD/tflite/fd_tflite_model1.tflite" }),
 			// pose estimation model loading test
 			ParamType_Load("armnn", INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model.tflite" }),
-			ParamType_Load("armnn", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model.tflite" }),
-			ParamType_Load("armnn", INFERENCE_TARGET_GPU | INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model.tflite" })
+			ParamType_Load("armnn", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model.tflite" })
 			/* TODO */
 		)
 );

 INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineCommonTest_3,
 		testing::Values(
-			// backend name, target device, height, width, channel count, input layer names, output layer names, model path/s
+			// parameter order : backend name, target device, test type, input image path/s, height, width, channel count, input layer names, output layer names, model path/s, inference result
 			// mobilenet based image classification test
-			ParamType_Infer("armnn", INFERENCE_TARGET_CPU, 224, 224, 3, { "input_2" }, { "dense_3/Softmax" }, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }),
-			ParamType_Infer("armnn", INFERENCE_TARGET_GPU, 224, 224, 3, { "input_2" }, { "dense_3/Softmax" }, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }),
+			ParamType_Infer("armnn", INFERENCE_TARGET_CPU, TEST_IMAGE_CLASSIFICATION, { "/opt/usr/images/image_classification.bin" }, 224, 224, 3, { "input_2" }, { "dense_3/Softmax" }, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }, { 3 }),
+			ParamType_Infer("armnn", INFERENCE_TARGET_GPU, TEST_IMAGE_CLASSIFICATION, { "/opt/usr/images/image_classification.bin" }, 224, 224, 3, { "input_2" }, { "dense_3/Softmax" }, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }, { 3 }),
 			// object detection test
-			ParamType_Infer("armnn", INFERENCE_TARGET_CPU, 300, 300, 3, { "normalized_input_image_tensor" }, { "TFLite_Detection_PostProcess", "TFLite_Detection_PostProcess:1", "TFLite_Detection_PostProcess:2", "TFLite_Detection_PostProcess:3" }, { "/usr/share/capi-media-vision/models/OD/tflite/od_tflite_model.tflite" }),
-			ParamType_Infer("armnn", INFERENCE_TARGET_GPU, 300, 300, 3, { "normalized_input_image_tensor" }, { "TFLite_Detection_PostProcess", "TFLite_Detection_PostProcess:1", "TFLite_Detection_PostProcess:2", "TFLite_Detection_PostProcess:3" }, { "/usr/share/capi-media-vision/models/OD/tflite/od_tflite_model.tflite" }),
+			ParamType_Infer("armnn", INFERENCE_TARGET_CPU, TEST_OBJECT_DETECTION, { "/opt/usr/images/object_detection.bin" }, 300, 300, 3, { "normalized_input_image_tensor" }, { "TFLite_Detection_PostProcess", "TFLite_Detection_PostProcess:1", "TFLite_Detection_PostProcess:2", "TFLite_Detection_PostProcess:3" }, { "/usr/share/capi-media-vision/models/OD/tflite/od_tflite_model.tflite" }, { 451, 474, 714, 969 }),
+			ParamType_Infer("armnn", INFERENCE_TARGET_GPU, TEST_OBJECT_DETECTION, { "/opt/usr/images/object_detection.bin" }, 300, 300, 3, { "normalized_input_image_tensor" }, { "TFLite_Detection_PostProcess", "TFLite_Detection_PostProcess:1", "TFLite_Detection_PostProcess:2", "TFLite_Detection_PostProcess:3" }, { "/usr/share/capi-media-vision/models/OD/tflite/od_tflite_model.tflite" }, { 451, 474, 714, 969 }),
 			// face detection test
-			ParamType_Infer("armnn", INFERENCE_TARGET_CPU, 300, 300, 3, { "normalized_input_image_tensor" }, { "TFLite_Detection_PostProcess", "TFLite_Detection_PostProcess:1", "TFLite_Detection_PostProcess:2", "TFLite_Detection_PostProcess:3" }, { "/usr/share/capi-media-vision/models/FD/tflite/fd_tflite_model1.tflite" }),
-			ParamType_Infer("armnn", INFERENCE_TARGET_GPU, 300, 300, 3, { "normalized_input_image_tensor" }, { "TFLite_Detection_PostProcess", "TFLite_Detection_PostProcess:1", "TFLite_Detection_PostProcess:2", "TFLite_Detection_PostProcess:3" }, { "/usr/share/capi-media-vision/models/FD/tflite/fd_tflite_model1.tflite" }),
+			ParamType_Infer("armnn", INFERENCE_TARGET_CPU, TEST_FACE_DETECTION, { "/opt/usr/images/face_detection.bin" }, 300, 300, 3, { "normalized_input_image_tensor" }, { "TFLite_Detection_PostProcess", "TFLite_Detection_PostProcess:1", "TFLite_Detection_PostProcess:2", "TFLite_Detection_PostProcess:3" }, { "/usr/share/capi-media-vision/models/FD/tflite/fd_tflite_model1.tflite" }, { 727, 225, 960, 555 }),
+			ParamType_Infer("armnn", INFERENCE_TARGET_GPU, TEST_FACE_DETECTION, { "/opt/usr/images/face_detection.bin" }, 300, 300, 3, { "normalized_input_image_tensor" }, { "TFLite_Detection_PostProcess", "TFLite_Detection_PostProcess:1", "TFLite_Detection_PostProcess:2", "TFLite_Detection_PostProcess:3" }, { "/usr/share/capi-media-vision/models/FD/tflite/fd_tflite_model1.tflite" }, { 727, 225, 960, 555 }),
 			// pose estimation test
-			ParamType_Infer("armnn", INFERENCE_TARGET_CPU, 192, 192, 3, { "image" }, { "Convolutional_Pose_Machine/stage_5_out" }, { "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model.tflite" }),
ParamType_Infer("armnn", INFERENCE_TARGET_GPU, 192, 192, 3, { "image" }, { "Convolutional_Pose_Machine/stage_5_out" }, { "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model.tflite" }) + ParamType_Infer("armnn", INFERENCE_TARGET_CPU, TEST_POSE_ESTIMATION, { "/opt/usr/images/pose_estimation.bin" }, 192, 192, 3, { "image" }, { "Convolutional_Pose_Machine/stage_5_out" }, { "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model.tflite" }, + { 382, 351, 320, 257, 226, 414, 414, 445, 351, 351, 351, 382, 382, 382, + 76, 146, 170, 193, 216, 146, 123, 99, 287, 381, 451, 287, 381, 475 }), + ParamType_Infer("armnn", INFERENCE_TARGET_GPU, TEST_POSE_ESTIMATION, { "/opt/usr/images/pose_estimation.bin" }, 192, 192, 3, { "image" }, { "Convolutional_Pose_Machine/stage_5_out" }, { "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model.tflite" }, + { 382, 351, 320, 257, 226, 414, 414, 445, 351, 351, 351, 382, 382, 382, + 76, 146, 170, 193, 216, 146, 123, 99, 287, 381, 451, 287, 381, 475 }) /* TODO */ ) );