class InferenceEngineCommonTest : public testing::TestWithParam<ParamType> { };
class InferenceEngineCommonTest_2 : public testing::TestWithParam<ParamType_Load> { };
class InferenceEngineTfliteTest : public testing::TestWithParam<ParamType_Infer> { };
+// Fixture for Caffe-model inference tests; parameterized with the same
+// tuple type (ParamType_Infer) as the TFLite inference fixture above.
+class InferenceEngineCaffeTest : public testing::TestWithParam<ParamType_Infer> { };
std::map<std::string, int> Model_Formats = {
{ "caffemodel", INFERENCE_MODEL_CAFFE },
TEST_IMAGE_CLASSIFICATION = 0,
TEST_OBJECT_DETECTION,
TEST_FACE_DETECTION,
- TEST_FACILA_LANDMARK_DETECTION,
+ TEST_FACIAL_LANDMARK_DETECTION,
TEST_POSE_ESTIMATION
};
{
std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
std::vector<void*> inferResults(outputData.data.begin(), outputData.data.end());
- float *boxes = reinterpret_cast<float *>(inferResults[0]);
- float *scores = reinterpret_cast<float *>(inferResults[2]);
- int num_of_detections = (int)(*reinterpret_cast<float *>(inferResults[3]));
+ float* boxes = nullptr;
+ float* classes = nullptr;
+ float* scores = nullptr;
+ int num_of_detections = 0;
+
+ if (outputData.dimInfo.size() == 1) {
+ // there is no way to know how many objects are detect unless the number of objects aren't
+ // provided. In the case, each backend should provide the number of results manually.
+ // For example, in OpenCV, MobilenetV1-SSD doesn't provide it so the number of objects are
+ // written to the 1st element i.e., outputData.data[0] (the shape is 1x1xNx7 and the 1st of 7
+ // indicats the image id. But it is useless if a batch mode isn't supported.
+ // So, use the 1st of 7.
+
+ num_of_detections = (int)(*reinterpret_cast<float*>(outputData.data[0]));
+
+ boxes = new float[num_of_detections * 4];
+ classes = new float[num_of_detections];
+ scores = new float[num_of_detections];
+
+ for (int idx = 0; idx < num_of_detections; ++idx) {
+ classes[idx] = (reinterpret_cast<float*>(outputData.data[0]))[idx*inferDimInfo[0][3] + 1];
+ scores[idx] = (reinterpret_cast<float*>(outputData.data[0]))[idx*inferDimInfo[0][3] + 2];
+
+ boxes[idx*4] = (reinterpret_cast<float*>(outputData.data[0]))[idx*inferDimInfo[0][3] + 4];
+ boxes[idx*4 + 1] = (reinterpret_cast<float*>(outputData.data[0]))[idx*inferDimInfo[0][3] + 3];
+ boxes[idx*4 + 2] = (reinterpret_cast<float*>(outputData.data[0]))[idx*inferDimInfo[0][3] + 6];
+ boxes[idx*4 + 3] = (reinterpret_cast<float*>(outputData.data[0]))[idx*inferDimInfo[0][3] + 5];
+ }
+ } else {
+ boxes = reinterpret_cast<float*>(inferResults[0]);
+ classes = reinterpret_cast<float*>(inferResults[1]);
+ scores = reinterpret_cast<float*>(inferResults[2]);
+ num_of_detections = (int)(*reinterpret_cast<float*>(inferResults[3]));
+ }
+
int left = 0, top = 0, right = 0, bottom = 0;
float max_score = 0.0f;
}
}
+ if (outputData.dimInfo.size() == 1) {
+ delete [] boxes;
+ delete [] classes;
+ delete [] scores;
+ }
+
return (answers[0] == left && answers[1] == top && answers[2] == right && answers[3] == bottom);
}
+int VerifyFacialLandmarkDetectionResults(tensor_t &outputData, std::vector<int> &answers, int height, int width)
+{
+ // Compare detected facial landmark positions against the expected answers.
+ // The first output tensor holds normalized (x, y) pairs; each coordinate is
+ // scaled back to the dump image width/height before the comparison.
+ std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
+ std::vector<void*> inferResults(outputData.data.begin(), outputData.data.end());
+
+ // inferDimInfo[0][1] is the flat count of coordinates (2 per landmark).
+ long number_of_detections = inferDimInfo[0][1];
+ float* landmarks = reinterpret_cast<float*>(inferResults[0]);
+
+ // Walk the (x, y) pairs and fail fast on the first mismatch.
+ for (long idx = 0; idx < number_of_detections; idx += 2) {
+ int pos_x = (int)(landmarks[idx] * width);
+ int pos_y = (int)(landmarks[idx + 1] * height);
+
+ if (pos_x != answers[idx] || pos_y != answers[idx + 1])
+ return 0;
+ }
+
+ return 1;
+}
+
int VerifyPoseEstimationResults(tensor_t &outputData, std::vector<int> &answers, int height, int width)
{
std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
case TEST_FACE_DETECTION:
test_name.append("Face detection");
break;
- case TEST_FACILA_LANDMARK_DETECTION:
- test_name.append("Facila landmark detection");
+ case TEST_FACIAL_LANDMARK_DETECTION:
+ test_name.append("Facial landmark detection");
break;
case TEST_POSE_ESTIMATION:
test_name.append("Pose estimation");
ret = VerifyObjectDetectionResults(result, answers, 1152, 1536);
EXPECT_EQ(ret, 1);
break;
- case TEST_FACILA_LANDMARK_DETECTION:
+ case TEST_FACIAL_LANDMARK_DETECTION:
// TODO.
break;
case TEST_POSE_ESTIMATION:
delete engine;
}
+// End-to-end inference test for Caffe models. Binds the requested backend,
+// loads the parameterized model, feeds pre-dumped input tensors from disk,
+// runs inference 'iteration' times, and verifies the result against the
+// expected answers for the given test type.
+TEST_P(InferenceEngineCaffeTest, Inference)
+{
+ std::string backend_name;
+ int target_devices;
+ int test_type;
+ int iteration;
+ int tensor_type;
+ std::vector<std::string> image_paths;
+ int height;
+ int width;
+ int ch;
+ std::vector<std::string> input_layers;
+ std::vector<std::string> output_layers;
+ std::vector<std::string> model_paths;
+ std::vector<int> answers;
+
+ // Unpack the test parameters; order must match the comment in the
+ // corresponding INSTANTIATE_TEST_CASE_P block.
+ std::tie(backend_name, target_devices, test_type, iteration, tensor_type, image_paths, height, width, ch, input_layers, output_layers, model_paths, answers) = GetParam();
+
+ // Run at least one inference pass.
+ if (iteration < 1) {
+ iteration = 1;
+ }
+
+ // Human-readable label for the log line below.
+ std::string test_name;
+ switch (test_type) {
+ case TEST_IMAGE_CLASSIFICATION:
+ test_name.append("Image classification");
+ break;
+ case TEST_OBJECT_DETECTION:
+ test_name.append("Object detection");
+ break;
+ case TEST_FACE_DETECTION:
+ test_name.append("Face detection");
+ break;
+ case TEST_FACIAL_LANDMARK_DETECTION:
+ test_name.append("Facial landmark detection");
+ break;
+ case TEST_POSE_ESTIMATION:
+ test_name.append("Pose estimation");
+ break;
+ }
+
+ std::cout << test_name << " inference test : backend = " << backend_name << ", target device = " << (target_devices == INFERENCE_TARGET_CPU ? "CPU" : "GPU") << "\n";
+
+ inference_engine_config config = {
+ .backend_name = backend_name,
+ .target_devices = target_devices
+ };
+
+ InferenceEngineCommon *engine = new InferenceEngineCommon(&config);
+ // NOTE(review): operator new throws on failure, so this null check is
+ // effectively dead; presumably kept to mirror the sibling tests — confirm.
+ if (engine == nullptr) {
+ ASSERT_TRUE(engine);
+ return;
+ }
+
+ // Profiling is enabled and dumped to a file so per-run timings can be
+ // inspected after the test.
+ int ret = engine->EnableProfiler(true);
+ if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+ delete engine;
+ ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+ return;
+ }
+
+ ret = engine->DumpProfileToFile("dump.txt");
+ if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+ delete engine;
+ ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+ return;
+ }
+
+ ret = engine->BindBackend(&config);
+ if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+ delete engine;
+ ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+ return;
+ }
+
+ inference_engine_capacity capacity;
+ ret = engine->GetBackendCapacity(&capacity);
+ EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+
+ ret = engine->SetTargetDevices(target_devices);
+ EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+
+ std::vector <std::string> models;
+ int model_type = GetModelInfo(model_paths, models);
+ if (model_type == -1) {
+ delete engine;
+ ASSERT_NE(model_type, -1);
+ return;
+ }
+
+ // Describe every input layer as an NCHW float tensor of the parameterized
+ // geometry (1 x ch x height x width).
+ inference_engine_layer_property input_property;
+ std::vector<std::string>::iterator iter;
+
+ for (iter = input_layers.begin(); iter != input_layers.end(); iter++) {
+ inference_engine_tensor_info tensor_info = {
+ { 1, ch, height, width },
+ (inference_tensor_shape_type_e)TENSOR_SHAPE_NCHW,
+ (inference_tensor_data_type_e)tensor_type,
+ (size_t)(1 * ch * height * width)
+ };
+
+ input_property.layer_names.push_back(*iter);
+ input_property.tensor_infos.push_back(tensor_info);
+ }
+
+ ret = engine->SetInputLayerProperty(input_property);
+ if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+ delete engine;
+ ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+ return;
+ }
+
+ inference_engine_layer_property output_property;
+
+ for (iter = output_layers.begin(); iter != output_layers.end(); iter++) {
+ output_property.layer_names.push_back(*iter);
+ }
+
+ ret = engine->SetOutputLayerProperty(output_property);
+ if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+ delete engine;
+ ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+ return;
+ }
+
+ ret = engine->Load(models, (inference_model_format_e)model_type);
+ if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+ delete engine;
+ ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+ return;
+ }
+
+ std::vector<inference_engine_tensor_buffer> inputs, outputs;
+ ret = PrepareTensorBuffers(engine, inputs, outputs);
+ if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+ delete engine;
+ ASSERT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+ return;
+ }
+
+ // Copy input image tensor data from a given file to input tensor buffer.
+ for (int i = 0; i < (int)image_paths.size(); ++i) {
+ CopyFileToMemory(image_paths[i].c_str(), inputs[i], inputs[i].size);
+ }
+
+ // Repeat inference 'iteration' times; every run must succeed.
+ for (int repeat = 0; repeat < iteration; ++repeat) {
+ ret = engine->Run(inputs, outputs);
+ EXPECT_EQ(ret, INFERENCE_ENGINE_ERROR_NONE);
+ }
+
+ tensor_t result;
+ FillOutputResult(engine, outputs, result);
+
+ // Verify the final run's output against the expected answers, using the
+ // fixed dump-image geometry noted per case.
+ switch (test_type) {
+ case TEST_IMAGE_CLASSIFICATION:
+ ret = VerifyImageClassificationResults(result, answers[0]);
+ EXPECT_EQ(ret, 1);
+ break;
+ case TEST_OBJECT_DETECTION:
+ // 1024 : fixed height size of dumped image, 636 : fixed width size of dumped image.
+ ret = VerifyObjectDetectionResults(result, answers, 636, 1024);
+ EXPECT_EQ(ret, 1);
+ break;
+ case TEST_FACE_DETECTION:
+ // 1152 : fixed height size of dumped image, 1536 : fixed width size of dumped image.
+ ret = VerifyObjectDetectionResults(result, answers, 1152, 1536);
+ EXPECT_EQ(ret, 1);
+ break;
+ case TEST_FACIAL_LANDMARK_DETECTION:
+ // 128 : fixed height size of dumped image, 128 : fixed width size of dumped image.
+ ret = VerifyFacialLandmarkDetectionResults(result, answers, 128, 128);
+ EXPECT_EQ(ret, 1);
+ break;
+ case TEST_POSE_ESTIMATION:
+ // 563 : fixed height size of dumped image, 750 : fixed width size of dumped image.
+ ret = VerifyPoseEstimationResults(result, answers, 563, 750);
+ EXPECT_EQ(ret, 1);
+ break;
+ }
+
+ CleanupTensorBuffers(inputs, outputs);
+
+ engine->UnbindBackend();
+ models.clear();
+
+ delete engine;
+}
+
INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineCommonTest,
testing::Values(
// parameter order : backend name, target device
ParamType("tflite", INFERENCE_TARGET_GPU),
// DLDT.
- ParamType("dldt", INFERENCE_TARGET_CUSTOM)
+ // A trailing comma is required here because new entries follow.
+ ParamType("dldt", INFERENCE_TARGET_CUSTOM),
+ // OPENCV.
+ ParamType("opencv", INFERENCE_TARGET_CPU),
+ ParamType("opencv", INFERENCE_TARGET_GPU)
+ /* TODO */
+
)
);
ParamType_Load("armnn", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }),
// TFLITE.
ParamType_Load("tflite", INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }),
- ParamType_Load("tflite", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" })
+ ParamType_Load("tflite", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" }),
+ // OPENCV.
+ ParamType_Load("opencv", INFERENCE_TARGET_CPU, { "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel", "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt" }),
+ ParamType_Load("opencv", INFERENCE_TARGET_GPU, { "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel", "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt" })
/* TODO */
)
);
/* TODO */
)
);
+
+INSTANTIATE_TEST_CASE_P(Prefix, InferenceEngineCaffeTest,
+ testing::Values(
+ // parameter order : backend_name, target_devices, test_type, iteration, tensor_type, image_paths, height, width, ch, input_layers, output_layers, model_paths, answers
+ // OPENCV
+ // NOTE(review): each scenario below appears twice with INFERENCE_TARGET_CPU;
+ // the second entry was presumably intended for INFERENCE_TARGET_GPU — confirm.
+ // squeezenet based image classification test
+ ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_IMAGE_CLASSIFICATION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/image_classification_caffe.bin" }, 227, 227, 3, { "data" }, { "prob" }, { "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel", "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt" }, { 281 }),
+ ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_IMAGE_CLASSIFICATION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/image_classification_caffe.bin" }, 227, 227, 3, { "data" }, { "prob" }, { "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel", "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt" }, { 281 }),
+
+ // mobilenet-ssd based object detection test
+ ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_OBJECT_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/object_detection_caffe.bin" }, 300, 300, 3, { "data" }, { "detection_out" }, { "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.caffemodel", "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.prototxt" }, { 15, 19, 335, 557 }),
+ ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_OBJECT_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/object_detection_caffe.bin" }, 300, 300, 3, { "data" }, { "detection_out" }, { "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.caffemodel", "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.prototxt" }, { 15, 19, 335, 557 }),
+
+ // resnet10-ssd based face detection test
+ ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_FACE_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/face_detection_caffe.bin" }, 300, 300, 3, { "data" }, { "detection_out" }, { "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.caffemodel", "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.prototxt" }, { 733, 233, 965, 539 }),
+ ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_FACE_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/face_detection_caffe.bin" }, 300, 300, 3, { "data" }, { "detection_out" }, { "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.caffemodel", "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.prototxt" }, { 733, 233, 965, 539 }),
+
+ // tweakcnn based facial landmark detection test
+ ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_FACIAL_LANDMARK_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/faciallandmark_detection_caffe.bin" }, 128, 128, 3, { "data" }, { "Sigmoid_fc2" }, { "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.caffemodel", "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.prototxt" },
+ { 53, 45, 85, 46, 66, 64, 54, 78, 82, 79}),
+ ParamType_Infer("opencv", INFERENCE_TARGET_CPU, TEST_FACIAL_LANDMARK_DETECTION, 10, TENSOR_DATA_TYPE_FLOAT32, { "/opt/usr/images/faciallandmark_detection_caffe.bin" }, 128, 128, 3, { "data" }, { "Sigmoid_fc2" }, { "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.caffemodel", "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.prototxt" },
+ { 53, 45, 85, 46, 66, 64, 54, 78, 82, 79})
+ /* TODO */
+ )
+);
\ No newline at end of file