Run entire SSDs from TensorFlow using Intel's Inference Engine
[platform/upstream/opencv.git] / modules / dnn / test / test_tf_importer.cpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 // Copyright (C) 2017, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
7
8 /*
9 Test for Tensorflow models loading
10 */
11
12 #include "test_precomp.hpp"
13 #include "npy_blob.hpp"
14
15 #include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS
16
17 namespace opencv_test
18 {
19
20 using namespace cv;
21 using namespace cv::dnn;
22
23 template<typename TString>
24 static std::string _tf(TString filename)
25 {
26     return (getOpenCVExtraDir() + "/dnn/") + filename;
27 }
28
29 TEST(Test_TensorFlow, read_inception)
30 {
31     Net net;
32     {
33         const string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
34         net = readNetFromTensorflow(model);
35         ASSERT_FALSE(net.empty());
36     }
37     net.setPreferableBackend(DNN_BACKEND_OPENCV);
38
39     Mat sample = imread(_tf("grace_hopper_227.png"));
40     ASSERT_TRUE(!sample.empty());
41     Mat input;
42     resize(sample, input, Size(224, 224));
43     input -= 128; // mean sub
44
45     Mat inputBlob = blobFromImage(input);
46
47     net.setInput(inputBlob, "input");
48     Mat out = net.forward("softmax2");
49
50     std::cout << out.dims << std::endl;
51 }
52
53 TEST(Test_TensorFlow, inception_accuracy)
54 {
55     Net net;
56     {
57         const string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
58         net = readNetFromTensorflow(model);
59         ASSERT_FALSE(net.empty());
60     }
61     net.setPreferableBackend(DNN_BACKEND_OPENCV);
62
63     Mat sample = imread(_tf("grace_hopper_227.png"));
64     ASSERT_TRUE(!sample.empty());
65     resize(sample, sample, Size(224, 224));
66     Mat inputBlob = blobFromImage(sample);
67
68     net.setInput(inputBlob, "input");
69     Mat out = net.forward("softmax2");
70
71     Mat ref = blobFromNPY(_tf("tf_inception_prob.npy"));
72
73     normAssert(ref, out);
74 }
75
76 static std::string path(const std::string& file)
77 {
78     return findDataFile("dnn/tensorflow/" + file, false);
79 }
80
81 class Test_TensorFlow_layers : public DNNTestLayer
82 {
83 public:
84     void runTensorFlowNet(const std::string& prefix, bool hasText = false,
85                           double l1 = 0.0, double lInf = 0.0, bool memoryLoad = false)
86     {
87         std::string netPath = path(prefix + "_net.pb");
88         std::string netConfig = (hasText ? path(prefix + "_net.pbtxt") : "");
89         std::string inpPath = path(prefix + "_in.npy");
90         std::string outPath = path(prefix + "_out.npy");
91
92         cv::Mat input = blobFromNPY(inpPath);
93         cv::Mat ref = blobFromNPY(outPath);
94         checkBackend(&input, &ref);
95
96         Net net;
97         if (memoryLoad)
98         {
99             // Load files into a memory buffers
100             string dataModel;
101             ASSERT_TRUE(readFileInMemory(netPath, dataModel));
102
103             string dataConfig;
104             if (hasText)
105                 ASSERT_TRUE(readFileInMemory(netConfig, dataConfig));
106
107             net = readNetFromTensorflow(dataModel.c_str(), dataModel.size(),
108                                         dataConfig.c_str(), dataConfig.size());
109         }
110         else
111             net = readNetFromTensorflow(netPath, netConfig);
112
113         ASSERT_FALSE(net.empty());
114
115         net.setPreferableBackend(backend);
116         net.setPreferableTarget(target);
117         net.setInput(input);
118         cv::Mat output = net.forward();
119         normAssert(ref, output, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
120     }
121 };
122
123 TEST_P(Test_TensorFlow_layers, conv)
124 {
125     runTensorFlowNet("single_conv");
126     runTensorFlowNet("atrous_conv2d_valid");
127     runTensorFlowNet("atrous_conv2d_same");
128     runTensorFlowNet("depthwise_conv2d");
129     runTensorFlowNet("keras_atrous_conv2d_same");
130     runTensorFlowNet("conv_pool_nchw");
131 }
132
133 TEST_P(Test_TensorFlow_layers, padding)
134 {
135     runTensorFlowNet("padding_same");
136     runTensorFlowNet("padding_valid");
137     runTensorFlowNet("spatial_padding");
138 }
139
140 TEST_P(Test_TensorFlow_layers, eltwise_add_mul)
141 {
142     runTensorFlowNet("eltwise_add_mul");
143 }
144
145 TEST_P(Test_TensorFlow_layers, pad_and_concat)
146 {
147     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
148         throw SkipTestException("");
149     runTensorFlowNet("pad_and_concat");
150 }
151
152 TEST_P(Test_TensorFlow_layers, concat_axis_1)
153 {
154     runTensorFlowNet("concat_axis_1");
155 }
156
157 TEST_P(Test_TensorFlow_layers, batch_norm)
158 {
159     runTensorFlowNet("batch_norm");
160     runTensorFlowNet("batch_norm", false, 0.0, 0.0, true);
161     runTensorFlowNet("fused_batch_norm");
162     runTensorFlowNet("fused_batch_norm", false, 0.0, 0.0, true);
163     runTensorFlowNet("batch_norm_text", true);
164     runTensorFlowNet("batch_norm_text", true, 0.0, 0.0, true);
165     runTensorFlowNet("unfused_batch_norm");
166     runTensorFlowNet("fused_batch_norm_no_gamma");
167     runTensorFlowNet("unfused_batch_norm_no_gamma");
168 }
169
170 TEST_P(Test_TensorFlow_layers, mvn_batch_norm)
171 {
172     if (backend == DNN_BACKEND_INFERENCE_ENGINE)
173         throw SkipTestException("");
174     runTensorFlowNet("mvn_batch_norm");
175     runTensorFlowNet("mvn_batch_norm_1x1");
176 }
177
178 TEST_P(Test_TensorFlow_layers, pooling)
179 {
180     runTensorFlowNet("max_pool_even");
181     runTensorFlowNet("max_pool_odd_valid");
182     runTensorFlowNet("max_pool_odd_same");
183     runTensorFlowNet("reduce_mean");  // an average pooling over all spatial dimensions.
184 }
185
186 // TODO: fix tests and replace to pooling
187 TEST_P(Test_TensorFlow_layers, ave_pool_same)
188 {
189     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
190         throw SkipTestException("");
191     runTensorFlowNet("ave_pool_same");
192 }
193
194 TEST_P(Test_TensorFlow_layers, deconvolution)
195 {
196     runTensorFlowNet("deconvolution");
197     runTensorFlowNet("deconvolution_same");
198     runTensorFlowNet("deconvolution_stride_2_same");
199     runTensorFlowNet("deconvolution_adj_pad_valid");
200     runTensorFlowNet("deconvolution_adj_pad_same");
201     runTensorFlowNet("keras_deconv_valid");
202     runTensorFlowNet("keras_deconv_same");
203 }
204
205 TEST_P(Test_TensorFlow_layers, matmul)
206 {
207     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
208         throw SkipTestException("");
209     runTensorFlowNet("matmul");
210     runTensorFlowNet("nhwc_reshape_matmul");
211     runTensorFlowNet("nhwc_transpose_reshape_matmul");
212 }
213
214 TEST_P(Test_TensorFlow_layers, reshape)
215 {
216     if (backend == DNN_BACKEND_INFERENCE_ENGINE)
217         throw SkipTestException("");
218     runTensorFlowNet("shift_reshape_no_reorder");
219     runTensorFlowNet("reshape_no_reorder");
220     runTensorFlowNet("reshape_reduce");
221 }
222
223 TEST_P(Test_TensorFlow_layers, flatten)
224 {
225     if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
226         (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
227         throw SkipTestException("");
228     runTensorFlowNet("flatten", true);
229     runTensorFlowNet("unfused_flatten");
230     runTensorFlowNet("unfused_flatten_unknown_batch");
231 }
232
233 TEST_P(Test_TensorFlow_layers, l2_normalize)
234 {
235     runTensorFlowNet("l2_normalize");
236 }
237
238 // TODO: fix it and add to l2_normalize
239 TEST_P(Test_TensorFlow_layers, l2_normalize_3d)
240 {
241     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
242         throw SkipTestException("");
243     runTensorFlowNet("l2_normalize_3d");
244 }
245
246 class Test_TensorFlow_nets : public DNNTestLayer {};
247
248 TEST_P(Test_TensorFlow_nets, MobileNet_SSD)
249 {
250     checkBackend();
251     if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU) ||
252         (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
253         throw SkipTestException("");
254
255     std::string netPath = findDataFile("dnn/ssd_mobilenet_v1_coco.pb", false);
256     std::string netConfig = findDataFile("dnn/ssd_mobilenet_v1_coco.pbtxt", false);
257     std::string imgPath = findDataFile("dnn/street.png", false);
258
259     Mat inp;
260     resize(imread(imgPath), inp, Size(300, 300));
261     inp = blobFromImage(inp, 1.0f / 127.5, Size(), Scalar(127.5, 127.5, 127.5), true);
262
263     std::vector<String> outNames(3);
264     outNames[0] = "concat";
265     outNames[1] = "concat_1";
266     outNames[2] = "detection_out";
267
268     std::vector<Mat> refs(outNames.size());
269     for (int i = 0; i < outNames.size(); ++i)
270     {
271         std::string path = findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco." + outNames[i] + ".npy", false);
272         refs[i] = blobFromNPY(path);
273     }
274
275     Net net = readNetFromTensorflow(netPath, netConfig);
276     net.setPreferableBackend(backend);
277     net.setPreferableTarget(target);
278
279     net.setInput(inp);
280
281     std::vector<Mat> output;
282     net.forward(output, outNames);
283
284     normAssert(refs[0].reshape(1, 1), output[0].reshape(1, 1), "", 1e-5, 1.5e-4);
285     normAssert(refs[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 3e-4);
286     normAssertDetections(refs[2], output[2], "", 0.2);
287 }
288
289 TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
290 {
291     checkBackend();
292     std::string proto = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pbtxt", false);
293     std::string model = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pb", false);
294
295     Net net = readNetFromTensorflow(model, proto);
296     Mat img = imread(findDataFile("dnn/street.png", false));
297     Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false);
298
299     net.setPreferableBackend(backend);
300     net.setPreferableTarget(target);
301
302     net.setInput(blob);
303     // Output has shape 1x1xNx7 where N - number of detections.
304     // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
305     Mat out = net.forward();
306     Mat ref = (Mat_<float>(5, 7) << 0, 1, 0.90176028, 0.19872092, 0.36311883, 0.26461923, 0.63498729,
307                                     0, 3, 0.93569964, 0.64865261, 0.45906419, 0.80675775, 0.65708131,
308                                     0, 3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015,
309                                     0, 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
310                                     0, 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
311     double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 5e-3 : default_l1;
312     double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.09 : default_lInf;
313     normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff);
314 }
315
316 TEST_P(Test_TensorFlow_nets, Inception_v2_Faster_RCNN)
317 {
318     checkBackend();
319     if (backend == DNN_BACKEND_INFERENCE_ENGINE ||
320         (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
321         throw SkipTestException("");
322
323     std::string proto = findDataFile("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt", false);
324     std::string model = findDataFile("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pb", false);
325
326     Net net = readNetFromTensorflow(model, proto);
327     net.setPreferableBackend(backend);
328     net.setPreferableTarget(target);
329     Mat img = imread(findDataFile("dnn/dog416.png", false));
330     Mat blob = blobFromImage(img, 1.0f / 127.5, Size(800, 600), Scalar(127.5, 127.5, 127.5), true, false);
331
332     net.setInput(blob);
333     Mat out = net.forward();
334
335     Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/faster_rcnn_inception_v2_coco_2018_01_28.detection_out.npy"));
336     normAssertDetections(ref, out, "", 0.3);
337 }
338
339 TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)
340 {
341     checkBackend();
342     if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
343         (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD))
344         throw SkipTestException("");
345
346     std::string proto = findDataFile("dnn/opencv_face_detector.pbtxt", false);
347     std::string model = findDataFile("dnn/opencv_face_detector_uint8.pb", false);
348
349     Net net = readNetFromTensorflow(model, proto);
350     Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
351     Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
352
353     net.setPreferableBackend(backend);
354     net.setPreferableTarget(target);
355     net.setInput(blob);
356     // Output has shape 1x1xNx7 where N - number of detections.
357     // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
358     Mat out = net.forward();
359
360     // References are from test for Caffe model.
361     Mat ref = (Mat_<float>(6, 7) << 0, 1, 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
362                                     0, 1, 0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
363                                     0, 1, 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
364                                     0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
365                                     0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
366                                     0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
367     double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 4e-3 : 3.4e-3;
368     double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.017 : 1e-2;
369     normAssertDetections(ref, out, "", 0.9, scoreDiff, iouDiff);
370 }
371
372 // inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png')
373 // inp = inp[:,:,[2, 1, 0]].astype(np.float32).reshape(1, 512, 512, 3)
374 // outs = sess.run([sess.graph.get_tensor_by_name('feature_fusion/Conv_7/Sigmoid:0'),
375 //                  sess.graph.get_tensor_by_name('feature_fusion/concat_3:0')],
376 //                 feed_dict={'input_images:0': inp})
377 // scores = np.ascontiguousarray(outs[0].transpose(0, 3, 1, 2))
378 // geometry = np.ascontiguousarray(outs[1].transpose(0, 3, 1, 2))
379 // np.save('east_text_detection.scores.npy', scores)
380 // np.save('east_text_detection.geometry.npy', geometry)
381 TEST_P(Test_TensorFlow_nets, EAST_text_detection)
382 {
383     checkBackend();
384     if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
385         throw SkipTestException("");
386
387     std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false);
388     std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false);
389     std::string refScoresPath = findDataFile("dnn/east_text_detection.scores.npy", false);
390     std::string refGeometryPath = findDataFile("dnn/east_text_detection.geometry.npy", false);
391
392     Net net = readNet(findDataFile("dnn/frozen_east_text_detection.pb", false));
393
394     net.setPreferableBackend(backend);
395     net.setPreferableTarget(target);
396
397     Mat img = imread(imgPath);
398     Mat inp = blobFromImage(img, 1.0, Size(), Scalar(123.68, 116.78, 103.94), true, false);
399     net.setInput(inp);
400
401     std::vector<Mat> outs;
402     std::vector<String> outNames(2);
403     outNames[0] = "feature_fusion/Conv_7/Sigmoid";
404     outNames[1] = "feature_fusion/concat_3";
405     net.forward(outs, outNames);
406
407     Mat scores = outs[0];
408     Mat geometry = outs[1];
409
410     normAssert(scores, blobFromNPY(refScoresPath), "scores");
411     normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 1e-4, 3e-3);
412 }
413
414 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, dnnBackendsAndTargets());
415
416 TEST_P(Test_TensorFlow_layers, fp16_weights)
417 {
418     const float l1 = 0.00071;
419     const float lInf = 0.012;
420     runTensorFlowNet("fp16_single_conv", false, l1, lInf);
421     runTensorFlowNet("fp16_deconvolution", false, l1, lInf);
422     runTensorFlowNet("fp16_max_pool_odd_same", false, l1, lInf);
423     runTensorFlowNet("fp16_padding_valid", false, l1, lInf);
424     runTensorFlowNet("fp16_eltwise_add_mul", false, l1, lInf);
425     runTensorFlowNet("fp16_max_pool_odd_valid", false, l1, lInf);
426     runTensorFlowNet("fp16_max_pool_even", false, l1, lInf);
427     runTensorFlowNet("fp16_padding_same", false, l1, lInf);
428 }
429
430 // TODO: fix pad_and_concat and add this test case to fp16_weights
431 TEST_P(Test_TensorFlow_layers, fp16_pad_and_concat)
432 {
433     const float l1 = 0.00071;
434     const float lInf = 0.012;
435     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
436         throw SkipTestException("");
437     runTensorFlowNet("fp16_pad_and_concat", false, l1, lInf);
438 }
439
440 TEST_P(Test_TensorFlow_layers, defun)
441 {
442     runTensorFlowNet("defun_dropout");
443 }
444
445 TEST_P(Test_TensorFlow_layers, quantized)
446 {
447     runTensorFlowNet("uint8_single_conv");
448 }
449
450 TEST_P(Test_TensorFlow_layers, lstm)
451 {
452     if (backend == DNN_BACKEND_INFERENCE_ENGINE ||
453         (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
454         throw SkipTestException("");
455     runTensorFlowNet("lstm", true);
456     runTensorFlowNet("lstm", true, 0.0, 0.0, true);
457 }
458
459 TEST_P(Test_TensorFlow_layers, split)
460 {
461     if (backend == DNN_BACKEND_INFERENCE_ENGINE)
462         throw SkipTestException("");
463     runTensorFlowNet("split_equals");
464 }
465
466 TEST_P(Test_TensorFlow_layers, resize_nearest_neighbor)
467 {
468     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_MYRIAD)
469         throw SkipTestException("");
470     runTensorFlowNet("resize_nearest_neighbor");
471     runTensorFlowNet("keras_upsampling2d");
472 }
473
474 TEST_P(Test_TensorFlow_layers, slice)
475 {
476     if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
477         (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
478         throw SkipTestException("");
479     runTensorFlowNet("slice_4d");
480 }
481
482 TEST_P(Test_TensorFlow_layers, softmax)
483 {
484     runTensorFlowNet("keras_softmax");
485 }
486
487 TEST_P(Test_TensorFlow_layers, relu6)
488 {
489     runTensorFlowNet("keras_relu6");
490     runTensorFlowNet("keras_relu6", /*hasText*/ true);
491 }
492
493 TEST_P(Test_TensorFlow_layers, keras_mobilenet_head)
494 {
495     runTensorFlowNet("keras_mobilenet_head");
496 }
497
498 TEST_P(Test_TensorFlow_layers, resize_bilinear)
499 {
500     runTensorFlowNet("resize_bilinear");
501     runTensorFlowNet("resize_bilinear_factor");
502 }
503
504 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_layers, dnnBackendsAndTargets());
505
506 TEST(Test_TensorFlow, two_inputs)
507 {
508     Net net = readNet(path("two_inputs_net.pbtxt"));
509     net.setPreferableBackend(DNN_BACKEND_OPENCV);
510
511     Mat firstInput(2, 3, CV_32FC1), secondInput(2, 3, CV_32FC1);
512     randu(firstInput, -1, 1);
513     randu(secondInput, -1, 1);
514
515     net.setInput(firstInput, "first_input");
516     net.setInput(secondInput, "second_input");
517     Mat out = net.forward();
518
519     normAssert(out, firstInput + secondInput);
520 }
521
522 }