ea8cd1565945872346def47d31df9a8072a5b210
[platform/upstream/opencv.git] / modules / dnn / test / test_tf_importer.cpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 // Copyright (C) 2017-2019, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
7
8 /*
9 Test for Tensorflow models loading
10 */
11
12 #include "test_precomp.hpp"
13 #include "npy_blob.hpp"
14
15 #include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS
16
17 namespace opencv_test
18 {
19
20 using namespace cv;
21 using namespace cv::dnn;
22
23 template<typename TString>
24 static std::string _tf(TString filename)
25 {
26     return (getOpenCVExtraDir() + "/dnn/") + filename;
27 }
28
29 TEST(Test_TensorFlow, read_inception)
30 {
31     Net net;
32     {
33         const string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
34         net = readNetFromTensorflow(model);
35         ASSERT_FALSE(net.empty());
36     }
37     net.setPreferableBackend(DNN_BACKEND_OPENCV);
38
39     Mat sample = imread(_tf("grace_hopper_227.png"));
40     ASSERT_TRUE(!sample.empty());
41     Mat input;
42     resize(sample, input, Size(224, 224));
43     input -= Scalar::all(117); // mean sub
44
45     Mat inputBlob = blobFromImage(input);
46
47     net.setInput(inputBlob, "input");
48     Mat out = net.forward("softmax2");
49
50     std::cout << out.dims << std::endl;
51 }
52
53 TEST(Test_TensorFlow, inception_accuracy)
54 {
55     Net net;
56     {
57         const string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
58         net = readNetFromTensorflow(model);
59         ASSERT_FALSE(net.empty());
60     }
61     net.setPreferableBackend(DNN_BACKEND_OPENCV);
62
63     Mat sample = imread(_tf("grace_hopper_227.png"));
64     ASSERT_TRUE(!sample.empty());
65     Mat inputBlob = blobFromImage(sample, 1.0, Size(224, 224), Scalar(), /*swapRB*/true);
66
67     net.setInput(inputBlob, "input");
68     Mat out = net.forward("softmax2");
69
70     Mat ref = blobFromNPY(_tf("tf_inception_prob.npy"));
71
72     normAssert(ref, out);
73 }
74
75 static std::string path(const std::string& file)
76 {
77     return findDataFile("dnn/tensorflow/" + file);
78 }
79
80 class Test_TensorFlow_layers : public DNNTestLayer
81 {
82 public:
83     void runTensorFlowNet(const std::string& prefix, bool hasText = false,
84                           double l1 = 0.0, double lInf = 0.0, bool memoryLoad = false)
85     {
86         std::string netPath = path(prefix + "_net.pb");
87         std::string netConfig = (hasText ? path(prefix + "_net.pbtxt") : "");
88         std::string inpPath = path(prefix + "_in.npy");
89         std::string outPath = path(prefix + "_out.npy");
90
91         cv::Mat input = blobFromNPY(inpPath);
92         cv::Mat ref = blobFromNPY(outPath);
93         checkBackend(&input, &ref);
94
95         Net net;
96         if (memoryLoad)
97         {
98             // Load files into a memory buffers
99             std::vector<char> dataModel;
100             readFileContent(netPath, dataModel);
101
102             std::vector<char> dataConfig;
103             if (hasText)
104             {
105                 readFileContent(netConfig, dataConfig);
106             }
107
108             net = readNetFromTensorflow(dataModel.data(), dataModel.size(),
109                                         dataConfig.data(), dataConfig.size());
110         }
111         else
112             net = readNetFromTensorflow(netPath, netConfig);
113
114         ASSERT_FALSE(net.empty());
115
116         net.setPreferableBackend(backend);
117         net.setPreferableTarget(target);
118         net.setInput(input);
119         cv::Mat output = net.forward();
120         normAssert(ref, output, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
121     }
122 };
123
124 TEST_P(Test_TensorFlow_layers, conv)
125 {
126     runTensorFlowNet("single_conv");
127     runTensorFlowNet("atrous_conv2d_valid");
128     runTensorFlowNet("atrous_conv2d_same");
129     runTensorFlowNet("depthwise_conv2d");
130     runTensorFlowNet("keras_atrous_conv2d_same");
131     runTensorFlowNet("conv_pool_nchw");
132 }
133
134 TEST_P(Test_TensorFlow_layers, Convolution3D)
135 {
136 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
137     throw SkipTestException("Test is enabled starts from 2019R1");
138 #endif
139     if (target != DNN_TARGET_CPU)
140         throw SkipTestException("Only CPU is supported");
141     runTensorFlowNet("conv3d");
142 }
143
144 TEST_P(Test_TensorFlow_layers, padding)
145 {
146     runTensorFlowNet("padding_valid");
147     runTensorFlowNet("spatial_padding");
148     runTensorFlowNet("mirror_pad");
149 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019020000)
150     if (backend == DNN_BACKEND_INFERENCE_ENGINE)
151     {
152         if (target == DNN_TARGET_MYRIAD)
153             applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_2019R3, CV_TEST_TAG_DNN_SKIP_IE_2019R2);
154         if (target == DNN_TARGET_OPENCL_FP16)
155             applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_2019R3, CV_TEST_TAG_DNN_SKIP_IE_2019R2);
156     }
157 #endif
158     runTensorFlowNet("keras_pad_concat");
159 }
160
161 TEST_P(Test_TensorFlow_layers, padding_same)
162 {
163     // Reference output values are in range [0.0006, 2.798]
164     runTensorFlowNet("padding_same");
165 }
166
167 TEST_P(Test_TensorFlow_layers, eltwise)
168 {
169     runTensorFlowNet("eltwise_add_mul");
170     runTensorFlowNet("eltwise_sub");
171 }
172
173 TEST_P(Test_TensorFlow_layers, pad_and_concat)
174 {
175     runTensorFlowNet("pad_and_concat");
176 }
177
178 TEST_P(Test_TensorFlow_layers, concat_axis_1)
179 {
180     runTensorFlowNet("concat_axis_1");
181 }
182
183 TEST_P(Test_TensorFlow_layers, batch_norm)
184 {
185     runTensorFlowNet("batch_norm");
186     runTensorFlowNet("batch_norm", false, 0.0, 0.0, true);
187     runTensorFlowNet("fused_batch_norm");
188     runTensorFlowNet("fused_batch_norm", false, 0.0, 0.0, true);
189     runTensorFlowNet("batch_norm_text", true);
190     runTensorFlowNet("batch_norm_text", true, 0.0, 0.0, true);
191     runTensorFlowNet("unfused_batch_norm");
192     runTensorFlowNet("fused_batch_norm_no_gamma");
193     runTensorFlowNet("unfused_batch_norm_no_gamma");
194     runTensorFlowNet("mvn_batch_norm");
195     runTensorFlowNet("mvn_batch_norm_1x1");
196     runTensorFlowNet("switch_identity");
197     runTensorFlowNet("keras_batch_norm_training");
198 }
199
200 TEST_P(Test_TensorFlow_layers, batch_norm3D)
201 {
202     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
203     {
204         if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
205         if (target == DNN_TARGET_OPENCL)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL);
206         if (target == DNN_TARGET_MYRIAD)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
207         throw SkipTestException("");
208     }
209     runTensorFlowNet("batch_norm3d");
210 }
211
212 TEST_P(Test_TensorFlow_layers, slim_batch_norm)
213 {
214     if (backend == DNN_BACKEND_INFERENCE_ENGINE)
215         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
216     // Output values range: [-40.0597, 207.827]
217     double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.041 : default_l1;
218     double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.33 : default_lInf;
219     runTensorFlowNet("slim_batch_norm", false, l1, lInf);
220 }
221
222 TEST_P(Test_TensorFlow_layers, pooling)
223 {
224     runTensorFlowNet("max_pool_even");
225     runTensorFlowNet("max_pool_odd_valid");
226     runTensorFlowNet("max_pool_odd_same");
227     runTensorFlowNet("reduce_mean");  // an average pooling over all spatial dimensions.
228 }
229
230 TEST_P(Test_TensorFlow_layers, max_pool_grad)
231 {
232     if (backend == DNN_BACKEND_INFERENCE_ENGINE)
233         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
234     runTensorFlowNet("max_pool_grad");
235 }
236
237 // TODO: fix tests and replace to pooling
238 TEST_P(Test_TensorFlow_layers, ave_pool_same)
239 {
240     // Reference output values are in range [-0.519531, 0.112976]
241 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
242     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD
243             && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X
244     )
245         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
246 #endif
247     runTensorFlowNet("ave_pool_same");
248 }
249
250 TEST_P(Test_TensorFlow_layers, MaxPooling3D)
251 {
252 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
253     throw SkipTestException("Test is enabled starts from 2019R1");
254 #endif
255     if (target != DNN_TARGET_CPU)
256         throw SkipTestException("Only CPU is supported");
257     runTensorFlowNet("max_pool3d");
258 }
259
260 TEST_P(Test_TensorFlow_layers, AvePooling3D)
261 {
262 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
263     throw SkipTestException("Test is enabled starts from 2019R1");
264 #endif
265     if (target != DNN_TARGET_CPU)
266         throw SkipTestException("Only CPU is supported");
267     runTensorFlowNet("ave_pool3d");
268 }
269
270 TEST_P(Test_TensorFlow_layers, deconvolution)
271 {
272     runTensorFlowNet("deconvolution");
273     runTensorFlowNet("deconvolution_same");
274     runTensorFlowNet("deconvolution_stride_2_same");
275     runTensorFlowNet("deconvolution_adj_pad_valid");
276     runTensorFlowNet("deconvolution_adj_pad_same");
277     runTensorFlowNet("keras_deconv_valid");
278     runTensorFlowNet("keras_deconv_same");
279     runTensorFlowNet("keras_deconv_same_v2");
280 }
281
282 TEST_P(Test_TensorFlow_layers, matmul)
283 {
284     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
285         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
286     runTensorFlowNet("matmul");
287     runTensorFlowNet("nhwc_transpose_reshape_matmul");
288     // Reference output values are in range [-5.688, 4.484]
289     double l1 = target == DNN_TARGET_MYRIAD ? 6.1e-3 : default_l1;
290     runTensorFlowNet("nhwc_reshape_matmul", false, l1);
291     runTensorFlowNet("matmul_layout");
292 }
293
294 TEST_P(Test_TensorFlow_layers, reshape)
295 {
296     if (backend == DNN_BACKEND_INFERENCE_ENGINE)
297         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
298     runTensorFlowNet("shift_reshape_no_reorder");
299     runTensorFlowNet("reshape_no_reorder");
300     runTensorFlowNet("reshape_reduce");
301     runTensorFlowNet("reshape_as_shape");
302 }
303
304 TEST_P(Test_TensorFlow_layers, flatten)
305 {
306 #if defined(INF_ENGINE_RELEASE)
307     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD
308             && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_2
309     )
310         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_2);
311 #endif
312
313     runTensorFlowNet("flatten", true);
314 }
315
316 TEST_P(Test_TensorFlow_layers, unfused_flatten)
317 {
318     runTensorFlowNet("unfused_flatten");
319     runTensorFlowNet("unfused_flatten_unknown_batch");
320 }
321
322 TEST_P(Test_TensorFlow_layers, leaky_relu)
323 {
324 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000)
325     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
326         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_2018R5);
327 #endif
328     runTensorFlowNet("leaky_relu_order1");
329     runTensorFlowNet("leaky_relu_order2");
330     runTensorFlowNet("leaky_relu_order3");
331 }
332
333 TEST_P(Test_TensorFlow_layers, l2_normalize)
334 {
335 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
336     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD
337             && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X
338     )
339         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
340 #endif
341
342     runTensorFlowNet("l2_normalize");
343 }
344
345 // TODO: fix it and add to l2_normalize
346 TEST_P(Test_TensorFlow_layers, l2_normalize_3d)
347 {
348 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000)
349     if (backend == DNN_BACKEND_INFERENCE_ENGINE
350             && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)
351     )
352         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
353 #endif
354 #if defined(INF_ENGINE_RELEASE)
355     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
356         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
357 #endif
358
359     runTensorFlowNet("l2_normalize_3d");
360 }
361
362 class Test_TensorFlow_nets : public DNNTestLayer {};
363
364 TEST_P(Test_TensorFlow_nets, MobileNet_SSD)
365 {
366 #if defined(INF_ENGINE_RELEASE)
367     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
368     {
369 #if INF_ENGINE_VER_MAJOR_GE(2019020000)
370         if (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
371             applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
372 #endif
373     }
374 #endif
375
376     checkBackend();
377     std::string imgPath = findDataFile("dnn/street.png");
378     std::string netConfig = findDataFile("dnn/ssd_mobilenet_v1_coco.pbtxt");
379     std::string netPath = findDataFile("dnn/ssd_mobilenet_v1_coco.pb", false);
380
381     Mat inp;
382     resize(imread(imgPath), inp, Size(300, 300));
383     inp = blobFromImage(inp, 1.0f / 127.5, Size(), Scalar(127.5, 127.5, 127.5), true);
384
385     Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco.detection_out.npy"));
386
387     Net net = readNetFromTensorflow(netPath, netConfig);
388     net.setPreferableBackend(backend);
389     net.setPreferableTarget(target);
390
391     net.setInput(inp);
392     Mat out = net.forward();
393
394     double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0043 : default_l1;
395     double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.037 : default_lInf;
396     normAssertDetections(ref, out, "", 0.2, scoreDiff, iouDiff);
397 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2019010000
398     expectNoFallbacksFromIE(net);
399 #endif
400 }
401
402 TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
403 {
404     applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
405 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
406     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD &&
407         getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
408         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
409 #endif
410
411     checkBackend();
412     Mat img = imread(findDataFile("dnn/street.png"));
413     std::string proto = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pbtxt");
414     std::string model = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pb", false);
415
416     Net net = readNetFromTensorflow(model, proto);
417     Mat blob = blobFromImage(img, 1.0f, Size(300, 300), Scalar(), true, false);
418
419     net.setPreferableBackend(backend);
420     net.setPreferableTarget(target);
421
422     net.setInput(blob);
423     // Output has shape 1x1xNx7 where N - number of detections.
424     // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
425     Mat out = net.forward();
426     Mat ref = (Mat_<float>(5, 7) << 0, 1, 0.90176028, 0.19872092, 0.36311883, 0.26461923, 0.63498729,
427                                     0, 3, 0.93569964, 0.64865261, 0.45906419, 0.80675775, 0.65708131,
428                                     0, 3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015,
429                                     0, 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
430                                     0, 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
431
432     double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0097 : default_l1;
433     double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.09 : default_lInf;
434     normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff);
435     expectNoFallbacksFromIE(net);
436 }
437
438 TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD)
439 {
440     checkBackend();
441     std::string proto = findDataFile("dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt");
442     std::string model = findDataFile("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", false);
443
444     Net net = readNetFromTensorflow(model, proto);
445     Mat img = imread(findDataFile("dnn/dog416.png"));
446     Mat blob = blobFromImage(img, 1.0f, Size(300, 300), Scalar(), true, false);
447
448     net.setPreferableBackend(backend);
449     net.setPreferableTarget(target);
450
451     net.setInput(blob);
452     Mat out = net.forward();
453
454     Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco_2017_11_17.detection_out.npy"));
455     float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 1.5e-5;
456     float iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.012 : 1e-3;
457     float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.35 : 0.3;
458
459 #if defined(INF_ENGINE_RELEASE)
460     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD &&
461         getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
462     {
463         scoreDiff = 0.061;
464         iouDiff = 0.12;
465         detectionConfThresh = 0.36;
466     }
467 #endif
468     normAssertDetections(ref, out, "", detectionConfThresh, scoreDiff, iouDiff);
469     expectNoFallbacksFromIE(net);
470 }
471
472 TEST_P(Test_TensorFlow_nets, Faster_RCNN)
473 {
474     // FIXIT split test
475     applyTestTag(
476         (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB),
477         CV_TEST_TAG_LONG,
478         CV_TEST_TAG_DEBUG_VERYLONG
479     );
480     static std::string names[] = {"faster_rcnn_inception_v2_coco_2018_01_28",
481                                   "faster_rcnn_resnet50_coco_2018_01_28"};
482
483     checkBackend();
484 #ifdef INF_ENGINE_RELEASE
485     if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
486         (INF_ENGINE_VER_MAJOR_LT(2019020000) || target != DNN_TARGET_CPU))
487         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
488 #endif
489     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
490         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
491
492     double scoresDiff = backend == DNN_BACKEND_INFERENCE_ENGINE ? 2.9e-5 : 1e-5;
493     for (int i = 0; i < 2; ++i)
494     {
495         std::string proto = findDataFile("dnn/" + names[i] + ".pbtxt");
496         std::string model = findDataFile("dnn/" + names[i] + ".pb", false);
497
498         Net net = readNetFromTensorflow(model, proto);
499         net.setPreferableBackend(backend);
500         net.setPreferableTarget(target);
501         Mat img = imread(findDataFile("dnn/dog416.png"));
502         Mat blob = blobFromImage(img, 1.0f, Size(800, 600), Scalar(), true, false);
503
504         net.setInput(blob);
505         Mat out = net.forward();
506
507         Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/" + names[i] + ".detection_out.npy"));
508         normAssertDetections(ref, out, names[i].c_str(), 0.3, scoresDiff);
509     }
510 }
511
512 TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD_PPN)
513 {
514 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000)
515     if (backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
516         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
517 #endif
518     checkBackend();
519     std::string proto = findDataFile("dnn/ssd_mobilenet_v1_ppn_coco.pbtxt");
520     std::string model = findDataFile("dnn/ssd_mobilenet_v1_ppn_coco.pb", false);
521
522     Net net = readNetFromTensorflow(model, proto);
523     Mat img = imread(findDataFile("dnn/dog416.png"));
524     Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/ssd_mobilenet_v1_ppn_coco.detection_out.npy"));
525     Mat blob = blobFromImage(img, 1.0f, Size(300, 300), Scalar(), true, false);
526
527     net.setPreferableBackend(backend);
528     net.setPreferableTarget(target);
529
530     net.setInput(blob);
531     Mat out = net.forward();
532
533     double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.048 : 1.1e-5;
534     double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.058 : default_lInf;
535     normAssertDetections(ref, out, "", 0.45, scoreDiff, iouDiff);
536     expectNoFallbacksFromIE(net);
537 }
538
539 TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)
540 {
541     checkBackend();
542     std::string proto = findDataFile("dnn/opencv_face_detector.pbtxt");
543     std::string model = findDataFile("dnn/opencv_face_detector_uint8.pb", false);
544
545     Net net = readNetFromTensorflow(model, proto);
546     Mat img = imread(findDataFile("gpu/lbpcascade/er.png"));
547     Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
548
549     net.setPreferableBackend(backend);
550     net.setPreferableTarget(target);
551     net.setInput(blob);
552     // Output has shape 1x1xNx7 where N - number of detections.
553     // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
554     Mat out = net.forward();
555
556     // References are from test for Caffe model.
557     Mat ref = (Mat_<float>(6, 7) << 0, 1, 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
558                                     0, 1, 0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
559                                     0, 1, 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
560                                     0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
561                                     0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
562                                     0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
563     double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 4e-3 : 3.4e-3;
564     double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.024 : 1e-2;
565     normAssertDetections(ref, out, "", 0.9, scoreDiff, iouDiff);
566     expectNoFallbacksFromIE(net);
567 }
568
569 // inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png')
570 // inp = inp[:,:,[2, 1, 0]].astype(np.float32).reshape(1, 512, 512, 3)
571 // outs = sess.run([sess.graph.get_tensor_by_name('feature_fusion/Conv_7/Sigmoid:0'),
572 //                  sess.graph.get_tensor_by_name('feature_fusion/concat_3:0')],
573 //                 feed_dict={'input_images:0': inp})
574 // scores = np.ascontiguousarray(outs[0].transpose(0, 3, 1, 2))
575 // geometry = np.ascontiguousarray(outs[1].transpose(0, 3, 1, 2))
576 // np.save('east_text_detection.scores.npy', scores)
577 // np.save('east_text_detection.geometry.npy', geometry)
578 TEST_P(Test_TensorFlow_nets, EAST_text_detection)
579 {
580     applyTestTag(
581         (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB),
582         CV_TEST_TAG_DEBUG_LONG
583     );
584
585 #if defined(INF_ENGINE_RELEASE)
586     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
587         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
588
589     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16 &&
590         INF_ENGINE_VER_MAJOR_EQ(2019020000))
591         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_2019R2);
592 #endif
593
594     checkBackend();
595
596     std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false);
597     std::string imgPath = findDataFile("cv/ximgproc/sources/08.png");
598     std::string refScoresPath = findDataFile("dnn/east_text_detection.scores.npy");
599     std::string refGeometryPath = findDataFile("dnn/east_text_detection.geometry.npy");
600
601     Net net = readNet(netPath);
602
603     net.setPreferableBackend(backend);
604     net.setPreferableTarget(target);
605
606     Mat img = imread(imgPath);
607     Mat inp = blobFromImage(img, 1.0, Size(), Scalar(123.68, 116.78, 103.94), true, false);
608     net.setInput(inp);
609
610     std::vector<Mat> outs;
611     std::vector<String> outNames(2);
612     outNames[0] = "feature_fusion/Conv_7/Sigmoid";
613     outNames[1] = "feature_fusion/concat_3";
614     net.forward(outs, outNames);
615
616     Mat scores = outs[0];
617     Mat geometry = outs[1];
618
619     // Scores are in range [0, 1]. Geometry values are in range [-0.23, 290]
620     double l1_scores = default_l1, lInf_scores = default_lInf;
621     double l1_geometry = default_l1, lInf_geometry = default_lInf;
622     if (target == DNN_TARGET_OPENCL_FP16)
623     {
624         lInf_scores = backend == DNN_BACKEND_INFERENCE_ENGINE ? 0.16 : 0.11;
625         l1_geometry = 0.28; lInf_geometry = 5.94;
626     }
627     else if (target == DNN_TARGET_MYRIAD)
628     {
629         lInf_scores = 0.41;
630         l1_geometry = 0.28; lInf_geometry = 5.94;
631     }
632     else
633     {
634         l1_geometry = 1e-4, lInf_geometry = 3e-3;
635     }
636     normAssert(scores, blobFromNPY(refScoresPath), "scores", l1_scores, lInf_scores);
637     normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", l1_geometry, lInf_geometry);
638     expectNoFallbacksFromIE(net);
639 }
640
641 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, dnnBackendsAndTargets());
642
643 TEST_P(Test_TensorFlow_layers, fp16_weights)
644 {
645     float l1 = 0.00078;
646     float lInf = 0.012;
647     runTensorFlowNet("fp16_single_conv", false, l1, lInf);
648     runTensorFlowNet("fp16_max_pool_odd_same", false, l1, lInf);
649     runTensorFlowNet("fp16_eltwise_add_mul", false, l1, lInf);
650     runTensorFlowNet("fp16_pad_and_concat", false, l1, lInf);
651     runTensorFlowNet("fp16_padding_valid", false, l1, lInf);
652     // Reference output values are in range [0.0889, 1.651]
653     runTensorFlowNet("fp16_max_pool_even", false, (target == DNN_TARGET_MYRIAD) ? 0.003 : l1, lInf);
654     if (target == DNN_TARGET_MYRIAD) {
655         l1 = 0.0041;
656         lInf = 0.024;
657     }
658     // Reference output values are in range [0, 10.75]
659     runTensorFlowNet("fp16_deconvolution", false, l1, lInf);
660     // Reference output values are in range [0.418, 2.297]
661     runTensorFlowNet("fp16_max_pool_odd_valid", false, l1, lInf);
662 }
663
664 TEST_P(Test_TensorFlow_layers, fp16_padding_same)
665 {
666     // Reference output values are in range [-3.504, -0.002]
667     runTensorFlowNet("fp16_padding_same", false, 7e-4, 4e-3);
668 }
669
670 TEST_P(Test_TensorFlow_layers, defun)
671 {
672     runTensorFlowNet("defun_dropout");
673 }
674
675 TEST_P(Test_TensorFlow_layers, quantized)
676 {
677     runTensorFlowNet("uint8_single_conv");
678 }
679
680 TEST_P(Test_TensorFlow_layers, lstm)
681 {
682     if (backend == DNN_BACKEND_INFERENCE_ENGINE)
683         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
684     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
685         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
686     runTensorFlowNet("lstm", true);
687     runTensorFlowNet("lstm", true, 0.0, 0.0, true);
688 }
689
690 TEST_P(Test_TensorFlow_layers, split)
691 {
692     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD &&
693         getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_2)
694         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_2);
695     runTensorFlowNet("split");
696     if (backend == DNN_BACKEND_INFERENCE_ENGINE)
697         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
698     runTensorFlowNet("split_equals");
699 }
700
701 TEST_P(Test_TensorFlow_layers, resize_nearest_neighbor)
702 {
703     runTensorFlowNet("resize_nearest_neighbor");
704     runTensorFlowNet("keras_upsampling2d");
705 }
706
707 TEST_P(Test_TensorFlow_layers, slice)
708 {
709     if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
710         (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
711         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
712     runTensorFlowNet("slice_4d");
713     runTensorFlowNet("strided_slice");
714 }
715
716 TEST_P(Test_TensorFlow_layers, softmax)
717 {
718     runTensorFlowNet("keras_softmax");
719     runTensorFlowNet("slim_softmax");
720 }
721
722 TEST_P(Test_TensorFlow_layers, slim_softmax_v2)
723 {
724 #if defined(INF_ENGINE_RELEASE)
725     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD &&
726         getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_2
727     )
728         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_2);
729 #endif
730     runTensorFlowNet("slim_softmax_v2");
731 }
732
733 TEST_P(Test_TensorFlow_layers, relu6)
734 {
735     runTensorFlowNet("keras_relu6");
736     runTensorFlowNet("keras_relu6", /*hasText*/ true);
737 }
738
739 TEST_P(Test_TensorFlow_layers, subpixel)
740 {
741     if (backend == DNN_BACKEND_INFERENCE_ENGINE)
742         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
743     runTensorFlowNet("subpixel");
744 }
745
746 TEST_P(Test_TensorFlow_layers, keras_mobilenet_head)
747 {
748     runTensorFlowNet("keras_mobilenet_head");
749     runTensorFlowNet("keras_learning_phase");
750 }
751
752 TEST_P(Test_TensorFlow_layers, resize_bilinear)
753 {
754     runTensorFlowNet("resize_bilinear");
755     runTensorFlowNet("resize_bilinear_factor");
756 }
757
758 TEST_P(Test_TensorFlow_layers, squeeze)
759 {
760 #if defined(INF_ENGINE_RELEASE)
761     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD
762             && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_2
763     )
764         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_2);
765 #endif
766     int inpShapes[][4] = {{1, 3, 4, 2}, {1, 3, 1, 2}, {1, 3, 4, 1}, {1, 3, 4, 1}};  // TensorFlow's shape (NHWC)
767     int outShapes[][3] = {{3, 4, 2}, {1, 3, 2}, {1, 3, 4}, {1, 3, 4}};
768     int squeeze_dims[] = {0, 2, 3, -1};
769     for (int i = 0; i < 4; ++i)
770     {
771         SCOPED_TRACE(format("i=%d", i));
772         std::string pbtxt =
773             "node { name: \"input\" op: \"Placeholder\""
774             "attr { key: \"data_format\" value { s: \"NHWC\" } } }"
775             "node { name: \"squeeze\" op: \"Squeeze\" input: \"input\""
776               "attr { key: \"squeeze_dims\" value { list { i:" + format("%d", squeeze_dims[i]) + "}}}}";
777         Net net = readNetFromTensorflow(0, 0, pbtxt.c_str(), pbtxt.size());
778         net.setPreferableBackend(backend);
779         net.setPreferableTarget(target);
780         Mat tfInp(4, &inpShapes[i][0], CV_32F);
781         randu(tfInp, -1, 1);
782
783         // NHWC to NCHW
784         CV_Assert(inpShapes[i][0] == 1);
785         std::swap(inpShapes[i][2], inpShapes[i][3]);
786         std::swap(inpShapes[i][1], inpShapes[i][2]);
787         Mat cvInp = tfInp.reshape(1, tfInp.total() / inpShapes[i][1]).t();
788         cvInp = cvInp.reshape(1, 4, &inpShapes[i][0]);
789
790         net.setInput(cvInp);
791         Mat out = net.forward();
792         normAssert(tfInp.reshape(1, 3, &outShapes[i][0]), out, "", default_l1, default_lInf);
793     }
794 }
795
796 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_layers, dnnBackendsAndTargets());
797
798 TEST(Test_TensorFlow, two_inputs)
799 {
800     Net net = readNet(path("two_inputs_net.pbtxt"));
801     net.setPreferableBackend(DNN_BACKEND_OPENCV);
802
803     Mat firstInput(2, 3, CV_32FC1), secondInput(2, 3, CV_32FC1);
804     randu(firstInput, -1, 1);
805     randu(secondInput, -1, 1);
806
807     net.setInput(firstInput, "first_input");
808     net.setInput(secondInput, "second_input");
809     Mat out = net.forward();
810
811     normAssert(out, firstInput + secondInput);
812 }
813
814 TEST(Test_TensorFlow, Mask_RCNN)
815 {
816     applyTestTag(CV_TEST_TAG_MEMORY_1GB, CV_TEST_TAG_DEBUG_VERYLONG);
817     Mat img = imread(findDataFile("dnn/street.png"));
818     std::string proto = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt");
819     std::string model = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pb", false);
820
821     Net net = readNetFromTensorflow(model, proto);
822     Mat refDetections = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_out.npy"));
823     Mat refMasks = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_masks.npy"));
824     Mat blob = blobFromImage(img, 1.0f, Size(800, 800), Scalar(), true, false);
825
826     net.setPreferableBackend(DNN_BACKEND_OPENCV);
827
828     net.setInput(blob);
829
830     // Mask-RCNN predicts bounding boxes and segmentation masks.
831     std::vector<String> outNames(2);
832     outNames[0] = "detection_out_final";
833     outNames[1] = "detection_masks";
834
835     std::vector<Mat> outs;
836     net.forward(outs, outNames);
837
838     Mat outDetections = outs[0];
839     Mat outMasks = outs[1];
840     normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5);
841
842     // Output size of masks is NxCxHxW where
843     // N - number of detected boxes
844     // C - number of classes (excluding background)
845     // HxW - segmentation shape
846     const int numDetections = outDetections.size[2];
847
848     int masksSize[] = {1, numDetections, outMasks.size[2], outMasks.size[3]};
849     Mat masks(4, &masksSize[0], CV_32F);
850
851     std::vector<cv::Range> srcRanges(4, cv::Range::all());
852     std::vector<cv::Range> dstRanges(4, cv::Range::all());
853
854     outDetections = outDetections.reshape(1, outDetections.total() / 7);
855     for (int i = 0; i < numDetections; ++i)
856     {
857         // Get a class id for this bounding box and copy mask only for that class.
858         int classId = static_cast<int>(outDetections.at<float>(i, 1));
859         srcRanges[0] = dstRanges[1] = cv::Range(i, i + 1);
860         srcRanges[1] = cv::Range(classId, classId + 1);
861         outMasks(srcRanges).copyTo(masks(dstRanges));
862     }
863     cv::Range topRefMasks[] = {Range::all(), Range(0, numDetections), Range::all(), Range::all()};
864     normAssert(masks, refMasks(&topRefMasks[0]));
865 }
866
867 }