Merge pull request #11627 from alalek:fix_python_cast_method_with_kw
[platform/upstream/opencv.git] / modules / dnn / test / test_tf_importer.cpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 // Copyright (C) 2017, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
7
8 /*
9 Test for Tensorflow models loading
10 */
11
12 #include "test_precomp.hpp"
13 #include "npy_blob.hpp"
14
15 #include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS
16
17 namespace opencv_test
18 {
19
20 using namespace cv;
21 using namespace cv::dnn;
22
23 template<typename TString>
24 static std::string _tf(TString filename)
25 {
26     return (getOpenCVExtraDir() + "/dnn/") + filename;
27 }
28
29 TEST(Test_TensorFlow, read_inception)
30 {
31     Net net;
32     {
33         const string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
34         net = readNetFromTensorflow(model);
35         ASSERT_FALSE(net.empty());
36     }
37
38     Mat sample = imread(_tf("grace_hopper_227.png"));
39     ASSERT_TRUE(!sample.empty());
40     Mat input;
41     resize(sample, input, Size(224, 224));
42     input -= 128; // mean sub
43
44     Mat inputBlob = blobFromImage(input);
45
46     net.setInput(inputBlob, "input");
47     Mat out = net.forward("softmax2");
48
49     std::cout << out.dims << std::endl;
50 }
51
52 TEST(Test_TensorFlow, inception_accuracy)
53 {
54     Net net;
55     {
56         const string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
57         net = readNetFromTensorflow(model);
58         ASSERT_FALSE(net.empty());
59     }
60
61     Mat sample = imread(_tf("grace_hopper_227.png"));
62     ASSERT_TRUE(!sample.empty());
63     resize(sample, sample, Size(224, 224));
64     Mat inputBlob = blobFromImage(sample);
65
66     net.setInput(inputBlob, "input");
67     Mat out = net.forward("softmax2");
68
69     Mat ref = blobFromNPY(_tf("tf_inception_prob.npy"));
70
71     normAssert(ref, out);
72 }
73
74 static std::string path(const std::string& file)
75 {
76     return findDataFile("dnn/tensorflow/" + file, false);
77 }
78
79 static void runTensorFlowNet(const std::string& prefix, int targetId = DNN_TARGET_CPU, bool hasText = false,
80                              double l1 = 1e-5, double lInf = 1e-4,
81                              bool memoryLoad = false)
82 {
83     std::string netPath = path(prefix + "_net.pb");
84     std::string netConfig = (hasText ? path(prefix + "_net.pbtxt") : "");
85     std::string inpPath = path(prefix + "_in.npy");
86     std::string outPath = path(prefix + "_out.npy");
87
88     Net net;
89     if (memoryLoad)
90     {
91         // Load files into a memory buffers
92         string dataModel;
93         ASSERT_TRUE(readFileInMemory(netPath, dataModel));
94
95         string dataConfig;
96         if (hasText)
97             ASSERT_TRUE(readFileInMemory(netConfig, dataConfig));
98
99         net = readNetFromTensorflow(dataModel.c_str(), dataModel.size(),
100                                     dataConfig.c_str(), dataConfig.size());
101     }
102     else
103         net = readNetFromTensorflow(netPath, netConfig);
104
105     ASSERT_FALSE(net.empty());
106
107     net.setPreferableBackend(DNN_BACKEND_DEFAULT);
108     net.setPreferableTarget(targetId);
109
110     cv::Mat input = blobFromNPY(inpPath);
111     cv::Mat target = blobFromNPY(outPath);
112
113     net.setInput(input);
114     cv::Mat output = net.forward();
115     normAssert(target, output, "", l1, lInf);
116 }
117
118 typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_layers;
119
120 TEST_P(Test_TensorFlow_layers, conv)
121 {
122     int targetId = GetParam();
123     runTensorFlowNet("single_conv", targetId);
124     runTensorFlowNet("atrous_conv2d_valid", targetId);
125     runTensorFlowNet("atrous_conv2d_same", targetId);
126     runTensorFlowNet("depthwise_conv2d", targetId);
127     runTensorFlowNet("keras_atrous_conv2d_same", targetId);
128 }
129
130 TEST_P(Test_TensorFlow_layers, padding)
131 {
132     int targetId = GetParam();
133     runTensorFlowNet("padding_same", targetId);
134     runTensorFlowNet("padding_valid", targetId);
135     runTensorFlowNet("spatial_padding", targetId);
136 }
137
138 TEST_P(Test_TensorFlow_layers, eltwise_add_mul)
139 {
140     runTensorFlowNet("eltwise_add_mul", GetParam());
141 }
142
143 TEST_P(Test_TensorFlow_layers, pad_and_concat)
144 {
145     runTensorFlowNet("pad_and_concat", GetParam());
146 }
147
148 TEST_P(Test_TensorFlow_layers, batch_norm)
149 {
150     int targetId = GetParam();
151     runTensorFlowNet("batch_norm", targetId);
152     runTensorFlowNet("fused_batch_norm", targetId);
153     runTensorFlowNet("batch_norm_text", targetId, true);
154     runTensorFlowNet("mvn_batch_norm", targetId);
155     runTensorFlowNet("mvn_batch_norm_1x1", targetId);
156     runTensorFlowNet("unfused_batch_norm", targetId);
157     runTensorFlowNet("fused_batch_norm_no_gamma", targetId);
158     runTensorFlowNet("unfused_batch_norm_no_gamma", targetId);
159 }
160
161 TEST_P(Test_TensorFlow_layers, pooling)
162 {
163     int targetId = GetParam();
164     cv::ocl::Device d = cv::ocl::Device::getDefault();
165     bool loosenFlag = targetId == DNN_TARGET_OPENCL && d.isIntel() && d.type() == cv::ocl::Device::TYPE_CPU;
166     runTensorFlowNet("max_pool_even", targetId);
167     runTensorFlowNet("max_pool_odd_valid", targetId);
168     runTensorFlowNet("ave_pool_same", targetId);
169     runTensorFlowNet("max_pool_odd_same", targetId, false, loosenFlag ? 3e-5 : 1e-5, loosenFlag ? 3e-4 : 1e-4);
170     runTensorFlowNet("reduce_mean", targetId);  // an average pooling over all spatial dimensions.
171 }
172
173 TEST_P(Test_TensorFlow_layers, deconvolution)
174 {
175     int targetId = GetParam();
176     runTensorFlowNet("deconvolution", targetId);
177     runTensorFlowNet("deconvolution_same", targetId);
178     runTensorFlowNet("deconvolution_stride_2_same", targetId);
179     runTensorFlowNet("deconvolution_adj_pad_valid", targetId);
180     runTensorFlowNet("deconvolution_adj_pad_same", targetId);
181     runTensorFlowNet("keras_deconv_valid", targetId);
182     runTensorFlowNet("keras_deconv_same", targetId);
183 }
184
185 TEST_P(Test_TensorFlow_layers, matmul)
186 {
187     int targetId = GetParam();
188     runTensorFlowNet("matmul", targetId);
189     runTensorFlowNet("nhwc_reshape_matmul", targetId);
190     runTensorFlowNet("nhwc_transpose_reshape_matmul", targetId);
191 }
192
193 TEST_P(Test_TensorFlow_layers, reshape)
194 {
195     int targetId = GetParam();
196     runTensorFlowNet("shift_reshape_no_reorder", targetId);
197     runTensorFlowNet("reshape_reduce", targetId);
198     runTensorFlowNet("flatten", targetId, true);
199     runTensorFlowNet("unfused_flatten", targetId);
200     runTensorFlowNet("unfused_flatten_unknown_batch", targetId);
201 }
202
203 TEST_P(Test_TensorFlow_layers, l2_normalize)
204 {
205     int targetId = GetParam();
206     runTensorFlowNet("l2_normalize", targetId);
207     runTensorFlowNet("l2_normalize_3d", targetId);
208 }
209
210 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_layers, availableDnnTargets());
211
212 typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_nets;
213
214 TEST_P(Test_TensorFlow_nets, MobileNet_SSD)
215 {
216     std::string netPath = findDataFile("dnn/ssd_mobilenet_v1_coco.pb", false);
217     std::string netConfig = findDataFile("dnn/ssd_mobilenet_v1_coco.pbtxt", false);
218     std::string imgPath = findDataFile("dnn/street.png", false);
219
220     Mat inp;
221     resize(imread(imgPath), inp, Size(300, 300));
222     inp = blobFromImage(inp, 1.0f / 127.5, Size(), Scalar(127.5, 127.5, 127.5), true);
223
224     std::vector<String> outNames(3);
225     outNames[0] = "concat";
226     outNames[1] = "concat_1";
227     outNames[2] = "detection_out";
228
229     std::vector<Mat> target(outNames.size());
230     for (int i = 0; i < outNames.size(); ++i)
231     {
232         std::string path = findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco." + outNames[i] + ".npy", false);
233         target[i] = blobFromNPY(path);
234     }
235
236     Net net = readNetFromTensorflow(netPath, netConfig);
237
238     net.setPreferableTarget(GetParam());
239
240     net.setInput(inp);
241
242     std::vector<Mat> output;
243     net.forward(output, outNames);
244
245     normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1), "", 1e-5, 1.5e-4);
246     normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 3e-4);
247     normAssertDetections(target[2], output[2], "", 0.2);
248 }
249
250 TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
251 {
252     std::string proto = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pbtxt", false);
253     std::string model = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pb", false);
254
255     Net net = readNetFromTensorflow(model, proto);
256     Mat img = imread(findDataFile("dnn/street.png", false));
257     Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false);
258
259     net.setPreferableTarget(GetParam());
260
261     net.setInput(blob);
262     // Output has shape 1x1xNx7 where N - number of detections.
263     // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
264     Mat out = net.forward();
265     Mat ref = (Mat_<float>(5, 7) << 0, 1, 0.90176028, 0.19872092, 0.36311883, 0.26461923, 0.63498729,
266                                     0, 3, 0.93569964, 0.64865261, 0.45906419, 0.80675775, 0.65708131,
267                                     0, 3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015,
268                                     0, 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
269                                     0, 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
270     normAssertDetections(ref, out, "", 0.5);
271 }
272
273 TEST_P(Test_TensorFlow_nets, Inception_v2_Faster_RCNN)
274 {
275     std::string proto = findDataFile("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt", false);
276     std::string model = findDataFile("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pb", false);
277
278     Net net = readNetFromTensorflow(model, proto);
279     Mat img = imread(findDataFile("dnn/dog416.png", false));
280     Mat blob = blobFromImage(img, 1.0f / 127.5, Size(800, 600), Scalar(127.5, 127.5, 127.5), true, false);
281
282     net.setInput(blob);
283     Mat out = net.forward();
284
285     Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/faster_rcnn_inception_v2_coco_2018_01_28.detection_out.npy"));
286     normAssertDetections(ref, out, "", 0.3);
287 }
288
289 TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)
290 {
291     std::string proto = findDataFile("dnn/opencv_face_detector.pbtxt", false);
292     std::string model = findDataFile("dnn/opencv_face_detector_uint8.pb", false);
293
294     Net net = readNetFromTensorflow(model, proto);
295     Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
296     Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
297
298     net.setPreferableTarget(GetParam());
299
300     net.setInput(blob);
301     // Output has shape 1x1xNx7 where N - number of detections.
302     // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
303     Mat out = net.forward();
304
305     // References are from test for Caffe model.
306     Mat ref = (Mat_<float>(6, 7) << 0, 1, 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
307                                     0, 1, 0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
308                                     0, 1, 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
309                                     0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
310                                     0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
311                                     0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
312     normAssertDetections(ref, out, "", 0.9, 3.4e-3, 1e-2);
313 }
314
315 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, availableDnnTargets());
316
317 typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_fp16;
318
319 TEST_P(Test_TensorFlow_fp16, tests)
320 {
321     int targetId = GetParam();
322     const float l1 = 7e-4;
323     const float lInf = 1e-2;
324     runTensorFlowNet("fp16_single_conv", targetId, false, l1, lInf);
325     runTensorFlowNet("fp16_deconvolution", targetId, false, l1, lInf);
326     runTensorFlowNet("fp16_max_pool_odd_same", targetId, false, l1, lInf);
327     runTensorFlowNet("fp16_padding_valid", targetId, false, l1, lInf);
328     runTensorFlowNet("fp16_eltwise_add_mul", targetId, false, l1, lInf);
329     runTensorFlowNet("fp16_max_pool_odd_valid", targetId, false, l1, lInf);
330     runTensorFlowNet("fp16_pad_and_concat", targetId, false, l1, lInf);
331     runTensorFlowNet("fp16_max_pool_even", targetId, false, l1, lInf);
332     runTensorFlowNet("fp16_padding_same", targetId, false, l1, lInf);
333 }
334
335 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_fp16,
336                         Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16));
337
338 TEST(Test_TensorFlow, defun)
339 {
340     runTensorFlowNet("defun_dropout");
341 }
342
343 TEST(Test_TensorFlow, quantized)
344 {
345     runTensorFlowNet("uint8_single_conv");
346 }
347
348 TEST(Test_TensorFlow, lstm)
349 {
350     runTensorFlowNet("lstm", DNN_TARGET_CPU, true);
351 }
352
353 TEST(Test_TensorFlow, split)
354 {
355     runTensorFlowNet("split_equals");
356 }
357
358 TEST(Test_TensorFlow, resize_nearest_neighbor)
359 {
360     runTensorFlowNet("resize_nearest_neighbor");
361 }
362
363 TEST(Test_TensorFlow, slice)
364 {
365     runTensorFlowNet("slice_4d");
366 }
367
368 TEST(Test_TensorFlow, softmax)
369 {
370     runTensorFlowNet("keras_softmax");
371 }
372
373 TEST(Test_TensorFlow, relu6)
374 {
375     runTensorFlowNet("keras_relu6");
376 }
377
378 TEST(Test_TensorFlow, keras_mobilenet_head)
379 {
380     runTensorFlowNet("keras_mobilenet_head");
381 }
382
383 TEST(Test_TensorFlow, memory_read)
384 {
385     double l1 = 1e-5;
386     double lInf = 1e-4;
387     runTensorFlowNet("lstm", DNN_TARGET_CPU, true, l1, lInf, true);
388
389     runTensorFlowNet("batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
390     runTensorFlowNet("fused_batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
391     runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
392 }
393
394 // Test a custom layer.
395 class ResizeBilinearLayer CV_FINAL : public Layer
396 {
397 public:
398     ResizeBilinearLayer(const LayerParams &params) : Layer(params),
399         outWidth(0), outHeight(0), factorWidth(1), factorHeight(1)
400     {
401         CV_Assert(!params.get<bool>("align_corners", false));
402         CV_Assert(!blobs.empty());
403
404         for (size_t i = 0; i < blobs.size(); ++i)
405             CV_Assert(blobs[i].type() == CV_32SC1);
406
407         if (blobs.size() == 1)
408         {
409             CV_Assert(blobs[0].total() == 2);
410             outHeight = blobs[0].at<int>(0, 0);
411             outWidth = blobs[0].at<int>(0, 1);
412         }
413         else
414         {
415             CV_Assert(blobs.size() == 2, blobs[0].total() == 1, blobs[1].total() == 1);
416             factorHeight = blobs[0].at<int>(0, 0);
417             factorWidth = blobs[1].at<int>(0, 0);
418             outHeight = outWidth = 0;
419         }
420     }
421
422     static Ptr<Layer> create(LayerParams& params)
423     {
424         return Ptr<Layer>(new ResizeBilinearLayer(params));
425     }
426
427     virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
428                                  const int requiredOutputs,
429                                  std::vector<std::vector<int> > &outputs,
430                                  std::vector<std::vector<int> > &internals) const CV_OVERRIDE
431     {
432         std::vector<int> outShape(4);
433         outShape[0] = inputs[0][0];  // batch size
434         outShape[1] = inputs[0][1];  // number of channels
435         outShape[2] = outHeight != 0 ? outHeight : (inputs[0][2] * factorHeight);
436         outShape[3] = outWidth != 0 ? outWidth : (inputs[0][3] * factorWidth);
437         outputs.assign(1, outShape);
438         return false;
439     }
440
441     virtual void finalize(const std::vector<Mat*>& inputs, std::vector<Mat> &outputs) CV_OVERRIDE
442     {
443         if (!outWidth && !outHeight)
444         {
445             outHeight = outputs[0].size[2];
446             outWidth = outputs[0].size[3];
447         }
448     }
449
450     // This implementation is based on a reference implementation from
451     // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
452     virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) CV_OVERRIDE
453     {
454         Mat& inp = *inputs[0];
455         Mat& out = outputs[0];
456         const float* inpData = (float*)inp.data;
457         float* outData = (float*)out.data;
458
459         const int batchSize = inp.size[0];
460         const int numChannels = inp.size[1];
461         const int inpHeight = inp.size[2];
462         const int inpWidth = inp.size[3];
463
464         float heightScale = static_cast<float>(inpHeight) / outHeight;
465         float widthScale = static_cast<float>(inpWidth) / outWidth;
466         for (int b = 0; b < batchSize; ++b)
467         {
468             for (int y = 0; y < outHeight; ++y)
469             {
470                 float input_y = y * heightScale;
471                 int y0 = static_cast<int>(std::floor(input_y));
472                 int y1 = std::min(y0 + 1, inpHeight - 1);
473                 for (int x = 0; x < outWidth; ++x)
474                 {
475                     float input_x = x * widthScale;
476                     int x0 = static_cast<int>(std::floor(input_x));
477                     int x1 = std::min(x0 + 1, inpWidth - 1);
478                     for (int c = 0; c < numChannels; ++c)
479                     {
480                         float interpolation =
481                             inpData[offset(inp.size, c, x0, y0, b)] * (1 - (input_y - y0)) * (1 - (input_x - x0)) +
482                             inpData[offset(inp.size, c, x0, y1, b)] * (input_y - y0) * (1 - (input_x - x0)) +
483                             inpData[offset(inp.size, c, x1, y0, b)] * (1 - (input_y - y0)) * (input_x - x0) +
484                             inpData[offset(inp.size, c, x1, y1, b)] * (input_y - y0) * (input_x - x0);
485                         outData[offset(out.size, c, x, y, b)] = interpolation;
486                     }
487                 }
488             }
489         }
490     }
491
492     virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
493
494 private:
495     static inline int offset(const MatSize& size, int c, int x, int y, int b)
496     {
497         return x + size[3] * (y + size[2] * (c + size[1] * b));
498     }
499
500     int outWidth, outHeight, factorWidth, factorHeight;
501 };
502
503 TEST(Test_TensorFlow, resize_bilinear)
504 {
505     CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
506     runTensorFlowNet("resize_bilinear");
507     runTensorFlowNet("resize_bilinear_factor");
508     LayerFactory::unregisterLayer("ResizeBilinear");
509 }
510
511 // inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png')
512 // inp = inp[:,:,[2, 1, 0]].astype(np.float32).reshape(1, 512, 512, 3)
513 // outs = sess.run([sess.graph.get_tensor_by_name('feature_fusion/Conv_7/Sigmoid:0'),
514 //                  sess.graph.get_tensor_by_name('feature_fusion/concat_3:0')],
515 //                 feed_dict={'input_images:0': inp})
516 // scores = np.ascontiguousarray(outs[0].transpose(0, 3, 1, 2))
517 // geometry = np.ascontiguousarray(outs[1].transpose(0, 3, 1, 2))
518 // np.save('east_text_detection.scores.npy', scores)
519 // np.save('east_text_detection.geometry.npy', geometry)
520 TEST(Test_TensorFlow, EAST_text_detection)
521 {
522     CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
523     std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false);
524     std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false);
525     std::string refScoresPath = findDataFile("dnn/east_text_detection.scores.npy", false);
526     std::string refGeometryPath = findDataFile("dnn/east_text_detection.geometry.npy", false);
527
528     Net net = readNet(findDataFile("dnn/frozen_east_text_detection.pb", false));
529
530     Mat img = imread(imgPath);
531     Mat inp = blobFromImage(img, 1.0, Size(), Scalar(123.68, 116.78, 103.94), true, false);
532     net.setInput(inp);
533
534     std::vector<Mat> outs;
535     std::vector<String> outNames(2);
536     outNames[0] = "feature_fusion/Conv_7/Sigmoid";
537     outNames[1] = "feature_fusion/concat_3";
538     net.forward(outs, outNames);
539
540     Mat scores = outs[0];
541     Mat geometry = outs[1];
542
543     normAssert(scores, blobFromNPY(refScoresPath), "scores");
544     normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 1e-4, 3e-3);
545     LayerFactory::unregisterLayer("ResizeBilinear");
546 }
547
548 }