// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

/*
Tests for loading TensorFlow models
*/

#include "test_precomp.hpp"
#include "npy_blob.hpp"

#include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS

namespace opencv_test
{

using namespace cv;
using namespace cv::dnn;

template<typename TString>
static std::string _tf(TString filename)
{
    return (getOpenCVExtraDir() + "/dnn/") + filename;
}

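// Smoke test: only checks that the Inception graph can be imported and a forward pass
// runs; the numerical output is validated against reference values in inception_accuracy below.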
TEST(Test_TensorFlow, read_inception)
{
    Net net;
    {
        const string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
        net = readNetFromTensorflow(model);
        ASSERT_FALSE(net.empty());
    }

    Mat sample = imread(_tf("grace_hopper_227.png"));
    ASSERT_TRUE(!sample.empty());
    Mat input;
    resize(sample, input, Size(224, 224));
    input -= 128; // mean sub

    Mat inputBlob = blobFromImage(input);

    net.setInput(inputBlob, "input");
    Mat out = net.forward("softmax2");

    std::cout << out.dims << std::endl;
}

TEST(Test_TensorFlow, inception_accuracy)
{
    Net net;
    {
        const string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
        net = readNetFromTensorflow(model);
        ASSERT_FALSE(net.empty());
    }

    Mat sample = imread(_tf("grace_hopper_227.png"));
    ASSERT_TRUE(!sample.empty());
    resize(sample, sample, Size(224, 224));
    Mat inputBlob = blobFromImage(sample);

    net.setInput(inputBlob, "input");
    Mat out = net.forward("softmax2");

    Mat ref = blobFromNPY(_tf("tf_inception_prob.npy"));

    normAssert(ref, out);
}

static std::string path(const std::string& file)
{
    return findDataFile("dnn/tensorflow/" + file, false);
}

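// Runs a small TensorFlow graph from opencv_extra and compares its output with a
// reference blob. Test data follows the naming convention <prefix>_net.pb
// (plus an optional <prefix>_net.pbtxt text graph), <prefix>_in.npy and <prefix>_out.npy.
// l1/lInf are the tolerances forwarded to normAssert; memoryLoad exercises the
// buffer-based readNetFromTensorflow overload instead of the file-based one.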
static void runTensorFlowNet(const std::string& prefix, int targetId = DNN_TARGET_CPU, bool hasText = false,
                             double l1 = 1e-5, double lInf = 1e-4,
                             bool memoryLoad = false)
{
    std::string netPath = path(prefix + "_net.pb");
    std::string netConfig = (hasText ? path(prefix + "_net.pbtxt") : "");
    std::string inpPath = path(prefix + "_in.npy");
    std::string outPath = path(prefix + "_out.npy");

    Net net;
    if (memoryLoad)
    {
        // Load files into memory buffers
        string dataModel;
        ASSERT_TRUE(readFileInMemory(netPath, dataModel));

        string dataConfig;
        if (hasText)
            ASSERT_TRUE(readFileInMemory(netConfig, dataConfig));

        net = readNetFromTensorflow(dataModel.c_str(), dataModel.size(),
                                    dataConfig.c_str(), dataConfig.size());
    }
    else
        net = readNetFromTensorflow(netPath, netConfig);

    ASSERT_FALSE(net.empty());

    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
    net.setPreferableTarget(targetId);

    cv::Mat input = blobFromNPY(inpPath);
    cv::Mat target = blobFromNPY(outPath);

    net.setInput(input);
    cv::Mat output = net.forward();
    normAssert(target, output, "", l1, lInf);
}

typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_layers;

TEST_P(Test_TensorFlow_layers, conv)
{
    int targetId = GetParam();
    runTensorFlowNet("single_conv", targetId);
    runTensorFlowNet("atrous_conv2d_valid", targetId);
    runTensorFlowNet("atrous_conv2d_same", targetId);
    runTensorFlowNet("depthwise_conv2d", targetId);
    runTensorFlowNet("keras_atrous_conv2d_same", targetId);
}

TEST_P(Test_TensorFlow_layers, padding)
{
    int targetId = GetParam();
    runTensorFlowNet("padding_same", targetId);
    runTensorFlowNet("padding_valid", targetId);
    runTensorFlowNet("spatial_padding", targetId);
}

TEST_P(Test_TensorFlow_layers, eltwise_add_mul)
{
    runTensorFlowNet("eltwise_add_mul", GetParam());
}

TEST_P(Test_TensorFlow_layers, pad_and_concat)
{
    runTensorFlowNet("pad_and_concat", GetParam());
}

TEST_P(Test_TensorFlow_layers, batch_norm)
{
    int targetId = GetParam();
    runTensorFlowNet("batch_norm", targetId);
    runTensorFlowNet("fused_batch_norm", targetId);
    runTensorFlowNet("batch_norm_text", targetId, true);
    runTensorFlowNet("mvn_batch_norm", targetId);
    runTensorFlowNet("mvn_batch_norm_1x1", targetId);
    runTensorFlowNet("unfused_batch_norm", targetId);
    runTensorFlowNet("fused_batch_norm_no_gamma", targetId);
    runTensorFlowNet("unfused_batch_norm_no_gamma", targetId);
}

TEST_P(Test_TensorFlow_layers, pooling)
{
    int targetId = GetParam();
    cv::ocl::Device d = cv::ocl::Device::getDefault();
    bool loosenFlag = targetId == DNN_TARGET_OPENCL && d.isIntel() && d.type() == cv::ocl::Device::TYPE_CPU;
    runTensorFlowNet("max_pool_even", targetId);
    runTensorFlowNet("max_pool_odd_valid", targetId);
    runTensorFlowNet("ave_pool_same", targetId);
    runTensorFlowNet("max_pool_odd_same", targetId, false, loosenFlag ? 3e-5 : 1e-5, loosenFlag ? 3e-4 : 1e-4);
    runTensorFlowNet("reduce_mean", targetId);  // an average pooling over all spatial dimensions.
}

TEST_P(Test_TensorFlow_layers, deconvolution)
{
    int targetId = GetParam();
    runTensorFlowNet("deconvolution", targetId);
    runTensorFlowNet("deconvolution_same", targetId);
    runTensorFlowNet("deconvolution_stride_2_same", targetId);
    runTensorFlowNet("deconvolution_adj_pad_valid", targetId);
    runTensorFlowNet("deconvolution_adj_pad_same", targetId);
    runTensorFlowNet("keras_deconv_valid", targetId);
    runTensorFlowNet("keras_deconv_same", targetId);
}

TEST_P(Test_TensorFlow_layers, matmul)
{
    int targetId = GetParam();
    runTensorFlowNet("matmul", targetId);
    runTensorFlowNet("nhwc_reshape_matmul", targetId);
    runTensorFlowNet("nhwc_transpose_reshape_matmul", targetId);
}

TEST_P(Test_TensorFlow_layers, reshape)
{
    int targetId = GetParam();
    runTensorFlowNet("shift_reshape_no_reorder", targetId);
    runTensorFlowNet("reshape_reduce", targetId);
    runTensorFlowNet("flatten", targetId, true);
    runTensorFlowNet("unfused_flatten", targetId);
    runTensorFlowNet("unfused_flatten_unknown_batch", targetId);
}

TEST_P(Test_TensorFlow_layers, l2_normalize)
{
    int targetId = GetParam();
    runTensorFlowNet("l2_normalize", targetId);
    runTensorFlowNet("l2_normalize_3d", targetId);
}

INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_layers, availableDnnTargets());

typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_nets;

TEST_P(Test_TensorFlow_nets, MobileNet_SSD)
{
    std::string netPath = findDataFile("dnn/ssd_mobilenet_v1_coco.pb", false);
    std::string netConfig = findDataFile("dnn/ssd_mobilenet_v1_coco.pbtxt", false);
    std::string imgPath = findDataFile("dnn/street.png", false);

    Mat inp;
    resize(imread(imgPath), inp, Size(300, 300));
    inp = blobFromImage(inp, 1.0f / 127.5, Size(), Scalar(127.5, 127.5, 127.5), true);

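    // Besides the final detections blob, request two intermediate SSD tensors
    // (the raw box encodings and class scores that feed the appended DetectionOutput layer).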
    std::vector<String> outNames(3);
    outNames[0] = "concat";
    outNames[1] = "concat_1";
    outNames[2] = "detection_out";

    std::vector<Mat> target(outNames.size());
    for (int i = 0; i < outNames.size(); ++i)
    {
        std::string path = findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco." + outNames[i] + ".npy", false);
        target[i] = blobFromNPY(path);
    }

    Net net = readNetFromTensorflow(netPath, netConfig);

    net.setPreferableTarget(GetParam());

    net.setInput(inp);

    std::vector<Mat> output;
    net.forward(output, outNames);

    normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1), "", 1e-5, 1.5e-4);
    normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 3e-4);
    normAssertDetections(target[2], output[2], "", 0.2);
}

TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
{
    std::string proto = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pbtxt", false);
    std::string model = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pb", false);

    Net net = readNetFromTensorflow(model, proto);
    Mat img = imread(findDataFile("dnn/street.png", false));
    Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false);

    net.setPreferableTarget(GetParam());

    net.setInput(blob);
    // Output has shape 1x1xNx7, where N is the number of detections.
    // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom].
    Mat out = net.forward();
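    // A minimal sketch (illustrative only, not part of the accuracy check below) of how
    // the 1x1xNx7 blob could be decoded: view it as an Nx7 matrix, one detection per row.
    {
        Mat detections(out.size[2], 7, CV_32F, out.ptr<float>());
        for (int i = 0; i < detections.rows; ++i)
        {
            int classId = static_cast<int>(detections.at<float>(i, 1));
            float confidence = detections.at<float>(i, 2);
            // Box coordinates (columns 3..6) are normalized to [0, 1]: left, top, right, bottom.
            (void)classId; (void)confidence;
        }
    }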
    Mat ref = (Mat_<float>(5, 7) << 0, 1, 0.90176028, 0.19872092, 0.36311883, 0.26461923, 0.63498729,
                                    0, 3, 0.93569964, 0.64865261, 0.45906419, 0.80675775, 0.65708131,
                                    0, 3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015,
                                    0, 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
                                    0, 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
    normAssertDetections(ref, out, "", 0.5);
}

TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)
{
    std::string proto = findDataFile("dnn/opencv_face_detector.pbtxt", false);
    std::string model = findDataFile("dnn/opencv_face_detector_uint8.pb", false);

    Net net = readNetFromTensorflow(model, proto);
    Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
    Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);

    net.setPreferableTarget(GetParam());

    net.setInput(blob);
    // Output has shape 1x1xNx7, where N is the number of detections.
    // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom].
    Mat out = net.forward();

    // Reference values are taken from the test for the Caffe model.
    Mat ref = (Mat_<float>(6, 7) << 0, 1, 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
                                    0, 1, 0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
                                    0, 1, 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
                                    0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
                                    0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
                                    0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
    normAssertDetections(ref, out, "", 0.9, 3.4e-3, 1e-2);
}

INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, availableDnnTargets());

typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_fp16;

TEST_P(Test_TensorFlow_fp16, tests)
{
    int targetId = GetParam();
    const float l1 = 7e-4;
    const float lInf = 1e-2;
    runTensorFlowNet("fp16_single_conv", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_deconvolution", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_max_pool_odd_same", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_padding_valid", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_eltwise_add_mul", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_max_pool_odd_valid", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_pad_and_concat", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_max_pool_even", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_padding_same", targetId, false, l1, lInf);
}

INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_fp16,
                        Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16));

TEST(Test_TensorFlow, defun)
{
    runTensorFlowNet("defun_dropout");
}

TEST(Test_TensorFlow, quantized)
{
    runTensorFlowNet("uint8_single_conv");
}

TEST(Test_TensorFlow, lstm)
{
    runTensorFlowNet("lstm", DNN_TARGET_CPU, true);
}

TEST(Test_TensorFlow, split)
{
    runTensorFlowNet("split_equals");
}

TEST(Test_TensorFlow, resize_nearest_neighbor)
{
    runTensorFlowNet("resize_nearest_neighbor");
}

TEST(Test_TensorFlow, slice)
{
    runTensorFlowNet("slice_4d");
}

TEST(Test_TensorFlow, softmax)
{
    runTensorFlowNet("keras_softmax");
}

TEST(Test_TensorFlow, relu6)
{
    runTensorFlowNet("keras_relu6");
}

TEST(Test_TensorFlow, keras_mobilenet_head)
{
    runTensorFlowNet("keras_mobilenet_head");
}

TEST(Test_TensorFlow, memory_read)
{
    double l1 = 1e-5;
    double lInf = 1e-4;
    runTensorFlowNet("lstm", DNN_TARGET_CPU, true, l1, lInf, true);

    runTensorFlowNet("batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
    runTensorFlowNet("fused_batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
    runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
}

// Test a custom layer.
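// The ResizeBilinear operation is resolved here through a user-registered layer
// (see CV_DNN_REGISTER_LAYER_CLASS in the tests below): it reads the target size
// (or integer scale factors) from its constant input blobs and implements
// getMemoryShapes/finalize/forward as required by the custom-layer API.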
class ResizeBilinearLayer CV_FINAL : public Layer
{
public:
    ResizeBilinearLayer(const LayerParams &params) : Layer(params),
        outWidth(0), outHeight(0), factorWidth(1), factorHeight(1)
    {
        CV_Assert(!params.get<bool>("align_corners", false));
        CV_Assert(!blobs.empty());

        for (size_t i = 0; i < blobs.size(); ++i)
            CV_Assert(blobs[i].type() == CV_32SC1);

        if (blobs.size() == 1)
        {
            CV_Assert(blobs[0].total() == 2);
            outHeight = blobs[0].at<int>(0, 0);
            outWidth = blobs[0].at<int>(0, 1);
        }
        else
        {
            CV_Assert(blobs.size() == 2, blobs[0].total() == 1, blobs[1].total() == 1);
            factorHeight = blobs[0].at<int>(0, 0);
            factorWidth = blobs[1].at<int>(0, 0);
            outHeight = outWidth = 0;
        }
    }

    static Ptr<Layer> create(LayerParams& params)
    {
        return Ptr<Layer>(new ResizeBilinearLayer(params));
    }

    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
                                 const int requiredOutputs,
                                 std::vector<std::vector<int> > &outputs,
                                 std::vector<std::vector<int> > &internals) const CV_OVERRIDE
    {
        std::vector<int> outShape(4);
        outShape[0] = inputs[0][0];  // batch size
        outShape[1] = inputs[0][1];  // number of channels
        outShape[2] = outHeight != 0 ? outHeight : (inputs[0][2] * factorHeight);
        outShape[3] = outWidth != 0 ? outWidth : (inputs[0][3] * factorWidth);
        outputs.assign(1, outShape);
        return false;
    }

    virtual void finalize(const std::vector<Mat*>& inputs, std::vector<Mat> &outputs) CV_OVERRIDE
    {
        if (!outWidth && !outHeight)
        {
            outHeight = outputs[0].size[2];
            outWidth = outputs[0].size[3];
        }
    }

    // This implementation is based on a reference implementation from
    // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
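    // For every output pixel the four nearest input pixels are blended with weights
    // derived from the fractional source coordinates (align_corners handling is not
    // implemented; it is rejected in the constructor).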
    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) CV_OVERRIDE
    {
        Mat& inp = *inputs[0];
        Mat& out = outputs[0];
        const float* inpData = (float*)inp.data;
        float* outData = (float*)out.data;

        const int batchSize = inp.size[0];
        const int numChannels = inp.size[1];
        const int inpHeight = inp.size[2];
        const int inpWidth = inp.size[3];

        float heightScale = static_cast<float>(inpHeight) / outHeight;
        float widthScale = static_cast<float>(inpWidth) / outWidth;
        for (int b = 0; b < batchSize; ++b)
        {
            for (int y = 0; y < outHeight; ++y)
            {
                float input_y = y * heightScale;
                int y0 = static_cast<int>(std::floor(input_y));
                int y1 = std::min(y0 + 1, inpHeight - 1);
                for (int x = 0; x < outWidth; ++x)
                {
                    float input_x = x * widthScale;
                    int x0 = static_cast<int>(std::floor(input_x));
                    int x1 = std::min(x0 + 1, inpWidth - 1);
                    for (int c = 0; c < numChannels; ++c)
                    {
                        float interpolation =
                            inpData[offset(inp.size, c, x0, y0, b)] * (1 - (input_y - y0)) * (1 - (input_x - x0)) +
                            inpData[offset(inp.size, c, x0, y1, b)] * (input_y - y0) * (1 - (input_x - x0)) +
                            inpData[offset(inp.size, c, x1, y0, b)] * (1 - (input_y - y0)) * (input_x - x0) +
                            inpData[offset(inp.size, c, x1, y1, b)] * (input_y - y0) * (input_x - x0);
                        outData[offset(out.size, c, x, y, b)] = interpolation;
                    }
                }
            }
        }
    }

    virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}

private:
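    // Flat index of element [b][c][y][x] in an NCHW blob; note the (c, x, y, b)
    // argument order used by the callers above.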
    static inline int offset(const MatSize& size, int c, int x, int y, int b)
    {
        return x + size[3] * (y + size[2] * (c + size[1] * b));
    }

    int outWidth, outHeight, factorWidth, factorHeight;
};

TEST(Test_TensorFlow, resize_bilinear)
{
    CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
    runTensorFlowNet("resize_bilinear");
    runTensorFlowNet("resize_bilinear_factor");
    LayerFactory::unregisterLayer("ResizeBilinear");
}

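// The reference blobs below were generated with the following TensorFlow (Python) snippet: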
// inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png')
// inp = inp[:,:,[2, 1, 0]].astype(np.float32).reshape(1, 512, 512, 3)
// outs = sess.run([sess.graph.get_tensor_by_name('feature_fusion/Conv_7/Sigmoid:0'),
//                  sess.graph.get_tensor_by_name('feature_fusion/concat_3:0')],
//                 feed_dict={'input_images:0': inp})
// scores = np.ascontiguousarray(outs[0].transpose(0, 3, 1, 2))
// geometry = np.ascontiguousarray(outs[1].transpose(0, 3, 1, 2))
// np.save('east_text_detection.scores.npy', scores)
// np.save('east_text_detection.geometry.npy', geometry)
TEST(Test_TensorFlow, EAST_text_detection)
{
    CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
    std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false);
    std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false);
    std::string refScoresPath = findDataFile("dnn/east_text_detection.scores.npy", false);
    std::string refGeometryPath = findDataFile("dnn/east_text_detection.geometry.npy", false);

    Net net = readNet(netPath);

    Mat img = imread(imgPath);
    Mat inp = blobFromImage(img, 1.0, Size(), Scalar(123.68, 116.78, 103.94), true, false);
    net.setInput(inp);

    std::vector<Mat> outs;
    std::vector<String> outNames(2);
    outNames[0] = "feature_fusion/Conv_7/Sigmoid";
    outNames[1] = "feature_fusion/concat_3";
    net.forward(outs, outNames);

    Mat scores = outs[0];
    Mat geometry = outs[1];

    normAssert(scores, blobFromNPY(refScoresPath), "scores");
    normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 1e-4, 3e-3);
    LayerFactory::unregisterLayer("ResizeBilinear");
}

}