// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

/*
Tests for loading TensorFlow models.
*/

#include "test_precomp.hpp"
#include "npy_blob.hpp"

#include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS

namespace opencv_test
{

using namespace cv;
using namespace cv::dnn;

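// Builds a path to a file inside the opencv_extra/testdata/dnn directory.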
template<typename TString>
static std::string _tf(TString filename)
{
    return (getOpenCVExtraDir() + "/dnn/") + filename;
}

TEST(Test_TensorFlow, read_inception)
{
    Net net;
    {
        const string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
        net = readNetFromTensorflow(model);
        ASSERT_FALSE(net.empty());
    }

    Mat sample = imread(_tf("grace_hopper_227.png"));
    ASSERT_TRUE(!sample.empty());
    Mat input;
    resize(sample, input, Size(224, 224));
    input -= 128; // mean subtraction

    Mat inputBlob = blobFromImage(input);

    net.setInput(inputBlob, "input");
    Mat out = net.forward("softmax2");

    std::cout << out.dims << std::endl;
}

TEST(Test_TensorFlow, inception_accuracy)
{
    Net net;
    {
        const string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
        net = readNetFromTensorflow(model);
        ASSERT_FALSE(net.empty());
    }

    Mat sample = imread(_tf("grace_hopper_227.png"));
    ASSERT_TRUE(!sample.empty());
    resize(sample, sample, Size(224, 224));
    Mat inputBlob = blobFromImage(sample);

    net.setInput(inputBlob, "input");
    Mat out = net.forward("softmax2");

    Mat ref = blobFromNPY(_tf("tf_inception_prob.npy"));

    normAssert(ref, out);
}

static std::string path(const std::string& file)
{
    return findDataFile("dnn/tensorflow/" + file, false);
}

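// Runs a small TensorFlow graph from opencv_extra/testdata/dnn/tensorflow and compares the
// result with a reference output. Test data follows the naming convention:
//   <prefix>_net.pb    - frozen graph definition
//   <prefix>_net.pbtxt - text graph definition (used only when hasText is true)
//   <prefix>_in.npy    - input blob
//   <prefix>_out.npy   - reference output blob
// l1 and lInf are the error thresholds passed to normAssert. If memoryLoad is true, the model
// files are first read into memory buffers and the network is parsed from memory.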
static void runTensorFlowNet(const std::string& prefix, int targetId = DNN_TARGET_CPU, bool hasText = false,
                             double l1 = 1e-5, double lInf = 1e-4,
                             bool memoryLoad = false)
{
    std::string netPath = path(prefix + "_net.pb");
    std::string netConfig = (hasText ? path(prefix + "_net.pbtxt") : "");
    std::string inpPath = path(prefix + "_in.npy");
    std::string outPath = path(prefix + "_out.npy");

    Net net;
    if (memoryLoad)
    {
        // Load the files into memory buffers.
        string dataModel;
        ASSERT_TRUE(readFileInMemory(netPath, dataModel));

        string dataConfig;
        if (hasText)
            ASSERT_TRUE(readFileInMemory(netConfig, dataConfig));

        net = readNetFromTensorflow(dataModel.c_str(), dataModel.size(),
                                    dataConfig.c_str(), dataConfig.size());
    }
    else
        net = readNetFromTensorflow(netPath, netConfig);

    ASSERT_FALSE(net.empty());

    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
    net.setPreferableTarget(targetId);

    cv::Mat input = blobFromNPY(inpPath);
    cv::Mat target = blobFromNPY(outPath);

    net.setInput(input);
    cv::Mat output = net.forward();
    normAssert(target, output, "", l1, lInf);
}

typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_layers;

TEST_P(Test_TensorFlow_layers, conv)
{
    int targetId = GetParam();
    runTensorFlowNet("single_conv", targetId);
    runTensorFlowNet("atrous_conv2d_valid", targetId);
    runTensorFlowNet("atrous_conv2d_same", targetId);
    runTensorFlowNet("depthwise_conv2d", targetId);
}

TEST_P(Test_TensorFlow_layers, padding)
{
    int targetId = GetParam();
    runTensorFlowNet("padding_same", targetId);
    runTensorFlowNet("padding_valid", targetId);
    runTensorFlowNet("spatial_padding", targetId);
}

TEST_P(Test_TensorFlow_layers, eltwise_add_mul)
{
    runTensorFlowNet("eltwise_add_mul", GetParam());
}

TEST_P(Test_TensorFlow_layers, pad_and_concat)
{
    runTensorFlowNet("pad_and_concat", GetParam());
}

TEST_P(Test_TensorFlow_layers, batch_norm)
{
    int targetId = GetParam();
    runTensorFlowNet("batch_norm", targetId);
    runTensorFlowNet("fused_batch_norm", targetId);
    runTensorFlowNet("batch_norm_text", targetId, true);
    runTensorFlowNet("mvn_batch_norm", targetId);
    runTensorFlowNet("mvn_batch_norm_1x1", targetId);
    runTensorFlowNet("unfused_batch_norm", targetId);
    runTensorFlowNet("fused_batch_norm_no_gamma", targetId);
    runTensorFlowNet("unfused_batch_norm_no_gamma", targetId);
}

TEST_P(Test_TensorFlow_layers, pooling)
{
    int targetId = GetParam();
    runTensorFlowNet("max_pool_even", targetId);
    runTensorFlowNet("max_pool_odd_valid", targetId);
    runTensorFlowNet("ave_pool_same", targetId);
    runTensorFlowNet("max_pool_odd_same", targetId);
    runTensorFlowNet("reduce_mean", targetId);  // an average pooling over all spatial dimensions.
}

TEST_P(Test_TensorFlow_layers, deconvolution)
{
    int targetId = GetParam();
    runTensorFlowNet("deconvolution", targetId);
    runTensorFlowNet("deconvolution_same", targetId);
    runTensorFlowNet("deconvolution_stride_2_same", targetId);
    runTensorFlowNet("deconvolution_adj_pad_valid", targetId);
    runTensorFlowNet("deconvolution_adj_pad_same", targetId);
    runTensorFlowNet("keras_deconv_valid", targetId);
    runTensorFlowNet("keras_deconv_same", targetId);
}

TEST_P(Test_TensorFlow_layers, matmul)
{
    int targetId = GetParam();
    runTensorFlowNet("matmul", targetId);
    runTensorFlowNet("nhwc_reshape_matmul", targetId);
    runTensorFlowNet("nhwc_transpose_reshape_matmul", targetId);
}

TEST_P(Test_TensorFlow_layers, reshape)
{
    int targetId = GetParam();
    runTensorFlowNet("shift_reshape_no_reorder", targetId);
    runTensorFlowNet("reshape_reduce", targetId);
    runTensorFlowNet("flatten", targetId, true);
    runTensorFlowNet("unfused_flatten", targetId);
    runTensorFlowNet("unfused_flatten_unknown_batch", targetId);
}

TEST_P(Test_TensorFlow_layers, l2_normalize)
{
    int targetId = GetParam();
    runTensorFlowNet("l2_normalize", targetId);
    runTensorFlowNet("l2_normalize_3d", targetId);
}

INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_layers, availableDnnTargets());

typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_nets;

TEST_P(Test_TensorFlow_nets, MobileNet_SSD)
{
    std::string netPath = findDataFile("dnn/ssd_mobilenet_v1_coco.pb", false);
    std::string netConfig = findDataFile("dnn/ssd_mobilenet_v1_coco.pbtxt", false);
    std::string imgPath = findDataFile("dnn/street.png", false);

    Mat inp;
    resize(imread(imgPath), inp, Size(300, 300));
    inp = blobFromImage(inp, 1.0f / 127.5, Size(), Scalar(127.5, 127.5, 127.5), true);

    std::vector<String> outNames(3);
    outNames[0] = "concat";
    outNames[1] = "concat_1";
    outNames[2] = "detection_out";

    std::vector<Mat> target(outNames.size());
    for (size_t i = 0; i < outNames.size(); ++i)
    {
        std::string path = findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco." + outNames[i] + ".npy", false);
        target[i] = blobFromNPY(path);
    }

    Net net = readNetFromTensorflow(netPath, netConfig);

    net.setPreferableTarget(GetParam());

    net.setInput(inp);

    std::vector<Mat> output;
    net.forward(output, outNames);

    normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1), "", 1e-5, 1.5e-4);
    normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 3e-4);
    normAssertDetections(target[2], output[2], "", 0.2);
}

TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
{
    std::string proto = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pbtxt", false);
    std::string model = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pb", false);

    Net net = readNetFromTensorflow(model, proto);
    Mat img = imread(findDataFile("dnn/street.png", false));
    Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false);

    net.setPreferableTarget(GetParam());

    net.setInput(blob);
    // The output has shape 1x1xNx7, where N is the number of detections.
    // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom].
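    // For illustration only: such an output can be viewed as an Nx7 matrix, e.g.
    //   Mat detections(out.size[2], 7, CV_32F, out.ptr<float>());
    //   float confidence = detections.at<float>(i, 2);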
    Mat out = net.forward();
    Mat ref = (Mat_<float>(5, 7) << 0, 1, 0.90176028, 0.19872092, 0.36311883, 0.26461923, 0.63498729,
                                    0, 3, 0.93569964, 0.64865261, 0.45906419, 0.80675775, 0.65708131,
                                    0, 3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015,
                                    0, 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
                                    0, 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
    normAssertDetections(ref, out, "", 0.5);
}

TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)
{
    std::string proto = findDataFile("dnn/opencv_face_detector.pbtxt", false);
    std::string model = findDataFile("dnn/opencv_face_detector_uint8.pb", false);

    Net net = readNetFromTensorflow(model, proto);
    Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
    Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);

    net.setPreferableTarget(GetParam());

    net.setInput(blob);
    // The output has shape 1x1xNx7, where N is the number of detections.
    // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom].
    Mat out = net.forward();

    // The reference values are from the test for the Caffe model.
    Mat ref = (Mat_<float>(6, 7) << 0, 1, 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
                                    0, 1, 0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
                                    0, 1, 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
                                    0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
                                    0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
                                    0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
    normAssertDetections(ref, out, "", 0.9, 3.4e-3, 1e-2);
}

INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, availableDnnTargets());

typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_fp16;

TEST_P(Test_TensorFlow_fp16, tests)
{
    int targetId = GetParam();
    const float l1 = 7e-4;
    const float lInf = 1e-2;
    runTensorFlowNet("fp16_single_conv", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_deconvolution", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_max_pool_odd_same", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_padding_valid", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_eltwise_add_mul", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_max_pool_odd_valid", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_pad_and_concat", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_max_pool_even", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_padding_same", targetId, false, l1, lInf);
}

INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_fp16,
                        Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16));

TEST(Test_TensorFlow, defun)
{
    runTensorFlowNet("defun_dropout");
}

TEST(Test_TensorFlow, quantized)
{
    runTensorFlowNet("uint8_single_conv");
}

TEST(Test_TensorFlow, lstm)
{
    runTensorFlowNet("lstm", DNN_TARGET_CPU, true);
}

TEST(Test_TensorFlow, split)
{
    runTensorFlowNet("split_equals");
}

TEST(Test_TensorFlow, resize_nearest_neighbor)
{
    runTensorFlowNet("resize_nearest_neighbor");
}

TEST(Test_TensorFlow, slice)
{
    runTensorFlowNet("slice_4d");
}

TEST(Test_TensorFlow, softmax)
{
    runTensorFlowNet("keras_softmax");
}

TEST(Test_TensorFlow, relu6)
{
    runTensorFlowNet("keras_relu6");
}

TEST(Test_TensorFlow, keras_mobilenet_head)
{
    runTensorFlowNet("keras_mobilenet_head");
}

TEST(Test_TensorFlow, memory_read)
{
    double l1 = 1e-5;
    double lInf = 1e-4;
    runTensorFlowNet("lstm", DNN_TARGET_CPU, true, l1, lInf, true);

    runTensorFlowNet("batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
    runTensorFlowNet("fused_batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
    runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
}

// Test a custom layer.
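// The layer below implements TensorFlow's ResizeBilinear operation; the tests register it with
// CV_DNN_REGISTER_LAYER_CLASS before importing the graphs and unregister it afterwards.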
class ResizeBilinearLayer CV_FINAL : public Layer
{
public:
    ResizeBilinearLayer(const LayerParams &params) : Layer(params),
        outWidth(0), outHeight(0), factorWidth(1), factorHeight(1)
    {
        CV_Assert(!params.get<bool>("align_corners", false));
        CV_Assert(!blobs.empty());

        for (size_t i = 0; i < blobs.size(); ++i)
            CV_Assert(blobs[i].type() == CV_32SC1);

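        // The layer is configured either by a single blob holding the destination [height, width]
        // or by two single-element blobs holding integer height and width scale factors.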
        if (blobs.size() == 1)
        {
            CV_Assert(blobs[0].total() == 2);
            outHeight = blobs[0].at<int>(0, 0);
            outWidth = blobs[0].at<int>(0, 1);
        }
        else
        {
            CV_Assert(blobs.size() == 2, blobs[0].total() == 1, blobs[1].total() == 1);
            factorHeight = blobs[0].at<int>(0, 0);
            factorWidth = blobs[1].at<int>(0, 0);
            outHeight = outWidth = 0;
        }
    }

    static Ptr<Layer> create(LayerParams& params)
    {
        return Ptr<Layer>(new ResizeBilinearLayer(params));
    }

    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
                                 const int requiredOutputs,
                                 std::vector<std::vector<int> > &outputs,
                                 std::vector<std::vector<int> > &internals) const CV_OVERRIDE
    {
        std::vector<int> outShape(4);
        outShape[0] = inputs[0][0];  // batch size
        outShape[1] = inputs[0][1];  // number of channels
        outShape[2] = outHeight != 0 ? outHeight : (inputs[0][2] * factorHeight);
        outShape[3] = outWidth != 0 ? outWidth : (inputs[0][3] * factorWidth);
        outputs.assign(1, outShape);
        return false;
    }

    virtual void finalize(const std::vector<Mat*>& inputs, std::vector<Mat> &outputs) CV_OVERRIDE
    {
        if (!outWidth && !outHeight)
        {
            outHeight = outputs[0].size[2];
            outWidth = outputs[0].size[3];
        }
    }

    // This implementation is based on a reference implementation from
    // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) CV_OVERRIDE
    {
        Mat& inp = *inputs[0];
        Mat& out = outputs[0];
        const float* inpData = (float*)inp.data;
        float* outData = (float*)out.data;

        const int batchSize = inp.size[0];
        const int numChannels = inp.size[1];
        const int inpHeight = inp.size[2];
        const int inpWidth = inp.size[3];

        float heightScale = static_cast<float>(inpHeight) / outHeight;
        float widthScale = static_cast<float>(inpWidth) / outWidth;
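        // align_corners is rejected in the constructor, so the TFLite reference convention is used:
        // the scale is input size / output size, an output pixel (x, y) samples the input at
        // (x * widthScale, y * heightScale), and the lower-right neighbor is clamped to the border.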
        for (int b = 0; b < batchSize; ++b)
        {
            for (int y = 0; y < outHeight; ++y)
            {
                float input_y = y * heightScale;
                int y0 = static_cast<int>(std::floor(input_y));
                int y1 = std::min(y0 + 1, inpHeight - 1);
                for (int x = 0; x < outWidth; ++x)
                {
                    float input_x = x * widthScale;
                    int x0 = static_cast<int>(std::floor(input_x));
                    int x1 = std::min(x0 + 1, inpWidth - 1);
                    for (int c = 0; c < numChannels; ++c)
                    {
                        float interpolation =
                            inpData[offset(inp.size, c, x0, y0, b)] * (1 - (input_y - y0)) * (1 - (input_x - x0)) +
                            inpData[offset(inp.size, c, x0, y1, b)] * (input_y - y0) * (1 - (input_x - x0)) +
                            inpData[offset(inp.size, c, x1, y0, b)] * (1 - (input_y - y0)) * (input_x - x0) +
                            inpData[offset(inp.size, c, x1, y1, b)] * (input_y - y0) * (input_x - x0);
                        outData[offset(out.size, c, x, y, b)] = interpolation;
                    }
                }
            }
        }
    }

    virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}

private:
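    // Computes a flat element index into an NCHW blob: ((b * C + c) * H + y) * W + x.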
    static inline int offset(const MatSize& size, int c, int x, int y, int b)
    {
        return x + size[3] * (y + size[2] * (c + size[1] * b));
    }

    int outWidth, outHeight, factorWidth, factorHeight;
};

TEST(Test_TensorFlow, resize_bilinear)
{
    CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
    runTensorFlowNet("resize_bilinear");
    runTensorFlowNet("resize_bilinear_factor");
    LayerFactory::unregisterLayer("ResizeBilinear");
}

// The reference outputs below were generated with the following TensorFlow script:
// inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png')
// inp = inp[:,:,[2, 1, 0]].astype(np.float32).reshape(1, 512, 512, 3)
// outs = sess.run([sess.graph.get_tensor_by_name('feature_fusion/Conv_7/Sigmoid:0'),
//                  sess.graph.get_tensor_by_name('feature_fusion/concat_3:0')],
//                 feed_dict={'input_images:0': inp})
// scores = np.ascontiguousarray(outs[0].transpose(0, 3, 1, 2))
// geometry = np.ascontiguousarray(outs[1].transpose(0, 3, 1, 2))
// np.save('east_text_detection.scores.npy', scores)
// np.save('east_text_detection.geometry.npy', geometry)
TEST(Test_TensorFlow, EAST_text_detection)
{
    CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
    std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false);
    std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false);
    std::string refScoresPath = findDataFile("dnn/east_text_detection.scores.npy", false);
    std::string refGeometryPath = findDataFile("dnn/east_text_detection.geometry.npy", false);

    Net net = readNet(netPath);

    Mat img = imread(imgPath);
    Mat inp = blobFromImage(img, 1.0, Size(), Scalar(123.68, 116.78, 103.94), true, false);
    net.setInput(inp);

    std::vector<Mat> outs;
    std::vector<String> outNames(2);
    outNames[0] = "feature_fusion/Conv_7/Sigmoid";
    outNames[1] = "feature_fusion/concat_3";
    net.forward(outs, outNames);

    Mat scores = outs[0];
    Mat geometry = outs[1];

    normAssert(scores, blobFromNPY(refScoresPath), "scores");
    normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 1e-4, 3e-3);
    LayerFactory::unregisterLayer("ResizeBilinear");
}

}