Merge pull request #14827 from YashasSamaga:cuda4dnn-csl-low
[platform/upstream/opencv.git] / modules / dnn / test / test_torch_importer.cpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 //   * Redistribution's of source code must retain the above copyright notice,
20 //     this list of conditions and the following disclaimer.
21 //
22 //   * Redistribution's in binary form must reproduce the above copyright notice,
23 //     this list of conditions and the following disclaimer in the documentation
24 //     and/or other materials provided with the distribution.
25 //
26 //   * The name of the copyright holders may not be used to endorse or promote products
27 //     derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41
42 #include "test_precomp.hpp"
43 #include "npy_blob.hpp"
44 #include <opencv2/dnn/shape_utils.hpp>
45 #include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS
46
47 namespace opencv_test
48 {
49
50 using namespace std;
51 using namespace testing;
52 using namespace cv;
53 using namespace cv::dnn;
54
55 template<typename TStr>
56 static std::string _tf(TStr filename, bool inTorchDir = true, bool required = true)
57 {
58     String path = "dnn/";
59     if (inTorchDir)
60         path += "torch/";
61     path += filename;
62     return findDataFile(path, required);
63 }
64
65 TEST(Torch_Importer, simple_read)
66 {
67     Net net;
68     ASSERT_NO_THROW(net = readNetFromTorch(_tf("net_simple_net.txt"), false));
69     ASSERT_FALSE(net.empty());
70 }
71
72 class Test_Torch_layers : public DNNTestLayer
73 {
74 public:
75     void runTorchNet(const String& prefix, String outLayerName = "",
76                      bool check2ndBlob = false, bool isBinary = false, bool evaluate = true,
77                      double l1 = 0.0, double lInf = 0.0)
78     {
79         String suffix = (isBinary) ? ".dat" : ".txt";
80
81         Mat inp, outRef;
82         ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) );
83         ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) );
84
85         checkBackend(backend, target, &inp, &outRef);
86
87         Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary, evaluate);
88         ASSERT_FALSE(net.empty());
89
90         net.setPreferableBackend(backend);
91         net.setPreferableTarget(target);
92
93         if (outLayerName.empty())
94             outLayerName = net.getLayerNames().back();
95
96         net.setInput(inp);
97         std::vector<Mat> outBlobs;
98         net.forward(outBlobs, outLayerName);
99         l1 = l1 ? l1 : default_l1;
100         lInf = lInf ? lInf : default_lInf;
101         normAssert(outRef, outBlobs[0], "", l1, lInf);
102
103         if (check2ndBlob && backend != DNN_BACKEND_INFERENCE_ENGINE)
104         {
105             Mat out2 = outBlobs[1];
106             Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary);
107             normAssert(out2, ref2, "", l1, lInf);
108         }
109     }
110 };
111
112 TEST_P(Test_Torch_layers, run_convolution)
113 {
114     // Output reference values are in range [23.4018, 72.0181]
115     double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.08 : default_l1;
116     double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.42 : default_lInf;
117     runTorchNet("net_conv", "", false, true, true, l1, lInf);
118 }
119
120 TEST_P(Test_Torch_layers, run_pool_max)
121 {
122     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
123         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
124     runTorchNet("net_pool_max", "", true);
125 }
126
127 TEST_P(Test_Torch_layers, run_pool_ave)
128 {
129     runTorchNet("net_pool_ave");
130 }
131
132 TEST_P(Test_Torch_layers, run_reshape_change_batch_size)
133 {
134     runTorchNet("net_reshape");
135 }
136
137 TEST_P(Test_Torch_layers, run_reshape)
138 {
139     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
140         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
141     runTorchNet("net_reshape_batch");
142     runTorchNet("net_reshape_channels", "", false, true);
143 }
144
145 TEST_P(Test_Torch_layers, run_reshape_single_sample)
146 {
147     // Reference output values in range [14.4586, 18.4492].
148     runTorchNet("net_reshape_single_sample", "", false, false, true,
149                 (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.033 : default_l1,
150                 (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.05 : default_lInf);
151 }
152
153 TEST_P(Test_Torch_layers, run_linear)
154 {
155     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
156         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
157     runTorchNet("net_linear_2d");
158 }
159
160 TEST_P(Test_Torch_layers, run_concat)
161 {
162     runTorchNet("net_concat", "l5_torchMerge");
163 }
164
165 TEST_P(Test_Torch_layers, run_depth_concat)
166 {
167     runTorchNet("net_depth_concat", "", false, true, true, 0.0,
168                 target == DNN_TARGET_OPENCL_FP16 ? 0.021 : 0.0);
169 }
170
171 TEST_P(Test_Torch_layers, run_deconv)
172 {
173     runTorchNet("net_deconv");
174 }
175
176 TEST_P(Test_Torch_layers, run_batch_norm)
177 {
178     runTorchNet("net_batch_norm", "", false, true);
179     runTorchNet("net_batch_norm_train", "", false, true, false);
180 }
181
182 TEST_P(Test_Torch_layers, net_prelu)
183 {
184     runTorchNet("net_prelu");
185 }
186
187 TEST_P(Test_Torch_layers, net_cadd_table)
188 {
189     runTorchNet("net_cadd_table");
190 }
191
192 TEST_P(Test_Torch_layers, net_softmax)
193 {
194     runTorchNet("net_softmax");
195     runTorchNet("net_softmax_spatial");
196 }
197
198 TEST_P(Test_Torch_layers, net_logsoftmax)
199 {
200     runTorchNet("net_logsoftmax");
201     runTorchNet("net_logsoftmax_spatial");
202 }
203
204 TEST_P(Test_Torch_layers, net_lp_pooling)
205 {
206     runTorchNet("net_lp_pooling_square", "", false, true);
207     runTorchNet("net_lp_pooling_power", "", false, true);
208 }
209
210 TEST_P(Test_Torch_layers, net_conv_gemm_lrn)
211 {
212     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
213         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
214     runTorchNet("net_conv_gemm_lrn", "", false, true, true,
215                 target == DNN_TARGET_OPENCL_FP16 ? 0.046 : 0.0,
216                 target == DNN_TARGET_OPENCL_FP16 ? 0.023 : 0.0);
217 }
218
219 TEST_P(Test_Torch_layers, net_inception_block)
220 {
221     runTorchNet("net_inception_block", "", false, true);
222 }
223
224 TEST_P(Test_Torch_layers, net_normalize)
225 {
226     if(backend == DNN_BACKEND_CUDA)
227         applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); /* only L1 and L2 norms are supported */
228     runTorchNet("net_normalize", "", false, true);
229 }
230
231 TEST_P(Test_Torch_layers, net_padding)
232 {
233     runTorchNet("net_padding", "", false, true);
234     runTorchNet("net_spatial_zero_padding", "", false, true);
235     runTorchNet("net_spatial_reflection_padding", "", false, true);
236 }
237
238 TEST_P(Test_Torch_layers, net_non_spatial)
239 {
240     if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
241         (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
242         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
243     runTorchNet("net_non_spatial", "", false, true);
244 }
245
246 TEST_P(Test_Torch_layers, run_paralel)
247 {
248     if (backend != DNN_BACKEND_OPENCV || target != DNN_TARGET_CPU)
249         throw SkipTestException("");  // TODO: Check this
250     runTorchNet("net_parallel", "l5_torchMerge");
251 }
252
253 TEST_P(Test_Torch_layers, net_residual)
254 {
255 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
256     if (backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL ||
257                                                     target == DNN_TARGET_OPENCL_FP16))
258         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
259 #endif
260     runTorchNet("net_residual", "", false, true);
261 }
262
263 class Test_Torch_nets : public DNNTestLayer {};
264
265 TEST_P(Test_Torch_nets, OpenFace_accuracy)
266 {
267 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000)
268     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
269         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
270 #endif
271     checkBackend();
272
273     const string model = findDataFile("dnn/openface_nn4.small2.v1.t7", false);
274     Net net = readNetFromTorch(model);
275
276     net.setPreferableBackend(backend);
277     net.setPreferableTarget(target);
278
279     Mat sample = imread(findDataFile("cv/shared/lena.png"));
280     Mat sampleF32(sample.size(), CV_32FC3);
281     sample.convertTo(sampleF32, sampleF32.type());
282     sampleF32 /= 255;
283     resize(sampleF32, sampleF32, Size(96, 96), 0, 0, INTER_NEAREST);
284
285     Mat inputBlob = blobFromImage(sampleF32, 1.0, Size(), Scalar(), /*swapRB*/true);
286
287     net.setInput(inputBlob);
288     Mat out = net.forward();
289
290     // Reference output values are in range [-0.17212, 0.263492]
291     // on Myriad problem layer: l4_Pooling - does not use pads_begin
292     float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 2e-3 : 1e-5;
293     float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 5e-3 : 1e-3;
294     Mat outRef = readTorchBlob(_tf("net_openface_output.dat"), true);
295     normAssert(out, outRef, "", l1, lInf);
296 }
297
298 static Mat getSegmMask(const Mat& scores)
299 {
300     const int rows = scores.size[2];
301     const int cols = scores.size[3];
302     const int numClasses = scores.size[1];
303
304     Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);
305     Mat maxVal(rows, cols, CV_32FC1, Scalar(0));
306     for (int ch = 0; ch < numClasses; ch++)
307     {
308         for (int row = 0; row < rows; row++)
309         {
310             const float *ptrScore = scores.ptr<float>(0, ch, row);
311             uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row);
312             float *ptrMaxVal = maxVal.ptr<float>(row);
313             for (int col = 0; col < cols; col++)
314             {
315                 if (ptrScore[col] > ptrMaxVal[col])
316                 {
317                     ptrMaxVal[col] = ptrScore[col];
318                     ptrMaxCl[col] = (uchar)ch;
319                 }
320             }
321         }
322     }
323     return maxCl;
324 }
325
326 // Computer per-class intersection over union metric.
327 static void normAssertSegmentation(const Mat& ref, const Mat& test)
328 {
329     CV_Assert_N(ref.dims == 4, test.dims == 4);
330     const int numClasses = ref.size[1];
331     CV_Assert(numClasses == test.size[1]);
332
333     Mat refMask = getSegmMask(ref);
334     Mat testMask = getSegmMask(test);
335     EXPECT_EQ(countNonZero(refMask != testMask), 0);
336 }
337
338 TEST_P(Test_Torch_nets, ENet_accuracy)
339 {
340     applyTestTag(target == DNN_TARGET_CPU ? "" : CV_TEST_TAG_MEMORY_512MB);
341     checkBackend();
342     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
343         throw SkipTestException("");
344     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
345     {
346         if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
347         if (target == DNN_TARGET_OPENCL)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL);
348         if (target == DNN_TARGET_MYRIAD)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
349         throw SkipTestException("");
350     }
351
352     Net net;
353     {
354         const string model = findDataFile("dnn/Enet-model-best.net", false);
355         net = readNetFromTorch(model, true);
356         ASSERT_TRUE(!net.empty());
357     }
358
359     net.setPreferableBackend(backend);
360     net.setPreferableTarget(target);
361
362     Mat sample = imread(_tf("street.png", false));
363     Mat inputBlob = blobFromImage(sample, 1./255, Size(), Scalar(), /*swapRB*/true);
364
365     net.setInput(inputBlob, "");
366     Mat out = net.forward();
367     Mat ref = blobFromNPY(_tf("torch_enet_prob.npy", false));
368     // Due to numerical instability in Pooling-Unpooling layers (indexes jittering)
369     // thresholds for ENet must be changed. Accuracy of results was checked on
370     // Cityscapes dataset and difference in mIOU with Torch is 10E-4%
371     normAssert(ref, out, "", 0.00044, /*target == DNN_TARGET_CPU ? 0.453 : */0.552);
372     normAssertSegmentation(ref, out);
373
374     const int N = 3;
375     for (int i = 0; i < N; i++)
376     {
377         net.setInput(inputBlob, "");
378         Mat out = net.forward();
379         normAssert(ref, out, "", 0.00044, /*target == DNN_TARGET_CPU ? 0.453 : */0.552);
380         normAssertSegmentation(ref, out);
381     }
382 }
383
384 // Check accuracy of style transfer models from https://github.com/jcjohnson/fast-neural-style
385 // th fast_neural_style.lua \
386 //   -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
387 //   -output_image lena.png \
388 //   -median_filter 0 \
389 //   -image_size 0 \
390 //   -model models/eccv16/starry_night.t7
391 // th fast_neural_style.lua \
392 //   -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
393 //   -output_image lena.png \
394 //   -median_filter 0 \
395 //   -image_size 0 \
396 //   -model models/instance_norm/feathers.t7
397 TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
398 {
399 #if defined INF_ENGINE_RELEASE
400     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD
401             && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
402         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
403 #endif
404
405     checkBackend();
406
407 #if defined(INF_ENGINE_RELEASE)
408 #if INF_ENGINE_RELEASE <= 2018050000
409     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
410         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_2018R5);
411 #endif
412 #endif
413
414     std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
415                             "dnn/fast_neural_style_instance_norm_feathers.t7"};
416     std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};
417
418     for (int i = 0; i < 2; ++i)
419     {
420         const string model = findDataFile(models[i], false);
421         Net net = readNetFromTorch(model);
422
423         net.setPreferableBackend(backend);
424         net.setPreferableTarget(target);
425
426         Mat img = imread(findDataFile("dnn/googlenet_1.png"));
427         Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false);
428
429         net.setInput(inputBlob);
430         Mat out = net.forward();
431
432         // Deprocessing.
433         getPlane(out, 0, 0) += 103.939;
434         getPlane(out, 0, 1) += 116.779;
435         getPlane(out, 0, 2) += 123.68;
436         out = cv::min(cv::max(0, out), 255);
437
438         Mat ref = imread(findDataFile(targets[i]));
439         Mat refBlob = blobFromImage(ref, 1.0, Size(), Scalar(), false);
440
441         if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
442         {
443             double normL1 = cvtest::norm(refBlob, out, cv::NORM_L1) / refBlob.total();
444             if (target == DNN_TARGET_MYRIAD)
445                 EXPECT_LE(normL1, 4.0f);
446             else
447                 EXPECT_LE(normL1, 0.6f);
448         }
449         else
450             normAssert(out, refBlob, "", 0.5, 1.1);
451     }
452 }
453
454 INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, dnnBackendsAndTargets());  // run the Test_Torch_nets TEST_P cases for each value from dnnBackendsAndTargets()
455
456 // Test a custom layer
457 // https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest
458 class SpatialUpSamplingNearestLayer CV_FINAL : public Layer
459 {
460 public:
461     SpatialUpSamplingNearestLayer(const LayerParams &params) : Layer(params)
462     {
463         scale = params.get<int>("scale_factor");
464     }
465
466     static Ptr<Layer> create(LayerParams& params)
467     {
468         return Ptr<Layer>(new SpatialUpSamplingNearestLayer(params));
469     }
470
471     virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
472                                  const int requiredOutputs,
473                                  std::vector<std::vector<int> > &outputs,
474                                  std::vector<std::vector<int> > &internals) const CV_OVERRIDE
475     {
476         std::vector<int> outShape(4);
477         outShape[0] = inputs[0][0];  // batch size
478         outShape[1] = inputs[0][1];  // number of channels
479         outShape[2] = scale * inputs[0][2];
480         outShape[3] = scale * inputs[0][3];
481         outputs.assign(1, outShape);
482         return false;
483     }
484
485     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays) CV_OVERRIDE
486     {
487         CV_TRACE_FUNCTION();
488         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
489
490         std::vector<Mat> inputs, outputs;
491         inputs_arr.getMatVector(inputs);
492         outputs_arr.getMatVector(outputs);
493
494         Mat& inp = inputs[0];
495         Mat& out = outputs[0];
496         const int outHeight = out.size[2];
497         const int outWidth = out.size[3];
498         for (size_t n = 0; n < inp.size[0]; ++n)
499         {
500             for (size_t ch = 0; ch < inp.size[1]; ++ch)
501             {
502                 resize(getPlane(inp, n, ch), getPlane(out, n, ch),
503                        Size(outWidth, outHeight), 0, 0, INTER_NEAREST);
504             }
505         }
506     }
507
508 private:
509     int scale;
510 };
511
512 TEST_P(Test_Torch_layers, upsampling_nearest)
513 {
514     // Test a custom layer.
515     CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer);
516     try
517     {
518         runTorchNet("net_spatial_upsampling_nearest", "", false, true);
519     }
520     catch (...)
521     {
522         LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
523         throw;
524     }
525     LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
526
527     // Test an implemented layer.
528     runTorchNet("net_spatial_upsampling_nearest", "", false, true);
529 }
530
531 INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, dnnBackendsAndTargets());  // run the Test_Torch_layers TEST_P cases for each value from dnnBackendsAndTargets()
532
533 }