1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "test_precomp.hpp"
43 #include "npy_blob.hpp"
44 #include <opencv2/dnn/shape_utils.hpp>
45 #include <opencv2/dnn/layer.details.hpp> // CV_DNN_REGISTER_LAYER_CLASS
51 using namespace testing;
53 using namespace cv::dnn;
// Test-data path helper: returns findDataFile(path, required).
// NOTE(review): `path` is not declared in the lines visible here; presumably it is
// assembled from `filename` and `inTorchDir` -- confirm against the full definition.
55 template<typename TStr>
56 static std::string _tf(TStr filename, bool inTorchDir = true, bool required = true)
62 return findDataFile(path, required);
// Smoke test: readNetFromTorch on "net_simple_net.txt" (second argument false, i.e. the
// isBinary flag as used in runTorchNet) must not throw and must yield a non-empty net.
65 TEST(Torch_Importer, simple_read)
68 ASSERT_NO_THROW(net = readNetFromTorch(_tf("net_simple_net.txt"), false));
69 ASSERT_FALSE(net.empty());
// Fixture for the per-layer Torch importer tests; derives from DNNTestLayer.
72 class Test_Torch_layers : public DNNTestLayer
// Loads the test case named by `prefix`, forwards it and compares with reference blobs.
//   prefix       - base name; "_input", "_output", "_net" (and "_output_2") plus the
//                  ".dat" / ".txt" suffix are appended to form the data file names.
//   outLayerName - layer to forward to; when empty, the last entry of
//                  net.getLayerNames() is used.
//   check2ndBlob - additionally compare outBlobs[1] with "<prefix>_output_2"
//                  (not done when backend is DNN_BACKEND_INFERENCE_ENGINE).
//   isBinary     - selects the ".dat" suffix (otherwise ".txt") and is forwarded to
//                  readTorchBlob / readNetFromTorch.
//   evaluate     - forwarded to readNetFromTorch as its third argument.
//   l1, lInf     - thresholds passed to normAssert; 0 selects default_l1 / default_lInf.
75 void runTorchNet(const String& prefix, String outLayerName = "",
76 bool check2ndBlob = false, bool isBinary = false, bool evaluate = true,
77 double l1 = 0.0, double lInf = 0.0)
79 String suffix = (isBinary) ? ".dat" : ".txt";
// Read the input blob and the reference output; reading must not throw.
82 ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) );
83 ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) );
85 checkBackend(backend, target, &inp, &outRef);
87 Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary, evaluate);
88 ASSERT_FALSE(net.empty());
90 net.setPreferableBackend(backend);
91 net.setPreferableTarget(target);
// Default to the last layer reported by the network.
93 if (outLayerName.empty())
94 outLayerName = net.getLayerNames().back();
97 std::vector<Mat> outBlobs;
98 net.forward(outBlobs, outLayerName);
// A zero threshold means "not specified": fall back to the fixture defaults.
99 l1 = l1 ? l1 : default_l1;
100 lInf = lInf ? lInf : default_lInf;
101 normAssert(outRef, outBlobs[0], "", l1, lInf);
// The second output is compared only on request and never for the Inference Engine backend.
// NOTE(review): outBlobs[1] is read without a visible size check -- confirm that
// forward() always yields two blobs for the cases that set check2ndBlob.
103 if (check2ndBlob && backend != DNN_BACKEND_INFERENCE_ENGINE)
105 Mat out2 = outBlobs[1];
106 Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary);
107 normAssert(out2, ref2, "", l1, lInf);
// Runs the binary "net_conv" case; thresholds are relaxed to 0.08 / 0.42 on the
// DNN_TARGET_OPENCL_FP16 and DNN_TARGET_MYRIAD targets.
112 TEST_P(Test_Torch_layers, run_convolution)
114 // Output reference values are in range [23.4018, 72.0181]
115 double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.08 : default_l1;
116 double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.42 : default_lInf;
117 runTorchNet("net_conv", "", false, true, true, l1, lInf);
// Runs the text "net_pool_max" case and also checks the second output blob
// (check2ndBlob = true). For the OpenCV backend on the OpenCL FP16 target the
// CV_TEST_TAG_DNN_SKIP_OPENCL_FP16 tag is applied first.
120 TEST_P(Test_Torch_layers, run_pool_max)
122 if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
123 applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
124 runTorchNet("net_pool_max", "", true);
// Runs the "net_pool_ave" case with all runTorchNet defaults (text files, default thresholds).
127 TEST_P(Test_Torch_layers, run_pool_ave)
129 runTorchNet("net_pool_ave");
// Runs the "net_reshape" case with all runTorchNet defaults.
132 TEST_P(Test_Torch_layers, run_reshape_change_batch_size)
134 runTorchNet("net_reshape");
// Runs "net_reshape_batch" (text files) and "net_reshape_channels" (binary files).
// On the Inference Engine backend with the Myriad target the
// CV_TEST_TAG_DNN_SKIP_IE_MYRIAD tag is applied first.
137 TEST_P(Test_Torch_layers, run_reshape)
139 if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
140 applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
141 runTorchNet("net_reshape_batch");
142 runTorchNet("net_reshape_channels", "", false, true);
// Runs the text "net_reshape_single_sample" case; thresholds are 0.033 (l1) and
// 0.05 (lInf) on the Myriad and OpenCL FP16 targets, fixture defaults otherwise.
145 TEST_P(Test_Torch_layers, run_reshape_single_sample)
147 // Reference output values in range [14.4586, 18.4492].
148 runTorchNet("net_reshape_single_sample", "", false, false, true,
149 (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.033 : default_l1,
150 (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.05 : default_lInf);
// Runs the "net_linear_2d" case with runTorchNet defaults. For the OpenCV backend on the
// OpenCL FP16 target the CV_TEST_TAG_DNN_SKIP_OPENCL_FP16 tag is applied first.
153 TEST_P(Test_Torch_layers, run_linear)
155 if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
156 applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
157 runTorchNet("net_linear_2d");
// Runs the "net_concat" case, taking the output from the layer named "l5_torchMerge".
160 TEST_P(Test_Torch_layers, run_concat)
162 runTorchNet("net_concat", "l5_torchMerge");
// Runs the binary "net_depth_concat" case. l1 is passed as 0.0 (fixture default);
// lInf is 0.021 on DNN_TARGET_OPENCL_FP16 and 0.0 (fixture default) otherwise.
165 TEST_P(Test_Torch_layers, run_depth_concat)
167 runTorchNet("net_depth_concat", "", false, true, true, 0.0,
168 target == DNN_TARGET_OPENCL_FP16 ? 0.021 : 0.0);
// Runs the "net_deconv" case with all runTorchNet defaults.
171 TEST_P(Test_Torch_layers, run_deconv)
173 runTorchNet("net_deconv");
// Runs two binary cases: "net_batch_norm" with evaluate = true (default) and
// "net_batch_norm_train" with evaluate = false.
176 TEST_P(Test_Torch_layers, run_batch_norm)
178 runTorchNet("net_batch_norm", "", false, true);
179 runTorchNet("net_batch_norm_train", "", false, true, false);
// Runs the "net_prelu" case with all runTorchNet defaults.
182 TEST_P(Test_Torch_layers, net_prelu)
184 runTorchNet("net_prelu");
// Runs the "net_cadd_table" case with all runTorchNet defaults.
187 TEST_P(Test_Torch_layers, net_cadd_table)
189 runTorchNet("net_cadd_table");
// Runs the "net_softmax" and "net_softmax_spatial" cases with runTorchNet defaults.
192 TEST_P(Test_Torch_layers, net_softmax)
194 runTorchNet("net_softmax");
195 runTorchNet("net_softmax_spatial");
// Runs the "net_logsoftmax" and "net_logsoftmax_spatial" cases with runTorchNet defaults.
198 TEST_P(Test_Torch_layers, net_logsoftmax)
200 runTorchNet("net_logsoftmax");
201 runTorchNet("net_logsoftmax_spatial");
// Runs the binary "net_lp_pooling_square" and "net_lp_pooling_power" cases.
204 TEST_P(Test_Torch_layers, net_lp_pooling)
206 runTorchNet("net_lp_pooling_square", "", false, true);
207 runTorchNet("net_lp_pooling_power", "", false, true);
// Runs the binary "net_conv_gemm_lrn" case; on DNN_TARGET_OPENCL_FP16 the thresholds are
// 0.046 (l1) and 0.023 (lInf), otherwise 0.0 is passed (fixture defaults).
// On the Inference Engine backend with the Myriad target the
// CV_TEST_TAG_DNN_SKIP_IE_MYRIAD tag is applied first.
210 TEST_P(Test_Torch_layers, net_conv_gemm_lrn)
212 if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
213 applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
214 runTorchNet("net_conv_gemm_lrn", "", false, true, true,
215 target == DNN_TARGET_OPENCL_FP16 ? 0.046 : 0.0,
216 target == DNN_TARGET_OPENCL_FP16 ? 0.023 : 0.0);
// Runs the binary "net_inception_block" case with default thresholds.
219 TEST_P(Test_Torch_layers, net_inception_block)
221 runTorchNet("net_inception_block", "", false, true);
// Runs the binary "net_normalize" case with default thresholds.
224 TEST_P(Test_Torch_layers, net_normalize)
226 runTorchNet("net_normalize", "", false, true);
// Runs three binary padding cases: "net_padding", "net_spatial_zero_padding" and
// "net_spatial_reflection_padding".
229 TEST_P(Test_Torch_layers, net_padding)
231 runTorchNet("net_padding", "", false, true);
232 runTorchNet("net_spatial_zero_padding", "", false, true);
233 runTorchNet("net_spatial_reflection_padding", "", false, true);
// Runs the binary "net_non_spatial" case. On the Inference Engine backend with an OpenCL
// target the matching tag is applied first: CV_TEST_TAG_DNN_SKIP_IE_OPENCL for
// DNN_TARGET_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16 for DNN_TARGET_OPENCL_FP16.
236 TEST_P(Test_Torch_layers, net_non_spatial)
238 if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
239 (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
240 applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
241 runTorchNet("net_non_spatial", "", false, true);
// Runs the "net_parallel" case with output layer "l5_torchMerge", but only for the
// OpenCV backend on the CPU target; any other combination throws SkipTestException.
244 TEST_P(Test_Torch_layers, run_paralel)
246 if (backend != DNN_BACKEND_OPENCV || target != DNN_TARGET_CPU)
247 throw SkipTestException(""); // TODO: Check this
248 runTorchNet("net_parallel", "l5_torchMerge");
// Runs the binary "net_residual" case. When INF_ENGINE_RELEASE equals 2018050000, the
// Inference Engine backend with an OpenCL target gets the matching IE_OPENCL tag first.
// NOTE(review): the #endif closing the conditional below is not among the visible lines;
// presumably it precedes the runTorchNet call -- confirm.
251 TEST_P(Test_Torch_layers, net_residual)
253 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
254 if (backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL ||
255 target == DNN_TARGET_OPENCL_FP16))
256 applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
258 runTorchNet("net_residual", "", false, true);
// Fixture for the whole-network accuracy tests; adds nothing to DNNTestLayer.
261 class Test_Torch_nets : public DNNTestLayer {};
// Accuracy test for the model "dnn/openface_nn4.small2.v1.t7": forwards a 96x96 float
// version of "cv/shared/lena.png" and compares the result with "net_openface_output.dat".
263 TEST_P(Test_Torch_nets, OpenFace_accuracy)
265 #if defined(INF_ENGINE_RELEASE)
266 if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
267 applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
// findDataFile is called with `false` as its second argument for the model file.
271 const string model = findDataFile("dnn/openface_nn4.small2.v1.t7", false);
272 Net net = readNetFromTorch(model);
274 net.setPreferableBackend(backend);
275 net.setPreferableTarget(target);
// Convert the image to CV_32FC3, then resize to 96x96 with nearest-neighbour interpolation.
277 Mat sample = imread(findDataFile("cv/shared/lena.png"));
278 Mat sampleF32(sample.size(), CV_32FC3);
279 sample.convertTo(sampleF32, sampleF32.type());
281 resize(sampleF32, sampleF32, Size(96, 96), 0, 0, INTER_NEAREST);
// Scale factor 1.0, no resize, zero mean, swapRB enabled.
283 Mat inputBlob = blobFromImage(sampleF32, 1.0, Size(), Scalar(), /*swapRB*/true);
285 net.setInput(inputBlob);
286 Mat out = net.forward();
288 // Reference output values are in range [-0.17212, 0.263492]
289 // on Myriad problem layer: l4_Pooling - does not use pads_begin
// Looser thresholds are used on DNN_TARGET_OPENCL_FP16.
290 float l1 = (target == DNN_TARGET_OPENCL_FP16) ? 4e-4 : 1e-5;
291 float lInf = (target == DNN_TARGET_OPENCL_FP16) ? 1.5e-3 : 1e-3;
292 Mat outRef = readTorchBlob(_tf("net_openface_output.dat"), true);
293 normAssert(out, outRef, "", l1, lInf);
// Builds a rows x cols CV_8UC1 mask that stores, per pixel, the index of the channel
// with the largest score seen so far.
// `scores` is read as size[1] = number of classes, size[2] = rows, size[3] = cols; only
// index 0 of the first dimension is accessed.
// The return statement is not among the visible lines; presumably maxCl is returned.
296 static Mat getSegmMask(const Mat& scores)
298 const int rows = scores.size[2];
299 const int cols = scores.size[3];
300 const int numClasses = scores.size[1];
// Both accumulators start at 0, so a class is recorded only where its score is
// strictly greater than 0 and than every earlier channel's score.
302 Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);
303 Mat maxVal(rows, cols, CV_32FC1, Scalar(0));
304 for (int ch = 0; ch < numClasses; ch++)
306 for (int row = 0; row < rows; row++)
308 const float *ptrScore = scores.ptr<float>(0, ch, row);
309 uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row);
310 float *ptrMaxVal = maxVal.ptr<float>(row);
311 for (int col = 0; col < cols; col++)
// Strict comparison: on ties the earlier channel index is kept.
313 if (ptrScore[col] > ptrMaxVal[col])
315 ptrMaxVal[col] = ptrScore[col];
316 ptrMaxCl[col] = (uchar)ch;
324 // Checks that two score blobs produce identical per-pixel class masks.
// Both blobs must be 4-dimensional with the same size[1] (number of classes). Their
// getSegmMask results are compared element-wise and the number of differing pixels is
// expected to be 0.
325 static void normAssertSegmentation(const Mat& ref, const Mat& test)
327 CV_Assert_N(ref.dims == 4, test.dims == 4);
328 const int numClasses = ref.size[1];
329 CV_Assert(numClasses == test.size[1]);
331 Mat refMask = getSegmMask(ref);
332 Mat testMask = getSegmMask(test);
// (refMask != testMask) is non-zero exactly where the masks disagree.
333 EXPECT_EQ(countNonZero(refMask != testMask), 0);
// Accuracy test for the model "dnn/Enet-model-best.net": forwards "street.png" scaled by
// 1/255 and compares the output with "torch_enet_prob.npy", both numerically
// (normAssert) and as a segmentation mask (normAssertSegmentation).
336 TEST_P(Test_Torch_nets, ENet_accuracy)
// Every target except DNN_TARGET_CPU gets the CV_TEST_TAG_MEMORY_512MB tag.
338 applyTestTag(target == DNN_TARGET_CPU ? "" : CV_TEST_TAG_MEMORY_512MB);
// NOTE(review): the tag selected below depends only on `target` and is always one of the
// IE_OPENCL tags, although the condition also covers DNN_BACKEND_OPENCV with
// DNN_TARGET_OPENCL_FP16 (tagged CV_TEST_TAG_DNN_SKIP_OPENCL_FP16 in other tests of this
// file) and the Inference Engine backend on non-OpenCL targets -- confirm the intended tags.
340 if (backend == DNN_BACKEND_INFERENCE_ENGINE ||
341 (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
342 applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
// `net` is declared outside the visible lines; the model is read with isBinary = true.
346 const string model = findDataFile("dnn/Enet-model-best.net", false);
347 net = readNetFromTorch(model, true);
348 ASSERT_TRUE(!net.empty());
351 net.setPreferableBackend(backend);
352 net.setPreferableTarget(target);
// Both data files are looked up with inTorchDir = false.
354 Mat sample = imread(_tf("street.png", false));
355 Mat inputBlob = blobFromImage(sample, 1./255, Size(), Scalar(), /*swapRB*/true);
357 net.setInput(inputBlob, "");
358 Mat out = net.forward();
359 Mat ref = blobFromNPY(_tf("torch_enet_prob.npy", false));
360 // Due to numerical instability in Pooling-Unpooling layers (indexes jittering)
361 // thresholds for ENet must be changed. Accuracy of results was checked on
362 // Cityscapes dataset and difference in mIOU with Torch is 10E-4%
363 normAssert(ref, out, "", 0.00044, /*target == DNN_TARGET_CPU ? 0.453 : */0.552);
364 normAssertSegmentation(ref, out);
// Repeat the forward pass with the same input and re-check the output each time.
// `N` is declared outside the visible lines.
367 for (int i = 0; i < N; i++)
369 net.setInput(inputBlob, "");
370 Mat out = net.forward();
371 normAssert(ref, out, "", 0.00044, /*target == DNN_TARGET_CPU ? 0.453 : */0.552);
372 normAssertSegmentation(ref, out);
376 // Check accuracy of style transfer models from https://github.com/jcjohnson/fast-neural-style
377 // th fast_neural_style.lua \
378 // -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
379 // -output_image lena.png \
380 // -median_filter 0 \
382 // -model models/eccv16/starry_night.t7
383 // th fast_neural_style.lua \
384 // -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
385 // -output_image lena.png \
386 // -median_filter 0 \
388 // -model models/instance_norm/feathers.t7
// Accuracy test for two style-transfer models: each is run on "dnn/googlenet_1.png" and
// its output, shifted by the per-channel mean and clamped to [0, 255], is compared with
// a reference image.
389 TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
391 #if defined INF_ENGINE_RELEASE
392 if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD
393 && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
394 applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
399 #if defined(INF_ENGINE_RELEASE)
400 #if INF_ENGINE_RELEASE <= 2018050000
401 if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
402 applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_2018R5);
// Model files and, at the same index, the reference image expected for each model.
406 std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
407 "dnn/fast_neural_style_instance_norm_feathers.t7"};
408 std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};
410 for (int i = 0; i < 2; ++i)
412 const string model = findDataFile(models[i], false);
413 Net net = readNetFromTorch(model);
415 net.setPreferableBackend(backend);
416 net.setPreferableTarget(target);
// The per-channel mean (103.939, 116.779, 123.68) is passed to blobFromImage; swapRB is off.
418 Mat img = imread(findDataFile("dnn/googlenet_1.png"));
419 Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false);
421 net.setInput(inputBlob);
422 Mat out = net.forward();
// Add the same per-channel values back to output planes 0..2 and clamp to [0, 255].
425 getPlane(out, 0, 0) += 103.939;
426 getPlane(out, 0, 1) += 116.779;
427 getPlane(out, 0, 2) += 123.68;
428 out = cv::min(cv::max(0, out), 255);
430 Mat ref = imread(findDataFile(targets[i]));
431 Mat refBlob = blobFromImage(ref, 1.0, Size(), Scalar(), false);
// On FP16 / Myriad targets the mean absolute difference (L1 norm divided by the element
// count) is bounded instead of using normAssert.
// NOTE(review): in the visible lines no `else` separates the two EXPECT_LE checks or the
// normAssert call below; presumably 4.0 applies to Myriad, 0.6 to OpenCL FP16 and
// normAssert to all other targets -- confirm against the complete file.
433 if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
435 double normL1 = cvtest::norm(refBlob, out, cv::NORM_L1) / refBlob.total();
436 if (target == DNN_TARGET_MYRIAD)
437 EXPECT_LE(normL1, 4.0f);
439 EXPECT_LE(normL1, 0.6f);
442 normAssert(out, refBlob, "", 0.5, 1.1);
// Instantiates the Test_Torch_nets tests for every value produced by dnnBackendsAndTargets().
446 INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, dnnBackendsAndTargets());
448 // Test a custom layer
449 // https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest
// Custom Layer used by the upsampling_nearest test: dimensions 2 and 3 of a 4-D input are
// multiplied by the integer factor read from the "scale_factor" layer parameter, and each
// plane is resized with INTER_NEAREST.
450 class SpatialUpSamplingNearestLayer CV_FINAL : public Layer
// Reads the integer "scale_factor" parameter into `scale`.
// (The parameter declaration had been corrupted to a pilcrow sign followed by "ms";
// it is restored to a const reference named `params`, which the initializer and body use.)
453 SpatialUpSamplingNearestLayer(const LayerParams &params) : Layer(params)
455 scale = params.get<int>("scale_factor");
// Factory function: wraps a newly constructed SpatialUpSamplingNearestLayer in Ptr<Layer>.
458 static Ptr<Layer> create(LayerParams& params)
460 return Ptr<Layer>(new SpatialUpSamplingNearestLayer(params));
// Produces exactly one output shape derived from inputs[0]: dimensions 0 and 1 are
// copied, dimensions 2 and 3 are multiplied by `scale`.
463 virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
464 const int requiredOutputs,
465 std::vector<std::vector<int> > &outputs,
466 std::vector<std::vector<int> > &internals) const CV_OVERRIDE
468 std::vector<int> outShape(4);
469 outShape[0] = inputs[0][0]; // batch size
470 outShape[1] = inputs[0][1]; // number of channels
471 outShape[2] = scale * inputs[0][2];
472 outShape[3] = scale * inputs[0][3];
473 outputs.assign(1, outShape);
// Resizes every (n, ch) plane of inputs[0] into the matching plane of outputs[0]; the
// third argument (internals) is unused.
477 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays) CV_OVERRIDE
480 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
482 std::vector<Mat> inputs, outputs;
483 inputs_arr.getMatVector(inputs);
484 outputs_arr.getMatVector(outputs);
486 Mat& inp = inputs[0];
487 Mat& out = outputs[0];
488 const int outHeight = out.size[2];
489 const int outWidth = out.size[3];
// Extents in Mat::size are read as int (see outHeight / outWidth), so the loop counters
// are int as well; this avoids a signed/unsigned comparison and a narrowing conversion
// when they are passed to getPlane as plane indices.
490 for (int n = 0; n < inp.size[0]; ++n)
492 for (int ch = 0; ch < inp.size[1]; ++ch)
494 resize(getPlane(inp, n, ch), getPlane(out, n, ch),
495 Size(outWidth, outHeight), 0, 0, INTER_NEAREST);
// Runs the binary "net_spatial_upsampling_nearest" case twice: first with
// SpatialUpSamplingNearestLayer registered under the type name "SpatialUpSamplingNearest",
// then again after that registration has been removed.
504 TEST_P(Test_Torch_layers, upsampling_nearest)
506 // Test a custom layer.
507 CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer);
510 runTorchNet("net_spatial_upsampling_nearest", "", false, true);
// NOTE(review): two unregisterLayer calls are visible back to back; presumably they sit on
// different control-flow paths (e.g. an error-handling path and the normal path) whose
// surrounding lines are not visible here -- confirm.
514 LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
517 LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
519 // Test an implemented layer.
520 runTorchNet("net_spatial_upsampling_nearest", "", false, true);
// Instantiates the Test_Torch_layers tests for every value produced by dnnBackendsAndTargets().
523 INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, dnnBackendsAndTargets());