modules/dnn/test/test_torch_importer.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  14 // Third party copyrights are property of their respective owners.
  15 //
  16 // Redistribution and use in source and binary forms, with or without modification,
  17 // are permitted provided that the following conditions are met:
  18 //
  19 //   * Redistribution's of source code must retain the above copyright notice,
  20 //     this list of conditions and the following disclaimer.
  21 //
  22 //   * Redistribution's in binary form must reproduce the above copyright notice,
  23 //     this list of conditions and the following disclaimer in the documentation
  24 //     and/or other materials provided with the distribution.
  25 //
  26 //   * The name of the copyright holders may not be used to endorse or promote products
  27 //     derived from this software without specific prior written permission.
  28 //
  29 // This software is provided by the copyright holders and contributors "as is" and
  30 // any express or implied warranties, including, but not limited to, the implied
  31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32 // In no event shall the Intel Corporation or contributors be liable for any direct,
  33 // indirect, incidental, special, exemplary, or consequential damages
  34 // (including, but not limited to, procurement of substitute goods or services;
  35 // loss of use, data, or profits; or business interruption) however caused
  36 // and on any theory of liability, whether in contract, strict liability,
  37 // or tort (including negligence or otherwise) arising in any way out of
  38 // the use of this software, even if advised of the possibility of such damage.
  39 //
  40 //M*/
  41
  42 #include "test_precomp.hpp"
  43 #include "npy_blob.hpp"
  44 #include <opencv2/dnn/shape_utils.hpp>
  45 #include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS
  46
  47 namespace opencv_test
  48 {
  49
  50 using namespace std;
  51 using namespace testing;
  52 using namespace cv;
  53 using namespace cv::dnn;
  54
  55 template<typename TStr>
  56 static std::string _tf(TStr filename, bool inTorchDir = true, bool required = true)
  57 {
  58     String path = "dnn/";
  59     if (inTorchDir)
  60         path += "torch/";
  61     path += filename;
  62     return findDataFile(path, required);
  63 }
  64
  65 TEST(Torch_Importer, simple_read)
  66 {
  67     Net net;
  68     ASSERT_NO_THROW(net = readNetFromTorch(_tf("net_simple_net.txt"), false));
  69     ASSERT_FALSE(net.empty());
  70 }
  71
  72 class Test_Torch_layers : public DNNTestLayer
  73 {
  74 public:
  75     void runTorchNet(const String& prefix, String outLayerName = "",
  76                      bool check2ndBlob = false, bool isBinary = false, bool evaluate = true,
  77                      double l1 = 0.0, double lInf = 0.0)
  78     {
  79         String suffix = (isBinary) ? ".dat" : ".txt";
  80
  81         Mat inp, outRef;
  82         ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) );
  83         ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) );
  84
  85         checkBackend(backend, target, &inp, &outRef);
  86
  87         Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary, evaluate);
  88         ASSERT_FALSE(net.empty());
  89
  90         net.setPreferableBackend(backend);
  91         net.setPreferableTarget(target);
  92
  93         if (outLayerName.empty())
  94             outLayerName = net.getLayerNames().back();
  95
  96         net.setInput(inp);
  97         std::vector<Mat> outBlobs;
  98         net.forward(outBlobs, outLayerName);
  99         l1 = l1 ? l1 : default_l1;
 100         lInf = lInf ? lInf : default_lInf;
 101         normAssert(outRef, outBlobs[0], "", l1, lInf);
 102
 103         if (check2ndBlob && backend != DNN_BACKEND_INFERENCE_ENGINE)
 104         {
 105             Mat out2 = outBlobs[1];
 106             Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary);
 107             normAssert(out2, ref2, "", l1, lInf);
 108         }
 109     }
 110 };
 111
 112 TEST_P(Test_Torch_layers, run_convolution)
 113 {
 114     // Output reference values are in range [23.4018, 72.0181]
 115     double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.08 : default_l1;
 116     double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.42 : default_lInf;
 117     runTorchNet("net_conv", "", false, true, true, l1, lInf);
 118 }
 119
 120 TEST_P(Test_Torch_layers, run_pool_max)
 121 {
 122     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
 123         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
 124     runTorchNet("net_pool_max", "", true);
 125 }
 126
 127 TEST_P(Test_Torch_layers, run_pool_ave)
 128 {
 129     runTorchNet("net_pool_ave");
 130 }
 131
 132 TEST_P(Test_Torch_layers, run_reshape_change_batch_size)
 133 {
 134     runTorchNet("net_reshape");
 135 }
 136
 137 TEST_P(Test_Torch_layers, run_reshape)
 138 {
 139     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
 140         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
 141     runTorchNet("net_reshape_batch");
 142     runTorchNet("net_reshape_channels", "", false, true);
 143 }
 144
 145 TEST_P(Test_Torch_layers, run_reshape_single_sample)
 146 {
 147     // Reference output values in range [14.4586, 18.4492].
 148     runTorchNet("net_reshape_single_sample", "", false, false, true,
 149                 (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.033 : default_l1,
 150                 (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.05 : default_lInf);
 151 }
 152
 153 TEST_P(Test_Torch_layers, run_linear)
 154 {
 155     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
 156         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
 157     runTorchNet("net_linear_2d");
 158 }
 159
 160 TEST_P(Test_Torch_layers, run_concat)
 161 {
 162     runTorchNet("net_concat", "l5_torchMerge");
 163 }
 164
 165 TEST_P(Test_Torch_layers, run_depth_concat)
 166 {
 167     runTorchNet("net_depth_concat", "", false, true, true, 0.0,
 168                 target == DNN_TARGET_OPENCL_FP16 ? 0.021 : 0.0);
 169 }
 170
 171 TEST_P(Test_Torch_layers, run_deconv)
 172 {
 173     runTorchNet("net_deconv");
 174 }
 175
 176 TEST_P(Test_Torch_layers, run_batch_norm)
 177 {
 178     runTorchNet("net_batch_norm", "", false, true);
 179     runTorchNet("net_batch_norm_train", "", false, true, false);
 180 }
 181
 182 TEST_P(Test_Torch_layers, net_prelu)
 183 {
 184     runTorchNet("net_prelu");
 185 }
 186
 187 TEST_P(Test_Torch_layers, net_cadd_table)
 188 {
 189     runTorchNet("net_cadd_table");
 190 }
 191
 192 TEST_P(Test_Torch_layers, net_softmax)
 193 {
 194     runTorchNet("net_softmax");
 195     runTorchNet("net_softmax_spatial");
 196 }
 197
 198 TEST_P(Test_Torch_layers, net_logsoftmax)
 199 {
 200     runTorchNet("net_logsoftmax");
 201     runTorchNet("net_logsoftmax_spatial");
 202 }
 203
 204 TEST_P(Test_Torch_layers, net_lp_pooling)
 205 {
 206     runTorchNet("net_lp_pooling_square", "", false, true);
 207     runTorchNet("net_lp_pooling_power", "", false, true);
 208 }
 209
 210 TEST_P(Test_Torch_layers, net_conv_gemm_lrn)
 211 {
 212     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
 213         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
 214     runTorchNet("net_conv_gemm_lrn", "", false, true, true,
 215                 target == DNN_TARGET_OPENCL_FP16 ? 0.046 : 0.0,
 216                 target == DNN_TARGET_OPENCL_FP16 ? 0.023 : 0.0);
 217 }
 218
 219 TEST_P(Test_Torch_layers, net_inception_block)
 220 {
 221     runTorchNet("net_inception_block", "", false, true);
 222 }
 223
 224 TEST_P(Test_Torch_layers, net_normalize)
 225 {
 226     if(backend == DNN_BACKEND_CUDA)
 227         applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); /* only L1 and L2 norms are supported */
 228     runTorchNet("net_normalize", "", false, true);
 229 }
 230
 231 TEST_P(Test_Torch_layers, net_padding)
 232 {
 233     runTorchNet("net_padding", "", false, true);
 234     runTorchNet("net_spatial_zero_padding", "", false, true);
 235     runTorchNet("net_spatial_reflection_padding", "", false, true);
 236 }
 237
 238 TEST_P(Test_Torch_layers, net_non_spatial)
 239 {
 240     if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
 241         (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
 242         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
 243     runTorchNet("net_non_spatial", "", false, true);
 244 }
 245
 246 TEST_P(Test_Torch_layers, run_paralel)
 247 {
 248     if (backend != DNN_BACKEND_OPENCV || target != DNN_TARGET_CPU)
 249         throw SkipTestException("");  // TODO: Check this
 250     runTorchNet("net_parallel", "l5_torchMerge");
 251 }
 252
 253 TEST_P(Test_Torch_layers, net_residual)
 254 {
 255 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
 256     if (backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL ||
 257                                                     target == DNN_TARGET_OPENCL_FP16))
 258         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
 259 #endif
 260     runTorchNet("net_residual", "", false, true);
 261 }
 262
 263 class Test_Torch_nets : public DNNTestLayer {};
 264
 265 TEST_P(Test_Torch_nets, OpenFace_accuracy)
 266 {
 267 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000)
 268     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
 269         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
 270 #endif
 271     checkBackend();
 272
 273     const string model = findDataFile("dnn/openface_nn4.small2.v1.t7", false);
 274     Net net = readNetFromTorch(model);
 275
 276     net.setPreferableBackend(backend);
 277     net.setPreferableTarget(target);
 278
 279     Mat sample = imread(findDataFile("cv/shared/lena.png"));
 280     Mat sampleF32(sample.size(), CV_32FC3);
 281     sample.convertTo(sampleF32, sampleF32.type());
 282     sampleF32 /= 255;
 283     resize(sampleF32, sampleF32, Size(96, 96), 0, 0, INTER_NEAREST);
 284
 285     Mat inputBlob = blobFromImage(sampleF32, 1.0, Size(), Scalar(), /*swapRB*/true);
 286
 287     net.setInput(inputBlob);
 288     Mat out = net.forward();
 289
 290     // Reference output values are in range [-0.17212, 0.263492]
 291     // on Myriad problem layer: l4_Pooling - does not use pads_begin
 292     float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 2e-3 : 1e-5;
 293     float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 5e-3 : 1e-3;
 294     Mat outRef = readTorchBlob(_tf("net_openface_output.dat"), true);
 295     normAssert(out, outRef, "", l1, lInf);
 296 }
 297
 298 static Mat getSegmMask(const Mat& scores)
 299 {
 300     const int rows = scores.size[2];
 301     const int cols = scores.size[3];
 302     const int numClasses = scores.size[1];
 303
 304     Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);
 305     Mat maxVal(rows, cols, CV_32FC1, Scalar(0));
 306     for (int ch = 0; ch < numClasses; ch++)
 307     {
 308         for (int row = 0; row < rows; row++)
 309         {
 310             const float *ptrScore = scores.ptr<float>(0, ch, row);
 311             uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row);
 312             float *ptrMaxVal = maxVal.ptr<float>(row);
 313             for (int col = 0; col < cols; col++)
 314             {
 315                 if (ptrScore[col] > ptrMaxVal[col])
 316                 {
 317                     ptrMaxVal[col] = ptrScore[col];
 318                     ptrMaxCl[col] = (uchar)ch;
 319                 }
 320             }
 321         }
 322     }
 323     return maxCl;
 324 }
 325
 326 // Computer per-class intersection over union metric.
 327 static void normAssertSegmentation(const Mat& ref, const Mat& test)
 328 {
 329     CV_Assert_N(ref.dims == 4, test.dims == 4);
 330     const int numClasses = ref.size[1];
 331     CV_Assert(numClasses == test.size[1]);
 332
 333     Mat refMask = getSegmMask(ref);
 334     Mat testMask = getSegmMask(test);
 335     EXPECT_EQ(countNonZero(refMask != testMask), 0);
 336 }
 337
 338 TEST_P(Test_Torch_nets, ENet_accuracy)
 339 {
 340     applyTestTag(target == DNN_TARGET_CPU ? "" : CV_TEST_TAG_MEMORY_512MB);
 341     checkBackend();
 342     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
 343         throw SkipTestException("");
 344     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
 345     {
 346         if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
 347         if (target == DNN_TARGET_OPENCL)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL);
 348         if (target == DNN_TARGET_MYRIAD)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
 349         throw SkipTestException("");
 350     }
 351
 352     Net net;
 353     {
 354         const string model = findDataFile("dnn/Enet-model-best.net", false);
 355         net = readNetFromTorch(model, true);
 356         ASSERT_TRUE(!net.empty());
 357     }
 358
 359     net.setPreferableBackend(backend);
 360     net.setPreferableTarget(target);
 361
 362     Mat sample = imread(_tf("street.png", false));
 363     Mat inputBlob = blobFromImage(sample, 1./255, Size(), Scalar(), /*swapRB*/true);
 364
 365     net.setInput(inputBlob, "");
 366     Mat out = net.forward();
 367     Mat ref = blobFromNPY(_tf("torch_enet_prob.npy", false));
 368     // Due to numerical instability in Pooling-Unpooling layers (indexes jittering)
 369     // thresholds for ENet must be changed. Accuracy of results was checked on
 370     // Cityscapes dataset and difference in mIOU with Torch is 10E-4%
 371     normAssert(ref, out, "", 0.00044, /*target == DNN_TARGET_CPU ? 0.453 : */0.552);
 372     normAssertSegmentation(ref, out);
 373
 374     const int N = 3;
 375     for (int i = 0; i < N; i++)
 376     {
 377         net.setInput(inputBlob, "");
 378         Mat out = net.forward();
 379         normAssert(ref, out, "", 0.00044, /*target == DNN_TARGET_CPU ? 0.453 : */0.552);
 380         normAssertSegmentation(ref, out);
 381     }
 382 }
 383
 384 // Check accuracy of style transfer models from https://github.com/jcjohnson/fast-neural-style
 385 // th fast_neural_style.lua \
 386 //   -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
 387 //   -output_image lena.png \
 388 //   -median_filter 0 \
 389 //   -image_size 0 \
 390 //   -model models/eccv16/starry_night.t7
 391 // th fast_neural_style.lua \
 392 //   -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
 393 //   -output_image lena.png \
 394 //   -median_filter 0 \
 395 //   -image_size 0 \
 396 //   -model models/instance_norm/feathers.t7
 397 TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
 398 {
 399 #if defined INF_ENGINE_RELEASE
 400     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD
 401             && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
 402         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
 403 #endif
 404
 405     checkBackend();
 406
 407 #if defined(INF_ENGINE_RELEASE)
 408 #if INF_ENGINE_RELEASE <= 2018050000
 409     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
 410         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_2018R5);
 411 #endif
 412 #endif
 413
 414     std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
 415                             "dnn/fast_neural_style_instance_norm_feathers.t7"};
 416     std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};
 417
 418     for (int i = 0; i < 2; ++i)
 419     {
 420         const string model = findDataFile(models[i], false);
 421         Net net = readNetFromTorch(model);
 422
 423         net.setPreferableBackend(backend);
 424         net.setPreferableTarget(target);
 425
 426         Mat img = imread(findDataFile("dnn/googlenet_1.png"));
 427         Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false);
 428
 429         net.setInput(inputBlob);
 430         Mat out = net.forward();
 431
 432         // Deprocessing.
 433         getPlane(out, 0, 0) += 103.939;
 434         getPlane(out, 0, 1) += 116.779;
 435         getPlane(out, 0, 2) += 123.68;
 436         out = cv::min(cv::max(0, out), 255);
 437
 438         Mat ref = imread(findDataFile(targets[i]));
 439         Mat refBlob = blobFromImage(ref, 1.0, Size(), Scalar(), false);
 440
 441         if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
 442         {
 443             double normL1 = cvtest::norm(refBlob, out, cv::NORM_L1) / refBlob.total();
 444             if (target == DNN_TARGET_MYRIAD)
 445                 EXPECT_LE(normL1, 4.0f);
 446             else
 447                 EXPECT_LE(normL1, 0.6f);
 448         }
 449         else
 450             normAssert(out, refBlob, "", 0.5, 1.1);
 451     }
 452 }
 453
 454 INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, dnnBackendsAndTargets());
 455
 456 // Test a custom layer
 457 // https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest
 458 class SpatialUpSamplingNearestLayer CV_FINAL : public Layer
 459 {
 460 public:
 461     SpatialUpSamplingNearestLayer(const LayerParams &params) : Layer(params)
 462     {
 463         scale = params.get<int>("scale_factor");
 464     }
 465
 466     static Ptr<Layer> create(LayerParams& params)
 467     {
 468         return Ptr<Layer>(new SpatialUpSamplingNearestLayer(params));
 469     }
 470
 471     virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
 472                                  const int requiredOutputs,
 473                                  std::vector<std::vector<int> > &outputs,
 474                                  std::vector<std::vector<int> > &internals) const CV_OVERRIDE
 475     {
 476         std::vector<int> outShape(4);
 477         outShape[0] = inputs[0][0];  // batch size
 478         outShape[1] = inputs[0][1];  // number of channels
 479         outShape[2] = scale * inputs[0][2];
 480         outShape[3] = scale * inputs[0][3];
 481         outputs.assign(1, outShape);
 482         return false;
 483     }
 484
 485     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays) CV_OVERRIDE
 486     {
 487         CV_TRACE_FUNCTION();
 488         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 489
 490         std::vector<Mat> inputs, outputs;
 491         inputs_arr.getMatVector(inputs);
 492         outputs_arr.getMatVector(outputs);
 493
 494         Mat& inp = inputs[0];
 495         Mat& out = outputs[0];
 496         const int outHeight = out.size[2];
 497         const int outWidth = out.size[3];
 498         for (size_t n = 0; n < inp.size[0]; ++n)
 499         {
 500             for (size_t ch = 0; ch < inp.size[1]; ++ch)
 501             {
 502                 resize(getPlane(inp, n, ch), getPlane(out, n, ch),
 503                        Size(outWidth, outHeight), 0, 0, INTER_NEAREST);
 504             }
 505         }
 506     }
 507
 508 private:
 509     int scale;
 510 };
 511
 512 TEST_P(Test_Torch_layers, upsampling_nearest)
 513 {
 514     // Test a custom layer.
 515     CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer);
 516     try
 517     {
 518         runTorchNet("net_spatial_upsampling_nearest", "", false, true);
 519     }
 520     catch (...)
 521     {
 522         LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
 523         throw;
 524     }
 525     LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
 526
 527     // Test an implemented layer.
 528     runTorchNet("net_spatial_upsampling_nearest", "", false, true);
 529 }
 530
 531 INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, dnnBackendsAndTargets());
 532
 533 }