dnn(opencl): fix convolution kernel w/o bias with activation
[platform/upstream/opencv.git] modules/dnn/test/test_layers.cpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2017, Intel Corporation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 //   * Redistribution's of source code must retain the above copyright notice,
20 //     this list of conditions and the following disclaimer.
21 //
22 //   * Redistribution's in binary form must reproduce the above copyright notice,
23 //     this list of conditions and the following disclaimer in the documentation
24 //     and/or other materials provided with the distribution.
25 //
26 //   * The name of the copyright holders may not be used to endorse or promote products
27 //     derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41
42 #include "test_precomp.hpp"
43 #include <opencv2/core/ocl.hpp>
44 #include "npy_blob.hpp"
45 #include <opencv2/dnn/shape_utils.hpp>
46 #include <opencv2/dnn/all_layers.hpp>
47 #include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS
48
49 #ifdef HAVE_INF_ENGINE
50 #include <thread>
51 #endif
52
53 namespace opencv_test { namespace {
54
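// Resolves a filename against the dnn/layers/ subdirectory of the OpenCV extra
// test data directory returned by getOpenCVExtraDir().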
55 template<typename TString>
56 static String _tf(TString filename)
57 {
58     String basetestdir = getOpenCVExtraDir();
59     size_t len = basetestdir.size();
60     if(len > 0 && basetestdir[len-1] != '/' && basetestdir[len-1] != '\\')
61         return (basetestdir + "/dnn/layers/") + filename;
62     return (basetestdir + "dnn/layers/") + filename;
63 }
64
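// Runs a single layer in isolation: queries output/internal shapes via
// getMemoryShapes(), allocates CV_32F blobs for them, then calls finalize()
// and forward() on cloned copies of the input blobs.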
65 void runLayer(Ptr<Layer> layer, std::vector<Mat> &inpBlobs, std::vector<Mat> &outBlobs)
66 {
67     size_t ninputs = inpBlobs.size();
68     std::vector<Mat> inp(ninputs), outp, intp;
69     std::vector<MatShape> inputs, outputs, internals;
70
71     for (size_t i = 0; i < ninputs; i++)
72     {
73         inp[i] = inpBlobs[i].clone();
74         inputs.push_back(shape(inp[i]));
75     }
76
77     layer->getMemoryShapes(inputs, 0, outputs, internals);
78     for (size_t i = 0; i < outputs.size(); i++)
79     {
80         outp.push_back(Mat(outputs[i], CV_32F));
81     }
82     for (size_t i = 0; i < internals.size(); i++)
83     {
84         intp.push_back(Mat(internals[i], CV_32F));
85     }
86
87     layer->finalize(inp, outp);
88     layer->forward(inp, outp, intp);
89
90     size_t noutputs = outp.size();
91     outBlobs.resize(noutputs);
92     for (size_t i = 0; i < noutputs; i++)
93         outBlobs[i] = outp[i];
94 }
95
96 class Test_Caffe_layers : public DNNTestLayer
97 {
98 public:
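    // Loads <basename>.prototxt (and, if useCaffeModel, <basename>.caffemodel),
    // feeds it one or more input blobs from .npy files and compares every output
    // against the reference .npy blob(s) within the given (or default) l1/lInf
    // tolerances.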
99     void testLayerUsingCaffeModels(const String& basename, bool useCaffeModel = false,
100                                    bool useCommonInputBlob = true, double l1 = 0.0, double lInf = 0.0,
101                                    int numInps = 1, int numOuts = 1)
102     {
103         CV_Assert_N(numInps >= 1, numInps <= 10, numOuts >= 1, numOuts <= 10);
104         String prototxt = _tf(basename + ".prototxt");
105         String caffemodel = _tf(basename + ".caffemodel");
106
107         std::vector<Mat> inps, refs, outs;
108
109         if (numInps > 1)
110         {
111             for (int i = 0; i < numInps; i++)
112             {
113                 String inpfile = _tf(basename + ".input_" + (i + '0') + ".npy");
114                 inps.push_back(blobFromNPY(inpfile));
115             }
116         }
117         else
118         {
119             String inpfile = (useCommonInputBlob) ? _tf("blob.npy") : _tf(basename + ".input.npy");
120             inps.push_back(blobFromNPY(inpfile));
121         }
122
123         if (numOuts > 1)
124         {
125             for (int i = 0; i < numOuts; i++)
126             {
127                 String outfile = _tf(basename + "_" + (i + '0') + ".npy");
128                 refs.push_back(blobFromNPY(outfile));
129             }
130         }
131         else
132         {
133             String outfile = _tf(basename + ".npy");
134             refs.push_back(blobFromNPY(outfile));
135         }
136
137         Net net = readNetFromCaffe(prototxt, (useCaffeModel) ? caffemodel : String());
138         ASSERT_FALSE(net.empty());
139         checkBackend(&inps[0], &refs[0]);
140
141         net.setPreferableBackend(backend);
142         net.setPreferableTarget(target);
143
144         String inp_name = "input";
145         if (numInps > 1)
146         {
147             for (int i = 0; i < numInps; i++)
148             {
149                 net.setInput(inps[i], inp_name + "_" + (i + '0'));
150             }
151         }
152         else
153         {
154             net.setInput(inps.back(), inp_name);
155         }
156
157         net.forward(outs);
158         for (size_t i = 0; i < refs.size(); i++)
159         {
160             normAssert(refs[i], outs[i], "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
161         }
162     }
163 };
164
165 TEST_P(Test_Caffe_layers, Softmax)
166 {
167     testLayerUsingCaffeModels("layer_softmax");
168 }
169
170 TEST_P(Test_Caffe_layers, LRN)
171 {
172     testLayerUsingCaffeModels("layer_lrn_spatial");
173     testLayerUsingCaffeModels("layer_lrn_channels");
174 }
175
176 TEST_P(Test_Caffe_layers, Convolution)
177 {
178     testLayerUsingCaffeModels("layer_convolution", true);
179 }
180
181 TEST_P(Test_Caffe_layers, DeConvolution)
182 {
183     testLayerUsingCaffeModels("layer_deconvolution", true, false);
184 }
185
186 TEST_P(Test_Caffe_layers, InnerProduct)
187 {
188     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
189         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
190     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
191         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
192
193     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
194         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
195     testLayerUsingCaffeModels("layer_inner_product", true);
196 }
197
198 TEST_P(Test_Caffe_layers, Pooling_max)
199 {
200     testLayerUsingCaffeModels("layer_pooling_max");
201 }
202
203 TEST_P(Test_Caffe_layers, Pooling_ave)
204 {
205     testLayerUsingCaffeModels("layer_pooling_ave");
206 }
207
208 TEST_P(Test_Caffe_layers, MVN)
209 {
210     testLayerUsingCaffeModels("layer_mvn");
211 }
212
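// Builds a standalone Reshape layer from the given axis/num_axes/dim parameters,
// runs it on a blob of shape inputShape and checks that the output shape matches
// targetShape.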
213 void testReshape(const MatShape& inputShape, const MatShape& targetShape,
214                  int axis = 0, int num_axes = -1,
215                  MatShape mask = MatShape())
216 {
217     LayerParams params;
218     params.set("axis", axis);
219     params.set("num_axes", num_axes);
220     if (!mask.empty())
221     {
222         params.set("dim", DictValue::arrayInt<int*>(&mask[0], mask.size()));
223     }
224
225     Mat inp(inputShape.size(), &inputShape[0], CV_32F);
226     std::vector<Mat> inpVec(1, inp);
227     std::vector<Mat> outVec, intVec;
228
229     Ptr<Layer> rl = LayerFactory::createLayerInstance("Reshape", params);
230     runLayer(rl, inpVec, outVec);
231
232     Mat& out = outVec[0];
233     MatShape shape(out.size.p, out.size.p + out.dims);
234     EXPECT_EQ(shape, targetShape);
235 }
236
237 TEST(Layer_Test_Reshape, Accuracy)
238 {
239     {
240         int inp[] = {4, 3, 1, 2};
241         int out[] = {4, 3, 2};
242         testReshape(MatShape(inp, inp + 4), MatShape(out, out + 3), 2, 1);
243     }
244     {
245         int inp[] = {1, 128, 4, 4};
246         int out[] = {1, 2048};
247         int mask[] = {-1, 2048};
248         testReshape(MatShape(inp, inp + 4), MatShape(out, out + 2), 0, -1,
249                     MatShape(mask, mask + 2));
250     }
251     {
252         int inp[] = {1, 2, 3};
253         int out[] = {3, 1, 2};
254         int mask[] = {3, 1, 2};
255         testReshape(MatShape(inp, inp + 3), MatShape(out, out + 3), 0, -1,
256                     MatShape(mask, mask + 3));
257     }
258 }
259
260 TEST_P(Test_Caffe_layers, BatchNorm)
261 {
262     testLayerUsingCaffeModels("layer_batch_norm", true);
263     testLayerUsingCaffeModels("layer_batch_norm_local_stats", true, false);
264 }
265
266 TEST_P(Test_Caffe_layers, ReLU)
267 {
268     testLayerUsingCaffeModels("layer_relu");
269 }
270
271 TEST_P(Test_Caffe_layers, Dropout)
272 {
273     testLayerUsingCaffeModels("layer_dropout");
274 }
275
276 TEST_P(Test_Caffe_layers, Concat)
277 {
278 #if defined(INF_ENGINE_RELEASE)
279 #if INF_ENGINE_VER_MAJOR_GE(2019010000) && INF_ENGINE_VER_MAJOR_LT(2019020000)
280     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
281         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
282 #elif INF_ENGINE_VER_MAJOR_EQ(2019020000)
283     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 &&
284         (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
285         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
286                      CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
287 #endif
288
289     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH &&
290         (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
291         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
292                      CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
293
294 #endif
295     testLayerUsingCaffeModels("layer_concat");
296     testLayerUsingCaffeModels("layer_concat_optim", true, false);
297     testLayerUsingCaffeModels("layer_concat_shared_input", true, false);
298 }
299
300 TEST_P(Test_Caffe_layers, Fused_Concat)
301 {
302     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
303         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
304                      CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
305
306     checkBackend();
307
308     // Test case
309     // input
310     //   |
311     //   v
312     // some_layer
313     // |   |
314     // v   v
315     // concat
316     Net net;
317     int interLayer;
318     {
319         LayerParams lp;
320         lp.type = "AbsVal";
321         lp.name = "someLayer";
322         interLayer = net.addLayerToPrev(lp.name, lp.type, lp);
323     }
324     {
325         LayerParams lp;
326         lp.set("axis", 1);
327         lp.type = "Concat";
328         lp.name = "testConcat";
329         int id = net.addLayer(lp.name, lp.type, lp);
330         net.connect(interLayer, 0, id, 0);
331         net.connect(interLayer, 0, id, 1);
332     }
333     int shape[] = {1, 2, 3, 4};
334     Mat input(4, shape, CV_32F);
335     randu(input, 0.0f, 1.0f);  // [0, 1] to make AbsVal an identity transformation.
336
337     net.setInput(input);
338     net.setPreferableBackend(backend);
339     net.setPreferableTarget(target);
340     Mat out = net.forward();
341
342     normAssert(slice(out, Range::all(), Range(0, 2), Range::all(), Range::all()), input, "", default_l1, default_lInf);
343     normAssert(slice(out, Range::all(), Range(2, 4), Range::all(), Range::all()), input, "", default_l1, default_lInf);
344 }
345
346 TEST_P(Test_Caffe_layers, Eltwise)
347 {
348     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
349         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
350     testLayerUsingCaffeModels("layer_eltwise");
351 }
352
353 TEST_P(Test_Caffe_layers, PReLU)
354 {
355     double lInf = (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.021 : 0.0;
356     testLayerUsingCaffeModels("layer_prelu", true, true, 0.0, lInf);
357 }
358
359 // TODO: fix an unstable test case
360 TEST_P(Test_Caffe_layers, layer_prelu_fc)
361 {
362     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
363         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
364     // Reference output values are in range [-0.0001, 10.3906]
365     double l1 = (target == DNN_TARGET_MYRIAD) ? 0.005 : 0.0;
366     double lInf = (target == DNN_TARGET_MYRIAD) ? 0.021 : 0.0;
367 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000)
368     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
369     {
370         l1 = 0.006f; lInf = 0.05f;
371     }
372     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
373     {
374         l1 = 0.01f; lInf = 0.05f;
375     }
376 #endif
377     testLayerUsingCaffeModels("layer_prelu_fc", true, false, l1, lInf);
378 }
379
380 TEST_P(Test_Caffe_layers, Reshape_Split_Slice)
381 {
382     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
383         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
384     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
385         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
386
387     Net net = readNetFromCaffe(_tf("reshape_and_slice_routines.prototxt"));
388     ASSERT_FALSE(net.empty());
389
390     net.setPreferableBackend(backend);
391     net.setPreferableTarget(target);
392
393     Mat input(6, 12, CV_32F);
394     RNG rng(0);
395     rng.fill(input, RNG::UNIFORM, -1, 1);
396
397     net.setInput(input, "input");
398     Mat output = net.forward("output");
399
400     normAssert(input, output, "", default_l1, default_lInf);
401 }
402
403 TEST_P(Test_Caffe_layers, Conv_Elu)
404 {
405 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE <= 2018050000
406     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
407         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
408 #endif
409
410     Net net = readNetFromTensorflow(_tf("layer_elu_model.pb"));
411     ASSERT_FALSE(net.empty());
412
413     Mat inp = blobFromNPY(_tf("layer_elu_in.npy"));
414     Mat ref = blobFromNPY(_tf("layer_elu_out.npy"));
415
416     net.setInput(inp, "input");
417     net.setPreferableBackend(backend);
418     net.setPreferableTarget(target);
419     Mat out = net.forward();
420
421     normAssert(ref, out, "", default_l1, default_lInf);
422 }
423
424 class Layer_LSTM_Test : public ::testing::Test
425 {
426 public:
427     int numInp, numOut;
428     Mat Wh, Wx, b;
429     Ptr<LSTMLayer> layer;
430     std::vector<Mat> inputs, outputs;
431
432     Layer_LSTM_Test() {}
433
434     void init(const MatShape &inpShape_, const MatShape &outShape_,
435               bool produceCellOutput, bool useTimestampDim)
436     {
437         numInp = total(inpShape_);
438         numOut = total(outShape_);
439
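        // The LSTM blobs stack the four gates along the first dimension, hence
        // 4 * numOut rows: Wh is the hidden-to-hidden matrix, Wx the
        // input-to-hidden matrix and b the bias.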
440         Wh = Mat::ones(4 * numOut, numOut, CV_32F);
441         Wx = Mat::ones(4 * numOut, numInp, CV_32F);
442         b  = Mat::ones(4 * numOut, 1, CV_32F);
443
444         LayerParams lp;
445         lp.blobs.resize(3);
446         lp.blobs[0] = Wh;
447         lp.blobs[1] = Wx;
448         lp.blobs[2] = b;
449         lp.set<bool>("produce_cell_output", produceCellOutput);
450         lp.set<bool>("use_timestamp_dim", useTimestampDim);
451
452         layer = LSTMLayer::create(lp);
453         layer->setOutShape(outShape_);
454     }
455 };
456
457 TEST_F(Layer_LSTM_Test, get_set_test)
458 {
459     const int TN = 4;
460     MatShape inpShape = shape(5, 3, 2);
461     MatShape outShape = shape(3, 1, 2);
462     MatShape inpResShape = concat(shape(TN), inpShape);
463     MatShape outResShape = concat(shape(TN), outShape);
464
465     init(inpShape, outShape, true, false);
466     layer->setOutShape(outShape);
467
468     Mat C((int)outResShape.size(), &outResShape[0], CV_32F);
469     randu(C, -1., 1.);
470     Mat H = C.clone();
471     randu(H, -1., 1.);
472
473     Mat inp((int)inpResShape.size(), &inpResShape[0], CV_32F);
474     randu(inp, -1., 1.);
475
476     inputs.push_back(inp);
477     runLayer(layer, inputs, outputs);
478
479     EXPECT_EQ(2u, outputs.size());
480
481     print(outResShape, "outResShape");
482     print(shape(outputs[0]), "out0");
483     print(shape(outputs[0]), "out1");
484
485     EXPECT_EQ(outResShape, shape(outputs[0]));
486     EXPECT_EQ(outResShape, shape(outputs[1]));
487
488     EXPECT_EQ(0, layer->inputNameToIndex("x"));
489     EXPECT_EQ(0, layer->outputNameToIndex("h"));
490     EXPECT_EQ(1, layer->outputNameToIndex("c"));
491 }
492
493 TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
494 {
495     LayerParams lp;
496     lp.blobs.resize(3);
497     lp.blobs[0] = blobFromNPY(_tf("lstm.prototxt.w_2.npy"));  // Wh
498     lp.blobs[1] = blobFromNPY(_tf("lstm.prototxt.w_0.npy"));  // Wx
499     lp.blobs[2] = blobFromNPY(_tf("lstm.prototxt.w_1.npy"));  // bias
500     Ptr<LSTMLayer> layer = LSTMLayer::create(lp);
501
502     Mat inp = blobFromNPY(_tf("recurrent.input.npy"));
503     std::vector<Mat> inputs(1, inp), outputs;
504     runLayer(layer, inputs, outputs);
505
506     Mat h_t_reference = blobFromNPY(_tf("lstm.prototxt.h_1.npy"));
507     normAssert(h_t_reference, outputs[0]);
508 }
509
510 TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent)
511 {
512     Ptr<RNNLayer> layer = RNNLayer::create(LayerParams());
513
514     layer->setWeights(
515                 blobFromNPY(_tf("rnn.prototxt.w_0.npy")),
516                 blobFromNPY(_tf("rnn.prototxt.w_1.npy")),
517                 blobFromNPY(_tf("rnn.prototxt.w_2.npy")),
518                 blobFromNPY(_tf("rnn.prototxt.w_3.npy")),
519                 blobFromNPY(_tf("rnn.prototxt.w_4.npy")) );
520
521     std::vector<Mat> output, input(1, blobFromNPY(_tf("recurrent.input.npy")));
522     runLayer(layer, input, output);
523
524     Mat h_ref = blobFromNPY(_tf("rnn.prototxt.h_1.npy"));
525     normAssert(h_ref, output[0]);
526 }
527
528 TEST(Layer_LSTM_Test_Accuracy_, Reverse)
529 {
530     // This handcrafted setup calculates (approximately) the prefix sum of the
531     // input, assuming the inputs are suitably small.
532     cv::Mat input(2, 1, CV_32FC1);
533     input.at<float>(0, 0) = 1e-5f;
534     input.at<float>(1, 0) = 2e-5f;
535
536     cv::Mat Wx(4, 1, CV_32FC1);
537     Wx.at<float>(0, 0) = 0.f;  // Input gate
538     Wx.at<float>(1, 0) = 0.f;  // Forget gate
539     Wx.at<float>(2, 0) = 0.f;  // Output gate
540     Wx.at<float>(3, 0) = 1.f;  // Update signal
541
542     cv::Mat Wh(4, 1, CV_32FC1);
543     Wh.at<float>(0, 0) = 0.f;  // Input gate
544     Wh.at<float>(1, 0) = 0.f;  // Forget gate
545     Wh.at<float>(2, 0) = 0.f;  // Output gate
546     Wh.at<float>(3, 0) = 0.f;  // Update signal
547
548     cv::Mat bias(4, 1, CV_32FC1);
549     bias.at<float>(0, 0) = 1e10f;  // Input gate - always allows input to c
550     bias.at<float>(1, 0) = 1e10f;  // Forget gate - never forget anything on c
551     bias.at<float>(2, 0) = 1e10f;  // Output gate - always output everything
552     bias.at<float>(3, 0) = 0.f;  // Update signal
553
554     LayerParams lp;
555     lp.set("reverse", true);
556     lp.set("use_timestamp_dim", true);
557     lp.blobs.clear();
558     lp.blobs.push_back(Wh);
559     lp.blobs.push_back(Wx);
560     lp.blobs.push_back(bias);
561
562     cv::Ptr<cv::dnn::LSTMLayer> layer = LSTMLayer::create(lp);
563     std::vector<cv::Mat> outputs;
564     std::vector<cv::Mat> inputs;
565     inputs.push_back(input);
566     runLayer(layer, inputs, outputs);
567
568     ASSERT_EQ(1, outputs.size());
569     cv::Mat out = outputs[0];
570     ASSERT_EQ(3, out.dims);
571     ASSERT_EQ(shape(2, 1, 1), shape(out));
572     float* data = reinterpret_cast<float*>(out.data);
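    // With reverse=true the timesteps are processed from the end, so the entry for
    // the first timestep accumulates both inputs while the entry for the second
    // timestep reflects only its own input; tanh(x) is effectively x for values
    // this small, matching the prefix-sum description above.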
573     EXPECT_NEAR(std::tanh(1e-5f) + std::tanh(2e-5f), data[0], 1e-10);
574     EXPECT_NEAR(std::tanh(2e-5f), data[1], 1e-10);
575 }
576
577
578 class Layer_RNN_Test : public ::testing::Test
579 {
580 public:
581     int nX, nH, nO, nT, nS;
582     Mat Whh, Wxh, bh, Who, bo;
583     Ptr<RNNLayer> layer;
584
585     std::vector<Mat> inputs, outputs;
586
587     Layer_RNN_Test()
588     {
589         nT = 3;
590         nS = 5;
591         nX = 31;
592         nH = 64;
593         nO = 100;
594
595         Whh = Mat::ones(nH, nH, CV_32F);
596         Wxh = Mat::ones(nH, nX, CV_32F);
597         bh  = Mat::ones(nH, 1, CV_32F);
598         Who = Mat::ones(nO, nH, CV_32F);
599         bo  = Mat::ones(nO, 1, CV_32F);
600
601         layer = RNNLayer::create(LayerParams());
602         layer->setProduceHiddenOutput(true);
603         layer->setWeights(Wxh, bh, Whh, Who, bo);
604     }
605 };
606
607 TEST_F(Layer_RNN_Test, get_set_test)
608 {
609     int sz[] = { nT, nS, 1, nX };
610     Mat inp(4, sz, CV_32F);
611     randu(inp, -1., 1.);
612     inputs.push_back(inp);
613     runLayer(layer, inputs, outputs);
614
615     EXPECT_EQ(outputs.size(), 2u);
616     EXPECT_EQ(shape(outputs[0]), shape(nT, nS, nO));
617     EXPECT_EQ(shape(outputs[1]), shape(nT, nS, nH));
618 }
619
620 TEST_P(Test_Caffe_layers, Accum)
621 {
622     if (backend == DNN_BACKEND_OPENCV && target != DNN_TARGET_CPU)
623         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL, CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
624
625     testLayerUsingCaffeModels("accum", false, false, 0.0, 0.0, 2);
626     testLayerUsingCaffeModels("accum_ref", false, false, 0.0, 0.0, 2);
627 }
628
629 TEST_P(Test_Caffe_layers, FlowWarp)
630 {
631     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
632         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
633
634     testLayerUsingCaffeModels("flow_warp", false, false, 0.0, 0.0, 2);
635 }
636
637 TEST_P(Test_Caffe_layers, ChannelNorm)
638 {
639     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
640         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
641     testLayerUsingCaffeModels("channel_norm", false, false);
642 }
643
644 TEST_P(Test_Caffe_layers, DataAugmentation)
645 {
646     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
647         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
648     testLayerUsingCaffeModels("data_augmentation", true, false);
649     testLayerUsingCaffeModels("data_augmentation_2x1", true, false);
650     testLayerUsingCaffeModels("data_augmentation_8x6", true, false);
651 }
652
653 TEST_P(Test_Caffe_layers, Resample)
654 {
655     if (backend != DNN_BACKEND_OPENCV)
656         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
657     testLayerUsingCaffeModels("nearest_2inps", false, false, 0.0, 0.0, 2);
658     testLayerUsingCaffeModels("nearest", false, false);
659 }
660
661 TEST_P(Test_Caffe_layers, Correlation)
662 {
663     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
664         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER,
665                      CV_TEST_TAG_DNN_SKIP_OPENCL, CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
666     testLayerUsingCaffeModels("correlation", false, false, 0.0, 0.0, 2);
667 }
668
669 TEST_P(Test_Caffe_layers, Convolution2Inputs)
670 {
671     testLayerUsingCaffeModels("conv_2_inps", true, false, 0.0, 0.0, 2);
672 }
673
674 TEST_P(Test_Caffe_layers, ROIPooling_Accuracy)
675 {
676     Net net = readNetFromCaffe(_tf("net_roi_pooling.prototxt"));
677     ASSERT_FALSE(net.empty());
678
679     Mat inp = blobFromNPY(_tf("net_roi_pooling.input.npy"));
680     Mat rois = blobFromNPY(_tf("net_roi_pooling.rois.npy"));
681     Mat ref = blobFromNPY(_tf("net_roi_pooling.npy"));
682
683     checkBackend(&inp, &ref);
684
685     net.setPreferableBackend(backend);
686     net.setPreferableTarget(target);
687
688     net.setInput(inp, "input");
689     net.setInput(rois, "rois");
690
691     Mat out = net.forward();
692
693     double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1e-3 : 1e-5;
694     double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1e-3 : 1e-4;
695     normAssert(out, ref, "", l1, lInf);
696 }
697
698 TEST_P(Test_Caffe_layers, FasterRCNN_Proposal)
699 {
700     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
701         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
702     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
703         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
704     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
705         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
706
707     Net net = readNetFromCaffe(_tf("net_faster_rcnn_proposal.prototxt"));
708
709     Mat scores = blobFromNPY(_tf("net_faster_rcnn_proposal.scores.npy"));
710     Mat deltas = blobFromNPY(_tf("net_faster_rcnn_proposal.deltas.npy"));
711     Mat imInfo = (Mat_<float>(1, 3) << 600, 800, 1.6f);
712
713     net.setInput(scores, "rpn_cls_prob_reshape");
714     net.setInput(deltas, "rpn_bbox_pred");
715     net.setInput(imInfo, "im_info");
716
717     std::vector<Mat> outs;
718     net.setPreferableBackend(backend);
719     net.setPreferableTarget(target);
720     net.forward(outs, "output");
721
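    // The proposal outputs may be padded with zero rows up to the maximum number of
    // detections: compare only the first numDets rows against the reference and
    // require any remaining rows to be all zeros.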
722     for (int i = 0; i < 2; ++i)
723     {
724         Mat ref = blobFromNPY(_tf(i == 0 ? "net_faster_rcnn_proposal.out_rois.npy" :
725                                            "net_faster_rcnn_proposal.out_scores.npy"));
726         const int numDets = ref.size[0];
727         EXPECT_LE(numDets, outs[i].size[0]);
728         normAssert(outs[i].rowRange(0, numDets), ref);
729
730         if (numDets < outs[i].size[0])
731         {
732             EXPECT_EQ(countNonZero(outs[i].rowRange(numDets, outs[i].size[0])), 0);
733         }
734     }
735 }
736
737 typedef testing::TestWithParam<tuple<Vec4i, Vec2i, bool> > Scale_untrainable;
738 TEST_P(Scale_untrainable, Accuracy)
739 {
740     Vec4i inpShapeVec = get<0>(GetParam());
741     int axis = get<1>(GetParam())[0];
742     int weightsDims = get<1>(GetParam())[1];
743     bool testFusion = get<2>(GetParam());
744     const int inpShape[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
745
746     // Create a network with two inputs. The Scale layer multiplies the first input
747     // by the second one. See http://caffe.berkeleyvision.org/tutorial/layers/scale.html
748     Net net;
749     // Check that this version of Scale layer won't be fused with Convolution layer.
750     if (testFusion)
751     {
752         LayerParams lp;
753         lp.set("kernel_size", 1);
754         lp.set("num_output", 3);
755         lp.set("group", 3);
756         lp.set("bias_term", false);
757         lp.type = "Convolution";
758         lp.name = "testConv";
759
760         std::vector<int> weightsShape(4);
761         weightsShape[0] = 3;  // #outChannels
762         weightsShape[1] = 1;  // #inpChannels / group
763         weightsShape[2] = 1;  // height
764         weightsShape[3] = 1;  // width
765         Mat weights(weightsShape, CV_32F);
766         weights.setTo(1);
767         lp.blobs.push_back(weights);
768         net.addLayerToPrev(lp.name, lp.type, lp);
769     }
770     LayerParams lp;
771     lp.type = "Scale";
772     lp.name = "testLayer";
773     lp.set("axis", axis);
774     int id = net.addLayerToPrev(lp.name, lp.type, lp);
775     net.connect(0, 1, id, 1);
776
777     Mat input(4, inpShape, CV_32F);
778     Mat weights(weightsDims, &inpShape[axis], CV_32F);
779     randu(input, -1, 1);
780     randu(weights, -1, 1);
781
782     std::vector<String> inpNames(2);
783     inpNames[0] = "scale_input";
784     inpNames[1] = "scale_weights";
785     net.setInputsNames(inpNames);
786     net.setInput(input, inpNames[0]);
787     net.setInput(weights, inpNames[1]);
788     net.setPreferableBackend(DNN_BACKEND_OPENCV);
789     Mat out = net.forward();
790
791     Mat ref(input.dims, input.size, CV_32F);
792     float* inpData = (float*)input.data;
793     float* refData = (float*)ref.data;
794     float* weightsData = (float*)weights.data;
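    // Reference computation: the weights blob spans dims [axis, axis + weightsDims),
    // and spatialSize is the product of the trailing dims after it, so
    // (i / spatialSize) % weights.total() selects the weight matching each element's
    // position within the covered dims.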
795     int spatialSize = 1;
796     for (int i = axis + weightsDims; i < 4; ++i)
797         spatialSize *= inpShape[i];
798     for (int i = 0; i < ref.total(); ++i)
799     {
800         float w = weightsData[(i / spatialSize) % weights.total()];
801         refData[i] = inpData[i] * w;
802     }
803     normAssert(out, ref);
804 }
805
806 INSTANTIATE_TEST_CASE_P(Layer_Test, Scale_untrainable, Combine(
807 /*input size*/   Values(Vec4i(2, 3, 4, 5)),
808 /*axis, #dims*/  Values(Vec2i(0, 1), Vec2i(0, 2), Vec2i(0, 3), Vec2i(0, 4),
809                                      Vec2i(1, 1), Vec2i(1, 2), Vec2i(1, 3),
810                                                   Vec2i(2, 1), Vec2i(2, 2),
811                                                                Vec2i(3, 1)),
812 /*conv fusion*/  testing::Bool()
813 ));
814
815 typedef testing::TestWithParam<tuple<Vec4i, Vec4i, int, int, int> > Crop;
816 TEST_P(Crop, Accuracy)
817 {
818     Vec4i inpShapeVec = get<0>(GetParam());
819     Vec4i sizShapeVec = get<1>(GetParam());
820     int axis = get<2>(GetParam());
821     int numOffsets = get<3>(GetParam());
822     int offsetVal = get<4>(GetParam());
823     const int inpShape[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
824     const int sizShape[] = {sizShapeVec[0], sizShapeVec[1], sizShapeVec[2], sizShapeVec[3]};
825
826     // Create a network with two inputs. The Crop layer crops the first input to
827     // the size of the second one.
828     // See http://caffe.berkeleyvision.org/tutorial/layers/crop.html
829     Net net;
830
831     LayerParams lp;
832     lp.name = "testCrop";
833     lp.type = "Crop";
834     lp.set("axis", axis);
835     if (numOffsets > 0)
836     {
837         std::vector<int> offsets(numOffsets, offsetVal);
838         lp.set("offset", DictValue::arrayInt<int*>(&offsets[0], offsets.size()));
839     }
840     else
841         offsetVal = 0;
842     int id = net.addLayerToPrev(lp.name, lp.type, lp);
843     net.connect(0, 1, id, 1);
844
845     Mat inpImage(4, inpShape, CV_32F);
846     Mat sizImage(4, sizShape, CV_32F);
847     randu(inpImage, -1, 1);
848     randu(sizImage, -1, 1);
849
850     std::vector<String> inpNames(2);
851     inpNames[0] = "cropImage";
852     inpNames[1] = "sizImage";
853     net.setInputsNames(inpNames);
854     net.setInput(inpImage, inpNames[0]);
855     net.setInput(sizImage, inpNames[1]);
856     net.setPreferableBackend(DNN_BACKEND_OPENCV);
857
858     // There are a few conditions that represent invalid input to the crop
859     // layer, so in those cases we want to verify an exception is thrown.
860
861     bool shouldThrowException = false;
862     if (numOffsets > 1 && numOffsets != 4 - axis)
863         shouldThrowException = true;
864     else
865         for (int i = axis; i < 4; i++)
866             if (sizShape[i] + offsetVal > inpShape[i])
867                 shouldThrowException = true;
868
869     Mat out;
870     if (shouldThrowException)
871     {
872         ASSERT_ANY_THROW(out = net.forward());
873         return;
874     }
875     else
876         out = net.forward();
877
878     // Finally, compare the cropped output blob from the DNN layer (out)
879     // to a reference blob (ref) that we compute here.
880
881     std::vector<Range> crop_range;
882     crop_range.resize(4, Range::all());
883     for (int i = axis; i < 4; i++)
884         crop_range[i] = Range(offsetVal, sizShape[i] + offsetVal);
885
886     Mat ref(sizImage.dims, sizImage.size, CV_32F);
887     inpImage(&crop_range[0]).copyTo(ref);
888     normAssert(out, ref);
889 }
890
891 INSTANTIATE_TEST_CASE_P(Layer_Test, Crop, Combine(
892 /*input blob shape*/    Values(Vec4i(1, 3, 20, 30)),
893 /*cropsize blob shape*/ Values(Vec4i(1, 3, 10, 12)),
894 /*start axis*/          Values(0, 1, 2),
895 /*number of offsets*/   Values(0, 1, 2, 4),
896 /*offset value*/        Values(3, 4)
897 ));
898
899 // Check that by default the average pooling layer does not count zero-padded values
900 // in the normalization area.
901 TEST_P(Test_Caffe_layers, Average_pooling_kernel_area)
902 {
903     LayerParams lp;
904     lp.name = "testAvePool";
905     lp.type = "Pooling";
906     lp.set("kernel_size", 2);
907     lp.set("stride", 2);
908     lp.set("pool", "AVE");
909
910     Net net;
911     net.addLayerToPrev(lp.name, lp.type, lp);
912     // 1 2 | 3
913     // 4 5 | 6
914     // ----+--
915     // 7 8 | 9
916     Mat inp = (Mat_<float>(3, 3) << 1, 2, 3, 4, 5, 6, 7, 8, 9);
917     Mat ref = (Mat_<float>(2, 2) << (1 + 2 + 4 + 5) / 4.f, (3 + 6) / 2.f, (7 + 8) / 2.f, 9);
918     Mat tmp = blobFromImage(inp);
919     net.setInput(blobFromImage(inp));
920     net.setPreferableBackend(backend);
921     net.setPreferableTarget(target);
922     Mat out = net.forward();
923     normAssert(out, blobFromImage(ref));
924 }
925
926 TEST_P(Test_Caffe_layers, PriorBox_repeated)
927 {
928     Net net = readNet(_tf("prior_box.prototxt"));
929     int inp_size[] = {1, 3, 10, 10};
930     int shape_size[] = {1, 2, 3, 4};
931     Mat inp(4, inp_size, CV_32F);
932     randu(inp, -1.0f, 1.0f);
933     Mat shape(4, shape_size, CV_32F);
934     randu(shape, -1.0f, 1.0f);
935     net.setInput(inp, "data");
936     net.setInput(shape, "shape");
937     net.setPreferableBackend(backend);
938     net.setPreferableTarget(target);
939     Mat out = net.forward();
940     Mat ref = blobFromNPY(_tf("priorbox_output.npy"));
941
942     double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1e-3 : 1e-5;
943     double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1e-3 : 1e-4;
944     normAssert(out, ref, "", l1, lInf);
945 }
946
947 // Test PriorBoxLayer in case of no aspect ratios (just squared proposals).
948 TEST_P(Test_Caffe_layers, PriorBox_squares)
949 {
950     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
951         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
952     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
953         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
954     LayerParams lp;
955     lp.name = "testPriorBox";
956     lp.type = "PriorBox";
957     lp.set("min_size", 2);
958     lp.set("flip", true);
959     lp.set("clip", true);
960     float variance[] = {0.1f, 0.1f, 0.2f, 0.2f};
961     float aspectRatios[] = {1.0f};  // That should be ignored.
962     lp.set("variance", DictValue::arrayReal<float*>(&variance[0], 4));
963     lp.set("aspect_ratio", DictValue::arrayReal<float*>(&aspectRatios[0], 1));
964
965     Net net;
966     int id = net.addLayerToPrev(lp.name, lp.type, lp);
967     net.connect(0, 0, id, 1);  // The second input is an input image. Its shape is used for box normalization.
968     Mat inp(1, 2, CV_32F);
969     randu(inp, -1, 1);
970     net.setInput(blobFromImage(inp));
971     net.setPreferableBackend(backend);
972     net.setPreferableTarget(target);
973     Mat out = net.forward();
974
975     Mat ref = (Mat_<float>(4, 4) << 0.0, 0.0, 0.75, 1.0,
976                                        0.25, 0.0, 1.0, 1.0,
977                                        0.1f, 0.1f, 0.2f, 0.2f,
978                                        0.1f, 0.1f, 0.2f, 0.2f);
979     double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 2e-5 : 1e-5;
980     normAssert(out.reshape(1, 4), ref, "", l1);
981 }
982
983 typedef TestWithParam<tuple<int, int> > Layer_Test_DWconv_Prelu;
984 TEST_P(Layer_Test_DWconv_Prelu, Accuracy)
985 {
986     // Test case
987     // input       img size 3x16x16  value all 1
988     //   |
989     //   v
990     // dw_conv     weight[0]=-1 weight[1]=-2 weight[2]=-3   bias={1,2,3}
991     //   |
992     //   v
993     // prelu       weight={1,2,3}
994     //   |
995     //   v
996 // output      out size 3x14x14  if right: out[0]=-8 out[1]=-32 out[2]=-72
997 //             but current opencv output: out[0]=-24 out[1]=-48 out[2]=-72
998
999     const int num_input = get<0>(GetParam());   //inpChannels
1000     const int group = 3;                        //outChannels=group when group>1
1001     const int num_output = get<1>(GetParam());
1002     const int kernel_depth = num_input/group;
1003     CV_Assert_N(num_output >= group, num_output % group == 0, num_input % group == 0);
1004
1005     Net net;
1006     //layer 1: dwconv
1007     LayerParams lp;
1008     lp.name = "dwconv";
1009     lp.type = "Convolution";
1010     lp.set("kernel_size", 3);
1011     lp.set("num_output", num_output);
1012     lp.set("pad", 0);
1013     lp.set("group", group);
1014     lp.set("stride", 1);
1015     lp.set("engine", "CAFFE");
1016     lp.set("bias_term", "true");
1017
1018     std::vector<int> weightsShape(4);
1019     weightsShape[0] = num_output;   // #outChannels
1020     weightsShape[1] = kernel_depth; // #inpChannels / group
1021     weightsShape[2] = 3;            // height
1022     weightsShape[3] = 3;            // width
1023     Mat weights(weightsShape, CV_32F, Scalar(1));
1024
1025     //assign weights
1026     for (int i = 0; i < weightsShape[0]; ++i)
1027     {
1028         for (int j = 0; j < weightsShape[1]; ++j)
1029         {
1030             for (int k = 0; k < weightsShape[2]; ++k)
1031             {
1032                 for (int l = 0; l < weightsShape[3]; ++l)
1033                 {
1034                     weights.ptr<float>(i, j, k)[l]=-1*(i+1);
1035                 }
1036             }
1037         }
1038     }
1039     lp.blobs.push_back(weights);
1040
1041     //assign bias
1042     Mat bias(1, num_output, CV_32F, Scalar(1));
1043     for (int i = 0; i < 1; ++i)
1044     {
1045         for (int j = 0; j < num_output; ++j)
1046         {
1047             bias.ptr<float>(i)[j]=j+1;
1048         }
1049     }
1050     lp.blobs.push_back(bias);
1051     net.addLayerToPrev(lp.name, lp.type, lp);
1052
1053     //layer 2: prelu
1054     LayerParams lpr;
1055     lpr.name = "dw_relu";
1056     lpr.type = "PReLU";
1057     Mat weightsp(1, num_output, CV_32F, Scalar(1));
1058
1059     //assign weights
1060     for (int i = 0; i < 1; ++i)
1061     {
1062         for (int j = 0; j < num_output; ++j)
1063         {
1064             weightsp.ptr<float>(i)[j]=j+1;
1065         }
1066     }
1067
1068     lpr.blobs.push_back(weightsp);
1069     net.addLayerToPrev(lpr.name, lpr.type, lpr);
1070
1071     int shape[] = {1, num_input, 16, 16};
1072     Mat in_blob(4, &shape[0], CV_32FC1, Scalar(1));
1073
1074     net.setPreferableBackend(DNN_BACKEND_OPENCV);
1075     net.setInput(in_blob);
1076     Mat out = net.forward();
1077
1078     //assign target
1079     std::vector<int> outShape(4);
1080     outShape[0] = 1;
1081     outShape[1] = num_output;       // outChannels
1082     outShape[2] = 14;          // height
1083     outShape[3] = 14;          // width
1084     Mat target(outShape, CV_32F, Scalar(1));
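    // Expected value for output channel j: the depthwise convolution sums
    // kernel_depth*3*3 weights of value -(j+1) over an all-ones input and adds the
    // bias (j+1), giving -9*kernel_depth*(j+1) + (j+1) < 0; PReLU then multiplies
    // this negative value by its per-channel slope (j+1).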
1085     for (int i = 0; i < outShape[0]; ++i)
1086     {
1087         for (int j = 0; j < outShape[1]; ++j)
1088         {
1089             for (int k = 0; k < outShape[2]; ++k)
1090             {
1091                 for (int l = 0; l < outShape[3]; ++l)
1092                 {
1093                     target.ptr<float>(i, j, k)[l]=(-9*kernel_depth*(j+1)+j+1)*(j+1);
1094                 }
1095             }
1096         }
1097     }
1098
1099     normAssert(out, target);
1100 }
1101 INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_DWconv_Prelu, Combine(Values(3, 6), Values(3, 6)));
1102
1103 #ifdef HAVE_INF_ENGINE
1104 // Use Intel's Model Optimizer to generate the .xml and .bin files:
1105 // ./ModelOptimizer -w /path/to/caffemodel -d /path/to/prototxt \
1106 //                  -p FP32 -i -b ${batch_size} -o /path/to/output/folder
1107 typedef testing::TestWithParam<tuple<Backend, Target> > Layer_Test_Convolution_DLDT;
1108 TEST_P(Layer_Test_Convolution_DLDT, Accuracy)
1109 {
1110     const Backend backendId = get<0>(GetParam());
1111     const Target targetId = get<1>(GetParam());
1112
1113     if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD)
1114         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
1115
1116     if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1117         throw SkipTestException("No support for async forward");
1118
1119     if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
1120         setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
1121     else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1122         setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
1123     else
1124         FAIL() << "Unknown backendId";
1125
1126     Net netDefault = readNet(_tf("layer_convolution.caffemodel"), _tf("layer_convolution.prototxt"));
1127     Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
1128
1129     Mat inp = blobFromNPY(_tf("blob.npy"));
1130
1131     netDefault.setInput(inp);
1132     netDefault.setPreferableBackend(DNN_BACKEND_OPENCV);
1133     Mat outDefault = netDefault.forward();
1134
1135     net.setInput(inp);
1136     net.setPreferableBackend(backendId);
1137     net.setPreferableTarget(targetId);
1138
1139     Mat out = net.forward();
1140
1141     double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.5e-3 : 1e-5;
1142     double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.8e-2 : 1e-4;
1143     normAssert(outDefault, out, "", l1, lInf);
1144
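    // The IR's single unconnected output is named "output"; the NN Builder API keeps
    // it as a Convolution node, while the nGraph representation exposes it as an
    // "Add" node.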
1145     std::vector<int> outLayers = net.getUnconnectedOutLayers();
1146     ASSERT_EQ(net.getLayer(outLayers[0])->name, "output");
1147     if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
1148         ASSERT_EQ(net.getLayer(outLayers[0])->type, "Convolution");
1149     else
1150         ASSERT_EQ(net.getLayer(outLayers[0])->type, "Add");
1151 }
1152
1153 TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8)
1154 {
1155     const Backend backendId = get<0>(GetParam());
1156     const Target targetId = get<1>(GetParam());
1157
1158     if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD)
1159         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
1160
1161     if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1162         throw SkipTestException("No support for async forward");
1163
1164     if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
1165         setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
1166     else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1167         setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
1168     else
1169         FAIL() << "Unknown backendId";
1170
1171     int blobSize[] = {2, 6, 75, 113};
1172     Mat inputs[] = {Mat(4, &blobSize[0], CV_8U), Mat()};
1173
1174     randu(inputs[0], 0, 255);
1175     inputs[0].convertTo(inputs[1], CV_32F);
1176
1177     Mat outs[2];
1178     for (int i = 0; i < 2; ++i)
1179     {
1180         Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
1181         net.setPreferableBackend(backendId);
1182         net.setPreferableTarget(targetId);
1183         net.setInput(inputs[i]);
1184         outs[i] = net.forward();
1185         ASSERT_EQ(outs[i].type(), CV_32F);
1186     }
1187     if (targetId != DNN_TARGET_MYRIAD)
1188         normAssert(outs[0], outs[1]);
1189 }
1190
1191 TEST_P(Layer_Test_Convolution_DLDT, multithreading)
1192 {
1193     const Backend backendId = get<0>(GetParam());
1194     const Target targetId = get<1>(GetParam());
1195
1196     if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD)
1197         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
1198
1199     if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1200         throw SkipTestException("No support for async forward");
1201
1202     if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
1203         setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
1204     else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1205         setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
1206     else
1207         FAIL() << "Unknown backendId";
1208
1209     std::string xmlPath = _tf("layer_convolution.xml");
1210     std::string binPath = _tf("layer_convolution.bin");
1211     Net firstNet = readNet(xmlPath, binPath);
1212     Net secondNet = readNet(xmlPath, binPath);
1213     Mat inp = blobFromNPY(_tf("blob.npy"));
1214
1215     firstNet.setInput(inp);
1216     secondNet.setInput(inp);
1217     firstNet.setPreferableBackend(backendId);
1218     firstNet.setPreferableTarget(targetId);
1219     secondNet.setPreferableBackend(backendId);
1220     secondNet.setPreferableTarget(targetId);
1221
1222     Mat out1, out2;
1223     std::thread t1([&]{out1 = firstNet.forward();});
1224     std::thread t2([&]{out2 = secondNet.forward();});
1225
1226     t1.join();
1227     t2.join();
1228
1229     Mat ref = blobFromNPY(_tf("layer_convolution.npy"));
1230     double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.5e-3 : 1e-5;
1231     double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.8e-2 : 1e-4;
1232     normAssert(out1, ref, "first thread", l1, lInf);
1233     normAssert(out2, ref, "second thread", l1, lInf);
1234 }
1235
1236 INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Convolution_DLDT,
1237     dnnBackendsAndTargetsIE()
1238 );
1239
1240 // 1. Create a .prototxt file with the following network:
1241 // layer {
1242 //   type: "Input" name: "data" top: "data"
1243 //   input_param { shape { dim: 1 dim: 2 dim: 3 } }
1244 // }
1245 // layer {
1246 //   type: "Input" name: "second_input" top: "second_input"
1247 //   input_param { shape { dim: 1 dim: 2 dim: 3 } }
1248 // }
1249 // layer {
1250 //  type: "Eltwise" name: "output" top: "output"
1251 //  bottom: "data" bottom: "second_input"
1252 //  eltwise_param { operation: SUM }
1253 // }
1254 //
1255 // 2. Create a .caffemodel file using Caffe:
1256 //
1257 // import caffe
1258 // net = caffe.Net('/path/to/prototxt', caffe.TEST)
1259 // net.save('/path/to/caffemodel')
1260 //
1261 // 3. Convert using ModelOptimizer.
1262 typedef testing::TestWithParam<tuple<int, int, Target, std::vector<int> > > Test_DLDT_two_inputs_3dim;
1263 TEST_P(Test_DLDT_two_inputs_3dim, as_IR)
1264 {
1265     int firstInpType = get<0>(GetParam());
1266     int secondInpType = get<1>(GetParam());
1267     Target targetId = get<2>(GetParam());
1268
1269     Net net = readNet(_tf("net_two_inputs.xml"), _tf("net_two_inputs.bin"));
1270     std::vector<int> inpSize = get<3>(GetParam());
1271     Mat firstInp(3, inpSize.data(), firstInpType);
1272     Mat secondInp(3, inpSize.data(), secondInpType);
1273     randu(firstInp, 0, 255);
1274     randu(secondInp, 0, 255);
1275
1276     net.setInput(firstInp, "data");
1277     net.setInput(secondInp, "second_input");
1278     net.setPreferableTarget(targetId);
1279
1280     double l1 = ((targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) &&
1281                  (firstInpType == CV_32F || secondInpType == CV_32F)) ? 0.06 : 0.0;
1282     double lInf = ((targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) &&
1283                    (firstInpType == CV_32F || secondInpType == CV_32F)) ? 0.23 : 0.0;
1284
1285     Mat out = net.forward();
1286
1287     Mat ref;
1288     cv::add(firstInp, secondInp, ref, Mat(), CV_32F);
1289     normAssert(out, ref, "", l1, lInf);
1290 }
1291
1292 std::vector< std::vector<int> > list_sizes{ {1, 2, 3}, {3, 2, 1}, {5, 5, 5}, {13, 7, 11} };
1293
1294 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_DLDT_two_inputs_3dim, Combine(
1295   Values(CV_8U, CV_32F), Values(CV_8U, CV_32F),
1296   testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)),
1297   testing::ValuesIn(list_sizes)
1298 ));
1299
1300 typedef testing::TestWithParam<tuple<int, int, tuple<Backend, Target> > > Test_DLDT_two_inputs;
1301 TEST_P(Test_DLDT_two_inputs, as_backend)
1302 {
1303     static const float kScale = 0.5f;
1304     static const float kScaleInv = 1.0f / kScale;
1305
1306     Backend backendId = get<0>(get<2>(GetParam()));
1307     Target targetId = get<1>(get<2>(GetParam()));
1308
1309     Net net;
1310     LayerParams lp;
1311     lp.type = "Eltwise";
1312     lp.name = "testLayer";
1313     lp.set("operation", "sum");
1314     int eltwiseId = net.addLayerToPrev(lp.name, lp.type, lp);  // connect to a first input
1315     net.connect(0, 1, eltwiseId, 1);  // connect to a second input
1316
1317     int inpSize[] = {1, 2, 3, 4};
1318     Mat firstInp(4, &inpSize[0], get<0>(GetParam()));
1319     Mat secondInp(4, &inpSize[0], get<1>(GetParam()));
1320     randu(firstInp, 0, 255);
1321     randu(secondInp, 0, 255);
1322
1323     net.setInputsNames({"data", "second_input"});
1324     net.setInput(firstInp, "data", kScale);
1325     net.setInput(secondInp, "second_input", kScaleInv);
1326     net.setPreferableBackend(backendId);
1327     net.setPreferableTarget(targetId);
1328     Mat out = net.forward();
1329
1330     Mat ref;
1331     addWeighted(firstInp, kScale, secondInp, kScaleInv, 0, ref, CV_32F);
1332     // Output values are in range [0, 637.5].
1333     double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.06 : 1e-6;
1334     double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.3 : 1e-5;
1335     normAssert(out, ref, "", l1, lInf);
1336 }
1337
1338 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_DLDT_two_inputs, Combine(
1339   Values(CV_8U, CV_32F), Values(CV_8U, CV_32F),
1340   dnnBackendsAndTargets()
1341 ));
1342
1343 class UnsupportedLayer : public Layer
1344 {
1345 public:
1346     UnsupportedLayer(const LayerParams &params) : Layer(params) {}
1347
1348     static Ptr<Layer> create(const LayerParams& params)
1349     {
1350         return Ptr<Layer>(new UnsupportedLayer(params));
1351     }
1352
1353     virtual bool supportBackend(int backendId) CV_OVERRIDE
1354     {
1355         return backendId == DNN_BACKEND_OPENCV;
1356     }
1357
1358     virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals) CV_OVERRIDE {}
1359 };
1360
1361 typedef DNNTestLayer Test_DLDT_layers;
1362
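// Builds Convolution -> Scale -> custom "Unsupported" layer. The last layer only
// supports DNN_BACKEND_OPENCV, so the backend under test must hand the output of
// the (possibly fused) Convolution+Scale subgraph back to the OpenCV fallback.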
1363 static void test_dldt_fused_output(Backend backend, Target target)
1364 {
1365     static const int kNumChannels = 3;
1366     Net net;
1367     {
1368         LayerParams lp;
1369         lp.set("kernel_size", 1);
1370         lp.set("num_output", 3);
1371         lp.set("bias_term", false);
1372         lp.type = "Convolution";
1373         lp.name = "testConv";
1374         lp.blobs.push_back(Mat({kNumChannels, 1, 1, 1}, CV_32F, Scalar(1)));
1375         net.addLayerToPrev(lp.name, lp.type, lp);
1376     }
1377     {
1378         LayerParams lp;
1379         lp.set("bias_term", false);
1380         lp.type = "Scale";
1381         lp.name = "testScale";
1382         lp.blobs.push_back(Mat({kNumChannels}, CV_32F, Scalar(1)));
1383         net.addLayerToPrev(lp.name, lp.type, lp);
1384     }
1385     {
1386         LayerParams lp;
1387         net.addLayerToPrev("unsupported_layer", "Unsupported", lp);
1388     }
1389     net.setPreferableBackend(backend);
1390     net.setPreferableTarget(target);
1391     net.setInput(Mat({1, 1, 2, 3}, CV_32FC1, Scalar(1)));
1392     net.forward();
1393 }
1394
1395 TEST_P(Test_DLDT_layers, fused_output)
1396 {
1397     CV_DNN_REGISTER_LAYER_CLASS(Unsupported, UnsupportedLayer);
1398     try
1399     {
1400         test_dldt_fused_output(backend, target);
1401     }
1402     catch (const std::exception& e)
1403     {
1404         ADD_FAILURE() << "Exception: " << e.what();
1405     }
1406     catch(...)
1407     {
1408         ADD_FAILURE() << "Unknown exception";
1409     }
1410     LayerFactory::unregisterLayer("Unsupported");
1411 }
1412
1413 TEST_P(Test_DLDT_layers, multiple_networks)
1414 {
1415     Net nets[2];
1416     for (int i = 0; i < 2; ++i)
1417     {
1418         nets[i].setInputsNames(std::vector<String>(1, format("input_%d", i)));
1419
1420         LayerParams lp;
1421         lp.set("kernel_size", 1);
1422         lp.set("num_output", 1);
1423         lp.set("bias_term", false);
1424         lp.type = "Convolution";
1425         lp.name = format("testConv_%d", i);
1426         lp.blobs.push_back(Mat({1, 1, 1, 1}, CV_32F, Scalar(1 + i)));
1427         nets[i].addLayerToPrev(lp.name, lp.type, lp);
1428         nets[i].setPreferableBackend(backend);
1429         nets[i].setPreferableTarget(target);
1430         nets[i].setInput(Mat({1, 1, 2, 3}, CV_32FC1, Scalar(1)));
1431     }
1432     Mat out_1 = nets[0].forward();
1433     Mat out_2 = nets[1].forward();
1434     // After the second network is initialized we request an output from the first network again.
1435     out_1 = nets[0].forward();
1436     normAssert(2 * out_1, out_2);
1437 }
1438
1439 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_DLDT_layers, dnnBackendsAndTargets());
1440
1441 #endif  // HAVE_INF_ENGINE
1442
1443 // Test a custom layer.
1444 class CustomInterpLayer CV_FINAL : public Layer
1445 {
1446 public:
1447     CustomInterpLayer(const LayerParams &params) : Layer(params)
1448     {
1449         zoomFactor = params.get<int>("zoom_factor", 0);
1450         outWidth = params.get<int>("width", 0);
1451         outHeight = params.get<int>("height", 0);
1452     }
1453
1454     static Ptr<Layer> create(LayerParams& params)
1455     {
1456         return Ptr<Layer>(new CustomInterpLayer(params));
1457     }
1458
1459     virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
1460                                  const int requiredOutputs,
1461                                  std::vector<std::vector<int> > &outputs,
1462                                  std::vector<std::vector<int> > &internals) const CV_OVERRIDE
1463     {
1464         const int batchSize = inputs[0][0];
1465         const int numChannels = inputs[0][1];
1466         const int inpHeight = inputs[0][2];
1467         const int inpWidth = inputs[0][3];
1468
1469         std::vector<int> outShape(4);
1470         outShape[0] = batchSize;
1471         outShape[1] = numChannels;
1472         outShape[2] = outHeight != 0 ? outHeight : (inpHeight + (inpHeight - 1) * (zoomFactor - 1));
1473         outShape[3] = outWidth != 0 ? outWidth : (inpWidth + (inpWidth - 1) * (zoomFactor - 1));
1474         outputs.assign(1, outShape);
1475         return false;
1476     }
1477
1478     virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
1479     {
1480         std::vector<Mat> outputs;
1481         outputs_arr.getMatVector(outputs);
1482
1483         if (!outWidth && !outHeight)
1484         {
1485             outHeight = outputs[0].size[2];
1486             outWidth = outputs[0].size[3];
1487         }
1488     }
1489
1490     // Implementation of this custom layer is based on https://github.com/cdmh/deeplab-public/blob/master/src/caffe/layers/interp_layer.cpp
1491     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
1492     {
1493         CV_TRACE_FUNCTION();
1494         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
1495
1496         if (inputs_arr.depth() == CV_16S)
1497         {
1498             forward_fallback(inputs_arr, outputs_arr, internals_arr);
1499             return;
1500         }
1501
1502         std::vector<Mat> inputs, outputs;
1503         inputs_arr.getMatVector(inputs);
1504         outputs_arr.getMatVector(outputs);
1505
1506         Mat& inp = inputs[0];
1507         Mat& out = outputs[0];
1508         const float* inpData = (float*)inp.data;
1509         float* outData = (float*)out.data;
1510
1511         const int batchSize = inp.size[0];
1512         const int numChannels = inp.size[1];
1513         const int inpHeight = inp.size[2];
1514         const int inpWidth = inp.size[3];
1515
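        // Map output coordinates back to input coordinates for bilinear interpolation;
        // the last output row/column is aligned with the last input row/column.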
1516         const float rheight = (outHeight > 1) ? static_cast<float>(inpHeight - 1) / (outHeight - 1) : 0.f;
1517         const float rwidth = (outWidth > 1) ? static_cast<float>(inpWidth - 1) / (outWidth - 1) : 0.f;
1518         for (int h2 = 0; h2 < outHeight; ++h2)
1519         {
1520             const float h1r = rheight * h2;
1521             const int h1 = h1r;
1522             const int h1p = (h1 < inpHeight - 1) ? 1 : 0;
1523             const float h1lambda = h1r - h1;
1524             const float h0lambda = 1.f - h1lambda;
1525             for (int w2 = 0; w2 < outWidth; ++w2)
1526             {
1527                 const float w1r = rwidth * w2;
1528                 const int w1 = w1r;
1529                 const int w1p = (w1 < inpWidth - 1) ? 1 : 0;
1530                 const float w1lambda = w1r - w1;
1531                 const float w0lambda = 1.f - w1lambda;
1532                 const float* pos1 = inpData + h1 * inpWidth + w1;
1533                 float* pos2 = outData + h2 * outWidth + w2;
1534                 for (int c = 0; c < batchSize * numChannels; ++c)
1535                 {
1536                     pos2[0] =
1537                       h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
1538                       h1lambda * (w0lambda * pos1[h1p * inpWidth] + w1lambda * pos1[h1p * inpWidth + w1p]);
1539                     pos1 += inpWidth * inpHeight;
1540                     pos2 += outWidth * outHeight;
1541                 }
1542             }
1543         }
1544     }
1545
1546 private:
1547     int outWidth, outHeight, zoomFactor;
1548 };
1549
1550 #ifndef OPENCV_DNN_EXTERNAL_PROTOBUF
1551 TEST_P(Test_Caffe_layers, Interp)
1552 #else
1553 TEST_P(Test_Caffe_layers, DISABLED_Interp)  // requires patched protobuf (available in OpenCV source tree only)
1554 #endif
1555 {
1556     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
1557         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
1558
1559     // Test a custom layer.
1560     CV_DNN_REGISTER_LAYER_CLASS(Interp, CustomInterpLayer);
1561     try
1562     {
1563         testLayerUsingCaffeModels("layer_interp", false, false);
1564     }
1565     catch (...)
1566     {
1567         LayerFactory::unregisterLayer("Interp");
1568         throw;
1569     }
1570     LayerFactory::unregisterLayer("Interp");
1571
1572     // Test the built-in implementation of the layer.
1573     testLayerUsingCaffeModels("layer_interp", false, false);
1574 }
1575
1576 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_Caffe_layers, dnnBackendsAndTargets());
1577
1578 TEST(Layer_Test_PoolingIndices, Accuracy)
1579 {
1580     Net net;
1581
1582     LayerParams lp;
1583     lp.set("pool", "max");
1584     lp.set("kernel_w", 2);
1585     lp.set("kernel_h", 2);
1586     lp.set("stride_w", 2);
1587     lp.set("stride_h", 2);
1588     lp.set("pad_w", 0);
1589     lp.set("pad_h", 0);
1590     lp.name = "testLayer.name";  // This test also checks that OpenCV accepts layer names containing dots.
1591     lp.type = "Pooling";
1592     net.addLayerToPrev(lp.name, lp.type, lp);
1593
1594     Mat inp(10, 10, CV_8U);
1595     randu(inp, 0, 255);
1596
1597     Mat maxValues(5, 5, CV_32F, Scalar(-1)), indices(5, 5, CV_32F, Scalar(-1));
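    // Build the reference: for every 2x2 pooling window store the maximum value and
    // its flattened position (y * 10 + x) inside the 10x10 input.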
1598     for (int y = 0; y < 10; ++y)
1599     {
1600         int dstY = y / 2;
1601         for (int x = 0; x < 10; ++x)
1602         {
1603             int dstX = x / 2;
1604             uint8_t val = inp.at<uint8_t>(y, x);
1605             if ((float)val > maxValues.at<float>(dstY, dstX))
1606             {
1607                 maxValues.at<float>(dstY, dstX) = val;
1608                 indices.at<float>(dstY, dstX) = y * 10 + x;
1609             }
1610         }
1611     }
1612     net.setPreferableBackend(DNN_BACKEND_OPENCV);
1613     net.setInput(blobFromImage(inp));
1614
1615     std::vector<Mat> outputs;
1616     net.forward(outputs, lp.name);
1617     normAssert(maxValues, outputs[0].reshape(1, 5));
1618     normAssert(indices, outputs[1].reshape(1, 5));
1619 }
1620
1621 typedef testing::TestWithParam<tuple<Vec4i, int, tuple<Backend, Target> > > Layer_Test_ShuffleChannel;
1622 TEST_P(Layer_Test_ShuffleChannel, Accuracy)
1623 {
1624     Vec4i inpShapeVec = get<0>(GetParam());
1625     int group = get<1>(GetParam());
1626     ASSERT_EQ(inpShapeVec[1] % group, 0);
1627     const int groupSize = inpShapeVec[1] / group;
1628     int backendId = get<0>(get<2>(GetParam()));
1629     int targetId = get<1>(get<2>(GetParam()));
1630
1631     Net net;
1632     LayerParams lp;
1633     lp.set("group", group);
1634     lp.type = "ShuffleChannel";
1635     lp.name = "testLayer";
1636     net.addLayerToPrev(lp.name, lp.type, lp);
1637
1638     const int inpShape[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
1639     Mat inp(4, inpShape, CV_32F);
1640     randu(inp, 0, 255);
1641
1642     net.setInput(inp);
1643     net.setPreferableBackend(backendId);
1644     net.setPreferableTarget(targetId);
1645     Mat out = net.forward();
1646
1647     double l1 = (targetId == DNN_TARGET_OPENCL_FP16) ? 5e-2 : 1e-5;
1648     double lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 7e-2 : 1e-4;
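    // Reference check: output channel c of ShuffleChannel must equal
    // input channel groupSize * (c % group) + c / group.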
1649     for (int n = 0; n < inpShapeVec[0]; ++n)
1650     {
1651         for (int c = 0; c < inpShapeVec[1]; ++c)
1652         {
1653             Mat outChannel = getPlane(out, n, c);
1654             Mat inpChannel = getPlane(inp, n, groupSize * (c % group) + c / group);
1655             normAssert(outChannel, inpChannel, "", l1, lInf);
1656         }
1657     }
1658 }
1659 INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_ShuffleChannel, Combine(
1660 /*input shape*/  Values(Vec4i(1, 6, 5, 7), Vec4i(3, 12, 1, 4)),
1661 /*group*/        Values(1, 2, 3, 6), dnnBackendsAndTargets(/*with IE*/ false)
1662 ));
1663
1664 // Check that ReLU is not fused into the convolution when its output is explicitly requested
1665 TEST(Layer_Test_Convolution, relu_fusion)
1666 {
1667     Net net;
1668     {
1669         LayerParams lp;
1670         lp.set("kernel_size", 1);
1671         lp.set("num_output", 1);
1672         lp.set("bias_term", false);
1673         lp.type = "Convolution";
1674         lp.name = "testConv";
1675
1676         int weightsShape[] = {1, 1, 1, 1};
1677         Mat weights(4, &weightsShape[0], CV_32F, Scalar(1));
1678         lp.blobs.push_back(weights);
1679         net.addLayerToPrev(lp.name, lp.type, lp);
1680     }
1681     {
1682         LayerParams lp;
1683         lp.type = "ReLU";
1684         lp.name = "testReLU";
1685         net.addLayerToPrev(lp.name, lp.type, lp);
1686     }
1687     int sz[] = {1, 1, 2, 3};
1688     Mat input(4, &sz[0], CV_32F);
1689     randu(input, -1.0, -0.1);
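    // The input is strictly negative: if ReLU were fused into the convolution, the
    // requested "testConv" output would be zeroed instead of matching the identity convolution.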
1690     net.setInput(input);
1691     net.setPreferableBackend(DNN_BACKEND_OPENCV);
1692     Mat output = net.forward("testConv");
1693     normAssert(input, output);
1694 }
1695
1696 typedef testing::TestWithParam<tuple<bool, tuple<Backend, Target> > > Layer_Test_Eltwise_unequal;
1697 TEST_P(Layer_Test_Eltwise_unequal, accuracy_input_0_truncate)
1698 {
1699     bool weighted = get<0>(GetParam());
1700     int backendId = get<0>(get<1>(GetParam()));
1701     int targetId = get<1>(get<1>(GetParam()));
1702
1703     Net net;
1704     LayerParams lp;
1705     lp.type = "Eltwise";
1706     lp.name = "testLayer";
1707     lp.set<std::string>("output_channels_mode", "input_0_truncate");
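    // In "input_0_truncate" mode the reference blob is allocated with the shape of the first
    // input, and each input is added only over its first min(out_channels, input_channels) channels.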
1708
1709     const int inpShapes[][4] = {{1, 4, 2, 2}, {1, 5, 2, 2}, {1, 3, 2, 2}};
1710     const int out_channels = inpShapes[0][1];
1711     std::vector<String> inpNames(3);
1712     std::vector<Mat> inputs(3);
1713
1714     std::vector<float> weights(3, 1);
1715     if (weighted)
1716     {
1717         for (int i = 0; i < inputs.size(); ++i)
1718             weights[i] = -0.125f + i * 0.25f;
1719         lp.set("coeff", DictValue::arrayReal<float*>(&weights[0], weights.size()));
1720     }
1721
1722     int eltwiseId = net.addLayer(lp.name, lp.type, lp);
1723     for (int i = 0; i < inputs.size(); ++i)
1724     {
1725         inputs[i].create(4, inpShapes[i], CV_32F);
1726         size_t total = inputs[i].total();
1727         for (size_t j = 0; j < total; j++)
1728             inputs[i].ptr<float>()[j] = j + i * 100;
1729         inpNames[i] = format("input_%d", i);
1730         net.connect(0, i, eltwiseId, i);
1731     }
1732     Mat ref(4, inpShapes[0], CV_32F, Scalar(0));
1733
1734     net.setInputsNames(inpNames);
1735     for (int i = 0; i < inputs.size(); ++i)
1736     {
1737         //std::cout << ref.reshape(1,1) << endl;
1738         net.setInput(inputs[i], inpNames[i]);
1739         for (size_t batchId = 0; batchId < ref.size[0]; batchId++)
1740         {
1741             int input_channels = inputs[i].size[1];
1742             Range ranges[4] = { Range(batchId, batchId + 1), Range(0, std::min(out_channels, input_channels)), Range::all(), Range::all() };
1743             Mat ref_slice = ref(ranges);
1744             Mat input_slice = inputs[i](ranges);
1745             ref_slice += weights[i] * input_slice;
1746         }
1747     }
1748
1749     net.setPreferableBackend(backendId);
1750     net.setPreferableTarget(targetId);
1751     Mat out = net.forward();
1752     normAssert(out, ref);
1753     if (testing::Test::HasFailure())
1754     {
1755         std::cout << out.reshape(1,1) << endl;
1756         std::cout << ref.reshape(1,1) << endl;
1757     }
1758 }
1759
1760 TEST_P(Layer_Test_Eltwise_unequal, accuracy_input_0)
1761 {
1762     bool weighted = get<0>(GetParam());
1763     int backendId = get<0>(get<1>(GetParam()));
1764     int targetId = get<1>(get<1>(GetParam()));
1765
1766     Net net;
1767     LayerParams lp;
1768     lp.type = "Eltwise";
1769     lp.name = "testLayer";
1770     lp.set<std::string>("output_channels_mode", "input_0");
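    // As above, the reference has the shape of the first input; here input 0 has the largest
    // channel count, so inputs with fewer channels contribute only to their leading channels.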
1771
1772     const int inpShapes[][4] = {{1, 4, 2, 2}, {1, 2, 2, 2}, {1, 3, 2, 2}};
1773     const int out_channels = inpShapes[0][1];
1774     std::vector<String> inpNames(3);
1775     std::vector<Mat> inputs(3);
1776
1777     std::vector<float> weights(3, 1);
1778     if (weighted)
1779     {
1780         for (int i = 0; i < inputs.size(); ++i)
1781             weights[i] = -0.125f + i * 0.25f;
1782         lp.set("coeff", DictValue::arrayReal<float*>(&weights[0], weights.size()));
1783     }
1784
1785     int eltwiseId = net.addLayer(lp.name, lp.type, lp);
1786     for (int i = 0; i < inputs.size(); ++i)
1787     {
1788         inputs[i].create(4, inpShapes[i], CV_32F);
1789         size_t total = inputs[i].total();
1790         for (size_t j = 0; j < total; j++)
1791             inputs[i].ptr<float>()[j] = j + i * 100;
1792         inpNames[i] = format("input_%d", i);
1793         net.connect(0, i, eltwiseId, i);
1794     }
1795     Mat ref(4, inpShapes[0], CV_32F, Scalar(0));
1796
1797     net.setInputsNames(inpNames);
1798     for (int i = 0; i < inputs.size(); ++i)
1799     {
1800         //std::cout << ref.reshape(1,1) << endl;
1801         net.setInput(inputs[i], inpNames[i]);
1802         for (size_t batchId = 0; batchId < ref.size[0]; batchId++)
1803         {
1804             int input_channels = inputs[i].size[1];
1805             Range ranges[4] = { Range(batchId, batchId + 1), Range(0, std::min(out_channels, input_channels)), Range::all(), Range::all() };
1806             Mat ref_slice = ref(ranges);
1807             Mat input_slice = inputs[i](ranges);
1808             ref_slice += weights[i] * input_slice;
1809         }
1810     }
1811
1812     net.setPreferableBackend(backendId);
1813     net.setPreferableTarget(targetId);
1814     Mat out = net.forward();
1815     normAssert(out, ref);
1816     if (testing::Test::HasFailure())
1817     {
1818         std::cout << out.reshape(1,1) << endl;
1819         std::cout << ref.reshape(1,1) << endl;
1820     }
1821 }
1822
1823 INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Eltwise_unequal, Combine(
1824     testing::Bool(),
1825     dnnBackendsAndTargets()
1826 ));
1827
1828 typedef testing::TestWithParam<tuple<Backend, Target> > Layer_Test_Resize;
1829 TEST_P(Layer_Test_Resize, change_input)
1830 {
1831     int backendId = get<0>(GetParam());
1832     int targetId = get<1>(GetParam());
1833
1834     Net net;
1835     LayerParams lp;
1836     lp.type = "Resize";
1837     lp.name = "testLayer";
1838     lp.set("zoom_factor", 2);
1839     lp.set("interpolation", "nearest");
1840     net.addLayerToPrev(lp.name, lp.type, lp);
1841
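    // Forward two inputs of different sizes to check that the Resize layer
    // reinitializes correctly when the input shape changes.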
1842     for (int i = 0; i < 2; ++i)
1843     {
1844         Mat inp(4 + i, 5 + i, CV_8UC3), ref;
1845         randu(inp, 0, 255);
1846         resize(inp, ref, Size(0, 0), 2, 2, INTER_NEAREST);
1847         ref = blobFromImage(ref);
1848
1849         net.setInput(blobFromImage(inp));
1850         net.setPreferableBackend(backendId);
1851         net.setPreferableTarget(targetId);
1852         Mat out = net.forward();
1853         normAssert(out, ref);
1854     }
1855 }
1856
1857 INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Resize, dnnBackendsAndTargets());
1858
1859 struct Layer_Test_Slice : public testing::TestWithParam<tuple<Backend, Target> >
1860 {
1861     template<int DIMS>
1862     void test_slice(const int* inputShape, const int* begin, const int* end)
1863     {
1864         int backendId = get<0>(GetParam());
1865         int targetId = get<1>(GetParam());
1866
1867         Mat input(DIMS, inputShape, CV_32FC1, Scalar::all(0));
1868         for (int i = 0; i < (int)input.total(); ++i)
1869             input.ptr<float>()[i] = (float)i;
1870
1871         std::vector<Range> range(DIMS);
1872         for (int i = 0; i < DIMS; ++i)
1873             range[i] = Range(begin[i], end[i]);
1874
1875         Net net;
1876         LayerParams lp;
1877         lp.type = "Slice";
1878         lp.name = "testLayer";
1879         lp.set("begin", DictValue::arrayInt<int*>((int*)&begin[0], DIMS));
1880         lp.set("end", DictValue::arrayInt<int*>((int*)&end[0], DIMS));
1881         net.addLayerToPrev(lp.name, lp.type, lp);
1882
1883         {
1884             net.setInput(input);
1885             net.setPreferableBackend(backendId);
1886             net.setPreferableTarget(targetId);
1887             Mat out = net.forward();
1888
1889             EXPECT_GT(cv::norm(out, NORM_INF), 0);
1890             normAssert(out, input(range));
1891 #if 0
1892             cout << input(range).clone().reshape(1, 1) << endl;
1893             cout << out.reshape(1, 1) << endl;
1894 #endif
1895         }
1896     }
1897 };
1898
1899 TEST_P(Layer_Test_Slice, slice_channels_17762)
1900 {
1901     const int inputShape[4] = {1, 16, 6, 8};
1902     const int begin[] = {0, 4, 0, 0};
1903     const int end[] = {1, 8, 6, 8};
1904     test_slice<4>(inputShape, begin, end);
1905 }
1906
1907 TEST_P(Layer_Test_Slice, slice_channels_with_batch_17762)
1908 {
1909     const int inputShape[4] = {4, 4, 3, 4};
1910     const int begin[] = {0, 1, 0, 0};
1911     const int end[] = {4, 3, 3, 4};
1912     test_slice<4>(inputShape, begin, end);
1913 }
1914
1915 TEST_P(Layer_Test_Slice, slice_channels_and_batch_17762)
1916 {
1917     int backend = get<0>(GetParam());
1918     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
1919         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
1920
1921     const int inputShape[4] = {4, 4, 3, 4};
1922     const int begin[] = {2, 1, 0, 0};
1923     const int end[] = {4, 3, 3, 4};
1924     test_slice<4>(inputShape, begin, end);
1925 }
1926
1927 TEST_P(Layer_Test_Slice, slice_rows)
1928 {
1929     const int inputShape[4] = {1, 2, 6, 4};
1930     const int begin[] = {0, 0, 4, 0};
1931     const int end[] = {1, 2, 6, 4};
1932     test_slice<4>(inputShape, begin, end);
1933 }
1934
1935 TEST_P(Layer_Test_Slice, slice_cols)
1936 {
1937     const int inputShape[4] = {1, 2, 3, 8};
1938     const int begin[] = {0, 0, 0, 4};
1939     const int end[] = {1, 2, 3, 8};
1940     test_slice<4>(inputShape, begin, end);
1941 }
1942
1943
1944 TEST_P(Layer_Test_Slice, slice_complex_1_unaligned)
1945 {
1946     const int inputShape[4] = {1, 4, 2, 3};
1947     const int begin[] = {0, 2, 1, 0};
1948     const int end[] = {1, 3, 2, 2};
1949     test_slice<4>(inputShape, begin, end);
1950 }
1951
1952 TEST_P(Layer_Test_Slice, slice_complex_2_x4)
1953 {
1954     const int inputShape[4] = {1, 3, 2, 4};
1955     const int begin[] = {0, 2, 1, 0};
1956     const int end[] = {1, 3, 2, 2};
1957     test_slice<4>(inputShape, begin, end);
1958 }
1959
1960 TEST_P(Layer_Test_Slice, slice_complex_3)
1961 {
1962     const int inputShape[4] = {1, 6, 4, 8};
1963     const int begin[] = {0, 2, 1, 4};
1964     const int end[] = {1, 4, 3, 8};
1965     test_slice<4>(inputShape, begin, end);
1966 }
1967
1968 TEST_P(Layer_Test_Slice, variable_input_shape)
1969 {
1970     int backendId = get<0>(GetParam());
1971     int targetId = get<1>(GetParam());
1972
1973     int begin[] = {0, 0, 0, 0};
1974     int end[] = {-1, -1, -1, -1};
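    // begin = 0 and end = -1 along every axis select the whole blob,
    // so the Slice output is expected to match the input exactly.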
1975
1976     Net net;
1977     LayerParams lp;
1978     lp.type = "Slice";
1979     lp.name = "testLayer";
1980     lp.set("begin", DictValue::arrayInt<int*>(&begin[0], 4));
1981     lp.set("end", DictValue::arrayInt<int*>(&end[0], 4));
1982     net.addLayerToPrev(lp.name, lp.type, lp);
1983
1984     for (int i = 0; i < 2; ++i)
1985     {
1986         Mat inp(4 + i, 5 + i, CV_8UC1);
1987         randu(inp, 0, 255);
1988         inp = blobFromImage(inp);
1989
1990         net.setInput(inp);
1991         net.setPreferableBackend(backendId);
1992         net.setPreferableTarget(targetId);
1993         Mat out = net.forward();
1994
1995         normAssert(out, inp);
1996     }
1997 }
1998
1999 INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Slice, dnnBackendsAndTargets());
2000
2001 typedef testing::TestWithParam<tuple<Backend, Target> > Layer_Test_BatchNorm;
2002 TEST_P(Layer_Test_BatchNorm, fusion)
2003 {
2004     // This test reinitializes the network by forwarding an input with a different batch size.
2005     // We check that the BatchNorm layer weights are restored after fusion.
2006     int backendId = get<0>(GetParam());
2007     int targetId = get<1>(GetParam());
2008     const int ch = 4;
2009
2010     Mat mean(1, ch, CV_32F), var(1, ch, CV_32F), weights(1, ch, CV_32F);
2011     randu(mean, 0, 1);
2012     randu(var, 0, 1);
2013     randu(weights, 0, 1);
2014
2015     Net net;
2016     {
2017         LayerParams lp;
2018         lp.type = "BatchNorm";
2019         lp.name = "bn";
2020         lp.set("has_weight", false);
2021         lp.set("has_bias", false);
2022         lp.blobs.push_back(mean);
2023         lp.blobs.push_back(var);
2024         net.addLayerToPrev(lp.name, lp.type, lp);
2025     }
2026     {
2027         LayerParams lp;
2028         lp.type = "Scale";
2029         lp.name = "scale";
2030         lp.set("has_bias", false);
2031         lp.blobs.push_back(weights);
2032         net.addLayerToPrev(lp.name, lp.type, lp);
2033     }
2034
2035     Mat inp(4, 5, CV_32FC(ch));
2036     randu(inp, 0, 1);
2037
2038     net.setPreferableBackend(backendId);
2039     net.setPreferableTarget(targetId);
2040
2041     net.setInput(blobFromImage(inp));
2042     Mat ref = net.forward();
2043
2044     net.setInput(blobFromImages(std::vector<Mat>(2, inp)));
2045     Mat out = net.forward();
2046
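    // Each image of the batched forward must reproduce the single-image result
    // computed before the network was reinitialized.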
2047     for (int i = 0; i < 2; ++i)
2048     {
2049         std::vector<Range> ranges(4, Range::all());
2050         ranges[0].start = i;
2051         ranges[0].end = i + 1;
2052         normAssert(out(ranges), ref);
2053     }
2054 }
2055
2056 INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_BatchNorm, dnnBackendsAndTargets());
2057
2058 class TestLayerFusion : public DNNTestLayer {
2059 public:
2060     static void makeDefaultTestConvolutionLayer(LayerParams& convParams, int in_channels, int num_filters, bool bias_term)
2061     {
2062         const int kernel_h = 3, kernel_w = 3;
2063         const int pad_h = kernel_h / 2, pad_w = kernel_w / 2;
2064
2065         convParams.set("kernel_h", kernel_h);
2066         convParams.set("kernel_w", kernel_w);
2067         convParams.set("pad_h", pad_h);
2068         convParams.set("pad_w", pad_w);
2069         convParams.set("num_output", num_filters);
2070         convParams.set("bias_term", bias_term);
2071         convParams.type = "Convolution";
2072         convParams.name = "convolution";
2073
2074         float conv_init_magnitude = 1.0f / in_channels / kernel_h / kernel_w;
2075         int weightsShape[] = {num_filters, in_channels, kernel_h, kernel_w};
2076         Mat weights(4, &weightsShape[0], CV_32F);
2077         randu(weights, -conv_init_magnitude, conv_init_magnitude);
2078         convParams.blobs.push_back(weights);
2079         if (bias_term)
2080         {
2081             Mat bias(1, num_filters, CV_32F);
2082             randu(bias, -1.0f, 1.0f);
2083             convParams.blobs.push_back(bias);
2084         }
2085     }
2086
2087     static void makeDefaultTestActivationLayer(LayerParams& activationParams, const std::string& type, int in_channels)
2088     {
2089         activationParams.type = type;
2090         activationParams.name = "activation";
2091         if (activationParams.type == "ReLU")
2092             activationParams.set("negative_slope", 0.1f);
2093         else if (activationParams.type == "Power")
2094         {
2095             activationParams.set("power", 2.0f);
2096             activationParams.set("scale", 0.5f);
2097             activationParams.set("shift", 0.3f);
2098         }
2099         else if (activationParams.type == "ReLU6")
2100         {
2101             activationParams.set("min_value", -1.0f);
2102             activationParams.set("max_value", 1.0f);
2103         }
2104         else if (activationParams.type == "ChannelsPReLU")
2105         {
2106             Mat scales(1, in_channels, CV_32F);
2107             randu(scales, -1.0f, 1.0f);
2108             activationParams.blobs.push_back(scales);
2109         }
2110     }
2111
2112     static void makeDefaultTestEltwiseLayer(LayerParams& eltwiseParams, const std::string& op, bool withCoefficients)
2113     {
2114         eltwiseParams.type = "Eltwise";
2115         eltwiseParams.name = "eltwise";
2116         eltwiseParams.set("operation", op);
2117         if (withCoefficients)
2118         {
2119             float coeff[] = {0.3f, 0.5f};
2120             eltwiseParams.set("coeff", DictValue::arrayReal<float*>(coeff, 2));
2121         }
2122     }
2123
2124     static void test(Mat& input, Net& net, Backend backendId, Target targetId, std::vector<int> expectedFusedLayers = std::vector<int>(), double l1 = 0.0, double lInf = 0.0)
2125     {
2126         DNNTestLayer::checkBackend(backendId, targetId);
2127
2128         net.enableFusion(false);
2129         net.setPreferableBackend(DNN_BACKEND_OPENCV);
2130         net.setPreferableTarget(DNN_TARGET_CPU);
2131         net.setInput(input);
2132         Mat outputReference = net.forward().clone();
2133         std::vector<double> refTimings;
2134         net.getPerfProfile(refTimings);
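        // With fusion disabled every layer is executed, so each per-layer timing must be non-zero.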
2135         for (int i = 0; i < refTimings.size(); i++)
2136         {
2137             CV_Assert(refTimings[i] != 0.0);
2138         }
2139
2140         net.enableFusion(true);
2141         net.setPreferableBackend(backendId);
2142         net.setPreferableTarget(targetId);
2143         net.setInput(input);
2144         Mat outputTest = net.forward().clone();
2145         std::vector<double> testTimings;
2146         net.getPerfProfile(testTimings);
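        // Layers fused into their predecessors are not executed separately,
        // so their per-layer timings are expected to be reported as zero.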
2147         for (int i = 0; i < testTimings.size(); i++)
2148         {
2149             if (std::find(expectedFusedLayers.begin(), expectedFusedLayers.end(), i + 1) != expectedFusedLayers.end())
2150             {
2151                 EXPECT_EQ(testTimings[i], 0.0);
2152             }
2153             else
2154             {
2155                 EXPECT_NE(testTimings[i], 0.0);
2156             }
2157         }
2158
2159         // double ref_max_value, ref_min_value;
2160         // minMaxLoc(outputReference.reshape(1, 1), &ref_min_value, &ref_max_value);
2161         // std::cout << "reference range: " << ref_min_value << ' ' << ref_max_value << std::endl;
2162
2163         double default_l1, default_lInf;
2164         DNNTestLayer::getDefaultThresholds(backendId, targetId, &default_l1, &default_lInf);
2165         if (l1 == 0.0)
2166             l1 = default_l1;
2167         if (lInf == 0.0)
2168             lInf = default_lInf;
2169         normAssert(outputReference, outputTest, "", l1, lInf);
2170     }
2171
2172     static testing::internal::ParamGenerator<std::string> eltwiseOpList()
2173     {
2174         // TODO: automate list generation
2175         return Values("sum", "max", "prod", "div");
2176     }
2177
2178     static testing::internal::ParamGenerator<std::string> activationLayersList()
2179     {
2180         // TODO: automate list generation
2181         return Values("ReLU", "ReLU6", "ChannelsPReLU", "TanH", "Swish", "Mish", "Sigmoid", "ELU", "AbsVal", "BNLL", "Power");
2182     }
2183
2184     static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsForFusionTests()
2185     {
2186         return dnnBackendsAndTargets(false, false, true, false); // OCV OpenCL + OCV CPU
2187     }
2188 };
2189
2190 typedef TestWithParam<tuple<bool, std::string, tuple<Backend, Target> > > ConvolutionActivationFusion;
2191 TEST_P(ConvolutionActivationFusion, Accuracy)
2192 {
2193     //          input
2194     //            |
2195     // -----------------------
2196     // |     convolution     |
2197     // -----------------------
2198     //            |
2199     // -----------------------
2200     // |     activation      |
2201     // -----------------------
2202     //            |
2203     //         output
2204
2205     const int batch_size = 2, in_channels = 16;
2206     const int in_height = 16, in_width = 16;
2207     int inputShape[] = {batch_size, in_channels, in_height, in_width};
2208     Mat input(4, &inputShape[0], CV_32F);
2209     randu(input, 1.0f, 2.0f);
2210
2211     bool bias_term = get<0>(GetParam());
2212     LayerParams convParams;
2213     TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
2214
2215     std::string actType = get<1>(GetParam());
2216     LayerParams activationParams;
2217     TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels);
2218
2219     Backend backendId = get<0>(get<2>(GetParam()));
2220     Target targetId = get<1>(get<2>(GetParam()));
2221
2222     // bug: https://github.com/opencv/opencv/issues/17964
2223     if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
2224         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
2225
2226     Net net;
2227     int convId = net.addLayer(convParams.name, convParams.type, convParams);
2228     int activId = net.addLayerToPrev(activationParams.name, activationParams.type, activationParams);
2229     net.connect(0, 0, convId, 0);
2230
2231     std::vector<int> expectedFusedLayers;
2232     if (backendId == DNN_BACKEND_OPENCV)
2233     {
2234         if (targetId == DNN_TARGET_CPU)
2235             expectedFusedLayers.push_back(activId); // all activations are fused
2236         else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)
2237         {
2238             if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Power")
2239                 expectedFusedLayers.push_back(activId);
2240         }
2241     }
2242
2243     TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
2244 }
2245 INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationFusion, Combine(
2246 /* bias */       testing::Bool(),
2247 /* activation */ TestLayerFusion::activationLayersList(),
2248                  TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
2249 ));
2250
2251 typedef TestWithParam<tuple<bool, std::string, bool, tuple<Backend, Target> > > ConvolutionEltwiseFusion;
2252 TEST_P(ConvolutionEltwiseFusion, Accuracy)
2253 {
2254     //                 input
2255     //                   |
2256     //    -------------------------------
2257     //    |                             |
2258     //    |                      ---------------
2259     //    |                      | convolution |
2260     //    |                      ---------------
2261     //    |                             |
2262     //    |       ----------------      |
2263     //    --------|  eltwise op  |-------
2264     //            ----------------
2265     //                   |
2266     //                 output
2267
2268     const int batch_size = 2, in_channels = 16;
2269     const int in_height = 16, in_width = 16;
2270     int inputShape[] = {batch_size, in_channels, in_height, in_width};
2271     Mat input(4, &inputShape[0], CV_32F);
2272     randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div
2273
2274     bool bias_term = get<0>(GetParam());
2275     LayerParams convParams;
2276     TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
2277
2278     std::string eltwiseOp = get<1>(GetParam());
2279     bool weightedEltwise = get<2>(GetParam());
2280     if (eltwiseOp != "sum" && weightedEltwise)
2281         throw SkipTestException("weighted eltwise not supported");
2282     LayerParams eltwiseParams;
2283     TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, weightedEltwise);
2284
2285     Net net;
2286     int convId = net.addLayer(convParams.name, convParams.type, convParams);
2287     int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams);
2288     net.connect(0, 0, convId, 0);
2289     net.connect(convId, 0, eltwiseId, 0);
2290     net.connect(0, 0, eltwiseId, 1);
2291
2292     Backend backendId = get<0>(get<3>(GetParam()));
2293     Target targetId = get<1>(get<3>(GetParam()));
2294     TestLayerFusion::test(input, net, backendId, targetId);
2295 }
2296 INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseFusion, Combine(
2297 /* bias */              testing::Bool(),
2298 /* eltwise op */        TestLayerFusion::eltwiseOpList(),
2299 /* eltwise weighted */  testing::Bool(),
2300                         TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
2301 ));
2302
2303 typedef TestWithParam<tuple<bool, std::string, bool, std::string, tuple<Backend, Target> > > ConvolutionEltwiseActivationFusion;
2304 TEST_P(ConvolutionEltwiseActivationFusion, Accuracy)
2305 {
2306     //                 input
2307     //                   |
2308     //    -------------------------------
2309     //    |                             |
2310     //    |                      ---------------
2311     //    |                      | convolution |
2312     //    |                      ---------------
2313     //    |                             |
2314     //    |       ----------------      |
2315     //    --------|  eltwise op  |-------
2316     //            ----------------
2317     //                   |
2318     //            ----------------
2319     //            |  activation  |
2320     //            ----------------
2321     //                   |
2322     //                output
2323
2324     const int batch_size = 2, in_channels = 16;
2325     const int in_height = 16, in_width = 16;
2326     int inputShape[] = {batch_size, in_channels, in_height, in_width};
2327     Mat input(4, &inputShape[0], CV_32F);
2328     randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div
2329
2330     bool bias_term = get<0>(GetParam());
2331     LayerParams convParams;
2332     TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
2333
2334     std::string eltwiseOp = get<1>(GetParam());
2335     bool weightedEltwise = get<2>(GetParam());
2336     if (eltwiseOp != "sum" && weightedEltwise)
2337         throw SkipTestException("weighted eltwise not supported");
2338     LayerParams eltwiseParams;
2339     TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, weightedEltwise);
2340
2341     std::string actType = get<3>(GetParam());
2342     LayerParams activationParams;
2343     TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels);
2344
2345     Backend backendId = get<0>(get<4>(GetParam()));
2346     Target targetId = get<1>(get<4>(GetParam()));
2347
2348     // bug: https://github.com/opencv/opencv/issues/17945
2349     if ((eltwiseOp != "sum" || weightedEltwise) && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
2350         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
2351
2352     // bug: https://github.com/opencv/opencv/issues/17953
2353     if (eltwiseOp == "sum" && actType == "ChannelsPReLU" && bias_term == false &&
2354         backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
2355     {
2356         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
2357     }
2358
2359     // bug: https://github.com/opencv/opencv/issues/17964
2360     if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
2361         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
2362
2363     Net net;
2364     int convId = net.addLayer(convParams.name, convParams.type, convParams);
2365     int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams);
2366     int activId = net.addLayer(activationParams.name, activationParams.type, activationParams);
2367     net.connect(0, 0, convId, 0);
2368     net.connect(convId, 0, eltwiseId, 0);
2369     net.connect(0, 0, eltwiseId, 1);
2370     net.connect(eltwiseId, 0, activId, 0);
2371
2372     std::vector<int> expectedFusedLayers;
2373     if (backendId == DNN_BACKEND_OPENCV)
2374     {
2375         if (targetId == DNN_TARGET_CPU)
2376             expectedFusedLayers.push_back(activId); // activation is fused with eltwise layer
2377         else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)
2378         {
2379             if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "Power")
2380             {
2381                 expectedFusedLayers.push_back(eltwiseId);
2382                 expectedFusedLayers.push_back(activId);
2383             }
2384         }
2385     }
2386
2387     TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
2388 }
2389 INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseActivationFusion, Combine(
2390 /* bias */              testing::Bool(),
2391 /* eltwise op */        TestLayerFusion::eltwiseOpList(),
2392 /* eltwise weighted */  testing::Bool(),
2393 /* activation */        TestLayerFusion::activationLayersList(),
2394                         TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
2395 ));
2396
2397 typedef TestWithParam<tuple<bool, std::string, std::string, bool, tuple<Backend, Target> > > ConvolutionActivationEltwiseFusion;
2398 TEST_P(ConvolutionActivationEltwiseFusion, Accuracy)
2399 {
2400     //                 input
2401     //                   |
2402     //    -------------------------------
2403     //    |                             |
2404     //    |                     ----------------
2405     //    |                     |  convolution |
2406     //    |                     ----------------
2407     //    |                             |
2408     //    |                     ----------------
2409     //    |                     |  activation  |
2410     //    |                     ----------------
2411     //    |                             |
2412     //    |       ----------------      |
2413     //    --------| eltwise sum  |-------
2414     //            ----------------
2415     //                   |
2416
2417     const int batch_size = 2, in_channels = 16;
2418     const int in_height = 16, in_width = 16;
2419     int inputShape[] = {batch_size, in_channels, in_height, in_width};
2420     Mat input(4, &inputShape[0], CV_32F);
2421     randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div
2422
2423     bool bias_term = get<0>(GetParam());
2424     LayerParams convParams;
2425     TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
2426
2427     std::string actType = get<1>(GetParam());
2428     LayerParams activationParams;
2429     TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels);
2430
2431     std::string eltwiseOp = get<2>(GetParam());
2432     bool weightedEltwise = get<3>(GetParam());
2433     if (eltwiseOp != "sum" && weightedEltwise)
2434         throw SkipTestException("weighted eltwise not supported");
2435     LayerParams eltwiseParams;
2436     TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, weightedEltwise);
2437
2438     Backend backendId = get<0>(get<4>(GetParam()));
2439     Target targetId = get<1>(get<4>(GetParam()));
2440
2441     // bug: https://github.com/opencv/opencv/issues/17964
2442     if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
2443         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
2444
2445     // bug: https://github.com/opencv/opencv/issues/17953
2446     if (actType == "ChannelsPReLU" && bias_term == false &&
2447         backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
2448     {
2449         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
2450     }
2451
2452     Net net;
2453     int convId = net.addLayer(convParams.name, convParams.type, convParams);
2454     int activId = net.addLayer(activationParams.name, activationParams.type, activationParams);
2455     int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams);
2456     net.connect(0, 0, convId, 0);
2457     net.connect(convId, 0, activId, 0);
2458     net.connect(activId, 0, eltwiseId, 0);
2459     net.connect(0, 0, eltwiseId, 1);
2460
2461     std::vector<int> expectedFusedLayers;
2462     if (backendId == DNN_BACKEND_OPENCV)
2463     {
2464         if (targetId == DNN_TARGET_CPU)
2465             expectedFusedLayers.push_back(activId); // activation fused with convolution
2466         else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)
2467         {
2468             if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Power")
2469                 expectedFusedLayers.push_back(activId); // activation fused with convolution
2470         }
2471     }
2472
2473     TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
2474 }
2475 INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationEltwiseFusion, Combine(
2476 /* bias */              testing::Bool(),
2477 /* activation */        TestLayerFusion::activationLayersList(),
2478 /* eltwise op */        TestLayerFusion::eltwiseOpList(),
2479 /* eltwise weighted */  testing::Bool(),
2480                         TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
2481 ));
2482
2483 }} // namespace