1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2017, Intel Corporation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 //   * Redistribution's of source code must retain the above copyright notice,
20 //     this list of conditions and the following disclaimer.
21 //
22 //   * Redistribution's in binary form must reproduce the above copyright notice,
23 //     this list of conditions and the following disclaimer in the documentation
24 //     and/or other materials provided with the distribution.
25 //
26 //   * The name of the copyright holders may not be used to endorse or promote products
27 //     derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41
42 #include "test_precomp.hpp"
43 #include <opencv2/core/ocl.hpp>
44 #include "npy_blob.hpp"
45 #include <opencv2/dnn/shape_utils.hpp>
46 #include <opencv2/dnn/all_layers.hpp>
47 #include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS
48
49 #ifdef HAVE_INF_ENGINE
50 #include <thread>
51 #endif
52
53 namespace opencv_test { namespace {
54
55 template<typename TString>
56 static String _tf(TString filename)
57 {
58     String basetestdir = getOpenCVExtraDir();
59     size_t len = basetestdir.size();
60     if(len > 0 && basetestdir[len-1] != '/' && basetestdir[len-1] != '\\')
61         return (basetestdir + "/dnn/layers/") + filename;
62     return (basetestdir + "dnn/layers/") + filename;
63 }
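// Example (paths assume the standard opencv_extra test data layout used by getOpenCVExtraDir()):
//   _tf("blob.npy") -> "<opencv_extra test data dir>/dnn/layers/blob.npy"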
64
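// Helper: runs a single layer outside of a Net. It infers output/internal shapes via
// getMemoryShapes(), allocates CV_32F buffers, then calls finalize() and forward().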
65 void runLayer(Ptr<Layer> layer, std::vector<Mat> &inpBlobs, std::vector<Mat> &outBlobs)
66 {
67     size_t ninputs = inpBlobs.size();
68     std::vector<Mat> inp(ninputs), outp, intp;
69     std::vector<MatShape> inputs, outputs, internals;
70
71     for (size_t i = 0; i < ninputs; i++)
72     {
73         inp[i] = inpBlobs[i].clone();
74         inputs.push_back(shape(inp[i]));
75     }
76
77     layer->getMemoryShapes(inputs, 0, outputs, internals);
78     for (size_t i = 0; i < outputs.size(); i++)
79     {
80         outp.push_back(Mat(outputs[i], CV_32F));
81     }
82     for (size_t i = 0; i < internals.size(); i++)
83     {
84         intp.push_back(Mat(internals[i], CV_32F));
85     }
86
87     layer->finalize(inp, outp);
88     layer->forward(inp, outp, intp);
89
90     size_t noutputs = outp.size();
91     outBlobs.resize(noutputs);
92     for (size_t i = 0; i < noutputs; i++)
93         outBlobs[i] = outp[i];
94 }
95
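// Fixture for single-layer Caffe tests. testLayerUsingCaffeModels() expects the following files
// in opencv_extra: "<basename>.prototxt", optionally "<basename>.caffemodel", an input blob
// ("blob.npy" or "<basename>.input.npy") and a reference output "<basename>.npy".
// A typical accuracy test then reduces to a one-liner (hypothetical layer name):
//   TEST_P(Test_Caffe_layers, MyLayer)
//   {
//       testLayerUsingCaffeModels("layer_my_layer", /*useCaffeModel=*/true);
//   }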
96 class Test_Caffe_layers : public DNNTestLayer
97 {
98 public:
99     void testLayerUsingCaffeModels(const String& basename, bool useCaffeModel = false,
100                                    bool useCommonInputBlob = true, double l1 = 0.0,
101                                    double lInf = 0.0)
102     {
103         String prototxt = _tf(basename + ".prototxt");
104         String caffemodel = _tf(basename + ".caffemodel");
105
106         String inpfile = (useCommonInputBlob) ? _tf("blob.npy") : _tf(basename + ".input.npy");
107         String outfile = _tf(basename + ".npy");
108
109         Mat inp = blobFromNPY(inpfile);
110         Mat ref = blobFromNPY(outfile);
111         checkBackend(&inp, &ref);
112
113         Net net = readNetFromCaffe(prototxt, (useCaffeModel) ? caffemodel : String());
114         ASSERT_FALSE(net.empty());
115
116         net.setPreferableBackend(backend);
117         net.setPreferableTarget(target);
118
119         net.setInput(inp, "input");
120         Mat out = net.forward("output");
121
122         normAssert(ref, out, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
123     }
124 };
125
126 TEST_P(Test_Caffe_layers, Softmax)
127 {
128     testLayerUsingCaffeModels("layer_softmax");
129 }
130
131 TEST_P(Test_Caffe_layers, LRN)
132 {
133     testLayerUsingCaffeModels("layer_lrn_spatial");
134     testLayerUsingCaffeModels("layer_lrn_channels");
135 }
136
137 TEST_P(Test_Caffe_layers, Convolution)
138 {
139     testLayerUsingCaffeModels("layer_convolution", true);
140 }
141
142 TEST_P(Test_Caffe_layers, DeConvolution)
143 {
144     testLayerUsingCaffeModels("layer_deconvolution", true, false);
145 }
146
147 TEST_P(Test_Caffe_layers, InnerProduct)
148 {
149     if (backend == DNN_BACKEND_INFERENCE_ENGINE)
150         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
151     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
152         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
153     testLayerUsingCaffeModels("layer_inner_product", true);
154 }
155
156 TEST_P(Test_Caffe_layers, Pooling_max)
157 {
158     testLayerUsingCaffeModels("layer_pooling_max");
159 }
160
161 TEST_P(Test_Caffe_layers, Pooling_ave)
162 {
163     testLayerUsingCaffeModels("layer_pooling_ave");
164 }
165
166 TEST_P(Test_Caffe_layers, MVN)
167 {
168     if(backend == DNN_BACKEND_CUDA)
169         applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); /* MVN is unsupported */
170
171     testLayerUsingCaffeModels("layer_mvn");
172 }
173
174 void testReshape(const MatShape& inputShape, const MatShape& targetShape,
175                  int axis = 0, int num_axes = -1,
176                  MatShape mask = MatShape())
177 {
178     LayerParams params;
179     params.set("axis", axis);
180     params.set("num_axes", num_axes);
181     if (!mask.empty())
182     {
183         params.set("dim", DictValue::arrayInt<int*>(&mask[0], mask.size()));
184     }
185
186     Mat inp(inputShape.size(), &inputShape[0], CV_32F);
187     std::vector<Mat> inpVec(1, inp);
188     std::vector<Mat> outVec, intVec;
189
190     Ptr<Layer> rl = LayerFactory::createLayerInstance("Reshape", params);
191     runLayer(rl, inpVec, outVec);
192
193     Mat& out = outVec[0];
194     MatShape shape(out.size.p, out.size.p + out.dims);
195     EXPECT_EQ(shape, targetShape);
196 }
197
198 TEST(Layer_Test_Reshape, Accuracy)
199 {
200     {
201         int inp[] = {4, 3, 1, 2};
202         int out[] = {4, 3, 2};
203         testReshape(MatShape(inp, inp + 4), MatShape(out, out + 3), 2, 1);
204     }
205     {
206         int inp[] = {1, 128, 4, 4};
207         int out[] = {1, 2048};
208         int mask[] = {-1, 2048};
209         testReshape(MatShape(inp, inp + 4), MatShape(out, out + 2), 0, -1,
210                     MatShape(mask, mask + 2));
211     }
212     {
213         int inp[] = {1, 2, 3};
214         int out[] = {3, 1, 2};
215         int mask[] = {3, 1, 2};
216         testReshape(MatShape(inp, inp + 3), MatShape(out, out + 3), 0, -1,
217                     MatShape(mask, mask + 3));
218     }
219 }
220
221 TEST_P(Test_Caffe_layers, BatchNorm)
222 {
223     testLayerUsingCaffeModels("layer_batch_norm", true);
224     testLayerUsingCaffeModels("layer_batch_norm_local_stats", true, false);
225 }
226
227 TEST_P(Test_Caffe_layers, ReLU)
228 {
229     testLayerUsingCaffeModels("layer_relu");
230 }
231
232 TEST_P(Test_Caffe_layers, Dropout)
233 {
234     testLayerUsingCaffeModels("layer_dropout");
235 }
236
237 TEST_P(Test_Caffe_layers, Concat)
238 {
239 #if defined(INF_ENGINE_RELEASE)
240 #if INF_ENGINE_VER_MAJOR_GE(2019010000) && INF_ENGINE_VER_MAJOR_LT(2019020000)
241     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
242         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_2019R1, CV_TEST_TAG_DNN_SKIP_IE_2019R1_1);
243 #elif INF_ENGINE_VER_MAJOR_EQ(2019020000)
244     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
245         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_2019R2);
246 #endif
247 #endif
248     testLayerUsingCaffeModels("layer_concat");
249     testLayerUsingCaffeModels("layer_concat_optim", true, false);
250     testLayerUsingCaffeModels("layer_concat_shared_input", true, false);
251 }
252
253 TEST_P(Test_Caffe_layers, Fused_Concat)
254 {
255     if (backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
256         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
257
258     checkBackend();
259
260     // Test case
261     // input
262     //   |
263     //   v
264     // some_layer
265     // |   |
266     // v   v
267     // concat
268     Net net;
269     int interLayer;
270     {
271         LayerParams lp;
272         lp.type = "AbsVal";
273         lp.name = "someLayer";
274         interLayer = net.addLayerToPrev(lp.name, lp.type, lp);
275     }
276     {
277         LayerParams lp;
278         lp.set("axis", 1);
279         lp.type = "Concat";
280         lp.name = "testConcat";
281         int id = net.addLayer(lp.name, lp.type, lp);
282         net.connect(interLayer, 0, id, 0);
283         net.connect(interLayer, 0, id, 1);
284     }
285     int shape[] = {1, 2, 3, 4};
286     Mat input(4, shape, CV_32F);
287     randu(input, 0.0f, 1.0f);  // [0, 1] to make AbsVal an identity transformation.
288
289     net.setInput(input);
290     net.setPreferableBackend(backend);
291     net.setPreferableTarget(target);
292     Mat out = net.forward();
293
294     normAssert(slice(out, Range::all(), Range(0, 2), Range::all(), Range::all()), input, "", default_l1, default_lInf);
295     normAssert(slice(out, Range::all(), Range(2, 4), Range::all(), Range::all()), input, "", default_l1, default_lInf);
296 }
297
298 TEST_P(Test_Caffe_layers, Eltwise)
299 {
300     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
301         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
302     testLayerUsingCaffeModels("layer_eltwise");
303 }
304
305 TEST_P(Test_Caffe_layers, PReLU)
306 {
307     testLayerUsingCaffeModels("layer_prelu", true);
308 }
309
310 // TODO: fix an unstable test case
311 TEST_P(Test_Caffe_layers, layer_prelu_fc)
312 {
313     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
314         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
315     // Reference output values are in range [-0.0001, 10.3906]
316     double l1 = (target == DNN_TARGET_MYRIAD) ? 0.005 : 0.0;
317     double lInf = (target == DNN_TARGET_MYRIAD) ? 0.021 : 0.0;
318     testLayerUsingCaffeModels("layer_prelu_fc", true, false, l1, lInf);
319 }
320
321 TEST_P(Test_Caffe_layers, Reshape_Split_Slice)
322 {
323     if (backend == DNN_BACKEND_INFERENCE_ENGINE)
324         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
325
326     Net net = readNetFromCaffe(_tf("reshape_and_slice_routines.prototxt"));
327     ASSERT_FALSE(net.empty());
328
329     net.setPreferableBackend(backend);
330     net.setPreferableTarget(target);
331
332     Mat input(6, 12, CV_32F);
333     RNG rng(0);
334     rng.fill(input, RNG::UNIFORM, -1, 1);
335
336     net.setInput(input, "input");
337     Mat output = net.forward("output");
338
339     normAssert(input, output, "", default_l1, default_lInf);
340 }
341
342 TEST_P(Test_Caffe_layers, Conv_Elu)
343 {
344 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE <= 2018050000
345     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
346         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_2018R5);
347 #endif
348
349     Net net = readNetFromTensorflow(_tf("layer_elu_model.pb"));
350     ASSERT_FALSE(net.empty());
351
352     Mat inp = blobFromNPY(_tf("layer_elu_in.npy"));
353     Mat ref = blobFromNPY(_tf("layer_elu_out.npy"));
354
355     net.setInput(inp, "input");
356     net.setPreferableBackend(backend);
357     net.setPreferableTarget(target);
358     Mat out = net.forward();
359
360     normAssert(ref, out, "", default_l1, default_lInf);
361 }
362
363 class Layer_LSTM_Test : public ::testing::Test
364 {
365 public:
366     int numInp, numOut;
367     Mat Wh, Wx, b;
368     Ptr<LSTMLayer> layer;
369     std::vector<Mat> inputs, outputs;
370
371     Layer_LSTM_Test() {}
372
373     void init(const MatShape &inpShape_, const MatShape &outShape_,
374               bool produceCellOutput, bool useTimestampDim)
375     {
376         numInp = total(inpShape_);
377         numOut = total(outShape_);
378
379         Wh = Mat::ones(4 * numOut, numOut, CV_32F);
380         Wx = Mat::ones(4 * numOut, numInp, CV_32F);
381         b  = Mat::ones(4 * numOut, 1, CV_32F);
382
383         LayerParams lp;
384         lp.blobs.resize(3);
385         lp.blobs[0] = Wh;
386         lp.blobs[1] = Wx;
387         lp.blobs[2] = b;
388         lp.set<bool>("produce_cell_output", produceCellOutput);
389         lp.set<bool>("use_timestamp_dim", useTimestampDim);
390
391         layer = LSTMLayer::create(lp);
392         layer->setOutShape(outShape_);
393     }
394 };
395
396 TEST_F(Layer_LSTM_Test, get_set_test)
397 {
398     const int TN = 4;
399     MatShape inpShape = shape(5, 3, 2);
400     MatShape outShape = shape(3, 1, 2);
401     MatShape inpResShape = concat(shape(TN), inpShape);
402     MatShape outResShape = concat(shape(TN), outShape);
403
404     init(inpShape, outShape, true, false);
405     layer->setOutShape(outShape);
406
407     Mat C((int)outResShape.size(), &outResShape[0], CV_32F);
408     randu(C, -1., 1.);
409     Mat H = C.clone();
410     randu(H, -1., 1.);
411
412     Mat inp((int)inpResShape.size(), &inpResShape[0], CV_32F);
413     randu(inp, -1., 1.);
414
415     inputs.push_back(inp);
416     runLayer(layer, inputs, outputs);
417
418     EXPECT_EQ(2u, outputs.size());
419
420     print(outResShape, "outResShape");
421     print(shape(outputs[0]), "out0");
422     print(shape(outputs[1]), "out1");
423
424     EXPECT_EQ(outResShape, shape(outputs[0]));
425     EXPECT_EQ(outResShape, shape(outputs[1]));
426
427     EXPECT_EQ(0, layer->inputNameToIndex("x"));
428     EXPECT_EQ(0, layer->outputNameToIndex("h"));
429     EXPECT_EQ(1, layer->outputNameToIndex("c"));
430 }
431
432 TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
433 {
434     LayerParams lp;
435     lp.blobs.resize(3);
436     lp.blobs[0] = blobFromNPY(_tf("lstm.prototxt.w_2.npy"));  // Wh
437     lp.blobs[1] = blobFromNPY(_tf("lstm.prototxt.w_0.npy"));  // Wx
438     lp.blobs[2] = blobFromNPY(_tf("lstm.prototxt.w_1.npy"));  // bias
439     Ptr<LSTMLayer> layer = LSTMLayer::create(lp);
440
441     Mat inp = blobFromNPY(_tf("recurrent.input.npy"));
442     std::vector<Mat> inputs(1, inp), outputs;
443     runLayer(layer, inputs, outputs);
444
445     Mat h_t_reference = blobFromNPY(_tf("lstm.prototxt.h_1.npy"));
446     normAssert(h_t_reference, outputs[0]);
447 }
448
449 TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent)
450 {
451     Ptr<RNNLayer> layer = RNNLayer::create(LayerParams());
452
453     layer->setWeights(
454                 blobFromNPY(_tf("rnn.prototxt.w_0.npy")),
455                 blobFromNPY(_tf("rnn.prototxt.w_1.npy")),
456                 blobFromNPY(_tf("rnn.prototxt.w_2.npy")),
457                 blobFromNPY(_tf("rnn.prototxt.w_3.npy")),
458                 blobFromNPY(_tf("rnn.prototxt.w_4.npy")) );
459
460     std::vector<Mat> output, input(1, blobFromNPY(_tf("recurrent.input.npy")));
461     runLayer(layer, input, output);
462
463     Mat h_ref = blobFromNPY(_tf("rnn.prototxt.h_1.npy"));
464     normAssert(h_ref, output[0]);
465 }
466
467 TEST(Layer_LSTM_Test_Accuracy_, Reverse)
468 {
469     // This handcrafted setup calculates (approximately) a running sum of the input,
470     // accumulated from the last timestep backwards, assuming the inputs are suitably small.
471     cv::Mat input(2, 1, CV_32FC1);
472     input.at<float>(0, 0) = 1e-5f;
473     input.at<float>(1, 0) = 2e-5f;
474
475     cv::Mat Wx(4, 1, CV_32FC1);
476     Wx.at<float>(0, 0) = 0.f;  // Input gate
477     Wx.at<float>(1, 0) = 0.f;  // Forget gate
478     Wx.at<float>(2, 0) = 0.f;  // Output gate
479     Wx.at<float>(3, 0) = 1.f;  // Update signal
480
481     cv::Mat Wh(4, 1, CV_32FC1);
482     Wh.at<float>(0, 0) = 0.f;  // Input gate
483     Wh.at<float>(1, 0) = 0.f;  // Forget gate
484     Wh.at<float>(2, 0) = 0.f;  // Output gate
485     Wh.at<float>(3, 0) = 0.f;  // Update signal
486
487     cv::Mat bias(4, 1, CV_32FC1);
488     bias.at<float>(0, 0) = 1e10f;  // Input gate - always allows input to c
489     bias.at<float>(1, 0) = 1e10f;  // Forget gate - never forget anything on c
490     bias.at<float>(2, 0) = 1e10f;  // Output gate - always output everything
491     bias.at<float>(3, 0) = 0.f;  // Update signal
492
493     LayerParams lp;
494     lp.set("reverse", true);
495     lp.set("use_timestamp_dim", true);
496     lp.blobs.clear();
497     lp.blobs.push_back(Wh);
498     lp.blobs.push_back(Wx);
499     lp.blobs.push_back(bias);
500
501     cv::Ptr<cv::dnn::LSTMLayer> layer = LSTMLayer::create(lp);
502     std::vector<cv::Mat> outputs;
503     std::vector<cv::Mat> inputs;
504     inputs.push_back(input);
505     runLayer(layer, inputs, outputs);
506
507     ASSERT_EQ(1u, outputs.size());
508     cv::Mat out = outputs[0];
509     ASSERT_EQ(3, out.dims);
510     ASSERT_EQ(shape(2, 1, 1), shape(out));
511     float* data = reinterpret_cast<float*>(out.data);
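    // With the huge gate biases the input/forget/output gates saturate to ~1, so the cell state
    // simply accumulates tanh(x_t). Because reverse=true processes the last timestep first,
    // out[t] is (approximately) the sum of tanh(x_s) over s >= t, and h_t = tanh(c_t) ~ c_t here.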
512     EXPECT_NEAR(std::tanh(1e-5f) + std::tanh(2e-5f), data[0], 1e-10);
513     EXPECT_NEAR(std::tanh(2e-5f), data[1], 1e-10);
514 }
515
516
517 class Layer_RNN_Test : public ::testing::Test
518 {
519 public:
520     int nX, nH, nO, nT, nS;
521     Mat Whh, Wxh, bh, Who, bo;
522     Ptr<RNNLayer> layer;
523
524     std::vector<Mat> inputs, outputs;
525
526     Layer_RNN_Test()
527     {
528         nT = 3;
529         nS = 5;
530         nX = 31;
531         nH = 64;
532         nO = 100;
533
534         Whh = Mat::ones(nH, nH, CV_32F);
535         Wxh = Mat::ones(nH, nX, CV_32F);
536         bh  = Mat::ones(nH, 1, CV_32F);
537         Who = Mat::ones(nO, nH, CV_32F);
538         bo  = Mat::ones(nO, 1, CV_32F);
539
540         layer = RNNLayer::create(LayerParams());
541         layer->setProduceHiddenOutput(true);
542         layer->setWeights(Wxh, bh, Whh, Who, bo);
543     }
544 };
545
546 TEST_F(Layer_RNN_Test, get_set_test)
547 {
548     int sz[] = { nT, nS, 1, nX };
549     Mat inp(4, sz, CV_32F);
550     randu(inp, -1., 1.);
551     inputs.push_back(inp);
552     runLayer(layer, inputs, outputs);
553
554     EXPECT_EQ(outputs.size(), 2u);
555     EXPECT_EQ(shape(outputs[0]), shape(nT, nS, nO));
556     EXPECT_EQ(shape(outputs[1]), shape(nT, nS, nH));
557 }
558
559 TEST(Layer_Test_ROIPooling, Accuracy)
560 {
561     Net net = readNetFromCaffe(_tf("net_roi_pooling.prototxt"));
562
563     Mat inp = blobFromNPY(_tf("net_roi_pooling.input.npy"));
564     Mat rois = blobFromNPY(_tf("net_roi_pooling.rois.npy"));
565     Mat ref = blobFromNPY(_tf("net_roi_pooling.npy"));
566
567     net.setInput(inp, "input");
568     net.setInput(rois, "rois");
569     net.setPreferableBackend(DNN_BACKEND_OPENCV);
570
571     Mat out = net.forward();
572
573     normAssert(out, ref);
574 }
575
576 TEST_P(Test_Caffe_layers, FasterRCNN_Proposal)
577 {
578     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
579         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
580     if (backend == DNN_BACKEND_INFERENCE_ENGINE)
581         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
582     if(backend == DNN_BACKEND_CUDA)
583         applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); /* Proposal layer is unsupported */
584
585     Net net = readNetFromCaffe(_tf("net_faster_rcnn_proposal.prototxt"));
586
587     Mat scores = blobFromNPY(_tf("net_faster_rcnn_proposal.scores.npy"));
588     Mat deltas = blobFromNPY(_tf("net_faster_rcnn_proposal.deltas.npy"));
589     Mat imInfo = (Mat_<float>(1, 3) << 600, 800, 1.6f);
590
591     net.setInput(scores, "rpn_cls_prob_reshape");
592     net.setInput(deltas, "rpn_bbox_pred");
593     net.setInput(imInfo, "im_info");
594
595     std::vector<Mat> outs;
596     net.setPreferableBackend(backend);
597     net.setPreferableTarget(target);
598     net.forward(outs, "output");
599
600     for (int i = 0; i < 2; ++i)
601     {
602         Mat ref = blobFromNPY(_tf(i == 0 ? "net_faster_rcnn_proposal.out_rois.npy" :
603                                            "net_faster_rcnn_proposal.out_scores.npy"));
604         const int numDets = ref.size[0];
605         EXPECT_LE(numDets, outs[i].size[0]);
606         normAssert(outs[i].rowRange(0, numDets), ref);
607
608         if (numDets < outs[i].size[0])
609         {
610             EXPECT_EQ(countNonZero(outs[i].rowRange(numDets, outs[i].size[0])), 0);
611         }
612     }
613 }
614
615 typedef testing::TestWithParam<tuple<Vec4i, Vec2i, bool> > Scale_untrainable;
616 TEST_P(Scale_untrainable, Accuracy)
617 {
618     Vec4i inpShapeVec = get<0>(GetParam());
619     int axis = get<1>(GetParam())[0];
620     int weightsDims = get<1>(GetParam())[1];
621     bool testFusion = get<2>(GetParam());
622     const int inpShape[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
623
624     // Create a network with two inputs. The Scale layer multiplies the first input
625     // by the second one. See http://caffe.berkeleyvision.org/tutorial/layers/scale.html
626     Net net;
627     // Check that this version of the Scale layer is not fused with the Convolution layer.
628     if (testFusion)
629     {
630         LayerParams lp;
631         lp.set("kernel_size", 1);
632         lp.set("num_output", 3);
633         lp.set("group", 3);
634         lp.set("bias_term", false);
635         lp.type = "Convolution";
636         lp.name = "testConv";
637
638         std::vector<int> weightsShape(4);
639         weightsShape[0] = 3;  // #outChannels
640         weightsShape[1] = 1;  // #inpChannels / group
641         weightsShape[2] = 1;  // height
642         weightsShape[3] = 1;  // width
643         Mat weights(weightsShape, CV_32F);
644         weights.setTo(1);
645         lp.blobs.push_back(weights);
646         net.addLayerToPrev(lp.name, lp.type, lp);
647     }
648     LayerParams lp;
649     lp.type = "Scale";
650     lp.name = "testLayer";
651     lp.set("axis", axis);
652     int id = net.addLayerToPrev(lp.name, lp.type, lp);
653     net.connect(0, 1, id, 1);
654
655     Mat input(4, inpShape, CV_32F);
656     Mat weights(weightsDims, &inpShape[axis], CV_32F);
657     randu(input, -1, 1);
658     randu(weights, -1, 1);
659
660     std::vector<String> inpNames(2);
661     inpNames[0] = "scale_input";
662     inpNames[1] = "scale_weights";
663     net.setInputsNames(inpNames);
664     net.setInput(input, inpNames[0]);
665     net.setInput(weights, inpNames[1]);
666     net.setPreferableBackend(DNN_BACKEND_OPENCV);
667     Mat out = net.forward();
668
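    // Reference: the weights cover input dims [axis, axis + weightsDims) and are broadcast over
    // the remaining trailing dimensions (and tiled over the leading ones), following Caffe's
    // Scale semantics.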
669     Mat ref(input.dims, input.size, CV_32F);
670     float* inpData = (float*)input.data;
671     float* refData = (float*)ref.data;
672     float* weightsData = (float*)weights.data;
673     int spatialSize = 1;
674     for (int i = axis + weightsDims; i < 4; ++i)
675         spatialSize *= inpShape[i];
676     for (int i = 0; i < ref.total(); ++i)
677     {
678         float w = weightsData[(i / spatialSize) % weights.total()];
679         refData[i] = inpData[i] * w;
680     }
681     normAssert(out, ref);
682 }
683
684 INSTANTIATE_TEST_CASE_P(Layer_Test, Scale_untrainable, Combine(
685 /*input size*/   Values(Vec4i(2, 3, 4, 5)),
686 /*axis, #dims*/  Values(Vec2i(0, 1), Vec2i(0, 2), Vec2i(0, 3), Vec2i(0, 4),
687                                      Vec2i(1, 1), Vec2i(1, 2), Vec2i(1, 3),
688                                                   Vec2i(2, 1), Vec2i(2, 2),
689                                                                Vec2i(3, 1)),
690 /*conv fusion*/  testing::Bool()
691 ));
692
693 typedef testing::TestWithParam<tuple<Vec4i, Vec4i, int, int, int> > Crop;
694 TEST_P(Crop, Accuracy)
695 {
696     Vec4i inpShapeVec = get<0>(GetParam());
697     Vec4i sizShapeVec = get<1>(GetParam());
698     int axis = get<2>(GetParam());
699     int numOffsets = get<3>(GetParam());
700     int offsetVal = get<4>(GetParam());
701     const int inpShape[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
702     const int sizShape[] = {sizShapeVec[0], sizShapeVec[1], sizShapeVec[2], sizShapeVec[3]};
703
704     // Create a network with two inputs. The Crop layer crops the first input to
705     // the size of the second one.
706     // See http://caffe.berkeleyvision.org/tutorial/layers/crop.html
707     Net net;
708
709     LayerParams lp;
710     lp.name = "testCrop";
711     lp.type = "Crop";
712     lp.set("axis", axis);
713     if (numOffsets > 0)
714     {
715         std::vector<int> offsets(numOffsets, offsetVal);
716         lp.set("offset", DictValue::arrayInt<int*>(&offsets[0], offsets.size()));
717     }
718     else
719         offsetVal = 0;
720     int id = net.addLayerToPrev(lp.name, lp.type, lp);
721     net.connect(0, 1, id, 1);
722
723     Mat inpImage(4, inpShape, CV_32F);
724     Mat sizImage(4, sizShape, CV_32F);
725     randu(inpImage, -1, 1);
726     randu(sizImage, -1, 1);
727
728     std::vector<String> inpNames(2);
729     inpNames[0] = "cropImage";
730     inpNames[1] = "sizImage";
731     net.setInputsNames(inpNames);
732     net.setInput(inpImage, inpNames[0]);
733     net.setInput(sizImage, inpNames[1]);
734     net.setPreferableBackend(DNN_BACKEND_OPENCV);
735
736     // There are a few conditions that represent invalid input to the crop
737     // layer, so in those cases we want to verify an exception is thrown.
738
739     bool shouldThrowException = false;
740     if (numOffsets > 1 && numOffsets != 4 - axis)
741         shouldThrowException = true;
742     else
743         for (int i = axis; i < 4; i++)
744             if (sizShape[i] + offsetVal > inpShape[i])
745                 shouldThrowException = true;
746
747     Mat out;
748     if (shouldThrowException)
749     {
750         ASSERT_ANY_THROW(out = net.forward());
751         return;
752     }
753     else
754         out = net.forward();
755
756     // Finally, compare the cropped output blob from the DNN layer (out)
757     // to a reference blob (ref) that we compute here.
758
759     std::vector<Range> crop_range;
760     crop_range.resize(4, Range::all());
761     for (int i = axis; i < 4; i++)
762         crop_range[i] = Range(offsetVal, sizShape[i] + offsetVal);
763
764     Mat ref(sizImage.dims, sizImage.size, CV_32F);
765     inpImage(&crop_range[0]).copyTo(ref);
766     normAssert(out, ref);
767 }
768
769 INSTANTIATE_TEST_CASE_P(Layer_Test, Crop, Combine(
770 /*input blob shape*/    Values(Vec4i(1, 3, 20, 30)),
771 /*cropsize blob shape*/ Values(Vec4i(1, 3, 10, 12)),
772 /*start axis*/          Values(0, 1, 2),
773 /*number of offsets*/   Values(0, 1, 2, 4),
774 /*offset value*/        Values(3, 4)
775 ));
776
777 // Check that, by default, the average pooling layer does not count zero-padded values
778 // towards the normalization area.
779 TEST_P(Test_Caffe_layers, Average_pooling_kernel_area)
780 {
781     LayerParams lp;
782     lp.name = "testAvePool";
783     lp.type = "Pooling";
784     lp.set("kernel_size", 2);
785     lp.set("stride", 2);
786     lp.set("pool", "AVE");
787
788     Net net;
789     net.addLayerToPrev(lp.name, lp.type, lp);
790     // 1 2 | 3
791     // 4 5 | 6
792     // ----+--
793     // 7 8 | 9
794     Mat inp = (Mat_<float>(3, 3) << 1, 2, 3, 4, 5, 6, 7, 8, 9);
795     Mat ref = (Mat_<float>(2, 2) << (1 + 2 + 4 + 5) / 4.f, (3 + 6) / 2.f, (7 + 8) / 2.f, 9);
796     Mat tmp = blobFromImage(inp);
797     net.setInput(blobFromImage(inp));
798     net.setPreferableBackend(backend);
799     net.setPreferableTarget(target);
800     Mat out = net.forward();
801     normAssert(out, blobFromImage(ref));
802 }
803
804 TEST_P(Test_Caffe_layers, PriorBox_repeated)
805 {
806     Net net = readNet(_tf("prior_box.prototxt"));
807     int inp_size[] = {1, 3, 10, 10};
808     int shape_size[] = {1, 2, 3, 4};
809     Mat inp(4, inp_size, CV_32F);
810     randu(inp, -1.0f, 1.0f);
811     Mat shape(4, shape_size, CV_32F);
812     randu(shape, -1.0f, 1.0f);
813     net.setInput(inp, "data");
814     net.setInput(shape, "shape");
815     Mat out = net.forward();
816     Mat ref = blobFromNPY(_tf("priorbox_output.npy"));
817     normAssert(out, ref, "");
818 }
819
820 // Test PriorBoxLayer when no aspect ratios are specified (just square proposals).
821 TEST_P(Test_Caffe_layers, PriorBox_squares)
822 {
823     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
824         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
825     LayerParams lp;
826     lp.name = "testPriorBox";
827     lp.type = "PriorBox";
828     lp.set("min_size", 2);
829     lp.set("flip", true);
830     lp.set("clip", true);
831     float variance[] = {0.1f, 0.1f, 0.2f, 0.2f};
832     float aspectRatios[] = {1.0f};  // That should be ignored.
833     lp.set("variance", DictValue::arrayReal<float*>(&variance[0], 4));
834     lp.set("aspect_ratio", DictValue::arrayReal<float*>(&aspectRatios[0], 1));
835
836     Net net;
837     int id = net.addLayerToPrev(lp.name, lp.type, lp);
838     net.connect(0, 0, id, 1);  // The second input is an input image. Its shape is used to normalize the boxes.
839     Mat inp(1, 2, CV_32F);
840     randu(inp, -1, 1);
841     net.setInput(blobFromImage(inp));
842     net.setPreferableBackend(backend);
843     net.setPreferableTarget(target);
844     Mat out = net.forward();
845
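    // Expected output: two square prior boxes (one per cell of the 1x2 feature map), clipped
    // to [0, 1], followed by the per-coordinate variances repeated for each box.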
846     Mat ref = (Mat_<float>(4, 4) << 0.0, 0.0, 0.75, 1.0,
847                                        0.25, 0.0, 1.0, 1.0,
848                                        0.1f, 0.1f, 0.2f, 0.2f,
849                                        0.1f, 0.1f, 0.2f, 0.2f);
850     double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 2e-5 : 1e-5;
851     normAssert(out.reshape(1, 4), ref, "", l1);
852 }
853
854 typedef TestWithParam<tuple<int, int> > Layer_Test_DWconv_Prelu;
855 TEST_P(Layer_Test_DWconv_Prelu, Accuracy)
856 {
857     // Test case
858     // input       img size 3x16x16  value all 1
859     //   |
860     //   v
861     // dw_conv     weight[0]=-1 weight[1]=-2 weight[2]=-3   bias={1,2,3}
862     //   |
863     //   v
864     // prelu       weight={1,2,3}
865     //   |
866     //   v
867 // output      out size 3x14x14  if right: out[0]=-8 out[1]=-32 out[2]=-72
868 //             but current opencv output: out[0]=-24 out[1]=-48 out[2]=-72
869
870     const int num_input = get<0>(GetParam());   //inpChannels
871     const int group = 3;                        //outChannels=group when group>1
872     const int num_output = get<1>(GetParam());
873     const int kernel_depth = num_input/group;
874     CV_Assert_N(num_output >= group, num_output % group == 0, num_input % group == 0);
875
876     Net net;
877     //layer 1: dwconv
878     LayerParams lp;
879     lp.name = "dwconv";
880     lp.type = "Convolution";
881     lp.set("kernel_size", 3);
882     lp.set("num_output", num_output);
883     lp.set("pad", 0);
884     lp.set("group", group);
885     lp.set("stride", 1);
886     lp.set("engine", "CAFFE");
887     lp.set("bias_term", "true");
888
889     std::vector<int> weightsShape(4);
890     weightsShape[0] = num_output;   // #outChannels
891     weightsShape[1] = kernel_depth; // #inpChannels / group
892     weightsShape[2] = 3;            // height
893     weightsShape[3] = 3;            // width
894     Mat weights(weightsShape, CV_32F, Scalar(1));
895
896     //assign weights
897     for (int i = 0; i < weightsShape[0]; ++i)
898     {
899         for (int j = 0; j < weightsShape[1]; ++j)
900         {
901             for (int k = 0; k < weightsShape[2]; ++k)
902             {
903                 for (int l = 0; l < weightsShape[3]; ++l)
904                 {
905                     weights.ptr<float>(i, j, k)[l]=-1*(i+1);
906                 }
907             }
908         }
909     }
910     lp.blobs.push_back(weights);
911
912     //assign bias
913     Mat bias(1, num_output, CV_32F, Scalar(1));
914     for (int i = 0; i < 1; ++i)
915     {
916         for (int j = 0; j < num_output; ++j)
917         {
918             bias.ptr<float>(i)[j]=j+1;
919         }
920     }
921     lp.blobs.push_back(bias);
922     net.addLayerToPrev(lp.name, lp.type, lp);
923
924     //layer 2: prelu
925     LayerParams lpr;
926     lpr.name = "dw_relu";
927     lpr.type = "PReLU";
928     Mat weightsp(1, num_output, CV_32F, Scalar(1));
929
930     //assign weights
931     for (int i = 0; i < 1; ++i)
932     {
933         for (int j = 0; j < num_output; ++j)
934         {
935             weightsp.ptr<float>(i)[j]=j+1;
936         }
937     }
938
939     lpr.blobs.push_back(weightsp);
940     net.addLayerToPrev(lpr.name, lpr.type, lpr);
941
942     int shape[] = {1, num_input, 16, 16};
943     Mat in_blob(4, &shape[0], CV_32FC1, Scalar(1));
944
945     net.setPreferableBackend(DNN_BACKEND_OPENCV);
946     net.setInput(in_blob);
947     Mat out = net.forward();
948
949     //assign target
950     std::vector<int> outShape(4);
951     outShape[0] = 1;
952     outShape[1] = num_output;       // outChannels
953     outShape[2] = 14;          // height
954     outShape[3] = 14;          // width
955     Mat target(outShape, CV_32F, Scalar(1));
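    // For output channel j: the convolution sums 3x3*kernel_depth inputs (all ones) times the
    // weight -(j+1) and adds the bias (j+1); the result is negative, so PReLU scales it by its
    // per-channel weight (j+1):  ((-9 * kernel_depth * (j+1)) + (j+1)) * (j+1)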
956     for (int i = 0; i < outShape[0]; ++i)
957     {
958         for (int j = 0; j < outShape[1]; ++j)
959         {
960             for (int k = 0; k < outShape[2]; ++k)
961             {
962                 for (int l = 0; l < outShape[3]; ++l)
963                 {
964                     target.ptr<float>(i, j, k)[l]=(-9*kernel_depth*(j+1)+j+1)*(j+1);
965                 }
966             }
967         }
968     }
969
970     normAssert(out, target);
971 }
972 INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_DWconv_Prelu, Combine(Values(3, 6), Values(3, 6)));
973
974 #ifdef HAVE_INF_ENGINE
975 // Use Intel's Model Optimizer to generate the .xml and .bin files:
976 // ./ModelOptimizer -w /path/to/caffemodel -d /path/to/prototxt \
977 //                  -p FP32 -i -b ${batch_size} -o /path/to/output/folder
978 typedef testing::TestWithParam<Target> Layer_Test_Convolution_DLDT;
979 TEST_P(Layer_Test_Convolution_DLDT, Accuracy)
980 {
981     Target targetId = GetParam();
982
983     std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : "";
984     Net netDefault = readNet(_tf("layer_convolution.caffemodel"), _tf("layer_convolution.prototxt"));
985     Net net = readNet(_tf("layer_convolution" + suffix + ".xml"), _tf("layer_convolution" + suffix + ".bin"));
986
987     Mat inp = blobFromNPY(_tf("blob.npy"));
988
989     netDefault.setInput(inp);
990     netDefault.setPreferableBackend(DNN_BACKEND_OPENCV);
991     Mat outDefault = netDefault.forward();
992
993     net.setInput(inp);
994     net.setPreferableTarget(targetId);
995
996     Mat out = net.forward();
997
998     double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.5e-3 : 1e-5;
999     double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.8e-2 : 1e-4;
1000     normAssert(outDefault, out, "", l1, lInf);
1001
1002     std::vector<int> outLayers = net.getUnconnectedOutLayers();
1003     ASSERT_EQ(net.getLayer(outLayers[0])->name, "output");
1004     ASSERT_EQ(net.getLayer(outLayers[0])->type, "Convolution");
1005 }
1006
1007 TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8)
1008 {
1009     Target targetId = GetParam();
1010     Mat inp = blobFromNPY(_tf("blob.npy"));
1011
1012     Mat inputs[] = {Mat(inp.dims, inp.size, CV_8U), Mat()};
1013     randu(inputs[0], 0, 255);
1014     inputs[0].convertTo(inputs[1], CV_32F);
1015
1016     std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : "";
1017
1018     Mat outs[2];
1019     for (int i = 0; i < 2; ++i)
1020     {
1021         Net net = readNet(_tf("layer_convolution" + suffix + ".xml"), _tf("layer_convolution" + suffix + ".bin"));
1022         net.setPreferableTarget(targetId);
1023         net.setInput(inputs[i]);
1024         outs[i] = net.forward();
1025         ASSERT_EQ(outs[i].type(), CV_32F);
1026     }
1027     if (targetId != DNN_TARGET_MYRIAD)
1028         normAssert(outs[0], outs[1]);
1029 }
1030
1031 TEST_P(Layer_Test_Convolution_DLDT, multithreading)
1032 {
1033     Target targetId = GetParam();
1034     std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : "";
1035     std::string xmlPath = _tf("layer_convolution" + suffix + ".xml");
1036     std::string binPath = _tf("layer_convolution" + suffix + ".bin");
1037     Net firstNet = readNet(xmlPath, binPath);
1038     Net secondNet = readNet(xmlPath, binPath);
1039     Mat inp = blobFromNPY(_tf("blob.npy"));
1040
1041     firstNet.setInput(inp);
1042     secondNet.setInput(inp);
1043     firstNet.setPreferableTarget(targetId);
1044     secondNet.setPreferableTarget(targetId);
1045
1046     Mat out1, out2;
1047     std::thread t1([&]{out1 = firstNet.forward();});
1048     std::thread t2([&]{out2 = secondNet.forward();});
1049
1050     t1.join();
1051     t2.join();
1052
1053     Mat ref = blobFromNPY(_tf("layer_convolution.npy"));
1054     double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.5e-3 : 1e-5;
1055     double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.8e-2 : 1e-4;
1056     normAssert(out1, ref, "first thread", l1, lInf);
1057     normAssert(out2, ref, "second thread", l1, lInf);
1058 }
1059
1060 INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Convolution_DLDT,
1061     testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE)));
1062
1063 // 1. Create a .prototxt file with the following network:
1064 // layer {
1065 //   type: "Input" name: "data" top: "data"
1066 //   input_param { shape { dim: 1 dim: 2 dim: 3 } }
1067 // }
1068 // layer {
1069 //   type: "Input" name: "second_input" top: "second_input"
1070 //   input_param { shape { dim: 1 dim: 2 dim: 3 } }
1071 // }
1072 // layer {
1073 //  type: "Eltwise" name: "output" top: "output"
1074 //  bottom: "data" bottom: "second_input"
1075 //  eltwise_param { operation: SUM }
1076 // }
1077 //
1078 // 2. Create a .caffemodel file using Caffe:
1079 //
1080 // import caffe
1081 // net = caffe.Net('/path/to/prototxt', caffe.TEST)
1082 // net.save('/path/to/caffemodel')
1083 //
1084 // 3. Convert using ModelOptimizer.
1085 typedef testing::TestWithParam<tuple<int, int, Target, std::vector<int> > > Test_DLDT_two_inputs_3dim;
1086 TEST_P(Test_DLDT_two_inputs_3dim, as_IR)
1087 {
1088     int firstInpType = get<0>(GetParam());
1089     int secondInpType = get<1>(GetParam());
1090     Target targetId = get<2>(GetParam());
1091
1092     std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : "";
1093     Net net = readNet(_tf("net_two_inputs" + suffix + ".xml"), _tf("net_two_inputs.bin"));
1094     std::vector<int> inpSize = get<3>(GetParam());
1095     Mat firstInp(3, inpSize.data(), firstInpType);
1096     Mat secondInp(3, inpSize.data(), secondInpType);
1097     randu(firstInp, 0, 255);
1098     randu(secondInp, 0, 255);
1099
1100     net.setInput(firstInp, "data");
1101     net.setInput(secondInp, "second_input");
1102     net.setPreferableTarget(targetId);
1103
1104     double l1 = ((targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) &&
1105                  (firstInpType == CV_32F || secondInpType == CV_32F)) ? 0.06 : 0.0;
1106     double lInf = ((targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) &&
1107                    (firstInpType == CV_32F || secondInpType == CV_32F)) ? 0.23 : 0.0;
1108
1109     Mat out = net.forward();
1110
1111     Mat ref;
1112     cv::add(firstInp, secondInp, ref, Mat(), CV_32F);
1113     normAssert(out, ref, "", l1, lInf);
1114 }
1115
1116 std::vector< std::vector<int> > list_sizes{ {1, 2, 3}, {3, 2, 1}, {5, 5, 5}, {13, 7, 11} };
1117
1118 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_DLDT_two_inputs_3dim, Combine(
1119   Values(CV_8U, CV_32F), Values(CV_8U, CV_32F),
1120   testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE)),
1121   testing::ValuesIn(list_sizes)
1122 ));
1123
1124 typedef testing::TestWithParam<tuple<int, int, Target> > Test_DLDT_two_inputs;
1125 TEST_P(Test_DLDT_two_inputs, as_backend)
1126 {
1127     static const float kScale = 0.5f;
1128     static const float kScaleInv = 1.0f / kScale;
1129
1130     Target targetId = get<2>(GetParam());
1131
1132     Net net;
1133     LayerParams lp;
1134     lp.type = "Eltwise";
1135     lp.name = "testLayer";
1136     lp.set("operation", "sum");
1137     int eltwiseId = net.addLayerToPrev(lp.name, lp.type, lp);  // connect to a first input
1138     net.connect(0, 1, eltwiseId, 1);  // connect to a second input
1139
1140     int inpSize[] = {1, 2, 3, 4};
1141     Mat firstInp(4, &inpSize[0], get<0>(GetParam()));
1142     Mat secondInp(4, &inpSize[0], get<1>(GetParam()));
1143     randu(firstInp, 0, 255);
1144     randu(secondInp, 0, 255);
1145
1146     net.setInputsNames({"data", "second_input"});
1147     net.setInput(firstInp, "data", kScale);
1148     net.setInput(secondInp, "second_input", kScaleInv);
1149     net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
1150     net.setPreferableTarget(targetId);
1151     Mat out = net.forward();
1152
1153     Mat ref;
1154     addWeighted(firstInp, kScale, secondInp, kScaleInv, 0, ref, CV_32F);
1155     // Output values are in range [0, 637.5].
1156     double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.06 : 1e-6;
1157     double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.3 : 1e-5;
1158     normAssert(out, ref, "", l1, lInf);
1159 }
1160
1161 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_DLDT_two_inputs, Combine(
1162   Values(CV_8U, CV_32F), Values(CV_8U, CV_32F),
1163   testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE))
1164 ));
1165
1166 class UnsupportedLayer : public Layer
1167 {
1168 public:
1169     UnsupportedLayer(const LayerParams &params) : Layer(params) {}
1170
1171     static Ptr<Layer> create(const LayerParams& params)
1172     {
1173         return Ptr<Layer>(new UnsupportedLayer(params));
1174     }
1175
1176     virtual bool supportBackend(int backendId) CV_OVERRIDE
1177     {
1178         return backendId == DNN_BACKEND_OPENCV;
1179     }
1180
1181     virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals) CV_OVERRIDE {}
1182 };
1183
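// The network ends with a layer that the IE backend does not support, which forces a fallback
// for that part of the (possibly fused Convolution + Scale) graph; the test only checks that
// forwarding such a partially supported network does not throw.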
1184 TEST(Test_DLDT, fused_output)
1185 {
1186     static const int kNumChannels = 3;
1187     CV_DNN_REGISTER_LAYER_CLASS(Unsupported, UnsupportedLayer);
1188     Net net;
1189     {
1190         LayerParams lp;
1191         lp.set("kernel_size", 1);
1192         lp.set("num_output", 3);
1193         lp.set("bias_term", false);
1194         lp.type = "Convolution";
1195         lp.name = "testConv";
1196         lp.blobs.push_back(Mat({kNumChannels, 1, 1, 1}, CV_32F, Scalar(1)));
1197         net.addLayerToPrev(lp.name, lp.type, lp);
1198     }
1199     {
1200         LayerParams lp;
1201         lp.set("bias_term", false);
1202         lp.type = "Scale";
1203         lp.name = "testScale";
1204         lp.blobs.push_back(Mat({kNumChannels}, CV_32F, Scalar(1)));
1205         net.addLayerToPrev(lp.name, lp.type, lp);
1206     }
1207     {
1208         LayerParams lp;
1209         net.addLayerToPrev("unsupported_layer", "Unsupported", lp);
1210     }
1211     net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
1212     net.setInput(Mat({1, 1, 1, 1}, CV_32FC1, Scalar(1)));
1213     ASSERT_NO_THROW(net.forward());
1214     LayerFactory::unregisterLayer("Unsupported");
1215 }
1216
1217 TEST(Test_DLDT, multiple_networks)
1218 {
1219     Net nets[2];
1220     for (int i = 0; i < 2; ++i)
1221     {
1222         nets[i].setInputsNames(std::vector<String>(1, format("input_%d", i)));
1223
1224         LayerParams lp;
1225         lp.set("kernel_size", 1);
1226         lp.set("num_output", 1);
1227         lp.set("bias_term", false);
1228         lp.type = "Convolution";
1229         lp.name = format("testConv_%d", i);
1230         lp.blobs.push_back(Mat({1, 1, 1, 1}, CV_32F, Scalar(1 + i)));
1231         nets[i].addLayerToPrev(lp.name, lp.type, lp);
1232         nets[i].setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
1233         nets[i].setInput(Mat({1, 1, 1, 1}, CV_32FC1, Scalar(1)));
1234     }
1235     Mat out_1 = nets[0].forward();
1236     Mat out_2 = nets[1].forward();
1237     // After the second network is initialized, we request an output from the first network again.
1238     out_1 = nets[0].forward();
1239     normAssert(2 * out_1, out_2);
1240 }
1241 #endif  // HAVE_INF_ENGINE
1242
1243 // Test a custom layer.
1244 class CustomInterpLayer CV_FINAL : public Layer
1245 {
1246 public:
1247     CustomInterpLayer(const LayerParams &params) : Layer(params)
1248     {
1249         zoomFactor = params.get<int>("zoom_factor", 0);
1250         outWidth = params.get<int>("width", 0);
1251         outHeight = params.get<int>("height", 0);
1252     }
1253
1254     static Ptr<Layer> create(LayerParams& params)
1255     {
1256         return Ptr<Layer>(new CustomInterpLayer(params));
1257     }
1258
1259     virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
1260                                  const int requiredOutputs,
1261                                  std::vector<std::vector<int> > &outputs,
1262                                  std::vector<std::vector<int> > &internals) const CV_OVERRIDE
1263     {
1264         const int batchSize = inputs[0][0];
1265         const int numChannels = inputs[0][1];
1266         const int inpHeight = inputs[0][2];
1267         const int inpWidth = inputs[0][3];
1268
1269         std::vector<int> outShape(4);
1270         outShape[0] = batchSize;
1271         outShape[1] = numChannels;
1272         outShape[2] = outHeight != 0 ? outHeight : (inpHeight + (inpHeight - 1) * (zoomFactor - 1));
1273         outShape[3] = outWidth != 0 ? outWidth : (inpWidth + (inpWidth - 1) * (zoomFactor - 1));
1274         outputs.assign(1, outShape);
1275         return false;
1276     }
1277
1278     virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
1279     {
1280         std::vector<Mat> outputs;
1281         outputs_arr.getMatVector(outputs);
1282
1283         if (!outWidth && !outHeight)
1284         {
1285             outHeight = outputs[0].size[2];
1286             outWidth = outputs[0].size[3];
1287         }
1288     }
1289
1290     // Implementation of this custom layer is based on https://github.com/cdmh/deeplab-public/blob/master/src/caffe/layers/interp_layer.cpp
1291     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
1292     {
1293         CV_TRACE_FUNCTION();
1294         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
1295
1296         if (inputs_arr.depth() == CV_16S)
1297         {
1298             forward_fallback(inputs_arr, outputs_arr, internals_arr);
1299             return;
1300         }
1301
1302         std::vector<Mat> inputs, outputs;
1303         inputs_arr.getMatVector(inputs);
1304         outputs_arr.getMatVector(outputs);
1305
1306         Mat& inp = inputs[0];
1307         Mat& out = outputs[0];
1308         const float* inpData = (float*)inp.data;
1309         float* outData = (float*)out.data;
1310
1311         const int batchSize = inp.size[0];
1312         const int numChannels = inp.size[1];
1313         const int inpHeight = inp.size[2];
1314         const int inpWidth = inp.size[3];
1315
1316         const float rheight = (outHeight > 1) ? static_cast<float>(inpHeight - 1) / (outHeight - 1) : 0.f;
1317         const float rwidth = (outWidth > 1) ? static_cast<float>(inpWidth - 1) / (outWidth - 1) : 0.f;
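        // Bilinear interpolation: (h1, w1) is the top-left source pixel, h1p/w1p select the
        // bottom/right neighbours (0 at the border), and the lambda values are the blend weights.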
1318         for (int h2 = 0; h2 < outHeight; ++h2)
1319         {
1320             const float h1r = rheight * h2;
1321             const int h1 = h1r;
1322             const int h1p = (h1 < inpHeight - 1) ? 1 : 0;
1323             const float h1lambda = h1r - h1;
1324             const float h0lambda = 1.f - h1lambda;
1325             for (int w2 = 0; w2 < outWidth; ++w2)
1326             {
1327                 const float w1r = rwidth * w2;
1328                 const int w1 = w1r;
1329                 const int w1p = (w1 < inpWidth - 1) ? 1 : 0;
1330                 const float w1lambda = w1r - w1;
1331                 const float w0lambda = 1.f - w1lambda;
1332                 const float* pos1 = inpData + h1 * inpWidth + w1;
1333                 float* pos2 = outData + h2 * outWidth + w2;
1334                 for (int c = 0; c < batchSize * numChannels; ++c)
1335                 {
1336                     pos2[0] =
1337                       h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
1338                       h1lambda * (w0lambda * pos1[h1p * inpWidth] + w1lambda * pos1[h1p * inpWidth + w1p]);
1339                     pos1 += inpWidth * inpHeight;
1340                     pos2 += outWidth * outHeight;
1341                 }
1342             }
1343         }
1344     }
1345
1346 private:
1347     int outWidth, outHeight, zoomFactor;
1348 };
1349
1350 #ifndef OPENCV_DNN_EXTERNAL_PROTOBUF
1351 TEST_P(Test_Caffe_layers, Interp)
1352 #else
1353 TEST_P(Test_Caffe_layers, DISABLED_Interp)  // requires patched protobuf (available in OpenCV source tree only)
1354 #endif
1355 {
1356     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
1357         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
1358
1359     // Test a custom layer.
1360     CV_DNN_REGISTER_LAYER_CLASS(Interp, CustomInterpLayer);
1361     try
1362     {
1363         testLayerUsingCaffeModels("layer_interp", false, false);
1364     }
1365     catch (...)
1366     {
1367         LayerFactory::unregisterLayer("Interp");
1368         throw;
1369     }
1370     LayerFactory::unregisterLayer("Interp");
1371
1372     // Test an implemented layer.
1373     testLayerUsingCaffeModels("layer_interp", false, false);
1374 }
1375
1376 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_Caffe_layers, dnnBackendsAndTargets());
1377
1378 TEST(Layer_Test_PoolingIndices, Accuracy)
1379 {
1380     Net net;
1381
1382     LayerParams lp;
1383     lp.set("pool", "max");
1384     lp.set("kernel_w", 2);
1385     lp.set("kernel_h", 2);
1386     lp.set("stride_w", 2);
1387     lp.set("stride_h", 2);
1388     lp.set("pad_w", 0);
1389     lp.set("pad_h", 0);
1390     lp.name = "testLayer.name";  // This test also checks that OpenCV allows layer names containing dots.
1391     lp.type = "Pooling";
1392     net.addLayerToPrev(lp.name, lp.type, lp);
1393
1394     Mat inp(10, 10, CV_8U);
1395     randu(inp, 0, 255);
1396
1397     Mat maxValues(5, 5, CV_32F, Scalar(-1)), indices(5, 5, CV_32F, Scalar(-1));
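    // Reference: per 2x2 cell, the maximum value and its flattened row-major position (y * 10 + x).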
1398     for (int y = 0; y < 10; ++y)
1399     {
1400         int dstY = y / 2;
1401         for (int x = 0; x < 10; ++x)
1402         {
1403             int dstX = x / 2;
1404             uint8_t val = inp.at<uint8_t>(y, x);
1405             if ((float)val > maxValues.at<float>(dstY, dstX))
1406             {
1407                 maxValues.at<float>(dstY, dstX) = val;
1408                 indices.at<float>(dstY, dstX) = y * 10 + x;
1409             }
1410         }
1411     }
1412     net.setPreferableBackend(DNN_BACKEND_OPENCV);
1413     net.setInput(blobFromImage(inp));
1414
1415     std::vector<Mat> outputs;
1416     net.forward(outputs, lp.name);
1417     normAssert(maxValues, outputs[0].reshape(1, 5));
1418     normAssert(indices, outputs[1].reshape(1, 5));
1419 }
1420
1421 typedef testing::TestWithParam<tuple<Vec4i, int, tuple<Backend, Target> > > Layer_Test_ShuffleChannel;
1422 TEST_P(Layer_Test_ShuffleChannel, Accuracy)
1423 {
1424     Vec4i inpShapeVec = get<0>(GetParam());
1425     int group = get<1>(GetParam());
1426     ASSERT_EQ(inpShapeVec[1] % group, 0);
1427     const int groupSize = inpShapeVec[1] / group;
1428     int backendId = get<0>(get<2>(GetParam()));
1429     int targetId = get<1>(get<2>(GetParam()));
1430
1431     Net net;
1432     LayerParams lp;
1433     lp.set("group", group);
1434     lp.type = "ShuffleChannel";
1435     lp.name = "testLayer";
1436     net.addLayerToPrev(lp.name, lp.type, lp);
1437
1438     const int inpShape[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
1439     Mat inp(4, inpShape, CV_32F);
1440     randu(inp, 0, 255);
1441
1442     net.setInput(inp);
1443     net.setPreferableBackend(backendId);
1444     net.setPreferableTarget(targetId);
1445     Mat out = net.forward();
1446
1447     double l1 = (targetId == DNN_TARGET_OPENCL_FP16) ? 5e-2 : 1e-5;
1448     double lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 7e-2 : 1e-4;
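    // ShuffleChannel permutes channels by transposing a (group, groupSize) view of the channel
    // axis: output channel c comes from input channel groupSize * (c % group) + c / group.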
1449     for (int n = 0; n < inpShapeVec[0]; ++n)
1450     {
1451         for (int c = 0; c < inpShapeVec[1]; ++c)
1452         {
1453             Mat outChannel = getPlane(out, n, c);
1454             Mat inpChannel = getPlane(inp, n, groupSize * (c % group) + c / group);
1455             normAssert(outChannel, inpChannel, "", l1, lInf);
1456         }
1457     }
1458 }
1459 INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_ShuffleChannel, Combine(
1460 /*input shape*/  Values(Vec4i(1, 6, 5, 7), Vec4i(3, 12, 1, 4)),
1461 /*group*/        Values(1, 2, 3, 6), dnnBackendsAndTargets(/*with IE*/ false)
1462 ));
1463
1464 // Check that ReLU is not fused into the convolution when its output is explicitly requested
1465 TEST(Layer_Test_Convolution, relu_fusion)
1466 {
1467     Net net;
1468     {
1469         LayerParams lp;
1470         lp.set("kernel_size", 1);
1471         lp.set("num_output", 1);
1472         lp.set("bias_term", false);
1473         lp.type = "Convolution";
1474         lp.name = "testConv";
1475
1476         int weightsShape[] = {1, 1, 1, 1};
1477         Mat weights(4, &weightsShape[0], CV_32F, Scalar(1));
1478         lp.blobs.push_back(weights);
1479         net.addLayerToPrev(lp.name, lp.type, lp);
1480     }
1481     {
1482         LayerParams lp;
1483         lp.type = "ReLU";
1484         lp.name = "testReLU";
1485         net.addLayerToPrev(lp.name, lp.type, lp);
1486     }
1487     int sz[] = {1, 1, 2, 3};
1488     Mat input(4, &sz[0], CV_32F);
1489     randu(input, -1.0, -0.1);
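    // The input is strictly negative: if ReLU were fused into the convolution, the requested
    // "testConv" output would be clamped to zero instead of matching the input.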
1490     net.setInput(input);
1491     net.setPreferableBackend(DNN_BACKEND_OPENCV);
1492     Mat output = net.forward("testConv");
1493     normAssert(input, output);
1494 }
1495
1496 }} // namespace