// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

/*
Tests for loading TensorFlow models.
*/

#include "test_precomp.hpp"
#include "npy_blob.hpp"

#include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS

namespace opencv_test
{

using namespace cv;
using namespace cv::dnn;

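// Builds a path to a file inside the opencv_extra/testdata/dnn directory.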
template<typename TString>
static std::string _tf(TString filename)
{
    return (getOpenCVExtraDir() + "/dnn/") + filename;
}

TEST(Test_TensorFlow, read_inception)
{
    Net net;
    {
        const string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
        net = readNetFromTensorflow(model);
        ASSERT_FALSE(net.empty());
    }

    Mat sample = imread(_tf("grace_hopper_227.png"));
    ASSERT_TRUE(!sample.empty());
    Mat input;
    resize(sample, input, Size(224, 224));
    input -= 128; // mean subtraction

    Mat inputBlob = blobFromImage(input);

    net.setInput(inputBlob, "input");
    Mat out = net.forward("softmax2");

    std::cout << out.dims << std::endl;
}

TEST(Test_TensorFlow, inception_accuracy)
{
    Net net;
    {
        const string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
        net = readNetFromTensorflow(model);
        ASSERT_FALSE(net.empty());
    }

    Mat sample = imread(_tf("grace_hopper_227.png"));
    ASSERT_TRUE(!sample.empty());
    resize(sample, sample, Size(224, 224));
    Mat inputBlob = blobFromImage(sample);

    net.setInput(inputBlob, "input");
    Mat out = net.forward("softmax2");

    Mat ref = blobFromNPY(_tf("tf_inception_prob.npy"));

    normAssert(ref, out);
}

static std::string path(const std::string& file)
{
    return findDataFile("dnn/tensorflow/" + file, false);
}

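// Runs a small TensorFlow graph from opencv_extra/testdata/dnn/tensorflow and compares the
// result with a reference output. Test data follows the naming convention:
//   <prefix>_net.pb    - frozen graph definition
//   <prefix>_net.pbtxt - text graph definition (used only when hasText is true)
//   <prefix>_in.npy    - input blob
//   <prefix>_out.npy   - reference output blob
// l1 and lInf are the error thresholds passed to normAssert. If memoryLoad is true, the model
// files are first read into memory buffers and the network is parsed from memory.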
static void runTensorFlowNet(const std::string& prefix, int targetId = DNN_TARGET_CPU, bool hasText = false,
                             double l1 = 1e-5, double lInf = 1e-4,
                             bool memoryLoad = false)
{
    std::string netPath = path(prefix + "_net.pb");
    std::string netConfig = (hasText ? path(prefix + "_net.pbtxt") : "");
    std::string inpPath = path(prefix + "_in.npy");
    std::string outPath = path(prefix + "_out.npy");

    Net net;
    if (memoryLoad)
    {
        // Load the files into memory buffers.
        string dataModel;
        ASSERT_TRUE(readFileInMemory(netPath, dataModel));

        string dataConfig;
        if (hasText)
            ASSERT_TRUE(readFileInMemory(netConfig, dataConfig));

        net = readNetFromTensorflow(dataModel.c_str(), dataModel.size(),
                                    dataConfig.c_str(), dataConfig.size());
    }
    else
        net = readNetFromTensorflow(netPath, netConfig);

    ASSERT_FALSE(net.empty());

    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
    net.setPreferableTarget(targetId);

    cv::Mat input = blobFromNPY(inpPath);
    cv::Mat target = blobFromNPY(outPath);

    net.setInput(input);
    cv::Mat output = net.forward();
    normAssert(target, output, "", l1, lInf);
}

typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_layers;

TEST_P(Test_TensorFlow_layers, conv)
{
    int targetId = GetParam();
    runTensorFlowNet("single_conv", targetId);
    runTensorFlowNet("atrous_conv2d_valid", targetId);
    runTensorFlowNet("atrous_conv2d_same", targetId);
    runTensorFlowNet("depthwise_conv2d", targetId);
}

TEST_P(Test_TensorFlow_layers, padding)
{
    int targetId = GetParam();
    runTensorFlowNet("padding_same", targetId);
    runTensorFlowNet("padding_valid", targetId);
    runTensorFlowNet("spatial_padding", targetId);
}

TEST_P(Test_TensorFlow_layers, eltwise_add_mul)
{
    runTensorFlowNet("eltwise_add_mul", GetParam());
}

TEST_P(Test_TensorFlow_layers, pad_and_concat)
{
    runTensorFlowNet("pad_and_concat", GetParam());
}

TEST_P(Test_TensorFlow_layers, batch_norm)
{
    int targetId = GetParam();
    runTensorFlowNet("batch_norm", targetId);
    runTensorFlowNet("fused_batch_norm", targetId);
    runTensorFlowNet("batch_norm_text", targetId, true);
    runTensorFlowNet("mvn_batch_norm", targetId);
    runTensorFlowNet("mvn_batch_norm_1x1", targetId);
    runTensorFlowNet("unfused_batch_norm", targetId);
    runTensorFlowNet("fused_batch_norm_no_gamma", targetId);
    runTensorFlowNet("unfused_batch_norm_no_gamma", targetId);
}

TEST_P(Test_TensorFlow_layers, pooling)
{
    int targetId = GetParam();
    runTensorFlowNet("max_pool_even", targetId);
    runTensorFlowNet("max_pool_odd_valid", targetId);
    runTensorFlowNet("ave_pool_same", targetId);
    runTensorFlowNet("max_pool_odd_same", targetId);
    runTensorFlowNet("reduce_mean", targetId);  // an average pooling over all spatial dimensions.
}

TEST_P(Test_TensorFlow_layers, deconvolution)
{
    int targetId = GetParam();
    runTensorFlowNet("deconvolution", targetId);
    runTensorFlowNet("deconvolution_same", targetId);
    runTensorFlowNet("deconvolution_stride_2_same", targetId);
    runTensorFlowNet("deconvolution_adj_pad_valid", targetId);
    runTensorFlowNet("deconvolution_adj_pad_same", targetId);
    runTensorFlowNet("keras_deconv_valid", targetId);
    runTensorFlowNet("keras_deconv_same", targetId);
}

TEST_P(Test_TensorFlow_layers, matmul)
{
    int targetId = GetParam();
    runTensorFlowNet("matmul", targetId);
    runTensorFlowNet("nhwc_reshape_matmul", targetId);
    runTensorFlowNet("nhwc_transpose_reshape_matmul", targetId);
}

TEST_P(Test_TensorFlow_layers, reshape)
{
    int targetId = GetParam();
    runTensorFlowNet("shift_reshape_no_reorder", targetId);
    runTensorFlowNet("reshape_reduce", targetId);
    runTensorFlowNet("flatten", targetId, true);
    runTensorFlowNet("unfused_flatten", targetId);
    runTensorFlowNet("unfused_flatten_unknown_batch", targetId);
}

TEST_P(Test_TensorFlow_layers, l2_normalize)
{
    int targetId = GetParam();
    runTensorFlowNet("l2_normalize", targetId);
    runTensorFlowNet("l2_normalize_3d", targetId);
}

INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_layers, availableDnnTargets());

typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_nets;

TEST_P(Test_TensorFlow_nets, MobileNet_SSD)
{
    std::string netPath = findDataFile("dnn/ssd_mobilenet_v1_coco.pb", false);
    std::string netConfig = findDataFile("dnn/ssd_mobilenet_v1_coco.pbtxt", false);
    std::string imgPath = findDataFile("dnn/street.png", false);

    Mat inp;
    resize(imread(imgPath), inp, Size(300, 300));
    inp = blobFromImage(inp, 1.0f / 127.5, Size(), Scalar(127.5, 127.5, 127.5), true);

    std::vector<String> outNames(3);
    outNames[0] = "concat";
    outNames[1] = "concat_1";
    outNames[2] = "detection_out";

    std::vector<Mat> target(outNames.size());
    for (size_t i = 0; i < outNames.size(); ++i)
    {
        std::string path = findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco." + outNames[i] + ".npy", false);
        target[i] = blobFromNPY(path);
    }

    Net net = readNetFromTensorflow(netPath, netConfig);

    net.setPreferableTarget(GetParam());

    net.setInput(inp);

    std::vector<Mat> output;
    net.forward(output, outNames);

    normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1), "", 1e-5, 1.5e-4);
    normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 3e-4);
    normAssertDetections(target[2], output[2], "", 0.2);
}

TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
{
    std::string proto = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pbtxt", false);
    std::string model = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pb", false);

    Net net = readNetFromTensorflow(model, proto);
    Mat img = imread(findDataFile("dnn/street.png", false));
    Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false);

    net.setPreferableTarget(GetParam());

    net.setInput(blob);
    // The output has shape 1x1xNx7, where N is the number of detections.
    // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom].
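    // For illustration only: such an output can be viewed as an Nx7 matrix, e.g.
    //   Mat detections(out.size[2], 7, CV_32F, out.ptr<float>());
    //   float confidence = detections.at<float>(i, 2);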
    Mat out = net.forward();
    Mat ref = (Mat_<float>(5, 7) << 0, 1, 0.90176028, 0.19872092, 0.36311883, 0.26461923, 0.63498729,
                                    0, 3, 0.93569964, 0.64865261, 0.45906419, 0.80675775, 0.65708131,
                                    0, 3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015,
                                    0, 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
                                    0, 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
    normAssertDetections(ref, out, "", 0.5);
}

TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)
{
    std::string proto = findDataFile("dnn/opencv_face_detector.pbtxt", false);
    std::string model = findDataFile("dnn/opencv_face_detector_uint8.pb", false);

    Net net = readNetFromTensorflow(model, proto);
    Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
    Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);

    net.setPreferableTarget(GetParam());

    net.setInput(blob);
    // The output has shape 1x1xNx7, where N is the number of detections.
    // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom].
    Mat out = net.forward();

    // The reference values are from the test for the Caffe model.
    Mat ref = (Mat_<float>(6, 7) << 0, 1, 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
                                    0, 1, 0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
                                    0, 1, 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
                                    0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
                                    0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
                                    0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
    normAssertDetections(ref, out, "", 0.9, 3.4e-3, 1e-2);
}

INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, availableDnnTargets());

typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_fp16;

TEST_P(Test_TensorFlow_fp16, tests)
{
    int targetId = GetParam();
    const float l1 = 7e-4;
    const float lInf = 1e-2;
    runTensorFlowNet("fp16_single_conv", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_deconvolution", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_max_pool_odd_same", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_padding_valid", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_eltwise_add_mul", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_max_pool_odd_valid", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_pad_and_concat", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_max_pool_even", targetId, false, l1, lInf);
    runTensorFlowNet("fp16_padding_same", targetId, false, l1, lInf);
}

INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_fp16,
                        Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16));

TEST(Test_TensorFlow, defun)
{
    runTensorFlowNet("defun_dropout");
}

TEST(Test_TensorFlow, quantized)
{
    runTensorFlowNet("uint8_single_conv");
}

TEST(Test_TensorFlow, lstm)
{
    runTensorFlowNet("lstm", DNN_TARGET_CPU, true);
}

TEST(Test_TensorFlow, split)
{
    runTensorFlowNet("split_equals");
}

TEST(Test_TensorFlow, resize_nearest_neighbor)
{
    runTensorFlowNet("resize_nearest_neighbor");
}

TEST(Test_TensorFlow, slice)
{
    runTensorFlowNet("slice_4d");
}

TEST(Test_TensorFlow, softmax)
{
    runTensorFlowNet("keras_softmax");
}

TEST(Test_TensorFlow, relu6)
{
    runTensorFlowNet("keras_relu6");
}

TEST(Test_TensorFlow, keras_mobilenet_head)
{
    runTensorFlowNet("keras_mobilenet_head");
}

TEST(Test_TensorFlow, memory_read)
{
    double l1 = 1e-5;
    double lInf = 1e-4;
    runTensorFlowNet("lstm", DNN_TARGET_CPU, true, l1, lInf, true);

    runTensorFlowNet("batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
    runTensorFlowNet("fused_batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
    runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
}

// Test a custom layer.
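// The layer below implements TensorFlow's ResizeBilinear operation; the tests register it with
// CV_DNN_REGISTER_LAYER_CLASS before importing the graphs and unregister it afterwards.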
class ResizeBilinearLayer CV_FINAL : public Layer
{
public:
    ResizeBilinearLayer(const LayerParams &params) : Layer(params),
        outWidth(0), outHeight(0), factorWidth(1), factorHeight(1)
    {
        CV_Assert(!params.get<bool>("align_corners", false));
        CV_Assert(!blobs.empty());

        for (size_t i = 0; i < blobs.size(); ++i)
            CV_Assert(blobs[i].type() == CV_32SC1);

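        // The layer is configured either by a single blob holding the destination [height, width]
        // or by two single-element blobs holding integer height and width scale factors.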
        if (blobs.size() == 1)
        {
            CV_Assert(blobs[0].total() == 2);
            outHeight = blobs[0].at<int>(0, 0);
            outWidth = blobs[0].at<int>(0, 1);
        }
        else
        {
            CV_Assert(blobs.size() == 2, blobs[0].total() == 1, blobs[1].total() == 1);
            factorHeight = blobs[0].at<int>(0, 0);
            factorWidth = blobs[1].at<int>(0, 0);
            outHeight = outWidth = 0;
        }
    }

    static Ptr<Layer> create(LayerParams& params)
    {
        return Ptr<Layer>(new ResizeBilinearLayer(params));
    }

    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
                                 const int requiredOutputs,
                                 std::vector<std::vector<int> > &outputs,
                                 std::vector<std::vector<int> > &internals) const CV_OVERRIDE
    {
        std::vector<int> outShape(4);
        outShape[0] = inputs[0][0];  // batch size
        outShape[1] = inputs[0][1];  // number of channels
        outShape[2] = outHeight != 0 ? outHeight : (inputs[0][2] * factorHeight);
        outShape[3] = outWidth != 0 ? outWidth : (inputs[0][3] * factorWidth);
        outputs.assign(1, outShape);
        return false;
    }

    virtual void finalize(const std::vector<Mat*>& inputs, std::vector<Mat> &outputs) CV_OVERRIDE
    {
        if (!outWidth && !outHeight)
        {
            outHeight = outputs[0].size[2];
            outWidth = outputs[0].size[3];
        }
    }

    // This implementation is based on a reference implementation from
    // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) CV_OVERRIDE
    {
        Mat& inp = *inputs[0];
        Mat& out = outputs[0];
        const float* inpData = (float*)inp.data;
        float* outData = (float*)out.data;

        const int batchSize = inp.size[0];
        const int numChannels = inp.size[1];
        const int inpHeight = inp.size[2];
        const int inpWidth = inp.size[3];

        float heightScale = static_cast<float>(inpHeight) / outHeight;
        float widthScale = static_cast<float>(inpWidth) / outWidth;
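        // align_corners is rejected in the constructor, so the TFLite reference convention is used:
        // the scale is input size / output size, an output pixel (x, y) samples the input at
        // (x * widthScale, y * heightScale), and the lower-right neighbor is clamped to the border.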
        for (int b = 0; b < batchSize; ++b)
        {
            for (int y = 0; y < outHeight; ++y)
            {
                float input_y = y * heightScale;
                int y0 = static_cast<int>(std::floor(input_y));
                int y1 = std::min(y0 + 1, inpHeight - 1);
                for (int x = 0; x < outWidth; ++x)
                {
                    float input_x = x * widthScale;
                    int x0 = static_cast<int>(std::floor(input_x));
                    int x1 = std::min(x0 + 1, inpWidth - 1);
                    for (int c = 0; c < numChannels; ++c)
                    {
                        float interpolation =
                            inpData[offset(inp.size, c, x0, y0, b)] * (1 - (input_y - y0)) * (1 - (input_x - x0)) +
                            inpData[offset(inp.size, c, x0, y1, b)] * (input_y - y0) * (1 - (input_x - x0)) +
                            inpData[offset(inp.size, c, x1, y0, b)] * (1 - (input_y - y0)) * (input_x - x0) +
                            inpData[offset(inp.size, c, x1, y1, b)] * (input_y - y0) * (input_x - x0);
                        outData[offset(out.size, c, x, y, b)] = interpolation;
                    }
                }
            }
        }
    }

    virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}

private:
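    // Computes a flat element index into an NCHW blob: ((b * C + c) * H + y) * W + x.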
    static inline int offset(const MatSize& size, int c, int x, int y, int b)
    {
        return x + size[3] * (y + size[2] * (c + size[1] * b));
    }

    int outWidth, outHeight, factorWidth, factorHeight;
};

TEST(Test_TensorFlow, resize_bilinear)
{
    CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
    runTensorFlowNet("resize_bilinear");
    runTensorFlowNet("resize_bilinear_factor");
    LayerFactory::unregisterLayer("ResizeBilinear");
}

// The reference outputs below were generated with the following TensorFlow script:
// inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png')
// inp = inp[:,:,[2, 1, 0]].astype(np.float32).reshape(1, 512, 512, 3)
// outs = sess.run([sess.graph.get_tensor_by_name('feature_fusion/Conv_7/Sigmoid:0'),
//                  sess.graph.get_tensor_by_name('feature_fusion/concat_3:0')],
//                 feed_dict={'input_images:0': inp})
// scores = np.ascontiguousarray(outs[0].transpose(0, 3, 1, 2))
// geometry = np.ascontiguousarray(outs[1].transpose(0, 3, 1, 2))
// np.save('east_text_detection.scores.npy', scores)
// np.save('east_text_detection.geometry.npy', geometry)
TEST(Test_TensorFlow, EAST_text_detection)
{
    CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
    std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false);
    std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false);
    std::string refScoresPath = findDataFile("dnn/east_text_detection.scores.npy", false);
    std::string refGeometryPath = findDataFile("dnn/east_text_detection.geometry.npy", false);

    Net net = readNet(netPath);

    Mat img = imread(imgPath);
    Mat inp = blobFromImage(img, 1.0, Size(), Scalar(123.68, 116.78, 103.94), true, false);
    net.setInput(inp);

    std::vector<Mat> outs;
    std::vector<String> outNames(2);
    outNames[0] = "feature_fusion/Conv_7/Sigmoid";
    outNames[1] = "feature_fusion/concat_3";
    net.forward(outs, outNames);

    Mat scores = outs[0];
    Mat geometry = outs[1];

    normAssert(scores, blobFromNPY(refScoresPath), "scores");
    normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 1e-4, 3e-3);
    LayerFactory::unregisterLayer("ResizeBilinear");
}

}