modules/dnn/test/test_darknet_importer.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //                        (3-clause BSD License)
  13 //
  14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 // * Redistributions of source code must retain the above copyright notice,
  21 // this list of conditions and the following disclaimer.
  22 //
  23 // * Redistributions in binary form must reproduce the above copyright notice,
  24 // this list of conditions and the following disclaimer in the documentation
  25 // and/or other materials provided with the distribution.
  26 //
  27 // * Neither the names of the copyright holders nor the names of the contributors
  28 // may be used to endorse or promote products derived from this software
  29 // without specific prior written permission.
  30 //
  31 // This software is provided by the copyright holders and contributors "as is" and
  32 // any express or implied warranties, including, but not limited to, the implied
  33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  34 // In no event shall copyright holders or contributors be liable for any direct,
  35 // indirect, incidental, special, exemplary, or consequential damages
  36 // (including, but not limited to, procurement of substitute goods or services;
  37 // loss of use, data, or profits; or business interruption) however caused
  38 // and on any theory of liability, whether in contract, strict liability,
  39 // or tort (including negligence or otherwise) arising in any way out of
  40 // the use of this software, even if advised of the possibility of such damage.
  41 //
  42 //M*/
  43
  44 #include "test_precomp.hpp"
  45 #include "npy_blob.hpp"
  46 #include <opencv2/dnn/shape_utils.hpp>
  47
  48 namespace opencv_test { namespace {
  49
  50 template<typename TString>
  51 static std::string _tf(TString filename)
  52 {
  53     return (getOpenCVExtraDir() + "/dnn/") + filename;
  54 }
  55
  56 static std::vector<String> getOutputsNames(const Net& net)
  57 {
  58     std::vector<String> names;
  59     std::vector<int> outLayers = net.getUnconnectedOutLayers();
  60     std::vector<String> layersNames = net.getLayerNames();
  61     names.resize(outLayers.size());
  62     for (size_t i = 0; i < outLayers.size(); ++i)
  63           names[i] = layersNames[outLayers[i] - 1];
  64     return names;
  65 }
  66
  67 TEST(Test_Darknet, read_tiny_yolo_voc)
  68 {
  69     Net net = readNetFromDarknet(_tf("tiny-yolo-voc.cfg"));
  70     ASSERT_FALSE(net.empty());
  71 }
  72
  73 TEST(Test_Darknet, read_yolo_voc)
  74 {
  75     Net net = readNetFromDarknet(_tf("yolo-voc.cfg"));
  76     ASSERT_FALSE(net.empty());
  77 }
  78
  79 TEST(Test_Darknet, read_yolo_voc_stream)
  80 {
  81     applyTestTag(CV_TEST_TAG_MEMORY_1GB);
  82     Mat ref;
  83     Mat sample = imread(_tf("dog416.png"));
  84     Mat inp = blobFromImage(sample, 1.0/255, Size(416, 416), Scalar(), true, false);
  85     const std::string cfgFile = findDataFile("dnn/yolo-voc.cfg");
  86     const std::string weightsFile = findDataFile("dnn/yolo-voc.weights", false);
  87     // Import by paths.
  88     {
  89         Net net = readNetFromDarknet(cfgFile, weightsFile);
  90         net.setInput(inp);
  91         net.setPreferableBackend(DNN_BACKEND_OPENCV);
  92         ref = net.forward();
  93     }
  94     // Import from bytes array.
  95     {
  96         std::vector<char> cfg, weights;
  97         readFileContent(cfgFile, cfg);
  98         readFileContent(weightsFile, weights);
  99
 100         Net net = readNetFromDarknet(cfg.data(), cfg.size(), weights.data(), weights.size());
 101         net.setInput(inp);
 102         net.setPreferableBackend(DNN_BACKEND_OPENCV);
 103         Mat out = net.forward();
 104         normAssert(ref, out);
 105     }
 106 }
 107
 108 class Test_Darknet_layers : public DNNTestLayer
 109 {
 110 public:
 111     void testDarknetLayer(const std::string& name, bool hasWeights = false)
 112     {
 113         Mat inp = blobFromNPY(findDataFile("dnn/darknet/" + name + "_in.npy"));
 114         Mat ref = blobFromNPY(findDataFile("dnn/darknet/" + name + "_out.npy"));
 115
 116         std::string cfg = findDataFile("dnn/darknet/" + name + ".cfg");
 117         std::string model = "";
 118         if (hasWeights)
 119             model = findDataFile("dnn/darknet/" + name + ".weights", false);
 120
 121         checkBackend(&inp, &ref);
 122
 123         Net net = readNet(cfg, model);
 124         net.setPreferableBackend(backend);
 125         net.setPreferableTarget(target);
 126         net.setInput(inp);
 127         Mat out = net.forward();
 128         normAssert(out, ref, "", default_l1, default_lInf);
 129     }
 130 };
 131
 132 class Test_Darknet_nets : public DNNTestLayer
 133 {
 134 public:
 135     // Test object detection network from Darknet framework.
 136     void testDarknetModel(const std::string& cfg, const std::string& weights,
 137                           const std::vector<std::vector<int> >& refClassIds,
 138                           const std::vector<std::vector<float> >& refConfidences,
 139                           const std::vector<std::vector<Rect2d> >& refBoxes,
 140                           double scoreDiff, double iouDiff, float confThreshold = 0.24, float nmsThreshold = 0.4)
 141     {
 142         checkBackend();
 143
 144         Mat img1 = imread(_tf("dog416.png"));
 145         Mat img2 = imread(_tf("street.png"));
 146         std::vector<Mat> samples(2);
 147         samples[0] = img1; samples[1] = img2;
 148
 149         // determine test type, whether batch or single img
 150         int batch_size = refClassIds.size();
 151         CV_Assert(batch_size == 1 || batch_size == 2);
 152         samples.resize(batch_size);
 153
 154         Mat inp = blobFromImages(samples, 1.0/255, Size(416, 416), Scalar(), true, false);
 155
 156         Net net = readNet(findDataFile("dnn/" + cfg),
 157                           findDataFile("dnn/" + weights, false));
 158         net.setPreferableBackend(backend);
 159         net.setPreferableTarget(target);
 160         net.setInput(inp);
 161         std::vector<Mat> outs;
 162         net.forward(outs, getOutputsNames(net));
 163
 164         for (int b = 0; b < batch_size; ++b)
 165         {
 166             std::vector<int> classIds;
 167             std::vector<float> confidences;
 168             std::vector<Rect2d> boxes;
 169             for (int i = 0; i < outs.size(); ++i)
 170             {
 171                 Mat out;
 172                 if (batch_size > 1){
 173                     // get the sample slice from 3D matrix (batch, box, classes+5)
 174                     Range ranges[3] = {Range(b, b+1), Range::all(), Range::all()};
 175                     out = outs[i](ranges).reshape(1, outs[i].size[1]);
 176                 }else{
 177                     out = outs[i];
 178                 }
 179                 for (int j = 0; j < out.rows; ++j)
 180                 {
 181                     Mat scores = out.row(j).colRange(5, out.cols);
 182                     double confidence;
 183                     Point maxLoc;
 184                     minMaxLoc(scores, 0, &confidence, 0, &maxLoc);
 185
 186                     if (confidence > confThreshold) {
 187                         float* detection = out.ptr<float>(j);
 188                         double centerX = detection[0];
 189                         double centerY = detection[1];
 190                         double width = detection[2];
 191                         double height = detection[3];
 192                         boxes.push_back(Rect2d(centerX - 0.5 * width, centerY - 0.5 * height,
 193                                             width, height));
 194                         confidences.push_back(confidence);
 195                         classIds.push_back(maxLoc.x);
 196                     }
 197                 }
 198             }
 199
 200             // here we need NMS of boxes
 201             std::vector<int> indices;
 202             NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
 203
 204             std::vector<int> nms_classIds;
 205             std::vector<float> nms_confidences;
 206             std::vector<Rect2d> nms_boxes;
 207
 208             for (size_t i = 0; i < indices.size(); ++i)
 209             {
 210                 int idx = indices[i];
 211                 Rect2d box = boxes[idx];
 212                 float conf = confidences[idx];
 213                 int class_id = classIds[idx];
 214                 nms_boxes.push_back(box);
 215                 nms_confidences.push_back(conf);
 216                 nms_classIds.push_back(class_id);
 217             }
 218
 219             normAssertDetections(refClassIds[b], refConfidences[b], refBoxes[b], nms_classIds,
 220                              nms_confidences, nms_boxes, format("batch size %d, sample %d\n", batch_size, b).c_str(), confThreshold, scoreDiff, iouDiff);
 221         }
 222     }
 223
 224     void testDarknetModel(const std::string& cfg, const std::string& weights,
 225                           const std::vector<int>& refClassIds,
 226                           const std::vector<float>& refConfidences,
 227                           const std::vector<Rect2d>& refBoxes,
 228                           double scoreDiff, double iouDiff, float confThreshold = 0.24, float nmsThreshold = 0.4)
 229     {
 230         testDarknetModel(cfg, weights,
 231                          std::vector<std::vector<int> >(1, refClassIds),
 232                          std::vector<std::vector<float> >(1, refConfidences),
 233                          std::vector<std::vector<Rect2d> >(1, refBoxes),
 234                          scoreDiff, iouDiff, confThreshold, nmsThreshold);
 235     }
 236
 237     void testDarknetModel(const std::string& cfg, const std::string& weights,
 238                           const cv::Mat& ref, double scoreDiff, double iouDiff,
 239                           float confThreshold = 0.24, float nmsThreshold = 0.4)
 240     {
 241         CV_Assert(ref.cols == 7);
 242         std::vector<std::vector<int> > refClassIds;
 243         std::vector<std::vector<float> > refScores;
 244         std::vector<std::vector<Rect2d> > refBoxes;
 245         for (int i = 0; i < ref.rows; ++i)
 246         {
 247             int batchId = static_cast<int>(ref.at<float>(i, 0));
 248             int classId = static_cast<int>(ref.at<float>(i, 1));
 249             float score = ref.at<float>(i, 2);
 250             float left  = ref.at<float>(i, 3);
 251             float top   = ref.at<float>(i, 4);
 252             float right  = ref.at<float>(i, 5);
 253             float bottom = ref.at<float>(i, 6);
 254             Rect2d box(left, top, right - left, bottom - top);
 255             if (batchId >= refClassIds.size())
 256             {
 257                 refClassIds.resize(batchId + 1);
 258                 refScores.resize(batchId + 1);
 259                 refBoxes.resize(batchId + 1);
 260             }
 261             refClassIds[batchId].push_back(classId);
 262             refScores[batchId].push_back(score);
 263             refBoxes[batchId].push_back(box);
 264         }
 265         testDarknetModel(cfg, weights, refClassIds, refScores, refBoxes,
 266                          scoreDiff, iouDiff, confThreshold, nmsThreshold);
 267     }
 268 };
 269
 270 TEST_P(Test_Darknet_nets, YoloVoc)
 271 {
 272     applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_MEMORY_1GB);
 273
 274 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
 275     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
 276         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
 277 #endif
 278 #if defined(INF_ENGINE_RELEASE)
 279     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD
 280             && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
 281         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);  // need to update check function
 282 #endif
 283
 284     // batchId, classId, confidence, left, top, right, bottom
 285     Mat ref = (Mat_<float>(6, 7) << 0, 6,  0.750469f, 0.577374f, 0.127391f, 0.902949f, 0.300809f,  // a car
 286                                     0, 1,  0.780879f, 0.270762f, 0.264102f, 0.732475f, 0.745412f,  // a bicycle
 287                                     0, 11, 0.901615f, 0.1386f,   0.338509f, 0.421337f, 0.938789f,  // a dog
 288                                     1, 14, 0.623813f, 0.183179f, 0.381921f, 0.247726f, 0.625847f,  // a person
 289                                     1, 6,  0.667770f, 0.446555f, 0.453578f, 0.499986f, 0.519167f,  // a car
 290                                     1, 6,  0.844947f, 0.637058f, 0.460398f, 0.828508f, 0.66427f);  // a car
 291
 292     double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1e-2 : 8e-5;
 293     double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.018 : 3e-4;
 294     double nmsThreshold = (target == DNN_TARGET_MYRIAD) ? 0.397 : 0.4;
 295
 296     std::string config_file = "yolo-voc.cfg";
 297     std::string weights_file = "yolo-voc.weights";
 298
 299     {
 300     SCOPED_TRACE("batch size 1");
 301     testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff);
 302     }
 303
 304     {
 305     SCOPED_TRACE("batch size 2");
 306     testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff, 0.24, nmsThreshold);
 307     }
 308 }
 309
 310 TEST_P(Test_Darknet_nets, TinyYoloVoc)
 311 {
 312     applyTestTag(CV_TEST_TAG_MEMORY_512MB);
 313
 314 #if defined(INF_ENGINE_RELEASE)
 315     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD
 316             && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
 317         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);  // need to update check function
 318 #endif
 319     // batchId, classId, confidence, left, top, right, bottom
 320     Mat ref = (Mat_<float>(4, 7) << 0, 6,  0.761967f, 0.579042f, 0.159161f, 0.894482f, 0.31994f,   // a car
 321                                     0, 11, 0.780595f, 0.129696f, 0.386467f, 0.445275f, 0.920994f,  // a dog
 322                                     1, 6,  0.651450f, 0.460526f, 0.458019f, 0.522527f, 0.5341f,    // a car
 323                                     1, 6,  0.928758f, 0.651024f, 0.463539f, 0.823784f, 0.654998f); // a car
 324
 325     double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 8e-3 : 8e-5;
 326     double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.018 : 3e-4;
 327
 328     std::string config_file = "tiny-yolo-voc.cfg";
 329     std::string weights_file = "tiny-yolo-voc.weights";
 330
 331     {
 332     SCOPED_TRACE("batch size 1");
 333     testDarknetModel(config_file, weights_file, ref.rowRange(0, 2), scoreDiff, iouDiff);
 334     }
 335
 336     {
 337     SCOPED_TRACE("batch size 2");
 338     testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff);
 339     }
 340 }
 341
 342 TEST_P(Test_Darknet_nets, YOLOv3)
 343 {
 344     applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB));
 345
 346 #if defined(INF_ENGINE_RELEASE)
 347     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD
 348             && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
 349         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
 350 #endif
 351
 352     // batchId, classId, confidence, left, top, right, bottom
 353     Mat ref = (Mat_<float>(9, 7) << 0, 7,  0.952983f, 0.614622f, 0.150257f, 0.901369f, 0.289251f,  // a truck
 354                                     0, 1,  0.987908f, 0.150913f, 0.221933f, 0.742255f, 0.74626f,   // a bicycle
 355                                     0, 16, 0.998836f, 0.160024f, 0.389964f, 0.417885f, 0.943716f,  // a dog (COCO)
 356                                     1, 9,  0.384801f, 0.659824f, 0.372389f, 0.673926f, 0.429412f,  // a traffic light
 357                                     1, 9,  0.733283f, 0.376029f, 0.315694f, 0.401776f, 0.395165f,  // a traffic light
 358                                     1, 9,  0.785352f, 0.665503f, 0.373543f, 0.688893f, 0.439245f,  // a traffic light
 359                                     1, 0,  0.980052f, 0.195856f, 0.378454f, 0.258626f, 0.629258f,  // a person
 360                                     1, 2,  0.989633f, 0.450719f, 0.463353f, 0.496305f, 0.522258f,  // a car
 361                                     1, 2,  0.997412f, 0.647584f, 0.459939f, 0.821038f, 0.663947f); // a car
 362
 363     double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0047 : 8e-5;
 364     double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.018 : 3e-4;
 365
 366     std::string config_file = "yolov3.cfg";
 367     std::string weights_file = "yolov3.weights";
 368
 369     {
 370     SCOPED_TRACE("batch size 1");
 371     testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff);
 372     }
 373
 374 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2018050000)
 375     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
 376         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL)  // Test with 'batch size 2' is disabled for DLIE/OpenCL target
 377 #endif
 378
 379     {
 380         SCOPED_TRACE("batch size 2");
 381         testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff);
 382     }
 383 }
 384
 385 INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets());  // run each Test_Darknet_nets case once per value from dnnBackendsAndTargets(); the instantiation name prefix is left empty
 386
 387 TEST_P(Test_Darknet_layers, shortcut)
 388 {
 389     testDarknetLayer("shortcut");
 390 }
 391
 392 TEST_P(Test_Darknet_layers, upsample)
 393 {
 394     testDarknetLayer("upsample");
 395 }
 396
 397 TEST_P(Test_Darknet_layers, avgpool_softmax)
 398 {
 399     testDarknetLayer("avgpool_softmax");
 400 }
 401
 402 TEST_P(Test_Darknet_layers, region)
 403 {
 404     testDarknetLayer("region");
 405 }
 406
 407 TEST_P(Test_Darknet_layers, reorg)
 408 {
 409     testDarknetLayer("reorg");
 410 }
 411
 412 INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_layers, dnnBackendsAndTargets());  // run each Test_Darknet_layers case once per value from dnnBackendsAndTargets(); the instantiation name prefix is left empty
 413
 414 }} // namespace