Merge pull request #18517 from alalek:backport_18031
[platform/upstream/opencv.git] modules/dnn/src/dnn.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "op_halide.hpp"
#include "op_inf_engine.hpp"
#include "ie_ngraph.hpp"

#include "halide_scheduler.hpp"
#include <set>
#include <algorithm>
#include <iostream>
#include <sstream>
#include <fstream>
#include <iterator>
#include <numeric>
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>

#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/logger.hpp>

namespace cv {
namespace dnn {
CV__DNN_EXPERIMENTAL_NS_BEGIN

static size_t DNN_NETWORK_DUMP = utils::getConfigurationParameterSizeT("OPENCV_DNN_NETWORK_DUMP", 0);

// This option is useful for running memory-error detection with valgrind.
static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false);

#ifdef HAVE_OPENCL
static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false);
#endif

static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
#ifdef HAVE_INF_ENGINE
    (size_t)DNN_BACKEND_INFERENCE_ENGINE
#else
    (size_t)DNN_BACKEND_OPENCV
#endif
);

// Additional checks (slows down execution!)
static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);

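// Usage sketch (illustrative, not part of the build): these knobs are read
// once at startup from the process environment, e.g. to run under valgrind
// without blob reuse:
//
//   OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS=1 ./my_dnn_app
//
// or to force the plain OpenCV backend (assuming the Backend enum ordering
// from dnn.hpp, where DNN_BACKEND_OPENCV == 3):
//
//   OPENCV_DNN_BACKEND_DEFAULT=3 ./my_dnn_app
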
using std::vector;
using std::map;
using std::make_pair;
using std::set;
using std::string;

//==================================================================================================

class BackendRegistry
{
public:
    typedef std::vector< std::pair<Backend, Target> > BackendsList;
    const BackendsList & getBackends() const { return backends; }
    static BackendRegistry & getRegistry()
    {
        static BackendRegistry impl;
        return impl;
    }

#ifdef HAVE_INF_ENGINE
    static inline bool checkIETarget(Target target)
    {
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R3)
        // Lightweight detection
        const std::vector<std::string> devices = getCore("").GetAvailableDevices();
        for (std::vector<std::string>::const_iterator i = devices.begin(); i != devices.end(); ++i)
        {
            if (std::string::npos != i->find("MYRIAD") && target == DNN_TARGET_MYRIAD)
                return true;
            else if (std::string::npos != i->find("FPGA") && target == DNN_TARGET_FPGA)
                return true;
            else if (std::string::npos != i->find("CPU") && target == DNN_TARGET_CPU)
                return true;
            else if (std::string::npos != i->find("GPU") && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
                return true;
        }
        return false;
#else
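        // Fallback for Inference Engine releases before 2019R3: probe the
        // target by building and running a minimal one-layer (1x1 conv) net.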
        cv::dnn::Net net;
        cv::dnn::LayerParams lp;
        lp.set("kernel_size", 1);
        lp.set("num_output", 1);
        lp.set("bias_term", false);
        lp.type = "Convolution";
        lp.name = "testLayer";
        lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1)));
        net.addLayerToPrev(lp.name, lp.type, lp);
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
        net.setPreferableTarget(target);
        static int inpDims[] = {1, 2, 3, 4};
        net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0)));
        try
        {
            net.forward();
        }
        catch(const std::exception& e)
        {
            CV_LOG_INFO(NULL, "checkIETarget(" << (int)target << ") has failed with message: " << e.what());
            return false;
        }
        return true;
#endif
    }
#endif

private:
    BackendRegistry()
    {
#ifdef HAVE_HALIDE
        backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU));
#  ifdef HAVE_OPENCL
        if (cv::ocl::useOpenCL())
            backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL));
#  endif
#endif // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
        if (checkIETarget(DNN_TARGET_CPU)) {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_CPU));
#endif
#ifdef HAVE_DNN_NGRAPH
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU));
#endif
        }
        if (checkIETarget(DNN_TARGET_MYRIAD)) {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_MYRIAD));
#endif
#ifdef HAVE_DNN_NGRAPH
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD));
#endif
        }
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
        if (checkIETarget(DNN_TARGET_FPGA))
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_FPGA));
#endif
#ifdef HAVE_OPENCL
        if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel())
        {
            if (checkIETarget(DNN_TARGET_OPENCL)) {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL));
#endif
#ifdef HAVE_DNN_NGRAPH
                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL));
#endif
            }
            if (checkIETarget(DNN_TARGET_OPENCL_FP16)) {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL_FP16));
#endif
#ifdef HAVE_DNN_NGRAPH
                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL_FP16));
#endif
            }
        }
#endif
#endif // HAVE_INF_ENGINE

#ifdef HAVE_OPENCL
        if (cv::ocl::useOpenCL())
        {
            backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL));
            backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16));
        }
#endif

        backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));
    }

    BackendsList backends;
};


std::vector< std::pair<Backend, Target> > getAvailableBackends()
{
    return BackendRegistry::getRegistry().getBackends();
}

std::vector<Target> getAvailableTargets(Backend be)
{
    if (be == DNN_BACKEND_DEFAULT)
        be = (Backend)PARAM_DNN_BACKEND_DEFAULT;
#ifdef HAVE_INF_ENGINE
    if (be == DNN_BACKEND_INFERENCE_ENGINE)
        be = getInferenceEngineBackendTypeParam();
#endif

    std::vector<Target> result;
    const BackendRegistry::BackendsList all_backends = getAvailableBackends();
    for(BackendRegistry::BackendsList::const_iterator i = all_backends.begin(); i != all_backends.end(); ++i )
    {
        if (i->first == be)
            result.push_back(i->second);
    }
    return result;
}

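// Usage sketch (illustrative): picking a target that is actually available
// for the plain OpenCV backend; `net` is a loaded cv::dnn::Net:
//
//   std::vector<Target> targets = getAvailableTargets(DNN_BACKEND_OPENCV);
//   if (std::find(targets.begin(), targets.end(), DNN_TARGET_OPENCL) != targets.end())
//       net.setPreferableTarget(DNN_TARGET_OPENCL);
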
//==================================================================================================

namespace
{
    typedef std::vector<MatShape> ShapesVec;

    struct LayerShapes
    {
        ShapesVec in, out, internal;
        // There is no guarantee that a layer which supports in-place
        // computation will actually be computed in-place
        // (input.data_ptr == output.data_ptr). If a layer says it can work
        // in-place and the layers after it no longer use the input blob,
        // we'll set output = input.
        bool supportInPlace;
        LayerShapes() {supportInPlace = false;}
    };
}

Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
                  const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    Mat blob;
    blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth);
    return blob;
}

void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
                   const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    std::vector<Mat> images(1, image.getMat());
    blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
}

Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
                   const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    Mat blob;
    blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
    return blob;
}

void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
                    Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
    if (ddepth == CV_8U)
    {
        CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
        CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
    }

    std::vector<Mat> images;
    images_.getMatVector(images);
    CV_Assert(!images.empty());
    for (size_t i = 0; i < images.size(); i++)
    {
        Size imgSize = images[i].size();
        if (size == Size())
            size = imgSize;
        if (size != imgSize)
        {
            if(crop)
            {
              float resizeFactor = std::max(size.width / (float)imgSize.width,
                                            size.height / (float)imgSize.height);
              resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
              Rect crop(Point(0.5 * (images[i].cols - size.width),
                              0.5 * (images[i].rows - size.height)),
                        size);
              images[i] = images[i](crop);
            }
            else
              resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
        }
        if(images[i].depth() == CV_8U && ddepth == CV_32F)
            images[i].convertTo(images[i], CV_32F);
        Scalar mean = mean_;
        if (swapRB)
            std::swap(mean[0], mean[2]);

        images[i] -= mean;
        images[i] *= scalefactor;
    }

    size_t nimages = images.size();
    Mat image0 = images[0];
    int nch = image0.channels();
    CV_Assert(image0.dims == 2);
    if (nch == 3 || nch == 4)
    {
        int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();
        Mat ch[4];

        for(size_t i = 0; i < nimages; i++ )
        {
            const Mat& image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
            CV_Assert(image.size() == image0.size());

            for( int j = 0; j < nch; j++ )
                ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
            if(swapRB)
                std::swap(ch[0], ch[2]);
            split(image, ch);
        }
    }
    else
    {
       CV_Assert(nch == 1);
       int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
       blob_.create(4, sz, ddepth);
       Mat blob = blob_.getMat();

       for(size_t i = 0; i < nimages; i++ )
       {
           const Mat& image = images[i];
           CV_Assert(image.depth() == blob_.depth());
           nch = image.channels();
           CV_Assert(image.dims == 2 && (nch == 1));
           CV_Assert(image.size() == image0.size());

           image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
       }
    }
}

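// Usage sketch (illustrative): a typical preprocessing call producing an
// NCHW blob for a 224x224 network input; the path and mean values below
// are placeholders:
//
//   Mat img = imread("input.jpg");  // hypothetical path
//   Mat blob = blobFromImage(img, 1.0, Size(224, 224),
//                            Scalar(104, 117, 123), /*swapRB=*/false,
//                            /*crop=*/false, CV_32F);
//   // blob has dims == 4: 1 x 3 x 224 x 224
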
void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)
{
    CV_TRACE_FUNCTION();

    // A blob is a 4-dimensional matrix in floating-point precision:
    // blob_[0] = batchSize = nbOfImages
    // blob_[1] = nbOfChannels
    // blob_[2] = height
    // blob_[3] = width
    CV_Assert(blob_.depth() == CV_32F);
    CV_Assert(blob_.dims == 4);

    images_.create(cv::Size(1, blob_.size[0]), blob_.depth());

    std::vector<Mat> vectorOfChannels(blob_.size[1]);
    for (int n = 0; n < blob_.size[0]; ++n)
    {
        for (int c = 0; c < blob_.size[1]; ++c)
        {
            vectorOfChannels[c] = getPlane(blob_, n, c);
        }
        cv::merge(vectorOfChannels, images_.getMatRef(n));
    }
}

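// Usage sketch (illustrative): imagesFromBlob() inverts the batching done by
// blobFromImages() (but not the mean/scale normalization):
//
//   std::vector<Mat> recovered;
//   imagesFromBlob(blob, recovered);  // recovered.size() == blob.size[0]
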
#ifdef HAVE_OPENCL
class OpenCLBackendWrapper : public BackendWrapper
{
public:
    OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
    {
        m.copyTo(umat);
        host = &m;
        hostDirty = false;
    }

    OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
        : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
    {
        Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
        CV_Assert(!base.empty());

        host = &m;

        int shape[] = {1, (int)base->umat.total()};
        umat = base->umat.reshape(1, 2, &shape[0])
                         .colRange(0, host->total())
                         .reshape(1, host->dims, &host->size[0]);
        hostDirty = false;
    }

    static Ptr<BackendWrapper> create(Mat& m)
    {
        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));
    }

    static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
    {
        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));
    }

    static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper> >& wrappers)
    {
        const int numWrappers = wrappers.size();
        std::vector<UMat> mats(wrappers.size());
        for (int i = 0; i < numWrappers; ++i)
        {
            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
            CV_Assert(!umatWrapper.empty());
            umatWrapper->copyToDevice();
            mats[i] = umatWrapper->umat;
        }
        return mats;
    }

    // Replaces all umats in wrappers with the given ones.
    static void update(const std::vector<Ptr<BackendWrapper> >& wrappers,
                       const std::vector<UMat>& umats)
    {
        CV_Assert(wrappers.size() == umats.size());
        for (int i = 0, n = umats.size(); i < n; ++i)
        {
            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
            CV_Assert(!umatWrapper.empty());
            umatWrapper->umat = umats[i];
        }
    }

    ~OpenCLBackendWrapper() {}

    // Copies data from device to host memory.
    virtual void copyToHost() CV_OVERRIDE
    {
        umat.copyTo(*host);
    }

    virtual void setHostDirty() CV_OVERRIDE
    {
        hostDirty = true;
    }

    void copyToDevice()
    {
        if (hostDirty)
        {
            host->copyTo(umat);
            hostDirty = false;
        }
    }

private:
    UMat umat;
    Mat* host;
    bool hostDirty;
};
#endif

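// Note (illustrative): the wrapper above implements a lazy host-to-device
// protocol. CPU-side writers call setHostDirty(); the next getUMatVector()
// call triggers copyToDevice(), so OpenCL kernels always see current data,
// while copyToHost() moves results back on demand.
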
struct LayerPin
{
    int lid;
    int oid;

    LayerPin(int layerId = -1, int outputId = -1)
        : lid(layerId), oid(outputId) {}

    bool valid() const
    {
        return (lid >= 0 && oid >= 0);
    }

    bool equal(const LayerPin &r) const
    {
        return (lid == r.lid && oid == r.oid);
    }

    bool operator<(const LayerPin &r) const
    {
        return lid < r.lid || (lid == r.lid && oid < r.oid);
    }

    bool operator ==(const LayerPin &r) const
    {
        return lid == r.lid && oid == r.oid;
    }
};

struct LayerData
{
    LayerData() : id(-1), skip(false), flag(0) {}
    LayerData(int _id, const String &_name, const String &_type, LayerParams &_params)
        : id(_id), name(_name), type(_type), params(_params), skip(false), flag(0)
    {
        CV_TRACE_FUNCTION();

        // add logging info
        params.name = name;
        params.type = type;
    }

    int id;
    String name;
    String type;
    LayerParams params;

    std::vector<LayerPin> inputBlobsId;
    std::set<int> inputLayersId;
    std::set<int> requiredOutputs;
    std::vector<LayerPin> consumers;
    std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
    std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
    std::vector<Ptr<BackendWrapper> > internalBlobsWrappers;

    Ptr<Layer> layerInstance;
    std::vector<Mat> outputBlobs;
    std::vector<Mat*> inputBlobs;
    std::vector<Mat> internals;
    // Computation nodes of implemented backends (except DEFAULT).
    std::map<int, Ptr<BackendNode> > backendNodes;
    // Flag to skip this layer's computation for a specific backend.
    bool skip;

    int flag;

    Ptr<Layer> getLayerInstance()
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(type, "type", type.c_str());

        if (layerInstance)
            return layerInstance;

        layerInstance = LayerFactory::createLayerInstance(type, params);
        if (!layerInstance)
        {
            CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\"");
        }

        return layerInstance;
    }
};

// Fake layer that holds the network's input blobs.
struct DataLayer : public Layer
{
    DataLayer() : Layer()
    {
        skip = false;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && inputsData.size() == 1);
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (outputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> outputs, internals;
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        // Supported modes:
        // | Input type | Output type |
        // |       fp32 |        fp32 |
        // |      uint8 |        fp32 |
        for (int i = 0; i < inputsData.size(); ++i)
        {
            double scale = scaleFactors[i];
            Scalar& mean = means[i];
            CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
            CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");

            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
            {
                singleMean = mean[j] == mean[j - 1];
            }

            if (singleMean)
            {
                inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
            }
            else
            {
                for (int n = 0; n < inputsData[i].size[0]; ++n)
                    for (int c = 0; c < inputsData[i].size[1]; ++c)
                    {
                        Mat inp = getPlane(inputsData[i], n, c);
                        Mat out = getPlane(outputs[i], n, c);
                        inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
                    }
            }
        }
    }

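    // Note (illustrative): both branches above compute the same per-channel
    // normalization, out = scale * (in - mean[c]); convertTo() expresses it
    // as out = in * alpha + beta with alpha = scale and beta = -mean[c] * scale.
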
#ifdef HAVE_OPENCL
    std::vector<Mat> tmp_expressions;
    bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        // Supported modes:
        // | Input type | Output type |
        // |       fp32 |        fp32 |
        // |       fp32 |        fp16 |
        // |      uint8 |        fp32 |
        std::vector<UMat> outputs;
        outputs_.getUMatVector(outputs);

        tmp_expressions.clear();
        for (int i = 0; i < inputsData.size(); ++i)
        {
            Mat inputData = inputsData[i];

            double scale = scaleFactors[i];
            Scalar& mean = means[i];

            CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
            {
                singleMean = mean[j] == mean[j - 1];
            }

            if (outputs_.depth() == CV_16S)
            {
                if (singleMean)
                {
                    tmp_expressions.push_back(Mat(scale * (inputsData[i] - mean[0])));
                    convertFp16(tmp_expressions.back(), outputs[i]);
                }
                else
                {
                    for (int n = 0; n < inputsData[i].size[0]; ++n)
                        for (int c = 0; c < inputsData[i].size[1]; ++c)
                        {
                            Mat inp = getPlane(inputsData[i], n, c);

                            std::vector<cv::Range> plane(4, Range::all());
                            plane[0] = Range(n, n + 1);
                            plane[1] = Range(c, c + 1);
                            UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                            tmp_expressions.push_back(scale * (inp - mean[c]));
                            convertFp16(tmp_expressions.back(), out);
                        }
                }
            }
            else
            {
                CV_Assert(outputs_.depth() == CV_32F);
                if (singleMean)
                {
                    inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
                }
                else
                {
                    for (int n = 0; n < inputsData[i].size[0]; ++n)
                        for (int c = 0; c < inputsData[i].size[1]; ++c)
                        {
                            Mat inp = getPlane(inputsData[i], n, c);

                            std::vector<cv::Range> plane(4, Range::all());
                            plane[0] = Range(n, n + 1);
                            plane[1] = Range(c, c + 1);
                            UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                            inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
                        }
                }
            }
        }
        return true;
    }
#endif

    int outputNameToIndex(const String& tgtName) CV_OVERRIDE
    {
        int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin());
        return (idx < (int)outNames.size()) ? idx : -1;
    }

    void setNames(const std::vector<String> &names)
    {
        outNames.assign(names.begin(), names.end());
        shapes.clear(); shapes.resize(outNames.size());
    }

    void setInputShape(const String& tgtName, const MatShape& shape)
    {
        std::vector<String>::const_iterator it = std::find(outNames.begin(), outNames.end(), tgtName);
        CV_Check(tgtName, it != outNames.end(), "Unknown input");
        int idx = (int)(it - outNames.begin());

        CV_Assert(idx < (int)shapes.size());
        CV_Check(tgtName, shapes[idx].empty(), "Input shape redefinition is not allowed");
        shapes[idx] = shape;
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == requiredOutputs);
        outputs.assign(inputs.begin(), inputs.end());
        return false;
    }

    virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        std::vector<Mat> outputs;
        outputs_arr.getMatVector(outputs);

        CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(),
                  inputsData.size() == outputs.size());
        skip = true;
        for (int i = 0; skip && i < inputsData.size(); ++i)
        {
            if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar())
                skip = false;
        }
    }

#ifdef HAVE_DNN_IE_NN_BUILDER_2019
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        CV_CheckEQ(inputsData.size(), (size_t)1, "");
        CV_CheckEQ(inputsData[0].dims, 4, "");
        const size_t numChannels = inputsData[0].size[1];
        CV_Assert(numChannels <= 4);

        // Scale
        InferenceEngine::TensorDesc td(InferenceEngine::Precision::FP32, {numChannels},
                                       InferenceEngine::Layout::C);
        auto weights = InferenceEngine::make_shared_blob<float>(td);
        weights->allocate();

        float* weight_buf = weights->buffer().as<float*>();
        std::fill(weight_buf, weight_buf + numChannels, scaleFactors[0]);

        // Mean subtraction
        auto biases = InferenceEngine::make_shared_blob<float>(td);
        biases->allocate();
        float* bias_buf = biases->buffer().as<float*>();

        for (int i = 0; i < numChannels; ++i)
        {
            bias_buf[i] = -means[0][i] * scaleFactors[0];
        }

        InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name);
        addConstantData("weights", weights, ieLayer);
        addConstantData("biases", biases, ieLayer);
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_DNN_IE_NN_BUILDER_2019

    std::vector<String> outNames;
    std::vector<MatShape> shapes;
    // Preprocessing parameters for each network's input.
    std::vector<double> scaleFactors;
    std::vector<Scalar> means;
    std::vector<Mat> inputsData;
    bool skip;
};

struct BlobManager
{
public:
    // Increments the reference counter for a layer output.
    void addReference(const LayerPin& lp)
    {
        std::map<LayerPin, int>::iterator it = refCounter.find(lp);
        if (it == refCounter.end())
            refCounter[lp] = 1;
        else
            it->second += 1;
    }

    void addReferences(const std::vector<LayerPin>& pins)
    {
        for (int i = 0; i < pins.size(); i++)
        {
            addReference(pins[i]);
        }
    }

    // Returns the number of references to the allocated memory used by the
    // given layer blob.
    int numReferences(const LayerPin& lp)
    {
        std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
        CV_Assert(mapIt != reuseMap.end());
        LayerPin memHost = mapIt->second;

        std::map<LayerPin, int>::iterator refIt = refCounter.find(memHost);
        CV_Assert(refIt != refCounter.end());
        return refIt->second;
    }

    // Reuse the data allocated in <host> inside the <user> blob.
    void reuse(const LayerPin& host, const LayerPin& user)
    {
        CV_Assert(reuseMap.find(user) == reuseMap.end());
        CV_Assert(reuseMap.find(host) != reuseMap.end());
        LayerPin memHost = reuseMap[host];
        reuseMap[user] = memHost;
        if (refCounter.find(memHost) != refCounter.end())
        {
            std::map<LayerPin, int>::iterator userRefIt = refCounter.find(user);
            if (userRefIt != refCounter.end())
            {
                refCounter[memHost] += userRefIt->second;
                refCounter.erase(userRefIt);
            }
            else
                refCounter[memHost] += 1;
        }
    }

    // Decrements the reference counter for the memory allocated for the given blob.
    void releaseReference(const LayerPin& lp)
    {
        std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
        CV_Assert(mapIt != reuseMap.end());

        std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
        CV_Assert(refIt != refCounter.end());
        CV_Assert(refIt->second > 0);
        refIt->second -= 1;
    }

    void releaseReferences(const std::vector<LayerPin>& pins)
    {
        for (int i = 0; i < pins.size(); i++)
        {
            releaseReference(pins[i]);
        }
    }

    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half)
    {
        if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
        {
            Mat bestBlob;
            LayerPin bestBlobPin;

            std::map<LayerPin, Mat>::iterator hostIt;
            std::map<LayerPin, int>::iterator refIt;

            const int targetTotal = total(shape);
            int bestBlobTotal = INT_MAX;

            for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
            {
                refIt = refCounter.find(hostIt->first);
                // Use only blobs that have been referenced before; an
                // unreferenced blob might be used as a network output.
                if (refIt != refCounter.end() && refIt->second == 0)
                {
                    Mat& unusedBlob = hostIt->second;
                    if (unusedBlob.total() >= targetTotal &&
                        unusedBlob.total() < bestBlobTotal)
                    {
                        bestBlobPin = hostIt->first;
                        bestBlob = unusedBlob;
                        bestBlobTotal = unusedBlob.total();
                    }
                }
            }
            if (!bestBlob.empty())
            {
                reuse(bestBlobPin, lp);
                dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
                return;
            }
        }

        {
            // If dst has already been allocated with total(shape) elements,
            // it won't be recreated and the dst.data pointer remains the same.
            dst.create(shape, use_half ? CV_16S : CV_32F);
            addHost(lp, dst);
        }
    }

    void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
                               std::vector<LayerPin>& pinsForInternalBlobs,
                               bool use_half = false)
    {
        CV_TRACE_FUNCTION();

        pinsForInternalBlobs.clear();

        std::vector<Mat>& outputBlobs = ld.outputBlobs,
                &internalBlobs = ld.internals;

        const ShapesVec& outShapes = layerShapes.out,
                internalShapes = layerShapes.internal;

        outputBlobs.resize(std::max((size_t)1, outShapes.size())); // a layer produces at least one output blob
        internalBlobs.resize(internalShapes.size());

        CV_Assert(ld.requiredOutputs.size() <= outShapes.size());

        // Check that the layer could work in-place.
        bool inPlace = false;
        if (layerShapes.supportInPlace)
        {
            if (ld.inputBlobs.size() == 1)
            {
                // Get the number of references to the input memory.
                int numRef = numReferences(ld.inputBlobsId[0]);
                // In-place is possible only if the current layer is the one
                // and only consumer of this blob.
                inPlace = numRef == 1;
            }
        }

        ShapesVec shapes(outShapes);
        shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
        std::vector<Mat*> blobs;
        for(int i = 0; i < outputBlobs.size(); i++)
        {
            blobs.push_back(&outputBlobs[i]);
        }

        for(int i = 0; i < internalBlobs.size(); i++)
        {
            blobs.push_back(&internalBlobs[i]);
            if (total(internalShapes[i]))
            {
                pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
            }
        }

        addReferences(pinsForInternalBlobs);

        std::map<int, std::vector<int> > idxSizes;
        for(int i = 0; i < shapes.size(); i++)
        {
            idxSizes[total(shapes[i])].push_back(i);
        }

        std::map<int, std::vector<int> >::reverse_iterator it;
        for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
        {
            for(int j = 0; j < it->second.size(); j++)
            {
                int index = it->second[j];
                if (total(shapes[index]))
                {
                    LayerPin blobPin(ld.id, index);
                    if (index < outShapes.size() && inPlace)
                    {
                        CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
                        ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
                        reuse(ld.inputBlobsId[0], blobPin);
                    }
                    else
                        reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half);
                }
            }
        }
    }

    // Clears the internal state. Called before every reallocation.
    void reset()
    {
        CV_TRACE_FUNCTION();

        refCounter.clear();
        reuseMap.clear();
        memHosts.clear();
    }

private:
    // Registers allocated memory.
    void addHost(const LayerPin& lp, const Mat& mat)
    {
        CV_Assert(memHosts.find(lp) == memHosts.end());
        reuseMap[lp] = lp;
        memHosts[lp] = mat;
    }

    std::map<LayerPin, int> refCounter;
    // Maps a pin to its origin blob (the one for which the memory was
    // originally allocated). For origin blobs, key == value.
    std::map<LayerPin, LayerPin> reuseMap;
    std::map<LayerPin, Mat> memHosts;
};

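// Usage sketch (illustrative): reuseOrCreate() hands back an existing
// allocation only once its reference count has dropped to zero, preferring
// the smallest free blob that still fits (best fit):
//
//   BlobManager mgr;
//   Mat a, b;
//   mgr.reuseOrCreate(shape(1, 64, 56, 56), LayerPin(1, 0), a, /*use_half=*/false);
//   mgr.addReference(LayerPin(1, 0));      // one consumer registered
//   mgr.releaseReference(LayerPin(1, 0));  // consumer done: refcount == 0
//   mgr.reuseOrCreate(shape(1, 64, 56, 56), LayerPin(2, 0), b, false);
//   // b now aliases a's memory (unless memory optimizations are disabled)
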
static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
{
    if (backendId == DNN_BACKEND_OPENCV)
    {
        if (targetId == DNN_TARGET_CPU)
            return Ptr<BackendWrapper>();
#ifdef HAVE_OPENCL
        else if (IS_DNN_OPENCL_TARGET(targetId))
            return OpenCLBackendWrapper::create(m);
#endif
        else
            CV_Error(Error::StsNotImplemented, "Unknown/unsupported target identifier");
    }
    else if (backendId == DNN_BACKEND_HALIDE)
    {
        CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
        return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
#endif  // HAVE_HALIDE
    }
    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
    {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
        return Ptr<BackendWrapper>(new InfEngineBackendWrapper(targetId, m));
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
#endif
    }
    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
    {
#ifdef HAVE_DNN_NGRAPH
        return Ptr<BackendWrapper>(new NgraphBackendWrapper(targetId, m));
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
#endif
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
    return Ptr<BackendWrapper>();  // TODO Error?
}

static int g_networkId = 0;

detail::NetImplBase::NetImplBase()
    : networkId(CV_XADD(&g_networkId, 1))
    , networkDumpCounter(0)
    , dumpLevel(DNN_NETWORK_DUMP)
{
    // nothing
}

std::string detail::NetImplBase::getDumpFileNameBase()
{
    std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++);
    return dumpFileNameBase;
}

struct Net::Impl : public detail::NetImplBase
{
    typedef std::map<int, LayerShapes> LayersShapesMap;
    typedef std::map<int, LayerData> MapIdToLayerData;

    Impl()
    {
        // allocate fake net input layer
        netInputLayer = Ptr<DataLayer>(new DataLayer());
        LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
        inpl.id = 0;
        netInputLayer->name = inpl.name = "_input";
        inpl.type = "__NetInputLayer__";
        inpl.layerInstance = netInputLayer;
        layerNameToId.insert(std::make_pair(inpl.name, inpl.id));

        lastLayerId = 0;
        netWasAllocated = false;
        fusion = true;
        isAsync = false;
        preferableBackend = DNN_BACKEND_DEFAULT;
        preferableTarget = DNN_TARGET_CPU;
        skipInfEngineInit = false;
    }

    Ptr<DataLayer> netInputLayer;
    std::vector<LayerPin> blobsToKeep;
    MapIdToLayerData layers;
    std::map<String, int> layerNameToId;
    BlobManager blobManager;
    int preferableBackend;
    int preferableTarget;
    String halideConfigFile;
    bool skipInfEngineInit;
    // Map host data to backend specific wrapper.
    std::map<void*, Ptr<BackendWrapper> > backendWrappers;

    int lastLayerId;

    bool netWasAllocated;
    bool fusion;
    bool isAsync;
    std::vector<int64> layersTimings;
    Mat output_blob;

    Ptr<BackendWrapper> wrap(Mat& host)
    {
        if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU)
            return Ptr<BackendWrapper>();

        MatShape shape(host.dims);
        for (int i = 0; i < host.dims; ++i)
            shape[i] = host.size[i];

        void* data = host.data;
        if (backendWrappers.find(data) != backendWrappers.end())
        {
            Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
            if (preferableBackend == DNN_BACKEND_OPENCV)
            {
#ifdef HAVE_OPENCL
                CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
                return OpenCLBackendWrapper::create(baseBuffer, host);
#else
                CV_Error(Error::StsInternal, "");
#endif
            }
            else if (preferableBackend == DNN_BACKEND_HALIDE)
            {
                CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
                return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
#endif
            }
            else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
            {
                return wrapMat(preferableBackend, preferableTarget, host);
            }
            else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
            {
                return wrapMat(preferableBackend, preferableTarget, host);
            }
            else
                CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
        }

        Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
        backendWrappers[data] = wrapper;
        return wrapper;
    }

#ifdef HAVE_HALIDE
    void compileHalide()
    {
        CV_TRACE_FUNCTION();

        CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);

        HalideScheduler scheduler(halideConfigFile);
        std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
        {
            LayerData &ld = it->second;
            Ptr<Layer> layer = ld.layerInstance;
            if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip)
            {
                CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty());
                bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]);
                if (!scheduled)
                {
                    // Use automatic scheduling provided by the layer.
                    layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE],
                                                ld.inputBlobs, ld.outputBlobs,
                                                preferableTarget);
                }
                compileList.emplace_back(ld);
            }
        }
        std::atomic<int> progress(0);
        auto fn = ([&] () -> void
        {
            for (;;)
            {
                int id = progress.fetch_add(1);
                if ((size_t)id >= compileList.size())
                    return;
                const LayerData& ld = compileList[id].get();
                Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
                dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
            }
        });
        size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
        num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
        std::vector<std::thread> threads(num_threads - 1);
        for (auto& t: threads) t = std::thread(fn);
        fn(); // process own tasks
        for (auto& t: threads) t.join();
    }
#endif

    void clear()
    {
        CV_TRACE_FUNCTION();

        MapIdToLayerData::iterator it;
        for (it = layers.begin(); it != layers.end(); it++)
        {
            if (it->second.id != 0) {
                it->second.inputBlobs.clear();
                it->second.outputBlobs.clear();
                it->second.internals.clear();
            }
            it->second.skip = false;
            //it->second.consumers.clear();
            Ptr<Layer> currLayer = it->second.layerInstance;

            if( currLayer.empty() )
                continue;

            currLayer->unsetAttached();
        }

        layersTimings.clear();
    }

    void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>())
    {
        CV_TRACE_FUNCTION();

        if (dumpLevel && networkDumpCounter == 0)
        {
            dumpNetworkToFile();
        }

        if (preferableBackend == DNN_BACKEND_DEFAULT)
            preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT;
#ifdef HAVE_INF_ENGINE
        if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
            preferableBackend = getInferenceEngineBackendTypeParam();
#endif

        CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL ||
                  preferableTarget == DNN_TARGET_OPENCL_FP16);
        CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL);
        if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
            preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
        {
            CV_Assert(
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL ||
                  preferableTarget == DNN_TARGET_OPENCL_FP16 ||
                  preferableTarget == DNN_TARGET_MYRIAD ||
                  preferableTarget == DNN_TARGET_FPGA
            );
        }
        if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
        {
            if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
#ifndef HAVE_OPENCL
            {
                CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
                preferableTarget = DNN_TARGET_CPU;
            }
#else
            {
                if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
                {
                    // Current implementation is only valid for GPU (#11494)
                    if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
                    {
                        CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
                        preferableTarget = DNN_TARGET_CPU;
                    }
                    else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
                    {
                        CV_LOG_WARNING(NULL,
                            "DNN: OpenCL target with fp16 precision is not supported "
                            "with current OpenCL device (tested with Intel GPUs only), "
                            "switching to OpenCL with fp32 precision.");
                        preferableTarget = DNN_TARGET_OPENCL;
                    }
                }
            }
#endif
            clear();

            this->blobsToKeep = blobsToKeep_;

            allocateLayers(blobsToKeep_);

            MapIdToLayerData::iterator it = layers.find(0);
            CV_Assert(it != layers.end());
            it->second.skip = netInputLayer->skip;

            initBackend(blobsToKeep_);

            if (!netWasAllocated)
            {
#ifdef HAVE_HALIDE
                if (preferableBackend == DNN_BACKEND_HALIDE)
                    compileHalide();
#else
                CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
#endif
            }

            netWasAllocated = true;

            if (dumpLevel)
            {
                dumpNetworkToFile();
            }
        }
    }

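    // Usage sketch (illustrative): setUpNet() runs lazily. A typical caller
    // only configures preferences, and the checks above fire on the first
    // forward pass; "model.onnx" below is a hypothetical path:
    //
    //   Net net = readNet("model.onnx");
    //   net.setPreferableBackend(DNN_BACKEND_OPENCV);
    //   net.setPreferableTarget(DNN_TARGET_OPENCL_FP16);
    //   net.forward();
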
    int getLayerId(const String &layerName)
    {
        std::map<String, int>::iterator it = layerNameToId.find(layerName);
        return (it != layerNameToId.end()) ? it->second : -1;
    }

    int getLayerId(int id)
    {
        MapIdToLayerData::iterator it = layers.find(id);
        return (it != layers.end()) ? id : -1;
    }

    int getLayerId(DictValue &layerDesc)
    {
        if (layerDesc.isInt())
            return getLayerId(layerDesc.get<int>());
        else if (layerDesc.isString())
            return getLayerId(layerDesc.get<String>());

        CV_Assert(layerDesc.isInt() || layerDesc.isString());
        return -1;
    }

    String getLayerName(int id)
    {
        MapIdToLayerData::iterator it = layers.find(id);
        return (it != layers.end()) ? it->second.name : "(unknown layer)";
    }

    LayerData& getLayerData(int id)
    {
        MapIdToLayerData::iterator it = layers.find(id);

        if (it == layers.end())
            CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));

        return it->second;
    }

    LayerData& getLayerData(const String &layerName)
    {
        int id = getLayerId(layerName);

        if (id < 0)
            CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found");

        return getLayerData(id);
    }

    LayerData& getLayerData(const DictValue &layerDesc)
    {
        CV_Assert(layerDesc.isInt() || layerDesc.isString());
        if (layerDesc.isInt())
            return getLayerData(layerDesc.get<int>());
        else /*if (layerDesc.isString())*/
            return getLayerData(layerDesc.get<String>());
    }

    static void addLayerInput(LayerData &ld, int inNum, LayerPin from)
    {
        if ((int)ld.inputBlobsId.size() <= inNum)
        {
            ld.inputBlobsId.resize(inNum + 1);
        }
        else
        {
            LayerPin storedFrom = ld.inputBlobsId[inNum];
            if (storedFrom.valid() && !storedFrom.equal(from))
                CV_Error(Error::StsError, format("Input #%d of layer \"%s\" was already connected",
                                                 inNum, ld.name.c_str()));
        }

        ld.inputBlobsId[inNum] = from;
    }

    int resolvePinOutputName(LayerData &ld, const String &outName)
    {
        if (outName.empty())
            return 0;
        return ld.getLayerInstance()->outputNameToIndex(outName);
    }

    LayerPin getPinByAlias(const String &layerName)
    {
        LayerPin pin;
        pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);

        if (pin.lid >= 0)
            pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);

        return pin;
    }

    std::vector<LayerPin> getLayerOutPins(const String &layerName)
    {
        int lid = (layerName.empty()) ? 0 : getLayerId(layerName);

        std::vector<LayerPin> pins;

        for (int i = 0; i < layers[lid].outputBlobs.size(); i++)
        {
            pins.push_back(LayerPin(lid, i));
        }

        return pins;
    }

    void connect(int outLayerId, int outNum, int inLayerId, int inNum)
    {
        CV_Assert(outLayerId < inLayerId);
        LayerData &ldOut = getLayerData(outLayerId);
        LayerData &ldInp = getLayerData(inLayerId);

        addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum));
        ldOut.requiredOutputs.insert(outNum);
        ldOut.consumers.push_back(LayerPin(inLayerId, outNum));
    }

1476     void initBackend(const std::vector<LayerPin>& blobsToKeep_)
1477     {
1478         CV_TRACE_FUNCTION();
1479         if (preferableBackend == DNN_BACKEND_OPENCV)
1480             CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
1481         else if (preferableBackend == DNN_BACKEND_HALIDE)
1482             initHalideBackend();
1483         else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
1484         {
1485 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
1486             initInfEngineBackend(blobsToKeep_);
1487 #else
1488             CV_Assert(false && "This OpenCV version is built without Inference Engine NN Builder API support");
1489 #endif
1490         }
1491         else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1492         {
1493 #ifdef HAVE_DNN_NGRAPH
1494             initNgraphBackend(blobsToKeep_);
1495 #else
1496             CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
1497 #endif
1498         }
1499         else
1500             CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
1501     }
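    // Context (illustrative, hypothetical model path): the backend dispatched on
    // above is what a user selects through the public API before the first
    // forward() call:
#if 0
    Net net = readNet("model.onnx");
    net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
    net.setPreferableTarget(DNN_TARGET_CPU);
#endif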
1502
1503     void initHalideBackend()
1504     {
1505         CV_TRACE_FUNCTION();
1506         CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide());
1507
1508         // Iterator to current layer.
1509         MapIdToLayerData::iterator it = layers.begin();
1510         // Iterator to the base layer for fusion. For example, in case of conv+bn+relu
1511         // it'll be the conv layer.
1512         MapIdToLayerData::iterator baseIt = layers.begin();
1513         for (; it != layers.end(); it++)
1514         {
1515             LayerData &ldTop = it->second;
1516             Ptr<Layer> layerTop = ldTop.layerInstance;
1517             if (!layerTop->supportBackend(preferableBackend))
1518             {
1519                 // Move the base iterator to a layer that doesn't support the preferable
1520                 // backend to prevent fusion across layers from different backends.
1521                 baseIt = it;
1522                 continue;
1523             }
1524             // Try to do layer fusion.
1525             LayerData &ldBot = baseIt->second;
1526             Ptr<Layer> layerBot = ldBot.layerInstance;
1527             // 1. Check that the bottom and top layers are from the same backend.
1528             if (it != layers.begin() && layerBot->supportBackend(preferableBackend))
1529             {
1530                 // 2. Check that current layer works in-place.
1531                 bool inPlace = ldTop.inputBlobs.size() == 1 &&
1532                                ldBot.outputBlobs.size() == 1 &&
1533                                ldTop.inputBlobs[0]->data ==
1534                                ldBot.outputBlobs[0].data;
1535                 if (inPlace)
1536                 {
1537                     // 3. Try to attach node.
1538                     CV_Assert(!ldBot.backendNodes[preferableBackend].empty());
1539                     Ptr<BackendNode> fusedNode =
1540                         layerTop->tryAttach(ldBot.backendNodes[preferableBackend]);
1541                     if (!fusedNode.empty())
1542                     {
1543                         ldTop.skip = true;
1544                         ldBot.backendNodes[preferableBackend] = fusedNode;
1545                         ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers;
1546                         continue;
1547                     }
1548                 }
1549             }
1550             // No layer fusion.
1551             ldTop.skip = false;
1552             ldTop.backendNodes[DNN_BACKEND_HALIDE] =
1553                 layerTop->initHalide(ldTop.inputBlobsWrappers);
1554             baseIt = it;
1555         }
1556     }
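    // Fusion walk-through for the loop above (illustrative): for a chain
    // conv -> bn -> relu where bn and relu run in-place, tryAttach() succeeds
    // twice, so bn and relu get skip = true, the conv node carries all three
    // operations, and conv adopts the last fused layer's output wrappers.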
1557
1558 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
1559     // Before launching an Inference Engine graph we need to specify its output blobs.
1560     // This function requests output blobs based on input references of
1561     // layers from the default backend or from different graphs.
1562     void addInfEngineNetOutputs(LayerData &ld)
1563     {
1564         CV_TRACE_FUNCTION();
1565         Ptr<InfEngineBackendNet> layerNet;
1566         if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end())
1567         {
1568             Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1569             if (!node.empty())
1570             {
1571                 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1572                 CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
1573                 layerNet = ieNode->net;
1574             }
1575         }
1576         // For every input reference we check whether it belongs to one of
1577         // the Inference Engine backend graphs. If so, we request an output blob.
1578         // Do nothing if the layer's input is from the same graph.
1579         for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1580         {
1581             LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1582             Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1583             if (!inpNode.empty())
1584             {
1585                 Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1586                 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1587                 if (layerNet != ieInpNode->net)
1588                 {
1589                     // layerNet is empty or nodes are from different graphs.
1590                     ieInpNode->net->addOutput(ieInpNode->layer.getName());
1591                 }
1592             }
1593         }
1594     }
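    // Example (illustrative): if layer A was placed into IE graph G1 and layer B,
    // built into a different graph G2, consumes A's output, then G1 must register
    // that blob as a network output so the data can cross the graph boundary.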
1595
1596     void initInfEngineBackend(const std::vector<LayerPin>& blobsToKeep_)
1597     {
1598         CV_TRACE_FUNCTION();
1599         CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, haveInfEngine());
1600         MapIdToLayerData::iterator it;
1601         Ptr<InfEngineBackendNet> net;
1602
1603         for (it = layers.begin(); it != layers.end(); ++it)
1604         {
1605             LayerData &ld = it->second;
1606             if (ld.id == 0)
1607             {
1608                 CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
1609                           (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
1610                 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1611                 {
1612                     InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1613 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1614                     dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
1615 #else
1616                     dataPtr->setName(netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]);
1617 #endif
1618                 }
1619             }
1620             else
1621             {
1622                 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1623                 {
1624                     InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1625 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1626                     dataPtr->name = ld.name;
1627 #else
1628                     dataPtr->setName(ld.name);
1629 #endif
1630                 }
1631             }
1632         }
1633
1634         if (skipInfEngineInit)
1635         {
1636             Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
1637             CV_Assert(!node.empty());
1638
1639             Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1640             CV_Assert(!ieNode.empty());
1641             ieNode->net->reset();
1642
1643             for (it = layers.begin(); it != layers.end(); ++it)
1644             {
1645                 LayerData &ld = it->second;
1646                 if (ld.id == 0)
1647                 {
1648                     for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
1649                     {
1650                         InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
1651 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1652                         dataPtr->name = netInputLayer->outNames[i];
1653 #else
1654                         dataPtr->setName(netInputLayer->outNames[i]);
1655 #endif
1656                     }
1657                 }
1658                 else
1659                 {
1660                     for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1661                     {
1662                         InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1663 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1664                         dataPtr->name = ld.name;
1665 #else
1666                         dataPtr->setName(ld.name);
1667 #endif
1668                     }
1669                 }
1670                 ieNode->net->addBlobs(ld.inputBlobsWrappers);
1671                 ieNode->net->addBlobs(ld.outputBlobsWrappers);
1672                 ld.skip = true;
1673             }
1674             layers[lastLayerId].skip = false;
1675             ieNode->net->init((Target)preferableTarget);
1676             return;
1677         }
1678
1679         // Build Inference Engine networks from sets of layers that support this
1680         // backend. Split the whole model into several Inference Engine networks if
1681         // some of the layers are not implemented.
1682
1683         bool supportsCPUFallback = preferableTarget == DNN_TARGET_CPU ||
1684                                    BackendRegistry::checkIETarget(DNN_TARGET_CPU);
1685
1686         // Set of all input and output blob wrappers for the current network.
1687         std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
1688         for (it = layers.begin(); it != layers.end(); ++it)
1689         {
1690             LayerData &ld = it->second;
1691             if (ld.id == 0 && ld.skip)
1692                 continue;
1693             bool fused = ld.skip;
1694
1695             Ptr<Layer> layer = ld.layerInstance;
1696             if (!fused && !layer->supportBackend(preferableBackend))
1697             {
1698                 bool customizable = ld.id != 0 &&
1699                                     INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2) &&
1700                                     supportsCPUFallback;
1701                 // TODO: there is a bug in the Myriad plugin with shape inference for custom layers.
1702                 if (preferableTarget == DNN_TARGET_MYRIAD)
1703                 {
1704                     for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
1705                     {
1706                         customizable = ld.inputBlobs[i]->size[0] == 1;
1707                     }
1708                 }
1709
1710                 // TODO: fix these workarounds
1711                 if (preferableTarget == DNN_TARGET_MYRIAD ||
1712                     preferableTarget == DNN_TARGET_OPENCL ||
1713                     preferableTarget == DNN_TARGET_OPENCL_FP16)
1714                     customizable &= ld.type != "Concat";
1715
1716                 if (preferableTarget == DNN_TARGET_OPENCL ||
1717                     preferableTarget == DNN_TARGET_OPENCL_FP16)
1718                     customizable &= ld.type != "Power";
1719
1720                 if (preferableTarget == DNN_TARGET_OPENCL)
1721                     customizable &= ld.type != "Eltwise";
1722
1723                 if (!customizable)
1724                 {
1725                     addInfEngineNetOutputs(ld);
1726                     net = Ptr<InfEngineBackendNet>();
1727                     netBlobsWrappers.clear();  // Not used for the R5 release, but we don't wrap it in #ifdef.
1728                     layer->preferableTarget = DNN_TARGET_CPU;
1729                     continue;
1730                 }
1731             }
1732             ld.skip = true;  // Initially skip all Inference Engine supported layers.
1733
1734             // Create a new network if one of the inputs comes from a different Inference Engine graph.
1735             for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1736             {
1737                 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1738                 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1739                 if (!inpNode.empty())
1740                 {
1741                     Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1742                     CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1743                     if (ieInpNode->net != net)
1744                     {
1745                         net = Ptr<InfEngineBackendNet>();
1746                         netBlobsWrappers.clear();  // Not used for the R5 release, but we don't wrap it in #ifdef.
1747                         break;
1748                     }
1749                 }
1750             }
1751
1752             Ptr<BackendNode> node;
1753             if (!net.empty())
1754             {
1755                 if (fused)
1756                 {
1757                     bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
1758                                    ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
1759                     CV_Assert(inPlace);
1760                     node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
1761                     ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
1762                 }
1763             }
1764             else
1765                 net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet());
1766
1767             if (!fused)
1768             {
1769                 if (layer->supportBackend(preferableBackend))
1770                     node = layer->initInfEngine(ld.inputBlobsWrappers);
1771                 else
1772                 {
1773                     node = Ptr<BackendNode>(new InfEngineBackendNode(
1774                         ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
1775                 }
1776             }
1777             else if (node.empty())
1778                 continue;
1779
1780             CV_Assert(!node.empty());
1781             ld.backendNodes[preferableBackend] = node;
1782
1783             Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1784             CV_Assert(!ieNode.empty());
1785             ieNode->net = net;
1786
1787             for (const auto& pin : blobsToKeep_)
1788             {
1789                 if (pin.lid == ld.id)
1790                 {
1791                     ieNode->net->addOutput(ieNode->layer.getName());
1792                     break;
1793                 }
1794             }
1795
1796             // Convert weights to FP16 for specific targets.
1797             if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
1798                  preferableTarget == DNN_TARGET_MYRIAD ||
1799                  preferableTarget == DNN_TARGET_FPGA) && !fused)
1800             {
1801 #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
1802                 for (const std::string& name : {"weights", "biases"})
1803                 {
1804                     auto it = ieNode->layer.getParameters().find(name);
1805                     if (it != ieNode->layer.getParameters().end())
1806                     {
1807                         InferenceEngine::Blob::Ptr bp = it->second.as<InferenceEngine::Blob::Ptr>();
1808                         it->second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(bp));
1809                     }
1810                 }
1811 #else
1812                 auto& blobs = ieNode->layer.getConstantData();
1813                 if (blobs.empty())
1814                 {
1815                     // In case of a non-weightable layer we have to specify
1816                     // its precision by adding a dummy blob.
1817                     auto blob = InferenceEngine::make_shared_blob<int16_t>(
1818                                     InferenceEngine::Precision::FP16,
1819                                     InferenceEngine::Layout::C, {1});
1820                     blob->allocate();
1821                     blobs[""] = blob;
1822                 }
1823                 else
1824                 {
1825                     for (auto& it : blobs)
1826                         it.second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(it.second));
1827                 }
1828 #endif
1829             }
1830
1831             if (!fused)
1832                 net->addLayer(ieNode->layer);
1833
1834             net->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers, ieNode->layer.getName());
1835             net->addBlobs(ld.inputBlobsWrappers);
1836             net->addBlobs(ld.outputBlobsWrappers);
1837             addInfEngineNetOutputs(ld);
1838         }
1839
1840         // Initialize all networks.
1841         for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
1842         {
1843             LayerData &ld = it->second;
1844             if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end())
1845                 continue;
1846
1847             Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1848             if (node.empty())
1849                 continue;
1850
1851             Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1852             if (ieNode.empty())
1853                 continue;
1854
1855             CV_Assert(!ieNode->net.empty());
1856
1857             if (!ieNode->net->isInitialized())
1858             {
1859                 ieNode->net->init((Target)preferableTarget);
1860                 ld.skip = false;
1861             }
1862         }
1863     }
1864 #endif  // HAVE_DNN_IE_NN_BUILDER_2019
1865
1866
1867 #ifdef HAVE_DNN_NGRAPH
1868     void addNgraphOutputs(LayerData &ld)
1869     {
1870         CV_TRACE_FUNCTION();
1871
1872         Ptr<InfEngineNgraphNet> layerNet;
1873         auto it = ld.backendNodes.find(preferableBackend);
1874         if (it != ld.backendNodes.end())
1875         {
1876             Ptr<BackendNode> node = it->second;
1877             if (!node.empty())
1878             {
1879                 Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
1880                 CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
1881                 layerNet = ieNode->net;
1882             }
1883         }
1884
1885         for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1886         {
1887             LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1888             Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1889             if (!inpNode.empty())
1890             {
1891                 Ptr<InfEngineNgraphNode> ieInpNode = inpNode.dynamicCast<InfEngineNgraphNode>();
1892                 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1893                 if (layerNet != ieInpNode->net)
1894                 {
1895                     ieInpNode->net->addOutput(ieInpNode->node->get_friendly_name());
1896                     ieInpNode->net->setUnconnectedNodes(ieInpNode);
1897                 }
1898             }
1899         }
1900     }
1901
1902     void initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
1903     {
1904         CV_TRACE_FUNCTION();
1905         CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, haveInfEngine());
1906
1907         MapIdToLayerData::iterator it;
1908         Ptr<InfEngineNgraphNet> net;
1909
1910         for (it = layers.begin(); it != layers.end(); ++it)
1911         {
1912             LayerData &ld = it->second;
1913             if (ld.id == 0)
1914             {
1915                 CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
1916                           (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
1917                 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1918                 {
1919                     InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
1920                     std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
1921                     outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." + std::to_string(i)) : outputName;
1922                     dataPtr->setName(outputName);
1923                 }
1924             }
1925             else
1926             {
1927                 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1928                 {
1929                     InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
1930                     std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name;
1931                     dataPtr->setName(outputName);
1932                 }
1933             }
1934         }
1935
1936         if (skipInfEngineInit)
1937         {
1938             Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
1939             CV_Assert(!node.empty());
1940
1941             Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
1942             CV_Assert(!ieNode.empty());
1943             ieNode->net->reset();
1944
1945             for (it = layers.begin(); it != layers.end(); ++it)
1946             {
1947                 LayerData &ld = it->second;
1948                 if (ld.id == 0)
1949                 {
1950                     for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
1951                     {
1952                         InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.inputBlobsWrappers[i]);
1953                         dataPtr->setName(netInputLayer->outNames[i]);
1954                     }
1955                 }
1956                 else
1957                 {
1958                     for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1959                     {
1960                         InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
1961                         dataPtr->setName(ld.name);
1962                     }
1963                 }
1964                 ieNode->net->addBlobs(ld.inputBlobsWrappers);
1965                 ieNode->net->addBlobs(ld.outputBlobsWrappers);
1966                 ld.skip = true;
1967             }
1968             layers[lastLayerId].skip = false;
1969             ieNode->net->init((Target)preferableTarget);
1970             return;
1971         }
1972
1973         bool supportsCPUFallback = preferableTarget == DNN_TARGET_CPU ||
1974                                    BackendRegistry::checkIETarget(DNN_TARGET_CPU);
1975
1976         // Build Inference Engine networks from sets of layers that support this
1977         // backend. Split the whole model into several Inference Engine networks if
1978         // some of the layers are not implemented.
1979         for (it = layers.begin(); it != layers.end(); ++it)
1980         {
1981             LayerData &ld = it->second;
1982
1983             if (ld.id == 0 && ld.skip)
1984                 continue;
1985
1986             bool fused = ld.skip;
1987             Ptr<Layer> layer = ld.layerInstance;
1988             if (!fused && !layer->supportBackend(preferableBackend))
1989             {
1990                 bool customizable = ld.id != 0 && supportsCPUFallback;
1991
1992                 // TODO: there is a bug in the Myriad plugin with shape inference for custom layers.
1993                 if (preferableTarget == DNN_TARGET_MYRIAD)
1994                 {
1995                     for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
1996                     {
1997                         customizable = ld.inputBlobs[i]->size[0] == 1;
1998                     }
1999                 }
2000
2001                 // TODO: fix these workarounds
2002                 if (preferableTarget == DNN_TARGET_MYRIAD ||
2003                     preferableTarget == DNN_TARGET_OPENCL ||
2004                     preferableTarget == DNN_TARGET_OPENCL_FP16)
2005                     customizable &= ld.type != "Concat";
2006
2007                 if (preferableTarget == DNN_TARGET_OPENCL ||
2008                     preferableTarget == DNN_TARGET_OPENCL_FP16)
2009                     customizable &= ld.type != "Power";
2010
2011                 if (preferableTarget == DNN_TARGET_OPENCL)
2012                     customizable &= ld.type != "Eltwise";
2013
2014                 if (!customizable)
2015                 {
2016                     addNgraphOutputs(ld);
2017                     net = Ptr<InfEngineNgraphNet>();
2018                     layer->preferableTarget = DNN_TARGET_CPU;
2019
2020                     for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2021                     {
2022                         LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
2023                         Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
2024                         if (!inpNode.empty()) {
2025                             Ptr<InfEngineNgraphNode> ieNode = inpNode.dynamicCast<InfEngineNgraphNode>();
2026                             CV_Assert(!ieNode.empty());
2027                             ieNode->net->setUnconnectedNodes(ieNode);
2028                         }
2029                     }
2030                     continue;
2031                 }
2032             }
2033             ld.skip = true;  // Initially skip all Inference Engine supported layers.
2034
2035             // Create a new network if one of the inputs comes from a different Inference Engine graph.
2036             std::vector<Ptr<BackendNode>> inputNodes;
2037             for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2038             {
2039                 // Layer_Test_ROIPooling.Accuracy has 2 inputs with inpLd = 0, which would yield 4 inputNodes (input, rois, input, rois); stop once all inputs are covered.
2040                 if (inputNodes.size() == ld.inputBlobsId.size()) {
2041                     break;
2042                 }
2043                 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
2044                 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
2045                 if (!inpNode.empty())
2046                 {
2047                      Ptr<InfEngineNgraphNode> ieInpNode = inpNode.dynamicCast<InfEngineNgraphNode>();
2048                      CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
2049                      if (ieInpNode->net == net && !fused) {
2050                         inputNodes.push_back(inpNode);
2051                         continue;
2052                      }
2053                 }
2054
2055                 if (net.empty()) {
2056                     net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
2057                 }
2058
2059                 if (!fused) {
2060                     std::vector<std::string> inputNames;
2061                     std::vector<cv::Mat> inputs;
2062
2063                     auto curr_pos = inpLd.consumers.begin();
2064                     auto compare = [&ld] (const LayerPin& lp) { return lp.lid == ld.id; };
2065                     auto cons = curr_pos;
2066                     while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) !=
2067                             inpLd.consumers.end()) {
2068                         int cons_inp = cons->oid;
2069                         Ptr<NgraphBackendWrapper> inpWrapper = inpLd.outputBlobsWrappers[cons_inp].
2070                                                                      dynamicCast<NgraphBackendWrapper>();
2071                         CV_Assert(!inpWrapper.empty());
2072                         auto iter = std::find(inputNames.begin(), inputNames.end(),
2073                                               inpWrapper->dataPtr->getName());
2074                         if (iter == inputNames.end()) {
2075                             inputNames.push_back(inpWrapper->dataPtr->getName());
2076                             inputs.push_back(inpLd.outputBlobs[cons_inp]);
2077                         }
2078                         curr_pos = cons + 1;
2079                     }
2080
2081                     auto inps = net->setInputs(inputs, inputNames);
2082                     for (auto& inp : inps) {
2083                         inputNodes.emplace_back(Ptr<BackendNode>(new InfEngineNgraphNode(inp)));
2084                     }
2085                 }
2086             }
2087
2088             Ptr<BackendNode> node;
2089             if (!net.empty())
2090             {
2091                 if (fused)
2092                 {
2093                     bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
2094                                    ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
2095                     CV_Assert(inPlace);
2096                     node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
2097                     ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
2098                 }
2099             }
2100             else {
2101                 net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
2102             }
2103
2104             if (!fused)
2105             {
2106                 CV_Assert(ld.inputBlobsId.size() == inputNodes.size());
2107                 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2108                 {
2109                     int lid = ld.inputBlobsId[i].lid;
2110                     int oid = ld.inputBlobsId[i].oid;
2111                     if (oid == 0 || lid == 0)
2112                         continue;
2113
2114                     auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>();
2115                     CV_Assert(oid < ieInpNode->node->get_output_size());
2116 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
2117                     inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node));
2118 #elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3)
2119                     inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid)));
2120 #else
2121                     inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid, false)));
2122 #endif
2123                 }
2124
2125                 if (layer->supportBackend(preferableBackend))
2126                 {
2127                     node = layer->initNgraph(ld.inputBlobsWrappers, inputNodes);
2128                     for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2129                     {
2130                         InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
2131                         node.dynamicCast<InfEngineNgraphNode>()->setName(dataPtr->getName());
2132                     }
2133                 }
2134                 else
2135                 {
2136                     node = Ptr<BackendNode>(new InfEngineNgraphNode(inputNodes,
2137                         ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
2138                 }
2139             }
2140             else if (node.empty())
2141                 continue;
2142
2143             ld.backendNodes[preferableBackend] = node;
2144
2145             Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2146             CV_Assert(!ieNode.empty());
2147             ieNode->net = net;
2148
2149             if (ld.consumers.empty()) {
2150                 // A layer without consumers is a network output (e.g. TF EAST_text_detection).
2151                 ieNode->net->setUnconnectedNodes(ieNode);
2152             }
2153             for (const auto& pin : blobsToKeep_)
2154             {
2155                 if (pin.lid == ld.id)
2156                 {
2157                     ieNode->net->addOutput(ieNode->node->get_friendly_name());
2158                     break;
2159                 }
2160             }
2161             ieNode->net->setNodePtr(&ieNode->node);
2162
2163             net->addBlobs(ld.inputBlobsWrappers);
2164             net->addBlobs(ld.outputBlobsWrappers);
2165             addNgraphOutputs(ld);
2166         }
2167
2168         // Initialize all networks.
2169         for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
2170         {
2171             LayerData &ld = it->second;
2172             auto iter = ld.backendNodes.find(preferableBackend);
2173             if (iter == ld.backendNodes.end())
2174                 continue;
2175
2176             Ptr<BackendNode>& node = iter->second;
2177             if (node.empty())
2178                 continue;
2179
2180             Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2181             if (ieNode.empty())
2182                 continue;
2183
2184             CV_Assert(!ieNode->net.empty());
2185
2186             if (!ieNode->net->isInitialized())
2187             {
2188                 ieNode->net->setUnconnectedNodes(ieNode);
2189                 ieNode->net->createNet((Target)preferableTarget);
2190                 ld.skip = false;
2191             }
2192         }
2193     }
2194 #endif  // HAVE_DNN_NGRAPH
2195
2196     void allocateLayer(int lid, const LayersShapesMap& layersShapes)
2197     {
2198         CV_TRACE_FUNCTION();
2199
2200         LayerData &ld = layers[lid];
2201
2202         // already allocated
2203         if (ld.flag)
2204             return;
2205
2206         size_t ninputs = ld.inputBlobsId.size();
2207 #if 0
2208         printf("layer %s:", ld.name.c_str());
2209         for (size_t i = 0; i < ninputs; i++)
2210         {
2211             int inp_lid = ld.inputBlobsId[i].lid;
2212             LayerData &inp_ld = layers[inp_lid];
2213             int inp_outputs = (int)inp_ld.outputBlobs.size();
2214             std::cout << " " << inp_ld.name << "(" << inp_outputs;
2215
2216             for( int j = 0; j < inp_outputs; j++ )
2217             {
2218                 std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size;
2219             }
2220             std::cout << ")";
2221         }
2222         printf("\n");
2223 #endif
2224
2225         // determine parent layers
2226         for (size_t i = 0; i < ninputs; i++)
2227             ld.inputLayersId.insert(ld.inputBlobsId[i].lid);
2228
2229         // allocate parents
2230         for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
2231             allocateLayer(*i, layersShapes);
2232
2233         // bind inputs
2234         if (ld.id == 0)  // DataLayer
2235         {
2236             ninputs = netInputLayer->inputsData.size();
2237             ld.inputBlobsWrappers.resize(ninputs);
2238             for (size_t i = 0; i < ninputs; i++)
2239             {
2240                 ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
2241             }
2242         }
2243         else
2244         {
2245             ld.inputBlobs.resize(ninputs);
2246             ld.inputBlobsWrappers.resize(ninputs);
2247             for (size_t i = 0; i < ninputs; i++)
2248             {
2249                 LayerPin from = ld.inputBlobsId[i];
2250                 CV_Assert(from.valid());
2251                 CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
2252                 ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
2253                 ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
2254             }
2255         }
2256
2257         LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);
2258
2259         CV_Assert(layerShapesIt != layersShapes.end());
2260
2261         std::vector<LayerPin> pinsForInternalBlobs;
2262         blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
2263                                           preferableBackend == DNN_BACKEND_OPENCV &&
2264                                           preferableTarget == DNN_TARGET_OPENCL_FP16);
2265         ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
2266         for (int i = 0; i < ld.outputBlobs.size(); ++i)
2267         {
2268             ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
2269         }
2270         ld.internalBlobsWrappers.resize(ld.internals.size());
2271         for (int i = 0; i < ld.internals.size(); ++i)
2272         {
2273             ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
2274         }
2275
2276         Ptr<Layer> layerPtr = ld.getLayerInstance();
2277         {
2278             std::vector<Mat> inps(ld.inputBlobs.size());
2279             for (int i = 0; i < ld.inputBlobs.size(); ++i)
2280             {
2281                 inps[i] = *ld.inputBlobs[i];
2282             }
2283             layerPtr->finalize(inps, ld.outputBlobs);
2284             layerPtr->preferableTarget = preferableTarget;
2285 #if 0
2286             std::cout << "\toutputs:";
2287             size_t noutputs = ld.outputBlobs.size();
2288             for (size_t j = 0; j < noutputs; j++)
2289             {
2290                 std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size;
2291             }
2292             std::cout << "\n";
2293 #endif
2294         }
2295
2296         // After allocation of the layer, we decrease the reference counters of its input blobs.
2297         blobManager.releaseReferences(ld.inputBlobsId);
2298         blobManager.releaseReferences(pinsForInternalBlobs);
2299
2300         ld.flag = 1;
2301     }
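    // Note: allocateLayer() allocates all parent layers first (recursively), so by
    // the time finalize() runs for a layer, every input blob already has its final
    // shape and memory.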
2302
2303 #if 0
2304 #define printf_(args) printf args
2305 #else
2306 #define printf_(args)
2307 #endif
2308
2309     void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
2310     {
2311         CV_TRACE_FUNCTION();
2312
2313         if(!fusion || (preferableBackend != DNN_BACKEND_OPENCV &&
2314                         preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 &&
2315                         preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH))
2316            return;
2317
2318         // Scan through all the layers. If there is a convolution layer followed by an activation layer,
2319         // we try to embed the activation into the convolution and disable separate execution of the activation.
2320         std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
2321                                       blobsToKeep_.end());
2322         MapIdToLayerData::iterator it;
2323         for (it = layers.begin(); it != layers.end(); it++)
2324         {
2325             int lid = it->first;
2326             LayerData& ld = layers[lid];
2327             if( ld.skip )
2328             {
2329                 printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
2330                 continue;
2331             }
2332             printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
2333
2334             // Optimization #1. Try to fuse batch norm, scaling and/or activation layers
2335             // with the current layer if they follow it. Normally, they are fused with the convolution layer,
2336             // but some of them (like activation) may be fused with fully-connected, elemwise (+) and
2337             // some other layers.
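            // For example (illustrative): a conv -> bn -> scale -> relu chain
            // collapses into the single conv node; bn, scale and relu are marked
            // skip = true and conv adopts the last fused layer's output blobs.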
2338             Ptr<Layer>& currLayer = ld.layerInstance;
2339             if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
2340             {
2341                 LayerData* nextData = &layers[ld.consumers[0].lid];
2342                 LayerPin lpNext(ld.consumers[0].lid, 0);
2343                 while (nextData)
2344                 {
2345                     Ptr<Layer> nextLayer = nextData->layerInstance;
2346                     if (currLayer->tryFuse(nextLayer))
2347                     {
2348                         printf_(("\tfused with %s\n", nextLayer->name.c_str()));
2349                         nextData->skip = true;
2350                         ld.outputBlobs = layers[lpNext.lid].outputBlobs;
2351                         ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
2352                         if (nextData->consumers.size() == 1)
2353                         {
2354                             int nextLayerId = nextData->consumers[0].lid;
2355                             nextData = &layers[nextLayerId];
2356                             lpNext = LayerPin(nextLayerId, 0);
2357                         }
2358                         else
2359                         {
2360                             nextData = 0;
2361                             break;
2362                         }
2363                     }
2364                     else
2365                         break;
2366                 }
2367
2368                 if (preferableBackend != DNN_BACKEND_OPENCV)
2369                     continue;  // Go to the next layer.
2370
2371                 // TODO: the OpenCL target should support more fusion styles.
2372                 if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
2373                      (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
2374                      ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
2375                      ld.layerInstance->type != "Concat")) )
2376                     continue;
2377
2378                 while (nextData)
2379                 {
2380                     // For now, the OpenCL target supports fusion only with ReLU/ChannelsPReLU/ReLU6/TanH/Power activations
2381                     if (IS_DNN_OPENCL_TARGET(preferableTarget) &&
2382                         nextData->type != "ReLU" &&
2383                         nextData->type != "ChannelsPReLU" &&
2384                         nextData->type != "ReLU6" &&
2385                         nextData->type != "TanH" &&
2386                         nextData->type != "Power")
2387                         break;
2388
2389                     Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
2390                     if (nextActivLayer.empty())
2391                         break;
2392
2393                     if (currLayer->setActivation(nextActivLayer))
2394                     {
2395                         printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
2396                         nextData->skip = true;
2397                         ld.outputBlobs = layers[lpNext.lid].outputBlobs;
2398                         ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
2399                         if (nextData->consumers.size() == 1)
2400                         {
2401                             int nextLayerId = nextData->consumers[0].lid;
2402                             nextData = &layers[nextLayerId];
2403                             lpNext = LayerPin(nextLayerId, 0);
2404                         }
2405                         else
2406                         {
2407                             nextData = 0;
2408                             break;
2409                         }
2410                     }
2411                     else
2412                         break;
2413                 }
2414
2415                 // fuse convolution layer followed by eltwise + relu
2416                 if ( IS_DNN_OPENCL_TARGET(preferableTarget) && ld.layerInstance->type == "Convolution" )
2417                 {
2418                     Ptr<EltwiseLayer> nextEltwiseLayer;
2419                     if( nextData )
2420                         nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
2421
2422                     if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
2423                         nextData && nextData->inputBlobsId.size() == 2 )
2424                     {
2425                         LayerData *eltwiseData = nextData;
2426
2427                         // The Eltwise layer has two inputs. We need to determine which
2428                         // is the base convolution layer and which could be used as its bias.
2429                         LayerData* biasLayerData = 0;
2430                         for (int i = 0; i < 2; ++i)
2431                         {
2432                             LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid];
2433                             CV_Assert(downLayerData);
2434                             while (downLayerData->skip)
2435                             {
2436                                 if (downLayerData->inputBlobsId.size() == 1)
2437                                     downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
2438                                 else
2439                                 {
2440                                     downLayerData = 0;
2441                                     break;
2442                                 }
2443                             }
2444                             if (downLayerData && ld.id == downLayerData->id)
2445                             {
2446                                 biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid];
2447                                 break;
2448                             }
2449                         }
2450                         CV_Assert(biasLayerData);
2451                         {
2452                             if( eltwiseData->consumers.size() == 1 )
2453                             {
2454                                 // fuse eltwise + activation layer
2455                                 if (biasLayerData->id < ld.id)
2456                                 {
2457                                     nextData = &layers[eltwiseData->consumers[0].lid];
2458                                     lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
2459                                     Ptr<ActivationLayer> nextActivLayer;
2460                                     if( nextData )
2461                                         nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
2462
2463                                     if( !nextActivLayer.empty() &&
2464                                             (!nextData->type.compare("ReLU") ||
2465                                              !nextData->type.compare("ChannelsPReLU") ||
2466                                              !nextData->type.compare("Power")) &&
2467                                             currLayer->setActivation(nextActivLayer) )
2468                                     {
2469                                         CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
2470                                         ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
2471                                         printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
2472                                         printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
2473                                         eltwiseData->skip = true;
2474                                         nextData->skip = true;
2475                                         // This optimization is for cases like
2476                                         // some_layer   conv
2477                                         //   |             |
2478                                         //   +-- eltwise --+
2479                                         //          |
2480                                         //        activ
2481                                         // This way all the element-wise computations
2482                                         // (i.e. some_layer+conv or some_layer*conv)
2483                                         // would be done at [conv] layer. So we need to
2484                                         // would be done at the [conv] layer. So we need to
2485                                         // replace [conv]'s output blob with [eltwise]'s one,
2486                                         // Also we need to move all the consumers' references.
2487                                         // To prevent memory collisions (i.e. when input of
2488                                         // [conv] and output of [eltwise] is the same blob)
2489                                         // we allocate a new blob.
2490                                         CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
2491                                         ld.outputBlobs[0] = ld.outputBlobs[0].clone();
2492                                         ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
2493
2494                                         eltwiseData->outputBlobs = ld.outputBlobs;
2495                                         nextData->outputBlobs = ld.outputBlobs;
2496                                         eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
2497                                         nextData->outputBlobsWrappers = ld.outputBlobsWrappers;
2498
2499                                         // Move references of [activ] layer consumers to the newly allocated blob.
2500                                         for (int i = 0; i < nextData->consumers.size(); ++i)
2501                                         {
2502                                             LayerData& consumer = layers[nextData->consumers[i].lid];
2503                                             for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
2504                                             {
2505                                                 if (consumer.inputBlobsId[j].lid == lpNext.lid)
2506                                                 {
2507                                                     consumer.inputBlobs[j] = &ld.outputBlobs[0];
2508                                                     consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
2509                                                     break;
2510                                                 }
2511                                             }
2512                                         }
2513                                     }
2514                                 }
2515                             }
2516                         }
2517                     }
2518                 }
2519             }
2520
2521             if (preferableBackend != DNN_BACKEND_OPENCV)
2522                 continue;  // Go to the next layer.
2523
2524             // Optimization #2. If there is a concat layer that concatenates channels
2525             // from the inputs together (i.e. axis == 1), then we make the inputs of
2526             // the concat layer write directly into the concatenation output buffer
2527             // (and so we eliminate the concatenation layer, because the channels
2528             // are concatenated implicitly).
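            // Worked example (illustrative): with two inputs of 64 and 128 channels,
            // instead of copying both into a fresh 192-channel blob, input #0 writes
            // into channel range [0, 64) and input #1 into [64, 192) of the shared
            // output buffer.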
2529             Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
2530             if( !concatLayer.empty() && !concatLayer->padding && ld.outputBlobs.size() == 1 )
2531             {
2532                 Mat& output = ld.outputBlobs[0];
2533                 UMat umat_output;
2534 #ifdef HAVE_OPENCL
2535                 if (!ld.outputBlobsWrappers.empty() &&
2536                     (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)))
2537                 {
2538                     size_t i, ninputs = ld.inputBlobsId.size();
2539                     bool conv_layer = true;
2540                     for( i = 0; i < ninputs; i++ )
2541                     {
2542                         LayerPin pin = ld.inputBlobsId[i];
2543                         LayerData* inp_i_data = &layers[pin.lid];
2544                         while(inp_i_data->skip &&
2545                               inp_i_data->inputBlobsId.size() == 1 &&
2546                               inp_i_data->consumers.size() == 1)
2547                         {
2548                             pin = inp_i_data->inputBlobsId[0];
2549                             inp_i_data = &layers[pin.lid];
2550                         }
2551                         conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution");
2552                     }
2553                     if (!conv_layer)
2554                         continue;
2555                     std::vector<UMat> umat_outputBlobs;
2556                     umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2557                     umat_output = umat_outputBlobs[0];
2558                 }
2559 #endif
2560
2561                 // TODO: in general, this optimization can always be done, but
2562                 // many layers currently check that the input/output blobs are
2563                 // continuous arrays. Unfortunately, this is not true when
2564                 // the concatenation optimization is applied with batch_size > 1.
2565                 // So, for now, we only apply this optimization in the most popular
2566                 // case, batch_size == 1.
2567                 int axis = clamp(concatLayer->axis, output.dims);
2568                 if( output.total(0, axis) == 1 )
2569                 {
2570                     size_t i, ninputs = ld.inputBlobsId.size();
2571                     std::vector<LayerPin> realinputs(ninputs);
2572                     for( i = 0; i < ninputs; i++ )
2573                     {
2574                         LayerPin pin = ld.inputBlobsId[i];
2575                         LayerData* inp_i_data = &layers[pin.lid];
2576                         while(inp_i_data->skip &&
2577                               inp_i_data->inputBlobsId.size() == 1 &&
2578                               inp_i_data->consumers.size() == 1)
2579                         {
2580                             pin = inp_i_data->inputBlobsId[0];
2581                             inp_i_data = &layers[pin.lid];
2582                         }
2583                         printf_(("\treal input for %s is %s\n",
2584                                layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
2585                                inp_i_data->getLayerInstance()->name.c_str()));
2586
2587                         if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
2588                             break;
2589                         realinputs[i] = pin;
2590                     }
2591
2592                     if( i >= ninputs )
2593                     {
2594                         // Allocate new memory to prevent collisions during memory
2595                         // reusing (see https://github.com/opencv/opencv/pull/10456).
2596                         output = output.clone();
2597 #ifdef HAVE_OPENCL
2598                         if (preferableBackend == DNN_BACKEND_OPENCV &&
2599                             IS_DNN_OPENCL_TARGET(preferableTarget))
2600                         {
2601                             std::vector<UMat> umats(1);
2602                             umat_output = umat_output.clone();
2603                             umats[0] = umat_output;
2604                             OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
2605                         }
2606 #endif
2607                         std::vector<Range> chrange(output.dims, Range::all());
2608                         int ofs = 0;
2609                         for( i = 0; i < ninputs; i++ )
2610                         {
2611                             LayerPin pin = realinputs[i];
2612                             LayerData* inp_i_data = &layers[pin.lid];
2613                             int channels_i = ld.inputBlobs[i]->size[axis];
2614                             chrange[axis] = Range(ofs, ofs + channels_i);
2615                             printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
2616                                    pin.oid, ofs, ofs + channels_i));
2617                             ofs += channels_i;
2618                             Mat output_slice = output(chrange);
2619                             Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
2620                             CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
2621                             Mat* oldPtr = &curr_output;
2622                             curr_output = output_slice;
2623 #ifdef HAVE_OPENCL
2624                             if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
2625                             {
2626                                 std::vector<UMat> umats(inp_i_data->outputBlobsWrappers.size());
2627                                 umats[pin.oid] = umat_output(chrange);
2628                                 OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats);
2629                             }
2630 #endif
2631                             // Layers that referred to the old input Mat will now refer to the
2632                             // new data through the same Mat object.
2633                             CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output);
2634                         }
2635                         ld.skip = true;
2636                         printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
2637                     }
2638                 }
2639             }
2640         }
2641     }
2642
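    // Allocates every layer: resets the per-layer flags, collects the network
    // input shapes, infers the shapes of all layers from them, registers blob
    // references so the blob manager knows which buffers may be reused, then
    // allocates each layer in ascending id order and finally runs layer fusion.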
2643     void allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
2644     {
2645         CV_TRACE_FUNCTION();
2646
2647         MapIdToLayerData::iterator it;
2648         for (it = layers.begin(); it != layers.end(); it++)
2649             it->second.flag = 0;
2650
2651         CV_Assert(!layers[0].outputBlobs.empty());
2652         ShapesVec inputShapes;
2653         for(int i = 0; i < layers[0].outputBlobs.size(); i++)
2654         {
2655             Mat& inp = layers[0].outputBlobs[i];
2656             CV_Assert(inp.total());
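            // Note: in this version of the module, FP16 data is stored in CV_16S
            // Mats (see the "// FP16" conversions below), hence the CV_16S type
            // used for OpenCL FP16 targets.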
2657             if (preferableBackend == DNN_BACKEND_OPENCV &&
2658                 preferableTarget == DNN_TARGET_OPENCL_FP16)
2659             {
2660                 layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
2661             }
2662             inputShapes.push_back(shape(inp));
2663         }
2664         LayersShapesMap layersShapes;
2665         getLayersShapes(inputShapes, layersShapes);
2666
2667         blobManager.reset();
2668         backendWrappers.clear();
2669         // Add fake references to the input blobs so the blob manager never reuses their memory.
2670         for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
2671             blobManager.addReference(LayerPin(0, i));
2672         for (it = layers.begin(); it != layers.end(); ++it)
2673         {
2674             const LayerData& ld = it->second;
2675             blobManager.addReferences(ld.inputBlobsId);
2676         }
2677
2678         for (int i = 0; i < blobsToKeep_.size(); i++)
2679         {
2680             blobManager.addReference(blobsToKeep_[i]);
2681         }
2682
2683         for (it = layers.begin(); it != layers.end(); it++)
2684         {
2685             int lid = it->first;
2686             allocateLayer(lid, layersShapes);
2687         }
2688
2689         layersTimings.resize(lastLayerId + 1, 0);
2690         fuseLayers(blobsToKeep_);
2691     }
2692
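    // Executes a single layer. Skipped (fused/optimized-out) layers only reset
    // the timer; otherwise the call is dispatched either to the plain OpenCV
    // implementation (CPU or OpenCL) or to the node built for the preferred
    // backend (Halide / Inference Engine NN Builder / nGraph).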
2693     void forwardLayer(LayerData &ld)
2694     {
2695         CV_TRACE_FUNCTION();
2696
2697         Ptr<Layer> layer = ld.layerInstance;
2698
2699         TickMeter tm;
2700         tm.start();
2701
2702         if( !ld.skip )
2703         {
2704             std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
2705             if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
2706             {
2707                 if (isAsync)
2708                     CV_Error(Error::StsNotImplemented, "Default implementation fallback is not supported in asynchronous mode");
2709
2710                 if (!layer->supportBackend(DNN_BACKEND_OPENCV))
2711                     CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" is not supported by the OpenCV backend",
2712                                                        ld.name.c_str(), ld.type.c_str()));
2713
2714 #ifdef HAVE_OPENCL
2715                 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
2716                 {
2717                     std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
2718                     std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2719                     std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
2720                     layer->forward(umat_inputBlobs,
2721                                    umat_outputBlobs,
2722                                    umat_internalBlobs);
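                    // Optional numeric debugging: when the DNN_CHECK_NAN_INF flags
                    // are set (configuration parameters, presumably exposed through
                    // OPENCV_DNN_CHECK_NAN_INF* environment variables like the other
                    // flags in this file), outputs are scanned for NaN/Inf values
                    // and the offending blobs are printed.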
2723                     if (DNN_CHECK_NAN_INF)
2724                     {
2725                         bool fail = false;
2726                         for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
2727                         {
2728                             UMat& u = umat_outputBlobs[i];
2729                             Mat m;
2730                             if (u.depth() == CV_16S) // FP16
2731                                 convertFp16(u, m);
2732                             else
2733                                 m = u.getMat(ACCESS_READ);
2734                             if (!checkRange(m))
2735                             {
2736                                 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2737                                 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2738                                 fail = true;
2739                             }
2740                             else if (!checkRange(m, true, NULL, -1e6, 1e6))
2741                             {
2742                                 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2743                                 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2744                                 fail = true;
2745                             }
2746                         }
2747                         if (fail)
2748                         {
2749                             for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
2750                             {
2751                                 UMat& u = umat_inputBlobs[i];
2752                                 Mat m;
2753                                 if (u.depth() == CV_16S) // FP16
2754                                     convertFp16(u, m);
2755                                 else
2756                                     m = u.getMat(ACCESS_READ);
2757                                 std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
2758                                 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2759                             }
2760                             for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
2761                             {
2762                                 UMat& u = umat_outputBlobs[i];
2763                                 Mat m;
2764                                 if (u.depth() == CV_16S) // FP16
2765                                     convertFp16(u, m);
2766                                 else
2767                                     m = u.getMat(ACCESS_READ);
2768                                 std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
2769                                 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2770                             }
2771                             for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
2772                             {
2773                                 UMat& u = umat_internalBlobs[i];
2774                                 Mat m;
2775                                 if (u.depth() == CV_16S) // FP16
2776                                     convertFp16(u, m);
2777                                 else
2778                                     m = u.getMat(ACCESS_READ);
2779                                 std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
2780                                 if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
2781                             }
2782                             if (DNN_CHECK_NAN_INF_RAISE_ERROR)
2783                                 CV_Assert(!fail);
2784                         }
2785                     }
2786                     OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
2787                 }
2788                 else
2789 #endif
2790                 {
2791                     for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
2792                     {
2793                         if (!ld.inputBlobsWrappers[i].empty())
2794                             ld.inputBlobsWrappers[i]->copyToHost();
2795                     }
2796
2797                     std::vector<Mat> inps(ld.inputBlobs.size());
2798                     for (int i = 0; i < ld.inputBlobs.size(); ++i)
2799                     {
2800                         inps[i] = *ld.inputBlobs[i];
2801                     }
2802                     layer->forward(inps, ld.outputBlobs, ld.internals);
2803
2804                     if (DNN_CHECK_NAN_INF)
2805                     {
2806                         bool fail = false;
2807                         for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
2808                         {
2809                             const Mat& m = ld.outputBlobs[i];
2810                             if (!checkRange(m))
2811                             {
2812                                 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2813                                 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2814                                 fail = true;
2815                             }
2816                             else if (!checkRange(m, true, NULL, -1e6, 1e6))
2817                             {
2818                                 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2819                                 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2820                                 fail = true;
2821                             }
2822                         }
2823                         if (fail)
2824                         {
2825                             for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
2826                             {
2827                                 const Mat* pM = ld.inputBlobs[i];
2828                                 if (!pM)
2829                                 {
2830                                     std::cout << "INPUT " << i << " is NULL" << std::endl;
2831                                     continue;
2832                                 }
2833                                 const Mat& m = *pM;
2834                                 std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
2835                                 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2836                             }
2837                             for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
2838                             {
2839                                 const Mat& m = ld.outputBlobs[i];
2840                                 std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
2841                                 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2842                             }
2843                             for (size_t i = 0; i < ld.internals.size(); ++i)
2844                             {
2845                                 const Mat& m = ld.internals[i];
2846                                 std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
2847                                 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2848                             }
2849                             if (DNN_CHECK_NAN_INF_RAISE_ERROR)
2850                                 CV_Assert(!fail);
2851                         }
2852                     }
2853
2854                     for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
2855                     {
2856                         if (!ld.outputBlobsWrappers[i].empty())
2857                             ld.outputBlobsWrappers[i]->setHostDirty();
2858                     }
2859                 }
2860             }
2861             else
2862             {
2863                 Ptr<BackendNode> node = it->second;
2864                 CV_Assert(!node.empty());
2865                 if (preferableBackend == DNN_BACKEND_HALIDE)
2866                 {
2867                     forwardHalide(ld.outputBlobsWrappers, node);
2868                 }
2869                 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
2870                 {
2871                     forwardInfEngine(ld.outputBlobsWrappers, node, isAsync);
2872                 }
2873                 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
2874                 {
2875                     forwardNgraph(ld.outputBlobsWrappers, node, isAsync);
2876                 }
2877                 else
2878                 {
2879                     CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
2880                 }
2881             }
2882         }
2883         else
2884             tm.reset();
2885
2886         tm.stop();
2887         layersTimings[ld.id] = tm.getTimeTicks();
2888
2889         ld.flag = 1;
2890     }
2891
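    // Forwards the network up to (and including) the given layer. The loop below
    // relies on layer ids being assigned in topological order: every producer of
    // `ld` has a smaller id, so iterating ids below ld.id runs all parents first.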
2892     void forwardToLayer(LayerData &ld, bool clearFlags = true)
2893     {
2894         CV_TRACE_FUNCTION();
2895
2896         if (clearFlags)
2897         {
2898             MapIdToLayerData::iterator it;
2899             for (it = layers.begin(); it != layers.end(); it++)
2900                 it->second.flag = 0;
2901         }
2902
2903         // already forwarded
2904         if (ld.flag)
2905             return;
2906
2907         // forward parents
2908         MapIdToLayerData::iterator it;
2909         for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
2910         {
2911             LayerData &parentLd = it->second;  // renamed to avoid shadowing the target `ld`
2912             if (parentLd.flag)
2913                 continue;
2914             forwardLayer(parentLd);
2915         }
2916
2917         //forward itself
2918         forwardLayer(ld);
2919     }
2920
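    // Computes the input/output/internal shapes of layer `id`, memoizing results
    // in inOutShapes: input shapes are taken from the (recursively computed)
    // outputs of producer layers, then the layer's own getMemoryShapes() derives
    // its output and internal shapes.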
2921     void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
2922     {
2923         std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
2924
2925         if (id == 0 && inOutShapes[id].in[0].empty())
2926         {
2927             if (!layers[0].outputBlobs.empty())
2928             {
2929                 ShapesVec shapes;
2930                 for (int i = 0; i < layers[0].outputBlobs.size(); i++)
2931                 {
2932                     Mat& inp = layers[0].outputBlobs[i];
2933                     CV_Assert(inp.total());
2934                     shapes.push_back(shape(inp));
2935                 }
2936                 inOutShapes[0].in = shapes;
2937             }
2938             else
2939             {
2940                 const std::vector<MatShape>& inputShapes = netInputLayer->shapes;
2941                 bool none = true;
2942                 for (size_t i = 0; i < inputShapes.size(); i++)
2943                 {
2944                     if (!inputShapes[i].empty())
2945                     {
2946                         none = false;
2947                         break;
2948                     }
2949                 }
2950                 if (none)
2951                 {
2952                     inOutShapes[0].out.clear();
2953                     return;
2954                 }
2955                 else
2956                 {
2957                     inOutShapes[0].in = inputShapes;
2958                 }
2959             }
2960         }
2961
2962         if (inOutShapes[id].in.empty())
2963         {
2964             for(int i = 0; i < inputLayerIds.size(); i++)
2965             {
2966                 int layerId = inputLayerIds[i].lid;
2967                 LayersShapesMap::iterator it =
2968                         inOutShapes.find(layerId);
2969                 if(it == inOutShapes.end() ||
2970                         it->second.out.empty())
2971                 {
2972                     getLayerShapesRecursively(layerId, inOutShapes);
2973                 }
2974                 const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
2975                 inOutShapes[id].in.push_back(shape);
2976             }
2977         }
2978         const ShapesVec& is = inOutShapes[id].in;
2979         ShapesVec& os = inOutShapes[id].out;
2980         ShapesVec& ints = inOutShapes[id].internal;
2981         int requiredOutputs = layers[id].requiredOutputs.size();
2982         Ptr<Layer> l = layers[id].getLayerInstance();
2983         CV_Assert(l);
2984         bool layerSupportInPlace = false;
2985         try
2986         {
2987             layerSupportInPlace = l->getMemoryShapes(is, requiredOutputs, os, ints);
2988         }
2989         catch (const cv::Exception& e)
2990         {
2991             CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() throws exception." <<
2992                     " inputs=" << is.size() <<
2993                     " outputs=" << os.size() << "/" << requiredOutputs <<
2994                     " blobs=" << l->blobs.size());
2995             for (size_t i = 0; i < is.size(); ++i)
2996             {
2997                 CV_LOG_ERROR(NULL, "    input[" << i << "] = " << toString(is[i]));
2998             }
2999             for (size_t i = 0; i < os.size(); ++i)
3000             {
3001                 CV_LOG_ERROR(NULL, "    output[" << i << "] = " << toString(os[i]));
3002             }
3003             for (size_t i = 0; i < l->blobs.size(); ++i)
3004             {
3005                 CV_LOG_ERROR(NULL, "    blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i])));
3006             }
3007             CV_LOG_ERROR(NULL, "Exception message: " << e.what());
3008             throw;
3009         }
3010         inOutShapes[id].supportInPlace = layerSupportInPlace;
3011
3012         for (int i = 0; i < ints.size(); i++)
3013             CV_Assert(total(ints[i]) > 0);
3014
3015         for (int i = 0; i < os.size(); i++)
3016             CV_Assert(total(os[i]) > 0);
3017     }
3018
3019     void getLayersShapes(const ShapesVec& netInputShapes,
3020                          LayersShapesMap& inOutShapes)
3021     {
3022         inOutShapes.clear();
3023
3024         inOutShapes[0].in = netInputShapes; // seed shapes for the input (id 0) layer
3025         for (MapIdToLayerData::iterator it = layers.begin();
3026              it != layers.end(); it++)
3027         {
3028             getLayerShapesRecursively(it->first, inOutShapes);
3029         }
3030     }
3031
3032     void getLayerShapes(const ShapesVec& netInputShapes,
3033                         const int layerId,
3034                         LayerShapes& shapes)
3035     {
3036         LayersShapesMap inOutShapes;
3037         inOutShapes[0].in = netInputShapes; // seed shapes for the input (id 0) layer
3038         getLayerShapesRecursively(layerId, inOutShapes);
3039         shapes = inOutShapes[layerId];
3040     }
3041
3042     LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
3043     {
3044         return *std::max_element(pins.begin(), pins.end());
3045     }
3046
3047     Mat getBlob(const LayerPin& pin)
3048     {
3049         CV_TRACE_FUNCTION();
3050
3051         if (!pin.valid())
3052             CV_Error(Error::StsObjectNotFound, "Requested blob not found");
3053
3054         LayerData &ld = layers[pin.lid];
3055         if ((size_t)pin.oid >= ld.outputBlobs.size())
3056         {
3057             CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produces only %d outputs, "
3058                                            "but output #%d was requested", ld.name.c_str(),
3059                                            (int)ld.outputBlobs.size(), pin.oid));
3060         }
3061         if (preferableTarget != DNN_TARGET_CPU)
3062         {
3063             CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
3064             // Transfer data to CPU if required.
3065             ld.outputBlobsWrappers[pin.oid]->copyToHost();
3066         }
3067
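        // FP16 results are stored in CV_16S Mats; return a CV_32F copy to the caller.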
3068         if (ld.outputBlobs[pin.oid].depth() == CV_16S)
3069         {
3070             convertFp16(ld.outputBlobs[pin.oid], output_blob);
3071             return output_blob;
3072         }
3073         else
3074             return ld.outputBlobs[pin.oid];
3075     }
3076
3077     Mat getBlob(String outputName)
3078     {
3079         return getBlob(getPinByAlias(outputName));
3080     }
3081
3082 #ifdef CV_CXX11
3083     AsyncArray getBlobAsync(const LayerPin& pin)
3084     {
3085         CV_TRACE_FUNCTION();
3086 #ifdef HAVE_INF_ENGINE
3087         if (!pin.valid())
3088             CV_Error(Error::StsObjectNotFound, "Requested blob not found");
3089
3090         LayerData &ld = layers[pin.lid];
3091         if ((size_t)pin.oid >= ld.outputBlobs.size())
3092         {
3093             CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produces only %d outputs, "
3094                                            "but output #%d was requested", ld.name.c_str(),
3095                                            (int)ld.outputBlobs.size(), pin.oid));
3096         }
3097         if (preferableTarget != DNN_TARGET_CPU)
3098         {
3099             CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
3100             // Transfer data to CPU if required.
3101             ld.outputBlobsWrappers[pin.oid]->copyToHost();
3102         }
3103         CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
3104
3105         if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) {
3106 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3107             Ptr<InfEngineBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<InfEngineBackendWrapper>();
3108             return std::move(wrapper->futureMat);
3109 #else
3110             CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3111 #endif
3112         }
3113         else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
3114         {
3115 #ifdef HAVE_DNN_NGRAPH
3116             Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
3117             return std::move(wrapper->futureMat);
3118 #else
3119             CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
3120 #endif
3121         }
3122 #endif  // HAVE_INF_ENGINE
3123         CV_Error(Error::StsNotImplemented, "DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 or DNN_BACKEND_INFERENCE_ENGINE_NGRAPH backend is required");
3124     }
3125
3126     AsyncArray getBlobAsync(String outputName)
3127     {
3128         return getBlobAsync(getPinByAlias(outputName));
3129     }
3130 #endif  // CV_CXX11
3131
3132 #ifdef HAVE_INF_ENGINE
3133     static
3134     Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
3135 #endif
3136
3137     string dump();
3138
3139     void dumpNetworkToFile()
3140     {
3141 #ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
3142         string dumpFileNameBase = getDumpFileNameBase();
3143         string dumpFileName = dumpFileNameBase + ".dot";
3144         try
3145         {
3146             string dumpStr = dump();
3147             std::ofstream out(dumpFileName.c_str(), std::ios::out | std::ios::binary);
3148             out << dumpStr;
3149         }
3150         catch (const std::exception& e)
3151         {
3152             std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
3153             out << "Exception: " << e.what() << std::endl;
3154         }
3155         catch (...)
3156         {
3157             std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
3158             out << "Can't dump: unknown exception" << std::endl;
3159         }
3160 #endif
3161     }
3162 };
3163
3164 Net::Net() : impl(new Net::Impl)
3165 {
3166 }
3167
3168 #ifdef HAVE_INF_ENGINE
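// Wraps an already-parsed Inference Engine network into a cv::dnn::Net: the IE
// inputs become the Net inputs, a single backend node owns the whole IE network,
// and one proxy output layer is registered per IE output. skipInfEngineInit is
// set at the end so setUpNet() won't attempt to convert the graph back to IE.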
3169 /*static*/
3170 Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
3171 {
3172     CV_TRACE_FUNCTION();
3173
3174     CV_TRACE_REGION("register_inputs");
3175
3176     std::vector<String> inputsNames;
3177     std::vector<MatShape> inp_shapes;
3178     for (auto& it : ieNet.getInputsInfo())
3179     {
3180         inputsNames.push_back(it.first);
3181         std::vector<size_t> dims = it.second->getTensorDesc().getDims();
3182         inp_shapes.push_back(std::vector<int>(dims.begin(), dims.end()));
3183     }
3184
3185     Net cvNet;
3186     cvNet.setInputsNames(inputsNames);
3187
3188     // register input shapes (without data) so layer shapes can be inferred
3189     for (int inp_id = 0; inp_id < inputsNames.size(); ++inp_id)
3190     {
3191         cvNet.setInputShape(inputsNames[inp_id], inp_shapes[inp_id]);
3192     }
3193
3194     CV_TRACE_REGION_NEXT("backendNode");
3195
3196     Ptr<BackendNode> backendNode;
3197 #ifdef HAVE_DNN_NGRAPH
3198     if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
3199     {
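        // The ngraph Parameter below is only a placeholder needed to construct an
        // InfEngineNgraphNode; the real network is the InfEngineNgraphNet built
        // from ieNet on the following lines.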
3200         auto fake_node = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{});
3201         Ptr<InfEngineNgraphNode> backendNodeNGraph(new InfEngineNgraphNode(fake_node));
3202         backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*(cvNet.impl), ieNet));
3203         backendNode = backendNodeNGraph;
3204     }
3205     else
3206 #endif
3207     {
3208 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3209         Ptr<InfEngineBackendNode> backendNodeNN(new InfEngineBackendNode(InferenceEngine::Builder::Layer("")));
3210         backendNodeNN->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
3211         backendNode = backendNodeNN;
3212 #else
3213         CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3214 #endif
3215     }
3216
3217     CV_TRACE_REGION_NEXT("register_outputs");
3218
3219 #ifdef HAVE_DNN_NGRAPH
3220     auto ngraphFunction = ieNet.getFunction();
3221 #if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2)
3222     std::list< std::shared_ptr<ngraph::Node> > ngraphOperations;
3223 #else
3224     std::vector< std::shared_ptr<ngraph::Node> > ngraphOperations;
3225 #endif
3226     if (ngraphFunction)
3227     {
3228         ngraphOperations = ngraphFunction->get_ops();
3229     }
3230 #endif
3231
3232     for (auto& it : ieNet.getOutputsInfo())
3233     {
3234         CV_TRACE_REGION("output");
3235         const auto& outputName = it.first;
3236
3237         LayerParams lp;
3238         int lid = cvNet.addLayer(it.first, "", lp);
3239
3240         LayerData& ld = cvNet.impl->layers[lid];
3241
3242 #ifdef HAVE_DNN_NGRAPH
3243         if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
3244         {
3245             Ptr<Layer> cvLayer(new NgraphBackendLayer(ieNet));
3246             cvLayer->name = outputName;
3247             cvLayer->type = "_unknown_";
3248
3249             auto process_layer = [&](const std::string& name) -> bool
3250             {
3251                 if (ngraphFunction)
3252                 {
3253                     CV_TRACE_REGION("ngraph_function");
3254                     for (const auto& op : ngraphOperations)
3255                     {
3256                         CV_Assert(op);
3257                         if (op->get_friendly_name() == name)
3258                         {
3259                             const std::string typeName = op->get_type_info().name;
3260                             cvLayer->type = typeName;
3261                             return true;
3262                         }
3263                     }
3264                     return false;
3265                 }
3266                 else
3267                 {
3268 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
3269                     CV_Error(Error::StsNotImplemented, "This OpenCV version is built with Inference Engine which has dropped IR v7 support");
3270 #else
3271                     CV_TRACE_REGION("legacy_cnn_layer");
3272                     try
3273                     {
3274                         InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(name.c_str());
3275                         CV_Assert(ieLayer);
3276
3277                         cvLayer->type = ieLayer->type;
3278                         return true;
3279                     }
3280                     catch (const std::exception& e)
3281                     {
3282                         CV_UNUSED(e);
3283                         CV_LOG_DEBUG(NULL, "IE layer extraction failure: '" << name << "' - " << e.what());
3284                         return false;
3285                     }
3286 #endif
3287
3288                 }
3289             };
3290
3291             bool found = process_layer(outputName);
3292             if (!found)
3293             {
3294                 auto pos = outputName.rfind('.');  // cut port number: ".0"
3295                 if (pos != std::string::npos)
3296                 {
3297                     std::string layerName = outputName.substr(0, pos);
3298                     found = process_layer(layerName);
3299                 }
3300             }
3301             if (!found)
3302                 CV_LOG_WARNING(NULL, "DNN/IE: Can't determine output layer type: '" << outputName << "'");
3303
3304             ld.layerInstance = cvLayer;
3305             ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NGRAPH] = backendNode;
3306         }
3307         else
3308 #endif
3309         {
3310 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3311             Ptr<Layer> cvLayer(new InfEngineBackendLayer(ieNet));
3312
3313             InferenceEngine::CNNLayerPtr ieLayer;
3314             try
3315             {
3316                 ieLayer = ieNet.getLayerByName(outputName.c_str());
3317             }
3318             catch (...)
3319             {
3320                 auto pos = outputName.rfind('.');  // cut port number: ".0"
3321                 if (pos != std::string::npos)
3322                 {
3323                     std::string layerName = outputName.substr(0, pos);
3324                     ieLayer = ieNet.getLayerByName(layerName.c_str());
3325                 }
3326             }
3327             CV_Assert(ieLayer);
3328
3329             cvLayer->name = outputName;
3330             cvLayer->type = ieLayer->type;
3331             ld.layerInstance = cvLayer;
3332
3333             ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019] = backendNode;
3334 #else
3335             CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3336 #endif
3337         }
3338
3339         for (int i = 0; i < inputsNames.size(); ++i)
3340             cvNet.connect(0, i, lid, i);
3341     }
3342
3343     CV_TRACE_REGION_NEXT("finalize");
3344
3345     cvNet.setPreferableBackend(getInferenceEngineBackendTypeParam());
3346
3347     cvNet.impl->skipInfEngineInit = true;
3348     return cvNet;
3349 }
3350 #endif  // HAVE_INF_ENGINE
3351
3352 Net Net::readFromModelOptimizer(const String& xml, const String& bin)
3353 {
3354     CV_TRACE_FUNCTION();
3355 #ifndef HAVE_INF_ENGINE
3356     CV_UNUSED(xml); CV_UNUSED(bin);
3357     CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
3358 #else
3359 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
3360     InferenceEngine::CNNNetReader reader;
3361     reader.ReadNetwork(xml);
3362     reader.ReadWeights(bin);
3363
3364     InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
3365 #else
3366     InferenceEngine::Core& ie = getCore("");
3367     InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin);
3368 #endif
3369
3370     return Impl::createNetworkFromModelOptimizer(ieNet);
3371 #endif  // HAVE_INF_ENGINE
3372 }
3373
3374 Net Net::readFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights)
3375 {
3376     CV_TRACE_FUNCTION();
3377     CV_Assert(!bufferModelConfig.empty());
3378     CV_Assert(!bufferWeights.empty());
3379     return readFromModelOptimizer(bufferModelConfig.data(), bufferModelConfig.size(),
3380                                            bufferWeights.data(), bufferWeights.size());
3381 }
3382
3383 Net Net::readFromModelOptimizer(
3384         const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
3385         const uchar* bufferWeightsPtr, size_t bufferWeightsSize
3386 )
3387 {
3388     CV_TRACE_FUNCTION();
3389 #ifndef HAVE_INF_ENGINE
3390     CV_UNUSED(bufferModelConfigPtr); CV_UNUSED(bufferWeightsPtr);
3391     CV_UNUSED(bufferModelConfigSize); CV_UNUSED(bufferWeightsSize);
3392     CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
3393 #else
3394
3395 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
3396     InferenceEngine::CNNNetReader reader;
3397
3398     try
3399     {
3400         reader.ReadNetwork(bufferModelConfigPtr, bufferModelConfigSize);
3401
3402         InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
3403         InferenceEngine::TBlob<uint8_t>::Ptr weightsBlobPtr(new InferenceEngine::TBlob<uint8_t>(tensorDesc));
3404         weightsBlobPtr->allocate();
3405         std::memcpy(weightsBlobPtr->buffer(), (uchar*)bufferWeightsPtr, bufferWeightsSize);
3406         reader.SetWeights(weightsBlobPtr);
3407     }
3408     catch (const std::exception& e)
3409     {
3410         CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
3411     }
3412
3413     InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
3414 #else
3415     InferenceEngine::Core& ie = getCore("");
3416
3417     std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize);
3418
3419     InferenceEngine::CNNNetwork ieNet;
3420     try
3421     {
3422         InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
3423         InferenceEngine::Blob::CPtr weights_blob = InferenceEngine::make_shared_blob<uint8_t>(tensorDesc, (uint8_t*)bufferWeightsPtr, bufferWeightsSize);
3424
3425         ieNet = ie.ReadNetwork(model, weights_blob);
3426     }
3427     catch (const std::exception& e)
3428     {
3429         CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
3430     }
3431 #endif
3432
3433     return Impl::createNetworkFromModelOptimizer(ieNet);
3434 #endif  // HAVE_INF_ENGINE
3435 }
3436
3437
3438 Net::~Net()
3439 {
3440 }
3441
3442 int Net::addLayer(const String &name, const String &type, LayerParams &params)
3443 {
3444     CV_TRACE_FUNCTION();
3445
3446     if (impl->getLayerId(name) >= 0)
3447     {
3448         CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already exists in the net");
3449         return -1;
3450     }
3451
3452     int id = ++impl->lastLayerId;
3453     impl->layerNameToId.insert(std::make_pair(name, id));
3454     impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));
3455
3456     return id;
3457 }
3458
3459 int Net::addLayerToPrev(const String &name, const String &type, LayerParams &params)
3460 {
3461     CV_TRACE_FUNCTION();
3462
3463     int prvLid = impl->lastLayerId;
3464     int newLid = this->addLayer(name, type, params);
3465     this->connect(prvLid, 0, newLid, 0);
3466     return newLid;
3467 }
3468
3469 void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum)
3470 {
3471     CV_TRACE_FUNCTION();
3472
3473     impl->connect(outLayerId, outNum, inpLayerId, inpNum);
3474 }
3475
3476 void Net::connect(String _outPin, String _inPin)
3477 {
3478     CV_TRACE_FUNCTION();
3479
3480     LayerPin outPin = impl->getPinByAlias(_outPin);
3481     LayerPin inpPin = impl->getPinByAlias(_inPin);
3482
3483     CV_Assert(outPin.valid() && inpPin.valid());
3484
3485     impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
3486 }
3487
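// Runs a forward pass and returns the blob of the named output layer (or of the
// last layer when no name is given). A minimal usage sketch; the model file and
// preprocessing values are illustrative, not part of this module:
//   Net net = readNetFromONNX("model.onnx");
//   net.setInput(blobFromImage(img, 1.0, Size(224, 224)));
//   Mat prob = net.forward();  // blob produced by the last layer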
3488 Mat Net::forward(const String& outputName)
3489 {
3490     CV_TRACE_FUNCTION();
3491     CV_Assert(!empty());
3492
3493     String layerName = outputName;
3494
3495     if (layerName.empty())
3496     {
3497         std::vector<String> layerNames = getLayerNames();
3498         CV_Assert(!layerNames.empty());
3499         layerName = layerNames.back();
3500     }
3501
3502     std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
3503     impl->setUpNet(pins);
3504     impl->forwardToLayer(impl->getLayerData(layerName));
3505
3506     return impl->getBlob(layerName);
3507 }
3508
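// Asynchronous variant of forward(), implemented for the Inference Engine
// backends only. A sketch of the intended usage (names are illustrative):
//   net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
//   AsyncArray async = net.forwardAsync();
//   Mat result;
//   async.get(result);  // blocks until the inference request completes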
3509 AsyncArray Net::forwardAsync(const String& outputName)
3510 {
3511     CV_TRACE_FUNCTION();
3512     CV_Assert(!empty());
3513
3514 #ifdef CV_CXX11
3515     String layerName = outputName;
3516
3517     if (layerName.empty())
3518     {
3519         std::vector<String> layerNames = getLayerNames();
3520         CV_Assert(!layerNames.empty());
3521         layerName = layerNames.back();
3522     }
3523
3524     std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
3525     impl->setUpNet(pins);
3526
3527     if (!(impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH))
3528         CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backends only");
3529
3530     impl->isAsync = true;
3531     impl->forwardToLayer(impl->getLayerData(layerName));
3532     impl->isAsync = false;
3533
3534     return impl->getBlobAsync(layerName);
3535 #else
3536     CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward requires a build with C++11 enabled");
3537 #endif  // CV_CXX11
3538 }
3539
3540 void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
3541 {
3542     CV_TRACE_FUNCTION();
3543     CV_Assert(!empty());
3544
3545     String layerName = outputName;
3546
3547     if (layerName.empty())
3548     {
3549         std::vector<String> layerNames = getLayerNames();
3550         CV_Assert(!layerNames.empty());
3551         layerName = layerNames.back();
3552     }
3553
3554     std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
3555     impl->setUpNet(pins);
3556     impl->forwardToLayer(impl->getLayerData(layerName));
3557
3558     LayerPin pin = impl->getPinByAlias(layerName);
3559     LayerData &ld = impl->layers[pin.lid];
3560
3561     if (outputBlobs.isUMat())
3562     {
3563         impl->getBlob(layerName).copyTo(outputBlobs);
3564     }
3565     else if (outputBlobs.isMat())
3566     {
3567         outputBlobs.assign(impl->getBlob(layerName));
3568     }
3569     else if (outputBlobs.isMatVector())
3570     {
3571         if (impl->preferableTarget != DNN_TARGET_CPU)
3572         {
3573             for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
3574             {
3575                 CV_Assert(!ld.outputBlobsWrappers[i].empty());
3576                 ld.outputBlobsWrappers[i]->copyToHost();
3577             }
3578         }
3579         if (ld.outputBlobs[0].depth() == CV_32F)
3580         {
3581             std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
3582             outputvec = ld.outputBlobs;
3583         } else {
3584             std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
3585             outputvec.resize(ld.outputBlobs.size());
3586             for (int i = 0; i < outputvec.size(); i++)
3587                 convertFp16(ld.outputBlobs[i], outputvec[i]);
3588         }
3589     }
3590     else if (outputBlobs.isUMatVector())
3591     {
3592         std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
3593
3594 #ifdef HAVE_OPENCL
3595         if (impl->preferableBackend == DNN_BACKEND_OPENCV &&
3596             IS_DNN_OPENCL_TARGET(impl->preferableTarget))
3597         {
3598             if (impl->preferableTarget == DNN_TARGET_OPENCL)
3599                 outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
3600             else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
3601             {
3602                 std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
3603                 outputvec.resize(out_vec.size());
3604                 for (int i = 0; i < out_vec.size(); i++)
3605                     convertFp16(out_vec[i], outputvec[i]);
3606             }
3607         }
3608         else
3609 #endif
3610         {
3611             outputvec.resize(ld.outputBlobs.size());
3612             for (int i = 0; i < outputvec.size(); ++i)
3613                 ld.outputBlobs[i].copyTo(outputvec[i]);
3614         }
3615     }
3616 }
3617
3618 void Net::forward(OutputArrayOfArrays outputBlobs,
3619                   const std::vector<String>& outBlobNames)
3620 {
3621     CV_TRACE_FUNCTION();
3622
3623     std::vector<LayerPin> pins;
3624     for (int i = 0; i < outBlobNames.size(); i++)
3625     {
3626         pins.push_back(impl->getPinByAlias(outBlobNames[i]));
3627     }
3628
3629     impl->setUpNet(pins);
3630
3631     LayerPin out = impl->getLatestLayerPin(pins);
3632
3633     impl->forwardToLayer(impl->getLayerData(out.lid));
3634
3635     std::vector<Mat> matvec;
3636     for (int i = 0; i < pins.size(); i++)
3637     {
3638         matvec.push_back(impl->getBlob(pins[i]));
3639     }
3640
3641     std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
3642     outputvec = matvec;
3643 }
3644
3645 void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
3646                      const std::vector<String>& outBlobNames)
3647 {
3648     CV_TRACE_FUNCTION();
3649
3650     std::vector<LayerPin> pins;
3651     for (int i = 0; i < outBlobNames.size(); i++)
3652     {
3653         pins.push_back(impl->getPinByAlias(outBlobNames[i]));
3654     }
3655
3656     impl->setUpNet(pins);
3657
3658     LayerPin out = impl->getLatestLayerPin(pins);
3659
3660     impl->forwardToLayer(impl->getLayerData(out.lid));
3661
3662     outputBlobs.resize(outBlobNames.size());
3663     for (int i = 0; i < outBlobNames.size(); i++)
3664     {
3665         std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
3666         outputBlobs[i].resize(lp.size());
3667         for (int j = 0; j < lp.size(); j++)
3668         {
3669             outputBlobs[i][j] = impl->getBlob(lp[j]);
3670         }
3671     }
3672 }
3673
3674 void Net::setPreferableBackend(int backendId)
3675 {
3676     CV_TRACE_FUNCTION();
3677     CV_TRACE_ARG(backendId);
3678
3679 #ifdef HAVE_INF_ENGINE
3680     if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
3681         backendId = getInferenceEngineBackendTypeParam();
3682 #endif
3683
3684     if( impl->preferableBackend != backendId )
3685     {
3686         impl->preferableBackend = backendId;
3687         impl->netWasAllocated = false;
3688         impl->clear();
3689     }
3690 }
3691
3692 void Net::setPreferableTarget(int targetId)
3693 {
3694     CV_TRACE_FUNCTION();
3695     CV_TRACE_ARG(targetId);
3696
3697     if( impl->preferableTarget != targetId )
3698     {
3699         impl->preferableTarget = targetId;
3700         if (IS_DNN_OPENCL_TARGET(targetId))
3701         {
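            // OpenCL targets degrade gracefully: without OpenCL support the
            // OpenCV backend falls back to CPU; with OpenCL but no cl_khr_fp16
            // extension, an OPENCL_FP16 request falls back to plain OPENCL.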
3702 #ifndef HAVE_OPENCL
3703 #ifdef HAVE_INF_ENGINE
3704             if (impl->preferableBackend == DNN_BACKEND_OPENCV)
3705 #else
3706             if (impl->preferableBackend == DNN_BACKEND_DEFAULT ||
3707                 impl->preferableBackend == DNN_BACKEND_OPENCV)
3708 #endif  // HAVE_INF_ENGINE
3709                 impl->preferableTarget = DNN_TARGET_CPU;
3710 #else
3711             bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
3712             if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
3713                 impl->preferableTarget = DNN_TARGET_OPENCL;
3714 #endif
3715         }
3716         impl->netWasAllocated = false;
3717         impl->clear();
3718     }
3719 }
3720
3721 void Net::setInputsNames(const std::vector<String> &inputBlobNames)
3722 {
3723     CV_TRACE_FUNCTION();
3724
3725     impl->netInputLayer->setNames(inputBlobNames);
3726 }
3727
3728 void Net::setInputShape(const String &inputName, const MatShape& shape)
3729 {
3730     CV_TRACE_FUNCTION();
3731
3732     impl->netInputLayer->setInputShape(inputName, shape);
3733 }
3734
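// Binds a blob to a network input. scalefactor and mean are stored in the input
// layer and applied lazily during forward, e.g. (illustrative values):
//   net.setInput(blob, "data", 1/255.0, Scalar(123.68, 116.78, 103.94));
// Changing the input shape invalidates the allocation plan (netWasAllocated),
// so the next forward pass reallocates the network.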
3735 void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
3736 {
3737     CV_TRACE_FUNCTION();
3738     CV_TRACE_ARG_VALUE(name, "name", name.c_str());
3739
3740     LayerPin pin;
3741     pin.lid = 0;
3742     pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name);
3743
3744     if (!pin.valid())
3745         CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");
3746
3747     Mat blob_ = blob.getMat();  // can't use InputArray directly due to MatExpr stuff
3748     MatShape blobShape = shape(blob_);
3749
3750     if (pin.lid == 0)
3751     {
3752         CV_Assert(!impl->netInputLayer.empty());
3753         const DataLayer& netInputLayer = *impl->netInputLayer.get();
3754         if (!netInputLayer.shapes.empty())
3755         {
3756             CV_CheckLT(pin.oid, (int)netInputLayer.shapes.size(), "");
3757             const MatShape& inputShapeLimitation = netInputLayer.shapes[pin.oid];
3758             if (!inputShapeLimitation.empty())
3759             {
3760                 CV_CheckEQ(inputShapeLimitation.size(), blobShape.size(), "");
3761 #if 0  // TODO: DNNTestNetwork.MobileNet_SSD_Caffe_Different_Width_Height/0
3762                 const size_t dims = inputShapeLimitation.size();
3763                 for (size_t dim = 0; dim < dims; dim++)
3764                 {
3765                     if (dims >= 3 && dim == 0 && inputShapeLimitation[0] == 1)
3766                         continue;  // don't limit batch
3767                     CV_CheckEQ(inputShapeLimitation[dim], blobShape[dim], "");
3768                 }
3769 #endif
3770             }
3771         }
3772     }
3773
3774     LayerData &ld = impl->layers[pin.lid];
3775     const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size());
3776     ld.outputBlobs.resize(numInputs);
3777     ld.outputBlobsWrappers.resize(numInputs);
3778     impl->netInputLayer->inputsData.resize(numInputs);
3779     impl->netInputLayer->scaleFactors.resize(numInputs);
3780     impl->netInputLayer->means.resize(numInputs);
3781
3782     MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
3783     bool oldShape = prevShape == blobShape;
3784
3785     blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
3786     if (!oldShape)
3787         ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid];
3788
3789     if (!ld.outputBlobsWrappers[pin.oid].empty())
3790     {
3791         ld.outputBlobsWrappers[pin.oid]->setHostDirty();
3792     }
3793     impl->netInputLayer->scaleFactors[pin.oid] = scalefactor;
3794     impl->netInputLayer->means[pin.oid] = mean;
3795     impl->netWasAllocated = impl->netWasAllocated && oldShape;
3796 }
3797
3798 Mat Net::getParam(LayerId layer, int numParam)
3799 {
3800     LayerData &ld = impl->getLayerData(layer);
3801     std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
3802     CV_Assert(numParam < (int)layerBlobs.size());
3803     return layerBlobs[numParam];
3804 }
3805
3806 void Net::setParam(LayerId layer, int numParam, const Mat &blob)
3807 {
3808     LayerData &ld = impl->getLayerData(layer);
3809
3810     std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
3811     CV_Assert(numParam < (int)layerBlobs.size());
3812     // we don't perform strong checks; use this function carefully
3813     layerBlobs[numParam] = blob;
3814 }
3815
3816 int Net::getLayerId(const String &layer)
3817 {
3818     return impl->getLayerId(layer);
3819 }
3820
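// Formats a size-like layer parameter (e.g. "kernel_size") for the dump below,
// producing strings such as "kernel_size (HxW): 3 x 3".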
3821 static
3822 string dumpLayerParameterSize(const string& name, const LayerParams& lp)
3823 {
3824     std::ostringstream out(name, std::ios::ate);
3825     DictValue param = lp.get(name);
3826     switch (param.size())
3827     {
3828         case 1: out << " : "; break;
3829         case 2: out << " (HxW): "; break;
3830         case 3: out << " (DxHxW): "; break;
3831         default:
3832             CV_LOG_INFO(NULL, format("DNN/dumpLayerParameterSize(): Unsupported '%s' size = %d", name.c_str(), param.size()));
3833             out << ": ";
3834     }
3835     for (size_t i = 0; i < param.size(); i++)
3836     {
3837         if (i > 0)
3838             out << " x ";
3839         out << param.get<int>(i);
3840     }
3841     return out.str();
3842 }
3843
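// Serializes the network structure to Graphviz DOT format. A typical way to
// render the result outside of OpenCV (shell command, illustrative):
//   dot -Tpng net.dot -o net.png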
3844 String Net::dump()
3845 {
3846     CV_Assert(!empty());
3847
3848     bool hasInput = !impl->netInputLayer->inputsData.empty();
3849
3850     if (hasInput)
3851     {
3852         if (!impl->netWasAllocated)
3853             impl->setUpNet();
3854     }
3855
3856     return impl->dump();
3857 }
3858
3859 string Net::Impl::dump()
3860 {
3861     bool hasInput = !netInputLayer->inputsData.empty();
3862
3863     std::ostringstream out;
3864     const std::map<int, LayerData>& map = layers;
3865
3866     Backend prefBackend = (Backend)preferableBackend;
3867     std::vector<std::vector<int> > skippedLayers;
3868     std::vector<int> skipId;
3869     std::vector<int> allLayers(map.size(), -1);
3870     int idPrev = -1;
3871     Ptr<BackendNode> prevNode;
3872     for (std::map<int, LayerData>::const_reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit)
3873     {
3874         std::map<int, Ptr<BackendNode> >::const_iterator itBackend = rit->second.backendNodes.find(prefBackend);
3875         if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() ||
3876             itBackend->second.empty())
3877         {
3878             if (rit->second.skip)
3879                 skipId.push_back(rit->first);
3880             else if (!skipId.empty())
3881             {
3882                 if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty())
3883                     skipId.push_back(rit->first);
3884                 else if (idPrev != -1)
3885                     skipId.push_back(idPrev);
3886
3887                 std::sort(skipId.begin(), skipId.end());
3888                 for (int i = 0; i < skipId.size(); i++) {
3889                     allLayers[skipId[i]] = skippedLayers.size();
3890                 }
3891                 skippedLayers.push_back(skipId);
3892                 skipId.clear();
3893             }
3894         }
3895         else
3896         {
3897             if (itBackend->second == prevNode)
3898                 skipId.push_back(idPrev);
3899             else if (!skipId.empty())
3900             {
3901                 skipId.push_back(idPrev);
3902                 std::sort(skipId.begin(), skipId.end());
3903                 for (int i = 0; i < skipId.size(); i++) {
3904                     allLayers[skipId[i]] = skippedLayers.size();
3905                 }
3906                 skippedLayers.push_back(skipId);
3907                 skipId.clear();
3908             }
3909             idPrev = rit->first;
3910             prevNode = itBackend->second;
3911         }
3912     }
3913     string colors[] = {"#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462"};
3914     string backend;
3915     switch (prefBackend)
3916     {
3917         case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break;
3918         case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break;
3919         case DNN_BACKEND_INFERENCE_ENGINE: // fallthru
3920         case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: backend = "DLIE/"; break;
3921         case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "NGRAPH/"; break;
3922         case DNN_BACKEND_OPENCV: backend = "OCV/"; break;
3923         // don't use default:
3924     }
3925     out << "digraph G {\n";
3926     // Add nodes
3927     for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
3928     {
3929         const LayerData& ld = it->second;
3930         string name = ld.params.name;
3931         std::vector<int> clusterIds(1, it->first);
3932         if (allLayers[it->first] == -1 && !name.empty())
3933         {
3934             out << "\t\"" << name << "\" [label=\"";
3935         }
3936         else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0])
3937         {
3938             continue;
3939         }
3940         else // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0]
3941         {
3942             int cluster = allLayers[it->first];
3943             out << "\t\"" << "cluster_" << cluster << "\" [label=\"{";
3944             clusterIds = skippedLayers[allLayers[it->first]]; // vertices in current cluster
3945         }
3946         for (int i = 0; i < clusterIds.size(); i++)
3947         {
3948             CV_DbgAssert(map.find(clusterIds[i]) != map.end());
3949             const LayerParams& lp = map.find(clusterIds[i])->second.params;
3950             if (!lp.name.empty()) {
3951                 if (i > 0) {
3952                     out << " | ";
3953                 }
3954                 out << lp.name << "\\n" << lp.type << "\\n";  // align center
3955                 if (lp.has("kernel_size"))
3956                 {
3957                     string kernel = dumpLayerParameterSize("kernel_size", lp);
3958                     out << kernel;
3959                     out << "\\l";  // align left
3960                 } else if (lp.has("kernel_h") && lp.has("kernel_w")) {
3961                     DictValue h = lp.get("kernel_h");
3962                     DictValue w = lp.get("kernel_w");
3963                     out << "kernel (HxW): " << h << " x " << w;
3964                     out << "\\l";  // align left
3965                 }
3966                 if (lp.has("stride")) {
3967                     string stride = dumpLayerParameterSize("stride", lp);
3968                     out << stride;
3969                     out << "\\l";  // align left
3970                 } else if (lp.has("stride_h") && lp.has("stride_w")) {
3971                     DictValue h = lp.get("stride_h");
3972                     DictValue w = lp.get("stride_w");
3973                     out << "stride (HxW): " << h << " x " << w;
3974                     out << "\\l";  // align left
3975                 }
3976                 if (lp.has("dilation")) {
3977                     string dilation = dumpLayerParameterSize("dilation", lp);
3978                     out << dilation;
3979                     out << "\\l";  // align left
3980                 } else if (lp.has("dilation_h") && lp.has("dilation_w")) {
3981                     DictValue h = lp.get("dilation_h");
3982                     DictValue w = lp.get("dilation_w");
3983                     out << "dilation (HxW): " << h << " x " << w;
3984                     out << "\\l";  // align left
3985                 }
3986                 if (lp.has("pad")) {
3987                     DictValue pad = lp.get("pad");
3988                     out << "pad ";
3989                     switch (pad.size())
3990                     {
3991                         case 1: out << ": " << pad; break;
3992                         case 2:
3993                             out << "(HxW): (" << pad.get<int>(0) << " x " << pad.get<int>(1) << ")";
3994                             break;
3995                         case 4:
3996                             out << "(HxW): (" << pad.get<int>(0) << ", " << pad.get<int>(2)
3997                                 << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(3) << ")";
3998                             break;
3999                         case 6:
4000                             out << "(DxHxW): (" << pad.get<int>(0) << ", " << pad.get<int>(3)
4001                                 << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(4)
4002                                 << ") x (" << pad.get<int>(2) << ", " << pad.get<int>(5) << ")";
4003                             break;
4004                         default: CV_Error(Error::StsNotImplemented,  format("Unsupported pad size = %d", pad.size()));
4005                     }
4006                     out << "\\l";  // align left
4007                 } else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b")) {
4008                     DictValue l = lp.get("pad_l");
4009                     DictValue t = lp.get("pad_t");
4010                     DictValue r = lp.get("pad_r");
4011                     DictValue b = lp.get("pad_b");
4012                     out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")";
4013                     out << "\\l";  // align left
4014                 }
4015                 else if (lp.has("pooled_w") && lp.has("pooled_h")) {
4016                     DictValue h = lp.get("pooled_h");
4017                     DictValue w = lp.get("pooled_w");
4018                     out << "pooled (HxW): " << h << " x " << w;
4019                     out << "\\l";  // align left
4020                 }
4021                 if (lp.has("pool")) {
4022                     out << "pool: " << lp.get("pool");
4023                     out << "\\l";  // align left
4024                 }
4025                 if (lp.has("global_pooling")) {
4026                     out << "global_pooling: " << lp.get("global_pooling");
4027                     out << "\\l";  // align left
4028                 }
4029                 if (lp.has("group")) {
4030                     out << "group: " << lp.get("group");
4031                     out << "\\l";  // align left
4032                 }
4033             }
4034         }
4035         if (!ld.outputBlobs.empty())
4036         {
4037             out << "output: " << ld.outputBlobs[0].size;
4038             out << "\\l";  // align left
4039         }
4040
4041         Ptr<BackendNode> layerBackend;
4042         std::map<int, Ptr<BackendNode> >::const_iterator ibn = ld.backendNodes.find(prefBackend);
4043         if (ibn != ld.backendNodes.end())
4044             layerBackend = ibn->second;
4045         out << (!layerBackend.empty() ? backend : "OCV/");
4046         int colorId = 0;
4047         const Target target = ld.layerInstance.empty()
4048                 ? DNN_TARGET_CPU
4049                 : (Target)(ld.layerInstance->preferableTarget);  // TODO fix preferableTarget type
4050         switch (target)
4051         {
4052             case DNN_TARGET_CPU: out << "CPU"; colorId = layerBackend.empty() ? 0 : 5; break;
4053             case DNN_TARGET_OPENCL: out << "OCL"; colorId = 1; break;
4054             case DNN_TARGET_OPENCL_FP16: out << "OCL_FP16"; colorId = 2; break;
4055             case DNN_TARGET_MYRIAD: out << "MYRIAD"; colorId = 3; break;
4056             case DNN_TARGET_FPGA: out << "FPGA"; colorId = 4; break;
4057             // don't use default:
4058         }
4059         out << "\\n";  // align center
4060         out << ((clusterIds.size() == 1)? "\" " : " }\" ");
4061         out << "fillcolor=\"" << colors[colorId] << "\" ";
4062         out << "style=filled ";
4063         out << "shape=" << ((clusterIds.size() == 1)? "box" : "record") << "]\n";
4064     }
4065     out << '\n';
4066     // Add edges
4067     int inputsSize = hasInput ? (int)netInputLayer->outNames.size() : 0;
4068     for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
4069     {
4070         const LayerData& ld = it->second;
4071         if (allLayers[it->first] == -1)  // node
4072         {
4073             for (int i = 0; i < ld.consumers.size(); i++)
4074             {
4075                 int outId = ld.consumers[i].lid;
4076                 if (it == map.begin() && inputsSize > 1)
4077                     out << "\t\"" << ld.name << "_" << i << "\"" << " -> ";
4078                 else
4079                     out << "\t\"" << ld.name << "\"" << " -> ";
4080                 if (allLayers[outId] == -1)  // node
4081                 {
4082                     CV_DbgAssert(map.find(outId) != map.end());
4083                     out << "\"" << map.find(outId)->second.name << "\"\n";
4084                 }
4085                 else  // cluster
4086                 {
4087                     out << "\"" << "cluster_" << allLayers[outId] << "\"\n";
4088                 }
4089             }
4090         }
4091         else if (it->first == skippedLayers[allLayers[it->first]].back())  // edges from last layer in cluster
4092         {
4093             for (int i = 0; i < ld.consumers.size(); i++)
4094             {
4095                 int outId = ld.consumers[i].lid;
4096                 if (allLayers[outId] == -1) // node
4097                 {
4098                     CV_DbgAssert(map.find(outId) != map.end());
4099                     out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
4100                     out << "\"" << map.find(outId)->second.name << "\"\n";
4101                 }
4102                 else if (allLayers[outId] != allLayers[it->first]) { // another cluster
4103                     out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
4104                     out << "\"" << "cluster_" << allLayers[outId] << "\"\n";
4105                 }
4106             }
4107         }
4108     }
4109     out << "}\n";
4110     return out.str();
4111 }
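/* The string produced above is plain GraphViz DOT: one node per layer (or per
 * fused cluster, drawn as a record), colored by backend/target, followed by
 * the edge list. A fragment might look roughly like this (names, labels and
 * colors are illustrative only):
 *
 *     digraph G {
 *         "conv1" [label="..." fillcolor="#ffffb3" style=filled shape=box]
 *         "conv1" -> "relu1"
 *     }
 */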
4112
4113 void Net::dumpToFile(const String& path) {
4114     std::ofstream file(path.c_str());
4115     file << dump();
4116     file.close();
4117 }
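/* Usage sketch (file names are hypothetical). For meaningful shape, backend
 * and fusion annotations, dump after the input is set (or after forward()):
 *
 *     Net net = readNetFromCaffe("deploy.prototxt", "weights.caffemodel");
 *     net.setInput(blobFromImage(img));
 *     net.dumpToFile("net.dot");
 *     // render offline with GraphViz: dot -Tpng net.dot -o net.png
 */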
4118
4119 Ptr<Layer> Net::getLayer(LayerId layerId)
4120 {
4121     LayerData &ld = impl->getLayerData(layerId);
4122     return ld.getLayerInstance();
4123 }
4124
4125 std::vector<Ptr<Layer> > Net::getLayerInputs(LayerId layerId)
4126 {
4127     LayerData &ld = impl->getLayerData(layerId);
4128
4129     std::vector<Ptr<Layer> > inputLayers;
4130     inputLayers.reserve(ld.inputBlobsId.size());
4131     for (int i = 0; i < ld.inputBlobsId.size(); ++i) {
4132         inputLayers.push_back(getLayer(ld.inputBlobsId[i].lid));
4133     }
4134     return inputLayers;
4135 }
4136
4137 std::vector<String> Net::getLayerNames() const
4138 {
4139     CV_TRACE_FUNCTION();
4140
4141     std::vector<String> res;
4142     res.reserve(impl->layers.size());
4143
4144     Impl::MapIdToLayerData::iterator it;
4145     for (it = impl->layers.begin(); it != impl->layers.end(); it++)
4146     {
4147         if (it->second.id) //skip Data layer
4148             res.push_back(it->second.name);
4149     }
4150
4151     return res;
4152 }
4153
4154 bool Net::empty() const
4155 {
4156     return impl->layers.size() <= 1; //first layer is default Data layer
4157 }
4158
4159 std::vector<int> Net::getUnconnectedOutLayers() const
4160 {
4161     std::vector<int> layersIds;
4162
4163     Impl::MapIdToLayerData::iterator it;
4164     for (it = impl->layers.begin(); it != impl->layers.end(); it++)
4165     {
4166         int lid = it->first;
4167         LayerData &ld = it->second;
4168
4169         if (ld.requiredOutputs.size() == 0)
4170             layersIds.push_back(lid);
4171     }
4172
4173     return layersIds;
4174 }
4175
4176 std::vector<String> Net::getUnconnectedOutLayersNames() const
4177 {
4178     std::vector<int> ids = getUnconnectedOutLayers();
4179     const size_t n = ids.size();
4180     std::vector<String> names(n);
4181     for (size_t i = 0; i < n; ++i)
4182     {
4183         names[i] = impl->layers[ids[i]].name;
4184     }
4185     return names;
4186 }
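/* Typical use, e.g. for multi-output detection models: query the output layer
 * names once and forward to all of them in a single call (`net` and `blob`
 * are assumed to be prepared by the caller):
 *
 *     std::vector<String> outNames = net.getUnconnectedOutLayersNames();
 *     std::vector<Mat> outs;
 *     net.setInput(blob);
 *     net.forward(outs, outNames);
 */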
4187
4188 void Net::getLayersShapes(const ShapesVec& netInputShapes,
4189                           std::vector<int>& layersIds,
4190                           std::vector<ShapesVec>& inLayersShapes,
4191                           std::vector<ShapesVec>& outLayersShapes) const
4192 {
4193     layersIds.clear();
4194     inLayersShapes.clear();
4195     outLayersShapes.clear();
4196
4197     Impl::LayersShapesMap inOutShapes;
4198     impl->getLayersShapes(netInputShapes, inOutShapes);
4199
4200     for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin();
4201         it != inOutShapes.end(); it++)
4202     {
4203         layersIds.push_back(it->first);
4204         inLayersShapes.push_back(it->second.in);
4205         outLayersShapes.push_back(it->second.out);
4206     }
4207 }
4208
4209 void Net::getLayersShapes(const MatShape& netInputShape,
4210                           std::vector<int>& layerIds,
4211                           std::vector<ShapesVec>& inLayersShapes,
4212                           std::vector<ShapesVec>& outLayersShapes) const
4213 {
4214     getLayersShapes(ShapesVec(1, netInputShape),
4215                     layerIds, inLayersShapes, outLayersShapes);
4216 }
4217
4218 void Net::getLayerShapes(const MatShape& netInputShape,
4219                          const int layerId,
4220                          ShapesVec& inLayerShapes,
4221                          ShapesVec& outLayerShapes) const
4222 {
4223     getLayerShapes(ShapesVec(1, netInputShape),
4224                    layerId, inLayerShapes, outLayerShapes);
4225 }
4227
4228 void Net::getLayerShapes(const ShapesVec& netInputShapes,
4229                     const int layerId,
4230                     ShapesVec& inLayerShapes,
4231                     ShapesVec& outLayerShapes) const
4232 {
4233     LayerShapes shapes;
4234     impl->getLayerShapes(netInputShapes, layerId, shapes);
4235     inLayerShapes = shapes.in;
4236     outLayerShapes = shapes.out;
4237 }
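/* Shape queries run shape inference only; no forward pass is executed and no
 * blobs are allocated. A minimal sketch for a hypothetical NCHW input:
 *
 *     MatShape inputShape = shape(1, 3, 224, 224);
 *     std::vector<MatShape> inShapes, outShapes;
 *     net.getLayerShapes(inputShape, layerId, inShapes, outShapes);
 */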
4238
4239 int64 Net::getFLOPS(const std::vector<MatShape>& netInputShapes) const
4240 {
4241     CV_TRACE_FUNCTION();
4242
4243     int64 flops = 0;
4244     std::vector<int> ids;
4245     std::vector<std::vector<MatShape> > inShapes, outShapes;
4246     getLayersShapes(netInputShapes, ids, inShapes, outShapes);
4247     CV_Assert(inShapes.size() == outShapes.size());
4248     CV_Assert(inShapes.size() == ids.size());
4249
4250     for(int i = 0; i < ids.size(); i++)
4251     {
4252         flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i],
4253                                                                    outShapes[i]);
4254     }
4255
4256     return flops;
4257 }
4258
4259 int64 Net::getFLOPS(const MatShape& netInputShape) const
4260 {
4261     return getFLOPS(std::vector<MatShape>(1, netInputShape));
4262 }
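/* Example: estimate the theoretical complexity for one input geometry (the
 * 1x3x300x300 shape is illustrative):
 *
 *     int64 flops = net.getFLOPS(shape(1, 3, 300, 300));
 *     std::cout << "GFLOPs: " << flops * 1e-9 << std::endl;
 */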
4263
4264 int64 Net::getFLOPS(const int layerId,
4265               const std::vector<MatShape>& netInputShapes) const
4266 {
4267     Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
4268     CV_Assert(layer != impl->layers.end());
4269
4270     LayerShapes shapes;
4271     impl->getLayerShapes(netInputShapes, layerId, shapes);
4272
4273     return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
4274 }
4275
4276 int64 Net::getFLOPS(const int layerId,
4277               const MatShape& netInputShape) const
4278 {
4279     return getFLOPS(layerId, std::vector<MatShape>(1, netInputShape));
4280 }
4281
4282 void Net::getLayerTypes(std::vector<String>& layersTypes) const
4283 {
4284     layersTypes.clear();
4285
4286     std::map<String, int> layers;
4287     for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
4288          it != impl->layers.end(); it++)
4289     {
4290         if (layers.find(it->second.type) == layers.end())
4291             layers[it->second.type] = 0;
4292         layers[it->second.type]++;
4293     }
4294
4295     for (std::map<String, int>::iterator it = layers.begin();
4296          it != layers.end(); it++)
4297     {
4298         layersTypes.push_back(it->first);
4299     }
4300 }
4301
4302 int Net::getLayersCount(const String& layerType) const
4303 {
4304     int count = 0;
4305     for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
4306          it != impl->layers.end(); it++)
4307     {
4308         if (it->second.type == layerType)
4309             count++;
4310     }
4311     return count;
4312 }
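/* Example: list the distinct layer types used by a loaded net and count one
 * of them; the type string matches LayerParams::type (e.g. "Convolution"):
 *
 *     std::vector<String> types;
 *     net.getLayerTypes(types);
 *     int numConv = net.getLayersCount("Convolution");
 */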
4313
4314 void Net::getMemoryConsumption(const int layerId,
4315                                const std::vector<MatShape>& netInputShapes,
4316                                size_t& weights, size_t& blobs) const
4317 {
4318     CV_TRACE_FUNCTION();
4319
4320     Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
4321     CV_Assert(layer != impl->layers.end());
4322
4323     weights = blobs = 0;
4324
4325     for(int i = 0; i < layer->second.params.blobs.size(); i++)
4326     {
4327         const Mat& weightsBlob = layer->second.params.blobs[i];
4328         weights += weightsBlob.total()*weightsBlob.elemSize();
4329     }
4330
4331     ShapesVec inLayerShapes, outLayerShapes;
4332     getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes);
4333     for(int i = 0; i < outLayerShapes.size(); i++)
4334     {
4335         blobs += total(outLayerShapes[i]) * sizeof(float);
4336     }
4337 }
4338
4339 void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
4340                                size_t& weights, size_t& blobs) const
4341 {
4342     CV_TRACE_FUNCTION();
4343
4344     std::vector<int> layerIds;
4345     std::vector<size_t> w, b;
4346     getMemoryConsumption(netInputShapes, layerIds, w, b);
4347
4348     weights = blobs = 0;
4349     for(int i = 0; i < layerIds.size(); i++)
4350     {
4351         weights += w[i];
4352         blobs += b[i];
4353     }
4354 }
4355
4356 void Net::getMemoryConsumption(const int layerId,
4357                                const MatShape& netInputShape,
4358                                size_t& weights, size_t& blobs) const
4359 {
4360     getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
4361                          weights, blobs);
4362 }
4363
4364 void Net::getMemoryConsumption(const MatShape& netInputShape,
4365                                size_t& weights, size_t& blobs) const
4366 {
4367     getMemoryConsumption(std::vector<MatShape>(1, netInputShape),
4368                          weights, blobs);
4369 }
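/* Example: rough memory estimate for one input geometry. Only weights and
 * fp32 output blobs are counted; internal buffers and backend-specific
 * allocations are not included:
 *
 *     size_t weights = 0, blobs = 0;
 *     net.getMemoryConsumption(shape(1, 3, 224, 224), weights, blobs);
 *     std::cout << (weights + blobs) / (1024.0 * 1024.0) << " MiB\n";
 */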
4370
4371 void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
4372                                   std::vector<int>& layerIds, std::vector<size_t>& weights,
4373                                   std::vector<size_t>& blobs) const
4374 {
4375     CV_TRACE_FUNCTION();
4376
4377     layerIds.clear();
4378     weights.clear();
4379     blobs.clear();
4380
4381     std::vector<std::vector<MatShape> > inLayerShapes, outLayerShapes;
4382
4383     getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes);
4384
4385     for(int i = 0; i < layerIds.size(); i++)
4386     {
4387             size_t w = 0, b = 0;  // size_t: byte counts can overflow int on large layers
4388         Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]);
4389         CV_Assert(layer != impl->layers.end());
4390
4391         for(int j = 0; j < layer->second.params.blobs.size(); j++)
4392         {
4393             const Mat& weightsBlob = layer->second.params.blobs[j];
4394             w += weightsBlob.total()*weightsBlob.elemSize();
4395         }
4396
4397         for(int j = 0; j < outLayerShapes[i].size(); j++)
4398         {
4399             b += total(outLayerShapes[i][j]) * sizeof(float);
4400         }
4401
4402         weights.push_back(w);
4403         blobs.push_back(b);
4404     }
4405 }
4406
4407 void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>& layerIds,
4408                                std::vector<size_t>& weights, std::vector<size_t>& blobs) const
4409 {
4410     getMemoryConsumption(std::vector<MatShape>(1, netInputShape), layerIds,
4411                          weights, blobs);
4412 }
4413
4414 void Net::enableFusion(bool fusion)
4415 {
4416     if( impl->fusion != fusion )
4417     {
4418         impl->fusion = fusion;
4419         impl->netWasAllocated = false;
4420         impl->clear();
4421     }
4422 }
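/* Fusion is on by default; disabling it forces re-allocation on the next
 * forward() and is mainly useful for inspecting unfused per-layer outputs:
 *
 *     net.enableFusion(false);
 */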
4423
4424 void Net::setHalideScheduler(const String& scheduler)
4425 {
4426     CV_TRACE_FUNCTION();
4427     CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str());
4428
4429     impl->halideConfigFile = scheduler;
4430 }
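/* Sketch (hypothetical file name): attach a manual scheduling config before
 * selecting the Halide backend; layers without an entry fall back to the
 * automatic schedule in Layer::applyHalideScheduler() below:
 *
 *     net.setHalideScheduler("halide_scheduler.yml");
 *     net.setPreferableBackend(DNN_BACKEND_HALIDE);
 */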
4431
4432 int64 Net::getPerfProfile(std::vector<double>& timings)
4433 {
4434     timings = std::vector<double>(impl->layersTimings.begin() + 1, impl->layersTimings.end());  // entry 0 is the default input (Data) layer, which is never timed
4435     int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0);
4436     return total;
4437 }
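/* Example: per-layer profiling of the most recent forward() pass. Timings are
 * reported in ticks; convert with getTickFrequency():
 *
 *     std::vector<double> layerTimes;
 *     double freq = getTickFrequency() / 1000.0;  // ticks per millisecond
 *     double totalMs = net.getPerfProfile(layerTimes) / freq;
 */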
4438
4439 //////////////////////////////////////////////////////////////////////////
4440
4441 Layer::Layer() { preferableTarget = DNN_TARGET_CPU; }
4442
4443 Layer::Layer(const LayerParams &params)
4444     : blobs(params.blobs), name(params.name), type(params.type)
4445 {
4446     preferableTarget = DNN_TARGET_CPU;
4447 }
4448
4449 void Layer::setParamsFrom(const LayerParams &params)
4450 {
4451     blobs = params.blobs;
4452     name = params.name;
4453     type = params.type;
4454 }
4455
4456 int Layer::inputNameToIndex(String)
4457 {
4458     return -1;
4459 }
4460
4461 int Layer::outputNameToIndex(const String&)
4462 {
4463     return 0;
4464 }
4465
4466 bool Layer::supportBackend(int backendId)
4467 {
4468     return backendId == DNN_BACKEND_OPENCV;
4469 }
4470
4471 Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
4472 {
4473     CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type +
4474                                        " layers is not defined.");
4475     return Ptr<BackendNode>();
4476 }
4477
4478 Ptr<BackendNode> Layer::initInfEngine(const std::vector<Ptr<BackendWrapper> > &)
4479 {
4480     CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
4481                                        " layers is not defined.");
4482     return Ptr<BackendNode>();
4483 }
4484
4485 Ptr<BackendNode> Layer::initNgraph(const std::vector<Ptr<BackendWrapper> > & inputs, const std::vector<Ptr<BackendNode> >& nodes)
4486 {
4487     CV_Error(Error::StsNotImplemented, "nGraph pipeline of " + type +
4488                                        " layers is not defined.");
4489     return Ptr<BackendNode>();
4490 }
4491
4492 void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*> &inputs,
4493                                  const std::vector<Mat> &outputs, int targetId) const
4494 {
4495 #ifdef  HAVE_HALIDE
4496     CV_TRACE_FUNCTION();
4497
4498     Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"),
4499                 xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile");
4500     Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();
4501
4502     int outW, outH, outC, outN;
4503     getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);
4504
4505     if (targetId == DNN_TARGET_CPU)
4506     {
4507         if (outW == 1 && outH == 1)
4508         {
4509             if (outC + outN == 1)
4510                 return;
4511
4512             if (outC > 8)
4513               top.split(c, co, ci, 8)
4514                  .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
4515                  .parallel(tile)
4516                  .vectorize(ci, 8);
4517             else
4518               top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile)
4519                  .parallel(tile);
4520         }
4521         else
4522         {
4523             if (outH > 2)
4524             {
4525                 top.reorder(x, c, y)
4526                    .split(y, yo, yi, 2)
4527                    .fuse(yo, n, tile)
4528                    .parallel(tile)
4529                    .unroll(yi)
4530                    .vectorize(x, outW >= 16 ? 16 : outW);
4531             }
4532         }
4533     }
4534     else if (targetId == DNN_TARGET_OPENCL)
4535     {
4536         if (outW == 1 && outH == 1)
4537         {
4538             int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
4539             top.split(c, co, ci, c_split)
4540                .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
4541                .gpu_blocks(tile)
4542                .gpu_threads(ci);
4543         }
4544         else
4545         {
4546             int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW;
4547             int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH;
4548             // Supported vectorization widths: 2, 3, 4, 8, 16
4549             int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC);
4550             top.split(x, xo, xi, x_split).split(y, yo, yi, y_split)
4551                .split(c, co, ci, c_split)
4552                .gpu_blocks(xo, yo, co)
4553                .gpu_threads(xi, yi)
4554                .reorder(xi, yi, ci, xo, yo, co)
4555                .vectorize(ci);
4556         }
4557     }
4558     else
4559         CV_Error(Error::StsNotImplemented, "Unknown target identifier");
4560 #endif  // HAVE_HALIDE
4561 }
4562
4563 Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
4564 {
4565     return Ptr<BackendNode>();
4566 }
4567
4568 bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
4569 bool Layer::tryFuse(Ptr<Layer>&) { return false; }
4570 void Layer::getScaleShift(Mat& scale, Mat& shift) const
4571 {
4572     scale = Mat();
4573     shift = Mat();
4574 }
4575
4576 void Layer::unsetAttached()
4577 {
4578     setActivation(Ptr<ActivationLayer>());
4579 }
4580
4581 template <typename T>
4582 static void vecToPVec(const std::vector<T> &v, std::vector<T*> &pv)
4583 {
4584     pv.resize(v.size());
4585     for (size_t i = 0; i < v.size(); i++)
4586         pv[i] = const_cast<T*>(&v[i]);
4587 }
4588
4589 void Layer::finalize(const std::vector<Mat> &inputs, std::vector<Mat> &outputs)
4590 {
4591     CV_TRACE_FUNCTION();
4592     this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
4593 }
4594
4595 void Layer::finalize(const std::vector<Mat*> &input, std::vector<Mat> &output)
4596 {
4597     CV_UNUSED(input); CV_UNUSED(output);
4598 }
4599
4600 void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
4601 {
4602     CV_TRACE_FUNCTION();
4603     std::vector<Mat> inputs, outputs;
4604     inputs_arr.getMatVector(inputs);
4605     outputs_arr.getMatVector(outputs);
4606
4607     std::vector<Mat*> inputsp;
4608     vecToPVec(inputs, inputsp);
4609     this->finalize(inputsp, outputs);
4610 }
4611
4612 std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
4613 {
4614     CV_TRACE_FUNCTION();
4615
4616     std::vector<Mat> outputs;
4617     this->finalize(inputs, outputs);
4618     return outputs;
4619 }
4620
4621 void Layer::forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
4622 {
4623     // We kept this method for compatibility. DNN calls it now only to support users' implementations.
4624 }
4625
4626 void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
4627 {
4628     CV_TRACE_FUNCTION();
4629     CV_TRACE_ARG_VALUE(name, "name", name.c_str());
4630
4631     Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
4632 }
4633
4634 void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
4635 {
4636     CV_TRACE_FUNCTION();
4637     CV_TRACE_ARG_VALUE(name, "name", name.c_str());
4638
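    // OpenCL FP16 path: blobs arrive as CV_16S-backed half-float data, so
    // convert inputs to CV_32F UMats, run the layer's forward(), then convert
    // the results back to FP16 before syncing them to the caller.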
4639     if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
4640     {
4641         std::vector<UMat> inputs;
4642         std::vector<UMat> outputs;
4643         std::vector<UMat> internals;
4644
4645         std::vector<UMat> orig_inputs;
4646         std::vector<UMat> orig_outputs;
4647         std::vector<UMat> orig_internals;
4648
4649         inputs_arr.getUMatVector(orig_inputs);
4650         outputs_arr.getUMatVector(orig_outputs);
4651         internals_arr.getUMatVector(orig_internals);
4652
4653         inputs.resize(orig_inputs.size());
4654         for (size_t i = 0; i < orig_inputs.size(); i++)
4655             convertFp16(orig_inputs[i], inputs[i]);
4656
4657         outputs.resize(orig_outputs.size());
4658         for (size_t i = 0; i < orig_outputs.size(); i++)
4659             outputs[i].create(shape(orig_outputs[i]), CV_32F);
4660
4661         internals.resize(orig_internals.size());
4662         for (size_t i = 0; i < orig_internals.size(); i++)
4663             internals[i].create(shape(orig_internals[i]), CV_32F);
4664
4665         forward(inputs, outputs, internals);
4666
4667         for (size_t i = 0; i < outputs.size(); i++)
4668             convertFp16(outputs[i], orig_outputs[i]);
4669
4670         // sync results back
4671         outputs_arr.assign(orig_outputs);
4672         internals_arr.assign(orig_internals);
4673         return;
4674     }
4675     std::vector<Mat> inpvec;
4676     std::vector<Mat> outputs;
4677     std::vector<Mat> internals;
4678
4679     inputs_arr.getMatVector(inpvec);
4680     outputs_arr.getMatVector(outputs);
4681     internals_arr.getMatVector(internals);
4682
4683     std::vector<Mat*> inputs(inpvec.size());
4684     for (int i = 0; i < inpvec.size(); i++)
4685         inputs[i] = &inpvec[i];
4686
4687     this->forward(inputs, outputs, internals);
4688
4689     // sync results back
4690     outputs_arr.assign(outputs);
4691     internals_arr.assign(internals);
4692 }
4693
4694 void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
4695 {
4696     CV_TRACE_FUNCTION();
4697
4698     this->finalize(inputs, outputs);
4699     this->forward(inputs, outputs, internals);
4700 }
4701
4702 Layer::~Layer() {}
4703
4704 bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
4705                             const int requiredOutputs,
4706                             std::vector<MatShape> &outputs,
4707                             std::vector<MatShape> &internals) const
4708 {
4709     CV_Assert(inputs.size());
4710     outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
4711     return false;
4712 }
4713
4714 //////////////////////////////////////////////////////////////////////////
4715
4716 static Mutex& getLayerFactoryMutex()
4717 {
4718     static Mutex* volatile instance = NULL;
4719     if (instance == NULL)
4720     {
4721         cv::AutoLock lock(getInitializationMutex());
4722         if (instance == NULL)
4723             instance = new Mutex();
4724     }
4725     return *instance;
4726 }
4727
4728 typedef std::map<String, std::vector<LayerFactory::Constructor> > LayerFactory_Impl;
4729
4730 static LayerFactory_Impl& getLayerFactoryImpl_()
4731 {
4732     static LayerFactory_Impl impl;
4733     return impl;
4734 }
4735
4736 static LayerFactory_Impl& getLayerFactoryImpl()
4737 {
4738     static LayerFactory_Impl* volatile instance = NULL;
4739     if (instance == NULL)
4740     {
4741         cv::AutoLock lock(getLayerFactoryMutex());
4742         if (instance == NULL)
4743         {
4744             instance = &getLayerFactoryImpl_();
4745             initializeLayerFactory();
4746         }
4747     }
4748     return *instance;
4749 }
4750
4751 void LayerFactory::registerLayer(const String &type, Constructor constructor)
4752 {
4753     CV_TRACE_FUNCTION();
4754     CV_TRACE_ARG_VALUE(type, "type", type.c_str());
4755
4756     cv::AutoLock lock(getLayerFactoryMutex());
4757     LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type);
4758
4759     if (it != getLayerFactoryImpl().end())
4760     {
4761         if (it->second.back() == constructor)
4762             CV_Error(cv::Error::StsBadArg, "Layer \"" + type + "\" was already registered");
4763         it->second.push_back(constructor);
4764     }
4765     getLayerFactoryImpl().insert(std::make_pair(type, std::vector<Constructor>(1, constructor)));  // no-op if the type is already present
4766 }
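/* Registration sketch for a user-defined layer (MyLayer is hypothetical; it
 * must derive from cv::dnn::Layer and provide a static create(LayerParams&)
 * returning Ptr<Layer>):
 *
 *     LayerFactory::registerLayer("MyType", MyLayer::create);
 *     // or, via the helper macro from layer.details.hpp:
 *     // CV_DNN_REGISTER_LAYER_CLASS(MyType, MyLayer);
 */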
4767
4768 void LayerFactory::unregisterLayer(const String &type)
4769 {
4770     CV_TRACE_FUNCTION();
4771     CV_TRACE_ARG_VALUE(type, "type", type.c_str());
4772
4773     cv::AutoLock lock(getLayerFactoryMutex());
4774
4775     LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type);
4776     if (it != getLayerFactoryImpl().end())
4777     {
4778         if (it->second.size() > 1)
4779             it->second.pop_back();
4780         else
4781             getLayerFactoryImpl().erase(it);
4782     }
4783 }
4784
4785 Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
4786 {
4787     CV_TRACE_FUNCTION();
4788     CV_TRACE_ARG_VALUE(type, "type", type.c_str());
4789
4790     cv::AutoLock lock(getLayerFactoryMutex());
4791     LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type);
4792
4793     if (it != getLayerFactoryImpl().end())
4794     {
4795         CV_Assert(!it->second.empty());
4796         return it->second.back()(params);
4797     }
4798     else
4799     {
4800         return Ptr<Layer>(); //NULL
4801     }
4802 }
4803
4804 BackendNode::BackendNode(int backendId) : backendId(backendId) {}
4805
4806 BackendNode::~BackendNode() {}
4807
4808 BackendWrapper::BackendWrapper(int backendId, int targetId)
4809     : backendId(backendId), targetId(targetId) {}
4810
4811 BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m)
4812 {
4813     CV_Error(Error::StsNotImplemented,
4814              "Constructor of backend wrapper must be implemented");
4815 }
4816
4817 BackendWrapper::BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape)
4818 {
4819     CV_Error(Error::StsNotImplemented,
4820              "Constructor of backend wrapper must be implemented");
4821 }
4822
4823 BackendWrapper::~BackendWrapper() {}
4824
4825 Net readNet(const String& _model, const String& _config, const String& _framework)
4826 {
4827     String framework = _framework.toLowerCase();
4828     String model = _model;
4829     String config = _config;
4830     const std::string modelExt = model.substr(model.rfind('.') + 1);
4831     const std::string configExt = config.substr(config.rfind('.') + 1);
4832     if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" ||
4833                                 modelExt == "prototxt" || configExt == "prototxt")
4834     {
4835         if (modelExt == "prototxt" || configExt == "caffemodel")
4836             std::swap(model, config);
4837         return readNetFromCaffe(config, model);
4838     }
4839     if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" ||
4840                                      modelExt == "pbtxt" || configExt == "pbtxt")
4841     {
4842         if (modelExt == "pbtxt" || configExt == "pb")
4843             std::swap(model, config);
4844         return readNetFromTensorflow(model, config);
4845     }
4846     if (framework == "torch" || modelExt == "t7" || modelExt == "net" ||
4847                                 configExt == "t7" || configExt == "net")
4848     {
4849         return readNetFromTorch(model.empty() ? config : model);
4850     }
4851     if (framework == "darknet" || modelExt == "weights" || configExt == "weights" ||
4852                                   modelExt == "cfg" || configExt == "cfg")
4853     {
4854         if (modelExt == "cfg" || configExt == "weights")
4855             std::swap(model, config);
4856         return readNetFromDarknet(config, model);
4857     }
4858     if (framework == "dldt" || modelExt == "bin" || configExt == "bin" ||
4859                                modelExt == "xml" || configExt == "xml")
4860     {
4861         if (modelExt == "xml" || configExt == "bin")
4862             std::swap(model, config);
4863         return readNetFromModelOptimizer(config, model);
4864     }
4865     if (framework == "onnx" || modelExt == "onnx")
4866     {
4867         return readNetFromONNX(model);
4868     }
4869     CV_Error(Error::StsError, "Cannot determine the origin framework of files: " +
4870                                       model + (config.empty() ? "" : ", " + config));
4871 }
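/* When `_framework` is empty the framework is inferred from the file
 * extensions, and model/config order does not matter. Illustrative calls
 * (file names hypothetical):
 *
 *     Net a = readNet("bvlc_googlenet.caffemodel", "deploy.prototxt");
 *     Net b = readNet("yolov3.cfg", "yolov3.weights");
 *     Net c = readNet("model.onnx");
 */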
4872
4873 Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
4874             const std::vector<uchar>& bufferConfig)
4875 {
4876     String framework = _framework.toLowerCase();
4877     if (framework == "caffe")
4878         return readNetFromCaffe(bufferConfig, bufferModel);
4879     else if (framework == "tensorflow")
4880         return readNetFromTensorflow(bufferModel, bufferConfig);
4881     else if (framework == "darknet")
4882         return readNetFromDarknet(bufferConfig, bufferModel);
4883     else if (framework == "torch")
4884         CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
4885     else if (framework == "dldt")
4886         return readNetFromModelOptimizer(bufferConfig, bufferModel);
4887     CV_Error(Error::StsError, "Cannot determine the origin framework from the name " + framework);
4888 }
4889
4890 Net readNetFromModelOptimizer(const String &xml, const String &bin)
4891 {
4892     return Net::readFromModelOptimizer(xml, bin);
4893 }
4894
4895 Net readNetFromModelOptimizer(const std::vector<uchar>& bufferCfg, const std::vector<uchar>& bufferModel)
4896 {
4897     return Net::readFromModelOptimizer(bufferCfg, bufferModel);
4898 }
4899
4900 Net readNetFromModelOptimizer(
4901         const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
4902         const uchar* bufferWeightsPtr, size_t bufferWeightsSize
4903 )
4904 {
4905     return Net::readFromModelOptimizer(
4906         bufferModelConfigPtr, bufferModelConfigSize,
4907         bufferWeightsPtr, bufferWeightsSize
4908     );
4909 }
4910
4911 CV__DNN_EXPERIMENTAL_NS_END
4912 }} // namespace