1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "precomp.hpp"
43 #include "op_halide.hpp"
44 #include "op_inf_engine.hpp"
45 #include "ie_ngraph.hpp"
46 #include "op_vkcom.hpp"
47 #include "op_cuda.hpp"
50 #include "cuda4dnn/init.hpp"
51 #include "cuda4dnn/primitives/eltwise.hpp" // required by fuseLayers
54 #include "halide_scheduler.hpp"
64 #include <opencv2/dnn/shape_utils.hpp>
65 #include <opencv2/imgproc.hpp>
67 #include <opencv2/core/utils/configuration.private.hpp>
68 #include <opencv2/core/utils/logger.hpp>
72 CV__DNN_INLINE_NS_BEGIN
74 static size_t DNN_NETWORK_DUMP = utils::getConfigurationParameterSizeT("OPENCV_DNN_NETWORK_DUMP", 0);
76 // This option is useful for running memory-error detection tools such as Valgrind.
77 static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false);
80 static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false);
83 static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
84 #ifdef HAVE_INF_ENGINE
85 (size_t)DNN_BACKEND_INFERENCE_ENGINE
87 (size_t)DNN_BACKEND_OPENCV
91 // Additional checks (slow down execution!)
92 static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
93 static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
94 static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);
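// Illustrative note (not part of the original sources): these flags are read from
// environment variables at startup, so the NaN/Inf checks are typically enabled
// from the shell before launching an application, e.g.
//
//     OPENCV_DNN_CHECK_NAN_INF=1 OPENCV_DNN_CHECK_NAN_INF_DUMP=1 ./my_dnn_app
//
// Only the variable names are taken from this file; the application name is a placeholder.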
102 //==================================================================================================
104 class BackendRegistry
107 typedef std::vector< std::pair<Backend, Target> > BackendsList;
108 const BackendsList & getBackends() const { return backends; }
109 static BackendRegistry & getRegistry()
111 static BackendRegistry impl;
115 #ifdef HAVE_INF_ENGINE
116 static inline bool checkIETarget(Target target)
118 #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R3)
119 // Lightweight detection
120 const std::vector<std::string> devices = getCore("").GetAvailableDevices();
121 for (std::vector<std::string>::const_iterator i = devices.begin(); i != devices.end(); ++i)
123 if (std::string::npos != i->find("MYRIAD") && target == DNN_TARGET_MYRIAD)
125 else if (std::string::npos != i->find("FPGA") && target == DNN_TARGET_FPGA)
127 else if (std::string::npos != i->find("CPU") && target == DNN_TARGET_CPU)
129 else if (std::string::npos != i->find("GPU") && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
135 cv::dnn::LayerParams lp;
136 lp.set("kernel_size", 1);
137 lp.set("num_output", 1);
138 lp.set("bias_term", false);
139 lp.type = "Convolution";
140 lp.name = "testLayer";
141 lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1)));
142 net.addLayerToPrev(lp.name, lp.type, lp);
143 net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
144 net.setPreferableTarget(target);
145 static int inpDims[] = {1, 2, 3, 4};
146 net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0)));
151 catch(const std::exception& e)
153 CV_LOG_INFO(NULL, "checkIETarget(" << (int)target << ") has failed with message: " << e.what());
165 backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU));
167 if (cv::ocl::useOpenCL())
168 backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL));
170 #endif // HAVE_HALIDE
172 #ifdef HAVE_INF_ENGINE
173 if (checkIETarget(DNN_TARGET_CPU)) {
174 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
175 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_CPU));
177 #ifdef HAVE_DNN_NGRAPH
178 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU));
181 if (checkIETarget(DNN_TARGET_MYRIAD)) {
182 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
183 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_MYRIAD));
185 #ifdef HAVE_DNN_NGRAPH
186 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD));
189 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
190 if (checkIETarget(DNN_TARGET_FPGA))
191 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_FPGA));
194 if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel())
196 if (checkIETarget(DNN_TARGET_OPENCL)) {
197 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
198 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL));
200 #ifdef HAVE_DNN_NGRAPH
201 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL));
204 if (checkIETarget(DNN_TARGET_OPENCL_FP16)) {
205 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
206 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL_FP16));
208 #ifdef HAVE_DNN_NGRAPH
209 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL_FP16));
214 #endif // HAVE_INF_ENGINE
217 if (cv::ocl::useOpenCL())
219 backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL));
220 backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16));
224 backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));
228 backends.push_back(std::make_pair(DNN_BACKEND_VKCOM, DNN_TARGET_VULKAN));
232 if (haveCUDA() && cuda4dnn::isDeviceCompatible())
234 backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA));
235 if (cuda4dnn::doesDeviceSupportFP16())
236 backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
241 BackendsList backends;
245 std::vector< std::pair<Backend, Target> > getAvailableBackends()
247 return BackendRegistry::getRegistry().getBackends();
250 std::vector<Target> getAvailableTargets(Backend be)
252 if (be == DNN_BACKEND_DEFAULT)
253 be = (Backend)PARAM_DNN_BACKEND_DEFAULT;
254 #ifdef HAVE_INF_ENGINE
255 if (be == DNN_BACKEND_INFERENCE_ENGINE)
256 be = getInferenceEngineBackendTypeParam();
259 std::vector<Target> result;
260 const BackendRegistry::BackendsList all_backends = getAvailableBackends();
261 for(BackendRegistry::BackendsList::const_iterator i = all_backends.begin(); i != all_backends.end(); ++i )
264 result.push_back(i->second);
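// Usage sketch (illustrative only, assuming the declarations from opencv2/dnn.hpp):
// the registry above backs the public queries getAvailableBackends() and
// getAvailableTargets(), e.g.
//
//     std::vector<cv::dnn::Target> targets =
//         cv::dnn::getAvailableTargets(cv::dnn::DNN_BACKEND_OPENCV);
//     bool hasOpenCL = std::find(targets.begin(), targets.end(),
//                                cv::dnn::DNN_TARGET_OPENCL) != targets.end();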
269 //==================================================================================================
273 typedef std::vector<MatShape> ShapesVec;
277 ShapesVec in, out, internal;
278 // There is no guarantee that a layer which supports in-place computation
279 // will actually be computed in-place (input.data_ptr == output.data_ptr).
280 // If a layer reports that it can work in-place and the layers after it
281 // no longer use the input blob, we set output = input.
283 LayerShapes() {supportInPlace = false;}
287 Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
288 const Scalar& mean, bool swapRB, bool crop, int ddepth)
292 blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth);
296 void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
297 const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
300 std::vector<Mat> images(1, image.getMat());
301 blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
304 Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
305 const Scalar& mean, bool swapRB, bool crop, int ddepth)
309 blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
313 void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
314 Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
317 CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
320 CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
321 CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
324 std::vector<Mat> images;
325 images_.getMatVector(images);
326 CV_Assert(!images.empty());
327 for (size_t i = 0; i < images.size(); i++)
329 Size imgSize = images[i].size();
336 float resizeFactor = std::max(size.width / (float)imgSize.width,
337 size.height / (float)imgSize.height);
338 resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
339 Rect crop(Point(0.5 * (images[i].cols - size.width),
340 0.5 * (images[i].rows - size.height)),
342 images[i] = images[i](crop);
345 resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
347 if(images[i].depth() == CV_8U && ddepth == CV_32F)
348 images[i].convertTo(images[i], CV_32F);
351 std::swap(mean[0], mean[2]);
354 images[i] *= scalefactor;
357 size_t nimages = images.size();
358 Mat image0 = images[0];
359 int nch = image0.channels();
360 CV_Assert(image0.dims == 2);
361 if (nch == 3 || nch == 4)
363 int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
364 blob_.create(4, sz, ddepth);
365 Mat blob = blob_.getMat();
368 for(size_t i = 0; i < nimages; i++ )
370 const Mat& image = images[i];
371 CV_Assert(image.depth() == blob_.depth());
372 nch = image.channels();
373 CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
374 CV_Assert(image.size() == image0.size());
376 for( int j = 0; j < nch; j++ )
377 ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
379 std::swap(ch[0], ch[2]);
386 int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
387 blob_.create(4, sz, ddepth);
388 Mat blob = blob_.getMat();
390 for(size_t i = 0; i < nimages; i++ )
392 const Mat& image = images[i];
393 CV_Assert(image.depth() == blob_.depth());
394 nch = image.channels();
395 CV_Assert(image.dims == 2 && (nch == 1));
396 CV_Assert(image.size() == image0.size());
398 image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
403 void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)
407 // A blob is a 4-dimensional matrix in floating-point precision
408 // blob_[0] = batchSize = nbOfImages
409 // blob_[1] = nbOfChannels
412 CV_Assert(blob_.depth() == CV_32F);
413 CV_Assert(blob_.dims == 4);
415 images_.create(cv::Size(1, blob_.size[0]), blob_.depth());
417 std::vector<Mat> vectorOfChannels(blob_.size[1]);
418 for (int n = 0; n < blob_.size[0]; ++n)
420 for (int c = 0; c < blob_.size[1]; ++c)
422 vectorOfChannels[c] = getPlane(blob_, n, c);
424 cv::merge(vectorOfChannels, images_.getMatRef(n));
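// Usage sketch (illustrative only): blobFromImage() packs an image into NCHW order
// and imagesFromBlob() unpacks it again; "input.jpg" is a placeholder file name.
//
//     cv::Mat img = cv::imread("input.jpg");
//     cv::Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255.0, cv::Size(224, 224),
//                                           cv::Scalar(), /*swapRB=*/true, /*crop=*/false);
//     std::vector<cv::Mat> restored;
//     cv::dnn::imagesFromBlob(blob, restored); // restored[0] is a CV_32FC3 image again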
429 class OpenCLBackendWrapper : public BackendWrapper
432 OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
439 OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
440 : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
442 Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
443 CV_Assert(!base.empty());
447 int shape[] = {1, (int)base->umat.total()};
448 umat = base->umat.reshape(1, 2, &shape[0])
449 .colRange(0, host->total())
450 .reshape(1, host->dims, &host->size[0]);
454 static Ptr<BackendWrapper> create(Mat& m)
456 return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));
459 static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
461 return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));
464 static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper> >& wrappers)
466 const int numWrappers = wrappers.size();
467 std::vector<UMat> mats(wrappers.size());
468 for (int i = 0; i < numWrappers; ++i)
470 Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
471 CV_Assert(!umatWrapper.empty());
472 umatWrapper->copyToDevice();
473 mats[i] = umatWrapper->umat;
478 // Replaces all umats in the wrappers with the given ones.
479 static void update(const std::vector<Ptr<BackendWrapper> >& wrappers,
480 const std::vector<UMat>& umats)
482 CV_Assert(wrappers.size() == umats.size());
483 for (int i = 0, n = umats.size(); i < n; ++i)
485 Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
486 CV_Assert(!umatWrapper.empty());
487 umatWrapper->umat = umats[i];
491 ~OpenCLBackendWrapper() {}
493 // Copies data from the device to host memory.
494 virtual void copyToHost() CV_OVERRIDE
499 virtual void setHostDirty() CV_OVERRIDE
525 LayerPin(int layerId = -1, int outputId = -1)
526 : lid(layerId), oid(outputId) {}
530 return (lid >= 0 && oid >= 0);
533 bool equal(const LayerPin &r) const
535 return (lid == r.lid && oid == r.oid);
538 bool operator<(const LayerPin &r) const
540 return lid < r.lid || (lid == r.lid && oid < r.oid);
543 bool operator ==(const LayerPin &r) const
545 return lid == r.lid && oid == r.oid;
551 LayerData() : id(-1), skip(false), flag(0) {}
552 LayerData(int _id, const String &_name, const String &_type, LayerParams &_params)
553 : id(_id), name(_name), type(_type), params(_params), skip(false), flag(0)
567 std::vector<LayerPin> inputBlobsId;
568 std::set<int> inputLayersId;
569 std::set<int> requiredOutputs;
570 std::vector<LayerPin> consumers;
571 std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
572 std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
573 std::vector<Ptr<BackendWrapper> > internalBlobsWrappers;
576 /* output ids which must be transferred to the host in the background
577 * after the completion of the forward pass of the layer
579 std::vector<int> cudaD2HBackgroundTransfers;
582 Ptr<Layer> layerInstance;
583 std::vector<Mat> outputBlobs;
584 std::vector<Mat*> inputBlobs;
585 std::vector<Mat> internals;
586 // Computation nodes of implemented backends (except DEFAULT).
587 std::map<int, Ptr<BackendNode> > backendNodes;
588 // Flag to skip this layer's computation for a specific backend.
593 Ptr<Layer> getLayerInstance()
596 CV_TRACE_ARG_VALUE(type, "type", type.c_str());
599 return layerInstance;
601 layerInstance = LayerFactory::createLayerInstance(type, params);
604 CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\"");
607 return layerInstance;
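// Note (illustrative, not part of the original sources): layerInstance is created
// lazily via LayerFactory, so custom layer types reach this point once they are
// registered, e.g. with CV_DNN_REGISTER_LAYER_CLASS(MyType, MyLayerClass) or
// LayerFactory::registerLayer(); "MyType" and "MyLayerClass" are placeholder names.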
611 // Fake layer containing the network input blobs
612 struct DataLayer : public Layer
614 DataLayer() : Layer()
619 virtual bool supportBackend(int backendId) CV_OVERRIDE
621 return backendId == DNN_BACKEND_OPENCV ||
622 (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && inputsData.size() == 1);
625 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
628 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
630 CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
631 forward_ocl(inputs_arr, outputs_arr, internals_arr))
633 if (outputs_arr.depth() == CV_16S)
635 forward_fallback(inputs_arr, outputs_arr, internals_arr);
639 std::vector<Mat> outputs, internals;
640 outputs_arr.getMatVector(outputs);
641 internals_arr.getMatVector(internals);
644 // | Input type | Output type |
647 for (int i = 0; i < inputsData.size(); ++i)
649 double scale = scaleFactors[i];
650 Scalar& mean = means[i];
651 CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
652 CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");
654 bool singleMean = true;
655 for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
657 singleMean = mean[j] == mean[j - 1];
662 inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
666 for (int n = 0; n < inputsData[i].size[0]; ++n)
667 for (int c = 0; c < inputsData[i].size[1]; ++c)
669 Mat inp = getPlane(inputsData[i], n, c);
670 Mat out = getPlane(outputs[i], n, c);
671 inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
678 std::vector<Mat> tmp_expressions;
679 bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
682 // | Input type | Output type |
686 std::vector<UMat> outputs;
687 outputs_.getUMatVector(outputs);
689 tmp_expressions.clear();
690 for (int i = 0; i < inputsData.size(); ++i)
692 Mat inputData = inputsData[i];
694 double scale = scaleFactors[i];
695 Scalar& mean = means[i];
697 CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
698 bool singleMean = true;
699 for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
701 singleMean = mean[j] == mean[j - 1];
704 if (outputs_.depth() == CV_16S)
708 tmp_expressions.push_back(Mat(scale * (inputsData[i] - mean[0])));
709 convertFp16(tmp_expressions.back(), outputs[i]);
713 for (int n = 0; n < inputsData[i].size[0]; ++n)
714 for (int c = 0; c < inputsData[i].size[1]; ++c)
716 Mat inp = getPlane(inputsData[i], n, c);
718 std::vector<cv::Range> plane(4, Range::all());
719 plane[0] = Range(n, n + 1);
720 plane[1] = Range(c, c + 1);
721 UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);
723 tmp_expressions.push_back(scale * (inp - mean[c]));
724 convertFp16(tmp_expressions.back(), out);
730 CV_Assert(outputs_.depth() == CV_32F);
733 inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
737 for (int n = 0; n < inputsData[i].size[0]; ++n)
738 for (int c = 0; c < inputsData[i].size[1]; ++c)
740 Mat inp = getPlane(inputsData[i], n, c);
742 std::vector<cv::Range> plane(4, Range::all());
743 plane[0] = Range(n, n + 1);
744 plane[1] = Range(c, c + 1);
745 UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);
747 inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
756 int outputNameToIndex(const String& tgtName) CV_OVERRIDE
758 int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin());
759 return (idx < (int)outNames.size()) ? idx : -1;
762 void setNames(const std::vector<String> &names)
764 outNames.assign(names.begin(), names.end());
765 shapes.clear(); shapes.resize(outNames.size());
768 void setInputShape(const String& tgtName, const MatShape& shape)
770 std::vector<String>::const_iterator it = std::find(outNames.begin(), outNames.end(), tgtName);
771 CV_Check(tgtName, it != outNames.end(), "Unknown input");
772 int idx = (int)(it - outNames.begin());
774 CV_Assert(idx < (int)shapes.size());
775 CV_Check(tgtName, shapes[idx].empty(), "Input shape redefinition is not allowed");
779 bool getMemoryShapes(const std::vector<MatShape> &inputs,
780 const int requiredOutputs,
781 std::vector<MatShape> &outputs,
782 std::vector<MatShape> &internals) const CV_OVERRIDE
784 CV_Assert(inputs.size() == requiredOutputs);
785 outputs.assign(inputs.begin(), inputs.end());
789 virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
791 std::vector<Mat> outputs;
792 outputs_arr.getMatVector(outputs);
794 CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(),
795 inputsData.size() == outputs.size());
797 for (int i = 0; skip && i < inputsData.size(); ++i)
799 if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar())
804 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
805 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
807 CV_CheckEQ(inputsData.size(), (size_t)1, "");
808 CV_CheckEQ(inputsData[0].dims, 4, "");
809 const size_t numChannels = inputsData[0].size[1];
810 CV_Assert(numChannels <= 4);
813 InferenceEngine::TensorDesc td(InferenceEngine::Precision::FP32, {numChannels},
814 InferenceEngine::Layout::C);
815 auto weights = InferenceEngine::make_shared_blob<float>(td);
818 float* weight_buf = weights->buffer().as<float*>();
819 std::fill(weight_buf, weight_buf + numChannels, scaleFactors[0]);
822 auto biases = InferenceEngine::make_shared_blob<float>(td);
824 float* bias_buf = biases->buffer().as<float*>();
826 for (int i = 0; i < numChannels; ++i)
828 bias_buf[i] = -means[0][i] * scaleFactors[0];
831 InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name);
832 addConstantData("weights", weights, ieLayer);
833 addConstantData("biases", biases, ieLayer);
834 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
836 #endif // HAVE_DNN_IE_NN_BUILDER_2019
838 std::vector<String> outNames;
839 std::vector<MatShape> shapes;
840 // Preprocessing parameters for each network's input.
841 std::vector<double> scaleFactors;
842 std::vector<Scalar> means;
843 std::vector<Mat> inputsData;
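// Illustrative note (not part of the original sources): scaleFactors/means are the
// per-input preprocessing parameters applied by DataLayer::forward() as
// output = scalefactor * (input - mean); they are normally filled in through
// Net::setInput(), e.g.
//
//     net.setInput(blob, "data", 1.0 / 127.5, cv::Scalar(127.5, 127.5, 127.5));
//
// where "data" is a placeholder input name.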
850 // Increase the reference counter of a layer output.
851 void addReference(const LayerPin& lp)
853 std::map<LayerPin, int>::iterator it = refCounter.find(lp);
854 if (it == refCounter.end())
860 void addReferences(const std::vector<LayerPin>& pins)
862 for (int i = 0; i < pins.size(); i++)
864 addReference(pins[i]);
868 // Returns the number of references to the memory allocated for a specific layer blob.
870 int numReferences(const LayerPin& lp)
872 std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
873 CV_Assert(mapIt != reuseMap.end());
874 LayerPin memHost = mapIt->second;
876 std::map<LayerPin, int>::iterator refIt = refCounter.find(memHost);
877 CV_Assert(refIt != refCounter.end());
878 return refIt->second;
881 // Reuse data allocated in <host> inside the <user> blob.
882 void reuse(const LayerPin& host, const LayerPin& user)
884 CV_Assert(reuseMap.find(user) == reuseMap.end());
885 CV_Assert(reuseMap.find(host) != reuseMap.end());
886 LayerPin memHost = reuseMap[host];
887 reuseMap[user] = memHost;
888 if (refCounter.find(memHost) != refCounter.end())
890 std::map<LayerPin, int>::iterator userRefIt = refCounter.find(user);
891 if (userRefIt != refCounter.end())
893 refCounter[memHost] += userRefIt->second;
894 refCounter.erase(userRefIt);
897 refCounter[memHost] += 1;
901 // Decrease the reference counter of the memory allocated for a specific blob.
902 void releaseReference(const LayerPin& lp)
904 std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
905 CV_Assert(mapIt != reuseMap.end());
907 std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
908 CV_Assert(refIt != refCounter.end());
909 CV_Assert(refIt->second > 0);
913 void releaseReferences(const std::vector<LayerPin>& pins)
915 for (int i = 0; i < pins.size(); i++)
917 releaseReference(pins[i]);
921 void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half)
923 if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
926 LayerPin bestBlobPin;
928 std::map<LayerPin, Mat>::iterator hostIt;
929 std::map<LayerPin, int>::iterator refIt;
931 const int targetTotal = total(shape);
932 int bestBlobTotal = INT_MAX;
934 for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
936 refIt = refCounter.find(hostIt->first);
937 // Use only blobs that have been referenced before; otherwise
938 // the blob might be in use as an output.
939 if (refIt != refCounter.end() && refIt->second == 0)
941 Mat& unusedBlob = hostIt->second;
942 if (unusedBlob.total() >= targetTotal &&
943 unusedBlob.total() < bestBlobTotal)
945 bestBlobPin = hostIt->first;
946 bestBlob = unusedBlob;
947 bestBlobTotal = unusedBlob.total();
951 if (!bestBlob.empty())
953 reuse(bestBlobPin, lp);
954 dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
960 // If dst has already been allocated with total(shape) elements,
961 // it won't be recreated and the dst.data pointer remains the same.
962 dst.create(shape, use_half ? CV_16S : CV_32F);
967 void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
968 std::vector<LayerPin>& pinsForInternalBlobs,
969 bool use_half = false)
973 pinsForInternalBlobs.clear();
975 std::vector<Mat>& outputBlobs = ld.outputBlobs,
976 &internalBlobs = ld.internals;
978 const ShapesVec& outShapes = layerShapes.out,
979 internalShapes = layerShapes.internal;
981 outputBlobs.resize(std::max((size_t)1, outShapes.size())); // a layer produces at least one output blob
982 internalBlobs.resize(internalShapes.size());
984 CV_Assert(ld.requiredOutputs.size() <= outShapes.size());
986 // Check that layer could work in-place.
987 bool inPlace = false;
988 if (layerShapes.supportInPlace)
990 if (ld.inputBlobs.size() == 1)
992 // Get number of references to the input memory.
993 int numRef = numReferences(ld.inputBlobsId[0]);
994 // In-place only if the current layer is the one and only consumer of this blob.
995 inPlace = numRef == 1;
999 ShapesVec shapes(outShapes);
1000 shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
1001 std::vector<Mat*> blobs;
1002 for(int i = 0; i < outputBlobs.size(); i++)
1004 blobs.push_back(&outputBlobs[i]);
1007 for(int i = 0; i < internalBlobs.size(); i++)
1009 blobs.push_back(&internalBlobs[i]);
1010 if (total(internalShapes[i]))
1012 pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
1016 addReferences(pinsForInternalBlobs);
1018 std::map<int, std::vector<int> > idxSizes;
1019 for(int i = 0; i < shapes.size(); i++)
1021 idxSizes[total(shapes[i])].push_back(i);
1024 std::map<int, std::vector<int> >::reverse_iterator it;
1025 for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
1027 for(int j = 0; j < it->second.size(); j++)
1029 int index = it->second[j];
1030 if (total(shapes[index]))
1032 LayerPin blobPin(ld.id, index);
1033 if (index < outShapes.size() && inPlace)
1035 CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
1036 ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
1037 reuse(ld.inputBlobsId[0], blobPin);
1040 reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half);
1046 // Clear internal state. Called before every reallocation.
1049 CV_TRACE_FUNCTION();
1057 // Register allocated memory.
1058 void addHost(const LayerPin& lp, const Mat& mat)
1060 CV_Assert(memHosts.find(lp) == memHosts.end());
1065 std::map<LayerPin, int> refCounter;
1066 // Maps a pin to its origin blob (the one for which the memory was originally allocated).
1067 // For origin blobs, key == value.
1068 std::map<LayerPin, LayerPin> reuseMap;
1069 std::map<LayerPin, Mat> memHosts;
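// Illustrative example (not part of the original sources): if the output pin (lid=1, oid=0)
// of layer A is reused to store the output of layer B at pin (lid=2, oid=0), then
// reuseMap[(2,0)] == (1,0), memHosts keeps the actual Mat under (1,0), and
// refCounter[(1,0)] counts every pin that still needs that memory.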
1072 static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
1074 if (backendId == DNN_BACKEND_OPENCV)
1076 if (targetId == DNN_TARGET_CPU)
1077 return Ptr<BackendWrapper>();
1079 else if (IS_DNN_OPENCL_TARGET(targetId))
1080 return OpenCLBackendWrapper::create(m);
1083 CV_Error(Error::StsNotImplemented, "Unknown/unsupported target identifier");
1085 else if (backendId == DNN_BACKEND_HALIDE)
1087 CV_Assert(haveHalide());
1089 return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
1090 #endif // HAVE_HALIDE
1092 else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
1094 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
1095 return Ptr<BackendWrapper>(new InfEngineBackendWrapper(targetId, m));
1097 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
1100 else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1102 #ifdef HAVE_DNN_NGRAPH
1103 return Ptr<BackendWrapper>(new NgraphBackendWrapper(targetId, m));
1105 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
1108 else if (backendId == DNN_BACKEND_VKCOM)
1110 CV_Assert(haveVulkan());
1112 return Ptr<BackendWrapper>(new VkComBackendWrapper(m));
1113 #endif // HAVE_VULKAN
1115 else if (backendId == DNN_BACKEND_CUDA)
1117 CV_Assert(haveCUDA());
1122 case DNN_TARGET_CUDA:
1123 return CUDABackendWrapperFP32::create(m);
1124 case DNN_TARGET_CUDA_FP16:
1125 return CUDABackendWrapperFP16::create(m);
1127 CV_Assert(IS_DNN_CUDA_TARGET(targetId));
1132 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
1133 return Ptr<BackendWrapper>(); // TODO Error?
1136 static int g_networkId = 0;
1138 detail::NetImplBase::NetImplBase()
1139 : networkId(CV_XADD(&g_networkId, 1))
1140 , networkDumpCounter(0)
1141 , dumpLevel(DNN_NETWORK_DUMP)
1146 std::string detail::NetImplBase::getDumpFileNameBase()
1148 std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++);
1149 return dumpFileNameBase;
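// Illustrative note (not part of the original sources): files named with this base are
// written when dumpLevel is non-zero, i.e. when the OPENCV_DNN_NETWORK_DUMP environment
// variable (see DNN_NETWORK_DUMP above) is set, e.g. OPENCV_DNN_NETWORK_DUMP=1.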
1152 struct Net::Impl : public detail::NetImplBase
1154 typedef std::map<int, LayerShapes> LayersShapesMap;
1155 typedef std::map<int, LayerData> MapIdToLayerData;
1159 // Allocate the fake network input layer
1160 netInputLayer = Ptr<DataLayer>(new DataLayer());
1161 LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
1163 netInputLayer->name = inpl.name = "_input";
1164 inpl.type = "__NetInputLayer__";
1165 inpl.layerInstance = netInputLayer;
1166 layerNameToId.insert(std::make_pair(inpl.name, inpl.id));
1169 netWasAllocated = false;
1172 preferableBackend = DNN_BACKEND_DEFAULT;
1173 preferableTarget = DNN_TARGET_CPU;
1174 skipInfEngineInit = false;
1177 Ptr<DataLayer> netInputLayer;
1178 std::vector<LayerPin> blobsToKeep;
1179 MapIdToLayerData layers;
1180 std::map<String, int> layerNameToId;
1181 BlobManager blobManager;
1182 int preferableBackend;
1183 int preferableTarget;
1184 String halideConfigFile;
1185 bool skipInfEngineInit;
1186 // Maps host data to a backend-specific wrapper.
1187 std::map<void*, Ptr<BackendWrapper> > backendWrappers;
1191 bool netWasAllocated;
1194 std::vector<int64> layersTimings;
1200 CudaInfo_t(cuda4dnn::csl::CSLContext ctxt, cuda4dnn::csl::Stream d2h_stream_)
1201 : context(std::move(ctxt)), d2h_stream(std::move(d2h_stream_)) { }
1202 cuda4dnn::csl::CSLContext context;
1203 cuda4dnn::csl::Stream d2h_stream;
1204 cuda4dnn::csl::Workspace workspace;
1207 std::unique_ptr<CudaInfo_t> cudaInfo;
1210 Ptr<BackendWrapper> wrap(Mat& host)
1212 if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU)
1213 return Ptr<BackendWrapper>();
1215 MatShape shape(host.dims);
1216 for (int i = 0; i < host.dims; ++i)
1217 shape[i] = host.size[i];
1219 void* data = host.data;
1220 if (backendWrappers.find(data) != backendWrappers.end())
1222 Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
1223 if (preferableBackend == DNN_BACKEND_OPENCV)
1226 CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
1227 return OpenCLBackendWrapper::create(baseBuffer, host);
1229 CV_Error(Error::StsInternal, "");
1232 else if (preferableBackend == DNN_BACKEND_HALIDE)
1234 CV_Assert(haveHalide());
1236 return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
1239 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
1241 return wrapMat(preferableBackend, preferableTarget, host);
1243 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1245 return wrapMat(preferableBackend, preferableTarget, host);
1247 else if (preferableBackend == DNN_BACKEND_VKCOM)
1250 return Ptr<BackendWrapper>(new VkComBackendWrapper(baseBuffer, host));
1253 else if (preferableBackend == DNN_BACKEND_CUDA)
1255 CV_Assert(haveCUDA());
1257 switch (preferableTarget)
1259 case DNN_TARGET_CUDA:
1260 return CUDABackendWrapperFP32::create(baseBuffer, shape);
1261 case DNN_TARGET_CUDA_FP16:
1262 return CUDABackendWrapperFP16::create(baseBuffer, shape);
1264 CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget));
1269 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
1272 Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
1273 backendWrappers[data] = wrapper;
1278 void compileHalide()
1280 CV_TRACE_FUNCTION();
1282 CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);
1284 HalideScheduler scheduler(halideConfigFile);
1285 std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
1286 for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
1288 LayerData &ld = it->second;
1289 Ptr<Layer> layer = ld.layerInstance;
1290 if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip)
1292 CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty());
1293 bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]);
1296 // Use automatic scheduling provided by layer.
1297 layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE],
1298 ld.inputBlobs, ld.outputBlobs,
1301 compileList.emplace_back(ld);
1304 std::atomic<int> progress(0);
1305 auto fn = ([&] () -> void
1309 int id = progress.fetch_add(1);
1310 if ((size_t)id >= compileList.size())
1312 const LayerData& ld = compileList[id].get();
1313 Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
1314 dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
1317 size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
1318 num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
1319 std::vector<std::thread> threads(num_threads - 1);
1320 for (auto& t: threads) t = std::thread(fn);
1321 fn(); // process own tasks
1322 for (auto& t: threads) t.join();
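// Usage sketch (illustrative only): the Halide path is driven from the public API
// roughly as below; "scheduler.yml" is a placeholder for a scheduling config file
// consumed by HalideScheduler above.
//
//     net.setPreferableBackend(cv::dnn::DNN_BACKEND_HALIDE);
//     net.setHalideScheduler("scheduler.yml"); // optional manual schedule
//     cv::Mat out = net.forward();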
1328 CV_TRACE_FUNCTION();
1330 MapIdToLayerData::iterator it;
1331 for (it = layers.begin(); it != layers.end(); it++)
1333 if (it->second.id != 0) {
1334 it->second.inputBlobs.clear();
1335 it->second.outputBlobs.clear();
1336 it->second.internals.clear();
1338 it->second.skip = false;
1339 //it->second.consumers.clear();
1340 Ptr<Layer> currLayer = it->second.layerInstance;
1342 if( currLayer.empty() )
1345 currLayer->unsetAttached();
1348 layersTimings.clear();
1351 void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>())
1353 CV_TRACE_FUNCTION();
1355 if (dumpLevel && networkDumpCounter == 0)
1357 dumpNetworkToFile();
1360 if (preferableBackend == DNN_BACKEND_DEFAULT)
1361 preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT;
1362 #ifdef HAVE_INF_ENGINE
1363 if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
1364 preferableBackend = getInferenceEngineBackendTypeParam();
1367 CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
1368 preferableTarget == DNN_TARGET_CPU ||
1369 preferableTarget == DNN_TARGET_OPENCL ||
1370 preferableTarget == DNN_TARGET_OPENCL_FP16);
1371 CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
1372 preferableTarget == DNN_TARGET_CPU ||
1373 preferableTarget == DNN_TARGET_OPENCL);
1374 if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
1375 preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1378 preferableTarget == DNN_TARGET_CPU ||
1379 preferableTarget == DNN_TARGET_OPENCL ||
1380 preferableTarget == DNN_TARGET_OPENCL_FP16 ||
1381 preferableTarget == DNN_TARGET_MYRIAD ||
1382 preferableTarget == DNN_TARGET_FPGA
1385 CV_Assert(preferableBackend != DNN_BACKEND_VKCOM ||
1386 preferableTarget == DNN_TARGET_VULKAN);
1387 CV_Assert(preferableBackend != DNN_BACKEND_CUDA ||
1388 IS_DNN_CUDA_TARGET(preferableTarget));
1389 if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
1391 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
1394 CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
1395 preferableTarget = DNN_TARGET_CPU;
1399 if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
1401 // Current implementation is only valid for GPU (#11494)
1402 if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
1404 CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
1405 preferableTarget = DNN_TARGET_CPU;
1407 else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
1409 CV_LOG_WARNING(NULL,
1410 "DNN: OpenCL target with fp16 precision is not supported "
1411 "with current OpenCL device (tested with Intel GPUs only), "
1412 "switching to OpenCL with fp32 precision.");
1413 preferableTarget = DNN_TARGET_OPENCL;
1418 if (preferableBackend == DNN_BACKEND_VKCOM && !haveVulkan())
1420 preferableBackend = DNN_BACKEND_OPENCV;
1421 preferableTarget = DNN_TARGET_CPU;
1424 if (preferableBackend == DNN_BACKEND_CUDA && !haveCUDA())
1427 CV_LOG_WARNING(NULL, "unable to use CUDA backend; switching to CPU");
1429 CV_LOG_WARNING(NULL, "DNN module was not built with CUDA backend; switching to CPU");
1431 preferableBackend = DNN_BACKEND_OPENCV;
1432 preferableTarget = DNN_TARGET_CPU;
1437 this->blobsToKeep = blobsToKeep_;
1439 allocateLayers(blobsToKeep_);
1441 MapIdToLayerData::iterator it = layers.find(0);
1442 CV_Assert(it != layers.end());
1443 it->second.skip = netInputLayer->skip;
1445 initBackend(blobsToKeep_);
1447 if (!netWasAllocated)
1450 if (preferableBackend == DNN_BACKEND_HALIDE)
1453 CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
1457 netWasAllocated = true;
1461 dumpNetworkToFile();
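// Illustrative sketch (not part of the original sources): the validation and fallback
// logic above reacts to the user's backend/target choice, normally expressed as
//
//     net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
//     net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL); // falls back to CPU if OpenCL is unsuitable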
1466 int getLayerId(const String &layerName)
1468 std::map<String, int>::iterator it = layerNameToId.find(layerName);
1469 return (it != layerNameToId.end()) ? it->second : -1;
1472 int getLayerId(int id)
1474 MapIdToLayerData::iterator it = layers.find(id);
1475 return (it != layers.end()) ? id : -1;
1478 int getLayerId(DictValue &layerDesc)
1480 if (layerDesc.isInt())
1481 return getLayerId(layerDesc.get<int>());
1482 else if (layerDesc.isString())
1483 return getLayerId(layerDesc.get<String>());
1485 CV_Assert(layerDesc.isInt() || layerDesc.isString());
1489 String getLayerName(int id)
1491 MapIdToLayerData::iterator it = layers.find(id);
1492 return (it != layers.end()) ? it->second.name : "(unknown layer)";
1495 LayerData& getLayerData(int id)
1497 MapIdToLayerData::iterator it = layers.find(id);
1499 if (it == layers.end())
1500 CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));
1505 LayerData& getLayerData(const String &layerName)
1507 int id = getLayerId(layerName);
1510 CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found");
1512 return getLayerData(id);
1515 LayerData& getLayerData(const DictValue &layerDesc)
1517 CV_Assert(layerDesc.isInt() || layerDesc.isString());
1518 if (layerDesc.isInt())
1519 return getLayerData(layerDesc.get<int>());
1520 else /*if (layerDesc.isString())*/
1521 return getLayerData(layerDesc.get<String>());
1524 static void addLayerInput(LayerData &ld, int inNum, LayerPin from)
1526 if ((int)ld.inputBlobsId.size() <= inNum)
1528 ld.inputBlobsId.resize(inNum + 1);
1532 LayerPin storedFrom = ld.inputBlobsId[inNum];
1533 if (storedFrom.valid() && !storedFrom.equal(from))
1534 CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected",
1535 inNum, ld.name.c_str()));
1538 ld.inputBlobsId[inNum] = from;
1541 int resolvePinOutputName(LayerData &ld, const String &outName)
1543 if (outName.empty())
1545 return ld.getLayerInstance()->outputNameToIndex(outName);
1548 LayerPin getPinByAlias(const String &layerName)
1551 pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);
1554 pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);
1559 std::vector<LayerPin> getLayerOutPins(const String &layerName)
1561 int lid = (layerName.empty()) ? 0 : getLayerId(layerName);
1563 std::vector<LayerPin> pins;
1565 for (int i = 0; i < layers[lid].outputBlobs.size(); i++)
1567 pins.push_back(LayerPin(lid, i));
1573 void connect(int outLayerId, int outNum, int inLayerId, int inNum)
1575 CV_Assert(outLayerId < inLayerId);
1576 LayerData &ldOut = getLayerData(outLayerId);
1577 LayerData &ldInp = getLayerData(inLayerId);
1579 addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum));
1580 ldOut.requiredOutputs.insert(outNum);
1581 ldOut.consumers.push_back(LayerPin(inLayerId, outNum));
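// Illustrative note (not part of the original sources): this is the low-level form of
// Net::connect(); the public string overload resolves layer names to pins and ends up
// calling this method, e.g.
//
//     net.connect("conv1", "relu1"); // "conv1" and "relu1" are placeholder layer names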
1584 void initBackend(const std::vector<LayerPin>& blobsToKeep_)
1586 CV_TRACE_FUNCTION();
1587 if (preferableBackend == DNN_BACKEND_OPENCV)
1589 CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
1591 else if (preferableBackend == DNN_BACKEND_HALIDE)
1592 initHalideBackend();
1593 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
1595 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
1596 initInfEngineBackend(blobsToKeep_);
1598 CV_Assert(false && "This OpenCV version is built without Inference Engine NN Builder API support");
1601 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1603 #ifdef HAVE_DNN_NGRAPH
1604 initNgraphBackend(blobsToKeep_);
1606 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
1609 else if (preferableBackend == DNN_BACKEND_VKCOM)
1611 else if (preferableBackend == DNN_BACKEND_CUDA)
1612 initCUDABackend(blobsToKeep_);
1614 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
1617 void initHalideBackend()
1619 CV_TRACE_FUNCTION();
1620 CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide());
1622 // Iterator to current layer.
1623 MapIdToLayerData::iterator it = layers.begin();
1624 // Iterator to the base layer for fusion. For example, in the case of conv+bn+relu
1625 // it will be the conv layer.
1626 MapIdToLayerData::iterator baseIt = layers.begin();
1627 for (; it != layers.end(); it++)
1629 LayerData &ldTop = it->second;
1630 Ptr<Layer> layerTop = ldTop.layerInstance;
1631 if (!layerTop->supportBackend(preferableBackend))
1633 // Move the base iterator to a layer that doesn't support the preferable
1634 // backend to prevent fusion across layers of different backends.
1638 // Try to fuse layers.
1639 LayerData &ldBot = baseIt->second;
1640 Ptr<Layer> layerBot = ldBot.layerInstance;
1641 // 1. Check that the bottom and top layers are from the same backend.
1642 if (it != layers.begin() && layerBot->supportBackend(preferableBackend))
1644 // 2. Check that current layer works in-place.
1645 bool inPlace = ldTop.inputBlobs.size() == 1 &&
1646 ldBot.outputBlobs.size() == 1 &&
1647 ldTop.inputBlobs[0]->data ==
1648 ldBot.outputBlobs[0].data;
1651 // 3. Try to attach node.
1652 CV_Assert(!ldBot.backendNodes[preferableBackend].empty());
1653 Ptr<BackendNode> fusedNode =
1654 layerTop->tryAttach(ldBot.backendNodes[preferableBackend]);
1655 if (!fusedNode.empty())
1658 ldBot.backendNodes[preferableBackend] = fusedNode;
1659 ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers;
1664 // No layers fusion.
1666 ldTop.backendNodes[DNN_BACKEND_HALIDE] =
1667 layerTop->initHalide(ldTop.inputBlobsWrappers);
1672 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
1673 // Before launching an Inference Engine graph we need to specify its output blobs.
1674 // This function requests output blobs based on input references from
1675 // layers of the default backend or layers from different graphs.
1676 void addInfEngineNetOutputs(LayerData &ld)
1678 CV_TRACE_FUNCTION();
1679 Ptr<InfEngineBackendNet> layerNet;
1680 if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end())
1682 Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1685 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1686 CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
1687 layerNet = ieNode->net;
1690 // For every input reference we check whether it belongs to one of
1691 // the Inference Engine backend graphs, and request an output blob if it does.
1692 // Do nothing if the layer's input is from the same graph.
1693 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1695 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1696 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1697 if (!inpNode.empty())
1699 Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1700 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1701 if (layerNet != ieInpNode->net)
1703 // layerNet is empty or nodes are from different graphs.
1704 ieInpNode->net->addOutput(ieInpNode->layer.getName());
1710 void initInfEngineBackend(const std::vector<LayerPin>& blobsToKeep_)
1712 CV_TRACE_FUNCTION();
1713 CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, haveInfEngine());
1714 MapIdToLayerData::iterator it;
1715 Ptr<InfEngineBackendNet> net;
1717 for (it = layers.begin(); it != layers.end(); ++it)
1719 LayerData &ld = it->second;
1722 CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
1723 (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
1724 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1726 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1727 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1728 dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
1730 dataPtr->setName(netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]);
1736 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1738 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1739 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1740 dataPtr->name = ld.name;
1742 dataPtr->setName(ld.name);
1748 if (skipInfEngineInit)
1750 Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
1751 CV_Assert(!node.empty());
1753 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1754 CV_Assert(!ieNode.empty());
1755 ieNode->net->reset();
1757 for (it = layers.begin(); it != layers.end(); ++it)
1759 LayerData &ld = it->second;
1762 for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
1764 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
1765 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1766 dataPtr->name = netInputLayer->outNames[i];
1768 dataPtr->setName(netInputLayer->outNames[i]);
1774 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1776 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1777 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1778 dataPtr->name = ld.name;
1780 dataPtr->setName(ld.name);
1784 ieNode->net->addBlobs(ld.inputBlobsWrappers);
1785 ieNode->net->addBlobs(ld.outputBlobsWrappers);
1788 layers[lastLayerId].skip = false;
1789 ieNode->net->init((Target)preferableTarget);
1793 // Build Inference Engine networks from sets of layers that support this
1794 // backend. Split the whole model into several Inference Engine networks if
1795 // some of the layers are not implemented.
1797 bool supportsCPUFallback = preferableTarget == DNN_TARGET_CPU ||
1798 BackendRegistry::checkIETarget(DNN_TARGET_CPU);
1800 // Set of all input and output blob wrappers for the current network.
1801 std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
1802 for (it = layers.begin(); it != layers.end(); ++it)
1804 LayerData &ld = it->second;
1805 if (ld.id == 0 && ld.skip)
1807 bool fused = ld.skip;
1809 Ptr<Layer> layer = ld.layerInstance;
1810 if (!fused && !layer->supportBackend(preferableBackend))
1812 bool customizable = ld.id != 0 &&
1813 INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2) &&
1814 supportsCPUFallback;
1815 // TODO: there is a bug in the Myriad plugin with shape inference for custom layers.
1816 if (preferableTarget == DNN_TARGET_MYRIAD)
1818 for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
1820 customizable = ld.inputBlobs[i]->size[0] == 1;
1824 // TODO: fix these workarounds
1825 if (preferableTarget == DNN_TARGET_MYRIAD ||
1826 preferableTarget == DNN_TARGET_OPENCL ||
1827 preferableTarget == DNN_TARGET_OPENCL_FP16)
1828 customizable &= ld.type != "Concat";
1830 if (preferableTarget == DNN_TARGET_OPENCL ||
1831 preferableTarget == DNN_TARGET_OPENCL_FP16)
1832 customizable &= ld.type != "Power";
1834 if (preferableTarget == DNN_TARGET_OPENCL)
1835 customizable &= ld.type != "Eltwise";
1839 addInfEngineNetOutputs(ld);
1840 net = Ptr<InfEngineBackendNet>();
1841 netBlobsWrappers.clear(); // Not used for the R5 release, but we don't wrap it in an #ifdef.
1842 layer->preferableTarget = DNN_TARGET_CPU;
1846 ld.skip = true; // Initially skip all Inference Engine supported layers.
1848 // Create a new network if one of the inputs comes from a different Inference Engine graph.
1849 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1851 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1852 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1853 if (!inpNode.empty())
1855 Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1856 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1857 if (ieInpNode->net != net)
1859 net = Ptr<InfEngineBackendNet>();
1860 netBlobsWrappers.clear(); // Not used for the R5 release, but we don't wrap it in an #ifdef.
1866 Ptr<BackendNode> node;
1871 bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
1872 ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
1874 node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
1875 ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
1879 net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet());
1883 if (layer->supportBackend(preferableBackend))
1884 node = layer->initInfEngine(ld.inputBlobsWrappers);
1887 node = Ptr<BackendNode>(new InfEngineBackendNode(
1888 ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
1891 else if (node.empty())
1894 CV_Assert(!node.empty());
1895 ld.backendNodes[preferableBackend] = node;
1897 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1898 CV_Assert(!ieNode.empty());
1901 for (const auto& pin : blobsToKeep_)
1903 if (pin.lid == ld.id)
1905 ieNode->net->addOutput(ieNode->layer.getName());
1910 // Convert weights to FP16 for specific targets.
1911 if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
1912 preferableTarget == DNN_TARGET_MYRIAD ||
1913 preferableTarget == DNN_TARGET_FPGA) && !fused)
1915 #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
1916 for (const std::string& name : {"weights", "biases"})
1918 auto it = ieNode->layer.getParameters().find(name);
1919 if (it != ieNode->layer.getParameters().end())
1921 InferenceEngine::Blob::Ptr bp = it->second.as<InferenceEngine::Blob::Ptr>();
1922 it->second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(bp));
1926 auto& blobs = ieNode->layer.getConstantData();
1929 // In the case of a non-weightable layer we have to specify
1930 // its precision by adding a dummy blob.
1931 auto blob = InferenceEngine::make_shared_blob<int16_t>(
1932 InferenceEngine::Precision::FP16,
1933 InferenceEngine::Layout::C, {1});
1939 for (auto& it : blobs)
1940 it.second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(it.second));
1946 net->addLayer(ieNode->layer);
1948 net->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers, ieNode->layer.getName());
1949 net->addBlobs(ld.inputBlobsWrappers);
1950 net->addBlobs(ld.outputBlobsWrappers);
1951 addInfEngineNetOutputs(ld);
1954 // Initialize all networks.
1955 for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
1957 LayerData &ld = it->second;
1958 if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end())
1961 Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1965 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1969 CV_Assert(!ieNode->net.empty());
1971 if (!ieNode->net->isInitialized())
1973 ieNode->net->init((Target)preferableTarget);
1978 #endif // HAVE_DNN_IE_NN_BUILDER_2019
1981 #ifdef HAVE_DNN_NGRAPH
1982 void addNgraphOutputs(LayerData &ld)
1984 CV_TRACE_FUNCTION();
1986 Ptr<InfEngineNgraphNet> layerNet;
1987 auto it = ld.backendNodes.find(preferableBackend);
1988 if (it != ld.backendNodes.end())
1990 Ptr<BackendNode> node = it->second;
1993 Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
1994 CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
1995 layerNet = ieNode->net;
1999 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2001 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
2002 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
2003 if (!inpNode.empty())
2005 Ptr<InfEngineNgraphNode> ieInpNode = inpNode.dynamicCast<InfEngineNgraphNode>();
2006 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
2007 if (layerNet != ieInpNode->net)
2009 ieInpNode->net->addOutput(ieInpNode->node->get_friendly_name());
2010 ieInpNode->net->setUnconnectedNodes(ieInpNode);
2016 void initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
2018 CV_TRACE_FUNCTION();
2019 CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, haveInfEngine());
2021 MapIdToLayerData::iterator it;
2022 Ptr<InfEngineNgraphNet> net;
2024 for (it = layers.begin(); it != layers.end(); ++it)
2026 LayerData &ld = it->second;
2029 CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
2030 (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
2031 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2033 InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
2034 std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
2035 outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." + std::to_string(i)) : outputName;
2036 dataPtr->setName(outputName);
2041 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2043 InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
2044 std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name;
2045 dataPtr->setName(outputName);
2050 if (skipInfEngineInit)
2052 Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
2053 CV_Assert(!node.empty());
2055 Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2056 CV_Assert(!ieNode.empty());
2057 ieNode->net->reset();
2059 for (it = layers.begin(); it != layers.end(); ++it)
2061 LayerData &ld = it->second;
2064 for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
2066 InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.inputBlobsWrappers[i]);
2067 dataPtr->setName(netInputLayer->outNames[i]);
2072 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2074 InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
2075 dataPtr->setName(ld.name);
2078 ieNode->net->addBlobs(ld.inputBlobsWrappers);
2079 ieNode->net->addBlobs(ld.outputBlobsWrappers);
2082 layers[lastLayerId].skip = false;
2083 ieNode->net->init((Target)preferableTarget);
2087 bool supportsCPUFallback = preferableTarget == DNN_TARGET_CPU ||
2088 BackendRegistry::checkIETarget(DNN_TARGET_CPU);
2090 // Build Inference Engine networks from sets of layers that support this
2091 // backend. Split the whole model into several Inference Engine networks if
2092 // some of the layers are not implemented.
2093 for (it = layers.begin(); it != layers.end(); ++it)
2095 LayerData &ld = it->second;
2097 if (ld.id == 0 && ld.skip)
2100 bool fused = ld.skip;
2101 Ptr<Layer> layer = ld.layerInstance;
2102 if (!fused && !layer->supportBackend(preferableBackend))
2104 bool customizable = ld.id != 0 && supportsCPUFallback;
2106 // TODO: there is a bug in the Myriad plugin with shape inference for custom layers.
2107 if (preferableTarget == DNN_TARGET_MYRIAD)
2109 for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
2111 customizable = ld.inputBlobs[i]->size[0] == 1;
2115 // TODO: fix these workarounds
2116 if (preferableTarget == DNN_TARGET_MYRIAD ||
2117 preferableTarget == DNN_TARGET_OPENCL ||
2118 preferableTarget == DNN_TARGET_OPENCL_FP16)
2119 customizable &= ld.type != "Concat";
2121 if (preferableTarget == DNN_TARGET_OPENCL ||
2122 preferableTarget == DNN_TARGET_OPENCL_FP16)
2123 customizable &= ld.type != "Power";
2125 if (preferableTarget == DNN_TARGET_OPENCL)
2126 customizable &= ld.type != "Eltwise";
2130 addNgraphOutputs(ld);
2131 net = Ptr<InfEngineNgraphNet>();
2132 layer->preferableTarget = DNN_TARGET_CPU;
2134 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2136 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
2137 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
2138 if (!inpNode.empty()) {
2139 Ptr<InfEngineNgraphNode> ieNode = inpNode.dynamicCast<InfEngineNgraphNode>();
2140 CV_Assert(!ieNode.empty());
2141 ieNode->net->setUnconnectedNodes(ieNode);
2147 ld.skip = true; // Initially skip all Inference Engine supported layers.
2149 // Create a new network if one of the inputs comes from a different Inference Engine graph.
2150 std::vector<Ptr<BackendNode>> inputNodes;
2151 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2153 // Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois)
2154 if (inputNodes.size() == ld.inputBlobsId.size()) {
2157 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
2158 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
2159 if (!inpNode.empty())
2161 Ptr<InfEngineNgraphNode> ieInpNode = inpNode.dynamicCast<InfEngineNgraphNode>();
2162 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
2163 if (ieInpNode->net == net && !fused) {
2164 inputNodes.push_back(inpNode);
2170 net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
2174 std::vector<std::string> inputNames;
2175 std::vector<cv::Mat> inputs;
2177 auto curr_pos = inpLd.consumers.begin();
2178 auto compare = [&ld] (const LayerPin& lp) { return lp.lid == ld.id; };
2179 auto cons = curr_pos;
2180 while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) !=
2181 inpLd.consumers.end()) {
2182 int cons_inp = cons->oid;
2183 Ptr<NgraphBackendWrapper> inpWrapper = inpLd.outputBlobsWrappers[cons_inp].
2184 dynamicCast<NgraphBackendWrapper>();
2185 CV_Assert(!inpWrapper.empty());
2186 auto iter = std::find(inputNames.begin(), inputNames.end(),
2187 inpWrapper->dataPtr->getName());
2188 if (iter == inputNames.end()) {
2189 inputNames.push_back(inpWrapper->dataPtr->getName());
2190 inputs.push_back(inpLd.outputBlobs[cons_inp]);
2192 curr_pos = cons + 1;
2195 auto inps = net->setInputs(inputs, inputNames);
2196 for (auto& inp : inps) {
2197 inputNodes.emplace_back(Ptr<BackendNode>(new InfEngineNgraphNode(inp)));
2202 Ptr<BackendNode> node;
2207 bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
2208 ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
2210 node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
2211 ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
2215 net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
2220 CV_Assert(ld.inputBlobsId.size() == inputNodes.size());
2221 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2223 int lid = ld.inputBlobsId[i].lid;
2224 int oid = ld.inputBlobsId[i].oid;
2225 if (oid == 0 || lid == 0)
2228 auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>();
2229 CV_Assert(oid < ieInpNode->node->get_output_size());
2230 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
2231 inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node));
2232 #elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3)
2233 inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid)));
2235 inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid, false)));
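// The preprocessor branches above pick the nGraph call that matches the linked Inference Engine
// release when re-wrapping the producer node of this input for use in the current graph.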
2239 if (layer->supportBackend(preferableBackend))
2241 node = layer->initNgraph(ld.inputBlobsWrappers, inputNodes);
2242 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2244 InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
2245 node.dynamicCast<InfEngineNgraphNode>()->setName(dataPtr->getName());
2250 node = Ptr<BackendNode>(new InfEngineNgraphNode(inputNodes,
2251 ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
2254 else if (node.empty())
2257 ld.backendNodes[preferableBackend] = node;
2259 Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2260 CV_Assert(!ieNode.empty());
2263 if (ld.consumers.empty()) {
2264 // TF EAST_text_detection
2265 ieNode->net->setUnconnectedNodes(ieNode);
2267 for (const auto& pin : blobsToKeep_)
2269 if (pin.lid == ld.id)
2271 ieNode->net->addOutput(ieNode->node->get_friendly_name());
2275 ieNode->net->setNodePtr(&ieNode->node);
2277 net->addBlobs(ld.inputBlobsWrappers);
2278 net->addBlobs(ld.outputBlobsWrappers);
2279 addNgraphOutputs(ld);
2282 // Initialize all networks.
2283 for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
2285 LayerData &ld = it->second;
2286 auto iter = ld.backendNodes.find(preferableBackend);
2287 if (iter == ld.backendNodes.end())
2290 Ptr<BackendNode>& node = iter->second;
2294 Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2298 CV_Assert(!ieNode->net.empty());
2300 if (!ieNode->net->isInitialized())
2302 ieNode->net->setUnconnectedNodes(ieNode);
2303 ieNode->net->createNet((Target)preferableTarget);
2308 #endif // HAVE_DNN_NGRAPH
2310 void initVkComBackend()
2312 CV_TRACE_FUNCTION();
2313 CV_Assert(preferableBackend == DNN_BACKEND_VKCOM);
2318 MapIdToLayerData::iterator it = layers.begin();
2319 for (; it != layers.end(); it++)
2321 LayerData &ld = it->second;
2322 Ptr<Layer> layer = ld.layerInstance;
2323 if (!layer->supportBackend(preferableBackend))
2332 ld.backendNodes[DNN_BACKEND_VKCOM] =
2333 layer->initVkCom(ld.inputBlobsWrappers);
2335 catch (const cv::Exception& e)
2337 CV_LOG_ERROR(NULL, "initVkCom failed, falling back to CPU implementation. " << e.what());
2338 ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr<BackendNode>();
2344 void initCUDABackend(const std::vector<LayerPin>& blobsToKeep_)
2346 CV_Assert(haveCUDA());
2347 CV_Assert(preferableBackend == DNN_BACKEND_CUDA);
2350 if (cuda4dnn::getDeviceCount() <= 0)
2351 CV_Error(Error::StsError, "No CUDA capable device found.");
2353 if (cuda4dnn::getDevice() < 0)
2354 CV_Error(Error::StsError, "No CUDA capable device selected.");
2356 if (!cuda4dnn::isDeviceCompatible())
2357 CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration.");
2359 if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16())
2360 CV_Error(Error::StsError, "The selected CUDA device does not support FP16 operations.");
2364 cuda4dnn::csl::CSLContext context;
2365 context.stream = cuda4dnn::csl::Stream(true);
2366 context.cublas_handle = cuda4dnn::csl::cublas::Handle(context.stream);
2367 context.cudnn_handle = cuda4dnn::csl::cudnn::Handle(context.stream);
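// The cuBLAS and cuDNN handles are created on the same stream as the rest of the backend work,
// so all operations of this network are serialized on a single CUDA stream.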
2369 auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers
2370 cudaInfo = std::unique_ptr<CudaInfo_t>(new CudaInfo_t(std::move(context), std::move(d2h_stream)));
2371 cuda4dnn::checkVersions();
2374 cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any
2376 for (auto& layer : layers)
2378 auto& ld = layer.second;
2381 for (auto& wrapper : ld.inputBlobsWrappers)
2383 auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>();
2384 cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream);
2388 for (auto& wrapper : ld.outputBlobsWrappers)
2390 auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>();
2391 cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream);
2395 for (auto& layer : layers)
2397 auto& ld = layer.second;
2398 auto& layerInstance = ld.layerInstance;
2400 if (!layerInstance->supportBackend(DNN_BACKEND_CUDA))
2402 std::ostringstream os;
2403 os << "CUDA backend will fall back to the CPU implementation for the layer \"" << ld.name
2404 << "\" of type " << ld.type << '\n';
2405 CV_LOG_INFO(NULL, os.str().c_str());
2409 /* we make a copy so that `initCUDA` doesn't modify `cudaInfo->context` */
2410 auto context = cudaInfo->context;
2411 auto node = layerInstance->initCUDA(&context, ld.inputBlobsWrappers, ld.outputBlobsWrappers);
2412 ld.backendNodes[DNN_BACKEND_CUDA] = node;
2414 auto cudaNode = node.dynamicCast<CUDABackendNode>();
2415 cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes());
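// When more than one output is requested, the outputs the caller asked to keep (blobsToKeep_)
// are scheduled below for device-to-host copies on the dedicated background stream (d2h_stream),
// so these transfers can overlap with the remaining computation
// (see cudaD2HBackgroundTransfers in forwardLayer()).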
2418 if (blobsToKeep_.size() > 1)
2420 for (const auto& pin : blobsToKeep_)
2422 LayerData& ld = layers[pin.lid];
2423 ld.cudaD2HBackgroundTransfers.push_back(pin.oid);
2429 void allocateLayer(int lid, const LayersShapesMap& layersShapes)
2431 CV_TRACE_FUNCTION();
2433 LayerData &ld = layers[lid];
2439 size_t ninputs = ld.inputBlobsId.size();
2441 printf("layer %s:", ld.name.c_str());
2442 for (size_t i = 0; i < ninputs; i++)
2444 int inp_lid = ld.inputBlobsId[i].lid;
2445 LayerData &inp_ld = layers[inp_lid];
2446 int inp_outputs = (int)inp_ld.outputBlobs.size();
2447 std::cout << " " << inp_ld.name << "(" << inp_outputs;
2449 for( int j = 0; j < inp_outputs; j++ )
2451 std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size;
2458 // determine parent layers
2459 for (size_t i = 0; i < ninputs; i++)
2460 ld.inputLayersId.insert(ld.inputBlobsId[i].lid);
2463 for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
2464 allocateLayer(*i, layersShapes);
2467 if (ld.id == 0) // DataLayer
2469 ninputs = netInputLayer->inputsData.size();
2470 ld.inputBlobsWrappers.resize(ninputs);
2471 for (size_t i = 0; i < ninputs; i++)
2472 ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
2476 ld.inputBlobs.resize(ninputs);
2477 ld.inputBlobsWrappers.resize(ninputs);
2478 for (size_t i = 0; i < ninputs; i++)
2480 LayerPin from = ld.inputBlobsId[i];
2481 CV_Assert(from.valid());
2482 CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
2483 ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
2484 ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
2488 LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);
2490 CV_Assert(layerShapesIt != layersShapes.end());
2492 std::vector<LayerPin> pinsForInternalBlobs;
2493 blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
2494 preferableBackend == DNN_BACKEND_OPENCV &&
2495 preferableTarget == DNN_TARGET_OPENCL_FP16);
2496 ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
2497 for (int i = 0; i < ld.outputBlobs.size(); ++i)
2498 ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
2500 /* CUDA backend has its own system for internal blobs; we don't need these */
2501 ld.internalBlobsWrappers.resize((preferableBackend == DNN_BACKEND_CUDA) ? 0 : ld.internals.size());
2502 for (int i = 0; i < ld.internalBlobsWrappers.size(); ++i)
2503 ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
2505 Ptr<Layer> layerPtr = ld.getLayerInstance();
2507 std::vector<Mat> inps(ld.inputBlobs.size());
2508 for (int i = 0; i < ld.inputBlobs.size(); ++i)
2510 inps[i] = *ld.inputBlobs[i];
2512 layerPtr->finalize(inps, ld.outputBlobs);
2513 layerPtr->preferableTarget = preferableTarget;
2515 std::cout << "\toutputs:";
2516 size_t noutputs = ld.outputBlobs.size();
2517 for (size_t j = 0; j < noutputs; j++)
2519 std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size;
2525 // After allocating the layer, we decrease the reference counters of its input blobs.
2526 blobManager.releaseReferences(ld.inputBlobsId);
2527 blobManager.releaseReferences(pinsForInternalBlobs);
2533 #define printf_(args) printf args
2535 #define printf_(args)
2538 void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
2540 CV_TRACE_FUNCTION();
2542 if(!fusion || (preferableBackend != DNN_BACKEND_OPENCV &&
2543 preferableBackend != DNN_BACKEND_CUDA &&
2544 preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 &&
2545 preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH))
2548 // Scan through all the layers. If there is a convolution layer followed by an activation layer,
2549 // we try to embed the activation into the convolution and disable separate execution of the activation.
2550 std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
2551 blobsToKeep_.end());
2552 MapIdToLayerData::iterator it;
2553 for (it = layers.begin(); it != layers.end(); it++)
2555 int lid = it->first;
2556 LayerData& ld = layers[lid];
2559 printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
2562 printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
2564 // Optimization #1: try to fuse batch norm, scaling and/or activation layers
2565 // with the current layer if they follow it. Normally they are fused with the convolution layer,
2566 // but some of them (like activation) may also be fused with fully-connected, element-wise (+) and
2567 // some other layers.
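// Illustrative example: a Convolution -> BatchNorm -> ReLU chain collapses into a single fused
// Convolution node, and the BatchNorm and ReLU layers are marked with skip = true so they are
// not executed separately.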
2568 Ptr<Layer>& currLayer = ld.layerInstance;
2569 if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
2571 LayerData* nextData = &layers[ld.consumers[0].lid];
2572 LayerPin lpNext(ld.consumers[0].lid, 0);
2575 /* we use the `tryFuse` member of the convolution layer to fuse eltwise later;
2576  * it's not intended to be fused here, hence we stop when we encounter eltwise
2578 if (preferableBackend == DNN_BACKEND_CUDA && ld.type == "Convolution" && nextData->type == "Eltwise")
2580 Ptr<Layer> nextLayer = nextData->layerInstance;
2581 if (currLayer->tryFuse(nextLayer))
2583 printf_(("\tfused with %s\n", nextLayer->name.c_str()));
2584 nextData->skip = true;
2585 ld.outputBlobs = layers[lpNext.lid].outputBlobs;
2586 ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
2587 if (nextData->consumers.size() == 1)
2589 int nextLayerId = nextData->consumers[0].lid;
2590 nextData = &layers[nextLayerId];
2591 lpNext = LayerPin(nextLayerId, 0);
2603 if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA)
2604 continue; // Go to the next layer.
2606 // TODO: the OpenCL target could support more fusion styles.
2607 if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
2608 (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
2609 ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
2610 ld.layerInstance->type != "Concat")) )
2613 if (preferableBackend == DNN_BACKEND_CUDA && IS_DNN_CUDA_TARGET(preferableTarget)
2614 && ld.layerInstance->type != "Convolution"
2615 && ld.layerInstance->type != "Concat")
2620 // For now, the OpenCL target supports fusion only with ReLU/ChannelsPReLU/ReLU6/TanH/Power activations
2621 if (IS_DNN_OPENCL_TARGET(preferableTarget) &&
2622 nextData->type != "ReLU" &&
2623 nextData->type != "ChannelsPReLU" &&
2624 nextData->type != "ReLU6" &&
2625 nextData->type != "TanH" &&
2626 nextData->type != "Power")
2629 Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
2630 if (nextActivLayer.empty())
2633 if (currLayer->setActivation(nextActivLayer))
2635 printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
2636 nextData->skip = true;
2637 ld.outputBlobs = layers[lpNext.lid].outputBlobs;
2638 ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
2639 if (nextData->consumers.size() == 1)
2641 int nextLayerId = nextData->consumers[0].lid;
2642 nextData = &layers[nextLayerId];
2643 lpNext = LayerPin(nextLayerId, 0);
2655 // OpenCL: fuse convolution layer followed by eltwise + relu
2656 // CUDA: fuse convolution layer followed by eltwise (and optional activation)
2658 (IS_DNN_OPENCL_TARGET(preferableTarget) || IS_DNN_CUDA_TARGET(preferableTarget)) &&
2659 ld.layerInstance->type == "Convolution"
2660 ) // end of the multi-line 'if' condition
2662 Ptr<EltwiseLayer> nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
2663 if (nextEltwiseLayer.empty())
2667 // CUDA backend supports fusion with eltwise sum (without variable channels)
2668 // `nextEltwiseLayer` is reset if the eltwise layer doesn't have a configuration compatible with fusion
2669 if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty())
2671 // we create a temporary backend node for the eltwise layer to obtain its configuration
2672 cuda4dnn::csl::CSLContext context; // assume that initCUDA and EltwiseOp do not use the context during init
2673 const auto node = nextData->layerInstance->initCUDA(&context, nextData->inputBlobsWrappers, nextData->outputBlobsWrappers);
2674 const auto eltwiseNode = node.dynamicCast<cuda4dnn::EltwiseOpBase>();
2675 // CUDA backend uses EltwiseOp when all operands have the same number of channels; otherwise, ShortcutOp is used.
2676 // Hence, a successful cast to EltwiseOp implies that the number of channels is the same in all operand tensors.
2677 if (eltwiseNode.empty() || eltwiseNode->op != cuda4dnn::EltwiseOpType::SUM || !eltwiseNode->coeffs.empty())
2678 nextEltwiseLayer = Ptr<EltwiseLayer>();
2682 if (pinsToKeep.count(lpNext) != 0)
2684 if (nextData->inputBlobsId.size() != 2)
2687 if (!nextData->params.has("operation") || toLowerCase(nextData->params.get<String>("operation")) == "sum")
2689 if (nextData->params.has("coeff"))
2691 DictValue paramCoeff = nextData->params.get("coeff");
2692 int n = paramCoeff.size();
2693 bool isCoeffOneOne = (n == 2);
2694 for (int i = 0; isCoeffOneOne && i < n; i++)
2696 float c = paramCoeff.get<float>(i);
2697 isCoeffOneOne &= (c == 1.0f);
2701 CV_LOG_DEBUG(NULL, "DNN/OpenCL: only fusion of 'Sum' without coeffs (or with coeffs {1.0, 1.0}) is supported");
2708 CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get<String>("operation"));
2713 LayerData *eltwiseData = nextData;
2715 // Eltwise layer has two inputs. We need to determine which
2716 // is the base convolution layer and which could be used as its bias.
2717 LayerData* biasLayerData = 0;
2718 for (int i = 0; i < 2; ++i)
2720 LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid];
2721 CV_Assert(downLayerData);
2722 while (downLayerData->skip)
2724 if (downLayerData->inputBlobsId.size() == 1)
2725 downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
2732 if (downLayerData && ld.id == downLayerData->id)
2734 biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid];
2738 CV_Assert(biasLayerData);
2740 // fuse eltwise + activation layer
2741 // bias must already be computed to fuse => bias layer must appear before convolution
2742 if (biasLayerData->id < ld.id)
2744 /* we can fuse activation if:
2745 * => activation layer that follows is the only consumer of eltwise output
2746 * => activation layer does not process multiple inputs
2747  * => we do not need to keep the output of eltwise
2749 Ptr<ActivationLayer> nextFusabeleActivLayer;
2750 if (eltwiseData->consumers.size() == 1 && pinsToKeep.count(lpNext) == 0)
2752 nextData = &layers[eltwiseData->consumers[0].lid];
2753 lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
2754 CV_Assert(nextData);
2755 if (nextData->outputBlobs.size() == 1)
2756 nextFusabeleActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
2760 // OCL backend cannot fuse in this case but the CUDA backend can continue with just eltwise
2764 // the requirements of OCV OpenCL backend and CUDA backend are different
2765 // we need to check them separately; hence, the fuse variables
2766 bool fuse_eltwise = false, fuse_activation = false;
2768 Ptr<PowerLayer> activ_power;
2769 if (IS_DNN_OPENCL_TARGET(preferableTarget) && !nextFusabeleActivLayer.empty() &&
2771 (!nextData->type.compare("ReLU") ||
2772 !nextData->type.compare("ChannelsPReLU") ||
2773 (!nextData->type.compare("Power") && (activ_power = nextFusabeleActivLayer.dynamicCast<PowerLayer>()) && activ_power->scale == 1.0f)
2775 currLayer->setActivation(nextFusabeleActivLayer))
2777 fuse_eltwise = true;
2778 fuse_activation = true;
2781 if (IS_DNN_CUDA_TARGET(preferableTarget))
2783 /* supported fusion options:
2784 * => convolution + eltwise
2785 * => activation(convolution) + eltwise
2786 * > convolution + activation would have been fused already; we have to fuse eltwise
2787 * => activation(convolution + eltwise)
2788 * > fuse eltwise and then activation
2790 auto layer = nextEltwiseLayer.staticCast<Layer>();
2791 if (currLayer->tryFuse(layer))
2793 fuse_eltwise = true; /* eltwise was successfully fused */
2794 if (!nextFusabeleActivLayer.empty() && nextData)
2796 if ((!nextData->type.compare("ReLU") ||
2797 !nextData->type.compare("ReLU6") ||
2798 !nextData->type.compare("Power") ||
2799 !nextData->type.compare("TanH") ||
2800 !nextData->type.compare("Sigmoid") ||
2801 !nextData->type.compare("Swish") ||
2802 !nextData->type.compare("Mish")) &&
2803 currLayer->setActivation(nextFusabeleActivLayer))
2805 // activation was fused
2806 fuse_activation = true;
2812 CV_Assert(!fuse_activation || fuse_eltwise); /* cannot fuse activation without eltwise */
2813 if(fuse_eltwise && fuse_activation)
2815 CV_Assert(nextData);
2816 CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
2817 ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
2818 printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
2819 printf_(("\tfused with %s\n", nextFusabeleActivLayer->name.c_str()));
2820 eltwiseData->skip = true;
2821 nextData->skip = true;
2822 // This optimization is for cases where [conv] and some other layer feed an [eltwise] layer followed by [activ].
2828 // This way all the element-wise computations
2829 // (i.e. some_layer+conv or some_layer*conv)
2830 // would be done at the [conv] layer. So we need to
2831 // replace [conv]'s output blob with [eltwise]'s one,
2832 // considering that [activ] is an in-place layer.
2833 // Also we need to move all the consumers' references.
2834 // To prevent memory collisions (i.e. when input of
2835 // [conv] and output of [eltwise] is the same blob)
2836 // we allocate a new blob.
2837 CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
2838 ld.outputBlobs[0] = ld.outputBlobs[0].clone();
2839 ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
2841 eltwiseData->outputBlobs = ld.outputBlobs;
2842 nextData->outputBlobs = ld.outputBlobs;
2843 eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
2844 nextData->outputBlobsWrappers = ld.outputBlobsWrappers;
2846 // Move references of [activ] layer consumers to the newly allocated blob.
2847 for (int i = 0; i < nextData->consumers.size(); ++i)
2849 LayerData& consumer = layers[nextData->consumers[i].lid];
2850 for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
2852 if (consumer.inputBlobsId[j].lid == lpNext.lid)
2854 consumer.inputBlobs[j] = &ld.outputBlobs[0];
2855 consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
2861 else if (fuse_eltwise) // conv + eltwise (note: conv could have fused activations before eltwise)
2863 CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget));
2864 CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
2865 ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
2866 printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
2867 eltwiseData->skip = true;
2868 // This optimization is for cases where [conv] (maybe fused with activ)
2869 // and some other layer feed an [eltwise] layer.
2873 // This way all the element-wise computations
2874 // (i.e. some_layer+conv or some_layer*conv)
2875 // would be done at the [conv] layer. So we need to
2876 // replace [conv]'s output blob with [eltwise]'s one.
2877 // Also we need to move all the consumers' references.
2878 // To prevent memory collisions (i.e. when input of
2879 // [conv] and output of [eltwise] is the same blob)
2880 // we allocate a new blob.
2881 CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
2882 ld.outputBlobs[0] = ld.outputBlobs[0].clone();
2883 ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
2885 eltwiseData->outputBlobs = ld.outputBlobs;
2886 eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
2888 // Move references of [eltwise] layer consumers to the newly allocated blob.
2889 for (int i = 0; i < eltwiseData->consumers.size(); ++i)
2891 LayerData& consumer = layers[eltwiseData->consumers[i].lid];
2892 for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
2894 if (consumer.inputBlobsId[j].lid == eltwiseData->id)
2896 consumer.inputBlobs[j] = &ld.outputBlobs[0];
2897 consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
2911 if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA)
2912 continue; // Go to the next layer.
2914 // Optimization #2: if there is a concat layer that concatenates channels
2915 // from the inputs together (i.e. axis == 1), then we make the inputs of
2916 // the concat layer write directly into the concatenation output buffer
2917 // (and so we eliminate the concatenation layer, because the channels
2918 // are concatenated implicitly).
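// Illustrative example: for axis == 1 and two inputs with 64 and 128 channels, the first input
// writes directly into channels [0, 64) and the second into channels [64, 192) of the concat
// output blob, so no separate copy is performed for the Concat layer.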
2919 Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
2920 if( !concatLayer.empty() && !concatLayer->padding && ld.outputBlobs.size() == 1 )
2922 Mat& output = ld.outputBlobs[0];
2925 if (!ld.outputBlobsWrappers.empty() &&
2926 (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)))
2928 size_t i, ninputs = ld.inputBlobsId.size();
2929 bool conv_layer = true;
2930 for( i = 0; i < ninputs; i++ )
2932 LayerPin pin = ld.inputBlobsId[i];
2933 LayerData* inp_i_data = &layers[pin.lid];
2934 while(inp_i_data->skip &&
2935 inp_i_data->inputBlobsId.size() == 1 &&
2936 inp_i_data->consumers.size() == 1)
2938 pin = inp_i_data->inputBlobsId[0];
2939 inp_i_data = &layers[pin.lid];
2941 conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution");
2945 std::vector<UMat> umat_outputBlobs;
2946 umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2947 umat_output = umat_outputBlobs[0];
2951 // TODO: in general, this optimization can always be done, but
2952 // many layers currently check that the input/output blobs are
2953 // continuous arrays. Unfortunately, this is not true when
2954 // the concatenation optimization is applied with batch_size > 1.
2955 // So, for now, we only apply this optimization in the most common
2956 // case: batch_size == 1.
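// Note: output.total(0, axis) == 1 below holds exactly when every dimension before the concat
// axis equals 1 (e.g. batch_size == 1 for axis == 1).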
2957 int axis = clamp(concatLayer->axis, output.dims);
2958 if( output.total(0, axis) == 1 )
2960 size_t i, ninputs = ld.inputBlobsId.size();
2961 std::vector<LayerPin> realinputs(ninputs);
2962 for( i = 0; i < ninputs; i++ )
2964 LayerPin pin = ld.inputBlobsId[i];
2965 LayerData* inp_i_data = &layers[pin.lid];
2966 while(inp_i_data->skip &&
2967 inp_i_data->inputBlobsId.size() == 1 &&
2968 inp_i_data->consumers.size() == 1)
2970 pin = inp_i_data->inputBlobsId[0];
2971 inp_i_data = &layers[pin.lid];
2973 printf_(("\treal input for %s is %s\n",
2974 layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
2975 inp_i_data->getLayerInstance()->name.c_str()));
2977 if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
2980 if (preferableBackend == DNN_BACKEND_CUDA &&
2981 (inp_i_data->layerInstance->supportBackend(DNN_BACKEND_CUDA) == false ||
2982 (inp_i_data->layerInstance->type != "Convolution" &&
2983 inp_i_data->layerInstance->type != "Pooling" &&
2984 inp_i_data->layerInstance->type != "Resize" &&
2985 inp_i_data->layerInstance->type != "Flatten" &&
2986 inp_i_data->layerInstance->type != "Permute" &&
2987 inp_i_data->layerInstance->type != "Reorg" &&
2988 inp_i_data->layerInstance->type != "Eltwise" &&
2989 inp_i_data->layerInstance.dynamicCast<ActivationLayer>().empty())))
2994 realinputs[i] = pin;
2999 // Allocate new memory to prevent collisions during memory
3000 // reuse (see https://github.com/opencv/opencv/pull/10456).
3001 output = output.clone();
3003 if (preferableBackend == DNN_BACKEND_OPENCV &&
3004 IS_DNN_OPENCL_TARGET(preferableTarget))
3006 std::vector<UMat> umats(1);
3007 umat_output = umat_output.clone();
3008 umats[0] = umat_output;
3009 OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
3014 if (preferableBackend == DNN_BACKEND_CUDA)
3015 ld.outputBlobsWrappers[0] = wrap(output);
3017 std::vector<Range> chrange(output.dims, Range::all());
3019 for( i = 0; i < ninputs; i++ )
3021 LayerPin pin = realinputs[i];
3022 LayerData* inp_i_data = &layers[pin.lid];
3023 int channels_i = ld.inputBlobs[i]->size[axis];
3024 chrange[axis] = Range(ofs, ofs + channels_i);
3025 printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
3026 pin.oid, ofs, ofs + channels_i));
3028 Mat output_slice = output(chrange);
3029 Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
3030 CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
3031 Mat* oldPtr = &curr_output;
3032 curr_output = output_slice;
3034 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
3036 std::vector<UMat> umats(inp_i_data->outputBlobsWrappers.size());
3037 umats[pin.oid] = umat_output(chrange);
3038 OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats);
3042 if (preferableBackend == DNN_BACKEND_CUDA)
3044 auto cuda_wrapper = wrap(output).dynamicCast<CUDABackendWrapper>();
3045 auto offset = chrange[axis].start * output_slice.total(axis + 1, output.dims);
3046 auto new_shape = shape(output_slice);
3047 cuda_wrapper->update(new_shape, offset);
3048 inp_i_data->outputBlobsWrappers[pin.oid] = cuda_wrapper.staticCast<BackendWrapper>();
3051 // Layers that referred to the old input Mat will now refer to the
3052 // new data through the same Mat object.
3053 CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output);
3057 if (preferableBackend == DNN_BACKEND_CUDA)
3059 for (int i = 0; i < ld.consumers.size(); i++)
3061 LayerData& consumer = layers[ld.consumers[i].lid];
3062 for (int j = 0; j < consumer.inputBlobsId.size(); j++)
3064 if (consumer.inputBlobsId[j].lid == ld.id)
3066 CV_Assert(consumer.inputBlobs[j]->data == ld.outputBlobs[0].data);
3067 consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
3075 printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
3082 void allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
3084 CV_TRACE_FUNCTION();
3086 MapIdToLayerData::iterator it;
3087 for (it = layers.begin(); it != layers.end(); it++)
3088 it->second.flag = 0;
3090 CV_Assert(!layers[0].outputBlobs.empty());
3091 ShapesVec inputShapes;
3092 for(int i = 0; i < layers[0].outputBlobs.size(); i++)
3094 Mat& inp = layers[0].outputBlobs[i];
3095 CV_Assert(inp.total());
3096 if (preferableBackend == DNN_BACKEND_OPENCV &&
3097 preferableTarget == DNN_TARGET_OPENCL_FP16)
3099 layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
3101 inputShapes.push_back(shape(inp));
3103 LayersShapesMap layersShapes;
3104 getLayersShapes(inputShapes, layersShapes);
3106 blobManager.reset();
3107 backendWrappers.clear();
3109 for(auto& layer : layers)
3111 auto& ld = layer.second;
3112 ld.inputBlobsWrappers.clear();
3113 ld.outputBlobsWrappers.clear();
3114 ld.internalBlobsWrappers.clear();
3117 // Fake references to input blobs.
3118 for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
3119 blobManager.addReference(LayerPin(0, i));
3120 for (it = layers.begin(); it != layers.end(); ++it)
3122 const LayerData& ld = it->second;
3123 blobManager.addReferences(ld.inputBlobsId);
3126 for (int i = 0; i < blobsToKeep_.size(); i++)
3128 blobManager.addReference(blobsToKeep_[i]);
3131 for (it = layers.begin(); it != layers.end(); it++)
3133 int lid = it->first;
3134 allocateLayer(lid, layersShapes);
3137 layersTimings.resize(lastLayerId + 1, 0);
3138 fuseLayers(blobsToKeep_);
3141 void forwardLayer(LayerData &ld)
3143 CV_TRACE_FUNCTION();
3145 Ptr<Layer> layer = ld.layerInstance;
3152 std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
3153 if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
3156 CV_Error(Error::StsNotImplemented, "Default implementation fallback is not supported in asynchronous mode");
3158 if (!layer->supportBackend(DNN_BACKEND_OPENCV))
3159 CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" is not supported by the OpenCV backend",
3160 ld.name.c_str(), ld.type.c_str()));
3163 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
3165 std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
3166 std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
3167 std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
3168 layer->forward(umat_inputBlobs,
3170 umat_internalBlobs);
3171 if (DNN_CHECK_NAN_INF)
3174 for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
3176 UMat& u = umat_outputBlobs[i];
3178 if (u.depth() == CV_16S) // FP16
3181 m = u.getMat(ACCESS_READ);
3184 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
3185 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
3188 else if (!checkRange(m, true, NULL, -1e6, 1e6))
3190 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
3191 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
3197 for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
3199 UMat& u = umat_inputBlobs[i];
3201 if (u.depth() == CV_16S) // FP16
3204 m = u.getMat(ACCESS_READ);
3205 std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
3206 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3208 for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
3210 UMat& u = umat_outputBlobs[i];
3212 if (u.depth() == CV_16S) // FP16
3215 m = u.getMat(ACCESS_READ);
3216 std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
3217 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3219 for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
3221 UMat& u = umat_internalBlobs[i];
3223 if (u.depth() == CV_16S) // FP16
3226 m = u.getMat(ACCESS_READ);
3227 std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
3228 if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
3230 if (DNN_CHECK_NAN_INF_RAISE_ERROR)
3234 OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
3239 for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
3241 if (!ld.inputBlobsWrappers[i].empty())
3242 ld.inputBlobsWrappers[i]->copyToHost();
3245 std::vector<Mat> inps(ld.inputBlobs.size());
3246 for (int i = 0; i < ld.inputBlobs.size(); ++i)
3248 inps[i] = *ld.inputBlobs[i];
3250 layer->forward(inps, ld.outputBlobs, ld.internals);
3252 if (DNN_CHECK_NAN_INF)
3255 for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
3257 const Mat& m = ld.outputBlobs[i];
3260 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
3261 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
3264 else if (!checkRange(m, true, NULL, -1e6, 1e6))
3266 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
3267 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
3273 for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
3275 const Mat* pM = ld.inputBlobs[i];
3278 std::cout << "INPUT " << i << " is NULL" << std::endl;
3282 std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
3283 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3285 for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
3287 const Mat& m = ld.outputBlobs[i];
3288 std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
3289 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3291 for (size_t i = 0; i < ld.internals.size(); ++i)
3293 const Mat& m = ld.internals[i];
3294 std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
3295 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3297 if (DNN_CHECK_NAN_INF_RAISE_ERROR)
3302 for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
3304 if (!ld.outputBlobsWrappers[i].empty())
3305 ld.outputBlobsWrappers[i]->setHostDirty();
3311 Ptr<BackendNode> node = it->second;
3312 CV_Assert(!node.empty());
3313 if (preferableBackend == DNN_BACKEND_CUDA)
3315 CV_Assert(haveCUDA());
3318 Ptr<CUDABackendNode> cudaNode = node.dynamicCast<CUDABackendNode>();
3319 CV_Assert(!cudaNode.empty());
3321 cudaNode->forward(ld.inputBlobsWrappers, ld.outputBlobsWrappers, cudaInfo->workspace);
3323 for (auto id : ld.cudaD2HBackgroundTransfers)
3325 auto wrapper = ld.outputBlobsWrappers[id].dynamicCast<CUDABackendWrapper>();
3326 wrapper->copyToHostInBackground();
3330 else if (preferableBackend == DNN_BACKEND_HALIDE)
3332 forwardHalide(ld.outputBlobsWrappers, node);
3334 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
3336 forwardInfEngine(ld.outputBlobsWrappers, node, isAsync);
3338 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
3340 forwardNgraph(ld.outputBlobsWrappers, node, isAsync);
3342 else if (preferableBackend == DNN_BACKEND_VKCOM)
3346 forwardVkCom(ld.outputBlobsWrappers, node);
3348 catch (const cv::Exception& e)
3350 CV_LOG_ERROR(NULL, "forwardVkCom failed, falling back to CPU implementation. " << e.what());
3351 it->second = Ptr<BackendNode>();
3357 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
3362 int64 t = tm.getTimeTicks();
3363 layersTimings[ld.id] = (t > 0) ? t : t + 1; // zero for skipped layers only
3367 layersTimings[ld.id] = 0;
3373 void forwardToLayer(LayerData &ld, bool clearFlags = true)
3375 CV_TRACE_FUNCTION();
3379 MapIdToLayerData::iterator it;
3380 for (it = layers.begin(); it != layers.end(); it++)
3381 it->second.flag = 0;
3384 // already forwarded
3389 MapIdToLayerData::iterator it;
3390 for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
3392 LayerData &ld = it->second;
3402 if (preferableBackend == DNN_BACKEND_CUDA)
3403 cudaInfo->context.stream.synchronize();
3407 void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
3409 std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
3411 if (id == 0 && inOutShapes[id].in[0].empty())
3413 if (!layers[0].outputBlobs.empty())
3416 for (int i = 0; i < layers[0].outputBlobs.size(); i++)
3418 Mat& inp = layers[0].outputBlobs[i];
3419 CV_Assert(inp.total());
3420 shapes.push_back(shape(inp));
3422 inOutShapes[0].in = shapes;
3426 const std::vector<MatShape>& inputShapes = netInputLayer->shapes;
3428 for (size_t i = 0; i < inputShapes.size(); i++)
3430 if (!inputShapes[i].empty())
3438 inOutShapes[0].out.clear();
3443 inOutShapes[0].in = inputShapes;
3448 if (inOutShapes[id].in.empty())
3450 for(int i = 0; i < inputLayerIds.size(); i++)
3452 int layerId = inputLayerIds[i].lid;
3453 LayersShapesMap::iterator it =
3454 inOutShapes.find(layerId);
3455 if(it == inOutShapes.end() ||
3456 it->second.out.empty())
3458 getLayerShapesRecursively(layerId, inOutShapes);
3460 const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
3461 inOutShapes[id].in.push_back(shape);
3464 const ShapesVec& is = inOutShapes[id].in;
3465 ShapesVec& os = inOutShapes[id].out;
3466 ShapesVec& ints = inOutShapes[id].internal;
3467 int requiredOutputs = layers[id].requiredOutputs.size();
3468 Ptr<Layer> l = layers[id].getLayerInstance();
3470 bool layerSupportInPlace = false;
3473 layerSupportInPlace = l->getMemoryShapes(is, requiredOutputs, os, ints);
3475 catch (const cv::Exception& e)
3477 CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() throws exception." <<
3478 " inputs=" << is.size() <<
3479 " outputs=" << os.size() << "/" << requiredOutputs <<
3480 " blobs=" << l->blobs.size());
3481 for (size_t i = 0; i < is.size(); ++i)
3483 CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i]));
3485 for (size_t i = 0; i < os.size(); ++i)
3487 CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i]));
3489 for (size_t i = 0; i < l->blobs.size(); ++i)
3491 CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i])));
3493 CV_LOG_ERROR(NULL, "Exception message: " << e.what());
3496 inOutShapes[id].supportInPlace = layerSupportInPlace;
3498 for (int i = 0; i < ints.size(); i++)
3499 CV_Assert(total(ints[i]) > 0);
3501 for (int i = 0; i < os.size(); i++)
3502 CV_Assert(total(os[i]) > 0);
3505 void getLayersShapes(const ShapesVec& netInputShapes,
3506 LayersShapesMap& inOutShapes)
3508 inOutShapes.clear();
3510 inOutShapes[0].in = netInputShapes; // insert shape for first input layer
3511 for (MapIdToLayerData::iterator it = layers.begin();
3512 it != layers.end(); it++)
3514 getLayerShapesRecursively(it->first, inOutShapes);
3518 void getLayerShapes(const ShapesVec& netInputShapes,
3520 LayerShapes& shapes)
3522 LayersShapesMap inOutShapes;
3523 inOutShapes[0].in = netInputShapes; // insert shape for first input layer
3524 getLayerShapesRecursively(layerId, inOutShapes);
3525 shapes = inOutShapes[layerId];
3528 LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
3530 return *std::max_element(pins.begin(), pins.end());
3533 Mat getBlob(const LayerPin& pin)
3535 CV_TRACE_FUNCTION();
3538 CV_Error(Error::StsObjectNotFound, "Requested blob not found");
3540 LayerData &ld = layers[pin.lid];
3541 if ((size_t)pin.oid >= ld.outputBlobs.size())
3543 CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produces only %zu outputs, "
3544 "but output #%d was requested", ld.name.c_str(),
3545 ld.outputBlobs.size(), pin.oid));
3547 if (preferableTarget != DNN_TARGET_CPU)
3549 CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
3550 // Transfer data to CPU if it's required.
3551 ld.outputBlobsWrappers[pin.oid]->copyToHost();
3554 if (ld.outputBlobs[pin.oid].depth() == CV_16S)
3556 convertFp16(ld.outputBlobs[pin.oid], output_blob);
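// Blobs computed on the OPENCL_FP16 target are stored as CV_16S Mats holding FP16 data;
// they are converted back to FP32 here before being returned to the caller.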
3560 return ld.outputBlobs[pin.oid];
3563 Mat getBlob(String outputName)
3565 return getBlob(getPinByAlias(outputName));
3569 AsyncArray getBlobAsync(const LayerPin& pin)
3571 CV_TRACE_FUNCTION();
3572 #ifdef HAVE_INF_ENGINE
3574 CV_Error(Error::StsObjectNotFound, "Requested blob not found");
3576 LayerData &ld = layers[pin.lid];
3577 if ((size_t)pin.oid >= ld.outputBlobs.size())
3579 CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produces only %d outputs, "
3580 "but output #%d was requested", ld.name.c_str(),
3581 (int)ld.outputBlobs.size(), (int)pin.oid));
3583 if (preferableTarget != DNN_TARGET_CPU)
3585 CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
3586 // Transfer data to CPU if it's required.
3587 ld.outputBlobsWrappers[pin.oid]->copyToHost();
3589 CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
3591 if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) {
3592 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3593 Ptr<InfEngineBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<InfEngineBackendWrapper>();
3594 return std::move(wrapper->futureMat);
3596 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3599 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
3601 #ifdef HAVE_DNN_NGRAPH
3602 Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
3603 return std::move(wrapper->futureMat);
3605 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
3608 #endif // HAVE_INF_ENGINE
3609 CV_Error(Error::StsNotImplemented, "DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 backend is required");
3612 AsyncArray getBlobAsync(String outputName)
3614 return getBlobAsync(getPinByAlias(outputName));
3618 #ifdef HAVE_INF_ENGINE
3620 Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
3625 void dumpNetworkToFile()
3627 #ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
3628 string dumpFileNameBase = getDumpFileNameBase();
3629 string dumpFileName = dumpFileNameBase + ".dot";
3632 string dumpStr = dump();
3633 std::ofstream out(dumpFileName.c_str(), std::ios::out | std::ios::binary);
3636 catch (const std::exception& e)
3638 std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
3639 out << "Exception: " << e.what() << std::endl;
3643 std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
3644 out << "Can't dump: unknown exception" << std::endl;
3650 Net::Net() : impl(new Net::Impl)
3654 #ifdef HAVE_INF_ENGINE
3656 Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
3658 CV_TRACE_FUNCTION();
3660 CV_TRACE_REGION("register_inputs");
3662 std::vector<String> inputsNames;
3663 std::vector<MatShape> inp_shapes;
3664 for (auto& it : ieNet.getInputsInfo())
3666 inputsNames.push_back(it.first);
3667 std::vector<size_t> dims = it.second->getTensorDesc().getDims();
3668 inp_shapes.push_back(std::vector<int>(dims.begin(), dims.end()));
3672 cvNet.setInputsNames(inputsNames);
3674 // set empty input to determine input shapes
3675 for (int inp_id = 0; inp_id < inputsNames.size(); ++inp_id)
3677 cvNet.setInputShape(inputsNames[inp_id], inp_shapes[inp_id]);
3680 CV_TRACE_REGION_NEXT("backendNode");
3682 Ptr<BackendNode> backendNode;
3683 #ifdef HAVE_DNN_NGRAPH
3684 if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
3686 auto fake_node = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{});
3687 Ptr<InfEngineNgraphNode> backendNodeNGraph(new InfEngineNgraphNode(fake_node));
3688 backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*(cvNet.impl), ieNet));
3689 backendNode = backendNodeNGraph;
3694 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3695 Ptr<InfEngineBackendNode> backendNodeNN(new InfEngineBackendNode(InferenceEngine::Builder::Layer("")));
3696 backendNodeNN->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
3697 backendNode = backendNodeNN;
3699 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3703 CV_TRACE_REGION_NEXT("register_outputs");
3705 #ifdef HAVE_DNN_NGRAPH
3706 auto ngraphFunction = ieNet.getFunction();
3707 #if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2)
3708 std::list< std::shared_ptr<ngraph::Node> > ngraphOperations;
3710 std::vector< std::shared_ptr<ngraph::Node> > ngraphOperations;
3714 ngraphOperations = ngraphFunction->get_ops();
3718 for (auto& it : ieNet.getOutputsInfo())
3720 CV_TRACE_REGION("output");
3721 const auto& outputName = it.first;
3724 int lid = cvNet.addLayer(it.first, "", lp);
3726 LayerData& ld = cvNet.impl->layers[lid];
3728 #ifdef HAVE_DNN_NGRAPH
3729 if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
3731 Ptr<Layer> cvLayer(new NgraphBackendLayer(ieNet));
3732 cvLayer->name = outputName;
3733 cvLayer->type = "_unknown_";
3735 auto process_layer = [&](const std::string& name) -> bool
3739 CV_TRACE_REGION("ngraph_function");
3740 for (const auto& op : ngraphOperations)
3743 if (op->get_friendly_name() == name)
3745 const std::string typeName = op->get_type_info().name;
3746 cvLayer->type = typeName;
3754 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
3755 CV_Error(Error::StsNotImplemented, "This OpenCV version is built with Inference Engine which has dropped IR v7 support");
3757 CV_TRACE_REGION("legacy_cnn_layer");
3760 InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(name.c_str());
3763 cvLayer->type = ieLayer->type;
3766 catch (const std::exception& e)
3769 CV_LOG_DEBUG(NULL, "IE layer extraction failure: '" << name << "' - " << e.what());
3777 bool found = process_layer(outputName);
3780 auto pos = outputName.rfind('.'); // cut port number: ".0"
3781 if (pos != std::string::npos)
3783 std::string layerName = outputName.substr(0, pos);
3784 found = process_layer(layerName);
3788 CV_LOG_WARNING(NULL, "DNN/IE: Can't determine output layer type: '" << outputName << "'");
3790 ld.layerInstance = cvLayer;
3791 ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NGRAPH] = backendNode;
3796 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3797 Ptr<Layer> cvLayer(new InfEngineBackendLayer(ieNet));
3799 InferenceEngine::CNNLayerPtr ieLayer;
3802 ieLayer = ieNet.getLayerByName(outputName.c_str());
3806 auto pos = outputName.rfind('.'); // cut port number: ".0"
3807 if (pos != std::string::npos)
3809 std::string layerName = outputName.substr(0, pos);
3810 ieLayer = ieNet.getLayerByName(layerName.c_str());
3815 cvLayer->name = outputName;
3816 cvLayer->type = ieLayer->type;
3817 ld.layerInstance = cvLayer;
3819 ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019] = backendNode;
3821 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3825 for (int i = 0; i < inputsNames.size(); ++i)
3826 cvNet.connect(0, i, lid, i);
3829 CV_TRACE_REGION_NEXT("finalize");
3831 cvNet.setPreferableBackend(getInferenceEngineBackendTypeParam());
3833 cvNet.impl->skipInfEngineInit = true;
3836 #endif // HAVE_INF_ENGINE
3838 Net Net::readFromModelOptimizer(const String& xml, const String& bin)
3840 CV_TRACE_FUNCTION();
3841 #ifndef HAVE_INF_ENGINE
3842 CV_UNUSED(xml); CV_UNUSED(bin);
3843 CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
3845 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
3846 InferenceEngine::CNNNetReader reader;
3847 reader.ReadNetwork(xml);
3848 reader.ReadWeights(bin);
3850 InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
3852 InferenceEngine::Core& ie = getCore("");
3853 InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin);
3856 return Impl::createNetworkFromModelOptimizer(ieNet);
3857 #endif // HAVE_INF_ENGINE
3860 Net Net::readFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights)
3862 CV_TRACE_FUNCTION();
3863 CV_Assert(!bufferModelConfig.empty());
3864 CV_Assert(!bufferWeights.empty());
3865 return readFromModelOptimizer(bufferModelConfig.data(), bufferModelConfig.size(),
3866 bufferWeights.data(), bufferWeights.size());
3869 Net Net::readFromModelOptimizer(
3870 const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
3871 const uchar* bufferWeightsPtr, size_t bufferWeightsSize
3874 CV_TRACE_FUNCTION();
3875 #ifndef HAVE_INF_ENGINE
3876 CV_UNUSED(bufferModelConfigPtr); CV_UNUSED(bufferWeightsPtr);
3877 CV_UNUSED(bufferModelConfigSize); CV_UNUSED(bufferWeightsSize);
3878 CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
3881 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
3882 InferenceEngine::CNNNetReader reader;
3886 reader.ReadNetwork(bufferModelConfigPtr, bufferModelConfigSize);
3888 InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
3889 InferenceEngine::TBlob<uint8_t>::Ptr weightsBlobPtr(new InferenceEngine::TBlob<uint8_t>(tensorDesc));
3890 weightsBlobPtr->allocate();
3891 std::memcpy(weightsBlobPtr->buffer(), (uchar*)bufferWeightsPtr, bufferWeightsSize);
3892 reader.SetWeights(weightsBlobPtr);
3894 catch (const std::exception& e)
3896 CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
3899 InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
3901 InferenceEngine::Core& ie = getCore("");
3903 std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize);
3905 InferenceEngine::CNNNetwork ieNet;
3908 InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
3909 InferenceEngine::Blob::CPtr weights_blob = InferenceEngine::make_shared_blob<uint8_t>(tensorDesc, (uint8_t*)bufferWeightsPtr, bufferWeightsSize);
3911 ieNet = ie.ReadNetwork(model, weights_blob);
3913 catch (const std::exception& e)
3915 CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
3919 return Impl::createNetworkFromModelOptimizer(ieNet);
3920 #endif // HAVE_INF_ENGINE
3928 int Net::addLayer(const String &name, const String &type, LayerParams ¶ms)
3930 CV_TRACE_FUNCTION();
3932 if (impl->getLayerId(name) >= 0)
3934 CV_Error(Error::StsBadArg, "Layer \"" + name + "\" is already in the net");
3938 int id = ++impl->lastLayerId;
3939 impl->layerNameToId.insert(std::make_pair(name, id));
3940 impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));
3945 int Net::addLayerToPrev(const String &name, const String &type, LayerParams ¶ms)
3947 CV_TRACE_FUNCTION();
3949 int prvLid = impl->lastLayerId;
3950 int newLid = this->addLayer(name, type, params);
3951 this->connect(prvLid, 0, newLid, 0);
3955 void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum)
3957 CV_TRACE_FUNCTION();
3959 impl->connect(outLayerId, outNum, inpLayerId, inpNum);
3962 void Net::connect(String _outPin, String _inPin)
3964 CV_TRACE_FUNCTION();
3966 LayerPin outPin = impl->getPinByAlias(_outPin);
3967 LayerPin inpPin = impl->getPinByAlias(_inPin);
3969 CV_Assert(outPin.valid() && inpPin.valid());
3971 impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
3974 Mat Net::forward(const String& outputName)
3976 CV_TRACE_FUNCTION();
3977 CV_Assert(!empty());
3979 String layerName = outputName;
3981 if (layerName.empty())
3983 std::vector<String> layerNames = getLayerNames();
3984 CV_Assert(!layerNames.empty());
3985 layerName = layerNames.back();
3988 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
3989 impl->setUpNet(pins);
3990 impl->forwardToLayer(impl->getLayerData(layerName));
3992 return impl->getBlob(layerName);
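// A minimal usage sketch of the asynchronous API defined below (hedged: the model file names are
// hypothetical, and an Inference Engine backend is assumed, since forwardAsync() supports IE only):
//
//     cv::dnn::Net net = cv::dnn::readNet("model.xml", "model.bin");
//     net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
//     net.setInput(blob);
//     cv::AsyncArray out = net.forwardAsync();
//     cv::Mat result;
//     out.get(result);  // blocks until the asynchronous request completes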
3995 AsyncArray Net::forwardAsync(const String& outputName)
3997 CV_TRACE_FUNCTION();
3998 CV_Assert(!empty());
4001 String layerName = outputName;
4003 if (layerName.empty())
4005 std::vector<String> layerNames = getLayerNames();
4006 CV_Assert(!layerNames.empty());
4007 layerName = layerNames.back();
4010 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
4011 impl->setUpNet(pins);
4013 if (!(impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH))
4014 CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backends only");
4016 impl->isAsync = true;
4017 impl->forwardToLayer(impl->getLayerData(layerName));
4018 impl->isAsync = false;
4020 return impl->getBlobAsync(layerName);
4022 CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward requires a build with C++11 enabled");
4026 void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
4028 CV_TRACE_FUNCTION();
4029 CV_Assert(!empty());
4031 String layerName = outputName;
4033 if (layerName.empty())
4035 std::vector<String> layerNames = getLayerNames();
4036 CV_Assert(!layerNames.empty());
4037 layerName = layerNames.back();
4040 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
4041 impl->setUpNet(pins);
4042 impl->forwardToLayer(impl->getLayerData(layerName));
4044 LayerPin pin = impl->getPinByAlias(layerName);
4045 LayerData &ld = impl->layers[pin.lid];
4047 if (outputBlobs.isUMat())
4049 impl->getBlob(layerName).copyTo(outputBlobs);
4051 else if (outputBlobs.isMat())
4053 outputBlobs.assign(impl->getBlob(layerName));
4055 else if (outputBlobs.isMatVector())
4057 if (impl->preferableTarget != DNN_TARGET_CPU)
4059 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
4061 CV_Assert(!ld.outputBlobsWrappers[i].empty());
4062 ld.outputBlobsWrappers[i]->copyToHost();
4065 if (ld.outputBlobs[0].depth() == CV_32F)
4067 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
4068 outputvec = ld.outputBlobs;
4070 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
4071 outputvec.resize(ld.outputBlobs.size());
4072 for (int i = 0; i < outputvec.size(); i++)
4073 convertFp16(ld.outputBlobs[i], outputvec[i]);
4076 else if (outputBlobs.isUMatVector())
4078 std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
4081 if (impl->preferableBackend == DNN_BACKEND_OPENCV &&
4082 IS_DNN_OPENCL_TARGET(impl->preferableTarget))
4084 if (impl->preferableTarget == DNN_TARGET_OPENCL)
4085 outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
4086 else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
4088 std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
4089 outputvec.resize(out_vec.size());
4090 for (int i = 0; i < out_vec.size(); i++)
4091 convertFp16(out_vec[i], outputvec[i]);
4097 outputvec.resize(ld.outputBlobs.size());
4098 for (int i = 0; i < outputvec.size(); ++i)
4099 ld.outputBlobs[i].copyTo(outputvec[i]);
4104 void Net::forward(OutputArrayOfArrays outputBlobs,
4105 const std::vector<String>& outBlobNames)
4107 CV_TRACE_FUNCTION();
4109 std::vector<LayerPin> pins;
4110 for (int i = 0; i < outBlobNames.size(); i++)
4112 pins.push_back(impl->getPinByAlias(outBlobNames[i]));
4115 impl->setUpNet(pins);
4117 LayerPin out = impl->getLatestLayerPin(pins);
4119 impl->forwardToLayer(impl->getLayerData(out.lid));
4121 std::vector<Mat> matvec;
4122 for (int i = 0; i < pins.size(); i++)
4124 matvec.push_back(impl->getBlob(pins[i]));
4127 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
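// A typical multi-output call (e.g. detection networks with several output blobs);
// getUnconnectedOutLayersNames() is a convenient source of output names:
//
//     std::vector<cv::String> outNames = net.getUnconnectedOutLayersNames();
//     std::vector<cv::Mat> outs;
//     net.forward(outs, outNames); // one Mat per requested output pin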
4131 void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
4132 const std::vector<String>& outBlobNames)
4134 CV_TRACE_FUNCTION();
4136 std::vector<LayerPin> pins;
4137 for (int i = 0; i < outBlobNames.size(); i++)
4139 pins.push_back(impl->getPinByAlias(outBlobNames[i]));
4142 impl->setUpNet(pins);
4144 LayerPin out = impl->getLatestLayerPin(pins);
4146 impl->forwardToLayer(impl->getLayerData(out.lid));
4148 outputBlobs.resize(outBlobNames.size());
4149 for (int i = 0; i < outBlobNames.size(); i++)
4151 std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
4152 outputBlobs[i].resize(lp.size());
4153 for (int j = 0; j < lp.size(); j++)
4155 outputBlobs[i][j] = impl->getBlob(lp[j]);
4160 void Net::setPreferableBackend(int backendId)
4162 CV_TRACE_FUNCTION();
4163 CV_TRACE_ARG(backendId);
4165 #ifdef HAVE_INF_ENGINE
4166 if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
4167 backendId = getInferenceEngineBackendTypeParam();
4170 if( impl->preferableBackend != backendId )
4172 impl->preferableBackend = backendId;
4173 impl->netWasAllocated = false;
4178 void Net::setPreferableTarget(int targetId)
4180 CV_TRACE_FUNCTION();
4181 CV_TRACE_ARG(targetId);
4183 if( impl->preferableTarget != targetId )
4185 impl->preferableTarget = targetId;
4186 if (IS_DNN_OPENCL_TARGET(targetId))
4189 #ifdef HAVE_INF_ENGINE
4190 if (impl->preferableBackend == DNN_BACKEND_OPENCV)
4192 if (impl->preferableBackend == DNN_BACKEND_DEFAULT ||
4193 impl->preferableBackend == DNN_BACKEND_OPENCV)
4194 #endif // HAVE_INF_ENGINE
4195 impl->preferableTarget = DNN_TARGET_CPU;
4197 bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
4198 if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
4199 impl->preferableTarget = DNN_TARGET_OPENCL;
4202 impl->netWasAllocated = false;
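// A short configuration sketch. Note the fallbacks implemented above: OPENCL_FP16 drops to
// OPENCL when the device lacks cl_khr_fp16, and OpenCL targets may be reset to CPU for
// builds/backends without usable OpenCL support:
//
//     net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
//     net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL_FP16);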
4207 void Net::setInputsNames(const std::vector<String> &inputBlobNames)
4209 CV_TRACE_FUNCTION();
4211 impl->netInputLayer->setNames(inputBlobNames);
4214 void Net::setInputShape(const String &inputName, const MatShape& shape)
4216 CV_TRACE_FUNCTION();
4218 impl->netInputLayer->setInputShape(inputName, shape);
4221 void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
4223 CV_TRACE_FUNCTION();
4224 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
4228 pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name);
4231 CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");
4233 Mat blob_ = blob.getMat(); // can't use InputArray directly because of MatExpr handling
4234 MatShape blobShape = shape(blob_);
4238 CV_Assert(!impl->netInputLayer.empty());
4239 const DataLayer& netInputLayer = *impl->netInputLayer.get();
4240 if (!netInputLayer.shapes.empty())
4242 CV_CheckLT(pin.oid, (int)netInputLayer.shapes.size(), "");
4243 const MatShape& inputShapeLimitation = netInputLayer.shapes[pin.oid];
4244 if (!inputShapeLimitation.empty())
4246 CV_CheckEQ(inputShapeLimitation.size(), blobShape.size(), "");
4247 #if 0 // TODO: DNNTestNetwork.MobileNet_SSD_Caffe_Different_Width_Height/0
4248 const size_t dims = inputShapeLimitation.size();
4249 for (size_t dim = 0; dim < dims; dim++)
4251 if (dims >= 3 && dim == 0 && inputShapeLimitation[0] == 1)
4252 continue; // don't limit batch
4253 CV_CheckEQ(inputShapeLimitation[dim], blobShape[dim], "");
4260 LayerData &ld = impl->layers[pin.lid];
4261 const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size());
4262 ld.outputBlobs.resize(numInputs);
4263 ld.outputBlobsWrappers.resize(numInputs);
4264 impl->netInputLayer->inputsData.resize(numInputs);
4265 impl->netInputLayer->scaleFactors.resize(numInputs);
4266 impl->netInputLayer->means.resize(numInputs);
4268 MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
4269 bool oldShape = prevShape == blobShape;
4271 blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
4273 ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid];
4275 if (!ld.outputBlobsWrappers[pin.oid].empty())
4277 ld.outputBlobsWrappers[pin.oid]->setHostDirty();
4279 impl->netInputLayer->scaleFactors[pin.oid] = scalefactor;
4280 impl->netInputLayer->means[pin.oid] = mean;
4281 impl->netWasAllocated = impl->netWasAllocated && oldShape;
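// A usage sketch (the input name "data" and the preprocessing constants are assumptions;
// the extra scalefactor/mean passed here are applied by the network input layer):
//
//     cv::Mat img = cv::imread("image.jpg");
//     cv::Mat blob = cv::dnn::blobFromImage(img, 1.0, cv::Size(224, 224));
//     net.setInput(blob, "data", 1.0 / 255, cv::Scalar(104, 117, 123));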
4284 Mat Net::getParam(LayerId layer, int numParam)
4286 LayerData &ld = impl->getLayerData(layer);
4287 std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
4288 CV_Assert(numParam < (int)layerBlobs.size());
4289 return layerBlobs[numParam];
4292 void Net::setParam(LayerId layer, int numParam, const Mat &blob)
4294 LayerData &ld = impl->getLayerData(layer);
4296 std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
4297 CV_Assert(numParam < (int)layerBlobs.size());
4298 // we don't perform strong checks here; use this function carefully
4299 layerBlobs[numParam] = blob;
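// A hedged sketch of inspecting learned blobs ("conv1" is a hypothetical layer name;
// blob 0 is conventionally the weights, blob 1 the bias when present):
//
//     int convId = net.getLayerId("conv1");
//     cv::Mat weights = net.getParam(convId, 0);
//     cv::Mat bias    = net.getParam(convId, 1);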
4302 int Net::getLayerId(const String &layer)
4304 return impl->getLayerId(layer);
4308 string dumpLayerParameterSize(const string& name, const LayerParams& lp)
4310 std::ostringstream out(name, std::ios::ate);
4311 DictValue param = lp.get(name);
4312 switch (param.size())
4314 case 1: out << " : "; break;
4315 case 2: out << " (HxW): "; break;
4316 case 3: out << " (DxHxW): "; break;
4318 CV_LOG_INFO(NULL, format("DNN/dumpLayerParameterSize(): Unsupported '%s' size = %d", name.c_str(), param.size()));
4321 for (size_t i = 0; i < param.size(); i++)
4325 out << param.get<int>(i);
4332 CV_Assert(!empty());
4334 bool hasInput = !impl->netInputLayer->inputsData.empty();
4338 if (!impl->netWasAllocated)
4342 return impl->dump();
4345 string Net::Impl::dump()
4347 bool hasInput = !netInputLayer->inputsData.empty();
4349 std::ostringstream out;
4350 const std::map<int, LayerData>& map = layers;
4352 Backend prefBackend = (Backend)preferableBackend;
4353 std::vector<std::vector<int> > skippedLayers;
4354 std::vector<int> skipId;
4355 std::vector<int> allLayers(map.size(), -1);
4357 Ptr<BackendNode> prevNode;
4358 for (std::map<int, LayerData>::const_reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit)
4360 std::map<int, Ptr<BackendNode> >::const_iterator itBackend = rit->second.backendNodes.find(prefBackend);
4361 if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() ||
4362 itBackend->second.empty())
4364 if (rit->second.skip)
4365 skipId.push_back(rit->first);
4366 else if (!skipId.empty())
4368 if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty())
4369 skipId.push_back(rit->first);
4370 else if (idPrev != -1)
4371 skipId.push_back(idPrev);
4373 std::sort(skipId.begin(), skipId.end());
4374 for (int i = 0; i < skipId.size(); i++) {
4375 allLayers[skipId[i]] = skippedLayers.size();
4377 skippedLayers.push_back(skipId);
4383 if (itBackend->second == prevNode)
4384 skipId.push_back(idPrev);
4385 else if (!skipId.empty())
4387 skipId.push_back(idPrev);
4388 std::sort(skipId.begin(), skipId.end());
4389 for (int i = 0; i < skipId.size(); i++) {
4390 allLayers[skipId[i]] = skippedLayers.size();
4392 skippedLayers.push_back(skipId);
4395 idPrev = rit->first;
4396 prevNode = itBackend->second;
4399 string colors[] = {"#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462", "#ff4848", "#b35151"};
4401 switch (prefBackend)
4403 case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break;
4404 case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break;
4405 case DNN_BACKEND_INFERENCE_ENGINE: // fallthru
4406 case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: backend = "DLIE/"; break;
4407 case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "NGRAPH/"; break;
4408 case DNN_BACKEND_OPENCV: backend = "OCV/"; break;
4409 case DNN_BACKEND_VKCOM: backend = "VULKAN/"; break;
4410 case DNN_BACKEND_CUDA: backend = "CUDA/"; break;
4411 // don't use default: so the compiler warns about unhandled Backend enum values
4413 out << "digraph G {\n";
4415 for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
4417 const LayerData& ld = it->second;
4418 string name = ld.params.name;
4419 std::vector<int> clusterIds(1, it->first);
4420 if (allLayers[it->first] == -1 && !name.empty())
4422 out << "\t\"" << name << "\" [label=\"";
4424 else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0])
4428 else // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0]
4430 int cluster = allLayers[it->first];
4431 out << "\t\"" << "cluster_" << cluster << "\" [label=\"{";
4432 clusterIds = skippedLayers[allLayers[it->first]]; // vertices in current cluster
4434 for (int i = 0; i < clusterIds.size(); i++)
4436 CV_DbgAssert(map.find(clusterIds[i]) != map.end());
4437 const LayerParams& lp = map.find(clusterIds[i])->second.params;
4438 if (!lp.name.empty()) {
4442 out << lp.name << "\\n" << lp.type << "\\n"; // align center
4443 if (lp.has("kernel_size"))
4445 string kernel = dumpLayerParameterSize("kernel_size", lp);
4447 out << "\\l"; // align left
4448 } else if (lp.has("kernel_h") && lp.has("kernel_w")) {
4449 DictValue h = lp.get("kernel_h");
4450 DictValue w = lp.get("kernel_w");
4451 out << "kernel (HxW): " << h << " x " << w;
4452 out << "\\l"; // align left
4454 if (lp.has("stride")) {
4455 string stride = dumpLayerParameterSize("stride", lp);
4457 out << "\\l"; // align left
4458 } else if (lp.has("stride_h") && lp.has("stride_w")) {
4459 DictValue h = lp.get("stride_h");
4460 DictValue w = lp.get("stride_w");
4461 out << "stride (HxW): " << h << " x " << w;
4462 out << "\\l"; // align left
4464 if (lp.has("dilation")) {
4465 string dilation = dumpLayerParameterSize("dilation", lp);
4467 out << "\\l"; // align left
4468 } else if (lp.has("dilation_h") && lp.has("dilation_w")) {
4469 DictValue h = lp.get("dilation_h");
4470 DictValue w = lp.get("dilation_w");
4471 out << "dilation (HxW): " << h << " x " << w;
4472 out << "\\l"; // align left
4474 if (lp.has("pad")) {
4475 DictValue pad = lp.get("pad");
4479 case 1: out << ": " << pad; break;
4481 out << "(HxW): (" << pad.get<int>(0) << " x " << pad.get<int>(1) << ")";
4484 out << "(HxW): (" << pad.get<int>(0) << ", " << pad.get<int>(2)
4485 << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(3) << ")";
4488 out << "(DxHxW): (" << pad.get<int>(0) << ", " << pad.get<int>(3)
4489 << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(4)
4490 << ") x (" << pad.get<int>(2) << ", " << pad.get<int>(5) << ")";
4492 default: CV_Error(Error::StsNotImplemented, format("Unsupported pad size = %d", pad.size()));
4494 out << "\\l"; // align left
4495 } else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b")) {
4496 DictValue l = lp.get("pad_l");
4497 DictValue t = lp.get("pad_t");
4498 DictValue r = lp.get("pad_r");
4499 DictValue b = lp.get("pad_b");
4500 out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")";
4501 out << "\\l"; // align left
4503 else if (lp.has("pooled_w") || lp.has("pooled_h")) {
4504 DictValue h = lp.get("pooled_h");
4505 DictValue w = lp.get("pooled_w");
4506 out << "pad pooled (HxW): " << h << " x " << w;
4507 out << "\\l"; // align left
4509 if (lp.has("pool")) {
4510 out << "pool: " << lp.get("pool");
4511 out << "\\l"; // align left
4513 if (lp.has("global_pooling")) {
4514 out << "global_pooling: " << lp.get("global_pooling");
4515 out << "\\l"; // align left
4517 if (lp.has("group")) {
4518 out << "group: " << lp.get("group");
4519 out << "\\l"; // align left
4523 if (!ld.outputBlobs.empty())
4525 out << "output: " << ld.outputBlobs[0].size;
4526 out << "\\l"; // align left
4529 Ptr<BackendNode> layerBackend;
4530 std::map<int, Ptr<BackendNode> >::const_iterator ibn = ld.backendNodes.find(prefBackend);
4531 if (ibn != ld.backendNodes.end())
4532 layerBackend = ibn->second;
4533 out << (!layerBackend.empty() ? backend : "OCV/");
4535 const Target target = ld.layerInstance.empty()
4537 : (Target)(ld.layerInstance->preferableTarget); // TODO fix preferableTarget type
4540 case DNN_TARGET_CPU: out << "CPU"; colorId = layerBackend.empty() ? 0 : 5; break;
4541 case DNN_TARGET_OPENCL: out << "OCL"; colorId = 1; break;
4542 case DNN_TARGET_OPENCL_FP16: out << "OCL_FP16"; colorId = 2; break;
4543 case DNN_TARGET_MYRIAD: out << "MYRIAD"; colorId = 3; break;
4544 case DNN_TARGET_VULKAN: out << "VULKAN"; colorId = 7; break;
4545 case DNN_TARGET_FPGA: out << "FPGA"; colorId = 4; break;
4546 case DNN_TARGET_CUDA: out << "CUDA"; colorId = 5; break;
4547 case DNN_TARGET_CUDA_FP16: out << "CUDA_FP16"; colorId = 6; break;
4548 // don't use default: so the compiler warns about unhandled Target enum values
4550 out << "\\n"; // align center
4551 out << ((clusterIds.size() == 1)? "\" " : " }\" ");
4552 out << "fillcolor=\"" << colors[colorId] << "\" ";
4553 out << "style=filled ";
4554 out << "shape=" << ((clusterIds.size() == 1)? "box" : "record") << "]\n";
4558 int inputsSize = hasInput ? netInputLayer->outNames.size() : 0;
4559 for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
4561 const LayerData& ld = it->second;
4562 if (allLayers[it->first] == -1) // node
4564 for (int i = 0; i < ld.consumers.size(); i++)
4566 int outId = ld.consumers[i].lid;
4567 if (it == map.begin() && inputsSize > 1)
4568 out << "\t\"" << ld.name << "_" << i << "\"" << " -> ";
4570 out << "\t\"" << ld.name << "\"" << " -> ";
4571 if (allLayers[outId] == -1) // node
4573 CV_DbgAssert(map.find(outId) != map.end());
4574 out << "\"" << map.find(outId)->second.name << "\"\n";
4578 out << "\"" << "cluster_" << allLayers[outId] << "\"\n";
4582 else if (it->first == skippedLayers[allLayers[it->first]].back()) // edges from last layer in cluster
4584 for (int i = 0; i < ld.consumers.size(); i++)
4586 int outId = ld.consumers[i].lid;
4587 if (allLayers[outId] == -1) // node
4589 CV_DbgAssert(map.find(outId) != map.end());
4590 out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
4591 out << "\"" << map.find(outId)->second.name << "\"\n";
4593 else if (allLayers[outId] != allLayers[it->first]) { // another cluster
4594 out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
4595 out << "\"" << "cluster_" << allLayers[outId] << "\"\n";
4604 void Net::dumpToFile(const String& path) {
4605 std::ofstream file(path.c_str());
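// dump() emits Graphviz DOT (see "digraph G" above), so a dumped file can be rendered
// offline; the paths below are placeholders:
//
//     net.dumpToFile("net.dot");
//     // then, from a shell:  dot -Tpng net.dot -o net.png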
4610 Ptr<Layer> Net::getLayer(LayerId layerId)
4612 LayerData &ld = impl->getLayerData(layerId);
4613 return ld.getLayerInstance();
4616 std::vector<Ptr<Layer> > Net::getLayerInputs(LayerId layerId)
4618 LayerData &ld = impl->getLayerData(layerId);
4620 std::vector<Ptr<Layer> > inputLayers;
4621 inputLayers.reserve(ld.inputBlobsId.size());
4622 for (int i = 0; i < ld.inputBlobsId.size(); ++i) {
4623 inputLayers.push_back(getLayer(ld.inputBlobsId[i].lid));
4628 std::vector<String> Net::getLayerNames() const
4630 CV_TRACE_FUNCTION();
4632 std::vector<String> res;
4633 res.reserve(impl->layers.size());
4635 Impl::MapIdToLayerData::iterator it;
4636 for (it = impl->layers.begin(); it != impl->layers.end(); it++)
4638 if (it->second.id) //skip Data layer
4639 res.push_back(it->second.name);
4645 bool Net::empty() const
4647 return impl->layers.size() <= 1; //first layer is default Data layer
4650 std::vector<int> Net::getUnconnectedOutLayers() const
4652 std::vector<int> layersIds;
4654 Impl::MapIdToLayerData::iterator it;
4655 for (it = impl->layers.begin(); it != impl->layers.end(); it++)
4657 int lid = it->first;
4658 LayerData &ld = it->second;
4660 if (ld.requiredOutputs.size() == 0)
4661 layersIds.push_back(lid);
4667 std::vector<String> Net::getUnconnectedOutLayersNames() const
4669 std::vector<int> ids = getUnconnectedOutLayers();
4670 const size_t n = ids.size();
4671 std::vector<String> names(n);
4672 for (size_t i = 0; i < n; ++i)
4674 names[i] = impl->layers[ids[i]].name;
4679 void Net::getLayersShapes(const ShapesVec& netInputShapes,
4680 std::vector<int>& layersIds,
4681 std::vector<ShapesVec>& inLayersShapes,
4682 std::vector<ShapesVec>& outLayersShapes) const
4685 inLayersShapes.clear();
4686 outLayersShapes.clear();
4688 Impl::LayersShapesMap inOutShapes;
4689 impl->getLayersShapes(netInputShapes, inOutShapes);
4691 for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin();
4692 it != inOutShapes.end(); it++)
4694 layersIds.push_back(it->first);
4695 inLayersShapes.push_back(it->second.in);
4696 outLayersShapes.push_back(it->second.out);
4700 void Net::getLayersShapes(const MatShape& netInputShape,
4701 std::vector<int>& layerIds,
4702 std::vector<ShapesVec>& inLayersShapes,
4703 std::vector<ShapesVec>& outLayersShapes) const
4705 getLayersShapes(ShapesVec(1, netInputShape),
4706 layerIds, inLayersShapes, outLayersShapes);
4709 void Net::getLayerShapes(const MatShape& netInputShape,
4711 ShapesVec& inLayerShapes,
4712 ShapesVec& outLayerShapes) const
4714 getLayerShapes(ShapesVec(1, netInputShape),
4715 layerId, inLayerShapes, outLayerShapes);
4719 void Net::getLayerShapes(const ShapesVec& netInputShapes,
4721 ShapesVec& inLayerShapes,
4722 ShapesVec& outLayerShapes) const
4725 impl->getLayerShapes(netInputShapes, layerId, shapes);
4726 inLayerShapes = shapes.in;
4727 outLayerShapes = shapes.out;
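// A sketch of querying shapes without running inference (the NCHW input shape is an
// assumption; MatShape is a std::vector<int>):
//
//     cv::dnn::MatShape inputShape = {1, 3, 224, 224};
//     std::vector<int> ids;
//     std::vector<std::vector<cv::dnn::MatShape> > inShapes, outShapes;
//     net.getLayersShapes(inputShape, ids, inShapes, outShapes);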
4730 int64 Net::getFLOPS(const std::vector<MatShape>& netInputShapes) const
4732 CV_TRACE_FUNCTION();
4735 std::vector<int> ids;
4736 std::vector<std::vector<MatShape> > inShapes, outShapes;
4737 getLayersShapes(netInputShapes, ids, inShapes, outShapes);
4738 CV_Assert(inShapes.size() == outShapes.size());
4739 CV_Assert(inShapes.size() == ids.size());
4741 for(int i = 0; i < ids.size(); i++)
4743 flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i],
4750 int64 Net::getFLOPS(const MatShape& netInputShape) const
4752 return getFLOPS(std::vector<MatShape>(1, netInputShape));
4755 int64 Net::getFLOPS(const int layerId,
4756 const std::vector<MatShape>& netInputShapes) const
4758 Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
4759 CV_Assert(layer != impl->layers.end());
4762 impl->getLayerShapes(netInputShapes, layerId, shapes);
4764 return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
4767 int64 Net::getFLOPS(const int layerId,
4768 const MatShape& netInputShape) const
4770 return getFLOPS(layerId, std::vector<MatShape>(1, netInputShape));
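// A rough complexity estimate for a given input shape (the shape values are an assumption):
//
//     int64 flops = net.getFLOPS(cv::dnn::MatShape({1, 3, 224, 224}));
//     std::cout << "GFLOPs: " << flops * 1e-9 << std::endl;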
4773 void Net::getLayerTypes(std::vector<String>& layersTypes) const
4775 layersTypes.clear();
4777 std::map<String, int> layers;
4778 for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
4779 it != impl->layers.end(); it++)
4781 if (layers.find(it->second.type) == layers.end())
4782 layers[it->second.type] = 0;
4783 layers[it->second.type]++;
4786 for (std::map<String, int>::iterator it = layers.begin();
4787 it != layers.end(); it++)
4789 layersTypes.push_back(it->first);
4793 int Net::getLayersCount(const String& layerType) const
4796 for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
4797 it != impl->layers.end(); it++)
4799 if (it->second.type == layerType)
4805 void Net::getMemoryConsumption(const int layerId,
4806 const std::vector<MatShape>& netInputShapes,
4807 size_t& weights, size_t& blobs) const
4809 CV_TRACE_FUNCTION();
4811 Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
4812 CV_Assert(layer != impl->layers.end());
4814 weights = blobs = 0;
4816 for(int i = 0; i < layer->second.params.blobs.size(); i++)
4818 const Mat& weightsBlob = layer->second.params.blobs[i];
4819 weights += weightsBlob.total()*weightsBlob.elemSize();
4822 ShapesVec inLayerShapes, outLayerShapes;
4823 getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes);
4824 for(int i = 0; i < outLayerShapes.size(); i++)
4826 blobs += total(outLayerShapes[i]) * sizeof(float);
4830 void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
4831 size_t& weights, size_t& blobs) const
4833 CV_TRACE_FUNCTION();
4835 std::vector<int> layerIds;
4836 std::vector<size_t> w, b;
4837 getMemoryConsumption(netInputShapes, layerIds, w, b);
4839 weights = blobs = 0;
4840 for(int i = 0; i < layerIds.size(); i++)
4847 void Net::getMemoryConsumption(const int layerId,
4848 const MatShape& netInputShape,
4849 size_t& weights, size_t& blobs) const
4851 getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
4855 void Net::getMemoryConsumption(const MatShape& netInputShape,
4856 size_t& weights, size_t& blobs) const
4858 getMemoryConsumption(std::vector<MatShape>(1, netInputShape),
4862 void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
4863 std::vector<int>& layerIds, std::vector<size_t>& weights,
4864 std::vector<size_t>& blobs) const
4866 CV_TRACE_FUNCTION();
4872 std::vector<std::vector<MatShape> > inLayerShapes, outLayerShapes;
4874 getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes);
4876 for(int i = 0; i < layerIds.size(); i++)
4879 Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]);
4880 CV_Assert(layer != impl->layers.end());
4882 for(int j = 0; j < layer->second.params.blobs.size(); j++)
4884 const Mat& weightsBlob = layer->second.params.blobs[j];
4885 w += weightsBlob.total()*weightsBlob.elemSize();
4888 for(int j = 0; j < outLayerShapes[i].size(); j++)
4890 b += total(outLayerShapes[i][j]) * sizeof(float);
4893 weights.push_back(w);
4898 void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>& layerIds,
4899 std::vector<size_t>& weights, std::vector<size_t>& blobs) const
4901 getMemoryConsumption(std::vector<MatShape>(1, netInputShape), layerIds,
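// A sketch of estimating memory use for a given input shape (bytes of parameters vs.
// intermediate blobs; the shape is an assumption):
//
//     size_t weights = 0, blobs = 0;
//     net.getMemoryConsumption(cv::dnn::MatShape({1, 3, 224, 224}), weights, blobs);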
4905 void Net::enableFusion(bool fusion)
4907 if( impl->fusion != fusion )
4909 impl->fusion = fusion;
4910 impl->netWasAllocated = false;
4915 void Net::setHalideScheduler(const String& scheduler)
4917 CV_TRACE_FUNCTION();
4918 CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str());
4920 impl->halideConfigFile = scheduler;
4923 int64 Net::getPerfProfile(std::vector<double>& timings)
4925 timings = std::vector<double>(impl->layersTimings.begin() + 1, impl->layersTimings.end());
4926 int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0);
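// Timings are tick-based; the usual conversion to milliseconds (as used in the OpenCV
// samples) is:
//
//     std::vector<double> layersTimes;
//     double freq = cv::getTickFrequency() / 1000.0;       // ticks per millisecond
//     double ms = net.getPerfProfile(layersTimes) / freq;  // total time of the last forward()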
4930 //////////////////////////////////////////////////////////////////////////
4932 Layer::Layer() { preferableTarget = DNN_TARGET_CPU; }
4934 Layer::Layer(const LayerParams &params)
4935 : blobs(params.blobs), name(params.name), type(params.type)
4937 preferableTarget = DNN_TARGET_CPU;
4940 void Layer::setParamsFrom(const LayerParams &params)
4942 blobs = params.blobs;
4947 int Layer::inputNameToIndex(String)
4952 int Layer::outputNameToIndex(const String&)
4957 bool Layer::supportBackend(int backendId)
4959 return backendId == DNN_BACKEND_OPENCV;
4962 Ptr<BackendNode> Layer::initCUDA(
4964 const std::vector<Ptr<BackendWrapper>>&,
4965 const std::vector<Ptr<BackendWrapper>>&)
4967 CV_Error(Error::StsNotImplemented, "CUDA pipeline of " + type +
4968 " layers is not defined.");
4969 return Ptr<BackendNode>();
4972 Ptr<BackendNode> Layer::initVkCom(const std::vector<Ptr<BackendWrapper> > &)
4974 CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type +
4975 " layers is not defined.");
4976 return Ptr<BackendNode>();
4979 Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
4981 CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type +
4982 " layers is not defined.");
4983 return Ptr<BackendNode>();
4986 Ptr<BackendNode> Layer::initInfEngine(const std::vector<Ptr<BackendWrapper> > &)
4988 CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
4989 " layers is not defined.");
4990 return Ptr<BackendNode>();
4993 Ptr<BackendNode> Layer::initNgraph(const std::vector<Ptr<BackendWrapper> > & inputs, const std::vector<Ptr<BackendNode> >& nodes)
4995 CV_Error(Error::StsNotImplemented, "nGraph/Inference Engine pipeline of " + type +
4996 " layers is not defined.");
4997 return Ptr<BackendNode>();
5000 void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*> &inputs,
5001 const std::vector<Mat> &outputs, int targetId) const
5004 CV_TRACE_FUNCTION();
5006 Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"),
5007 xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile");
5008 Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();
5010 int outW, outH, outC, outN;
5011 getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);
5013 if (targetId == DNN_TARGET_CPU)
5015 if (outW == 1 && outH == 1)
5017 if (outC + outN == 1)
5021 top.split(c, co, ci, 8)
5022 .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
5026 top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile)
5033 top.reorder(x, c, y)
5034 .split(y, yo, yi, 2)
5038 .vectorize(x, outW >= 16 ? 16 : outW);
5042 else if (targetId == DNN_TARGET_OPENCL)
5044 if (outW == 1 && outH == 1)
5046 int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
5047 top.split(c, co, ci, c_split)
5048 .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
5054 int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW;
5055 int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH;
5056 // Supported vectorization widths: 2, 3, 4, 8, 16
5057 int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC);
5058 top.split(x, xo, xi, x_split).split(y, yo, yi, y_split)
5059 .split(c, co, ci, c_split)
5060 .gpu_blocks(xo, yo, co)
5061 .gpu_threads(xi, yi)
5062 .reorder(xi, yi, ci, xo, yo, co)
5067 CV_Error(Error::StsNotImplemented, "Unknown target identifier");
5068 #endif // HAVE_HALIDE
5071 Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
5073 return Ptr<BackendNode>();
5076 bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
5077 bool Layer::tryFuse(Ptr<Layer>&) { return false; }
5078 void Layer::getScaleShift(Mat& scale, Mat& shift) const
5084 void Layer::unsetAttached()
5086 setActivation(Ptr<ActivationLayer>());
5089 template <typename T>
5090 static void vecToPVec(const std::vector<T> &v, std::vector<T*> &pv)
5092 pv.resize(v.size());
5093 for (size_t i = 0; i < v.size(); i++)
5094 pv[i] = const_cast<T*>(&v[i]);
5097 void Layer::finalize(const std::vector<Mat> &inputs, std::vector<Mat> &outputs)
5099 CV_TRACE_FUNCTION();
5100 this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
5103 void Layer::finalize(const std::vector<Mat*> &input, std::vector<Mat> &output)
5105 CV_UNUSED(input);CV_UNUSED(output);
5108 void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
5110 CV_TRACE_FUNCTION();
5111 std::vector<Mat> inputs, outputs;
5112 inputs_arr.getMatVector(inputs);
5113 outputs_arr.getMatVector(outputs);
5115 std::vector<Mat*> inputsp;
5116 vecToPVec(inputs, inputsp);
5117 this->finalize(inputsp, outputs);
5120 std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
5122 CV_TRACE_FUNCTION();
5124 std::vector<Mat> outputs;
5125 this->finalize(inputs, outputs);
5129 void Layer::forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
5131 // We kept this method for compatibility. DNN calls it now only to support users' implementations.
5134 void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
5136 CV_TRACE_FUNCTION();
5137 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
5139 Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
5142 void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
5144 CV_TRACE_FUNCTION();
5145 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
5147 if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
5149 std::vector<UMat> inputs;
5150 std::vector<UMat> outputs;
5151 std::vector<UMat> internals;
5153 std::vector<UMat> orig_inputs;
5154 std::vector<UMat> orig_outputs;
5155 std::vector<UMat> orig_internals;
5157 inputs_arr.getUMatVector(orig_inputs);
5158 outputs_arr.getUMatVector(orig_outputs);
5159 internals_arr.getUMatVector(orig_internals);
5161 inputs.resize(orig_inputs.size());
5162 for (size_t i = 0; i < orig_inputs.size(); i++)
5163 convertFp16(orig_inputs[i], inputs[i]);
5165 outputs.resize(orig_outputs.size());
5166 for (size_t i = 0; i < orig_outputs.size(); i++)
5167 outputs[i].create(shape(orig_outputs[i]), CV_32F);
5169 internals.resize(orig_internals.size());
5170 for (size_t i = 0; i < orig_internals.size(); i++)
5171 internals[i].create(shape(orig_internals[i]), CV_32F);
5173 forward(inputs, outputs, internals);
5175 for (size_t i = 0; i < outputs.size(); i++)
5176 convertFp16(outputs[i], orig_outputs[i]);
5178 // sync results back
5179 outputs_arr.assign(orig_outputs);
5180 internals_arr.assign(orig_internals);
5183 std::vector<Mat> inpvec;
5184 std::vector<Mat> outputs;
5185 std::vector<Mat> internals;
5187 inputs_arr.getMatVector(inpvec);
5188 outputs_arr.getMatVector(outputs);
5189 internals_arr.getMatVector(internals);
5191 std::vector<Mat*> inputs(inpvec.size());
5192 for (int i = 0; i < inpvec.size(); i++)
5193 inputs[i] = &inpvec[i];
5195 this->forward(inputs, outputs, internals);
5197 // sync results back
5198 outputs_arr.assign(outputs);
5199 internals_arr.assign(internals);
5202 void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
5204 CV_TRACE_FUNCTION();
5206 this->finalize(inputs, outputs);
5207 this->forward(inputs, outputs, internals);
5212 bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
5213 const int requiredOutputs,
5214 std::vector<MatShape> &outputs,
5215 std::vector<MatShape> &internals) const
5217 CV_Assert(inputs.size());
5218 outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
5222 //////////////////////////////////////////////////////////////////////////
5224 static Mutex& getLayerFactoryMutex()
5226 static Mutex* volatile instance = NULL;
5227 if (instance == NULL)
5229 cv::AutoLock lock(getInitializationMutex());
5230 if (instance == NULL)
5231 instance = new Mutex();
5236 typedef std::map<String, std::vector<LayerFactory::Constructor> > LayerFactory_Impl;
5238 static LayerFactory_Impl& getLayerFactoryImpl_()
5240 static LayerFactory_Impl impl;
5244 static LayerFactory_Impl& getLayerFactoryImpl()
5246 static LayerFactory_Impl* volatile instance = NULL;
5247 if (instance == NULL)
5249 cv::AutoLock lock(getLayerFactoryMutex());
5250 if (instance == NULL)
5252 instance = &getLayerFactoryImpl_();
5253 initializeLayerFactory();
5259 void LayerFactory::registerLayer(const String &type, Constructor constructor)
5261 CV_TRACE_FUNCTION();
5262 CV_TRACE_ARG_VALUE(type, "type", type.c_str());
5264 cv::AutoLock lock(getLayerFactoryMutex());
5265 LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type);
5267 if (it != getLayerFactoryImpl().end())
5269 if (it->second.back() == constructor)
5270 CV_Error(cv::Error::StsBadArg, "Layer \"" + type + "\" already was registered");
5271 it->second.push_back(constructor);
5273 getLayerFactoryImpl().insert(std::make_pair(type, std::vector<Constructor>(1, constructor)));
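// A hedged sketch of registering a custom layer type (MyLayer is a hypothetical
// cv::dnn::Layer subclass). The factory keeps a stack of constructors per type, and
// createLayerInstance() below uses the most recently registered one:
//
//     static cv::Ptr<cv::dnn::Layer> createMyLayer(cv::dnn::LayerParams& params)
//     {
//         return cv::Ptr<cv::dnn::Layer>(new MyLayer(params));
//     }
//     ...
//     cv::dnn::LayerFactory::registerLayer("MyType", createMyLayer);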
5276 void LayerFactory::unregisterLayer(const String &type)
5278 CV_TRACE_FUNCTION();
5279 CV_TRACE_ARG_VALUE(type, "type", type.c_str());
5281 cv::AutoLock lock(getLayerFactoryMutex());
5283 LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type);
5284 if (it != getLayerFactoryImpl().end())
5286 if (it->second.size() > 1)
5287 it->second.pop_back();
5289 getLayerFactoryImpl().erase(it);
5293 Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
5295 CV_TRACE_FUNCTION();
5296 CV_TRACE_ARG_VALUE(type, "type", type.c_str());
5298 cv::AutoLock lock(getLayerFactoryMutex());
5299 LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type);
5301 if (it != getLayerFactoryImpl().end())
5303 CV_Assert(!it->second.empty());
5304 return it->second.back()(params);
5308 return Ptr<Layer>(); //NULL
5312 BackendNode::BackendNode(int backendId) : backendId(backendId) {}
5314 BackendNode::~BackendNode() {}
5316 BackendWrapper::BackendWrapper(int backendId, int targetId)
5317 : backendId(backendId), targetId(targetId) {}
5319 BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m)
5321 CV_Error(Error::StsNotImplemented,
5322 "Constructor of backend wrapper must be implemented");
5325 BackendWrapper::BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape)
5327 CV_Error(Error::StsNotImplemented,
5328 "Constructor of backend wrapper must be implemented");
5331 BackendWrapper::~BackendWrapper() {}
5333 Net readNet(const String& _model, const String& _config, const String& _framework)
5335 String framework = toLowerCase(_framework);
5336 String model = _model;
5337 String config = _config;
5338 const std::string modelExt = model.substr(model.rfind('.') + 1);
5339 const std::string configExt = config.substr(config.rfind('.') + 1);
5340 if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" ||
5341 modelExt == "prototxt" || configExt == "prototxt")
5343 if (modelExt == "prototxt" || configExt == "caffemodel")
5344 std::swap(model, config);
5345 return readNetFromCaffe(config, model);
5347 if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" ||
5348 modelExt == "pbtxt" || configExt == "pbtxt")
5350 if (modelExt == "pbtxt" || configExt == "pb")
5351 std::swap(model, config);
5352 return readNetFromTensorflow(model, config);
5354 if (framework == "torch" || modelExt == "t7" || modelExt == "net" ||
5355 configExt == "t7" || configExt == "net")
5357 return readNetFromTorch(model.empty() ? config : model);
5359 if (framework == "darknet" || modelExt == "weights" || configExt == "weights" ||
5360 modelExt == "cfg" || configExt == "cfg")
5362 if (modelExt == "cfg" || configExt == "weights")
5363 std::swap(model, config);
5364 return readNetFromDarknet(config, model);
5366 if (framework == "dldt" || modelExt == "bin" || configExt == "bin" ||
5367 modelExt == "xml" || configExt == "xml")
5369 if (modelExt == "xml" || configExt == "bin")
5370 std::swap(model, config);
5371 return readNetFromModelOptimizer(config, model);
5373 if (framework == "onnx" || modelExt == "onnx")
5375 return readNetFromONNX(model);
5377 CV_Error(Error::StsError, "Cannot determine an origin framework of files: " +
5378 model + (config.empty() ? "" : ", " + config));
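// The framework is inferred from the explicit hint or from the file extensions, so both of
// these load the same Caffe model (paths are placeholders):
//
//     cv::dnn::Net a = cv::dnn::readNet("model.caffemodel", "deploy.prototxt");
//     cv::dnn::Net b = cv::dnn::readNet("deploy.prototxt", "model.caffemodel", "caffe");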
5381 Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
5382 const std::vector<uchar>& bufferConfig)
5384 String framework = toLowerCase(_framework);
5385 if (framework == "caffe")
5386 return readNetFromCaffe(bufferConfig, bufferModel);
5387 else if (framework == "tensorflow")
5388 return readNetFromTensorflow(bufferModel, bufferConfig);
5389 else if (framework == "darknet")
5390 return readNetFromDarknet(bufferConfig, bufferModel);
5391 else if (framework == "torch")
5392 CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
5393 else if (framework == "dldt")
5394 return readNetFromModelOptimizer(bufferConfig, bufferModel);
5395 CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework);
5398 Net readNetFromModelOptimizer(const String &xml, const String &bin)
5400 return Net::readFromModelOptimizer(xml, bin);
5403 Net readNetFromModelOptimizer(const std::vector<uchar>& bufferCfg, const std::vector<uchar>& bufferModel)
5405 return Net::readFromModelOptimizer(bufferCfg, bufferModel);
5408 Net readNetFromModelOptimizer(
5409 const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
5410 const uchar* bufferWeightsPtr, size_t bufferWeightsSize
5413 return Net::readFromModelOptimizer(
5414 bufferModelConfigPtr, bufferModelConfigSize,
5415 bufferWeightsPtr, bufferWeightsSize
5419 CV__DNN_INLINE_NS_END