/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include "op_halide.hpp"
#include "op_inf_engine.hpp"
#include "ie_ngraph.hpp"

#include "halide_scheduler.hpp"

#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>

#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/logger.hpp>

CV__DNN_EXPERIMENTAL_NS_BEGIN

static size_t DNN_NETWORK_DUMP = utils::getConfigurationParameterSizeT("OPENCV_DNN_NETWORK_DUMP", 0);
// This option is useful for running Valgrind memory error detection.
static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false);

static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false);

static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
#ifdef HAVE_INF_ENGINE
    (size_t)DNN_BACKEND_INFERENCE_ENGINE
#else
    (size_t)DNN_BACKEND_OPENCV
#endif
);

// Additional checks (slows down execution!)
static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);
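
// A minimal usage sketch for the switches above (assumption: a POSIX shell and a
// hypothetical binary ./my_dnn_app linked against OpenCV DNN). The checks can be
// enabled for a single debug run without rebuilding:
//
//   OPENCV_DNN_CHECK_NAN_INF=1 OPENCV_DNN_NETWORK_DUMP=1 ./my_dnn_app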
//==================================================================================================

typedef std::vector< std::pair<Backend, Target> > BackendsList;
const BackendsList & getBackends() const { return backends; }
static BackendRegistry & getRegistry()
    static BackendRegistry impl;

#ifdef HAVE_INF_ENGINE
static inline bool checkIETarget(Target target)
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R3)
    // Lightweight detection
    const std::vector<std::string> devices = getCore("").GetAvailableDevices();
    for (std::vector<std::string>::const_iterator i = devices.begin(); i != devices.end(); ++i)
        if (std::string::npos != i->find("MYRIAD") && target == DNN_TARGET_MYRIAD)
        else if (std::string::npos != i->find("FPGA") && target == DNN_TARGET_FPGA)
        else if (std::string::npos != i->find("CPU") && target == DNN_TARGET_CPU)
        else if (std::string::npos != i->find("GPU") && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
    cv::dnn::LayerParams lp;
    lp.set("kernel_size", 1);
    lp.set("num_output", 1);
    lp.set("bias_term", false);
    lp.type = "Convolution";
    lp.name = "testLayer";
    lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1)));
    net.addLayerToPrev(lp.name, lp.type, lp);
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
    net.setPreferableTarget(target);
    static int inpDims[] = {1, 2, 3, 4};
    net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0)));
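    // This heavyweight fallback probes `target` by building a tiny one-layer
    // (1x1 convolution) network and running it; plugin initialization failures
    // surface as exceptions, which are caught below and interpreted as
    // "target unavailable".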
catch (const std::exception& e)
    CV_LOG_INFO(NULL, "checkIETarget(" << (int)target << ") has failed with message: " << e.what());
backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU));

if (cv::ocl::useOpenCL())
    backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL));
#endif // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
if (checkIETarget(DNN_TARGET_CPU)) {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
    backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_CPU));
#ifdef HAVE_DNN_NGRAPH
    backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU));
if (checkIETarget(DNN_TARGET_MYRIAD)) {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
    backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_MYRIAD));
#ifdef HAVE_DNN_NGRAPH
    backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD));
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
if (checkIETarget(DNN_TARGET_FPGA))
    backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_FPGA));
if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel())
    if (checkIETarget(DNN_TARGET_OPENCL)) {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
        backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL));
#ifdef HAVE_DNN_NGRAPH
        backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL));
    if (checkIETarget(DNN_TARGET_OPENCL_FP16)) {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
        backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL_FP16));
#ifdef HAVE_DNN_NGRAPH
        backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL_FP16));
#endif // HAVE_INF_ENGINE

if (cv::ocl::useOpenCL())
    backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL));
    backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16));

backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));

BackendsList backends;
std::vector< std::pair<Backend, Target> > getAvailableBackends()
    return BackendRegistry::getRegistry().getBackends();

std::vector<Target> getAvailableTargets(Backend be)
    if (be == DNN_BACKEND_DEFAULT)
        be = (Backend)PARAM_DNN_BACKEND_DEFAULT;
#ifdef HAVE_INF_ENGINE
    if (be == DNN_BACKEND_INFERENCE_ENGINE)
        be = getInferenceEngineBackendTypeParam();

    std::vector<Target> result;
    const BackendRegistry::BackendsList all_backends = getAvailableBackends();
    for (BackendRegistry::BackendsList::const_iterator i = all_backends.begin(); i != all_backends.end(); ++i)
        result.push_back(i->second);
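
// A minimal usage sketch (assumption: client code that includes <opencv2/dnn.hpp>
// and already holds a cv::dnn::Net `net`); prefer OpenCL when the OpenCV backend
// offers it, otherwise fall back to CPU:
//
//   std::vector<Target> targets = getAvailableTargets(DNN_BACKEND_OPENCV);
//   bool haveOCL = std::find(targets.begin(), targets.end(), DNN_TARGET_OPENCL) != targets.end();
//   net.setPreferableTarget(haveOCL ? DNN_TARGET_OPENCL : DNN_TARGET_CPU);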
//==================================================================================================

typedef std::vector<MatShape> ShapesVec;

ShapesVec in, out, internal;
// There is no guarantee that a layer which supports in-place computation
// will actually be computed in-place (input.data_ptr == output.data_ptr).
// If a layer reports that it can work in-place and the layers after it
// no longer use the input blob, we set output = input.
LayerShapes() { supportInPlace = false; }
Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
                  const Scalar& mean, bool swapRB, bool crop, int ddepth)
    blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth);

void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
                   const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
    std::vector<Mat> images(1, image.getMat());
    blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);

Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
                   const Scalar& mean, bool swapRB, bool crop, int ddepth)
    blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);

void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
                    Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
    CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");

    CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
    CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");

    std::vector<Mat> images;
    images_.getMatVector(images);
    CV_Assert(!images.empty());
    for (size_t i = 0; i < images.size(); i++)
        Size imgSize = images[i].size();
        float resizeFactor = std::max(size.width / (float)imgSize.width,
                                      size.height / (float)imgSize.height);
        resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
        Rect crop(Point(0.5 * (images[i].cols - size.width),
                        0.5 * (images[i].rows - size.height)),
        images[i] = images[i](crop);

        resize(images[i], images[i], size, 0, 0, INTER_LINEAR);

        if (images[i].depth() == CV_8U && ddepth == CV_32F)
            images[i].convertTo(images[i], CV_32F);

        std::swap(mean[0], mean[2]);

        images[i] *= scalefactor;

    size_t nimages = images.size();
    Mat image0 = images[0];
    int nch = image0.channels();
    CV_Assert(image0.dims == 2);
    if (nch == 3 || nch == 4)
        int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();

        for (size_t i = 0; i < nimages; i++)
            const Mat& image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
            CV_Assert(image.size() == image0.size());

            for (int j = 0; j < nch; j++)
                ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
            std::swap(ch[0], ch[2]);

        int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();

        for (size_t i = 0; i < nimages; i++)
            const Mat& image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 1));
            CV_Assert(image.size() == image0.size());

            image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
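
// A minimal usage sketch (assumption: `img` is a BGR image loaded via cv::imread
// and `net` is a cv::dnn::Net; the size and scale values are illustrative only):
//
//   Mat blob = blobFromImage(img, 1.0 / 255.0, Size(224, 224),
//                            Scalar(), /*swapRB=*/true, /*crop=*/false);
//   net.setInput(blob);              // blob layout is NCHW: 1 x 3 x 224 x 224
//   Mat out = net.forward();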
void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)
    // A blob is a 4-dimensional matrix in floating-point precision:
    // blob_[0] = batchSize = number of images
    // blob_[1] = number of channels
    CV_Assert(blob_.depth() == CV_32F);
    CV_Assert(blob_.dims == 4);

    images_.create(cv::Size(1, blob_.size[0]), blob_.depth());

    std::vector<Mat> vectorOfChannels(blob_.size[1]);
    for (int n = 0; n < blob_.size[0]; ++n)
        for (int c = 0; c < blob_.size[1]; ++c)
            vectorOfChannels[c] = getPlane(blob_, n, c);
        cv::merge(vectorOfChannels, images_.getMatRef(n));
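
// Note: imagesFromBlob() inverts only the batching done by blobFromImages(): a
// CV_32F blob of shape N x C x H x W yields N images with C channels each. The
// scale, mean subtraction and channel swap applied earlier are not undone.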
class OpenCLBackendWrapper : public BackendWrapper
    OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)

    OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
        : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
        Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
        CV_Assert(!base.empty());

        int shape[] = {1, (int)base->umat.total()};
        umat = base->umat.reshape(1, 2, &shape[0])
                         .colRange(0, host->total())
                         .reshape(1, host->dims, &host->size[0]);

    static Ptr<BackendWrapper> create(Mat& m)
        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));

    static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));

    static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper> >& wrappers)
        const int numWrappers = (int)wrappers.size();
        std::vector<UMat> mats(wrappers.size());
        for (int i = 0; i < numWrappers; ++i)
            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
            CV_Assert(!umatWrapper.empty());
            umatWrapper->copyToDevice();
            mats[i] = umatWrapper->umat;

    // Replaces all umats in the wrappers with the given ones.
    static void update(const std::vector<Ptr<BackendWrapper> >& wrappers,
                       const std::vector<UMat>& umats)
        CV_Assert(wrappers.size() == umats.size());
        for (int i = 0, n = (int)umats.size(); i < n; ++i)
            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
            CV_Assert(!umatWrapper.empty());
            umatWrapper->umat = umats[i];

    ~OpenCLBackendWrapper() {}

    // Copies data from the device to host memory.
    virtual void copyToHost() CV_OVERRIDE

    virtual void setHostDirty() CV_OVERRIDE
LayerPin(int layerId = -1, int outputId = -1)
    : lid(layerId), oid(outputId) {}

    return (lid >= 0 && oid >= 0);

bool equal(const LayerPin& r) const
    return (lid == r.lid && oid == r.oid);

bool operator<(const LayerPin& r) const
    return lid < r.lid || (lid == r.lid && oid < r.oid);

bool operator==(const LayerPin& r) const
    return lid == r.lid && oid == r.oid;

LayerData() : id(-1), skip(false), flag(0) {}
LayerData(int _id, const String& _name, const String& _type, LayerParams& _params)
    : id(_id), name(_name), type(_type), params(_params), skip(false), flag(0)

std::vector<LayerPin> inputBlobsId;
std::set<int> inputLayersId;
std::set<int> requiredOutputs;
std::vector<LayerPin> consumers;
std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
std::vector<Ptr<BackendWrapper> > internalBlobsWrappers;

Ptr<Layer> layerInstance;
std::vector<Mat> outputBlobs;
std::vector<Mat*> inputBlobs;
std::vector<Mat> internals;
// Computation nodes of the implemented backends (all except DEFAULT).
std::map<int, Ptr<BackendNode> > backendNodes;
// Flag to skip this layer's computation for a specific backend.
Ptr<Layer> getLayerInstance()
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

        return layerInstance;

    layerInstance = LayerFactory::createLayerInstance(type, params);
        CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\"");

    return layerInstance;
// Fake layer that holds the network's input blobs.
struct DataLayer : public Layer
    DataLayer() : Layer()

    virtual bool supportBackend(int backendId) CV_OVERRIDE
        return backendId == DNN_BACKEND_OPENCV ||
               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && inputsData.size() == 1);

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (outputs_arr.depth() == CV_16S)
            forward_fallback(inputs_arr, outputs_arr, internals_arr);

        std::vector<Mat> outputs, internals;
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        // | Input type | Output type |
        for (int i = 0; i < inputsData.size(); ++i)
            double scale = scaleFactors[i];
            Scalar& mean = means[i];
            CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
            CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");

            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
                singleMean = mean[j] == mean[j - 1];

                inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);

                for (int n = 0; n < inputsData[i].size[0]; ++n)
                    for (int c = 0; c < inputsData[i].size[1]; ++c)
                        Mat inp = getPlane(inputsData[i], n, c);
                        Mat out = getPlane(outputs[i], n, c);
                        inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
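                // Note: Mat::convertTo(dst, type, alpha, beta) computes
                // dst = alpha*src + beta, so alpha = scale and
                // beta = -mean[c] * scale realize out = scale * (in - mean[c]).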
std::vector<Mat> tmp_expressions;
bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    // | Input type | Output type |
    std::vector<UMat> outputs;
    outputs_.getUMatVector(outputs);

    tmp_expressions.clear();
    for (int i = 0; i < inputsData.size(); ++i)
        Mat inputData = inputsData[i];

        double scale = scaleFactors[i];
        Scalar& mean = means[i];

        CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
        bool singleMean = true;
        for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
            singleMean = mean[j] == mean[j - 1];

        if (outputs_.depth() == CV_16S)
                tmp_expressions.push_back(Mat(scale * (inputsData[i] - mean[0])));
                convertFp16(tmp_expressions.back(), outputs[i]);

                for (int n = 0; n < inputsData[i].size[0]; ++n)
                    for (int c = 0; c < inputsData[i].size[1]; ++c)
                        Mat inp = getPlane(inputsData[i], n, c);

                        std::vector<cv::Range> plane(4, Range::all());
                        plane[0] = Range(n, n + 1);
                        plane[1] = Range(c, c + 1);
                        UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                        tmp_expressions.push_back(scale * (inp - mean[c]));
                        convertFp16(tmp_expressions.back(), out);

            CV_Assert(outputs_.depth() == CV_32F);
                inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);

                for (int n = 0; n < inputsData[i].size[0]; ++n)
                    for (int c = 0; c < inputsData[i].size[1]; ++c)
                        Mat inp = getPlane(inputsData[i], n, c);

                        std::vector<cv::Range> plane(4, Range::all());
                        plane[0] = Range(n, n + 1);
                        plane[1] = Range(c, c + 1);
                        UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                        inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
int outputNameToIndex(const String& tgtName) CV_OVERRIDE
    int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin());
    return (idx < (int)outNames.size()) ? idx : -1;

void setNames(const std::vector<String>& names)
    outNames.assign(names.begin(), names.end());
    shapes.clear(); shapes.resize(outNames.size());

void setInputShape(const String& tgtName, const MatShape& shape)
    std::vector<String>::const_iterator it = std::find(outNames.begin(), outNames.end(), tgtName);
    CV_Check(tgtName, it != outNames.end(), "Unknown input");
    int idx = (int)(it - outNames.begin());

    CV_Assert(idx < (int)shapes.size());
    CV_Check(tgtName, shapes[idx].empty(), "Input shape redefinition is not allowed");

bool getMemoryShapes(const std::vector<MatShape>& inputs,
                     const int requiredOutputs,
                     std::vector<MatShape>& outputs,
                     std::vector<MatShape>& internals) const CV_OVERRIDE
    CV_Assert(inputs.size() == requiredOutputs);
    outputs.assign(inputs.begin(), inputs.end());

virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    std::vector<Mat> outputs;
    outputs_arr.getMatVector(outputs);

    CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(),
                inputsData.size() == outputs.size());

    for (int i = 0; skip && i < inputsData.size(); ++i)
        if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar())

#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    CV_CheckEQ(inputsData.size(), (size_t)1, "");
    CV_CheckEQ(inputsData[0].dims, 4, "");
    const size_t numChannels = inputsData[0].size[1];
    CV_Assert(numChannels <= 4);

    InferenceEngine::TensorDesc td(InferenceEngine::Precision::FP32, {numChannels},
                                   InferenceEngine::Layout::C);
    auto weights = InferenceEngine::make_shared_blob<float>(td);

    float* weight_buf = weights->buffer().as<float*>();
    std::fill(weight_buf, weight_buf + numChannels, scaleFactors[0]);

    auto biases = InferenceEngine::make_shared_blob<float>(td);

    float* bias_buf = biases->buffer().as<float*>();

    for (int i = 0; i < numChannels; ++i)
        bias_buf[i] = -means[0][i] * scaleFactors[0];

    InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name);
    addConstantData("weights", weights, ieLayer);
    addConstantData("biases", biases, ieLayer);
    return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif // HAVE_DNN_IE_NN_BUILDER_2019

std::vector<String> outNames;
std::vector<MatShape> shapes;
// Preprocessing parameters for each network's input.
std::vector<double> scaleFactors;
std::vector<Scalar> means;
std::vector<Mat> inputsData;
// Increments the reference counter of a layer output.
void addReference(const LayerPin& lp)
    std::map<LayerPin, int>::iterator it = refCounter.find(lp);
    if (it == refCounter.end())

void addReferences(const std::vector<LayerPin>& pins)
    for (int i = 0; i < pins.size(); i++)
        addReference(pins[i]);

// Returns the number of references to the allocated memory that is used
// by a specific blob.
int numReferences(const LayerPin& lp)
    std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
    CV_Assert(mapIt != reuseMap.end());
    LayerPin memHost = mapIt->second;

    std::map<LayerPin, int>::iterator refIt = refCounter.find(memHost);
    CV_Assert(refIt != refCounter.end());
    return refIt->second;

// Reuses data allocated in <host> inside the <user> blob.
void reuse(const LayerPin& host, const LayerPin& user)
    CV_Assert(reuseMap.find(user) == reuseMap.end());
    CV_Assert(reuseMap.find(host) != reuseMap.end());
    LayerPin memHost = reuseMap[host];
    reuseMap[user] = memHost;
    if (refCounter.find(memHost) != refCounter.end())
        std::map<LayerPin, int>::iterator userRefIt = refCounter.find(user);
        if (userRefIt != refCounter.end())
            refCounter[memHost] += userRefIt->second;
            refCounter.erase(userRefIt);

            refCounter[memHost] += 1;
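
// A hypothetical scenario for the bookkeeping above: after reuse(A, B), pin B
// maps to A's origin blob in reuseMap and B's pending reference count is folded
// into the origin's counter, so numReferences(A) and numReferences(B) report
// the same shared total.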
// Decrements the reference counter of the memory allocated for a specific blob.
void releaseReference(const LayerPin& lp)
    std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
    CV_Assert(mapIt != reuseMap.end());

    std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
    CV_Assert(refIt != refCounter.end());
    CV_Assert(refIt->second > 0);

void releaseReferences(const std::vector<LayerPin>& pins)
    for (int i = 0; i < pins.size(); i++)
        releaseReference(pins[i]);
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half)
    if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
        LayerPin bestBlobPin;

        std::map<LayerPin, Mat>::iterator hostIt;
        std::map<LayerPin, int>::iterator refIt;

        const int targetTotal = total(shape);
        int bestBlobTotal = INT_MAX;

        for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
            refIt = refCounter.find(hostIt->first);
            // Consider only blobs that have had references before; otherwise
            // the blob might be in use as an output.
            if (refIt != refCounter.end() && refIt->second == 0)
                Mat& unusedBlob = hostIt->second;
                if (unusedBlob.total() >= targetTotal &&
                    unusedBlob.total() < bestBlobTotal)
                    bestBlobPin = hostIt->first;
                    bestBlob = unusedBlob;
                    bestBlobTotal = unusedBlob.total();

        if (!bestBlob.empty())
            reuse(bestBlobPin, lp);
            dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);

    // If dst has already been allocated with total(shape) elements, it won't be
    // recreated and the dst.data pointer remains the same.
    dst.create(shape, use_half ? CV_16S : CV_32F);
void allocateBlobsForLayer(LayerData& ld, const LayerShapes& layerShapes,
                           std::vector<LayerPin>& pinsForInternalBlobs,
                           bool use_half = false)
    pinsForInternalBlobs.clear();

    std::vector<Mat>& outputBlobs = ld.outputBlobs,
                    & internalBlobs = ld.internals;

    const ShapesVec& outShapes = layerShapes.out,
                     internalShapes = layerShapes.internal;

    outputBlobs.resize(std::max((size_t)1, outShapes.size())); // a layer produces at least one output blob
    internalBlobs.resize(internalShapes.size());

    CV_Assert(ld.requiredOutputs.size() <= outShapes.size());

    // Check whether the layer can work in-place.
    bool inPlace = false;
    if (layerShapes.supportInPlace)
        if (ld.inputBlobs.size() == 1)
            // Get the number of references to the input memory.
            int numRef = numReferences(ld.inputBlobsId[0]);
            // In-place is possible only if the current layer is the one and
            // only consumer of this blob.
            inPlace = numRef == 1;

    ShapesVec shapes(outShapes);
    shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
    std::vector<Mat*> blobs;
    for (int i = 0; i < outputBlobs.size(); i++)
        blobs.push_back(&outputBlobs[i]);

    for (int i = 0; i < internalBlobs.size(); i++)
        blobs.push_back(&internalBlobs[i]);
        if (total(internalShapes[i]))
            pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));

    addReferences(pinsForInternalBlobs);

    std::map<int, std::vector<int> > idxSizes;
    for (int i = 0; i < shapes.size(); i++)
        idxSizes[total(shapes[i])].push_back(i);

    std::map<int, std::vector<int> >::reverse_iterator it;
    for (it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
        for (int j = 0; j < it->second.size(); j++)
            int index = it->second[j];
            if (total(shapes[index]))
                LayerPin blobPin(ld.id, index);
                if (index < outShapes.size() && inPlace)
                    CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
                    ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
                    reuse(ld.inputBlobsId[0], blobPin);

                    reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half);
// Clears the internal state. Called before every reallocation.
    CV_TRACE_FUNCTION();

// Registers allocated memory.
void addHost(const LayerPin& lp, const Mat& mat)
    CV_Assert(memHosts.find(lp) == memHosts.end());

std::map<LayerPin, int> refCounter;
// Maps a pin to its origin blob (the one for which the memory was originally
// allocated). For origin blobs, key == value.
std::map<LayerPin, LayerPin> reuseMap;
std::map<LayerPin, Mat> memHosts;
static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
    if (backendId == DNN_BACKEND_OPENCV)
        if (targetId == DNN_TARGET_CPU)
            return Ptr<BackendWrapper>();
        else if (IS_DNN_OPENCL_TARGET(targetId))
            return OpenCLBackendWrapper::create(m);

            CV_Error(Error::StsNotImplemented, "Unknown/unsupported target identifier");
    else if (backendId == DNN_BACKEND_HALIDE)
        CV_Assert(haveHalide());
        return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
#endif // HAVE_HALIDE
    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
        return Ptr<BackendWrapper>(new InfEngineBackendWrapper(targetId, m));
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
#ifdef HAVE_DNN_NGRAPH
        return Ptr<BackendWrapper>(new NgraphBackendWrapper(targetId, m));
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");

    CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
    return Ptr<BackendWrapper>(); // TODO Error?
static int g_networkId = 0;

detail::NetImplBase::NetImplBase()
    : networkId(CV_XADD(&g_networkId, 1))
    , networkDumpCounter(0)
    , dumpLevel(DNN_NETWORK_DUMP)

std::string detail::NetImplBase::getDumpFileNameBase()
    std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++);
    return dumpFileNameBase;
struct Net::Impl : public detail::NetImplBase
    typedef std::map<int, LayerShapes> LayersShapesMap;
    typedef std::map<int, LayerData> MapIdToLayerData;

    // Allocate the fake network input layer.
    netInputLayer = Ptr<DataLayer>(new DataLayer());
    LayerData& inpl = layers.insert( make_pair(0, LayerData()) ).first->second;

    netInputLayer->name = inpl.name = "_input";
    inpl.type = "__NetInputLayer__";
    inpl.layerInstance = netInputLayer;
    layerNameToId.insert(std::make_pair(inpl.name, inpl.id));

    netWasAllocated = false;

    preferableBackend = DNN_BACKEND_DEFAULT;
    preferableTarget = DNN_TARGET_CPU;
    skipInfEngineInit = false;

    Ptr<DataLayer> netInputLayer;
    std::vector<LayerPin> blobsToKeep;
    MapIdToLayerData layers;
    std::map<String, int> layerNameToId;
    BlobManager blobManager;
    int preferableBackend;
    int preferableTarget;
    String halideConfigFile;
    bool skipInfEngineInit;
    // Maps host data to a backend-specific wrapper.
    std::map<void*, Ptr<BackendWrapper> > backendWrappers;

    bool netWasAllocated;

    std::vector<int64> layersTimings;
    Ptr<BackendWrapper> wrap(Mat& host)
        if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU)
            return Ptr<BackendWrapper>();

        MatShape shape(host.dims);
        for (int i = 0; i < host.dims; ++i)
            shape[i] = host.size[i];

        void* data = host.data;
        if (backendWrappers.find(data) != backendWrappers.end())
            Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
            if (preferableBackend == DNN_BACKEND_OPENCV)
                CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
                return OpenCLBackendWrapper::create(baseBuffer, host);

                CV_Error(Error::StsInternal, "");
            else if (preferableBackend == DNN_BACKEND_HALIDE)
                CV_Assert(haveHalide());
                return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
            else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
                return wrapMat(preferableBackend, preferableTarget, host);
            else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
                return wrapMat(preferableBackend, preferableTarget, host);

                CV_Error(Error::StsNotImplemented, "Unknown backend identifier");

        Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
        backendWrappers[data] = wrapper;
    void compileHalide()
        CV_TRACE_FUNCTION();

        CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);

        HalideScheduler scheduler(halideConfigFile);
        std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
            LayerData& ld = it->second;
            Ptr<Layer> layer = ld.layerInstance;
            if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip)
                CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty());
                bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]);
                    // Use the automatic scheduling provided by the layer.
                    layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE],
                                                ld.inputBlobs, ld.outputBlobs,
                compileList.emplace_back(ld);

        std::atomic<int> progress(0);
        auto fn = ([&] () -> void
            int id = progress.fetch_add(1);
            if ((size_t)id >= compileList.size())
            const LayerData& ld = compileList[id].get();
            Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
            dnn::compileHalide(ld.outputBlobs, node, preferableTarget);

        size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
        num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
        std::vector<std::thread> threads(num_threads - 1);
        for (auto& t : threads) t = std::thread(fn);
        fn(); // process own tasks
        for (auto& t : threads) t.join();
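        // Note: each worker claims the next layer index atomically via
        // progress.fetch_add(1) and exits once the compile list is exhausted,
        // so the atomic counter is the only synchronization required.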
        CV_TRACE_FUNCTION();

        MapIdToLayerData::iterator it;
        for (it = layers.begin(); it != layers.end(); it++)
            if (it->second.id != 0) {
                it->second.inputBlobs.clear();
                it->second.outputBlobs.clear();
                it->second.internals.clear();
            it->second.skip = false;
            //it->second.consumers.clear();

            Ptr<Layer> currLayer = it->second.layerInstance;

            if (currLayer.empty())

            currLayer->unsetAttached();

        layersTimings.clear();
    void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>())
        CV_TRACE_FUNCTION();

        if (dumpLevel && networkDumpCounter == 0)
            dumpNetworkToFile();

        if (preferableBackend == DNN_BACKEND_DEFAULT)
            preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT;
#ifdef HAVE_INF_ENGINE
        if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
            preferableBackend = getInferenceEngineBackendTypeParam();

        CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL ||
                  preferableTarget == DNN_TARGET_OPENCL_FP16);
        CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL);
        if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
            preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
                      preferableTarget == DNN_TARGET_CPU ||
                      preferableTarget == DNN_TARGET_OPENCL ||
                      preferableTarget == DNN_TARGET_OPENCL_FP16 ||
                      preferableTarget == DNN_TARGET_MYRIAD ||
                      preferableTarget == DNN_TARGET_FPGA

        if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
            if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
                CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
                preferableTarget = DNN_TARGET_CPU;

            if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
                // The current implementation is only valid for GPUs (#11494).
                if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
                    CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
                    preferableTarget = DNN_TARGET_CPU;
                else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
                    CV_LOG_WARNING(NULL,
                                   "DNN: OpenCL target with fp16 precision is not supported "
                                   "with current OpenCL device (tested with Intel GPUs only), "
                                   "switching to OpenCL with fp32 precision.");
                    preferableTarget = DNN_TARGET_OPENCL;

            this->blobsToKeep = blobsToKeep_;

            allocateLayers(blobsToKeep_);

            MapIdToLayerData::iterator it = layers.find(0);
            CV_Assert(it != layers.end());
            it->second.skip = netInputLayer->skip;

            initBackend(blobsToKeep_);

            if (!netWasAllocated)
                if (preferableBackend == DNN_BACKEND_HALIDE)

                    CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);

            netWasAllocated = true;

            dumpNetworkToFile();
    int getLayerId(const String& layerName)
        std::map<String, int>::iterator it = layerNameToId.find(layerName);
        return (it != layerNameToId.end()) ? it->second : -1;

    int getLayerId(int id)
        MapIdToLayerData::iterator it = layers.find(id);
        return (it != layers.end()) ? id : -1;

    int getLayerId(DictValue& layerDesc)
        if (layerDesc.isInt())
            return getLayerId(layerDesc.get<int>());
        else if (layerDesc.isString())
            return getLayerId(layerDesc.get<String>());

        CV_Assert(layerDesc.isInt() || layerDesc.isString());

    String getLayerName(int id)
        MapIdToLayerData::iterator it = layers.find(id);
        return (it != layers.end()) ? it->second.name : "(unknown layer)";

    LayerData& getLayerData(int id)
        MapIdToLayerData::iterator it = layers.find(id);

        if (it == layers.end())
            CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));

    LayerData& getLayerData(const String& layerName)
        int id = getLayerId(layerName);

            CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found");

        return getLayerData(id);

    LayerData& getLayerData(const DictValue& layerDesc)
        CV_Assert(layerDesc.isInt() || layerDesc.isString());
        if (layerDesc.isInt())
            return getLayerData(layerDesc.get<int>());
        else /*if (layerDesc.isString())*/
            return getLayerData(layerDesc.get<String>());
    static void addLayerInput(LayerData& ld, int inNum, LayerPin from)
        if ((int)ld.inputBlobsId.size() <= inNum)
            ld.inputBlobsId.resize(inNum + 1);

            LayerPin storedFrom = ld.inputBlobsId[inNum];
            if (storedFrom.valid() && !storedFrom.equal(from))
                CV_Error(Error::StsError, format("Input #%d of layer \"%s\" was already connected",
                                                 inNum, ld.name.c_str()));

        ld.inputBlobsId[inNum] = from;

    int resolvePinOutputName(LayerData& ld, const String& outName)
        if (outName.empty())
        return ld.getLayerInstance()->outputNameToIndex(outName);

    LayerPin getPinByAlias(const String& layerName)
        pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);

            pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);

    std::vector<LayerPin> getLayerOutPins(const String& layerName)
        int lid = (layerName.empty()) ? 0 : getLayerId(layerName);

        std::vector<LayerPin> pins;

        for (int i = 0; i < layers[lid].outputBlobs.size(); i++)
            pins.push_back(LayerPin(lid, i));

    void connect(int outLayerId, int outNum, int inLayerId, int inNum)
        CV_Assert(outLayerId < inLayerId);
        LayerData& ldOut = getLayerData(outLayerId);
        LayerData& ldInp = getLayerData(inLayerId);

        addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum));
        ldOut.requiredOutputs.insert(outNum);
        ldOut.consumers.push_back(LayerPin(inLayerId, outNum));
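    // Example (hypothetical ids): connect(1, 0, 2, 0) routes output #0 of layer 1
    // into input #0 of layer 2, records layer 2 as a consumer of that output, and
    // inserts output #0 into layer 1's requiredOutputs set.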
    void initBackend(const std::vector<LayerPin>& blobsToKeep_)
        CV_TRACE_FUNCTION();
        if (preferableBackend == DNN_BACKEND_OPENCV)
            CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
        else if (preferableBackend == DNN_BACKEND_HALIDE)
            initHalideBackend();
        else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
            initInfEngineBackend(blobsToKeep_);
            CV_Assert(false && "This OpenCV version is built without Inference Engine NN Builder API support");
        else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
#ifdef HAVE_DNN_NGRAPH
            initNgraphBackend(blobsToKeep_);
            CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");

            CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
    void initHalideBackend()
        CV_TRACE_FUNCTION();
        CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide());

        // Iterator to the current layer.
        MapIdToLayerData::iterator it = layers.begin();
        // Iterator to the base layer for fusion. For example, for conv+bn+relu
        // it will be the conv layer.
        MapIdToLayerData::iterator baseIt = layers.begin();
        for (; it != layers.end(); it++)
            LayerData& ldTop = it->second;
            Ptr<Layer> layerTop = ldTop.layerInstance;
            if (!layerTop->supportBackend(preferableBackend))
                // Move the base iterator to the layer that doesn't support the
                // preferable backend, to prevent fusion across layers of
                // different backends.

            // Try to fuse layers.
            LayerData& ldBot = baseIt->second;
            Ptr<Layer> layerBot = ldBot.layerInstance;
            // 1. Check that the bottom and top layers use the same backend.
            if (it != layers.begin() && layerBot->supportBackend(preferableBackend))
                // 2. Check that the current layer works in-place.
                bool inPlace = ldTop.inputBlobs.size() == 1 &&
                               ldBot.outputBlobs.size() == 1 &&
                               ldTop.inputBlobs[0]->data ==
                               ldBot.outputBlobs[0].data;

                    // 3. Try to attach the node.
                    CV_Assert(!ldBot.backendNodes[preferableBackend].empty());
                    Ptr<BackendNode> fusedNode =
                        layerTop->tryAttach(ldBot.backendNodes[preferableBackend]);
                    if (!fusedNode.empty())
                        ldBot.backendNodes[preferableBackend] = fusedNode;
                        ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers;

            // No layer fusion.
            ldTop.backendNodes[DNN_BACKEND_HALIDE] =
                layerTop->initHalide(ldTop.inputBlobsWrappers);
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
    // Before launching an Inference Engine graph we need to specify its output
    // blobs. This function requests output blobs based on input references of
    // layers from the default backend or from different graphs.
    void addInfEngineNetOutputs(LayerData& ld)
        CV_TRACE_FUNCTION();
        Ptr<InfEngineBackendNet> layerNet;
        if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end())
            Ptr<BackendNode> node = ld.backendNodes[preferableBackend];

                Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
                CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
                layerNet = ieNode->net;

        // For every input reference we check whether it belongs to one of the
        // Inference Engine backend graphs; if so, request an output blob. Do
        // nothing if the layer's input is from the same graph.
        for (int i = 0; i < ld.inputBlobsId.size(); ++i)
            LayerData& inpLd = layers[ld.inputBlobsId[i].lid];
            Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
            if (!inpNode.empty())
                Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
                CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
                if (layerNet != ieInpNode->net)
                    // layerNet is empty or the nodes are from different graphs.
                    ieInpNode->net->addOutput(ieInpNode->layer.getName());
    void initInfEngineBackend(const std::vector<LayerPin>& blobsToKeep_)
        CV_TRACE_FUNCTION();
        CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, haveInfEngine());
        MapIdToLayerData::iterator it;
        Ptr<InfEngineBackendNet> net;

        for (it = layers.begin(); it != layers.end(); ++it)
            LayerData& ld = it->second;

                CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
                          (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
                    dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
                    dataPtr->setName(netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]);

                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
                    dataPtr->name = ld.name;
                    dataPtr->setName(ld.name);

        if (skipInfEngineInit)
            Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
            CV_Assert(!node.empty());

            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
            CV_Assert(!ieNode.empty());
            ieNode->net->reset();

            for (it = layers.begin(); it != layers.end(); ++it)
                LayerData& ld = it->second;

                for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
                    dataPtr->name = netInputLayer->outNames[i];
                    dataPtr->setName(netInputLayer->outNames[i]);

                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
                    dataPtr->name = ld.name;
                    dataPtr->setName(ld.name);

                ieNode->net->addBlobs(ld.inputBlobsWrappers);
                ieNode->net->addBlobs(ld.outputBlobsWrappers);

            layers[lastLayerId].skip = false;
            ieNode->net->init((Target)preferableTarget);
        // Build Inference Engine networks from sets of layers that support this
        // backend. Split the whole model into several Inference Engine networks
        // if some of the layers are not implemented.

        bool supportsCPUFallback = preferableTarget == DNN_TARGET_CPU ||
                                   BackendRegistry::checkIETarget(DNN_TARGET_CPU);

        // Set of all input and output blob wrappers for the current network.
        std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
        for (it = layers.begin(); it != layers.end(); ++it)
            LayerData& ld = it->second;
            if (ld.id == 0 && ld.skip)

            bool fused = ld.skip;

            Ptr<Layer> layer = ld.layerInstance;
            if (!fused && !layer->supportBackend(preferableBackend))
                bool customizable = ld.id != 0 &&
                                    INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2) &&
                                    supportsCPUFallback;
                // TODO: there is a bug in the Myriad plugin with shape inference
                // for custom layers.
                if (preferableTarget == DNN_TARGET_MYRIAD)
                    for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
                        customizable = ld.inputBlobs[i]->size[0] == 1;

                // TODO: fix these workarounds
                if (preferableTarget == DNN_TARGET_MYRIAD ||
                    preferableTarget == DNN_TARGET_OPENCL ||
                    preferableTarget == DNN_TARGET_OPENCL_FP16)
                    customizable &= ld.type != "Concat";

                if (preferableTarget == DNN_TARGET_OPENCL ||
                    preferableTarget == DNN_TARGET_OPENCL_FP16)
                    customizable &= ld.type != "Power";

                if (preferableTarget == DNN_TARGET_OPENCL)
                    customizable &= ld.type != "Eltwise";

                    addInfEngineNetOutputs(ld);
                    net = Ptr<InfEngineBackendNet>();
                    netBlobsWrappers.clear(); // Not used for the R5 release, but we don't wrap it in #ifdef.
                    layer->preferableTarget = DNN_TARGET_CPU;

            ld.skip = true; // Initially skip all layers that Inference Engine supports.
            // Create a new network if one of the inputs comes from a different
            // Inference Engine graph.
            for (int i = 0; i < ld.inputBlobsId.size(); ++i)
                LayerData& inpLd = layers[ld.inputBlobsId[i].lid];
                Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
                if (!inpNode.empty())
                    Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
                    CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
                    if (ieInpNode->net != net)
                        net = Ptr<InfEngineBackendNet>();
                        netBlobsWrappers.clear(); // Not used for the R5 release, but we don't wrap it in #ifdef.

            Ptr<BackendNode> node;

                bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
                               ld.inputBlobs[0]->data == ld.outputBlobs[0].data;

                node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
                ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;

                net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet());

            if (layer->supportBackend(preferableBackend))
                node = layer->initInfEngine(ld.inputBlobsWrappers);

                node = Ptr<BackendNode>(new InfEngineBackendNode(
                    ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));

            else if (node.empty())

            CV_Assert(!node.empty());
            ld.backendNodes[preferableBackend] = node;

            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
            CV_Assert(!ieNode.empty());

            for (const auto& pin : blobsToKeep_)
                if (pin.lid == ld.id)
                    ieNode->net->addOutput(ieNode->layer.getName());
            // Convert weights to FP16 for specific targets.
            if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
                 preferableTarget == DNN_TARGET_MYRIAD ||
                 preferableTarget == DNN_TARGET_FPGA) && !fused)
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
                for (const std::string& name : {"weights", "biases"})
                    auto it = ieNode->layer.getParameters().find(name);
                    if (it != ieNode->layer.getParameters().end())
                        InferenceEngine::Blob::Ptr bp = it->second.as<InferenceEngine::Blob::Ptr>();
                        it->second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(bp));

                auto& blobs = ieNode->layer.getConstantData();
                    // In the case of a non-weightable layer we have to specify
                    // its precision by adding a dummy blob.
                    auto blob = InferenceEngine::make_shared_blob<int16_t>(
                                    InferenceEngine::Precision::FP16,
                                    InferenceEngine::Layout::C, {1});

                    for (auto& it : blobs)
                        it.second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(it.second));

            net->addLayer(ieNode->layer);

            net->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers, ieNode->layer.getName());
            net->addBlobs(ld.inputBlobsWrappers);
            net->addBlobs(ld.outputBlobsWrappers);
            addInfEngineNetOutputs(ld);
        // Initialize all networks.
        for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
            LayerData& ld = it->second;
            if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end())

            Ptr<BackendNode> node = ld.backendNodes[preferableBackend];

            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();

            CV_Assert(!ieNode->net.empty());

            if (!ieNode->net->isInitialized())
                ieNode->net->init((Target)preferableTarget);

#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
    void addNgraphOutputs(LayerData& ld)
        CV_TRACE_FUNCTION();

        Ptr<InfEngineNgraphNet> layerNet;
        auto it = ld.backendNodes.find(preferableBackend);
        if (it != ld.backendNodes.end())
            Ptr<BackendNode> node = it->second;

                Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
                CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
                layerNet = ieNode->net;

        for (int i = 0; i < ld.inputBlobsId.size(); ++i)
            LayerData& inpLd = layers[ld.inputBlobsId[i].lid];
            Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
            if (!inpNode.empty())
                Ptr<InfEngineNgraphNode> ieInpNode = inpNode.dynamicCast<InfEngineNgraphNode>();
                CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
                if (layerNet != ieInpNode->net)
                    ieInpNode->net->addOutput(ieInpNode->node->get_friendly_name());
                    ieInpNode->net->setUnconnectedNodes(ieInpNode);
    void initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
        CV_TRACE_FUNCTION();
        CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, haveInfEngine());

        MapIdToLayerData::iterator it;
        Ptr<InfEngineNgraphNet> net;

        for (it = layers.begin(); it != layers.end(); ++it)
            LayerData& ld = it->second;

                CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
                          (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                    InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
                    std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
                    outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." + std::to_string(i)) : outputName;
                    dataPtr->setName(outputName);

                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                    InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
                    std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name;
                    dataPtr->setName(outputName);

        if (skipInfEngineInit)
            Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
            CV_Assert(!node.empty());

            Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
            CV_Assert(!ieNode.empty());
            ieNode->net->reset();

            for (it = layers.begin(); it != layers.end(); ++it)
                LayerData& ld = it->second;

                for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
                    InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.inputBlobsWrappers[i]);
                    dataPtr->setName(netInputLayer->outNames[i]);

                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                    InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
                    dataPtr->setName(ld.name);

                ieNode->net->addBlobs(ld.inputBlobsWrappers);
                ieNode->net->addBlobs(ld.outputBlobsWrappers);

            layers[lastLayerId].skip = false;
            ieNode->net->init((Target)preferableTarget);
        bool supportsCPUFallback = preferableTarget == DNN_TARGET_CPU ||
                                   BackendRegistry::checkIETarget(DNN_TARGET_CPU);

        // Build Inference Engine networks from sets of layers that support this
        // backend. Split the whole model into several Inference Engine networks
        // if some of the layers are not implemented.
        for (it = layers.begin(); it != layers.end(); ++it)
            LayerData& ld = it->second;

            if (ld.id == 0 && ld.skip)

            bool fused = ld.skip;
            Ptr<Layer> layer = ld.layerInstance;
            if (!fused && !layer->supportBackend(preferableBackend))
                bool customizable = ld.id != 0 && supportsCPUFallback;

                // TODO: there is a bug in the Myriad plugin with shape inference
                // for custom layers.
                if (preferableTarget == DNN_TARGET_MYRIAD)
                    for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
                        customizable = ld.inputBlobs[i]->size[0] == 1;

                // TODO: fix these workarounds
                if (preferableTarget == DNN_TARGET_MYRIAD ||
                    preferableTarget == DNN_TARGET_OPENCL ||
                    preferableTarget == DNN_TARGET_OPENCL_FP16)
                    customizable &= ld.type != "Concat";

                if (preferableTarget == DNN_TARGET_OPENCL ||
                    preferableTarget == DNN_TARGET_OPENCL_FP16)
                    customizable &= ld.type != "Power";

                if (preferableTarget == DNN_TARGET_OPENCL)
                    customizable &= ld.type != "Eltwise";

                    addNgraphOutputs(ld);
                    net = Ptr<InfEngineNgraphNet>();
                    layer->preferableTarget = DNN_TARGET_CPU;

                    for (int i = 0; i < ld.inputBlobsId.size(); ++i)
                        LayerData& inpLd = layers[ld.inputBlobsId[i].lid];
                        Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
                        if (!inpNode.empty()) {
                            Ptr<InfEngineNgraphNode> ieNode = inpNode.dynamicCast<InfEngineNgraphNode>();
                            CV_Assert(!ieNode.empty());
                            ieNode->net->setUnconnectedNodes(ieNode);

            ld.skip = true; // Initially skip all layers that Inference Engine supports.
            // Create a new network if one of the inputs comes from a different
            // Inference Engine graph.
            std::vector<Ptr<BackendNode>> inputNodes;
            for (int i = 0; i < ld.inputBlobsId.size(); ++i)
                // Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois)
                if (inputNodes.size() == ld.inputBlobsId.size()) {

                LayerData& inpLd = layers[ld.inputBlobsId[i].lid];
                Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
                if (!inpNode.empty())
                    Ptr<InfEngineNgraphNode> ieInpNode = inpNode.dynamicCast<InfEngineNgraphNode>();
                    CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
                    if (ieInpNode->net == net && !fused) {
                        inputNodes.push_back(inpNode);

                net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));

                std::vector<std::string> inputNames;
                std::vector<cv::Mat> inputs;

                auto curr_pos = inpLd.consumers.begin();
                auto compare = [&ld] (const LayerPin& lp) { return lp.lid == ld.id; };
                auto cons = curr_pos;
                while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) !=
                       inpLd.consumers.end()) {
                    int cons_inp = cons->oid;
                    Ptr<NgraphBackendWrapper> inpWrapper = inpLd.outputBlobsWrappers[cons_inp].
                                                               dynamicCast<NgraphBackendWrapper>();
                    CV_Assert(!inpWrapper.empty());
                    auto iter = std::find(inputNames.begin(), inputNames.end(),
                                          inpWrapper->dataPtr->getName());
                    if (iter == inputNames.end()) {
                        inputNames.push_back(inpWrapper->dataPtr->getName());
                        inputs.push_back(inpLd.outputBlobs[cons_inp]);
                    curr_pos = cons + 1;

                auto inps = net->setInputs(inputs, inputNames);
                for (auto& inp : inps) {
                    inputNodes.emplace_back(Ptr<BackendNode>(new InfEngineNgraphNode(inp)));
            Ptr<BackendNode> node;

                bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
                               ld.inputBlobs[0]->data == ld.outputBlobs[0].data;

                node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
                ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;

                net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));

            CV_Assert(ld.inputBlobsId.size() == inputNodes.size());
            for (int i = 0; i < ld.inputBlobsId.size(); ++i)
                int lid = ld.inputBlobsId[i].lid;
                int oid = ld.inputBlobsId[i].oid;
                if (oid == 0 || lid == 0)

                auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>();
                CV_Assert(oid < ieInpNode->node->get_output_size());
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
                inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node));
#elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3)
                inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid)));
                inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid, false)));

            if (layer->supportBackend(preferableBackend))
                node = layer->initNgraph(ld.inputBlobsWrappers, inputNodes);
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                    InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
                    node.dynamicCast<InfEngineNgraphNode>()->setName(dataPtr->getName());

                node = Ptr<BackendNode>(new InfEngineNgraphNode(inputNodes,
                    ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));

            else if (node.empty())

            ld.backendNodes[preferableBackend] = node;

            Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
            CV_Assert(!ieNode.empty());

            if (ld.consumers.empty()) {
                // TF EAST_text_detection
                ieNode->net->setUnconnectedNodes(ieNode);
            for (const auto& pin : blobsToKeep_)
                if (pin.lid == ld.id)
                    ieNode->net->addOutput(ieNode->node->get_friendly_name());

            ieNode->net->setNodePtr(&ieNode->node);

            net->addBlobs(ld.inputBlobsWrappers);
            net->addBlobs(ld.outputBlobsWrappers);
            addNgraphOutputs(ld);
2168 // Initialize all networks.
2169 for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
2171 LayerData &ld = it->second;
2172 auto iter = ld.backendNodes.find(preferableBackend);
2173 if (iter == ld.backendNodes.end())
2176 Ptr<BackendNode>& node = iter->second;
2180 Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2184 CV_Assert(!ieNode->net.empty());
2186 if (!ieNode->net->isInitialized())
2188 ieNode->net->setUnconnectedNodes(ieNode);
2189 ieNode->net->createNet((Target)preferableTarget);
2194 #endif // HAVE_DNN_NGRAPH
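// A minimal usage sketch (illustrative only; the model file names are
// assumed) of the user-facing calls that exercise the nGraph
// initialization path above:
//   Net net = readNet("model.xml", "model.bin");
//   net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
//   net.setInput(blob);
//   Mat out = net.forward();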
2196 void allocateLayer(int lid, const LayersShapesMap& layersShapes)
2198 CV_TRACE_FUNCTION();
2200 LayerData &ld = layers[lid];
2206 size_t ninputs = ld.inputBlobsId.size();
2208 printf("layer %s:", ld.name.c_str());
2209 for (size_t i = 0; i < ninputs; i++)
2211 int inp_lid = ld.inputBlobsId[i].lid;
2212 LayerData &inp_ld = layers[inp_lid];
2213 int inp_outputs = (int)inp_ld.outputBlobs.size();
2214 std::cout << " " << inp_ld.name << "(" << inp_outputs;
2216 for( int j = 0; j < inp_outputs; j++ )
2218 std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size;
2225 //determine parent layers
2226 for (size_t i = 0; i < ninputs; i++)
2227 ld.inputLayersId.insert(ld.inputBlobsId[i].lid);
2230 for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
2231 allocateLayer(*i, layersShapes);
2234 if (ld.id == 0) // DataLayer
2236 ninputs = netInputLayer->inputsData.size();
2237 ld.inputBlobsWrappers.resize(ninputs);
2238 for (size_t i = 0; i < ninputs; i++)
2240 ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
2245 ld.inputBlobs.resize(ninputs);
2246 ld.inputBlobsWrappers.resize(ninputs);
2247 for (size_t i = 0; i < ninputs; i++)
2249 LayerPin from = ld.inputBlobsId[i];
2250 CV_Assert(from.valid());
2251 CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
2252 ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
2253 ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
2257 LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);
2259 CV_Assert(layerShapesIt != layersShapes.end());
2261 std::vector<LayerPin> pinsForInternalBlobs;
2262 blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
2263 preferableBackend == DNN_BACKEND_OPENCV &&
2264 preferableTarget == DNN_TARGET_OPENCL_FP16);
2265 ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
2266 for (int i = 0; i < ld.outputBlobs.size(); ++i)
2268 ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
2270 ld.internalBlobsWrappers.resize(ld.internals.size());
2271 for (int i = 0; i < ld.internals.size(); ++i)
2273 ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
2276 Ptr<Layer> layerPtr = ld.getLayerInstance();
2278 std::vector<Mat> inps(ld.inputBlobs.size());
2279 for (int i = 0; i < ld.inputBlobs.size(); ++i)
2281 inps[i] = *ld.inputBlobs[i];
2283 layerPtr->finalize(inps, ld.outputBlobs);
2284 layerPtr->preferableTarget = preferableTarget;
2286 std::cout << "\toutputs:";
2287 size_t noutputs = ld.outputBlobs.size();
2288 for (size_t j = 0; j < noutputs; j++)
2290 std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size;
2296 // After allocation of the layer, we decrease the reference counters of its input blobs.
2297 blobManager.releaseReferences(ld.inputBlobsId);
2298 blobManager.releaseReferences(pinsForInternalBlobs);
2304 #define printf_(args) printf args
2306 #define printf_(args)
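// Note: printf_ expects its argument list wrapped in an extra pair of
// parentheses so that a variadic format call can pass through the single
// macro parameter, e.g.
//   printf_(("fused with %s\n", nextLayer->name.c_str()));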
2309 void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
2311 CV_TRACE_FUNCTION();
2313 if(!fusion || (preferableBackend != DNN_BACKEND_OPENCV &&
2314 preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 &&
2315 preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH))
2318 // Scan through all the layers. If there is a convolution layer followed by an activation layer,
2319 // we try to embed the activation into the convolution and disable separate execution of the activation.
2320 std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
2321 blobsToKeep_.end());
2322 MapIdToLayerData::iterator it;
2323 for (it = layers.begin(); it != layers.end(); it++)
2325 int lid = it->first;
2326 LayerData& ld = layers[lid];
2329 printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
2332 printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
2334 // Optimization #1: try to fuse batch norm, scaling and/or activation layers
2335 // with the current layer if they follow it. Normally, they are fused with the convolution layer,
2336 // but some of them (like activation) may be fused with fully-connected, elemwise (+) and
2337 // some other layers.
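// For example (illustrative), a chain
//   [Convolution] -> [BatchNorm] -> [ReLU]
// collapses into the single [Convolution] node: the batch norm parameters
// are folded into the convolution, the activation is applied in the same
// pass, and the two following layers are marked as skipped.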
2338 Ptr<Layer>& currLayer = ld.layerInstance;
2339 if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
2341 LayerData* nextData = &layers[ld.consumers[0].lid];
2342 LayerPin lpNext(ld.consumers[0].lid, 0);
2345 Ptr<Layer> nextLayer = nextData->layerInstance;
2346 if (currLayer->tryFuse(nextLayer))
2348 printf_(("\tfused with %s\n", nextLayer->name.c_str()));
2349 nextData->skip = true;
2350 ld.outputBlobs = layers[lpNext.lid].outputBlobs;
2351 ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
2352 if (nextData->consumers.size() == 1)
2354 int nextLayerId = nextData->consumers[0].lid;
2355 nextData = &layers[nextLayerId];
2356 lpNext = LayerPin(nextLayerId, 0);
2368 if (preferableBackend != DNN_BACKEND_OPENCV)
2369 continue; // Go to the next layer.
2371 // TODO: make the OpenCL target support more fusion styles.
2372 if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
2373 (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
2374 ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
2375 ld.layerInstance->type != "Concat")) )
2380 // For now, the OpenCL target supports fusion only with the ReLU/ChannelsPReLU/ReLU6/TanH/Power activations
2381 if (IS_DNN_OPENCL_TARGET(preferableTarget) &&
2382 nextData->type != "ReLU" &&
2383 nextData->type != "ChannelsPReLU" &&
2384 nextData->type != "ReLU6" &&
2385 nextData->type != "TanH" &&
2386 nextData->type != "Power")
2389 Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
2390 if (nextActivLayer.empty())
2393 if (currLayer->setActivation(nextActivLayer))
2395 printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
2396 nextData->skip = true;
2397 ld.outputBlobs = layers[lpNext.lid].outputBlobs;
2398 ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
2399 if (nextData->consumers.size() == 1)
2401 int nextLayerId = nextData->consumers[0].lid;
2402 nextData = &layers[nextLayerId];
2403 lpNext = LayerPin(nextLayerId, 0);
2415 // Fuse a convolution layer followed by eltwise + ReLU.
2416 if ( IS_DNN_OPENCL_TARGET(preferableTarget) && ld.layerInstance->type == "Convolution" )
2418 Ptr<EltwiseLayer> nextEltwiseLayer;
2420 nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
2422 if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
2423 nextData && nextData->inputBlobsId.size() == 2 )
2425 LayerData *eltwiseData = nextData;
2427 // Eltwise layer has two inputs. We need to determine which
2428 // is the base convolution layer and which could be used as its bias.
2429 LayerData* biasLayerData = 0;
2430 for (int i = 0; i < 2; ++i)
2432 LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid];
2433 CV_Assert(downLayerData);
2434 while (downLayerData->skip)
2436 if (downLayerData->inputBlobsId.size() == 1)
2437 downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
2444 if (downLayerData && ld.id == downLayerData->id)
2446 biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid];
2450 CV_Assert(biasLayerData);
2452 if( eltwiseData->consumers.size() == 1 )
2454 // fuse eltwise + activation layer
2455 if (biasLayerData->id < ld.id)
2457 nextData = &layers[eltwiseData->consumers[0].lid];
2458 lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
2459 Ptr<ActivationLayer> nextActivLayer;
2461 nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
2463 if( !nextActivLayer.empty() &&
2464 (!nextData->type.compare("ReLU") ||
2465 !nextData->type.compare("ChannelsPReLU") ||
2466 !nextData->type.compare("Power")) &&
2467 currLayer->setActivation(nextActivLayer) )
2469 CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
2470 ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
2471 printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
2472 printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
2473 eltwiseData->skip = true;
2474 nextData->skip = true;
2475 // This optimization is for cases like
2476 // some_layer   conv
2477 //   |             |
2478 //   +-- eltwise --+
2479 //          |
2480 //        activ
2481 // This way all the element-wise computations
2482 // (i.e. some_layer+conv or some_layer*conv)
2483 // would be done at the [conv] layer. So we need to
2484 // replace [conv]'s output blob with [eltwise]'s one,
2485 // considering that [activ] is an in-place layer.
2486 // Also we need to move all the consumers' references.
2487 // To prevent memory collisions (i.e. when input of
2488 // [conv] and output of [eltwise] is the same blob)
2489 // we allocate a new blob.
2490 CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
2491 ld.outputBlobs[0] = ld.outputBlobs[0].clone();
2492 ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
2494 eltwiseData->outputBlobs = ld.outputBlobs;
2495 nextData->outputBlobs = ld.outputBlobs;
2496 eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
2497 nextData->outputBlobsWrappers = ld.outputBlobsWrappers;
2499 // Move references of [activ] layer consumers to the newly allocated blob.
2500 for (int i = 0; i < nextData->consumers.size(); ++i)
2502 LayerData& consumer = layers[nextData->consumers[i].lid];
2503 for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
2505 if (consumer.inputBlobsId[j].lid == lpNext.lid)
2507 consumer.inputBlobs[j] = &ld.outputBlobs[0];
2508 consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
2521 if (preferableBackend != DNN_BACKEND_OPENCV)
2522 continue; // Go to the next layer.
2524 // Optimization #2: if there is a concat layer that concatenates channels
2525 // from the inputs together (i.e. axis == 1), then we make the inputs of
2526 // the concat layer write directly into the concatenation output buffer
2527 // (and so we eliminate the concatenation layer itself, because the channels
2528 // are concatenated implicitly).
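// For example (illustrative shapes): concatenating inputs of shapes
// 1x64xHxW and 1x32xHxW along axis == 1 lets the two producing layers
// write directly into channel ranges [0, 64) and [64, 96) of the shared
// 1x96xHxW output, so the Concat layer itself performs no copy.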
2529 Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
2530 if( !concatLayer.empty() && !concatLayer->padding && ld.outputBlobs.size() == 1 )
2532 Mat& output = ld.outputBlobs[0];
2535 if (!ld.outputBlobsWrappers.empty() &&
2536 (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)))
2538 size_t i, ninputs = ld.inputBlobsId.size();
2539 bool conv_layer = true;
2540 for( i = 0; i < ninputs; i++ )
2542 LayerPin pin = ld.inputBlobsId[i];
2543 LayerData* inp_i_data = &layers[pin.lid];
2544 while(inp_i_data->skip &&
2545 inp_i_data->inputBlobsId.size() == 1 &&
2546 inp_i_data->consumers.size() == 1)
2548 pin = inp_i_data->inputBlobsId[0];
2549 inp_i_data = &layers[pin.lid];
2551 conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution");
2555 std::vector<UMat> umat_outputBlobs;
2556 umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2557 umat_output = umat_outputBlobs[0];
2561 // TODO: in general, this optimization can always be done, but
2562 // many layers currently check that the input/output blobs are
2563 // continuous arrays. Unfortunately, this is not true when
2564 // the concatenation optimization is applied with batch_size > 1.
2565 // So, for now, we only apply this optimization in the most popular
2566 // case, batch_size == 1.
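// For an NCHW output with axis == 1, output.total(0, axis) equals the
// batch size N, so the check below enables the optimization only for N == 1.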
2567 int axis = clamp(concatLayer->axis, output.dims);
2568 if( output.total(0, axis) == 1 )
2570 size_t i, ninputs = ld.inputBlobsId.size();
2571 std::vector<LayerPin> realinputs(ninputs);
2572 for( i = 0; i < ninputs; i++ )
2574 LayerPin pin = ld.inputBlobsId[i];
2575 LayerData* inp_i_data = &layers[pin.lid];
2576 while(inp_i_data->skip &&
2577 inp_i_data->inputBlobsId.size() == 1 &&
2578 inp_i_data->consumers.size() == 1)
2580 pin = inp_i_data->inputBlobsId[0];
2581 inp_i_data = &layers[pin.lid];
2583 printf_(("\treal input for %s is %s\n",
2584 layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
2585 inp_i_data->getLayerInstance()->name.c_str()));
2587 if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
2589 realinputs[i] = pin;
2594 // Allocate new memory to prevent collisions during memory
2595 // reusing (see https://github.com/opencv/opencv/pull/10456).
2596 output = output.clone();
2598 if (preferableBackend == DNN_BACKEND_OPENCV &&
2599 IS_DNN_OPENCL_TARGET(preferableTarget))
2601 std::vector<UMat> umats(1);
2602 umat_output = umat_output.clone();
2603 umats[0] = umat_output;
2604 OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
2607 std::vector<Range> chrange(output.dims, Range::all());
2609 for( i = 0; i < ninputs; i++ )
2611 LayerPin pin = realinputs[i];
2612 LayerData* inp_i_data = &layers[pin.lid];
2613 int channels_i = ld.inputBlobs[i]->size[axis];
2614 chrange[axis] = Range(ofs, ofs + channels_i);
2615 printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
2616 pin.oid, ofs, ofs + channels_i));
2618 Mat output_slice = output(chrange);
2619 Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
2620 CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
2621 Mat* oldPtr = &curr_output;
2622 curr_output = output_slice;
2624 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
2626 std::vector<UMat> umats(inp_i_data->outputBlobsWrappers.size());
2627 umats[pin.oid] = umat_output(chrange);
2628 OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats);
2631 // Layers that referred to the old input Mat will now refer to the
2632 // new data through the same Mat object.
2633 CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output);
2636 printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
2643 void allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
2645 CV_TRACE_FUNCTION();
2647 MapIdToLayerData::iterator it;
2648 for (it = layers.begin(); it != layers.end(); it++)
2649 it->second.flag = 0;
2651 CV_Assert(!layers[0].outputBlobs.empty());
2652 ShapesVec inputShapes;
2653 for(int i = 0; i < layers[0].outputBlobs.size(); i++)
2655 Mat& inp = layers[0].outputBlobs[i];
2656 CV_Assert(inp.total());
2657 if (preferableBackend == DNN_BACKEND_OPENCV &&
2658 preferableTarget == DNN_TARGET_OPENCL_FP16)
2660 layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
2662 inputShapes.push_back(shape(inp));
2664 LayersShapesMap layersShapes;
2665 getLayersShapes(inputShapes, layersShapes);
2667 blobManager.reset();
2668 backendWrappers.clear();
2669 // Fake references to input blobs.
2670 for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
2671 blobManager.addReference(LayerPin(0, i));
2672 for (it = layers.begin(); it != layers.end(); ++it)
2674 const LayerData& ld = it->second;
2675 blobManager.addReferences(ld.inputBlobsId);
2678 for (int i = 0; i < blobsToKeep_.size(); i++)
2680 blobManager.addReference(blobsToKeep_[i]);
2683 for (it = layers.begin(); it != layers.end(); it++)
2685 int lid = it->first;
2686 allocateLayer(lid, layersShapes);
2689 layersTimings.resize(lastLayerId + 1, 0);
2690 fuseLayers(blobsToKeep_);
2693 void forwardLayer(LayerData &ld)
2695 CV_TRACE_FUNCTION();
2697 Ptr<Layer> layer = ld.layerInstance;
2704 std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
2705 if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
2708 CV_Error(Error::StsNotImplemented, "Default implementation fallback is not supported in asynchronous mode");
2710 if (!layer->supportBackend(DNN_BACKEND_OPENCV))
2711 CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" is unsupported by the OpenCV backend",
2712 ld.name.c_str(), ld.type.c_str()));
2715 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
2717 std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
2718 std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2719 std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
2720 layer->forward(umat_inputBlobs,
2722 umat_internalBlobs);
2723 if (DNN_CHECK_NAN_INF)
2726 for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
2728 UMat& u = umat_outputBlobs[i];
2730 if (u.depth() == CV_16S) // FP16
2733 m = u.getMat(ACCESS_READ);
2736 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2737 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2740 else if (!checkRange(m, true, NULL, -1e6, 1e6))
2742 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2743 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2749 for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
2751 UMat& u = umat_inputBlobs[i];
2753 if (u.depth() == CV_16S) // FP16
2756 m = u.getMat(ACCESS_READ);
2757 std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
2758 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2760 for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
2762 UMat& u = umat_outputBlobs[i];
2764 if (u.depth() == CV_16S) // FP16
2767 m = u.getMat(ACCESS_READ);
2768 std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
2769 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2771 for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
2773 UMat& u = umat_internalBlobs[i];
2775 if (u.depth() == CV_16S) // FP16
2778 m = u.getMat(ACCESS_READ);
2779 std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
2780 if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
2782 if (DNN_CHECK_NAN_INF_RAISE_ERROR)
2786 OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
2791 for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
2793 if (!ld.inputBlobsWrappers[i].empty())
2794 ld.inputBlobsWrappers[i]->copyToHost();
2797 std::vector<Mat> inps(ld.inputBlobs.size());
2798 for (int i = 0; i < ld.inputBlobs.size(); ++i)
2800 inps[i] = *ld.inputBlobs[i];
2802 layer->forward(inps, ld.outputBlobs, ld.internals);
2804 if (DNN_CHECK_NAN_INF)
2807 for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
2809 const Mat& m = ld.outputBlobs[i];
2812 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2813 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2816 else if (!checkRange(m, true, NULL, -1e6, 1e6))
2818 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2819 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2825 for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
2827 const Mat* pM = ld.inputBlobs[i];
2830 std::cout << "INPUT " << i << " is NULL" << std::endl;
2834 std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
2835 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2837 for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
2839 const Mat& m = ld.outputBlobs[i];
2840 std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
2841 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2843 for (size_t i = 0; i < ld.internals.size(); ++i)
2845 const Mat& m = ld.internals[i];
2846 std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
2847 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2849 if (DNN_CHECK_NAN_INF_RAISE_ERROR)
2854 for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
2856 if (!ld.outputBlobsWrappers[i].empty())
2857 ld.outputBlobsWrappers[i]->setHostDirty();
2863 Ptr<BackendNode> node = it->second;
2864 CV_Assert(!node.empty());
2865 if (preferableBackend == DNN_BACKEND_HALIDE)
2867 forwardHalide(ld.outputBlobsWrappers, node);
2869 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
2871 forwardInfEngine(ld.outputBlobsWrappers, node, isAsync);
2873 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
2875 forwardNgraph(ld.outputBlobsWrappers, node, isAsync);
2879 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
2887 layersTimings[ld.id] = tm.getTimeTicks();
2892 void forwardToLayer(LayerData &ld, bool clearFlags = true)
2894 CV_TRACE_FUNCTION();
2898 MapIdToLayerData::iterator it;
2899 for (it = layers.begin(); it != layers.end(); it++)
2900 it->second.flag = 0;
2903 // was already forwarded
2908 MapIdToLayerData::iterator it;
2909 for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
2911 LayerData &ld = it->second;
2921 void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
2923 std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
2925 if (id == 0 && inOutShapes[id].in[0].empty())
2927 if (!layers[0].outputBlobs.empty())
2930 for (int i = 0; i < layers[0].outputBlobs.size(); i++)
2932 Mat& inp = layers[0].outputBlobs[i];
2933 CV_Assert(inp.total());
2934 shapes.push_back(shape(inp));
2936 inOutShapes[0].in = shapes;
2940 const std::vector<MatShape>& inputShapes = netInputLayer->shapes;
2942 for (size_t i = 0; i < inputShapes.size(); i++)
2944 if (!inputShapes[i].empty())
2952 inOutShapes[0].out.clear();
2957 inOutShapes[0].in = inputShapes;
2962 if (inOutShapes[id].in.empty())
2964 for(int i = 0; i < inputLayerIds.size(); i++)
2966 int layerId = inputLayerIds[i].lid;
2967 LayersShapesMap::iterator it =
2968 inOutShapes.find(layerId);
2969 if(it == inOutShapes.end() ||
2970 it->second.out.empty())
2972 getLayerShapesRecursively(layerId, inOutShapes);
2974 const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
2975 inOutShapes[id].in.push_back(shape);
2978 const ShapesVec& is = inOutShapes[id].in;
2979 ShapesVec& os = inOutShapes[id].out;
2980 ShapesVec& ints = inOutShapes[id].internal;
2981 int requiredOutputs = layers[id].requiredOutputs.size();
2982 Ptr<Layer> l = layers[id].getLayerInstance();
2984 bool layerSupportInPlace = false;
2987 layerSupportInPlace = l->getMemoryShapes(is, requiredOutputs, os, ints);
2989 catch (const cv::Exception& e)
2991 CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() throws exception." <<
2992 " inputs=" << is.size() <<
2993 " outputs=" << os.size() << "/" << requiredOutputs <<
2994 " blobs=" << l->blobs.size());
2995 for (size_t i = 0; i < is.size(); ++i)
2997 CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i]));
2999 for (size_t i = 0; i < os.size(); ++i)
3001 CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i]));
3003 for (size_t i = 0; i < l->blobs.size(); ++i)
3005 CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i])));
3007 CV_LOG_ERROR(NULL, "Exception message: " << e.what());
3010 inOutShapes[id].supportInPlace = layerSupportInPlace;
3012 for (int i = 0; i < ints.size(); i++)
3013 CV_Assert(total(ints[i]) > 0);
3015 for (int i = 0; i < os.size(); i++)
3016 CV_Assert(total(os[i]) > 0);
3019 void getLayersShapes(const ShapesVec& netInputShapes,
3020 LayersShapesMap& inOutShapes)
3022 inOutShapes.clear();
3024 inOutShapes[0].in = netInputShapes; //insert shape for first input layer
3025 for (MapIdToLayerData::iterator it = layers.begin();
3026 it != layers.end(); it++)
3028 getLayerShapesRecursively(it->first, inOutShapes);
3032 void getLayerShapes(const ShapesVec& netInputShapes,
3034 LayerShapes& shapes)
3036 LayersShapesMap inOutShapes;
3037 inOutShapes[0].in = netInputShapes; //insert shape for first input layer
3038 getLayerShapesRecursively(layerId, inOutShapes);
3039 shapes = inOutShapes[layerId];
3042 LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
3044 return *std::max_element(pins.begin(), pins.end());
3047 Mat getBlob(const LayerPin& pin)
3049 CV_TRACE_FUNCTION();
3052 CV_Error(Error::StsObjectNotFound, "Requested blob not found");
3054 LayerData &ld = layers[pin.lid];
3055 if ((size_t)pin.oid >= ld.outputBlobs.size())
3057 CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produces only %d outputs, "
3058 "the #%d was requested", ld.name.c_str(),
3059 (int)ld.outputBlobs.size(), pin.oid));
3061 if (preferableTarget != DNN_TARGET_CPU)
3063 CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
3064 // Transfer data to CPU if required.
3065 ld.outputBlobsWrappers[pin.oid]->copyToHost();
3068 if (ld.outputBlobs[pin.oid].depth() == CV_16S)
3070 convertFp16(ld.outputBlobs[pin.oid], output_blob);
3074 return ld.outputBlobs[pin.oid];
3077 Mat getBlob(String outputName)
3079 return getBlob(getPinByAlias(outputName));
3083 AsyncArray getBlobAsync(const LayerPin& pin)
3085 CV_TRACE_FUNCTION();
3086 #ifdef HAVE_INF_ENGINE
3088 CV_Error(Error::StsObjectNotFound, "Requested blob not found");
3090 LayerData &ld = layers[pin.lid];
3091 if ((size_t)pin.oid >= ld.outputBlobs.size())
3093 CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produces only %d outputs, "
3094 "the #%d was requested", ld.name.c_str(),
3095 (int)ld.outputBlobs.size(), pin.oid));
3097 if (preferableTarget != DNN_TARGET_CPU)
3099 CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
3100 // Transfer data to CPU if required.
3101 ld.outputBlobsWrappers[pin.oid]->copyToHost();
3103 CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
3105 if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) {
3106 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3107 Ptr<InfEngineBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<InfEngineBackendWrapper>();
3108 return std::move(wrapper->futureMat);
3110 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3113 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
3115 #ifdef HAVE_DNN_NGRAPH
3116 Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
3117 return std::move(wrapper->futureMat);
3119 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
3122 #endif // HAVE_INF_ENGINE
3123 CV_Error(Error::StsNotImplemented, "DNN: getBlobAsync() requires an Inference Engine backend");
3126 AsyncArray getBlobAsync(String outputName)
3128 return getBlobAsync(getPinByAlias(outputName));
3132 #ifdef HAVE_INF_ENGINE
3134 Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
3139 void dumpNetworkToFile()
3141 #ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
3142 string dumpFileNameBase = getDumpFileNameBase();
3143 string dumpFileName = dumpFileNameBase + ".dot";
3146 string dumpStr = dump();
3147 std::ofstream out(dumpFileName.c_str(), std::ios::out | std::ios::binary);
3150 catch (const std::exception& e)
3152 std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
3153 out << "Exception: " << e.what() << std::endl;
3157 std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
3158 out << "Can't dump: unknown exception" << std::endl;
3164 Net::Net() : impl(new Net::Impl)
3168 #ifdef HAVE_INF_ENGINE
3170 Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
3172 CV_TRACE_FUNCTION();
3174 CV_TRACE_REGION("register_inputs");
3176 std::vector<String> inputsNames;
3177 std::vector<MatShape> inp_shapes;
3178 for (auto& it : ieNet.getInputsInfo())
3180 inputsNames.push_back(it.first);
3181 std::vector<size_t> dims = it.second->getTensorDesc().getDims();
3182 inp_shapes.push_back(std::vector<int>(dims.begin(), dims.end()));
3186 cvNet.setInputsNames(inputsNames);
3188 // set empty input to determine input shapes
3189 for (int inp_id = 0; inp_id < inputsNames.size(); ++inp_id)
3191 cvNet.setInputShape(inputsNames[inp_id], inp_shapes[inp_id]);
3194 CV_TRACE_REGION_NEXT("backendNode");
3196 Ptr<BackendNode> backendNode;
3197 #ifdef HAVE_DNN_NGRAPH
3198 if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
3200 auto fake_node = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{});
3201 Ptr<InfEngineNgraphNode> backendNodeNGraph(new InfEngineNgraphNode(fake_node));
3202 backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*(cvNet.impl), ieNet));
3203 backendNode = backendNodeNGraph;
3208 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3209 Ptr<InfEngineBackendNode> backendNodeNN(new InfEngineBackendNode(InferenceEngine::Builder::Layer("")));
3210 backendNodeNN->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
3211 backendNode = backendNodeNN;
3213 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3217 CV_TRACE_REGION_NEXT("register_outputs");
3219 #ifdef HAVE_DNN_NGRAPH
3220 auto ngraphFunction = ieNet.getFunction();
3221 #if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2)
3222 std::list< std::shared_ptr<ngraph::Node> > ngraphOperations;
3224 std::vector< std::shared_ptr<ngraph::Node> > ngraphOperations;
3228 ngraphOperations = ngraphFunction->get_ops();
3232 for (auto& it : ieNet.getOutputsInfo())
3234 CV_TRACE_REGION("output");
3235 const auto& outputName = it.first;
3238 int lid = cvNet.addLayer(it.first, "", lp);
3240 LayerData& ld = cvNet.impl->layers[lid];
3242 #ifdef HAVE_DNN_NGRAPH
3243 if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
3245 Ptr<Layer> cvLayer(new NgraphBackendLayer(ieNet));
3246 cvLayer->name = outputName;
3247 cvLayer->type = "_unknown_";
3249 auto process_layer = [&](const std::string& name) -> bool
3253 CV_TRACE_REGION("ngraph_function");
3254 for (const auto& op : ngraphOperations)
3257 if (op->get_friendly_name() == name)
3259 const std::string typeName = op->get_type_info().name;
3260 cvLayer->type = typeName;
3268 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
3269 CV_Error(Error::StsNotImplemented, "This OpenCV version is built with Inference Engine which has dropped IR v7 support");
3271 CV_TRACE_REGION("legacy_cnn_layer");
3274 InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(name.c_str());
3277 cvLayer->type = ieLayer->type;
3280 catch (const std::exception& e)
3283 CV_LOG_DEBUG(NULL, "IE layer extraction failure: '" << name << "' - " << e.what());
3291 bool found = process_layer(outputName);
3294 auto pos = outputName.rfind('.'); // cut port number: ".0"
3295 if (pos != std::string::npos)
3297 std::string layerName = outputName.substr(0, pos);
3298 found = process_layer(layerName);
3302 CV_LOG_WARNING(NULL, "DNN/IE: Can't determine output layer type: '" << outputName << "'");
3304 ld.layerInstance = cvLayer;
3305 ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NGRAPH] = backendNode;
3310 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3311 Ptr<Layer> cvLayer(new InfEngineBackendLayer(ieNet));
3313 InferenceEngine::CNNLayerPtr ieLayer;
3316 ieLayer = ieNet.getLayerByName(outputName.c_str());
3320 auto pos = outputName.rfind('.'); // cut port number: ".0"
3321 if (pos != std::string::npos)
3323 std::string layerName = outputName.substr(0, pos);
3324 ieLayer = ieNet.getLayerByName(layerName.c_str());
3329 cvLayer->name = outputName;
3330 cvLayer->type = ieLayer->type;
3331 ld.layerInstance = cvLayer;
3333 ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019] = backendNode;
3335 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3339 for (int i = 0; i < inputsNames.size(); ++i)
3340 cvNet.connect(0, i, lid, i);
3343 CV_TRACE_REGION_NEXT("finalize");
3345 cvNet.setPreferableBackend(getInferenceEngineBackendTypeParam());
3347 cvNet.impl->skipInfEngineInit = true;
3350 #endif // HAVE_INF_ENGINE
3352 Net Net::readFromModelOptimizer(const String& xml, const String& bin)
3354 CV_TRACE_FUNCTION();
3355 #ifndef HAVE_INF_ENGINE
3356 CV_UNUSED(xml); CV_UNUSED(bin);
3357 CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
3359 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
3360 InferenceEngine::CNNNetReader reader;
3361 reader.ReadNetwork(xml);
3362 reader.ReadWeights(bin);
3364 InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
3366 InferenceEngine::Core& ie = getCore("");
3367 InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin);
3370 return Impl::createNetworkFromModelOptimizer(ieNet);
3371 #endif // HAVE_INF_ENGINE
3374 Net Net::readFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights)
3376 CV_TRACE_FUNCTION();
3377 CV_Assert(!bufferModelConfig.empty());
3378 CV_Assert(!bufferWeights.empty());
3379 return readFromModelOptimizer(bufferModelConfig.data(), bufferModelConfig.size(),
3380 bufferWeights.data(), bufferWeights.size());
3383 Net Net::readFromModelOptimizer(
3384 const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
3385 const uchar* bufferWeightsPtr, size_t bufferWeightsSize
3388 CV_TRACE_FUNCTION();
3389 #ifndef HAVE_INF_ENGINE
3390 CV_UNUSED(bufferModelConfigPtr); CV_UNUSED(bufferWeightsPtr);
3391 CV_UNUSED(bufferModelConfigSize); CV_UNUSED(bufferModelConfigSize);
3392 CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
3395 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
3396 InferenceEngine::CNNNetReader reader;
3400 reader.ReadNetwork(bufferModelConfigPtr, bufferModelConfigSize);
3402 InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
3403 InferenceEngine::TBlob<uint8_t>::Ptr weightsBlobPtr(new InferenceEngine::TBlob<uint8_t>(tensorDesc));
3404 weightsBlobPtr->allocate();
3405 std::memcpy(weightsBlobPtr->buffer(), (uchar*)bufferWeightsPtr, bufferWeightsSize);
3406 reader.SetWeights(weightsBlobPtr);
3408 catch (const std::exception& e)
3410 CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
3413 InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
3415 InferenceEngine::Core& ie = getCore("");
3417 std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize);
3419 InferenceEngine::CNNNetwork ieNet;
3422 InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
3423 InferenceEngine::Blob::CPtr weights_blob = InferenceEngine::make_shared_blob<uint8_t>(tensorDesc, (uint8_t*)bufferWeightsPtr, bufferWeightsSize);
3425 ieNet = ie.ReadNetwork(model, weights_blob);
3427 catch (const std::exception& e)
3429 CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
3433 return Impl::createNetworkFromModelOptimizer(ieNet);
3434 #endif // HAVE_INF_ENGINE
3442 int Net::addLayer(const String &name, const String &type, LayerParams ¶ms)
3444 CV_TRACE_FUNCTION();
3446 if (impl->getLayerId(name) >= 0)
3448 CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already exists in the net");
3452 int id = ++impl->lastLayerId;
3453 impl->layerNameToId.insert(std::make_pair(name, id));
3454 impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));
3459 int Net::addLayerToPrev(const String &name, const String &type, LayerParams ¶ms)
3461 CV_TRACE_FUNCTION();
3463 int prvLid = impl->lastLayerId;
3464 int newLid = this->addLayer(name, type, params);
3465 this->connect(prvLid, 0, newLid, 0);
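// Usage sketch (illustrative name/type): building a net by hand,
//   LayerParams lp;
//   int id = net.addLayerToPrev("relu1", "ReLU", lp);
// connects output #0 of the most recently added layer to input #0 of the
// new layer, as implemented above.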
3469 void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum)
3471 CV_TRACE_FUNCTION();
3473 impl->connect(outLayerId, outNum, inpLayerId, inpNum);
3476 void Net::connect(String _outPin, String _inPin)
3478 CV_TRACE_FUNCTION();
3480 LayerPin outPin = impl->getPinByAlias(_outPin);
3481 LayerPin inpPin = impl->getPinByAlias(_inPin);
3483 CV_Assert(outPin.valid() && inpPin.valid());
3485 impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
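// Usage sketch (illustrative layer names): each pin alias is a layer name
// with an optional output suffix, resolved by getPinByAlias(), e.g.
//   net.connect("conv1", "relu1");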
3488 Mat Net::forward(const String& outputName)
3490 CV_TRACE_FUNCTION();
3491 CV_Assert(!empty());
3493 String layerName = outputName;
3495 if (layerName.empty())
3497 std::vector<String> layerNames = getLayerNames();
3498 CV_Assert(!layerNames.empty());
3499 layerName = layerNames.back();
3502 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
3503 impl->setUpNet(pins);
3504 impl->forwardToLayer(impl->getLayerData(layerName));
3506 return impl->getBlob(layerName);
3509 AsyncArray Net::forwardAsync(const String& outputName)
3511 CV_TRACE_FUNCTION();
3512 CV_Assert(!empty());
3515 String layerName = outputName;
3517 if (layerName.empty())
3519 std::vector<String> layerNames = getLayerNames();
3520 CV_Assert(!layerNames.empty());
3521 layerName = layerNames.back();
3524 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
3525 impl->setUpNet(pins);
3527 if (!(impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH))
3528 CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backends only");
3530 impl->isAsync = true;
3531 impl->forwardToLayer(impl->getLayerData(layerName));
3532 impl->isAsync = false;
3534 return impl->getBlobAsync(layerName);
3536 CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward requires build with enabled C++11");
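// Usage sketch (illustrative): asynchronous inference, which per the check
// above requires an Inference Engine backend.
//   net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
//   net.setInput(blob);
//   AsyncArray result = net.forwardAsync();
//   Mat out;
//   result.get(out); // blocks until the inference request completes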
3540 void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
3542 CV_TRACE_FUNCTION();
3543 CV_Assert(!empty());
3545 String layerName = outputName;
3547 if (layerName.empty())
3549 std::vector<String> layerNames = getLayerNames();
3550 CV_Assert(!layerNames.empty());
3551 layerName = layerNames.back();
3554 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
3555 impl->setUpNet(pins);
3556 impl->forwardToLayer(impl->getLayerData(layerName));
3558 LayerPin pin = impl->getPinByAlias(layerName);
3559 LayerData &ld = impl->layers[pin.lid];
3561 if (outputBlobs.isUMat())
3563 impl->getBlob(layerName).copyTo(outputBlobs);
3565 else if (outputBlobs.isMat())
3567 outputBlobs.assign(impl->getBlob(layerName));
3569 else if (outputBlobs.isMatVector())
3571 if (impl->preferableTarget != DNN_TARGET_CPU)
3573 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
3575 CV_Assert(!ld.outputBlobsWrappers[i].empty());
3576 ld.outputBlobsWrappers[i]->copyToHost();
3579 if (ld.outputBlobs[0].depth() == CV_32F)
3581 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
3582 outputvec = ld.outputBlobs;
3584 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
3585 outputvec.resize(ld.outputBlobs.size());
3586 for (int i = 0; i < outputvec.size(); i++)
3587 convertFp16(ld.outputBlobs[i], outputvec[i]);
3590 else if (outputBlobs.isUMatVector())
3592 std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
3595 if (impl->preferableBackend == DNN_BACKEND_OPENCV &&
3596 IS_DNN_OPENCL_TARGET(impl->preferableTarget))
3598 if (impl->preferableTarget == DNN_TARGET_OPENCL)
3599 outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
3600 else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
3602 std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
3603 outputvec.resize(out_vec.size());
3604 for (int i = 0; i < out_vec.size(); i++)
3605 convertFp16(out_vec[i], outputvec[i]);
3611 outputvec.resize(ld.outputBlobs.size());
3612 for (int i = 0; i < outputvec.size(); ++i)
3613 ld.outputBlobs[i].copyTo(outputvec[i]);
3618 void Net::forward(OutputArrayOfArrays outputBlobs,
3619 const std::vector<String>& outBlobNames)
3621 CV_TRACE_FUNCTION();
3623 std::vector<LayerPin> pins;
3624 for (int i = 0; i < outBlobNames.size(); i++)
3626 pins.push_back(impl->getPinByAlias(outBlobNames[i]));
3629 impl->setUpNet(pins);
3631 LayerPin out = impl->getLatestLayerPin(pins);
3633 impl->forwardToLayer(impl->getLayerData(out.lid));
3635 std::vector<Mat> matvec;
3636 for (int i = 0; i < pins.size(); i++)
3638 matvec.push_back(impl->getBlob(pins[i]));
3641 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
3645 void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
3646 const std::vector<String>& outBlobNames)
3648 CV_TRACE_FUNCTION();
3650 std::vector<LayerPin> pins;
3651 for (int i = 0; i < outBlobNames.size(); i++)
3653 pins.push_back(impl->getPinByAlias(outBlobNames[i]));
3656 impl->setUpNet(pins);
3658 LayerPin out = impl->getLatestLayerPin(pins);
3660 impl->forwardToLayer(impl->getLayerData(out.lid));
3662 outputBlobs.resize(outBlobNames.size());
3663 for (int i = 0; i < outBlobNames.size(); i++)
3665 std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
3666 outputBlobs[i].resize(lp.size());
3667 for (int j = 0; j < lp.size(); j++)
3669 outputBlobs[i][j] = impl->getBlob(lp[j]);
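// Usage sketch (illustrative): fetching several named outputs at once,
// e.g. for detection models with multiple output layers.
//   std::vector<String> outNames = net.getUnconnectedOutLayersNames();
//   std::vector<Mat> outs;
//   net.forward(outs, outNames);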
3674 void Net::setPreferableBackend(int backendId)
3676 CV_TRACE_FUNCTION();
3677 CV_TRACE_ARG(backendId);
3679 #ifdef HAVE_INF_ENGINE
3680 if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
3681 backendId = getInferenceEngineBackendTypeParam();
3684 if( impl->preferableBackend != backendId )
3686 impl->preferableBackend = backendId;
3687 impl->netWasAllocated = false;
3692 void Net::setPreferableTarget(int targetId)
3694 CV_TRACE_FUNCTION();
3695 CV_TRACE_ARG(targetId);
3697 if( impl->preferableTarget != targetId )
3699 impl->preferableTarget = targetId;
3700 if (IS_DNN_OPENCL_TARGET(targetId))
3703 #ifdef HAVE_INF_ENGINE
3704 if (impl->preferableBackend == DNN_BACKEND_OPENCV)
3706 if (impl->preferableBackend == DNN_BACKEND_DEFAULT ||
3707 impl->preferableBackend == DNN_BACKEND_OPENCV)
3708 #endif // HAVE_INF_ENGINE
3709 impl->preferableTarget = DNN_TARGET_CPU;
3711 bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
3712 if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
3713 impl->preferableTarget = DNN_TARGET_OPENCL;
3716 impl->netWasAllocated = false;
3721 void Net::setInputsNames(const std::vector<String> &inputBlobNames)
3723 CV_TRACE_FUNCTION();
3725 impl->netInputLayer->setNames(inputBlobNames);
3728 void Net::setInputShape(const String &inputName, const MatShape& shape)
3730 CV_TRACE_FUNCTION();
3732 impl->netInputLayer->setInputShape(inputName, shape);
3735 void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
3737 CV_TRACE_FUNCTION();
3738 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
3742 pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name);
3745 CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");
3747 Mat blob_ = blob.getMat(); // can't use InputArray directly due to MatExpr stuff
3748 MatShape blobShape = shape(blob_);
3752 CV_Assert(!impl->netInputLayer.empty());
3753 const DataLayer& netInputLayer = *impl->netInputLayer.get();
3754 if (!netInputLayer.shapes.empty())
3756 CV_CheckLT(pin.oid, (int)netInputLayer.shapes.size(), "");
3757 const MatShape& inputShapeLimitation = netInputLayer.shapes[pin.oid];
3758 if (!inputShapeLimitation.empty())
3760 CV_CheckEQ(inputShapeLimitation.size(), blobShape.size(), "");
3761 #if 0 // TODO: DNNTestNetwork.MobileNet_SSD_Caffe_Different_Width_Height/0
3762 const size_t dims = inputShapeLimitation.size();
3763 for (size_t dim = 0; dim < dims; dim++)
3765 if (dims >= 3 && dim == 0 && inputShapeLimitation[0] == 1)
3766 continue; // don't limit batch
3767 CV_CheckEQ(inputShapeLimitation[dim], blobShape[dim], "");
3774 LayerData &ld = impl->layers[pin.lid];
3775 const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size());
3776 ld.outputBlobs.resize(numInputs);
3777 ld.outputBlobsWrappers.resize(numInputs);
3778 impl->netInputLayer->inputsData.resize(numInputs);
3779 impl->netInputLayer->scaleFactors.resize(numInputs);
3780 impl->netInputLayer->means.resize(numInputs);
3782 MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
3783 bool oldShape = prevShape == blobShape;
3785 blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
3787 ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid];
3789 if (!ld.outputBlobsWrappers[pin.oid].empty())
3791 ld.outputBlobsWrappers[pin.oid]->setHostDirty();
3793 impl->netInputLayer->scaleFactors[pin.oid] = scalefactor;
3794 impl->netInputLayer->means[pin.oid] = mean;
3795 impl->netWasAllocated = impl->netWasAllocated && oldShape;
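// Usage sketch (illustrative values; "data" is an assumed input name):
//   net.setInput(blob, "data", 1.0 / 255.0, Scalar(104, 117, 123));
// The stored mean and scalefactor are applied by the input DataLayer, which
// feeds (blob - mean) * scalefactor into the network.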
3798 Mat Net::getParam(LayerId layer, int numParam)
3800 LayerData &ld = impl->getLayerData(layer);
3801 std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
3802 CV_Assert(numParam < (int)layerBlobs.size());
3803 return layerBlobs[numParam];
3806 void Net::setParam(LayerId layer, int numParam, const Mat &blob)
3808 LayerData &ld = impl->getLayerData(layer);
3810 std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
3811 CV_Assert(numParam < (int)layerBlobs.size());
3812 // We don't make strong checks; use this function carefully.
3813 layerBlobs[numParam] = blob;
3816 int Net::getLayerId(const String &layer)
3818 return impl->getLayerId(layer);
3822 string dumpLayerParameterSize(const string& name, const LayerParams& lp)
3824 std::ostringstream out(name, std::ios::ate);
3825 DictValue param = lp.get(name);
3826 switch (param.size())
3828 case 1: out << " : "; break;
3829 case 2: out << " (HxW): "; break;
3830 case 3: out << " (DxHxW): "; break;
3832 CV_LOG_INFO(NULL, format("DNN/dumpLayerParameterSize(): Unsupported '%s' size = %d", name.c_str(), param.size()));
3835 for (size_t i = 0; i < param.size(); i++)
3839 out << param.get<int>(i);
3846 CV_Assert(!empty());
3848 bool hasInput = !impl->netInputLayer->inputsData.empty();
3852 if (!impl->netWasAllocated)
3856 return impl->dump();
3859 string Net::Impl::dump()
3861 bool hasInput = !netInputLayer->inputsData.empty();
3863 std::ostringstream out;
3864 const std::map<int, LayerData>& map = layers;
3866 Backend prefBackend = (Backend)preferableBackend;
3867 std::vector<std::vector<int> > skippedLayers;
3868 std::vector<int> skipId;
3869 std::vector<int> allLayers(map.size(), -1);
3871 Ptr<BackendNode> prevNode;
3872 for (std::map<int, LayerData>::const_reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit)
3874 std::map<int, Ptr<BackendNode> >::const_iterator itBackend = rit->second.backendNodes.find(prefBackend);
3875 if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() ||
3876 itBackend->second.empty())
3878 if (rit->second.skip)
3879 skipId.push_back(rit->first);
3880 else if (!skipId.empty())
3882 if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty())
3883 skipId.push_back(rit->first);
3884 else if (idPrev != -1)
3885 skipId.push_back(idPrev);
3887 std::sort(skipId.begin(), skipId.end());
3888 for (int i = 0; i < skipId.size(); i++) {
3889 allLayers[skipId[i]] = skippedLayers.size();
3891 skippedLayers.push_back(skipId);
3897 if (itBackend->second == prevNode)
3898 skipId.push_back(idPrev);
3899 else if (!skipId.empty())
3901 skipId.push_back(idPrev);
3902 std::sort(skipId.begin(), skipId.end());
3903 for (int i = 0; i < skipId.size(); i++) {
3904 allLayers[skipId[i]] = skippedLayers.size();
3906 skippedLayers.push_back(skipId);
3909 idPrev = rit->first;
3910 prevNode = itBackend->second;
3913 string colors[] = {"#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462"};
3915 switch (prefBackend)
3917 case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break;
3918 case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break;
3919 case DNN_BACKEND_INFERENCE_ENGINE: // fallthru
3920 case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: backend = "DLIE/"; break;
3921 case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "NGRAPH/"; break;
3922 case DNN_BACKEND_OPENCV: backend = "OCV/"; break;
3923 // don't use default:
3925 out << "digraph G {\n";
3927 for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
3929 const LayerData& ld = it->second;
3930 string name = ld.params.name;
3931 std::vector<int> clusterIds(1, it->first);
3932 if (allLayers[it->first] == -1 && !name.empty())
3934 out << "\t\"" << name << "\" [label=\"";
3936 else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0])
3940 else // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0]
3942 int cluster = allLayers[it->first];
3943 out << "\t\"" << "cluster_" << cluster << "\" [label=\"{";
3944 clusterIds = skippedLayers[allLayers[it->first]]; // vertices in current cluster
3946 for (int i = 0; i < clusterIds.size(); i++)
3948 CV_DbgAssert(map.find(clusterIds[i]) != map.end());
3949 const LayerParams& lp = map.find(clusterIds[i])->second.params;
3950 if (!lp.name.empty()) {
3954 out << lp.name << "\\n" << lp.type << "\\n"; // align center
3955 if (lp.has("kernel_size"))
3957 string kernel = dumpLayerParameterSize("kernel_size", lp);
3959 out << "\\l"; // align left
3960 } else if (lp.has("kernel_h") && lp.has("kernel_w")) {
3961 DictValue h = lp.get("kernel_h");
3962 DictValue w = lp.get("kernel_w");
3963 out << "kernel (HxW): " << h << " x " << w;
3964 out << "\\l"; // align left
3966 if (lp.has("stride")) {
3967 string stride = dumpLayerParameterSize("stride", lp);
3969 out << "\\l"; // align left
3970 } else if (lp.has("stride_h") && lp.has("stride_w")) {
3971 DictValue h = lp.get("stride_h");
3972 DictValue w = lp.get("stride_w");
3973 out << "stride (HxW): " << h << " x " << w;
3974 out << "\\l"; // align left
3976 if (lp.has("dilation")) {
3977 string dilation = dumpLayerParameterSize("dilation", lp);
3979 out << "\\l"; // align left
3980 } else if (lp.has("dilation_h") && lp.has("dilation_w")) {
3981 DictValue h = lp.get("dilation_h");
3982 DictValue w = lp.get("dilation_w");
3983 out << "dilation (HxW): " << h << " x " << w;
3984 out << "\\l"; // align left
3986 if (lp.has("pad")) {
3987 DictValue pad = lp.get("pad");
3991 case 1: out << ": " << pad; break;
3993 out << "(HxW): (" << pad.get<int>(0) << " x " << pad.get<int>(1) << ")";
3996 out << "(HxW): (" << pad.get<int>(0) << ", " << pad.get<int>(2)
3997 << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(3) << ")";
4000 out << "(DxHxW): (" << pad.get<int>(0) << ", " << pad.get<int>(3)
4001 << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(4)
4002 << ") x (" << pad.get<int>(2) << ", " << pad.get<int>(5) << ")";
4004 default: CV_Error(Error::StsNotImplemented, format("Unsupported pad size = %d", pad.size()));
4006 out << "\\l"; // align left
4007 } else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b")) {
4008 DictValue l = lp.get("pad_l");
4009 DictValue t = lp.get("pad_t");
4010 DictValue r = lp.get("pad_r");
4011 DictValue b = lp.get("pad_b");
4012 out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")";
4013 out << "\\l"; // align left
4015 else if (lp.has("pooled_w") || lp.has("pooled_h")) {
4016 DictValue h = lp.get("pooled_h");
4017 DictValue w = lp.get("pooled_w");
4018 out << "pad pooled (HxW): " << h << " x " << w;
4019 out << "\\l"; // align left
4021 if (lp.has("pool")) {
4022 out << "pool: " << lp.get("pool");
4023 out << "\\l"; // align left
4025 if (lp.has("global_pooling")) {
4026 out << "global_pooling: " << lp.get("global_pooling");
4027 out << "\\l"; // align left
4029 if (lp.has("group")) {
4030 out << "group: " << lp.get("group");
4031 out << "\\l"; // align left
4035 if (!ld.outputBlobs.empty())
4037 out << "output: " << ld.outputBlobs[0].size;
4038 out << "\\l"; // align left
4041 Ptr<BackendNode> layerBackend;
4042 std::map<int, Ptr<BackendNode> >::const_iterator ibn = ld.backendNodes.find(prefBackend);
4043 if (ibn != ld.backendNodes.end())
4044 layerBackend = ibn->second;
4045 out << (!layerBackend.empty() ? backend : "OCV/");
4047 const Target target = ld.layerInstance.empty()
4049 : (Target)(ld.layerInstance->preferableTarget); // TODO fix preferableTarget type
4052 case DNN_TARGET_CPU: out << "CPU"; colorId = layerBackend.empty() ? 0 : 5; break;
4053 case DNN_TARGET_OPENCL: out << "OCL"; colorId = 1; break;
4054 case DNN_TARGET_OPENCL_FP16: out << "OCL_FP16"; colorId = 2; break;
4055 case DNN_TARGET_MYRIAD: out << "MYRIAD"; colorId = 3; break;
4056 case DNN_TARGET_FPGA: out << "FPGA"; colorId = 4; break;
4057 // don't use default:
4059 out << "\\n"; // align center
4060 out << ((clusterIds.size() == 1)? "\" " : " }\" ");
4061 out << "fillcolor=\"" << colors[colorId] << "\" ";
4062 out << "style=filled ";
4063 out << "shape=" << ((clusterIds.size() == 1)? "box" : "record") << "]\n";
4067 int inputsSize = hasInput ? netInputLayer->outNames.size() : 0;
4068 for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
4070 const LayerData& ld = it->second;
4071 if (allLayers[it->first] == -1) // node
4073 for (int i = 0; i < ld.consumers.size(); i++)
4075 int outId = ld.consumers[i].lid;
4076 if (it == map.begin() && inputsSize > 1)
4077 out << "\t\"" << ld.name << "_" << i << "\"" << " -> ";
4079 out << "\t\"" << ld.name << "\"" << " -> ";
4080 if (allLayers[outId] == -1) // node
4082 CV_DbgAssert(map.find(outId) != map.end());
4083 out << "\"" << map.find(outId)->second.name << "\"\n";
4087 out << "\"" << "cluster_" << allLayers[outId] << "\"\n";
4091 else if (it->first == skippedLayers[allLayers[it->first]].back()) // edges from last layer in cluster
4093 for (int i = 0; i < ld.consumers.size(); i++)
4095 int outId = ld.consumers[i].lid;
4096 if (allLayers[outId] == -1) // node
4098 CV_DbgAssert(map.find(outId) != map.end());
4099 out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
4100 out << "\"" << map.find(outId)->second.name << "\"\n";
4102 else if (allLayers[outId] != allLayers[it->first]) { // another cluster
4103 out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
4104 out << "\"" << "cluster_" << allLayers[outId] << "\"\n";
4113 void Net::dumpToFile(const String& path) {
4114 std::ofstream file(path.c_str());
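// Note: the dump is in Graphviz DOT format (see "digraph G" above), so the
// written file can be rendered with, e.g.,
//   dot -Tpng net.dot -o net.png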
Ptr<Layer> Net::getLayer(LayerId layerId)
{
    LayerData &ld = impl->getLayerData(layerId);
    return ld.getLayerInstance();
}

std::vector<Ptr<Layer> > Net::getLayerInputs(LayerId layerId)
{
    LayerData &ld = impl->getLayerData(layerId);

    std::vector<Ptr<Layer> > inputLayers;
    inputLayers.reserve(ld.inputBlobsId.size());
    for (int i = 0; i < ld.inputBlobsId.size(); ++i) {
        inputLayers.push_back(getLayer(ld.inputBlobsId[i].lid));
    }
    return inputLayers;
}
std::vector<String> Net::getLayerNames() const
{
    CV_TRACE_FUNCTION();

    std::vector<String> res;
    res.reserve(impl->layers.size());

    Impl::MapIdToLayerData::iterator it;
    for (it = impl->layers.begin(); it != impl->layers.end(); it++)
    {
        if (it->second.id)  // skip Data layer
            res.push_back(it->second.name);
    }

    return res;
}
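// Illustrative sketch: getLayer() accepts a layer name as LayerId, so the
// names returned above can be fed straight back into it:
//
//     std::vector<cv::String> names = net.getLayerNames();
//     for (size_t i = 0; i < names.size(); i++)
//     {
//         cv::Ptr<cv::dnn::Layer> layer = net.getLayer(names[i]);
//         std::cout << names[i] << ": " << layer->type << std::endl;
//     }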
bool Net::empty() const
{
    return impl->layers.size() <= 1;  // first layer is default Data layer
}
std::vector<int> Net::getUnconnectedOutLayers() const
{
    std::vector<int> layersIds;

    Impl::MapIdToLayerData::iterator it;
    for (it = impl->layers.begin(); it != impl->layers.end(); it++)
    {
        int lid = it->first;
        LayerData &ld = it->second;

        if (ld.requiredOutputs.size() == 0)
            layersIds.push_back(lid);
    }

    return layersIds;
}
std::vector<String> Net::getUnconnectedOutLayersNames() const
{
    std::vector<int> ids = getUnconnectedOutLayers();
    const size_t n = ids.size();
    std::vector<String> names(n);
    for (size_t i = 0; i < n; ++i)
    {
        names[i] = impl->layers[ids[i]].name;
    }
    return names;
}
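// Usage sketch (illustrative): the unconnected-output names are what is
// typically passed to Net::forward() to fetch every output of a multi-output
// model (e.g. a detection network) in one call:
//
//     std::vector<cv::String> outNames = net.getUnconnectedOutLayersNames();
//     std::vector<cv::Mat> outs;
//     net.setInput(blob);           // blob prepared elsewhere
//     net.forward(outs, outNames);  // one Mat per output layer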
void Net::getLayersShapes(const ShapesVec& netInputShapes,
                          std::vector<int>& layersIds,
                          std::vector<ShapesVec>& inLayersShapes,
                          std::vector<ShapesVec>& outLayersShapes) const
{
    layersIds.clear();
    inLayersShapes.clear();
    outLayersShapes.clear();

    Impl::LayersShapesMap inOutShapes;
    impl->getLayersShapes(netInputShapes, inOutShapes);

    for (Impl::LayersShapesMap::const_iterator it = inOutShapes.begin();
         it != inOutShapes.end(); it++)
    {
        layersIds.push_back(it->first);
        inLayersShapes.push_back(it->second.in);
        outLayersShapes.push_back(it->second.out);
    }
}
void Net::getLayersShapes(const MatShape& netInputShape,
                          std::vector<int>& layerIds,
                          std::vector<ShapesVec>& inLayersShapes,
                          std::vector<ShapesVec>& outLayersShapes) const
{
    getLayersShapes(ShapesVec(1, netInputShape),
                    layerIds, inLayersShapes, outLayersShapes);
}
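// Illustrative sketch: shape inference runs without a forward pass, so every
// layer's input/output shapes can be queried from the network input shape
// alone. MatShape is std::vector<int>, e.g. {N, C, H, W}:
//
//     cv::dnn::MatShape inputShape = {1, 3, 224, 224};  // hypothetical input
//     std::vector<int> ids;
//     std::vector<std::vector<cv::dnn::MatShape> > inShapes, outShapes;
//     net.getLayersShapes(inputShape, ids, inShapes, outShapes);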
void Net::getLayerShapes(const MatShape& netInputShape,
                         const int layerId,
                         ShapesVec& inLayerShapes,
                         ShapesVec& outLayerShapes) const
{
    getLayerShapes(ShapesVec(1, netInputShape),
                   layerId, inLayerShapes, outLayerShapes);
}
void Net::getLayerShapes(const ShapesVec& netInputShapes,
                         const int layerId,
                         ShapesVec& inLayerShapes,
                         ShapesVec& outLayerShapes) const
{
    LayerShapes shapes;
    impl->getLayerShapes(netInputShapes, layerId, shapes);
    inLayerShapes = shapes.in;
    outLayerShapes = shapes.out;
}
int64 Net::getFLOPS(const std::vector<MatShape>& netInputShapes) const
{
    CV_TRACE_FUNCTION();

    int64 flops = 0;
    std::vector<int> ids;
    std::vector<std::vector<MatShape> > inShapes, outShapes;
    getLayersShapes(netInputShapes, ids, inShapes, outShapes);
    CV_Assert(inShapes.size() == outShapes.size());
    CV_Assert(inShapes.size() == ids.size());

    for (int i = 0; i < ids.size(); i++)
    {
        flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i],
                                                                   outShapes[i]);
    }

    return flops;
}
int64 Net::getFLOPS(const MatShape& netInputShape) const
{
    return getFLOPS(std::vector<MatShape>(1, netInputShape));
}
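// Usage sketch (illustrative): estimating the theoretical complexity of the
// whole network for a given input geometry:
//
//     int64 flops = net.getFLOPS(cv::dnn::MatShape{1, 3, 224, 224});
//     std::cout << "GFLOPs: " << flops * 1e-9 << std::endl;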
int64 Net::getFLOPS(const int layerId,
                    const std::vector<MatShape>& netInputShapes) const
{
    Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
    CV_Assert(layer != impl->layers.end());

    LayerShapes shapes;
    impl->getLayerShapes(netInputShapes, layerId, shapes);

    return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
}
int64 Net::getFLOPS(const int layerId,
                    const MatShape& netInputShape) const
{
    return getFLOPS(layerId, std::vector<MatShape>(1, netInputShape));
}
void Net::getLayerTypes(std::vector<String>& layersTypes) const
{
    layersTypes.clear();

    std::map<String, int> layers;
    for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
         it != impl->layers.end(); it++)
    {
        if (layers.find(it->second.type) == layers.end())
            layers[it->second.type] = 0;
        layers[it->second.type]++;
    }

    for (std::map<String, int>::iterator it = layers.begin();
         it != layers.end(); it++)
    {
        layersTypes.push_back(it->first);
    }
}
int Net::getLayersCount(const String& layerType) const
{
    int count = 0;
    for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
         it != impl->layers.end(); it++)
    {
        if (it->second.type == layerType)
            count++;
    }
    return count;
}
void Net::getMemoryConsumption(const int layerId,
                               const std::vector<MatShape>& netInputShapes,
                               size_t& weights, size_t& blobs) const
{
    CV_TRACE_FUNCTION();

    Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
    CV_Assert(layer != impl->layers.end());

    weights = blobs = 0;

    for (int i = 0; i < layer->second.params.blobs.size(); i++)
    {
        const Mat& weightsBlob = layer->second.params.blobs[i];
        weights += weightsBlob.total()*weightsBlob.elemSize();
    }

    ShapesVec inLayerShapes, outLayerShapes;
    getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes);
    for (int i = 0; i < outLayerShapes.size(); i++)
    {
        blobs += total(outLayerShapes[i]) * sizeof(float);
    }
}
void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
                               size_t& weights, size_t& blobs) const
{
    CV_TRACE_FUNCTION();

    std::vector<int> layerIds;
    std::vector<size_t> w, b;
    getMemoryConsumption(netInputShapes, layerIds, w, b);

    weights = blobs = 0;
    for (int i = 0; i < layerIds.size(); i++)
    {
        weights += w[i];
        blobs += b[i];
    }
}
void Net::getMemoryConsumption(const int layerId,
                               const MatShape& netInputShape,
                               size_t& weights, size_t& blobs) const
{
    getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
                         weights, blobs);
}
void Net::getMemoryConsumption(const MatShape& netInputShape,
                               size_t& weights, size_t& blobs) const
{
    getMemoryConsumption(std::vector<MatShape>(1, netInputShape),
                         weights, blobs);
}
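// Illustrative sketch: a quick memory estimate (parameter blobs plus output
// blobs, counted as FP32) for one input shape:
//
//     size_t weights = 0, blobs = 0;
//     net.getMemoryConsumption(cv::dnn::MatShape{1, 3, 224, 224}, weights, blobs);
//     std::cout << (weights + blobs) / (1024.0 * 1024.0) << " MiB (approx.)" << std::endl;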
void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
                               std::vector<int>& layerIds, std::vector<size_t>& weights,
                               std::vector<size_t>& blobs) const
{
    CV_TRACE_FUNCTION();

    layerIds.clear();
    weights.clear();
    blobs.clear();

    std::vector<std::vector<MatShape> > inLayerShapes, outLayerShapes;
    getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes);

    for (int i = 0; i < layerIds.size(); i++)
    {
        size_t w = 0, b = 0;
        Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]);
        CV_Assert(layer != impl->layers.end());

        for (int j = 0; j < layer->second.params.blobs.size(); j++)
        {
            const Mat& weightsBlob = layer->second.params.blobs[j];
            w += weightsBlob.total()*weightsBlob.elemSize();
        }

        for (int j = 0; j < outLayerShapes[i].size(); j++)
        {
            b += total(outLayerShapes[i][j]) * sizeof(float);
        }

        weights.push_back(w);
        blobs.push_back(b);
    }
}
void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>& layerIds,
                               std::vector<size_t>& weights, std::vector<size_t>& blobs) const
{
    getMemoryConsumption(std::vector<MatShape>(1, netInputShape), layerIds,
                         weights, blobs);
}
void Net::enableFusion(bool fusion)
{
    if (impl->fusion != fusion)
    {
        impl->fusion = fusion;
        impl->netWasAllocated = false;
    }
}
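// Note: fusion is on by default; toggling it only marks the net for
// reallocation, so the graph is rebuilt on the next forward(). A typical
// (illustrative) use is disabling fusion to profile individual layers:
//
//     net.enableFusion(false);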
void Net::setHalideScheduler(const String& scheduler)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str());

    impl->halideConfigFile = scheduler;
}
int64 Net::getPerfProfile(std::vector<double>& timings)
{
    timings = std::vector<double>(impl->layersTimings.begin() + 1, impl->layersTimings.end());
    int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0);
    return total;
}
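// Usage sketch (illustrative): timings are in ticks; divide by
// cv::getTickFrequency() for seconds. Element 0 of the internal timings (the
// network input pseudo-layer) is skipped above, so timings[i] corresponds to
// the i-th real layer:
//
//     std::vector<double> layersTimes;
//     int64 t = net.getPerfProfile(layersTimes);  // call after net.forward()
//     double totalMs = t * 1000.0 / cv::getTickFrequency();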
//////////////////////////////////////////////////////////////////////////
Layer::Layer() { preferableTarget = DNN_TARGET_CPU; }

Layer::Layer(const LayerParams &params)
    : blobs(params.blobs), name(params.name), type(params.type)
{
    preferableTarget = DNN_TARGET_CPU;
}
void Layer::setParamsFrom(const LayerParams &params)
{
    blobs = params.blobs;
    name = params.name;
    type = params.type;
}
int Layer::inputNameToIndex(String)
{
    return -1;
}

int Layer::outputNameToIndex(const String&)
{
    return 0;
}
bool Layer::supportBackend(int backendId)
{
    return backendId == DNN_BACKEND_OPENCV;
}
Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
{
    CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type +
                                       " layers is not defined.");
    return Ptr<BackendNode>();
}

Ptr<BackendNode> Layer::initInfEngine(const std::vector<Ptr<BackendWrapper> > &)
{
    CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
                                       " layers is not defined.");
    return Ptr<BackendNode>();
}
Ptr<BackendNode> Layer::initNgraph(const std::vector<Ptr<BackendWrapper> > & inputs, const std::vector<Ptr<BackendNode> >& nodes)
{
    CV_Error(Error::StsNotImplemented, "nGraph pipeline of " + type +
                                       " layers is not defined.");
    return Ptr<BackendNode>();
}
void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*> &inputs,
                                 const std::vector<Mat> &outputs, int targetId) const
{
#ifdef HAVE_HALIDE
    CV_TRACE_FUNCTION();

    Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"),
                xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile");
    Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();

    int outW, outH, outC, outN;
    getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);

    if (targetId == DNN_TARGET_CPU)
    {
        if (outW == 1 && outH == 1)
        {
            if (outC + outN == 1)
                return;
            if (outC > 8)
                top.split(c, co, ci, 8)
                   .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
                   .parallel(tile)
                   .vectorize(ci, 8);
            else
                top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile)
                   .parallel(tile);
        }
        else
        {
            if (outH <= 2)
                return;
            top.reorder(x, c, y)
               .split(y, yo, yi, 2)
               .fuse(yo, n, tile)
               .parallel(tile)
               .unroll(yi)
               .vectorize(x, outW >= 16 ? 16 : outW);
        }
    }
    else if (targetId == DNN_TARGET_OPENCL)
    {
        if (outW == 1 && outH == 1)
        {
            int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
            top.split(c, co, ci, c_split)
               .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
               .gpu_blocks(tile)
               .gpu_threads(ci);
        }
        else
        {
            int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW;
            int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH;
            // Supported vectorization widths: 2, 3, 4, 8, 16
            int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC);
            top.split(x, xo, xi, x_split).split(y, yo, yi, y_split)
               .split(c, co, ci, c_split)
               .gpu_blocks(xo, yo, co)
               .gpu_threads(xi, yi)
               .reorder(xi, yi, ci, xo, yo, co)
               .vectorize(ci);
        }
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown target identifier");
#endif // HAVE_HALIDE
}
Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
{
    return Ptr<BackendNode>();
}
bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
bool Layer::tryFuse(Ptr<Layer>&) { return false; }
void Layer::getScaleShift(Mat& scale, Mat& shift) const
{
    scale = Mat();
    shift = Mat();
}

void Layer::unsetAttached()
{
    setActivation(Ptr<ActivationLayer>());
}
template <typename T>
static void vecToPVec(const std::vector<T> &v, std::vector<T*> &pv)
{
    pv.resize(v.size());
    for (size_t i = 0; i < v.size(); i++)
        pv[i] = const_cast<T*>(&v[i]);
}
void Layer::finalize(const std::vector<Mat> &inputs, std::vector<Mat> &outputs)
{
    CV_TRACE_FUNCTION();
    this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
}

void Layer::finalize(const std::vector<Mat*> &input, std::vector<Mat> &output)
{
    CV_UNUSED(input); CV_UNUSED(output);
}

void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
{
    CV_TRACE_FUNCTION();
    std::vector<Mat> inputs, outputs;
    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);

    std::vector<Mat*> inputsp;
    vecToPVec(inputs, inputsp);
    this->finalize(inputsp, outputs);
}

std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
{
    CV_TRACE_FUNCTION();

    std::vector<Mat> outputs;
    this->finalize(inputs, outputs);
    return outputs;
}
void Layer::forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
{
    // We kept this method for compatibility. DNN calls it now only to support users' implementations.
}

void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        std::vector<UMat> orig_inputs;
        std::vector<UMat> orig_outputs;
        std::vector<UMat> orig_internals;

        inputs_arr.getUMatVector(orig_inputs);
        outputs_arr.getUMatVector(orig_outputs);
        internals_arr.getUMatVector(orig_internals);

        // Convert FP16 inputs to FP32 and allocate FP32 outputs/internals
        // so the generic FP32 implementation can be reused.
        inputs.resize(orig_inputs.size());
        for (size_t i = 0; i < orig_inputs.size(); i++)
            convertFp16(orig_inputs[i], inputs[i]);

        outputs.resize(orig_outputs.size());
        for (size_t i = 0; i < orig_outputs.size(); i++)
            outputs[i].create(shape(orig_outputs[i]), CV_32F);

        internals.resize(orig_internals.size());
        for (size_t i = 0; i < orig_internals.size(); i++)
            internals[i].create(shape(orig_internals[i]), CV_32F);

        forward(inputs, outputs, internals);

        for (size_t i = 0; i < outputs.size(); i++)
            convertFp16(outputs[i], orig_outputs[i]);

        // sync results back
        outputs_arr.assign(orig_outputs);
        internals_arr.assign(orig_internals);
        return;
    }
    std::vector<Mat> inpvec;
    std::vector<Mat> outputs;
    std::vector<Mat> internals;

    inputs_arr.getMatVector(inpvec);
    outputs_arr.getMatVector(outputs);
    internals_arr.getMatVector(internals);

    std::vector<Mat*> inputs(inpvec.size());
    for (int i = 0; i < inpvec.size(); i++)
        inputs[i] = &inpvec[i];

    this->forward(inputs, outputs, internals);

    // sync results back
    outputs_arr.assign(outputs);
    internals_arr.assign(internals);
}
void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
    CV_TRACE_FUNCTION();

    this->finalize(inputs, outputs);
    this->forward(inputs, outputs, internals);
}
bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
                            const int requiredOutputs,
                            std::vector<MatShape> &outputs,
                            std::vector<MatShape> &internals) const
{
    CV_Assert(inputs.size());
    outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
    return false;
}
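// Illustrative sketch (hypothetical custom layer): user layers usually
// override getMemoryShapes() together with the legacy Mat-based forward();
// Layer::forward() then reaches it through forward_fallback(). A minimal
// identity-like layer:
//
//     class MyLayer CV_FINAL : public cv::dnn::Layer
//     {
//     public:
//         MyLayer(const cv::dnn::LayerParams& params) { setParamsFrom(params); }
//         static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams& params)
//         { return cv::Ptr<cv::dnn::Layer>(new MyLayer(params)); }
//
//         bool getMemoryShapes(const std::vector<cv::dnn::MatShape>& inputs, int,
//                              std::vector<cv::dnn::MatShape>& outputs,
//                              std::vector<cv::dnn::MatShape>&) const CV_OVERRIDE
//         { outputs = inputs; return false; }
//
//         void forward(std::vector<cv::Mat*>& inputs, std::vector<cv::Mat>& outputs,
//                      std::vector<cv::Mat>&) CV_OVERRIDE
//         { for (size_t i = 0; i < inputs.size(); i++) inputs[i]->copyTo(outputs[i]); }
//     };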
//////////////////////////////////////////////////////////////////////////
static Mutex& getLayerFactoryMutex()
{
    static Mutex* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getInitializationMutex());
        if (instance == NULL)
            instance = new Mutex();
    }
    return *instance;
}
typedef std::map<String, std::vector<LayerFactory::Constructor> > LayerFactory_Impl;

static LayerFactory_Impl& getLayerFactoryImpl_()
{
    static LayerFactory_Impl impl;
    return impl;
}

static LayerFactory_Impl& getLayerFactoryImpl()
{
    static LayerFactory_Impl* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getLayerFactoryMutex());
        if (instance == NULL)
        {
            instance = &getLayerFactoryImpl_();
            initializeLayerFactory();
        }
    }
    return *instance;
}
void LayerFactory::registerLayer(const String &type, Constructor constructor)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type);

    if (it != getLayerFactoryImpl().end())
    {
        if (it->second.back() == constructor)
            CV_Error(cv::Error::StsBadArg, "Layer \"" + type + "\" already was registered");
        it->second.push_back(constructor);
    }
    // No-op for an already registered type: std::map::insert keeps the existing entry.
    getLayerFactoryImpl().insert(std::make_pair(type, std::vector<Constructor>(1, constructor)));
}
void LayerFactory::unregisterLayer(const String &type)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());

    LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type);
    if (it != getLayerFactoryImpl().end())
    {
        if (it->second.size() > 1)
            it->second.pop_back();
        else
            getLayerFactoryImpl().erase(it);
    }
}
Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type);

    if (it != getLayerFactoryImpl().end())
    {
        CV_Assert(!it->second.empty());
        return it->second.back()(params);
    }
    else
    {
        return Ptr<Layer>(); // NULL
    }
}
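// Usage sketch (illustrative): registering the hypothetical MyLayer sketched
// earlier so importers can instantiate it by its type string (the
// CV_DNN_REGISTER_LAYER_CLASS macro from opencv2/dnn/layer.details.hpp wraps
// the same call):
//
//     cv::dnn::LayerFactory::registerLayer("MyType", MyLayer::create);
//     // ... later, when tearing down:
//     cv::dnn::LayerFactory::unregisterLayer("MyType");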
BackendNode::BackendNode(int backendId) : backendId(backendId) {}

BackendNode::~BackendNode() {}

BackendWrapper::BackendWrapper(int backendId, int targetId)
    : backendId(backendId), targetId(targetId) {}

BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::~BackendWrapper() {}
Net readNet(const String& _model, const String& _config, const String& _framework)
{
    String framework = _framework.toLowerCase();
    String model = _model;
    String config = _config;
    const std::string modelExt = model.substr(model.rfind('.') + 1);
    const std::string configExt = config.substr(config.rfind('.') + 1);
    if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" ||
        modelExt == "prototxt" || configExt == "prototxt")
    {
        if (modelExt == "prototxt" || configExt == "caffemodel")
            std::swap(model, config);
        return readNetFromCaffe(config, model);
    }
    if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" ||
        modelExt == "pbtxt" || configExt == "pbtxt")
    {
        if (modelExt == "pbtxt" || configExt == "pb")
            std::swap(model, config);
        return readNetFromTensorflow(model, config);
    }
    if (framework == "torch" || modelExt == "t7" || modelExt == "net" ||
        configExt == "t7" || configExt == "net")
    {
        return readNetFromTorch(model.empty() ? config : model);
    }
    if (framework == "darknet" || modelExt == "weights" || configExt == "weights" ||
        modelExt == "cfg" || configExt == "cfg")
    {
        if (modelExt == "cfg" || configExt == "weights")
            std::swap(model, config);
        return readNetFromDarknet(config, model);
    }
    if (framework == "dldt" || modelExt == "bin" || configExt == "bin" ||
        modelExt == "xml" || configExt == "xml")
    {
        if (modelExt == "xml" || configExt == "bin")
            std::swap(model, config);
        return readNetFromModelOptimizer(config, model);
    }
    if (framework == "onnx" || modelExt == "onnx")
    {
        return readNetFromONNX(model);
    }
    CV_Error(Error::StsError, "Cannot determine an origin framework of files: " +
                              model + (config.empty() ? "" : ", " + config));
}
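// Usage sketch (illustrative): thanks to the extension-based dispatch above,
// the model/config argument order is forgiving; with hypothetical file names,
// these two calls load the same network:
//
//     cv::dnn::Net a = cv::dnn::readNet("model.caffemodel", "deploy.prototxt");
//     cv::dnn::Net b = cv::dnn::readNet("deploy.prototxt", "model.caffemodel");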
Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
            const std::vector<uchar>& bufferConfig)
{
    String framework = _framework.toLowerCase();
    if (framework == "caffe")
        return readNetFromCaffe(bufferConfig, bufferModel);
    else if (framework == "tensorflow")
        return readNetFromTensorflow(bufferModel, bufferConfig);
    else if (framework == "darknet")
        return readNetFromDarknet(bufferConfig, bufferModel);
    else if (framework == "torch")
        CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
    else if (framework == "dldt")
        return readNetFromModelOptimizer(bufferConfig, bufferModel);
    CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework);
}
Net readNetFromModelOptimizer(const String &xml, const String &bin)
{
    return Net::readFromModelOptimizer(xml, bin);
}

Net readNetFromModelOptimizer(const std::vector<uchar>& bufferCfg, const std::vector<uchar>& bufferModel)
{
    return Net::readFromModelOptimizer(bufferCfg, bufferModel);
}

Net readNetFromModelOptimizer(
        const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
        const uchar* bufferWeightsPtr, size_t bufferWeightsSize
)
{
    return Net::readFromModelOptimizer(
        bufferModelConfigPtr, bufferModelConfigSize,
        bufferWeightsPtr, bufferWeightsSize
    );
}

CV__DNN_EXPERIMENTAL_NS_END