1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "precomp.hpp"
43 #include "op_halide.hpp"
44 #include "op_inf_engine.hpp"
45 #include "op_vkcom.hpp"
46 #include "halide_scheduler.hpp"
54 #include <opencv2/dnn/shape_utils.hpp>
55 #include <opencv2/imgproc.hpp>
57 #include <opencv2/core/utils/configuration.private.hpp>
58 #include <opencv2/core/utils/logger.hpp>
62 CV__DNN_INLINE_NS_BEGIN
// Runtime configuration flags, read once from environment variables at startup.
64 // this option is useful to run valgrind memory errors detection
65 static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false);
// When false, the OpenCL target is restricted to GPU devices (see the check in setUpNet below).
68 static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false);
// Default backend used when the caller leaves DNN_BACKEND_DEFAULT: Inference Engine
// when the build has it, otherwise the built-in OpenCV backend.
// NOTE(review): the #else/#endif and closing ");" of this conditional are not
// visible in this chunk — confirm against the full file before editing.
71 static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
72 #ifdef HAVE_INF_ENGINE
73 (size_t)DNN_BACKEND_INFERENCE_ENGINE
75 (size_t)DNN_BACKEND_OPENCV
// Optional per-layer NaN/Inf checking, for debugging only (slows execution).
79 // Additional checks (slowdowns execution!)
80 static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
81 static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
82 static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);
89 //==================================================================================================
// BackendRegistry (class header elided in this chunk): singleton that enumerates
// the (backend, target) pairs actually usable in this build and on this machine.
94 typedef std::vector< std::pair<Backend, Target> > BackendsList;
95 const BackendsList & getBackends() const { return backends; }
// Meyers-singleton accessor: constructed on first use, probes backends once.
96 static BackendRegistry & getRegistry()
98 static BackendRegistry impl;
// Halide backend: CPU always; OpenCL target only when OpenCL is usable.
105 backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU));
107 if (cv::ocl::useOpenCL())
108 backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL));
110 #endif // HAVE_HALIDE
// Inference Engine backend: each target is probed with a tiny test network
// via checkIETarget() before being advertised.
112 #ifdef HAVE_INF_ENGINE
113 if (checkIETarget(DNN_TARGET_CPU))
114 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU));
115 if (checkIETarget(DNN_TARGET_MYRIAD))
116 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD));
117 if (checkIETarget(DNN_TARGET_FPGA))
118 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_FPGA));
// IE OpenCL targets are only probed on Intel OpenCL devices.
120 if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel())
122 if (checkIETarget(DNN_TARGET_OPENCL))
123 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL))
124 if (checkIETarget(DNN_TARGET_OPENCL_FP16))
125 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16));
128 #endif // HAVE_INF_ENGINE
// Built-in OpenCV backend: OpenCL targets when available, CPU always, and the
// Vulkan compute backend (guarding #ifdef lines are elided here).
131 if (cv::ocl::useOpenCL())
133 backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL));
134 backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16));
138 backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));
142 backends.push_back(std::make_pair(DNN_BACKEND_VKCOM, DNN_TARGET_VULKAN));
// Builds a 1-channel 1x1 convolution network and tries to set it up on the
// given IE target; presumably the (elided) remainder runs a forward pass and
// returns false on failure or when IE support is compiled out — TODO confirm
// against the full file.
145 static inline bool checkIETarget(int target)
147 #ifndef HAVE_INF_ENGINE
151 cv::dnn::LayerParams lp;
152 lp.set("kernel_size", 1);
153 lp.set("num_output", 1);
154 lp.set("bias_term", false);
155 lp.type = "Convolution";
156 lp.name = "testLayer";
157 lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1)));
158 net.addLayerToPrev(lp.name, lp.type, lp);
159 net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
160 net.setPreferableTarget(target);
161 static int inpDims[] = {1, 2, 3, 4};
162 net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0)));
// Registered (backend, target) pairs, filled once by the constructor.
175 BackendsList backends;
// Public API: returns the snapshot of all usable (backend, target) pairs.
179 std::vector< std::pair<Backend, Target> > getAvailableBackends()
181 return BackendRegistry::getRegistry().getBackends();
// Public API: lists the targets registered for backend 'be'; DNN_BACKEND_DEFAULT
// is resolved to the build/environment default first.
184 std::vector<Target> getAvailableTargets(Backend be)
186 if (be == DNN_BACKEND_DEFAULT)
187 be = (Backend)PARAM_DNN_BACKEND_DEFAULT;
189 std::vector<Target> result;
// NOTE(review): this copies the whole backends list by value; a const reference
// would avoid the copy. The filtering "if (i->first == be)" line is elided here.
190 const BackendRegistry::BackendsList all_backends = getAvailableBackends();
191 for(BackendRegistry::BackendsList::const_iterator i = all_backends.begin(); i != all_backends.end(); ++i )
194 result.push_back(i->second);
199 //==================================================================================================
203 typedef std::vector<MatShape> ShapesVec;
// LayerShapes (struct header elided): shapes of a layer's input, output and
// internal blobs, plus a hint that the layer can run in-place.
207 ShapesVec in, out, internal;
208 // No guarantees that layer which support in-place computations
209 // will be computed in-place (input.data_ptr == output.data_ptr).
210 // If layer said that it could work in-place and layers after it
211 // no longer use input blob, we'll set output = input.
213 LayerShapes() {supportInPlace = false;}
// Convenience overload: returns the blob by value instead of filling an
// OutputArray (declaration of the local 'blob' and the return are elided here).
217 Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
218 const Scalar& mean, bool swapRB, bool crop, int ddepth)
222 blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth);
// Single-image entry point: wraps the image into a one-element vector and
// delegates all preprocessing to blobFromImages().
226 void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
227 const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
230 std::vector<Mat> images(1, image.getMat());
231 blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
// Convenience overload: returns the blob by value instead of filling an
// OutputArray (declaration of the local 'blob' and the return are elided here).
234 Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
235 const Scalar& mean, bool swapRB, bool crop, int ddepth)
239 blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
// Core preprocessing: converts a batch of 2D images into a single NCHW blob.
// Per image: optional aspect-preserving resize + center crop (crop=true) or
// plain resize, optional CV_8U->CV_32F conversion, optional R/B swap of the
// mean, scaling; then the channels are copied into the 4D output blob.
243 void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
244 Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
247 CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
// CV_8U output cannot represent scaled/mean-subtracted values.
250 CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
251 CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
254 std::vector<Mat> images;
255 images_.getMatVector(images);
256 CV_Assert(!images.empty());
257 for (size_t i = 0; i < images.size(); i++)
259 Size imgSize = images[i].size();
// crop branch: scale so the target rectangle is covered, then center-crop.
266 float resizeFactor = std::max(size.width / (float)imgSize.width,
267 size.height / (float)imgSize.height);
268 resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
269 Rect crop(Point(0.5 * (images[i].cols - size.width),
270 0.5 * (images[i].rows - size.height)),
272 images[i] = images[i](crop);
// non-crop branch: plain resize to the requested size.
275 resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
277 if(images[i].depth() == CV_8U && ddepth == CV_32F)
278 images[i].convertTo(images[i], CV_32F);
// 'mean' is presumably a local copy of mean_ (declaration elided); swapping
// its channels implements swapRB for the subtraction below.
281 std::swap(mean[0], mean[2]);
284 images[i] *= scalefactor;
287 size_t nimages = images.size();
288 Mat image0 = images[0];
289 int nch = image0.channels();
290 CV_Assert(image0.dims == 2);
// Color path: pack 3- or 4-channel images into an N x C x H x W blob,
// one split channel at a time (ch[] declaration elided).
291 if (nch == 3 || nch == 4)
293 int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
294 blob_.create(4, sz, ddepth);
295 Mat blob = blob_.getMat();
298 for(size_t i = 0; i < nimages; i++ )
300 const Mat& image = images[i];
301 CV_Assert(image.depth() == blob_.depth());
302 nch = image.channels();
303 CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
304 CV_Assert(image.size() == image0.size());
306 for( int j = 0; j < nch; j++ )
307 ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
// swapRB for the pixel data itself: swap destination channel views.
309 std::swap(ch[0], ch[2]);
// Grayscale path: single-channel images copied plane by plane.
316 int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
317 blob_.create(4, sz, ddepth);
318 Mat blob = blob_.getMat();
320 for(size_t i = 0; i < nimages; i++ )
322 const Mat& image = images[i];
323 CV_Assert(image.depth() == blob_.depth());
324 nch = image.channels();
325 CV_Assert(image.dims == 2 && (nch == 1));
326 CV_Assert(image.size() == image0.size());
328 image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
// Inverse of blobFromImages: splits a 4D NCHW CV_32F blob back into a vector
// of per-image multi-channel 2D Mats.
333 void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)
337 //A blob is a 4 dimensional matrix in floating point precision
338 //blob_[0] = batchSize = nbOfImages
339 //blob_[1] = nbOfChannels
342 CV_Assert(blob_.depth() == CV_32F);
343 CV_Assert(blob_.dims == 4);
// One output Mat per image in the batch.
345 images_.create(cv::Size(1, blob_.size[0]), blob_.depth());
347 std::vector<Mat> vectorOfChannels(blob_.size[1]);
348 for (int n = 0; n < blob_.size[0]; ++n)
350 for (int c = 0; c < blob_.size[1]; ++c)
// getPlane() gives a 2D view of channel c of image n.
352 vectorOfChannels[c] = getPlane(blob_, n, c);
354 cv::merge(vectorOfChannels, images_.getMatRef(n));
// Wraps a host Mat with a UMat for the OpenCV/OpenCL backend, keeping host and
// device copies in sync via dirty flags (flag members are elided in this chunk).
358 class OpenCLBackendWrapper : public BackendWrapper
361 OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
// Constructor that shares device memory with an existing wrapper: the new
// UMat is a reshaped sub-range of the base buffer covering host->total()
// elements (memory reuse across layers).
368 OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
369 : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
371 Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
372 CV_Assert(!base.empty());
376 int shape[] = {1, (int)base->umat.total()};
377 umat = base->umat.reshape(1, 2, &shape[0])
378 .colRange(0, host->total())
379 .reshape(1, host->dims, &host->size[0]);
383 static Ptr<BackendWrapper> create(Mat& m)
385 return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));
388 static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
390 return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));
// Collects the UMats of a wrapper list, pushing host data to the device first.
393 static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper> >& wrappers)
395 const int numWrappers = wrappers.size();
396 std::vector<UMat> mats(wrappers.size());
397 for (int i = 0; i < numWrappers; ++i)
399 Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
400 CV_Assert(!umatWrapper.empty());
401 umatWrapper->copyToDevice();
402 mats[i] = umatWrapper->umat;
407 // Replaces all umats in wrappers to specific ones.
408 static void update(const std::vector<Ptr<BackendWrapper> >& wrappers,
409 const std::vector<UMat>& umats)
411 CV_Assert(wrappers.size() == umats.size());
412 for (int i = 0, n = umats.size(); i < n; ++i)
414 Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
415 CV_Assert(!umatWrapper.empty());
416 umatWrapper->umat = umats[i];
420 ~OpenCLBackendWrapper() {}
422 // Copies data from device to a host memory.
423 virtual void copyToHost() CV_OVERRIDE
// Marks the host copy as newer than the device copy (body elided).
428 virtual void setHostDirty() CV_OVERRIDE
// LayerPin (struct header elided): identifies one output blob of one layer as
// the pair (layer id 'lid', output index 'oid'); (-1,-1) means "unset".
453 LayerPin(int layerId = -1, int outputId = -1)
454 : lid(layerId), oid(outputId) {}
// valid() (signature elided): both indices must be non-negative.
458 return (lid >= 0 && oid >= 0);
461 bool equal(const LayerPin &r) const
463 return (lid == r.lid && oid == r.oid);
// Lexicographic order so LayerPin can key std::map.
466 bool operator<(const LayerPin &r) const
468 return lid < r.lid || (lid == r.lid && oid < r.oid);
471 bool operator ==(const LayerPin &r) const
473 return lid == r.lid && oid == r.oid;
// LayerData (struct header elided): everything the network keeps per layer —
// identity, graph connectivity, blobs, backend wrappers and the layer instance.
479 LayerData() : id(-1), skip(false), flag(0) {}
480 LayerData(int _id, const String &_name, const String &_type, LayerParams &_params)
481 : id(_id), name(_name), type(_type), params(_params), skip(false), flag(0)
// Graph connectivity: which pins feed this layer and who consumes its outputs.
495 std::vector<LayerPin> inputBlobsId;
496 std::set<int> inputLayersId;
497 std::set<int> requiredOutputs;
498 std::vector<LayerPin> consumers;
// Backend-specific views of the blobs below.
499 std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
500 std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
501 std::vector<Ptr<BackendWrapper> > internalBlobsWrappers;
503 Ptr<Layer> layerInstance;
504 std::vector<Mat> outputBlobs;
505 std::vector<Mat*> inputBlobs;
506 std::vector<Mat> internals;
507 // Computation nodes of implemented backends (except DEFAULT).
508 std::map<int, Ptr<BackendNode> > backendNodes;
509 // Flag for skip layer computation for specific backend.
// Lazily creates the Layer via the factory; throws when the type is unknown.
514 Ptr<Layer> getLayerInstance()
517 CV_TRACE_ARG_VALUE(type, "type", type.c_str());
520 return layerInstance;
522 layerInstance = LayerFactory::createLayerInstance(type, params);
525 CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\"");
528 return layerInstance;
532 //fake layer containing network input blobs
533 struct DataLayer : public Layer
535 DataLayer() : Layer()
// Supported backends: always OpenCV; Inference Engine only for single-input nets.
540 virtual bool supportBackend(int backendId) CV_OVERRIDE
542 return backendId == DNN_BACKEND_OPENCV ||
543 (backendId == DNN_BACKEND_INFERENCE_ENGINE && inputsData.size() == 1);
// Applies per-input preprocessing (scale + mean subtraction) while copying the
// user-supplied inputs into the network's output blobs.
546 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
549 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
551 CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
552 forward_ocl(inputs_arr, outputs_arr, internals_arr))
// FP16 outputs are handled by the generic fallback path.
554 if (outputs_arr.depth() == CV_16S)
556 forward_fallback(inputs_arr, outputs_arr, internals_arr);
560 std::vector<Mat> outputs, internals;
561 outputs_arr.getMatVector(outputs);
562 internals_arr.getMatVector(internals);
565 // | Input type | Output type |
568 for (int i = 0; i < inputsData.size(); ++i)
570 double scale = scaleFactors[i];
571 Scalar& mean = means[i];
// Mean subtraction only makes sense for <= 4 channels (Scalar has 4 slots).
572 CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
573 CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");
// Fast path: if all channel means are equal, one convertTo handles the blob.
575 bool singleMean = true;
576 for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
578 singleMean = mean[j] == mean[j - 1];
583 inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
// Slow path: per-channel conversion with the channel's own mean.
587 for (int n = 0; n < inputsData[i].size[0]; ++n)
588 for (int c = 0; c < inputsData[i].size[1]; ++c)
590 Mat inp = getPlane(inputsData[i], n, c);
591 Mat out = getPlane(outputs[i], n, c);
592 inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
// Keeps intermediate expression Mats alive while their data is converted to fp16.
599 std::vector<Mat> tmp_expressions;
// OpenCL variant of forward(): same preprocessing, writing into UMats, with an
// extra fp16 path when the output depth is CV_16S.
600 bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
603 // | Input type | Output type |
607 std::vector<UMat> outputs;
608 outputs_.getUMatVector(outputs);
610 tmp_expressions.clear();
611 for (int i = 0; i < inputsData.size(); ++i)
613 Mat inputData = inputsData[i];
615 double scale = scaleFactors[i];
616 Scalar& mean = means[i];
618 CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
619 bool singleMean = true;
620 for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
622 singleMean = mean[j] == mean[j - 1];
// fp16 output: compute in fp32 then convertFp16 into the UMat.
625 if (outputs_.depth() == CV_16S)
629 tmp_expressions.push_back(Mat(scale * (inputsData[i] - mean[0])));
630 convertFp16(tmp_expressions.back(), outputs[i]);
634 for (int n = 0; n < inputsData[i].size[0]; ++n)
635 for (int c = 0; c < inputsData[i].size[1]; ++c)
637 Mat inp = getPlane(inputsData[i], n, c);
// View of the (n, c) plane inside the 4D output UMat.
639 std::vector<cv::Range> plane(4, Range::all());
640 plane[0] = Range(n, n + 1);
641 plane[1] = Range(c, c + 1);
642 UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);
644 tmp_expressions.push_back(scale * (inp - mean[c]));
645 convertFp16(tmp_expressions.back(), out);
// fp32 output path mirrors the CPU forward() logic.
651 CV_Assert(outputs_.depth() == CV_32F);
654 inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
658 for (int n = 0; n < inputsData[i].size[0]; ++n)
659 for (int c = 0; c < inputsData[i].size[1]; ++c)
661 Mat inp = getPlane(inputsData[i], n, c);
663 std::vector<cv::Range> plane(4, Range::all());
664 plane[0] = Range(n, n + 1);
665 plane[1] = Range(c, c + 1);
666 UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);
668 inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
// Maps an input name to its index in outNames; -1 when not found.
677 int outputNameToIndex(const String& tgtName) CV_OVERRIDE
679 int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin());
680 return (idx < (int)outNames.size()) ? idx : -1;
683 void setNames(const std::vector<String> &names)
685 outNames.assign(names.begin(), names.end());
// Input layer shape inference: outputs mirror the declared inputs exactly.
688 bool getMemoryShapes(const std::vector<MatShape> &inputs,
689 const int requiredOutputs,
690 std::vector<MatShape> &outputs,
691 std::vector<MatShape> &internals) const CV_OVERRIDE
693 CV_Assert(inputs.size() == requiredOutputs);
694 outputs.assign(inputs.begin(), inputs.end());
// Decides whether the whole layer can be skipped: only when every output
// aliases its input and no scaling/mean subtraction is requested.
698 virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
700 std::vector<Mat> outputs;
701 outputs_arr.getMatVector(outputs);
703 CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(),
704 inputsData.size() == outputs.size());
706 for (int i = 0; skip && i < inputsData.size(); ++i)
708 if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar())
// Expresses the scale/mean preprocessing as an IE ScaleShift layer so the whole
// network (single input only) can run inside Inference Engine.
713 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
715 #ifdef HAVE_INF_ENGINE
716 CV_CheckEQ(inputsData.size(), (size_t)1, "");
717 CV_CheckEQ(inputsData[0].dims, 4, "");
718 const size_t numChannels = inputsData[0].size[1];
719 CV_Assert(numChannels <= 4);
// Per-channel weights = scale factor.
722 InferenceEngine::TensorDesc td(InferenceEngine::Precision::FP32, {numChannels},
723 InferenceEngine::Layout::C);
724 auto weights = InferenceEngine::make_shared_blob<float>(td);
727 float* weight_buf = weights->buffer().as<float*>();
728 std::fill(weight_buf, weight_buf + numChannels, scaleFactors[0]);
// Per-channel biases = -mean * scale, matching convertTo() above.
731 auto biases = InferenceEngine::make_shared_blob<float>(td);
733 float* bias_buf = biases->buffer().as<float*>();
735 for (int i = 0; i < numChannels; ++i)
737 bias_buf[i] = -means[0][i] * scaleFactors[0];
740 InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name);
741 addConstantData("weights", weights, ieLayer);
742 addConstantData("biases", biases, ieLayer);
743 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
744 #endif // HAVE_INF_ENGINE
745 return Ptr<BackendNode>();
748 std::vector<String> outNames;
749 // Preprocessing parameters for each network's input.
750 std::vector<double> scaleFactors;
751 std::vector<Scalar> means;
752 std::vector<Mat> inputsData;
// BlobManager (class header elided): tracks blob memory reuse via reference
// counts keyed by LayerPin.
759 // Increase references counter to layer output.
760 void addReference(const LayerPin& lp)
// First reference initializes the counter (the insert/increment lines are elided).
762 std::map<LayerPin, int>::iterator it = refCounter.find(lp);
763 if (it == refCounter.end())
769 void addReferences(const std::vector<LayerPin>& pins)
771 for (int i = 0; i < pins.size(); i++)
773 addReference(pins[i]);
777 // Returns number of references to allocated memory that used in specific
// Resolves the pin to its memory host first, then reads that host's counter.
779 int numReferences(const LayerPin& lp)
781 std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
782 CV_Assert(mapIt != reuseMap.end());
783 LayerPin memHost = mapIt->second;
785 std::map<LayerPin, int>::iterator refIt = refCounter.find(memHost);
786 CV_Assert(refIt != refCounter.end());
787 return refIt->second;
790 // Reuse data allocated in <host> inside the <user> blob.
791 void reuse(const LayerPin& host, const LayerPin& user)
// 'user' must not already be mapped; 'host' must be. Both end up pointing at
// the same memory host, and the user's references are folded into the host's.
793 CV_Assert(reuseMap.find(user) == reuseMap.end());
794 CV_Assert(reuseMap.find(host) != reuseMap.end());
795 LayerPin memHost = reuseMap[host];
796 reuseMap[user] = memHost;
797 if (refCounter.find(memHost) != refCounter.end())
799 std::map<LayerPin, int>::iterator userRefIt = refCounter.find(user);
800 if (userRefIt != refCounter.end())
802 refCounter[memHost] += userRefIt->second;
803 refCounter.erase(userRefIt);
806 refCounter[memHost] += 1;
810 // Decrease references counter to allocated memory inside specific blob.
811 void releaseReference(const LayerPin& lp)
// Counter must exist and be positive before the (elided) decrement.
813 std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
814 CV_Assert(mapIt != reuseMap.end());
816 std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
817 CV_Assert(refIt != refCounter.end());
818 CV_Assert(refIt->second > 0);
822 void releaseReferences(const std::vector<LayerPin>& pins)
824 for (int i = 0; i < pins.size(); i++)
826 releaseReference(pins[i]);
// Picks the smallest already-allocated, currently-unreferenced blob that can
// hold 'shape' and reshapes it into dst; otherwise allocates a fresh blob.
// Memory reuse is disabled entirely by OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS.
830 void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half)
832 if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
835 LayerPin bestBlobPin;
837 std::map<LayerPin, Mat>::iterator hostIt;
838 std::map<LayerPin, int>::iterator refIt;
840 const int targetTotal = total(shape);
841 int bestBlobTotal = INT_MAX;
843 for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
845 refIt = refCounter.find(hostIt->first);
846 // Use only blobs that had references before because if not,
847 // it might be used as output.
848 if (refIt != refCounter.end() && refIt->second == 0)
850 Mat& unusedBlob = hostIt->second;
// Best fit: big enough, but as small as possible.
851 if (unusedBlob.total() >= targetTotal &&
852 unusedBlob.total() < bestBlobTotal)
854 bestBlobPin = hostIt->first;
855 bestBlob = unusedBlob;
856 bestBlobTotal = unusedBlob.total();
860 if (!bestBlob.empty())
862 reuse(bestBlobPin, lp);
// Flatten, trim to the needed element count, then give it the target shape.
863 dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
869 // if dst already has been allocated with total(shape) elements,
870 // it won't be recreated and pointer of dst.data remains the same.
871 dst.create(shape, use_half ? CV_16S : CV_32F);
// Allocates (or reuses) the output and internal blobs of one layer, preferring
// in-place execution when the layer supports it and is the sole consumer of its
// input. Bigger blobs are allocated first so smaller ones can reuse them.
876 void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
877 std::vector<LayerPin>& pinsForInternalBlobs,
878 bool use_half = false)
882 pinsForInternalBlobs.clear();
884 std::vector<Mat>& outputBlobs = ld.outputBlobs,
885 &internalBlobs = ld.internals;
887 const ShapesVec& outShapes = layerShapes.out,
888 internalShapes = layerShapes.internal;
890 outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob
891 internalBlobs.resize(internalShapes.size());
893 CV_Assert(ld.requiredOutputs.size() <= outShapes.size());
895 // Check that layer could work in-place.
896 bool inPlace = false;
897 if (layerShapes.supportInPlace)
899 if (ld.inputBlobs.size() == 1)
901 // Get number of references to the input memory.
902 int numRef = numReferences(ld.inputBlobsId[0]);
903 // If current layer is one and only customer of this blob.
904 inPlace = numRef == 1;
// Combined list: outputs first, internals after (internal pin index is offset
// by the number of outputs).
908 ShapesVec shapes(outShapes);
909 shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
910 std::vector<Mat*> blobs;
911 for(int i = 0; i < outputBlobs.size(); i++)
913 blobs.push_back(&outputBlobs[i]);
916 for(int i = 0; i < internalBlobs.size(); i++)
918 blobs.push_back(&internalBlobs[i]);
919 if (total(internalShapes[i]))
921 pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
925 addReferences(pinsForInternalBlobs);
// Group blob indices by element count so allocation proceeds largest-first
// (reverse iteration over the size-keyed map).
927 std::map<int, std::vector<int> > idxSizes;
928 for(int i = 0; i < shapes.size(); i++)
930 idxSizes[total(shapes[i])].push_back(i);
933 std::map<int, std::vector<int> >::reverse_iterator it;
934 for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
936 for(int j = 0; j < it->second.size(); j++)
938 int index = it->second[j];
939 if (total(shapes[index]))
941 LayerPin blobPin(ld.id, index);
942 if (index < outShapes.size() && inPlace)
// In-place: the output is a reshaped view of the input blob.
944 CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
945 ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
946 reuse(ld.inputBlobsId[0], blobPin);
949 reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half);
955 // Clear internal state. Calls before an every reallocation.
966 // Register allocated memory.
967 void addHost(const LayerPin& lp, const Mat& mat)
// A pin may only be registered as a memory host once.
969 CV_Assert(memHosts.find(lp) == memHosts.end());
// How many live users each memory host has.
974 std::map<LayerPin, int> refCounter;
975 // Maps pin to origin blob (for whom memory was allocated firstly).
976 // For origin blobs key == value.
977 std::map<LayerPin, LayerPin> reuseMap;
978 std::map<LayerPin, Mat> memHosts;
// Creates a fresh backend-specific wrapper around a host Mat for the given
// (backend, target); returns an empty Ptr for plain CPU (no wrapping needed).
// Errors out on unknown backends.
981 static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
983 if (backendId == DNN_BACKEND_OPENCV)
985 if (targetId == DNN_TARGET_CPU)
986 return Ptr<BackendWrapper>();
987 else if (IS_DNN_OPENCL_TARGET(targetId))
988 return OpenCLBackendWrapper::create(m);
990 CV_Error(Error::StsNotImplemented, "Unknown target identifier");
992 else if (backendId == DNN_BACKEND_HALIDE)
994 CV_Assert(haveHalide());
996 return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
997 #endif // HAVE_HALIDE
999 else if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
1001 CV_Assert(haveInfEngine());
1002 #ifdef HAVE_INF_ENGINE
1003 return Ptr<BackendWrapper>(new InfEngineBackendWrapper(targetId, m));
1004 #endif // HAVE_INF_ENGINE
1006 else if (backendId == DNN_BACKEND_VKCOM)
1008 CV_Assert(haveVulkan());
1010 return Ptr<BackendWrapper>(new VkComBackendWrapper(m));
1011 #endif // HAVE_VULKAN
1014 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
1015 return Ptr<BackendWrapper>();
// Net::Impl internals (class header elided): per-network state and the
// constructor fragment that installs the synthetic "_input" DataLayer as id 0.
1020 typedef std::map<int, LayerShapes> LayersShapesMap;
1021 typedef std::map<int, LayerData> MapIdToLayerData;
1025 //allocate fake net input layer
1026 netInputLayer = Ptr<DataLayer>(new DataLayer());
1027 LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
1029 netInputLayer->name = inpl.name = "_input";
1030 inpl.type = "__NetInputLayer__";
1031 inpl.layerInstance = netInputLayer;
1032 layerNameToId.insert(std::make_pair(inpl.name, inpl.id));
// Fresh nets start unallocated, on the default backend/CPU target.
1035 netWasAllocated = false;
1038 preferableBackend = DNN_BACKEND_DEFAULT;
1039 preferableTarget = DNN_TARGET_CPU;
1040 skipInfEngineInit = false;
1043 Ptr<DataLayer> netInputLayer;
1044 std::vector<LayerPin> blobsToKeep;
1045 MapIdToLayerData layers;
1046 std::map<String, int> layerNameToId;
1047 BlobManager blobManager;
1048 int preferableBackend;
1049 int preferableTarget;
1050 String halideConfigFile;
1051 bool skipInfEngineInit;
1052 // Map host data to backend specific wrapper.
1053 std::map<void*, Ptr<BackendWrapper> > backendWrappers;
1057 bool netWasAllocated;
1060 std::vector<int64> layersTimings;
// Wraps a host Mat for the currently preferred backend/target, reusing an
// existing wrapper of the same underlying data as the base buffer when one is
// cached in backendWrappers; plain CPU needs no wrapper at all.
1063 Ptr<BackendWrapper> wrap(Mat& host)
1065 if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU)
1066 return Ptr<BackendWrapper>();
1068 MatShape shape(host.dims);
1069 for (int i = 0; i < host.dims; ++i)
1070 shape[i] = host.size[i];
// Keyed by the raw data pointer: same memory => shared device buffer.
1072 void* data = host.data;
1073 if (backendWrappers.find(data) != backendWrappers.end())
1075 Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
1076 if (preferableBackend == DNN_BACKEND_OPENCV)
1078 CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
1079 return OpenCLBackendWrapper::create(baseBuffer, host);
1081 else if (preferableBackend == DNN_BACKEND_HALIDE)
1083 CV_Assert(haveHalide());
1085 return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
1086 #endif // HAVE_HALIDE
// IE wrappers are not shared: always create a fresh one.
1088 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
1090 return wrapMat(preferableBackend, preferableTarget, host);
1092 else if (preferableBackend == DNN_BACKEND_VKCOM)
1095 return Ptr<BackendWrapper>(new VkComBackendWrapper(baseBuffer, host));
1099 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
// No cached wrapper for this data: create one and remember it.
1102 Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
1103 backendWrappers[data] = wrapper;
// Schedules and compiles all Halide backend nodes. Layers whose manual schedule
// (from halideConfigFile) is missing fall back to the layer's automatic
// scheduler; compilation itself is spread across a small thread pool.
1108 void compileHalide()
1110 CV_TRACE_FUNCTION();
1112 CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);
1114 HalideScheduler scheduler(halideConfigFile);
1115 std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
1116 for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
1118 LayerData &ld = it->second;
1119 Ptr<Layer> layer = ld.layerInstance;
1120 if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip)
1122 CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty());
1123 bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]);
1126 // Use automatic scheduling provided by layer.
1127 layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE],
1128 ld.inputBlobs, ld.outputBlobs,
1131 compileList.emplace_back(ld);
// Work-stealing loop: each thread atomically claims the next layer index.
1134 std::atomic<int> progress(0);
1135 auto fn = ([&] () -> void
1139 int id = progress.fetch_add(1);
1140 if ((size_t)id >= compileList.size())
1142 const LayerData& ld = compileList[id].get();
1143 Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
1144 dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
// Pool size: min(work, hardware threads), clamped to [1, 8]; the calling
// thread participates as one worker.
1147 size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
1148 num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
1149 std::vector<std::thread> threads(num_threads - 1);
1150 for (auto& t: threads) t = std::thread(fn);
1151 fn(); // process own tasks
1152 for (auto& t: threads) t.join();
// Fragment of Net::Impl::clear() (function header elided): drops per-layer
// blobs (except for the input layer, id 0), resets skip flags, detaches layer
// instances, and clears timings before reallocation.
1158 CV_TRACE_FUNCTION();
1160 MapIdToLayerData::iterator it;
1161 for (it = layers.begin(); it != layers.end(); it++)
// Layer 0 is the network input layer; its blobs are kept.
1163 if (it->second.id != 0) {
1164 it->second.inputBlobs.clear();
1165 it->second.outputBlobs.clear();
1166 it->second.internals.clear();
1168 it->second.skip = false;
1169 //it->second.consumers.clear();
1170 Ptr<Layer> currLayer = it->second.layerInstance;
1172 if( currLayer.empty() )
1175 currLayer->unsetAttached();
1178 layersTimings.clear();
// Validates the backend/target combination, downgrades unusable OpenCL/Vulkan
// configurations to CPU/OpenCV, then (re)allocates the network when needed.
1181 void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>())
1183 CV_TRACE_FUNCTION();
1185 if (preferableBackend == DNN_BACKEND_DEFAULT)
1186 preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT;
// Whitelist of valid target sets per backend.
1188 CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
1189 preferableTarget == DNN_TARGET_CPU ||
1190 preferableTarget == DNN_TARGET_OPENCL ||
1191 preferableTarget == DNN_TARGET_OPENCL_FP16);
1192 CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
1193 preferableTarget == DNN_TARGET_CPU ||
1194 preferableTarget == DNN_TARGET_OPENCL);
1195 CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE ||
1196 preferableTarget == DNN_TARGET_CPU ||
1197 preferableTarget == DNN_TARGET_OPENCL ||
1198 preferableTarget == DNN_TARGET_OPENCL_FP16 ||
1199 preferableTarget == DNN_TARGET_MYRIAD ||
1200 preferableTarget == DNN_TARGET_FPGA);
1201 CV_Assert(preferableBackend != DNN_BACKEND_VKCOM ||
1202 preferableTarget == DNN_TARGET_VULKAN);
// (Re)allocation is needed on first use or when the kept-blob set changed.
1203 if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
1205 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
// OpenCL unavailable in this build: silently fall back to CPU.
1208 CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
1209 preferableTarget = DNN_TARGET_CPU;
1213 if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
1215 // Current implementation is only valid for GPU (#11494)
1216 if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
1218 CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
1219 preferableTarget = DNN_TARGET_CPU;
// fp16 is only trusted on Intel GPUs; elsewhere downgrade to fp32 OpenCL.
1221 else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
1223 CV_LOG_WARNING(NULL,
1224 "DNN: OpenCL target with fp16 precision is not supported "
1225 "with current OpenCL device (tested with Intel GPUs only), "
1226 "switching to OpenCL with fp32 precision.");
1227 preferableTarget = DNN_TARGET_OPENCL;
1232 if (preferableBackend == DNN_BACKEND_VKCOM && !haveVulkan())
1234 preferableBackend = DNN_BACKEND_OPENCV;
1235 preferableTarget = DNN_TARGET_CPU;
1240 allocateLayers(blobsToKeep_);
// Keep the input layer's skip flag in sync with the DataLayer instance.
1242 MapIdToLayerData::iterator it = layers.find(0);
1243 CV_Assert(it != layers.end());
1244 it->second.skip = netInputLayer->skip;
1248 if (!netWasAllocated )
// The assert on the next line documents that compileHalide() is expected to
// be handled in the (elided) branch above when Halide is available.
1251 if (preferableBackend == DNN_BACKEND_HALIDE)
1254 CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
1258 netWasAllocated = true;
1259 this->blobsToKeep = blobsToKeep_;
1263 int getLayerId(const String &layerName)
1265 std::map<String, int>::iterator it = layerNameToId.find(layerName);
1266 return (it != layerNameToId.end()) ? it->second : -1;
1269 int getLayerId(int id)
1271 MapIdToLayerData::iterator it = layers.find(id);
1272 return (it != layers.end()) ? id : -1;
1275 int getLayerId(DictValue &layerDesc)
1277 if (layerDesc.isInt())
1278 return getLayerId(layerDesc.get<int>());
1279 else if (layerDesc.isString())
1280 return getLayerId(layerDesc.get<String>());
1282 CV_Assert(layerDesc.isInt() || layerDesc.isString());
1286 String getLayerName(int id)
1288 MapIdToLayerData::iterator it = layers.find(id);
1289 return (it != layers.end()) ? it->second.name : "(unknown layer)";
1292 LayerData& getLayerData(int id)
1294 MapIdToLayerData::iterator it = layers.find(id);
1296 if (it == layers.end())
1297 CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));
1302 LayerData& getLayerData(const String &layerName)
1304 int id = getLayerId(layerName);
1307 CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found");
1309 return getLayerData(id);
1312 LayerData& getLayerData(const DictValue &layerDesc)
1314 CV_Assert(layerDesc.isInt() || layerDesc.isString());
1315 if (layerDesc.isInt())
1316 return getLayerData(layerDesc.get<int>());
1317 else /*if (layerDesc.isString())*/
1318 return getLayerData(layerDesc.get<String>());
1321 static void addLayerInput(LayerData &ld, int inNum, LayerPin from)
1323 if ((int)ld.inputBlobsId.size() <= inNum)
1325 ld.inputBlobsId.resize(inNum + 1);
1329 LayerPin storedFrom = ld.inputBlobsId[inNum];
1330 if (storedFrom.valid() && !storedFrom.equal(from))
1331 CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected",
1332 inNum, ld.name.c_str()));
1335 ld.inputBlobsId[inNum] = from;
1338 int resolvePinOutputName(LayerData &ld, const String &outName)
1340 if (outName.empty())
1342 return ld.getLayerInstance()->outputNameToIndex(outName);
1345 LayerPin getPinByAlias(const String &layerName)
1348 pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);
1351 pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);
1356 std::vector<LayerPin> getLayerOutPins(const String &layerName)
1358 int lid = (layerName.empty()) ? 0 : getLayerId(layerName);
1360 std::vector<LayerPin> pins;
1362 for (int i = 0; i < layers[lid].outputBlobs.size(); i++)
1364 pins.push_back(LayerPin(lid, i));
1370 void connect(int outLayerId, int outNum, int inLayerId, int inNum)
1372 CV_Assert(outLayerId < inLayerId);
1373 LayerData &ldOut = getLayerData(outLayerId);
1374 LayerData &ldInp = getLayerData(inLayerId);
1376 addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum));
1377 ldOut.requiredOutputs.insert(outNum);
1378 ldOut.consumers.push_back(LayerPin(inLayerId, outNum));
// Backend dispatch: routes initialization to the backend-specific
// initializer chosen by preferableBackend.
// NOTE(review): the function signature line (presumably `void initBackend()`)
// was lost in this line-numbered paste -- TODO confirm against full source.
1383 CV_TRACE_FUNCTION();
1384 if (preferableBackend == DNN_BACKEND_OPENCV)
// OpenCV backend needs no graph compilation; just sanity-check the target.
1385 CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
1386 else if (preferableBackend == DNN_BACKEND_HALIDE)
1387 initHalideBackend();
1388 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
1389 initInfEngineBackend();
1390 else if (preferableBackend == DNN_BACKEND_VKCOM)
// presumably `initVkComBackend();` and a final `else` are elided here,
// making the CV_Error below the unknown-backend branch -- TODO confirm.
1393 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
// initHalideBackend: builds Halide backend nodes for every layer, greedily
// fusing each supported layer into the previous one ("base" layer) when the
// current layer works in-place on the base layer's output (e.g. conv+bn+relu
// collapse onto the conv node).
// NOTE(review): braces and a few statements are elided in this paste; the
// code lines below are kept verbatim.
1396 void initHalideBackend()
1398 CV_TRACE_FUNCTION();
1399 CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide());
1401 // Iterator to current layer.
1402 MapIdToLayerData::iterator it = layers.begin();
1403 // Iterator to base layer for fusion. In example, in case of conv+bn+relu
1404 // it'll be a conv layer.
1405 MapIdToLayerData::iterator baseIt = layers.begin();
1406 for (; it != layers.end(); it++)
1408 LayerData &ldTop = it->second;
1409 Ptr<Layer> layerTop = ldTop.layerInstance;
1410 if (!layerTop->supportBackend(preferableBackend))
1412 // Move base iterator to layer that don't support preferable
1413 // backend to prevent fusion over layer of different backend.
1417 // Try to do layers fusion.
1418 LayerData &ldBot = baseIt->second;
1419 Ptr<Layer> layerBot = ldBot.layerInstance;
1420 // 1. Check that bottom and top from the same backends.
1421 if (it != layers.begin() && layerBot->supportBackend(preferableBackend))
1423 // 2. Check that current layer works in-place.
1424 bool inPlace = ldTop.inputBlobs.size() == 1 &&
1425 ldBot.outputBlobs.size() == 1 &&
1426 ldTop.inputBlobs[0]->data ==
1427 ldBot.outputBlobs[0].data;
1430 // 3. Try to attach node.
1431 CV_Assert(!ldBot.backendNodes[preferableBackend].empty());
1432 Ptr<BackendNode> fusedNode =
1433 layerTop->tryAttach(ldBot.backendNodes[preferableBackend]);
1434 if (!fusedNode.empty())
// Fusion succeeded: the base layer's node now also computes this layer,
// and takes over this layer's output wrappers.
1437 ldBot.backendNodes[preferableBackend] = fusedNode;
1438 ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers;
1443 // No layers fusion.
1445 ldTop.backendNodes[DNN_BACKEND_HALIDE] =
1446 layerTop->initHalide(ldTop.inputBlobsWrappers);
1451 #ifdef HAVE_INF_ENGINE
1452 // Before launching Inference Engine graph we need to specify output blobs.
1453 // This function requests output blobs based on inputs references of
1454 // layers from default backend or layers from different graphs.
1455 void addInfEngineNetOutputs(LayerData &ld)
// layerNet: the IE graph this layer belongs to (empty when the layer has
// no IE backend node, e.g. it runs on the default backend).
1457 Ptr<InfEngineBackendNet> layerNet;
1458 if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end())
1460 Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1463 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1464 CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
1465 layerNet = ieNode->net;
1468 // For an every input reference we check that it belongs to one of
1469 // the Inference Engine backend graphs. Request an output blob if it is.
1470 // Do nothing if layer's input is from the same graph.
1471 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1473 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1474 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1475 if (!inpNode.empty())
1477 Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1478 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1479 if (layerNet != ieInpNode->net)
1481 // layerNet is empty or nodes are from different graphs.
1482 ieInpNode->net->addOutput(ieInpNode->layer.getName());
1487 #endif  // HAVE_INF_ENGINE
// initVkComBackend: creates a Vulkan compute backend node for every layer
// that supports it; on failure the node is left empty so execution falls
// back to the CPU implementation for that layer.
// NOTE(review): braces, the `try` keyword and a `continue` for unsupported
// layers appear to be elided in this paste; code lines kept verbatim.
1489 void initVkComBackend()
1491 CV_TRACE_FUNCTION();
1492 CV_Assert(preferableBackend == DNN_BACKEND_VKCOM);
1497 MapIdToLayerData::iterator it = layers.begin();
1498 for (; it != layers.end(); it++)
1500 LayerData &ld = it->second;
1501 Ptr<Layer> layer = ld.layerInstance;
1502 if (!layer->supportBackend(preferableBackend))
// Build the Vulkan node; initVkCom may throw (handled below).
1511 ld.backendNodes[DNN_BACKEND_VKCOM] =
1512 layer->initVkCom(ld.inputBlobsWrappers);
1514 catch (const cv::Exception& e)
1516 CV_LOG_ERROR(NULL, "initVkCom failed, fallback to CPU implementation. " << e.what());
// Empty node => this layer runs on the default (CPU) implementation.
1517 ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr<BackendNode>();
// initInfEngineBackend: builds one or several Inference Engine (OpenVINO)
// networks out of the layer graph. Layers the IE backend supports are
// marked `skip` and executed inside the IE graph; unsupported layers split
// the model into multiple IE sub-networks (or are wrapped as custom layers
// when `customizable`). Handles two modes: a pre-imported IE network
// (skipInfEngineInit) and a network converted layer-by-layer.
// NOTE(review): many structural lines (braces, #else/#endif, continue
// statements) are elided in this paste; code lines are kept verbatim.
1523 void initInfEngineBackend()
1525 CV_TRACE_FUNCTION();
1526 CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE, haveInfEngine());
1527 #ifdef HAVE_INF_ENGINE
1528 MapIdToLayerData::iterator it;
1529 Ptr<InfEngineBackendNet> net;
// Pass 1: name the IE data nodes of every layer's outputs. The input
// layer (id 0) uses the user-provided outNames when present.
1531 for (it = layers.begin(); it != layers.end(); ++it)
1533 LayerData &ld = it->second;
1536 CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
1537 (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
1538 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1540 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
// Older IE releases expose a public `name` field; newer ones use setName().
1541 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1542 dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
1544 dataPtr->setName(netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]);
1550 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1552 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1553 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1554 dataPtr->name = ld.name;
1556 dataPtr->setName(ld.name);
// Mode A: the whole model was imported as a ready IE network -- only bind
// the blobs and initialize; no per-layer conversion.
1562 if (skipInfEngineInit)
1564 Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
1565 CV_Assert(!node.empty());
1567 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1568 CV_Assert(!ieNode.empty());
1570 for (it = layers.begin(); it != layers.end(); ++it)
1572 LayerData &ld = it->second;
1575 for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
1577 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
1578 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1579 dataPtr->name = netInputLayer->outNames[i];
1581 dataPtr->setName(netInputLayer->outNames[i]);
1587 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1589 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1590 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1591 dataPtr->name = ld.name;
1593 dataPtr->setName(ld.name);
1597 ieNode->net->addBlobs(ld.inputBlobsWrappers);
1598 ieNode->net->addBlobs(ld.outputBlobsWrappers);
// The last layer must actually run so it can pull results out of IE.
1601 layers[lastLayerId].skip = false;
1602 ieNode->net->init(preferableTarget);
1606 // Build Inference Engine networks from sets of layers that support this
1607 // backend. Split a whole model on several Inference Engine networks if
1608 // some of layers are not implemented.
1610 // Set of all input and output blobs wrappers for current network.
1611 std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
// Mode B: convert layer-by-layer, starting a fresh IE sub-network whenever
// an unsupported layer (or a graph break) is encountered.
1612 for (it = layers.begin(); it != layers.end(); ++it)
1614 LayerData &ld = it->second;
1615 if (ld.id == 0 && ld.skip)
1617 bool fused = ld.skip;
1619 Ptr<Layer> layer = ld.layerInstance;
1620 if (!fused && !layer->supportBackend(preferableBackend))
// Unsupported layer: maybe wrap it as an IE "custom" layer (2019R2+),
// except for types/targets with known plugin bugs (workarounds below).
1622 bool customizable = ld.id != 0 && ld.outputBlobs.size() == 1 &&
1623 INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2);
1624 // TODO: there is a bug in Myriad plugin with custom layers shape infer.
1625 if (preferableTarget == DNN_TARGET_MYRIAD)
1627 for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
1629 customizable = ld.inputBlobs[i]->size[0] == 1;
1633 // TODO: fix these workarounds
1634 if (preferableTarget == DNN_TARGET_MYRIAD ||
1635 preferableTarget == DNN_TARGET_OPENCL ||
1636 preferableTarget == DNN_TARGET_OPENCL_FP16)
1637 customizable &= ld.type != "Concat";
1639 if (preferableTarget == DNN_TARGET_OPENCL ||
1640 preferableTarget == DNN_TARGET_OPENCL_FP16)
1641 customizable &= ld.type != "Power";
1643 if (preferableTarget == DNN_TARGET_OPENCL)
1644 customizable &= ld.type != "Eltwise";
// Not customizable: close the current IE sub-network and run this layer
// on CPU via the default backend.
1648 addInfEngineNetOutputs(ld);
1649 net = Ptr<InfEngineBackendNet>();
1650 netBlobsWrappers.clear(); // Is not used for R5 release but we don't wrap it to #ifdef.
1651 layer->preferableTarget = DNN_TARGET_CPU;
1655 ld.skip = true; // Initially skip all Inference Engine supported layers.
1657 // Create a new network if one of inputs from different Inference Engine graph.
1658 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1660 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1661 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1662 if (!inpNode.empty())
1664 Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1665 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1666 if (ieInpNode->net != net)
1668 net = Ptr<InfEngineBackendNet>();
1669 netBlobsWrappers.clear(); // Is not used for R5 release but we don't wrap it to #ifdef.
1675 Ptr<BackendNode> node;
// Fused in-place layers reuse the producer's backend node and wrappers.
1680 bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
1681 ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
1683 node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
1684 ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
1688 net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet());
1692 if (layer->supportBackend(preferableBackend))
1693 node = layer->initInfEngine(ld.inputBlobsWrappers);
// Custom-layer path: wrap the OpenCV implementation as an IE node.
1696 node = Ptr<BackendNode>(new InfEngineBackendNode(
1697 ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
1700 else if (node.empty())
1703 CV_Assert(!node.empty());
1704 ld.backendNodes[preferableBackend] = node;
1706 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1707 CV_Assert(!ieNode.empty());
1710 // Convert weights in FP16 for specific targets.
1711 if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
1712 preferableTarget == DNN_TARGET_MYRIAD ||
1713 preferableTarget == DNN_TARGET_FPGA) && !fused)
1715 #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
1716 for (const std::string& name : {"weights", "biases"})
1718 auto it = ieNode->layer.getParameters().find(name);
1719 if (it != ieNode->layer.getParameters().end())
1721 InferenceEngine::Blob::Ptr bp = it->second.as<InferenceEngine::Blob::Ptr>();
1722 it->second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(bp));
1726 auto& blobs = ieNode->layer.getConstantData();
1729 // In case of non weightable layer we have to specify
1730 // it's precision adding dummy blob.
1731 auto blob = InferenceEngine::make_shared_blob<int16_t>(
1732 InferenceEngine::Precision::FP16,
1733 InferenceEngine::Layout::C, {1});
1739 for (auto& it : blobs)
1740 it.second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(it.second));
// Register the layer with the current IE sub-network and wire its blobs.
1746 net->addLayer(ieNode->layer);
1748 net->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers, ieNode->layer.getName());
1749 net->addBlobs(ld.inputBlobsWrappers);
1750 net->addBlobs(ld.outputBlobsWrappers);
1751 addInfEngineNetOutputs(ld);
1754 // Initialize all networks.
1755 for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
1757 LayerData &ld = it->second;
1758 if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end())
1761 Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1765 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1769 CV_Assert(!ieNode->net.empty());
1771 if (!ieNode->net->isInitialized())
1773 ieNode->net->init(preferableTarget);
1777 #endif  // HAVE_INF_ENGINE
// allocateLayer: recursively allocates a single layer -- first its parents,
// then its own input/output/internal blobs (through blobManager for memory
// reuse), wraps them for the selected backend, and calls Layer::finalize.
// NOTE(review): braces, an early-return-if-already-allocated guard and the
// verbose-dump #ifdefs appear to be elided in this paste; code lines are
// kept verbatim.
1780 void allocateLayer(int lid, const LayersShapesMap& layersShapes)
1782 CV_TRACE_FUNCTION();
1784 LayerData &ld = layers[lid];
1790 size_t ninputs = ld.inputBlobsId.size();
// Debug dump of the layer's inputs (presumably behind a verbose #if).
1792 printf("layer %s:", ld.name.c_str());
1793 for (size_t i = 0; i < ninputs; i++)
1795 int inp_lid = ld.inputBlobsId[i].lid;
1796 LayerData &inp_ld = layers[inp_lid];
1797 int inp_outputs = (int)inp_ld.outputBlobs.size();
1798 std::cout << " " << inp_ld.name << "(" << inp_outputs;
1800 for( int j = 0; j < inp_outputs; j++ )
1802 std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size;
1809 //determine parent layers
1810 for (size_t i = 0; i < ninputs; i++)
1811 ld.inputLayersId.insert(ld.inputBlobsId[i].lid);
// Allocate every producer first (depth-first recursion).
1814 for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
1815 allocateLayer(*i, layersShapes);
1818 if (ld.id == 0) // DataLayer
// The virtual input layer wraps the user-supplied input Mats directly.
1820 ninputs = netInputLayer->inputsData.size();
1821 ld.inputBlobsWrappers.resize(ninputs);
1822 for (size_t i = 0; i < ninputs; i++)
1824 ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
// Regular layer: point input blobs/wrappers at the producers' outputs.
1829 ld.inputBlobs.resize(ninputs);
1830 ld.inputBlobsWrappers.resize(ninputs);
1831 for (size_t i = 0; i < ninputs; i++)
1833 LayerPin from = ld.inputBlobsId[i];
1834 CV_Assert(from.valid());
1835 CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
1836 ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
1837 ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
1841 LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);
1843 CV_Assert(layerShapesIt != layersShapes.end());
// Allocate (or reuse) output and internal blobs; last flag requests FP16
// storage for the OpenCV/OpenCL-FP16 combination.
1845 std::vector<LayerPin> pinsForInternalBlobs;
1846 blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
1847 preferableBackend == DNN_BACKEND_OPENCV &&
1848 preferableTarget == DNN_TARGET_OPENCL_FP16);
1849 ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
1850 for (int i = 0; i < ld.outputBlobs.size(); ++i)
1852 ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
1854 ld.internalBlobsWrappers.resize(ld.internals.size());
1855 for (int i = 0; i < ld.internals.size(); ++i)
1857 ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
1860 Ptr<Layer> layerPtr = ld.getLayerInstance();
1862 std::vector<Mat> inps(ld.inputBlobs.size());
1863 for (int i = 0; i < ld.inputBlobs.size(); ++i)
1865 inps[i] = *ld.inputBlobs[i];
// Let the layer finish shape-dependent setup now that blobs exist.
1867 layerPtr->finalize(inps, ld.outputBlobs);
1868 layerPtr->preferableTarget = preferableTarget;
// Debug dump of the layer's outputs (presumably behind a verbose #if).
1870 std::cout << "\toutputs:";
1871 size_t noutputs = ld.outputBlobs.size();
1872 for (size_t j = 0; j < noutputs; j++)
1874 std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size;
1880 // After allocation of layer, we decrease counters to it's input blobs.
1881 blobManager.releaseReferences(ld.inputBlobsId);
1882 blobManager.releaseReferences(pinsForInternalBlobs);
// Debug-print helper: presumably the two definitions below were wrapped in
// an #if/#else/#endif (elided in this paste), so printf_ expands to printf
// in verbose builds and to nothing otherwise -- TODO confirm.
1888 #define printf_(args) printf args
1890 #define printf_(args)
// fuseLayers: graph-level fusion pass run after allocation. Performs:
//  #1 generic layer fusion (conv + batchnorm/scale/activation, and on OpenCL
//     conv + eltwise + activation), marking absorbed layers `skip` and
//     redirecting their output blobs to the fused layer;
//  #2 concat elimination: inputs of an axis-1 Concat write directly into
//     channel slices of the concat output buffer.
// Blobs listed in blobsToKeep_ are never fused away (user requested them).
// NOTE(review): this paste elides many structural lines (braces, continue/
// break statements, while(nextData) loops); the ordering of the remaining
// statements is intricate, so all code lines are kept verbatim.
1893 void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
// Fusion applies only to the OpenCV and Inference Engine backends.
1895 if( !fusion || (preferableBackend != DNN_BACKEND_OPENCV &&
1896 preferableBackend != DNN_BACKEND_INFERENCE_ENGINE))
1899 CV_TRACE_FUNCTION();
1901 // scan through all the layers. If there is convolution layer followed by the activation layer,
1902 // we try to embed this activation into the convolution and disable separate execution of the activation
1903 std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
1904 blobsToKeep_.end());
1905 MapIdToLayerData::iterator it;
1906 for (it = layers.begin(); it != layers.end(); it++)
1908 int lid = it->first;
1909 LayerData& ld = layers[lid];
1912 printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
1915 printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
1917 // the optimization #1. try to fuse batch norm, scaling and/or activation layers
1918 // with the current layer if they follow it. Normally, the are fused with the convolution layer,
1919 // but some of them (like activation) may be fused with fully-connected, elemwise (+) and
1920 // some other layers.
1921 Ptr<Layer>& currLayer = ld.layerInstance;
// Only fuse when this layer has exactly one consumer and its output is
// not explicitly requested by the user.
1922 if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
1924 LayerData* nextData = &layers[ld.consumers[0].lid];
1925 LayerPin lpNext(ld.consumers[0].lid, 0);
// Chain fusion: keep absorbing successor layers while tryFuse succeeds.
1928 Ptr<Layer> nextLayer = nextData->layerInstance;
1929 if (currLayer->tryFuse(nextLayer))
1931 printf_(("\tfused with %s\n", nextLayer->name.c_str()));
1932 nextData->skip = true;
1933 ld.outputBlobs = layers[lpNext.lid].outputBlobs;
1934 ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
1936 if (nextData->consumers.size() == 1)
1937 int nextLayerId = nextData->consumers[0].lid;
1938 nextData = &layers[nextLayerId];
1939 lpNext = LayerPin(nextLayerId, 0);
// The remaining fusion styles below are OpenCV-backend only.
1951 if (preferableBackend != DNN_BACKEND_OPENCV)
1952 continue;  // Go to the next layer.
1954 // TODO: OpenCL target support more fusion styles.
1955 if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
1956 (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
1957 ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
1958 ld.layerInstance->type != "Concat")) )
1963 // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
1964 if (IS_DNN_OPENCL_TARGET(preferableTarget) &&
1965 nextData->type != "ReLU" &&
1966 nextData->type != "ChannelsPReLU" &&
1967 nextData->type != "ReLU6" &&
1968 nextData->type != "TanH" &&
1969 nextData->type != "Power")
1972 Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
1973 if (nextActivLayer.empty())
// Embed the following activation into the current layer.
1976 if (currLayer->setActivation(nextActivLayer))
1978 printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
1979 nextData->skip = true;
1980 ld.outputBlobs = layers[lpNext.lid].outputBlobs;
1981 ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
1982 if (nextData->consumers.size() == 1)
1984 int nextLayerId = nextData->consumers[0].lid;
1985 nextData = &layers[nextLayerId];
1986 lpNext = LayerPin(nextLayerId, 0);
1998 // fuse convolution layer followed by eltwise + relu
1999 if ( IS_DNN_OPENCL_TARGET(preferableTarget) && ld.layerInstance->type == "Convolution" )
2001 Ptr<EltwiseLayer> nextEltwiseLayer;
2003 nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
2005 if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
2006 nextData && nextData->inputBlobsId.size() == 2 )
2008 LayerData *eltwiseData = nextData;
2010 // Eltwise layer has two inputs. We need to determine which
2011 // is a base convolution layer and which could be used as it's bias.
2012 LayerData* biasLayerData = 0;
2013 for (int i = 0; i < 2; ++i)
2015 LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid];
2016 CV_Assert(downLayerData);
// Walk past skipped (already fused) layers to the real producer.
2017 while (downLayerData->skip)
2019 if (downLayerData->inputBlobsId.size() == 1)
2020 downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
2027 if (downLayerData && ld.id == downLayerData->id)
2029 biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid];
2033 CV_Assert(biasLayerData);
2035 if( eltwiseData->consumers.size() == 1 )
2037 // fuse eltwise + activation layer
2038 if (biasLayerData->id < ld.id)
2040 nextData = &layers[eltwiseData->consumers[0].lid];
2041 lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
2042 Ptr<ActivationLayer> nextActivLayer;
2044 nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
2046 if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
2047 (!nextData->type.compare("ReLU") ||
2048 !nextData->type.compare("ChannelsPReLU") ||
2049 !nextData->type.compare("Power")) &&
2050 currLayer->setActivation(nextActivLayer) )
2052 CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
// The bias branch output becomes a second input of the conv layer.
2053 ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
2054 printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
2055 printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
2056 eltwiseData->skip = true;
2057 nextData->skip = true;
2058 // This optimization for cases like
2064 // This way all the element-wise computations
2065 // (i.e. some_layer+conv or some_layer*conv)
2066 // would be done at [conv] layer. So we need to
2067 // replace [conv]'s output blob to [eltwise]'s one
2068 // considering that [activ] is an in-place layer.
2069 // Also we need to move all the consumers' references.
2070 // To prevent memory collisions (i.e. when input of
2071 // [conv] and output of [eltwise] is the same blob)
2072 // we allocate a new blob.
2073 CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
2074 ld.outputBlobs[0] = ld.outputBlobs[0].clone();
2075 ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
2077 eltwiseData->outputBlobs = ld.outputBlobs;
2078 nextData->outputBlobs = ld.outputBlobs;
2079 eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
2080 nextData->outputBlobsWrappers = ld.outputBlobsWrappers;
2082 // Move references of [activ] layer consumers to the newly allocated blob.
2083 for (int i = 0; i < nextData->consumers.size(); ++i)
2085 LayerData& consumer = layers[nextData->consumers[i].lid];
2086 for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
2088 if (consumer.inputBlobsId[j].lid == lpNext.lid)
2090 consumer.inputBlobs[j] = &ld.outputBlobs[0];
2091 consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
// Optimization #2 below applies only to the OpenCV backend.
2104 if (preferableBackend != DNN_BACKEND_OPENCV)
2105 continue;  // Go to the next layer.
2107 // the optimization #2. if there is concat layer that concatenates channels
2108 // from the inputs together (i.e. axis == 1) then we make the inputs of
2109 // the concat layer to write to the concatenation output buffer
2110 // (and so we eliminate the concatenation layer, because the channels
2111 // are concatenated implicitly).
2112 Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
2113 if( !concatLayer.empty() && concatLayer->axis == 1 && !concatLayer->padding &&
2114 ld.outputBlobs.size() == 1 )
2116 Mat& output = ld.outputBlobs[0];
2118 if (!ld.outputBlobsWrappers.empty() &&
2119 (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)))
// On OpenCL, check every real (non-skipped) input producer; presumably
// the optimization is restricted to all-convolution inputs here.
2121 size_t i, ninputs = ld.inputBlobsId.size();
2122 bool conv_layer = true;
2123 for( i = 0; i < ninputs; i++ )
2125 LayerPin pin = ld.inputBlobsId[i];
2126 LayerData* inp_i_data = &layers[pin.lid];
2127 while(inp_i_data->skip &&
2128 inp_i_data->inputBlobsId.size() == 1 &&
2129 inp_i_data->consumers.size() == 1)
2131 pin = inp_i_data->inputBlobsId[0];
2132 inp_i_data = &layers[pin.lid];
2134 conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution");
2138 std::vector<UMat> umat_outputBlobs;
2139 umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2140 umat_output = umat_outputBlobs[0];
2143 // TODO: in general, this optimization can always be done, but
2144 // many layers currently check that the input/output blobs are
2145 // continuous arrays. Unfortunately, this is not true when
2146 // the concatenation optimization is applied with batch_size > 1.
2147 // so, for now, we only apply this optimization in the most popular
2148 // case batch_size == 1.
2149 if( output.dims == 4 && output.size[0] == 1 )
2151 size_t i, ninputs = ld.inputBlobsId.size();
2152 std::vector<LayerPin> realinputs(ninputs);
2153 for( i = 0; i < ninputs; i++ )
2155 LayerPin pin = ld.inputBlobsId[i];
2156 LayerData* inp_i_data = &layers[pin.lid];
2157 while(inp_i_data->skip &&
2158 inp_i_data->inputBlobsId.size() == 1 &&
2159 inp_i_data->consumers.size() == 1)
2161 pin = inp_i_data->inputBlobsId[0];
2162 inp_i_data = &layers[pin.lid];
2164 printf_(("\treal input for %s is %s\n",
2165 layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
2166 inp_i_data->getLayerInstance()->name.c_str()));
// A producer that is skipped or feeds multiple consumers cannot safely
// write straight into the concat buffer -- abort the optimization.
2168 if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
2170 realinputs[i] = pin;
2175 // Allocate new memory to prevent collisions during memory
2176 // reusing (see https://github.com/opencv/opencv/pull/10456).
2177 output = output.clone();
2178 if (preferableBackend == DNN_BACKEND_OPENCV &&
2179 IS_DNN_OPENCL_TARGET(preferableTarget))
2181 std::vector<UMat> umats(1);
2182 umat_output = umat_output.clone();
2183 umats[0] = umat_output;
2184 OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
// Re-point every real input's output blob at its channel slice of the
// concat output, so producers write into the final buffer directly.
2186 Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
2188 for( i = 0; i < ninputs; i++ )
2190 LayerPin pin = realinputs[i];
2191 LayerData* inp_i_data = &layers[pin.lid];
2192 int channels_i = ld.inputBlobs[i]->size[1];
2193 chrange[1] = Range(ofs, ofs + channels_i);
2194 printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
2195 pin.oid, ofs, ofs + channels_i));
2197 Mat output_slice = output(chrange);
2198 Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
2199 CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
2200 Mat* oldPtr = &curr_output;
2201 curr_output = output_slice;
2202 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
2204 std::vector<UMat> umats(inp_i_data->outputBlobsWrappers.size());
2205 umats[pin.oid] = umat_output(chrange);
2206 OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats);
2208 // Layers that refer old input Mat will refer to the
2209 // new data but the same Mat object.
2210 CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output);
2213 printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
// allocateLayers: top-level allocation pass. Infers all layer shapes from
// the network inputs, resets the blob manager, seeds blob reference counts
// (inputs, every layer's inputs, and user-requested blobs), allocates every
// layer, then runs the fusion pass.
// NOTE(review): braces are elided in this paste; code lines kept verbatim.
2220 void allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
2222 CV_TRACE_FUNCTION();
2224 MapIdToLayerData::iterator it;
2225 for (it = layers.begin(); it != layers.end(); it++)
2226 it->second.flag = 0;
2228 CV_Assert(!layers[0].outputBlobs.empty());
2229 ShapesVec inputShapes;
2230 for(int i = 0; i < layers[0].outputBlobs.size(); i++)
2232 Mat& inp = layers[0].outputBlobs[i];
2233 CV_Assert(inp.total());
// For the OpenCL FP16 path, re-create input blobs with 16-bit storage
// (CV_16S is used as the FP16 carrier type).
2234 if (preferableBackend == DNN_BACKEND_OPENCV &&
2235 preferableTarget == DNN_TARGET_OPENCL_FP16)
2237 layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
2239 inputShapes.push_back(shape(inp));
2241 LayersShapesMap layersShapes;
2242 getLayersShapes(inputShapes, layersShapes);
2244 blobManager.reset();
2245 backendWrappers.clear();
2246 // Fake references to input blobs.
2247 for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
2248 blobManager.addReference(LayerPin(0, i));
2249 for (it = layers.begin(); it != layers.end(); ++it)
2251 const LayerData& ld = it->second;
2252 blobManager.addReferences(ld.inputBlobsId);
// Blobs the user asked to keep must never be recycled by the manager.
2255 for (int i = 0; i < blobsToKeep_.size(); i++)
2257 blobManager.addReference(blobsToKeep_[i]);
2260 for (it = layers.begin(); it != layers.end(); it++)
2262 int lid = it->first;
2263 allocateLayer(lid, layersShapes);
2266 layersTimings.resize(lastLayerId + 1, 0);
2267 fuseLayers(blobsToKeep_);
2270 void forwardLayer(LayerData &ld)
2272 CV_TRACE_FUNCTION();
2274 Ptr<Layer> layer = ld.layerInstance;
2281 std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
2282 if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
2285 CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode");
2287 if (!layer->supportBackend(DNN_BACKEND_OPENCV))
2288 CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" unsupported on OpenCV backend",
2289 ld.name.c_str(), ld.type.c_str()));
2291 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
2293 std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
2294 std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2295 std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
2296 layer->forward(umat_inputBlobs,
2298 umat_internalBlobs);
2299 if (DNN_CHECK_NAN_INF)
2302 for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
2304 UMat& u = umat_outputBlobs[i];
2306 if (u.depth() == CV_16S) // FP16
2309 m = u.getMat(ACCESS_READ);
2312 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2313 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2316 else if (!checkRange(m, true, NULL, -1e6, 1e6))
2318 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2319 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2325 for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
2327 UMat& u = umat_inputBlobs[i];
2329 if (u.depth() == CV_16S) // FP16
2332 m = u.getMat(ACCESS_READ);
2333 std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
2334 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2336 for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
2338 UMat& u = umat_outputBlobs[i];
2340 if (u.depth() == CV_16S) // FP16
2343 m = u.getMat(ACCESS_READ);
2344 std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
2345 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2347 for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
2349 UMat& u = umat_internalBlobs[i];
2351 if (u.depth() == CV_16S) // FP16
2354 m = u.getMat(ACCESS_READ);
2355 std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
2356 if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
2358 if (DNN_CHECK_NAN_INF_RAISE_ERROR)
2362 OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
2366 for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
2368 if (!ld.inputBlobsWrappers[i].empty())
2369 ld.inputBlobsWrappers[i]->copyToHost();
2372 std::vector<Mat> inps(ld.inputBlobs.size());
2373 for (int i = 0; i < ld.inputBlobs.size(); ++i)
2375 inps[i] = *ld.inputBlobs[i];
2377 layer->forward(inps, ld.outputBlobs, ld.internals);
2379 if (DNN_CHECK_NAN_INF)
2382 for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
2384 const Mat& m = ld.outputBlobs[i];
2387 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2388 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2391 else if (!checkRange(m, true, NULL, -1e6, 1e6))
2393 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2394 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2400 for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
2402 const Mat* pM = ld.inputBlobs[i];
2405 std::cout << "INPUT " << i << " is NULL" << std::endl;
2409 std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
2410 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2412 for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
2414 const Mat& m = ld.outputBlobs[i];
2415 std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
2416 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2418 for (size_t i = 0; i < ld.internals.size(); ++i)
2420 const Mat& m = ld.internals[i];
2421 std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
2422 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2424 if (DNN_CHECK_NAN_INF_RAISE_ERROR)
2429 for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
2431 if (!ld.outputBlobsWrappers[i].empty())
2432 ld.outputBlobsWrappers[i]->setHostDirty();
2438 Ptr<BackendNode> node = it->second;
2439 CV_Assert(!node.empty());
2440 if (preferableBackend == DNN_BACKEND_HALIDE)
2442 forwardHalide(ld.outputBlobsWrappers, node);
2444 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
2446 forwardInfEngine(ld.outputBlobsWrappers, node, isAsync);
2448 else if (preferableBackend == DNN_BACKEND_VKCOM)
2452 forwardVkCom(ld.outputBlobsWrappers, node);
2454 catch (const cv::Exception& e)
2456 CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. " << e.what());
2457 it->second = Ptr<BackendNode>();
2463 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
2471 layersTimings[ld.id] = tm.getTimeTicks();
// Impl::forwardToLayer — execute the net up to (and including) layer `ld`:
// every layer with id < ld.id is forwarded first, in id order.
// NOTE(review): this dump is garbled — the original file's line numbers are
// fused into each line and several lines (braces, the actual forwardLayer(ld)
// calls, flag updates) were dropped; code kept byte-identical, comments only.
2476 void forwardToLayer(LayerData &ld, bool clearFlags = true)
2478 CV_TRACE_FUNCTION();
// Reset each layer's per-pass "already forwarded" flag.
2482 MapIdToLayerData::iterator it;
2483 for (it = layers.begin(); it != layers.end(); it++)
2484 it->second.flag = 0;
2487 //already was forwarded
// Forward all predecessors (ids strictly below the target layer's id).
2492 MapIdToLayerData::iterator it;
2493 for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
2495 LayerData &ld = it->second;
// Impl::getLayerShapesRecursively — resolve the in/out/internal shapes of
// layer `id`, recursing into producer layers whose shapes are not yet known.
// NOTE(review): garbled dump — brace/blank lines (and the local `shapes`
// vector declaration) were dropped; code kept byte-identical, comments only.
2505 void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
2507 std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
// Layer 0 is the net-input layer: seed its input shapes from the actual
// input blobs when they were not supplied explicitly.
2509 if (id == 0 && inOutShapes[id].in[0].empty())
2511 if (!layers[0].outputBlobs.empty())
2514 for (int i = 0; i < layers[0].outputBlobs.size(); i++)
2516 Mat& inp = layers[0].outputBlobs[i];
2517 CV_Assert(inp.total());
2518 shapes.push_back(shape(inp));
2520 inOutShapes[0].in = shapes;
2524 inOutShapes[0].out.clear();
// Ordinary layer: pull each input shape from the producing layer's outputs,
// computing the producer's shapes first if they are still unknown.
2529 if (inOutShapes[id].in.empty())
2531 for(int i = 0; i < inputLayerIds.size(); i++)
2533 int layerId = inputLayerIds[i].lid;
2534 LayersShapesMap::iterator it =
2535 inOutShapes.find(layerId);
2536 if(it == inOutShapes.end() ||
2537 it->second.out.empty())
2539 getLayerShapesRecursively(layerId, inOutShapes);
2541 const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
2542 inOutShapes[id].in.push_back(shape);
// Ask the layer instance itself for its output/internal shapes.
2545 const ShapesVec& is = inOutShapes[id].in;
2546 ShapesVec& os = inOutShapes[id].out;
2547 ShapesVec& ints = inOutShapes[id].internal;
2548 int requiredOutputs = layers[id].requiredOutputs.size();
2549 inOutShapes[id].supportInPlace =
2550 layers[id].getLayerInstance()->getMemoryShapes(is, requiredOutputs, os, ints);
// Sanity: every computed shape must describe a non-empty blob.
2552 for (int i = 0; i < ints.size(); i++)
2553 CV_Assert(total(ints[i]) > 0);
2555 for (int i = 0; i < os.size(); i++)
2556 CV_Assert(total(os[i]) > 0);
2559 void getLayersShapes(const ShapesVec& netInputShapes,
2560 LayersShapesMap& inOutShapes)
2562 inOutShapes.clear();
2564 inOutShapes[0].in = netInputShapes; //insert shape for first input layer
2565 for (MapIdToLayerData::iterator it = layers.begin();
2566 it != layers.end(); it++)
2568 getLayerShapesRecursively(it->first, inOutShapes);
2572 void getLayerShapes(const ShapesVec& netInputShapes,
2574 LayerShapes& shapes)
2576 LayersShapesMap inOutShapes;
2577 inOutShapes[0].in = netInputShapes; //insert shape for first input layer
2578 getLayerShapesRecursively(layerId, inOutShapes);
2579 shapes = inOutShapes[layerId];
2582 LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
2584 return *std::max_element(pins.begin(), pins.end());
// Impl::getBlob — fetch one output blob of a layer, syncing device memory to
// host when needed and widening FP16 results to FP32.
// NOTE(review): garbled dump — the `if (!pin.valid())` guard, the local
// `Mat output_blob;` declaration and brace lines were dropped; remaining
// code kept byte-identical, comments only.
2587 Mat getBlob(const LayerPin& pin)
2589 CV_TRACE_FUNCTION();
2592 CV_Error(Error::StsObjectNotFound, "Requested blob not found");
2594 LayerData &ld = layers[pin.lid];
2595 if ((size_t)pin.oid >= ld.outputBlobs.size())
2597 CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %zu outputs, "
2598 "the #%d was requested", ld.name.c_str(),
2599 ld.outputBlobs.size(), pin.oid));
// Non-CPU targets keep results in device memory; copy back before returning.
2601 if (preferableTarget != DNN_TARGET_CPU)
2603 CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
2604 // Transfer data to CPU if it's required.
2605 ld.outputBlobsWrappers[pin.oid]->copyToHost();
// CV_16S is used as raw storage for FP16; convert to FP32 for the caller.
2608 if (ld.outputBlobs[pin.oid].depth() == CV_16S)
2610 convertFp16(ld.outputBlobs[pin.oid], output_blob);
2614 return ld.outputBlobs[pin.oid];
2617 Mat getBlob(String outputName)
2619 return getBlob(getPinByAlias(outputName));
// Impl::getBlobAsync — return a future (AsyncArray) for a layer output when
// running the Inference Engine backend asynchronously.
// NOTE(review): garbled dump — the `if (!pin.valid())` guard, `#else`/`#endif`
// lines and braces were dropped; code kept byte-identical, comments only.
2623 AsyncArray getBlobAsync(const LayerPin& pin)
2625 CV_TRACE_FUNCTION();
2626 #ifdef HAVE_INF_ENGINE
2628 CV_Error(Error::StsObjectNotFound, "Requested blob not found");
2630 LayerData &ld = layers[pin.lid];
2631 if ((size_t)pin.oid >= ld.outputBlobs.size())
2633 CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, "
2634 "the #%d was requested", ld.name.c_str(),
2635 (int)ld.outputBlobs.size(), (int)pin.oid));
2637 if (preferableTarget != DNN_TARGET_CPU)
2639 CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
2640 // Transfer data to CPU if it's required.
2641 ld.outputBlobsWrappers[pin.oid]->copyToHost();
// Async results are only produced by the Inference Engine backend.
2643 CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE);
2645 Ptr<InfEngineBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<InfEngineBackendWrapper>();
2646 return std::move(wrapper->futureMat);
2648 CV_Error(Error::StsNotImplemented, "DNN_BACKEND_INFERENCE_ENGINE backend is required");
2652 AsyncArray getBlobAsync(String outputName)
2654 return getBlobAsync(getPinByAlias(outputName));
2659 Net::Net() : impl(new Net::Impl)
// Net::readFromModelOptimizer — build a cv::dnn::Net wrapping an Inference
// Engine network loaded from Model Optimizer .xml/.bin files. The whole IE
// network becomes one shared backend node; a proxy layer is added per output.
// NOTE(review): garbled dump — braces, the `Net cvNet;` declaration, the
// LayerParams setup and the final `return cvNet;` were dropped; code kept
// byte-identical, comments only.
2663 Net Net::readFromModelOptimizer(const String& xml, const String& bin)
2665 #ifndef HAVE_INF_ENGINE
2666 CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
2668 InferenceEngine::CNNNetReader reader;
2669 reader.ReadNetwork(xml);
2670 reader.ReadWeights(bin);
2672 InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
// Mirror IE's inputs as the cv::dnn net's named inputs.
2674 std::vector<String> inputsNames;
2675 std::vector<MatShape> inp_shapes;
2676 for (auto& it : ieNet.getInputsInfo())
2678 inputsNames.push_back(it.first);
2679 std::vector<size_t> dims = it.second->getTensorDesc().getDims();
2680 inp_shapes.push_back(std::vector<int>(dims.begin(), dims.end()));
2684 cvNet.setInputsNames(inputsNames);
2686 // set empty input to determine input shapes
2687 for (int inp_id = 0; inp_id < inputsNames.size(); ++inp_id)
2689 cvNet.setInput(Mat(inp_shapes[inp_id], CV_32F), inputsNames[inp_id]);
// One backend node shared by all output proxy layers wraps the IE network.
2692 Ptr<InfEngineBackendNode> backendNode(new InfEngineBackendNode(InferenceEngine::Builder::Layer("")));
2693 backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
2694 for (auto& it : ieNet.getOutputsInfo())
2696 Ptr<Layer> cvLayer(new InfEngineBackendLayer(ieNet));
2697 InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str());
2701 int lid = cvNet.addLayer(it.first, "", lp);
2703 LayerData& ld = cvNet.impl->layers[lid];
2704 cvLayer->name = it.first;
2705 cvLayer->type = ieLayer->type;
2706 ld.layerInstance = cvLayer;
2707 ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode;
2709 for (int i = 0; i < inputsNames.size(); ++i)
2710 cvNet.connect(0, i, lid, i);
2712 cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
// The IE graph is already built; skip the usual backend initialization pass.
2714 cvNet.impl->skipInfEngineInit = true;
2716 #endif // HAVE_INF_ENGINE
2723 int Net::addLayer(const String &name, const String &type, LayerParams ¶ms)
2725 CV_TRACE_FUNCTION();
2727 if (impl->getLayerId(name) >= 0)
2729 CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net");
2733 int id = ++impl->lastLayerId;
2734 impl->layerNameToId.insert(std::make_pair(name, id));
2735 impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));
2740 int Net::addLayerToPrev(const String &name, const String &type, LayerParams ¶ms)
2742 CV_TRACE_FUNCTION();
2744 int prvLid = impl->lastLayerId;
2745 int newLid = this->addLayer(name, type, params);
2746 this->connect(prvLid, 0, newLid, 0);
2750 void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum)
2752 CV_TRACE_FUNCTION();
2754 impl->connect(outLayerId, outNum, inpLayerId, inpNum);
2757 void Net::connect(String _outPin, String _inPin)
2759 CV_TRACE_FUNCTION();
2761 LayerPin outPin = impl->getPinByAlias(_outPin);
2762 LayerPin inpPin = impl->getPinByAlias(_inPin);
2764 CV_Assert(outPin.valid() && inpPin.valid());
2766 impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
2769 Mat Net::forward(const String& outputName)
2771 CV_TRACE_FUNCTION();
2773 String layerName = outputName;
2775 if (layerName.empty())
2776 layerName = getLayerNames().back();
2778 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
2779 impl->setUpNet(pins);
2780 impl->forwardToLayer(impl->getLayerData(layerName));
2782 return impl->getBlob(layerName);
// Net::forwardAsync — start an asynchronous forward pass (Inference Engine
// backend only) and return an AsyncArray future for the requested output.
// NOTE(review): garbled dump — the `#ifdef CV_CXX11` / `#else` / `#endif`
// lines and braces were dropped; code kept byte-identical, comments only.
2785 AsyncArray Net::forwardAsync(const String& outputName)
2787 CV_TRACE_FUNCTION();
2789 String layerName = outputName;
2791 if (layerName.empty())
2792 layerName = getLayerNames().back();
2794 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
2795 impl->setUpNet(pins);
// Only the Inference Engine backend implements asynchronous execution.
2797 if (impl->preferableBackend != DNN_BACKEND_INFERENCE_ENGINE)
2798 CV_Error(Error::StsNotImplemented, "Asynchronous forward for backend which is different from DNN_BACKEND_INFERENCE_ENGINE")
2800 impl->isAsync = true;
2801 impl->forwardToLayer(impl->getLayerData(layerName));
2802 impl->isAsync = false;
2804 return impl->getBlobAsync(layerName);
2806 CV_Error(Error::StsNotImplemented, "Asynchronous forward without C++11");
// Net::forward(OutputArrayOfArrays) — forward up to outputName and deliver
// the result(s) into the caller's container, dispatching on its kind
// (UMat / Mat / vector<Mat> / vector<UMat>) and converting FP16 as needed.
// NOTE(review): garbled dump — `else` lines and braces were dropped; code
// kept byte-identical, comments only.
2810 void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
2812 CV_TRACE_FUNCTION();
2814 String layerName = outputName;
2816 if (layerName.empty())
2817 layerName = getLayerNames().back();
2819 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
2820 impl->setUpNet(pins);
2821 impl->forwardToLayer(impl->getLayerData(layerName));
2823 LayerPin pin = impl->getPinByAlias(layerName);
2824 LayerData &ld = impl->layers[pin.lid];
2826 if (outputBlobs.isUMat())
2828 impl->getBlob(layerName).copyTo(outputBlobs);
2830 else if (outputBlobs.isMat())
2832 outputBlobs.assign(impl->getBlob(layerName));
2834 else if (outputBlobs.isMatVector())
// Device targets keep outputs in device memory; sync to host first.
2836 if (impl->preferableTarget != DNN_TARGET_CPU)
2838 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2840 CV_Assert(!ld.outputBlobsWrappers[i].empty());
2841 ld.outputBlobsWrappers[i]->copyToHost();
// FP32 blobs can be handed over directly; FP16 must be widened per-blob.
2844 if (ld.outputBlobs[0].depth() == CV_32F)
2846 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
2847 outputvec = ld.outputBlobs;
2849 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
2850 outputvec.resize(ld.outputBlobs.size());
2851 for (int i = 0; i < outputvec.size(); i++)
2852 convertFp16(ld.outputBlobs[i], outputvec[i]);
2855 else if (outputBlobs.isUMatVector())
2857 std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
// OpenCL path: hand out the existing UMats (converting FP16 when needed);
// otherwise fall back to copying the host-side Mats.
2859 if (impl->preferableBackend == DNN_BACKEND_OPENCV &&
2860 IS_DNN_OPENCL_TARGET(impl->preferableTarget))
2862 if (impl->preferableTarget == DNN_TARGET_OPENCL)
2863 outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2864 else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
2866 std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2867 outputvec.resize(out_vec.size());
2868 for (int i = 0; i < out_vec.size(); i++)
2869 convertFp16(out_vec[i], outputvec[i]);
2874 outputvec.resize(ld.outputBlobs.size());
2875 for (int i = 0; i < outputvec.size(); ++i)
2876 ld.outputBlobs[i].copyTo(outputvec[i]);
// Net::forward(outputBlobs, outBlobNames) — forward up to the latest of the
// requested layers and collect one blob per requested name.
// NOTE(review): garbled dump — the final `outputvec = matvec;` assignment and
// the closing brace were dropped; code kept byte-identical, comments only.
2881 void Net::forward(OutputArrayOfArrays outputBlobs,
2882 const std::vector<String>& outBlobNames)
2884 CV_TRACE_FUNCTION();
2886 std::vector<LayerPin> pins;
2887 for (int i = 0; i < outBlobNames.size(); i++)
2889 pins.push_back(impl->getPinByAlias(outBlobNames[i]));
2892 impl->setUpNet(pins);
// It is enough to forward to the layer with the greatest id among the pins.
2894 LayerPin out = impl->getLatestLayerPin(pins);
2896 impl->forwardToLayer(impl->getLayerData(out.lid));
2898 std::vector<Mat> matvec;
2899 for (int i = 0; i < pins.size(); i++)
2901 matvec.push_back(impl->getBlob(pins[i]));
2904 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
2908 void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
2909 const std::vector<String>& outBlobNames)
2911 CV_TRACE_FUNCTION();
2913 std::vector<LayerPin> pins;
2914 for (int i = 0; i < outBlobNames.size(); i++)
2916 pins.push_back(impl->getPinByAlias(outBlobNames[i]));
2919 impl->setUpNet(pins);
2921 LayerPin out = impl->getLatestLayerPin(pins);
2923 impl->forwardToLayer(impl->getLayerData(out.lid));
2925 outputBlobs.resize(outBlobNames.size());
2926 for (int i = 0; i < outBlobNames.size(); i++)
2928 std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
2929 outputBlobs[i].resize(lp.size());
2930 for (int j = 0; j < lp.size(); j++)
2932 outputBlobs[i][j] = impl->getBlob(lp[j]);
2937 void Net::setPreferableBackend(int backendId)
2939 CV_TRACE_FUNCTION();
2940 CV_TRACE_ARG(backendId);
2942 if( impl->preferableBackend != backendId )
2944 impl->preferableBackend = backendId;
2945 impl->netWasAllocated = false;
// Net::setPreferableTarget — select the computation device. OpenCL targets
// are sanity-checked: without OpenCL support the target falls back to CPU,
// and OPENCL_FP16 falls back to OPENCL when cl_khr_fp16 is unavailable.
// NOTE(review): garbled dump — the `#ifndef HAVE_OPENCL`, `#else` and
// matching `#endif` lines plus braces were dropped, so the two `if` branches
// below are really alternative preprocessor arms; kept byte-identical.
2950 void Net::setPreferableTarget(int targetId)
2952 CV_TRACE_FUNCTION();
2953 CV_TRACE_ARG(targetId);
2955 if( impl->preferableTarget != targetId )
2957 impl->preferableTarget = targetId;
2958 if (IS_DNN_OPENCL_TARGET(targetId))
2961 #ifdef HAVE_INF_ENGINE
2962 if (impl->preferableBackend == DNN_BACKEND_OPENCV)
2964 if (impl->preferableBackend == DNN_BACKEND_DEFAULT ||
2965 impl->preferableBackend == DNN_BACKEND_OPENCV)
2966 #endif // HAVE_INF_ENGINE
2967 impl->preferableTarget = DNN_TARGET_CPU;
// FP16 is only usable when the OpenCL device advertises cl_khr_fp16.
2969 bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
2970 if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
2971 impl->preferableTarget = DNN_TARGET_OPENCL;
// Any target change forces reallocation on the next forward().
2974 impl->netWasAllocated = false;
2979 void Net::setInputsNames(const std::vector<String> &inputBlobNames)
2981 CV_TRACE_FUNCTION();
2983 impl->netInputLayer->setNames(inputBlobNames);
// Net::setInput — bind a blob to a named input pin of the net, remembering
// per-input scale factor and mean for the input layer's preprocessing.
// NOTE(review): garbled dump — the `LayerPin pin` declaration, the validity
// guard and the if/else lines around the copy-vs-clone choice were dropped;
// code kept byte-identical, comments only.
2986 void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
2988 CV_TRACE_FUNCTION();
2989 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
2993 pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name);
2996 CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");
// Grow the input layer's per-pin storage to cover this pin.
2998 LayerData &ld = impl->layers[pin.lid];
2999 const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size());
3000 ld.outputBlobs.resize(numInputs);
3001 ld.outputBlobsWrappers.resize(numInputs);
3002 impl->netInputLayer->inputsData.resize(numInputs);
3003 impl->netInputLayer->scaleFactors.resize(numInputs);
3004 impl->netInputLayer->means.resize(numInputs);
// If the shape is unchanged the existing buffer can be reused (copyTo);
// otherwise the blob is cloned and the net must be reallocated.
3006 MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
3007 Mat blob_ = blob.getMat();
3008 bool oldShape = prevShape == shape(blob_);
3011 blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
3015 ld.outputBlobs[pin.oid] = blob_.clone();
3016 impl->netInputLayer->inputsData[pin.oid] = ld.outputBlobs[pin.oid];
// Mark any device-side wrapper stale so backends re-upload the data.
3019 if (!ld.outputBlobsWrappers[pin.oid].empty())
3021 ld.outputBlobsWrappers[pin.oid]->setHostDirty();
3023 impl->netInputLayer->scaleFactors[pin.oid] = scalefactor;
3024 impl->netInputLayer->means[pin.oid] = mean;
3025 impl->netWasAllocated = impl->netWasAllocated && oldShape;
3028 Mat Net::getParam(LayerId layer, int numParam)
3030 LayerData &ld = impl->getLayerData(layer);
3031 std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
3032 CV_Assert(numParam < (int)layerBlobs.size());
3033 return layerBlobs[numParam];
3036 void Net::setParam(LayerId layer, int numParam, const Mat &blob)
3038 LayerData &ld = impl->getLayerData(layer);
3040 std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
3041 CV_Assert(numParam < (int)layerBlobs.size());
3042 //we don't make strong checks, use this function carefully
3043 layerBlobs[numParam] = blob;
3046 int Net::getLayerId(const String &layer)
3048 return impl->getLayerId(layer);
// parseLayerParams — format a DictValue parameter (kernel/stride/dilation…)
// of 1-3 components as a Graphviz record label fragment, e.g. "(HxW): 3 x 3".
// NOTE(review): garbled dump — the `out << name` prefix line, closing braces
// and the `return out.str();` line were dropped; kept byte-identical.
3051 String parseLayerParams(const String& name, const LayerParams& lp) {
3052 DictValue param = lp.get(name);
3053 std::ostringstream out;
// Label prefix depends on dimensionality: scalar, HxW, or DxHxW.
3055 switch (param.size()) {
3056 case 1: out << ": "; break;
3057 case 2: out << "(HxW): "; break;
3058 case 3: out << "(DxHxW): "; break;
3059 default: CV_Error(Error::StsNotImplemented, format("Unsupported %s size = %d", name.c_str(), param.size()));
// "\\l" is Graphviz's left-justified line break inside a record label.
3061 for (size_t i = 0; i < param.size() - 1; i++) {
3062 out << param.get<int>(i) << " x ";
3064 out << param.get<int>(param.size() - 1) << "\\l";
3070 CV_Assert(!empty());
3072 if (impl->netInputLayer->inputsData.empty())
3073 CV_Error(Error::StsError, "Requested set input");
3075 if (!impl->netWasAllocated)
3078 std::ostringstream out;
3079 std::map<int, LayerData>& map = impl->layers;
3080 int prefBackend = impl->preferableBackend;
3081 std::vector<std::vector<int> > skippedLayers;
3082 std::vector<int> skipId;
3083 std::vector<int> allLayers(map.size(), -1);
3085 Ptr<BackendNode> prevNode;
3086 for (std::map<int, LayerData>::reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit)
3088 std::map<int, Ptr<BackendNode> >::iterator itBackend = rit->second.backendNodes.find(prefBackend);
3089 if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() ||
3090 itBackend->second.empty())
3092 if (rit->second.skip)
3093 skipId.push_back(rit->first);
3094 else if (!skipId.empty())
3096 if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty())
3097 skipId.push_back(rit->first);
3098 else if (idPrev != -1)
3099 skipId.push_back(idPrev);
3101 std::sort(skipId.begin(), skipId.end());
3102 for (int i = 0; i < skipId.size(); i++) {
3103 allLayers[skipId[i]] = skippedLayers.size();
3105 skippedLayers.push_back(skipId);
3111 if (itBackend->second == prevNode)
3112 skipId.push_back(idPrev);
3113 else if (!skipId.empty())
3115 skipId.push_back(idPrev);
3116 std::sort(skipId.begin(), skipId.end());
3117 for (int i = 0; i < skipId.size(); i++) {
3118 allLayers[skipId[i]] = skippedLayers.size();
3120 skippedLayers.push_back(skipId);
3123 idPrev = rit->first;
3124 prevNode = itBackend->second;
3127 String colors[] = {"#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462"};
3129 switch (prefBackend) {
3130 case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break;
3131 case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break;
3132 case DNN_BACKEND_INFERENCE_ENGINE: backend = "DLIE/"; break;
3133 case DNN_BACKEND_OPENCV: backend = "OCV/"; break;
3135 out << "digraph G {" << '\n';
3137 for (std::map<int, LayerData>::iterator it = map.begin(); it != map.end(); ++it)
3139 String name = it->second.params.name;
3140 if (allLayers[it->first] == -1 && !name.empty()) {
3141 out << " " << "\"" << name << "\"" << " [label=\"";
3143 skipId.push_back(it->first);
3145 else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0])
3147 else { // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0]
3148 int cluster = allLayers[it->first];
3149 out << " " << "\"" << "cluster_" << cluster << "\"" << " [label=\"{";
3150 skipId = skippedLayers[allLayers[it->first]]; // vertices in current cluster
3152 for (int i = 0; i < skipId.size(); i++)
3154 LayerParams& lp = map[skipId[i]].params;
3155 if (!lp.name.empty()) {
3159 out << lp.name << "\\n" << lp.type << "\\n";
3160 if (lp.has("kernel_size")) {
3161 String kernel = parseLayerParams("kernel_size", lp);
3163 } else if (lp.has("kernel_h") && lp.has("kernel_w")) {
3164 DictValue h = lp.get("kernel_h");
3165 DictValue w = lp.get("kernel_w");
3166 out << "kernel (HxW): " << h << " x " << w << "\\l";
3168 if (lp.has("stride")) {
3169 String stride = parseLayerParams("stride", lp);
3171 } else if (lp.has("stride_h") && lp.has("stride_w")) {
3172 DictValue h = lp.get("stride_h");
3173 DictValue w = lp.get("stride_w");
3174 out << "stride (HxW): " << h << " x " << w << "\\l";
3176 if (lp.has("dilation")) {
3177 String dilation = parseLayerParams("dilation", lp);
3179 } else if (lp.has("dilation_h") && lp.has("dilation_w")) {
3180 DictValue h = lp.get("dilation_h");
3181 DictValue w = lp.get("dilation_w");
3182 out << "dilation (HxW): " << h << " x " << w << "\\l";
3184 if (lp.has("pad")) {
3185 DictValue pad = lp.get("pad");
3187 switch (pad.size()) {
3188 case 1: out << ": " << pad << "\\l"; break;
3189 case 2: out << "(HxW): (" << pad.get<int>(0) << " x " << pad.get<int>(1) << ")" << "\\l"; break;
3190 case 4: out << "(HxW): (" << pad.get<int>(0) << ", " << pad.get<int>(2) << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(3) << ")" << "\\l"; break;
3191 case 6: out << "(DxHxW): (" << pad.get<int>(0) << ", " << pad.get<int>(3) << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(4)
3192 << ") x (" << pad.get<int>(2) << ", " << pad.get<int>(5) << ")" << "\\l"; break;
3193 default: CV_Error(Error::StsNotImplemented, format("Unsupported pad size = %d", pad.size()));
3195 } else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b")) {
3196 DictValue l = lp.get("pad_l");
3197 DictValue t = lp.get("pad_t");
3198 DictValue r = lp.get("pad_r");
3199 DictValue b = lp.get("pad_b");
3200 out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")" << "\\l";
3202 else if (lp.has("pooled_w") || lp.has("pooled_h")) {
3203 DictValue h = lp.get("pooled_h");
3204 DictValue w = lp.get("pooled_w");
3205 out << "pad (HxW): " << h << " x " << w << "\\l";
3207 if (lp.has("pool")) {
3208 out << "pool: " << lp.get("pool") << "\\l";
3210 if (lp.has("global_pooling")) {
3211 out << "global_pooling: " << lp.get("global_pooling") << "\\l";
3213 if (lp.has("group")) {
3214 out << "group: " << lp.get("group") << "\\l";
3218 if (!it->second.outputBlobs.empty())
3219 out << "output: " << it->second.outputBlobs[0].size << "\\l";
3221 Ptr<BackendNode> layerBackend = it->second.backendNodes[prefBackend];
3222 out << (!layerBackend.empty() ? backend : "OCV/");
3224 switch (it->second.layerInstance->preferableTarget) {
3225 case DNN_TARGET_CPU: out << "CPU\\n"; colorId = layerBackend.empty() ? 0 : 5; break;
3226 case DNN_TARGET_OPENCL: out << "OCL\\n"; colorId = 1; break;
3227 case DNN_TARGET_OPENCL_FP16: out << "OCL_FP16\\n"; colorId = 2; break;
3228 case DNN_TARGET_MYRIAD: out << "MYRIAD\\n"; colorId = 3; break;
3229 case DNN_TARGET_FPGA: out << "FPGA\\n"; colorId = 4; break;
3231 out << ((skipId.size() == 1)? "\" " : " }\" ");
3232 out << "fillcolor=\"" << colors[colorId] << "\" ";
3233 out << "style=filled ";
3234 out << "shape=" << ((skipId.size() == 1)? "box" : "record") << "]" << '\n';
3238 int inputsSize = impl->netInputLayer->outNames.size();
3239 for (std::map<int, LayerData>::iterator it = map.begin(); it != map.end(); ++it)
3241 if (allLayers[it->first] == -1) // node
3243 for (int i = 0; i < it->second.consumers.size(); i++)
3245 int outId = it->second.consumers[i].lid;
3246 if (it == map.begin() && inputsSize > 1)
3247 out << " " << "\"" << it->second.name << "_" << i << "\"" << " -> ";
3249 out << " " << "\"" << it->second.name << "\"" << " -> ";
3250 if (allLayers[outId] == -1) // node
3251 out << "\"" << map[outId].name << "\"" << '\n';
3253 out << "\"" << "cluster_" << allLayers[outId] << "\"" << '\n';
3256 else if (it->first == skippedLayers[allLayers[it->first]].back()) // edges from last layer in cluster
3258 for (int i = 0; i < it->second.consumers.size(); i++)
3260 int outId = it->second.consumers[i].lid;
3261 if (allLayers[outId] == -1) { // node
3262 out << " " << "\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
3263 out << "\"" << map[outId].name << "\"" << '\n';
3265 else if (allLayers[outId] != allLayers[it->first]) { // another cluster
3266 out << " " << "\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
3267 out << "\"" << "cluster_" << allLayers[outId] << "\"" << '\n';
// Net::dumpToFile — write the Graphviz dump of the net into `path`.
// NOTE(review): garbled dump — the lines streaming dump() into the file,
// closing it and the closing brace were dropped; kept byte-identical.
3276 void Net::dumpToFile(const String& path) {
3277 std::ofstream file(path.c_str());
3282 Ptr<Layer> Net::getLayer(LayerId layerId)
3284 LayerData &ld = impl->getLayerData(layerId);
3285 return ld.getLayerInstance();
3288 std::vector<Ptr<Layer> > Net::getLayerInputs(LayerId layerId)
3290 LayerData &ld = impl->getLayerData(layerId);
3291 if (!ld.layerInstance)
3292 CV_Error(Error::StsNullPtr, format("Requested layer \"%s\" was not initialized", ld.name.c_str()));
3294 std::vector<Ptr<Layer> > inputLayers;
3295 inputLayers.reserve(ld.inputLayersId.size());
3296 std::set<int>::iterator it;
3297 for (it = ld.inputLayersId.begin(); it != ld.inputLayersId.end(); ++it) {
3298 inputLayers.push_back(getLayer(*it));
3303 std::vector<String> Net::getLayerNames() const
3305 std::vector<String> res;
3306 res.reserve(impl->layers.size());
3308 Impl::MapIdToLayerData::iterator it;
3309 for (it = impl->layers.begin(); it != impl->layers.end(); it++)
3311 if (it->second.id) //skip Data layer
3312 res.push_back(it->second.name);
3318 bool Net::empty() const
3320 return impl->layers.size() <= 1; //first layer is default Data layer
3323 std::vector<int> Net::getUnconnectedOutLayers() const
3325 std::vector<int> layersIds;
3327 Impl::MapIdToLayerData::iterator it;
3328 for (it = impl->layers.begin(); it != impl->layers.end(); it++)
3330 int lid = it->first;
3331 LayerData &ld = it->second;
3333 if (ld.requiredOutputs.size() == 0)
3334 layersIds.push_back(lid);
3340 std::vector<String> Net::getUnconnectedOutLayersNames() const
3342 std::vector<int> ids = getUnconnectedOutLayers();
3343 const size_t n = ids.size();
3344 std::vector<String> names(n);
3345 for (size_t i = 0; i < n; ++i)
3347 names[i] = impl->layers[ids[i]].name;
// Net::getLayersShapes — per-layer ids plus input/output shape lists for the
// given net input shapes.
// NOTE(review): garbled dump — the `layersIds.clear();` line, the opening
// brace and the loop braces were dropped; code kept byte-identical.
3352 void Net::getLayersShapes(const ShapesVec& netInputShapes,
3353 std::vector<int>& layersIds,
3354 std::vector<ShapesVec>& inLayersShapes,
3355 std::vector<ShapesVec>& outLayersShapes) const
3358 inLayersShapes.clear();
3359 outLayersShapes.clear();
3361 Impl::LayersShapesMap inOutShapes;
3362 impl->getLayersShapes(netInputShapes, inOutShapes);
// Flatten the map into parallel vectors, ordered by layer id.
3364 for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin();
3365 it != inOutShapes.end(); it++)
3367 layersIds.push_back(it->first);
3368 inLayersShapes.push_back(it->second.in);
3369 outLayersShapes.push_back(it->second.out);
3373 void Net::getLayersShapes(const MatShape& netInputShape,
3374 std::vector<int>& layerIds,
3375 std::vector<ShapesVec>& inLayersShapes,
3376 std::vector<ShapesVec>& outLayersShapes) const
3378 getLayersShapes(ShapesVec(1, netInputShape),
3379 layerIds, inLayersShapes, outLayersShapes);
3382 void Net::getLayerShapes(const MatShape& netInputShape,
3384 ShapesVec& inLayerShapes,
3385 ShapesVec& outLayerShapes) const
3387 getLayerShapes(ShapesVec(1, netInputShape),
3388 layerId, inLayerShapes, outLayerShapes);
3392 void Net::getLayerShapes(const ShapesVec& netInputShapes,
3394 ShapesVec& inLayerShapes,
3395 ShapesVec& outLayerShapes) const
3398 impl->getLayerShapes(netInputShapes, layerId, shapes);
3399 inLayerShapes = shapes.in;
3400 outLayerShapes = shapes.out;
// Net::getFLOPS — total FLOPS of the whole net for the given input shapes,
// summed over every layer's own estimate.
// NOTE(review): garbled dump — the `int64 flops = 0;` initializer, the
// `outShapes[i]);` argument continuation and the `return flops;` line were
// dropped; code kept byte-identical, comments only.
3403 int64 Net::getFLOPS(const std::vector<MatShape>& netInputShapes) const
3405 CV_TRACE_FUNCTION();
3408 std::vector<int> ids;
3409 std::vector<std::vector<MatShape> > inShapes, outShapes;
3410 getLayersShapes(netInputShapes, ids, inShapes, outShapes);
3411 CV_Assert(inShapes.size() == outShapes.size());
3412 CV_Assert(inShapes.size() == ids.size());
// Sum per-layer estimates computed by each Layer instance.
3414 for(int i = 0; i < ids.size(); i++)
3416 flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i],
3423 int64 Net::getFLOPS(const MatShape& netInputShape) const
3425 return getFLOPS(std::vector<MatShape>(1, netInputShape));
3428 int64 Net::getFLOPS(const int layerId,
3429 const std::vector<MatShape>& netInputShapes) const
3431 Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
3432 CV_Assert(layer != impl->layers.end());
3435 impl->getLayerShapes(netInputShapes, layerId, shapes);
3437 return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
3440 int64 Net::getFLOPS(const int layerId,
3441 const MatShape& netInputShape) const
3443 return getFLOPS(layerId, std::vector<MatShape>(1, netInputShape));
3446 void Net::getLayerTypes(std::vector<String>& layersTypes) const
3448 layersTypes.clear();
3450 std::map<String, int> layers;
3451 for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
3452 it != impl->layers.end(); it++)
3454 if (layers.find(it->second.type) == layers.end())
3455 layers[it->second.type] = 0;
3456 layers[it->second.type]++;
3459 for (std::map<String, int>::iterator it = layers.begin();
3460 it != layers.end(); it++)
3462 layersTypes.push_back(it->first);
3466 int Net::getLayersCount(const String& layerType) const
3469 for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
3470 it != impl->layers.end(); it++)
3472 if (it->second.type == layerType)
3478 void Net::getMemoryConsumption(const int layerId,
3479 const std::vector<MatShape>& netInputShapes,
3480 size_t& weights, size_t& blobs) const
3482 CV_TRACE_FUNCTION();
3484 Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
3485 CV_Assert(layer != impl->layers.end());
3487 weights = blobs = 0;
3489 for(int i = 0; i < layer->second.params.blobs.size(); i++)
3491 const Mat& weightsBlob = layer->second.params.blobs[i];
3492 weights += weightsBlob.total()*weightsBlob.elemSize();
3495 ShapesVec inLayerShapes, outLayerShapes;
3496 getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes);
3497 for(int i = 0; i < outLayerShapes.size(); i++)
3499 blobs += total(outLayerShapes[i]) * sizeof(float);
3503 void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
3504 size_t& weights, size_t& blobs) const
3506 CV_TRACE_FUNCTION();
3508 std::vector<int> layerIds;
3509 std::vector<size_t> w, b;
3510 getMemoryConsumption(netInputShapes, layerIds, w, b);
3512 weights = blobs = 0;
3513 for(int i = 0; i < layerIds.size(); i++)
3520 void Net::getMemoryConsumption(const int layerId,
3521 const MatShape& netInputShape,
3522 size_t& weights, size_t& blobs) const
3524 getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
3528 void Net::getMemoryConsumption(const MatShape& netInputShape,
3529 size_t& weights, size_t& blobs) const
3531 getMemoryConsumption(std::vector<MatShape>(1, netInputShape),
3535 void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
3536 std::vector<int>& layerIds, std::vector<size_t>& weights,
3537 std::vector<size_t>& blobs) const
3539 CV_TRACE_FUNCTION();
3545 std::vector<std::vector<MatShape> > inLayerShapes, outLayerShapes;
3547 getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes);
3549 for(int i = 0; i < layerIds.size(); i++)
3552 Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]);
3553 CV_Assert(layer != impl->layers.end());
3555 for(int j = 0; j < layer->second.params.blobs.size(); j++)
3557 const Mat& weightsBlob = layer->second.params.blobs[j];
3558 w += weightsBlob.total()*weightsBlob.elemSize();
3561 for(int j = 0; j < outLayerShapes[i].size(); j++)
3563 b += total(outLayerShapes[i][j]) * sizeof(float);
3566 weights.push_back(w);
3571 void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>& layerIds,
3572 std::vector<size_t>& weights, std::vector<size_t>& blobs) const
3574 getMemoryConsumption(std::vector<MatShape>(1, netInputShape), layerIds,
// Enable/disable the layer-fusion optimization for this network.
3578 void Net::enableFusion(bool fusion)
// Only act when the flag actually changes; resetting netWasAllocated forces
// the next forward pass to re-allocate the network with the new setting.
3580 if( impl->fusion != fusion )
3582 impl->fusion = fusion;
3583 impl->netWasAllocated = false;
// NOTE(review): this excerpt appears truncated — the upstream implementation
// also calls impl->clear() inside this branch; confirm against the full file.
3588 void Net::setHalideScheduler(const String& scheduler)
3590 CV_TRACE_FUNCTION();
3591 CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str());
3593 impl->halideConfigFile = scheduler;
3596 int64 Net::getPerfProfile(std::vector<double>& timings)
3598 timings = std::vector<double>(impl->layersTimings.begin() + 1, impl->layersTimings.end());
3599 int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0);
3603 //////////////////////////////////////////////////////////////////////////
3605 Layer::Layer() { preferableTarget = DNN_TARGET_CPU; }
3607 Layer::Layer(const LayerParams ¶ms)
3608 : blobs(params.blobs), name(params.name), type(params.type)
3610 preferableTarget = DNN_TARGET_CPU;
3613 void Layer::setParamsFrom(const LayerParams ¶ms)
3615 blobs = params.blobs;
3620 int Layer::inputNameToIndex(String)
3625 int Layer::outputNameToIndex(const String&)
3630 bool Layer::supportBackend(int backendId)
3632 return backendId == DNN_BACKEND_OPENCV;
3635 Ptr<BackendNode> Layer::initVkCom(const std::vector<Ptr<BackendWrapper> > &)
3637 CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type +
3638 " layers is not defined.");
3639 return Ptr<BackendNode>();
3642 Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
3644 CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type +
3645 " layers is not defined.");
3646 return Ptr<BackendNode>();
3649 Ptr<BackendNode> Layer::initInfEngine(const std::vector<Ptr<BackendWrapper> > &)
3651 CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
3652 " layers is not defined.");
3653 return Ptr<BackendNode>();
// Apply a generic automatic schedule to this layer's Halide pipeline:
// tiling + vectorization for CPU, block/thread mapping for OpenCL GPU.
// Only the output shape (outputs[0]) drives the schedule choices.
// NOTE(review): this excerpt is visibly truncated (the opening #ifdef
// HAVE_HALIDE and several scheduling statements are missing between the
// surviving lines) — treat the fragment as indicative, not complete.
3656 void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*> &inputs,
3657 const std::vector<Mat> &outputs, int targetId) const
3660 CV_TRACE_FUNCTION();
// Halide schedule variables: x/y spatial, c channels, n batch, plus the
// split/fused counterparts used below.
3662 Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"),
3663 xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile");
// Schedule is applied to the last (output) Func of the node's pipeline.
3664 Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();
3666 int outW, outH, outC, outN;
3667 getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);
3669 if (targetId == DNN_TARGET_CPU)
// 1x1 spatial output: schedule over channels/batch instead of x/y.
3671 if (outW == 1 && outH == 1)
3673 if (outC + outN == 1)
// Split channels in chunks of 8 and fuse everything into one parallel loop.
3677 top.split(c, co, ci, 8)
3678 .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
3682 top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile)
// General CPU case: reorder for locality, split rows, vectorize along x.
3689 top.reorder(x, c, y)
3690 .split(y, yo, yi, 2)
3694 .vectorize(x, outW >= 16 ? 16 : outW);
3698 else if (targetId == DNN_TARGET_OPENCL)
3700 if (outW == 1 && outH == 1)
// Channel split sized to the output depth (max 8) for GPU occupancy.
3702 int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
3703 top.split(c, co, ci, c_split)
3704 .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
// General GPU case: tile x/y into blocks and threads.
3710 int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW;
3711 int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH;
3712 // Supported vectorization widths: 2, 3, 4, 8, 16
3713 int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC);
3714 top.split(x, xo, xi, x_split).split(y, yo, yi, y_split)
3715 .split(c, co, ci, c_split)
3716 .gpu_blocks(xo, yo, co)
3717 .gpu_threads(xi, yi)
3718 .reorder(xi, yi, ci, xo, yo, co)
// Any other target id is unsupported by the generic scheduler.
3722 CV_Error(Error::StsNotImplemented, "Unknown target identifier");
3723 #endif // HAVE_HALIDE
3727 Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
3729 return Ptr<BackendNode>();
3732 bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
3733 bool Layer::tryFuse(Ptr<Layer>&) { return false; }
3734 void Layer::getScaleShift(Mat& scale, Mat& shift) const
3740 void Layer::unsetAttached()
3742 setActivation(Ptr<ActivationLayer>());
// Build a vector of (non-owning) pointers to the elements of `v`. The
// pointers alias v's storage, so they stay valid only while `v` is alive and
// not reallocated. const_cast is required because callers pass const vectors
// into legacy APIs taking mutable pointers.
template <typename T>
static void vecToPVec(const std::vector<T> &v, std::vector<T*> &pv)
{
    pv.clear();
    pv.reserve(v.size());
    for (const T& elem : v)
        pv.push_back(const_cast<T*>(&elem));
}
3753 void Layer::finalize(const std::vector<Mat> &inputs, std::vector<Mat> &outputs)
3755 CV_TRACE_FUNCTION();
3756 this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
3759 void Layer::finalize(const std::vector<Mat*> &input, std::vector<Mat> &output)
3761 CV_UNUSED(input);CV_UNUSED(output);
3764 void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
3766 CV_TRACE_FUNCTION();
3767 std::vector<Mat> inputs, outputs;
3768 inputs_arr.getMatVector(inputs);
3769 outputs_arr.getMatVector(outputs);
3771 std::vector<Mat*> inputsp;
3772 vecToPVec(inputs, inputsp);
3773 this->finalize(inputsp, outputs);
3776 std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
3778 CV_TRACE_FUNCTION();
3780 std::vector<Mat> outputs;
3781 this->finalize(inputs, outputs);
3785 void Layer::forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
3787 // We kept this method for compatibility. DNN calls it now only to support users' implementations.
3790 void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
3792 CV_TRACE_FUNCTION();
3793 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
3795 Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
3798 void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
3800 CV_TRACE_FUNCTION();
3801 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
3803 if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
3805 std::vector<UMat> inputs;
3806 std::vector<UMat> outputs;
3807 std::vector<UMat> internals;
3809 std::vector<UMat> orig_inputs;
3810 std::vector<UMat> orig_outputs;
3811 std::vector<UMat> orig_internals;
3813 inputs_arr.getUMatVector(orig_inputs);
3814 outputs_arr.getUMatVector(orig_outputs);
3815 internals_arr.getUMatVector(orig_internals);
3817 inputs.resize(orig_inputs.size());
3818 for (size_t i = 0; i < orig_inputs.size(); i++)
3819 convertFp16(orig_inputs[i], inputs[i]);
3821 outputs.resize(orig_outputs.size());
3822 for (size_t i = 0; i < orig_outputs.size(); i++)
3823 outputs[i].create(shape(orig_outputs[i]), CV_32F);
3825 internals.resize(orig_internals.size());
3826 for (size_t i = 0; i < orig_internals.size(); i++)
3827 internals[i].create(shape(orig_internals[i]), CV_32F);
3829 forward(inputs, outputs, internals);
3831 for (size_t i = 0; i < outputs.size(); i++)
3832 convertFp16(outputs[i], orig_outputs[i]);
3834 // sync results back
3835 outputs_arr.assign(orig_outputs);
3836 internals_arr.assign(orig_internals);
3839 std::vector<Mat> inpvec;
3840 std::vector<Mat> outputs;
3841 std::vector<Mat> internals;
3843 inputs_arr.getMatVector(inpvec);
3844 outputs_arr.getMatVector(outputs);
3845 internals_arr.getMatVector(internals);
3847 std::vector<Mat*> inputs(inpvec.size());
3848 for (int i = 0; i < inpvec.size(); i++)
3849 inputs[i] = &inpvec[i];
3851 this->forward(inputs, outputs, internals);
3853 // sync results back
3854 outputs_arr.assign(outputs);
3855 internals_arr.assign(internals);
3858 void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
3860 CV_TRACE_FUNCTION();
3862 this->finalize(inputs, outputs);
3863 this->forward(inputs, outputs, internals);
3868 bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
3869 const int requiredOutputs,
3870 std::vector<MatShape> &outputs,
3871 std::vector<MatShape> &internals) const
3873 CV_Assert(inputs.size());
3874 outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
3878 //////////////////////////////////////////////////////////////////////////
3880 static Mutex& getLayerFactoryMutex()
3882 static Mutex* volatile instance = NULL;
3883 if (instance == NULL)
3885 cv::AutoLock lock(getInitializationMutex());
3886 if (instance == NULL)
3887 instance = new Mutex();
// Registry type: lower-cased layer type name -> stack of registered
// constructors; the most recently registered constructor (back()) wins when
// instantiating a layer.
3892 typedef std::map<String, std::vector<LayerFactory::Constructor> > LayerFactory_Impl;
3894 static LayerFactory_Impl& getLayerFactoryImpl_()
3896 static LayerFactory_Impl impl;
3900 static LayerFactory_Impl& getLayerFactoryImpl()
3902 static LayerFactory_Impl* volatile instance = NULL;
3903 if (instance == NULL)
3905 cv::AutoLock lock(getLayerFactoryMutex());
3906 if (instance == NULL)
3908 instance = &getLayerFactoryImpl_();
3909 initializeLayerFactory();
3915 void LayerFactory::registerLayer(const String &type, Constructor constructor)
3917 CV_TRACE_FUNCTION();
3918 CV_TRACE_ARG_VALUE(type, "type", type.c_str());
3920 cv::AutoLock lock(getLayerFactoryMutex());
3921 String type_ = toLowerCase(type);
3922 LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);
3924 if (it != getLayerFactoryImpl().end())
3926 if (it->second.back() == constructor)
3927 CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" already was registered");
3928 it->second.push_back(constructor);
3930 getLayerFactoryImpl().insert(std::make_pair(type_, std::vector<Constructor>(1, constructor)));
3933 void LayerFactory::unregisterLayer(const String &type)
3935 CV_TRACE_FUNCTION();
3936 CV_TRACE_ARG_VALUE(type, "type", type.c_str());
3938 cv::AutoLock lock(getLayerFactoryMutex());
3939 String type_ = toLowerCase(type);
3941 LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);
3942 if (it != getLayerFactoryImpl().end())
3944 if (it->second.size() > 1)
3945 it->second.pop_back();
3947 getLayerFactoryImpl().erase(it);
3951 Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
3953 CV_TRACE_FUNCTION();
3954 CV_TRACE_ARG_VALUE(type, "type", type.c_str());
3956 cv::AutoLock lock(getLayerFactoryMutex());
3957 String type_ = toLowerCase(type);
3958 LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type_);
3960 if (it != getLayerFactoryImpl().end())
3962 CV_Assert(!it->second.empty());
3963 return it->second.back()(params);
3967 return Ptr<Layer>(); //NULL
3971 BackendNode::BackendNode(int backendId) : backendId(backendId) {}
3973 BackendNode::~BackendNode() {};
3975 BackendWrapper::BackendWrapper(int backendId, int targetId)
3976 : backendId(backendId), targetId(targetId) {}
3978 BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m)
3980 CV_Error(Error::StsNotImplemented,
3981 "Constructor of backend wrapper must be implemented");
3984 BackendWrapper::BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape)
3986 CV_Error(Error::StsNotImplemented,
3987 "Constructor of backend wrapper must be implemented");
// Out-of-line virtual destructor for the polymorphic BackendWrapper base.
3990 BackendWrapper::~BackendWrapper() {}
3992 Net readNet(const String& _model, const String& _config, const String& _framework)
3994 String framework = toLowerCase(_framework);
3995 String model = _model;
3996 String config = _config;
3997 const std::string modelExt = model.substr(model.rfind('.') + 1);
3998 const std::string configExt = config.substr(config.rfind('.') + 1);
3999 if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" ||
4000 modelExt == "prototxt" || configExt == "prototxt")
4002 if (modelExt == "prototxt" || configExt == "caffemodel")
4003 std::swap(model, config);
4004 return readNetFromCaffe(config, model);
4006 if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" ||
4007 modelExt == "pbtxt" || configExt == "pbtxt")
4009 if (modelExt == "pbtxt" || configExt == "pb")
4010 std::swap(model, config);
4011 return readNetFromTensorflow(model, config);
4013 if (framework == "torch" || modelExt == "t7" || modelExt == "net" ||
4014 configExt == "t7" || configExt == "net")
4016 return readNetFromTorch(model.empty() ? config : model);
4018 if (framework == "darknet" || modelExt == "weights" || configExt == "weights" ||
4019 modelExt == "cfg" || configExt == "cfg")
4021 if (modelExt == "cfg" || configExt == "weights")
4022 std::swap(model, config);
4023 return readNetFromDarknet(config, model);
4025 if (framework == "dldt" || modelExt == "bin" || configExt == "bin" ||
4026 modelExt == "xml" || configExt == "xml")
4028 if (modelExt == "xml" || configExt == "bin")
4029 std::swap(model, config);
4030 return readNetFromModelOptimizer(config, model);
4032 if (framework == "onnx" || modelExt == "onnx")
4034 return readNetFromONNX(model);
4036 CV_Error(Error::StsError, "Cannot determine an origin framework of files: " +
4037 model + (config.empty() ? "" : ", " + config));
4040 Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
4041 const std::vector<uchar>& bufferConfig)
4043 String framework = toLowerCase(_framework);
4044 if (framework == "caffe")
4045 return readNetFromCaffe(bufferConfig, bufferModel);
4046 else if (framework == "tensorflow")
4047 return readNetFromTensorflow(bufferModel, bufferConfig);
4048 else if (framework == "darknet")
4049 return readNetFromDarknet(bufferConfig, bufferModel);
4050 else if (framework == "torch")
4051 CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
4052 else if (framework == "dldt")
4053 CV_Error(Error::StsNotImplemented, "Reading Intel's Model Optimizer models from buffers");
4054 CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework);
4057 Net readNetFromModelOptimizer(const String &xml, const String &bin)
4059 return Net::readFromModelOptimizer(xml, bin);
4062 CV__DNN_INLINE_NS_END