/*M///////////////////////////////////////////////////////////////////////////////////////
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// For Open Source Computer Vision Library
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "op_halide.hpp"
#include "op_inf_engine.hpp"
#include "op_vkcom.hpp"
#include "halide_scheduler.hpp"

#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>

#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/logger.hpp>
CV__DNN_INLINE_NS_BEGIN

// This option is useful for running Valgrind memory-error detection.
static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false);

static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false);

static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
#ifdef HAVE_INF_ENGINE
    (size_t)DNN_BACKEND_INFERENCE_ENGINE
#else
    (size_t)DNN_BACKEND_OPENCV
#endif
);

// Additional checks (slow down execution!)
static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);
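// A minimal usage sketch (assuming a POSIX shell; "my_dnn_app" is a placeholder
// binary name). These switches are read from environment variables at startup:
//   OPENCV_DNN_CHECK_NAN_INF=1 OPENCV_DNN_CHECK_NAN_INF_DUMP=1 ./my_dnn_app
//   OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS=1 valgrind ./my_dnn_app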
//==================================================================================================

class BackendRegistry
{
public:
    typedef std::vector< std::pair<Backend, Target> > BackendsList;
    const BackendsList & getBackends() const { return backends; }
    static BackendRegistry & getRegistry()
    {
        static BackendRegistry impl;
        return impl;
    }
private:
    BackendRegistry()
    {
#ifdef HAVE_HALIDE
        backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU));
        if (cv::ocl::useOpenCL())
            backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL));
#endif // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
        if (checkIETarget(DNN_TARGET_CPU))
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU));
        if (checkIETarget(DNN_TARGET_MYRIAD))
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD));
        if (checkIETarget(DNN_TARGET_FPGA))
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_FPGA));
        if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel())
        {
            if (checkIETarget(DNN_TARGET_OPENCL))
                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL));
            if (checkIETarget(DNN_TARGET_OPENCL_FP16))
                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16));
        }
#endif // HAVE_INF_ENGINE

        if (cv::ocl::useOpenCL())
        {
            backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL));
            backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16));
        }
        backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));

        if (haveVulkan())
            backends.push_back(std::make_pair(DNN_BACKEND_VKCOM, DNN_TARGET_VULKAN));
    }

    static inline bool checkIETarget(int target)
    {
#ifndef HAVE_INF_ENGINE
        return false;
#else
        cv::dnn::Net net;
        cv::dnn::LayerParams lp;
        lp.set("kernel_size", 1);
        lp.set("num_output", 1);
        lp.set("bias_term", false);
        lp.type = "Convolution";
        lp.name = "testLayer";
        lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1)));
        net.addLayerToPrev(lp.name, lp.type, lp);
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
        net.setPreferableTarget(target);
        static int inpDims[] = {1, 2, 3, 4};
        net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0)));
        try
        {
            net.forward();
        }
        catch (...)
        {
            return false;
        }
        return true;
#endif
    }
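    // Note: checkIETarget() builds a deliberately tiny probe net (a single 1x1
    // convolution over a 1x2x3x4 input) and verifies the target simply by
    // attempting a forward pass, returning false if it throws.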
    BackendsList backends;
};

std::vector< std::pair<Backend, Target> > getAvailableBackends()
{
    return BackendRegistry::getRegistry().getBackends();
}

std::vector<Target> getAvailableTargets(Backend be)
{
    if (be == DNN_BACKEND_DEFAULT)
        be = (Backend)PARAM_DNN_BACKEND_DEFAULT;

    std::vector<Target> result;
    const BackendRegistry::BackendsList all_backends = getAvailableBackends();
    for (BackendRegistry::BackendsList::const_iterator i = all_backends.begin(); i != all_backends.end(); ++i)
    {
        if (i->first == be)
            result.push_back(i->second);
    }
    return result;
}
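// A minimal caller-side sketch (hypothetical user code, not part of this file):
//   std::vector<Target> targets = getAvailableTargets(DNN_BACKEND_OPENCV);
//   // e.g. prefer DNN_TARGET_OPENCL when present, otherwise DNN_TARGET_CPU.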
//==================================================================================================

typedef std::vector<MatShape> ShapesVec;

struct LayerShapes
{
    ShapesVec in, out, internal;
    // There is no guarantee that a layer which supports in-place computation
    // will actually be computed in-place (input.data_ptr == output.data_ptr).
    // If a layer reports that it can work in-place and the layers after it
    // no longer use the input blob, we set output = input.
    bool supportInPlace;
    LayerShapes() {supportInPlace = false;}
};
Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
                  const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    Mat blob;
    blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth);
    return blob;
}

void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
                   const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    std::vector<Mat> images(1, image.getMat());
    blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
}

Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
                   const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    Mat blob;
    blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
    return blob;
}

void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
                    Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
    if (ddepth == CV_8U)
    {
        CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
        CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
    }

    std::vector<Mat> images;
    images_.getMatVector(images);
    CV_Assert(!images.empty());
    for (size_t i = 0; i < images.size(); i++)
    {
        Size imgSize = images[i].size();
        if (size == Size())
            size = imgSize;
        if (size != imgSize)
        {
            if (crop)
            {
                float resizeFactor = std::max(size.width / (float)imgSize.width,
                                              size.height / (float)imgSize.height);
                resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
                Rect crop(Point(0.5 * (images[i].cols - size.width),
                                0.5 * (images[i].rows - size.height)),
                          size);
                images[i] = images[i](crop);
            }
            else
                resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
        }

        if (images[i].depth() == CV_8U && ddepth == CV_32F)
            images[i].convertTo(images[i], CV_32F);
        Scalar mean = mean_;
        if (swapRB)
            std::swap(mean[0], mean[2]);

        images[i] -= mean;
        images[i] *= scalefactor;
    }

    size_t nimages = images.size();
    Mat image0 = images[0];
    int nch = image0.channels();
    CV_Assert(image0.dims == 2);
    if (nch == 3 || nch == 4)
    {
        int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();
        Mat ch[4];

        for (size_t i = 0; i < nimages; i++)
        {
            const Mat& image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
            CV_Assert(image.size() == image0.size());

            for (int j = 0; j < nch; j++)
                ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
            if (swapRB)
                std::swap(ch[0], ch[2]);
            split(image, ch);
        }
    }
    else
    {
        CV_Assert(nch == 1);
        int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();

        for (size_t i = 0; i < nimages; i++)
        {
            const Mat& image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 1));
            CV_Assert(image.size() == image0.size());

            image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
        }
    }
}
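// A minimal usage sketch of the API above (hypothetical caller; the values are
// illustrative only):
//   Mat img = imread("input.jpg");
//   Mat blob = blobFromImage(img, 1.0 / 255, Size(224, 224),
//                            Scalar(104, 117, 123), /*swapRB=*/true,
//                            /*crop=*/false, CV_32F);
// The result is an NCHW blob of shape 1x3x224x224 where each pixel has been
// transformed as pixel' = (pixel - mean) * scalefactor, with R and B swapped.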
void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)
{
    CV_TRACE_FUNCTION();

    // A blob is a 4-dimensional matrix in floating-point precision:
    // blob_[0] = batchSize = number of images
    // blob_[1] = number of channels
    // blob_[2] = height
    // blob_[3] = width
    CV_Assert(blob_.depth() == CV_32F);
    CV_Assert(blob_.dims == 4);

    images_.create(cv::Size(1, blob_.size[0]), blob_.depth());

    std::vector<Mat> vectorOfChannels(blob_.size[1]);
    for (int n = 0; n < blob_.size[0]; ++n)
    {
        for (int c = 0; c < blob_.size[1]; ++c)
        {
            vectorOfChannels[c] = getPlane(blob_, n, c);
        }
        cv::merge(vectorOfChannels, images_.getMatRef(n));
    }
}
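// Round-trip sketch (hypothetical caller): imagesFromBlob() is the inverse of
// blobFromImages() in terms of layout only — it rebuilds one 2D multi-channel
// Mat per image from the NCHW blob, but does NOT undo the scaling or mean
// subtraction applied by blobFromImages():
//   std::vector<Mat> imgs;
//   imagesFromBlob(blob, imgs);  // imgs.size() == blob.size[0]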
class OpenCLBackendWrapper : public BackendWrapper
{
public:
    OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
    {
        m.copyTo(umat);
        host = &m;
        hostDirty = false;
    }

    OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
        : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
    {
        Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
        CV_Assert(!base.empty());

        host = &m;

        int shape[] = {1, (int)base->umat.total()};
        umat = base->umat.reshape(1, 2, &shape[0])
                         .colRange(0, host->total())
                         .reshape(1, host->dims, &host->size[0]);
        hostDirty = false;
    }

    static Ptr<BackendWrapper> create(Mat& m)
    {
        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));
    }

    static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
    {
        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));
    }

    static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper> >& wrappers)
    {
        const int numWrappers = wrappers.size();
        std::vector<UMat> mats(wrappers.size());
        for (int i = 0; i < numWrappers; ++i)
        {
            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
            CV_Assert(!umatWrapper.empty());
            umatWrapper->copyToDevice();
            mats[i] = umatWrapper->umat;
        }
        return mats;
    }

    // Replaces all umats in the wrappers with the given ones.
    static void update(const std::vector<Ptr<BackendWrapper> >& wrappers,
                       const std::vector<UMat>& umats)
    {
        CV_Assert(wrappers.size() == umats.size());
        for (int i = 0, n = umats.size(); i < n; ++i)
        {
            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
            CV_Assert(!umatWrapper.empty());
            umatWrapper->umat = umats[i];
        }
    }

    ~OpenCLBackendWrapper() {}

    // Copies data from device to host memory.
    virtual void copyToHost() CV_OVERRIDE
    {
        umat.copyTo(*host);
    }

    virtual void setHostDirty() CV_OVERRIDE
    {
        hostDirty = true;
    }

    void copyToDevice()
    {
        if (hostDirty)
        {
            host->copyTo(umat);
            hostDirty = false;
        }
    }

private:
    UMat umat;
    Mat* host;
    bool hostDirty;
};

struct LayerPin
{
    int lid;
    int oid;

    LayerPin(int layerId = -1, int outputId = -1)
        : lid(layerId), oid(outputId) {}

    bool valid() const
    {
        return (lid >= 0 && oid >= 0);
    }

    bool equal(const LayerPin &r) const
    {
        return (lid == r.lid && oid == r.oid);
    }
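    // The strict weak ordering below lets LayerPin be used as a std::map key
    // (BlobManager keys refCounter/reuseMap/memHosts by LayerPin).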
    bool operator<(const LayerPin &r) const
    {
        return lid < r.lid || (lid == r.lid && oid < r.oid);
    }

    bool operator ==(const LayerPin &r) const
    {
        return lid == r.lid && oid == r.oid;
    }
};

struct LayerData
{
    LayerData() : id(-1), skip(false), flag(0) {}
    LayerData(int _id, const String &_name, const String &_type, LayerParams &_params)
        : id(_id), name(_name), type(_type), params(_params), skip(false), flag(0)
    {
        CV_TRACE_FUNCTION();
    }

    int id;
    String name;
    String type;
    LayerParams params;

    std::vector<LayerPin> inputBlobsId;
    std::set<int> inputLayersId;
    std::set<int> requiredOutputs;
    std::vector<LayerPin> consumers;
    std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
    std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
    std::vector<Ptr<BackendWrapper> > internalBlobsWrappers;

    Ptr<Layer> layerInstance;
    std::vector<Mat> outputBlobs;
    std::vector<Mat*> inputBlobs;
    std::vector<Mat> internals;
    // Computation nodes of implemented backends (except DEFAULT).
    std::map<int, Ptr<BackendNode> > backendNodes;
    // Flag to skip this layer's computation for a specific backend.
    bool skip;
    int flag;

    Ptr<Layer> getLayerInstance()
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(type, "type", type.c_str());

        if (layerInstance)
            return layerInstance;

        layerInstance = LayerFactory::createLayerInstance(type, params);
        if (layerInstance == NULL)
        {
            CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\"");
        }
        return layerInstance;
    }
};

// Fake layer containing the network's input blobs.
struct DataLayer : public Layer
{
    DataLayer() : Layer()
    {
        skip = false;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               (backendId == DNN_BACKEND_INFERENCE_ENGINE && inputsData.size() == 1);
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (outputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> outputs, internals;
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        // Supported modes:
        // | Input type | Output type |
        // |       fp32 |        fp32 |
        for (int i = 0; i < inputsData.size(); ++i)
        {
            double scale = scaleFactors[i];
            Scalar& mean = means[i];
            CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
            CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");

            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
            {
                singleMean = mean[j] == mean[j - 1];
            }

            if (singleMean)
            {
                inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
            }
            else
            {
                for (int n = 0; n < inputsData[i].size[0]; ++n)
                    for (int c = 0; c < inputsData[i].size[1]; ++c)
                    {
                        Mat inp = getPlane(inputsData[i], n, c);
                        Mat out = getPlane(outputs[i], n, c);
                        inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
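                        // Note: convertTo(dst, type, alpha, beta) computes
                        // dst = src*alpha + beta, so alpha = scale and
                        // beta = -mean*scale implement (src - mean) * scale
                        // in a single pass.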
                    }
            }
        }
    }

#ifdef HAVE_OPENCL
    std::vector<Mat> tmp_expressions;
    bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        // Supported modes:
        // | Input type | Output type |
        // |       fp32 |        fp32 |
        // |       fp32 |        fp16 |
        std::vector<UMat> outputs;
        outputs_.getUMatVector(outputs);

        tmp_expressions.clear();
        for (int i = 0; i < inputsData.size(); ++i)
        {
            Mat inputData = inputsData[i];

            double scale = scaleFactors[i];
            Scalar& mean = means[i];

            CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
            {
                singleMean = mean[j] == mean[j - 1];
            }

            if (outputs_.depth() == CV_16S)
            {
                if (singleMean)
                {
                    tmp_expressions.push_back(Mat(scale * (inputsData[i] - mean[0])));
                    convertFp16(tmp_expressions.back(), outputs[i]);
                }
                else
                {
                    for (int n = 0; n < inputsData[i].size[0]; ++n)
                        for (int c = 0; c < inputsData[i].size[1]; ++c)
                        {
                            Mat inp = getPlane(inputsData[i], n, c);

                            std::vector<cv::Range> plane(4, Range::all());
                            plane[0] = Range(n, n + 1);
                            plane[1] = Range(c, c + 1);
                            UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                            tmp_expressions.push_back(scale * (inp - mean[c]));
                            convertFp16(tmp_expressions.back(), out);
                        }
                }
            }
            else
            {
                CV_Assert(outputs_.depth() == CV_32F);
                if (singleMean)
                {
                    inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
                }
                else
                {
                    for (int n = 0; n < inputsData[i].size[0]; ++n)
                        for (int c = 0; c < inputsData[i].size[1]; ++c)
                        {
                            Mat inp = getPlane(inputsData[i], n, c);

                            std::vector<cv::Range> plane(4, Range::all());
                            plane[0] = Range(n, n + 1);
                            plane[1] = Range(c, c + 1);
                            UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                            inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
                        }
                }
            }
        }
        return true;
    }
#endif

    int outputNameToIndex(const String& tgtName) CV_OVERRIDE
    {
        int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin());
        return (idx < (int)outNames.size()) ? idx : -1;
    }

    void setNames(const std::vector<String> &names)
    {
        outNames.assign(names.begin(), names.end());
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == requiredOutputs);
        outputs.assign(inputs.begin(), inputs.end());
        return false;
    }

    virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        std::vector<Mat> outputs;
        outputs_arr.getMatVector(outputs);

        CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(),
                    inputsData.size() == outputs.size());
        skip = true;
        for (int i = 0; skip && i < inputsData.size(); ++i)
        {
            if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar())
                skip = false;
        }
    }

    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
#ifdef HAVE_INF_ENGINE
        CV_CheckEQ(inputsData.size(), (size_t)1, "");
        CV_CheckEQ(inputsData[0].dims, 4, "");
        const size_t numChannels = inputsData[0].size[1];
        CV_Assert(numChannels <= 4);

        // Scale
        InferenceEngine::TensorDesc td(InferenceEngine::Precision::FP32, {numChannels},
                                       InferenceEngine::Layout::C);
        auto weights = InferenceEngine::make_shared_blob<float>(td);
        weights->allocate();

        float* weight_buf = weights->buffer().as<float*>();
        std::fill(weight_buf, weight_buf + numChannels, scaleFactors[0]);

        // Mean subtraction
        auto biases = InferenceEngine::make_shared_blob<float>(td);
        biases->allocate();
        float* bias_buf = biases->buffer().as<float*>();

        for (int i = 0; i < numChannels; ++i)
        {
            bias_buf[i] = -means[0][i] * scaleFactors[0];
        }

        InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name);
        addConstantData("weights", weights, ieLayer);
        addConstantData("biases", biases, ieLayer);
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif // HAVE_INF_ENGINE
        return Ptr<BackendNode>();
    }

    std::vector<String> outNames;
    // Preprocessing parameters for each network input.
    std::vector<double> scaleFactors;
    std::vector<Scalar> means;
    std::vector<Mat> inputsData;
    bool skip;
};

class BlobManager
{
public:
    // Increase the reference counter of a layer output.
    void addReference(const LayerPin& lp)
    {
        std::map<LayerPin, int>::iterator it = refCounter.find(lp);
        if (it == refCounter.end())
            refCounter[lp] = 1;
        else
            it->second += 1;
    }

    void addReferences(const std::vector<LayerPin>& pins)
    {
        for (int i = 0; i < pins.size(); i++)
        {
            addReference(pins[i]);
        }
    }

    // Returns the number of references to the allocated memory used by a
    // specific blob.
    int numReferences(const LayerPin& lp)
    {
        std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
        CV_Assert(mapIt != reuseMap.end());
        LayerPin memHost = mapIt->second;

        std::map<LayerPin, int>::iterator refIt = refCounter.find(memHost);
        CV_Assert(refIt != refCounter.end());
        return refIt->second;
    }

    // Reuse data allocated in <host> inside the <user> blob.
    void reuse(const LayerPin& host, const LayerPin& user)
    {
        CV_Assert(reuseMap.find(user) == reuseMap.end());
        CV_Assert(reuseMap.find(host) != reuseMap.end());
        LayerPin memHost = reuseMap[host];
        reuseMap[user] = memHost;
        if (refCounter.find(memHost) != refCounter.end())
        {
            std::map<LayerPin, int>::iterator userRefIt = refCounter.find(user);
            if (userRefIt != refCounter.end())
            {
                refCounter[memHost] += userRefIt->second;
                refCounter.erase(userRefIt);
            }
            else
                refCounter[memHost] += 1;
        }
    }

    // Decrease the reference counter of the allocated memory behind a specific blob.
    void releaseReference(const LayerPin& lp)
    {
        std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
        CV_Assert(mapIt != reuseMap.end());

        std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
        CV_Assert(refIt != refCounter.end());
        CV_Assert(refIt->second > 0);
        refIt->second -= 1;
    }

    void releaseReferences(const std::vector<LayerPin>& pins)
    {
        for (int i = 0; i < pins.size(); i++)
        {
            releaseReference(pins[i]);
        }
    }
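    // A worked example of this counting scheme (illustrative only, not from the
    // original source): suppose a conv layer's output pin P feeds two consumers.
    //   addReference(P); addReference(P);  // refCounter[P] == 2
    //   reuse(P, Q);                       // blob Q aliases P's memory; reuseMap[Q] == P
    //   releaseReference(P); (twice)       // refCounter[P] drops back to 0, so the
    //                                      // memory becomes a candidate in reuseOrCreate().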
    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half)
    {
        if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
        {
            Mat bestBlob;
            LayerPin bestBlobPin;

            std::map<LayerPin, Mat>::iterator hostIt;
            std::map<LayerPin, int>::iterator refIt;

            const int targetTotal = total(shape);
            int bestBlobTotal = INT_MAX;

            for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
            {
                refIt = refCounter.find(hostIt->first);
                // Use only blobs that were referenced before; a blob that was
                // never referenced might be used as a network output.
                if (refIt != refCounter.end() && refIt->second == 0)
                {
                    Mat& unusedBlob = hostIt->second;
                    if (unusedBlob.total() >= targetTotal &&
                        unusedBlob.total() < bestBlobTotal)
                    {
                        bestBlobPin = hostIt->first;
                        bestBlob = unusedBlob;
                        bestBlobTotal = unusedBlob.total();
                    }
                }
            }
            if (!bestBlob.empty())
            {
                reuse(bestBlobPin, lp);
                dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
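                // The chain above flattens the reused buffer to a single row,
                // trims it to exactly targetTotal elements, and then views it
                // with the requested shape — no new memory is allocated.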
                return;
            }
        }

        {
            // If dst has already been allocated with total(shape) elements,
            // it won't be recreated and the pointer dst.data stays the same.
            dst.create(shape, use_half ? CV_16S : CV_32F);
            addHost(lp, dst);
        }
    }

    void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
                               std::vector<LayerPin>& pinsForInternalBlobs,
                               bool use_half = false)
    {
        CV_TRACE_FUNCTION();

        pinsForInternalBlobs.clear();

        std::vector<Mat>& outputBlobs = ld.outputBlobs,
                &internalBlobs = ld.internals;

        const ShapesVec& outShapes = layerShapes.out,
                internalShapes = layerShapes.internal;

        outputBlobs.resize(std::max((size_t)1, outShapes.size())); // a layer produces at least one output blob
        internalBlobs.resize(internalShapes.size());

        CV_Assert(ld.requiredOutputs.size() <= outShapes.size());

        // Check whether the layer could work in-place.
        bool inPlace = false;
        if (layerShapes.supportInPlace)
        {
            if (ld.inputBlobs.size() == 1)
            {
                // Get the number of references to the input memory.
                int numRef = numReferences(ld.inputBlobsId[0]);
                // In-place is possible only if the current layer is the one and
                // only consumer of this blob.
                inPlace = numRef == 1;
            }
        }

        ShapesVec shapes(outShapes);
        shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
        std::vector<Mat*> blobs;
        for (int i = 0; i < outputBlobs.size(); i++)
        {
            blobs.push_back(&outputBlobs[i]);
        }

        for (int i = 0; i < internalBlobs.size(); i++)
        {
            blobs.push_back(&internalBlobs[i]);
            if (total(internalShapes[i]))
            {
                pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
            }
        }

        addReferences(pinsForInternalBlobs);

        std::map<int, std::vector<int> > idxSizes;
        for (int i = 0; i < shapes.size(); i++)
        {
            idxSizes[total(shapes[i])].push_back(i);
        }

        std::map<int, std::vector<int> >::reverse_iterator it;
        for (it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
        {
            for (int j = 0; j < it->second.size(); j++)
            {
                int index = it->second[j];
                if (total(shapes[index]))
                {
                    LayerPin blobPin(ld.id, index);
                    if (index < outShapes.size() && inPlace)
                    {
                        CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
                        ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
                        reuse(ld.inputBlobsId[0], blobPin);
                    }
                    else
                    {
                        reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half);
                    }
                }
            }
        }
    }

    // Clear internal state. Called before every reallocation.
    void reset()
    {
        CV_TRACE_FUNCTION();
        refCounter.clear();
        reuseMap.clear();
        memHosts.clear();
    }

private:
    // Register allocated memory.
    void addHost(const LayerPin& lp, const Mat& mat)
    {
        CV_Assert(memHosts.find(lp) == memHosts.end());
        reuseMap[lp] = lp;
        memHosts[lp] = mat;
    }

    std::map<LayerPin, int> refCounter;
    // Maps a pin to its origin blob (the one the memory was originally
    // allocated for). For origin blobs, key == value.
    std::map<LayerPin, LayerPin> reuseMap;
    std::map<LayerPin, Mat> memHosts;
};
static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
{
    if (backendId == DNN_BACKEND_OPENCV)
    {
        if (targetId == DNN_TARGET_CPU)
            return Ptr<BackendWrapper>();
        else if (IS_DNN_OPENCL_TARGET(targetId))
            return OpenCLBackendWrapper::create(m);
        else
            CV_Error(Error::StsNotImplemented, "Unknown target identifier");
    }
    else if (backendId == DNN_BACKEND_HALIDE)
    {
        CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
        return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
#endif // HAVE_HALIDE
    }
    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
    {
        CV_Assert(haveInfEngine());
#ifdef HAVE_INF_ENGINE
        return Ptr<BackendWrapper>(new InfEngineBackendWrapper(targetId, m));
#endif // HAVE_INF_ENGINE
    }
    else if (backendId == DNN_BACKEND_VKCOM)
    {
        CV_Assert(haveVulkan());
#ifdef HAVE_VULKAN
        return Ptr<BackendWrapper>(new VkComBackendWrapper(m));
#endif // HAVE_VULKAN
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
    return Ptr<BackendWrapper>();
}
struct Net::Impl
{
    typedef std::map<int, LayerShapes> LayersShapesMap;
    typedef std::map<int, LayerData> MapIdToLayerData;

    Impl()
    {
        // Allocate the fake network input layer.
        netInputLayer = Ptr<DataLayer>(new DataLayer());
        LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
        inpl.id = 0;
        netInputLayer->name = inpl.name = "_input";
        inpl.type = "__NetInputLayer__";
        inpl.layerInstance = netInputLayer;
        layerNameToId.insert(std::make_pair(inpl.name, inpl.id));

        lastLayerId = 0;
        netWasAllocated = false;
        fusion = true;
        preferableBackend = DNN_BACKEND_DEFAULT;
        preferableTarget = DNN_TARGET_CPU;
        skipInfEngineInit = false;
    }

    Ptr<DataLayer> netInputLayer;
    std::vector<LayerPin> blobsToKeep;
    MapIdToLayerData layers;
    std::map<String, int> layerNameToId;
    BlobManager blobManager;
    int preferableBackend;
    int preferableTarget;
    String halideConfigFile;
    bool skipInfEngineInit;
    // Maps host data pointer to a backend-specific wrapper.
    std::map<void*, Ptr<BackendWrapper> > backendWrappers;

    int lastLayerId;

    bool netWasAllocated;
    bool fusion;
    std::vector<int64> layersTimings;
    Ptr<BackendWrapper> wrap(Mat& host)
    {
        if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU)
            return Ptr<BackendWrapper>();

        MatShape shape(host.dims);
        for (int i = 0; i < host.dims; ++i)
            shape[i] = host.size[i];

        void* data = host.data;
        if (backendWrappers.find(data) != backendWrappers.end())
        {
            Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
            if (preferableBackend == DNN_BACKEND_OPENCV)
            {
                CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
                return OpenCLBackendWrapper::create(baseBuffer, host);
            }
            else if (preferableBackend == DNN_BACKEND_HALIDE)
            {
                CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
                return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
#endif // HAVE_HALIDE
            }
            else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
            {
                return wrapMat(preferableBackend, preferableTarget, host);
            }
            else if (preferableBackend == DNN_BACKEND_VKCOM)
            {
#ifdef HAVE_VULKAN
                return Ptr<BackendWrapper>(new VkComBackendWrapper(baseBuffer, host));
#endif // HAVE_VULKAN
            }
            else
                CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
        }

        Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
        backendWrappers[data] = wrapper;
        return wrapper;
    }
    void compileHalide()
    {
        CV_TRACE_FUNCTION();

        CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);

        HalideScheduler scheduler(halideConfigFile);
        std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
        {
            LayerData &ld = it->second;
            Ptr<Layer> layer = ld.layerInstance;
            if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip)
            {
                CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty());
                bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]);
                if (!scheduled)
                {
                    // Use automatic scheduling provided by layer.
                    layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE],
                                                ld.inputBlobs, ld.outputBlobs,
                                                preferableTarget);
                }
                compileList.emplace_back(ld);
            }
        }
        std::atomic<int> progress(0);
        auto fn = ([&] () -> void
        {
            for (;;)
            {
                int id = progress.fetch_add(1);
                if ((size_t)id >= compileList.size())
                    return;
                const LayerData& ld = compileList[id].get();
                Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
                dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
            }
        });

        size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
        num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
        std::vector<std::thread> threads(num_threads - 1);
        for (auto& t: threads) t = std::thread(fn);
        fn(); // process own tasks
        for (auto& t: threads) t.join();
    }
    void clear()
    {
        CV_TRACE_FUNCTION();

        MapIdToLayerData::iterator it;
        for (it = layers.begin(); it != layers.end(); it++)
        {
            if (it->second.id != 0) {
                it->second.inputBlobs.clear();
                it->second.outputBlobs.clear();
                it->second.internals.clear();
            }
            it->second.skip = false;
            //it->second.consumers.clear();
            Ptr<Layer> currLayer = it->second.layerInstance;

            if( currLayer.empty() )
                continue;

            currLayer->unsetAttached();
        }

        layersTimings.clear();
    }
    void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>())
    {
        CV_TRACE_FUNCTION();

        if (preferableBackend == DNN_BACKEND_DEFAULT)
            preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT;

        CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL ||
                  preferableTarget == DNN_TARGET_OPENCL_FP16);
        CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL);
        CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE ||
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL ||
                  preferableTarget == DNN_TARGET_OPENCL_FP16 ||
                  preferableTarget == DNN_TARGET_MYRIAD ||
                  preferableTarget == DNN_TARGET_FPGA);
        CV_Assert(preferableBackend != DNN_BACKEND_VKCOM ||
                  preferableTarget == DNN_TARGET_VULKAN);
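        // Caller-side sketch of how these pairs are selected (hypothetical user
        // code; "model.onnx" is a placeholder):
        //   Net net = readNet("model.onnx");
        //   net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
        //   net.setPreferableTarget(DNN_TARGET_MYRIAD);
        // Any backend/target combination not allowed by the asserts above
        // fails here at setUpNet() time.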
        if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
        {
            if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
            {
#ifndef HAVE_OPENCL
                CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
                preferableTarget = DNN_TARGET_CPU;
#else
                if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
                {
                    // Current implementation is only valid for GPU (#11494)
                    if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
                    {
                        CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
                        preferableTarget = DNN_TARGET_CPU;
                    }
                    else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
                    {
                        CV_LOG_WARNING(NULL,
                                       "DNN: OpenCL target with fp16 precision is not supported "
                                       "with current OpenCL device (tested with Intel GPUs only), "
                                       "switching to OpenCL with fp32 precision.");
                        preferableTarget = DNN_TARGET_OPENCL;
                    }
                }
#endif
            }
            if (preferableBackend == DNN_BACKEND_VKCOM && !haveVulkan())
            {
                preferableBackend = DNN_BACKEND_OPENCV;
                preferableTarget = DNN_TARGET_CPU;
            }

            allocateLayers(blobsToKeep_);

            MapIdToLayerData::iterator it = layers.find(0);
            CV_Assert(it != layers.end());
            it->second.skip = netInputLayer->skip;

            if (!netWasAllocated)
            {
                if (preferableBackend == DNN_BACKEND_HALIDE)
                    compileHalide();
#ifndef HAVE_HALIDE
                CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
#endif
            }

            netWasAllocated = true;
            this->blobsToKeep = blobsToKeep_;
        }
    }
    int getLayerId(const String &layerName)
    {
        std::map<String, int>::iterator it = layerNameToId.find(layerName);
        return (it != layerNameToId.end()) ? it->second : -1;
    }

    int getLayerId(int id)
    {
        MapIdToLayerData::iterator it = layers.find(id);
        return (it != layers.end()) ? id : -1;
    }

    int getLayerId(DictValue &layerDesc)
    {
        if (layerDesc.isInt())
            return getLayerId(layerDesc.get<int>());
        else if (layerDesc.isString())
            return getLayerId(layerDesc.get<String>());

        CV_Assert(layerDesc.isInt() || layerDesc.isString());
        return -1;
    }

    String getLayerName(int id)
    {
        MapIdToLayerData::iterator it = layers.find(id);
        return (it != layers.end()) ? it->second.name : "(unknown layer)";
    }

    LayerData& getLayerData(int id)
    {
        MapIdToLayerData::iterator it = layers.find(id);

        if (it == layers.end())
            CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));

        return it->second;
    }

    LayerData& getLayerData(const String &layerName)
    {
        int id = getLayerId(layerName);

        if (id < 0)
            CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found");

        return getLayerData(id);
    }

    LayerData& getLayerData(const DictValue &layerDesc)
    {
        CV_Assert(layerDesc.isInt() || layerDesc.isString());
        if (layerDesc.isInt())
            return getLayerData(layerDesc.get<int>());
        else /*if (layerDesc.isString())*/
            return getLayerData(layerDesc.get<String>());
    }

    static void addLayerInput(LayerData &ld, int inNum, LayerPin from)
    {
        if ((int)ld.inputBlobsId.size() <= inNum)
        {
            ld.inputBlobsId.resize(inNum + 1);
        }
        else
        {
            LayerPin storedFrom = ld.inputBlobsId[inNum];
            if (storedFrom.valid() && !storedFrom.equal(from))
                CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected",
                                                 inNum, ld.name.c_str()));
        }

        ld.inputBlobsId[inNum] = from;
    }

    int resolvePinOutputName(LayerData &ld, const String &outName)
    {
        if (outName.empty())
            return 0;
        return ld.getLayerInstance()->outputNameToIndex(outName);
    }

    LayerPin getPinByAlias(const String &layerName)
    {
        LayerPin pin;
        pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);

        if (pin.lid >= 0)
            pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);

        return pin;
    }

    std::vector<LayerPin> getLayerOutPins(const String &layerName)
    {
        int lid = (layerName.empty()) ? 0 : getLayerId(layerName);

        std::vector<LayerPin> pins;
        for (int i = 0; i < layers[lid].outputBlobs.size(); i++)
        {
            pins.push_back(LayerPin(lid, i));
        }
        return pins;
    }

    void connect(int outLayerId, int outNum, int inLayerId, int inNum)
    {
        CV_Assert(outLayerId < inLayerId);
        LayerData &ldOut = getLayerData(outLayerId);
        LayerData &ldInp = getLayerData(inLayerId);

        addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum));
        ldOut.requiredOutputs.insert(outNum);
        ldOut.consumers.push_back(LayerPin(inLayerId, outNum));
    }
    void initBackend()
    {
        CV_TRACE_FUNCTION();
        if (preferableBackend == DNN_BACKEND_OPENCV)
            CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
        else if (preferableBackend == DNN_BACKEND_HALIDE)
            initHalideBackend();
        else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
            initInfEngineBackend();
        else if (preferableBackend == DNN_BACKEND_VKCOM)
            initVkComBackend();
        else
            CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
    }

    void initHalideBackend()
    {
        CV_TRACE_FUNCTION();
        CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide());

        // Iterator to current layer.
        MapIdToLayerData::iterator it = layers.begin();
        // Iterator to base layer for fusion. For example, in the case of
        // conv+bn+relu it will be the conv layer.
        MapIdToLayerData::iterator baseIt = layers.begin();
        for (; it != layers.end(); it++)
        {
            LayerData &ldTop = it->second;
            Ptr<Layer> layerTop = ldTop.layerInstance;
            if (!layerTop->supportBackend(preferableBackend))
            {
                // Move the base iterator to a layer that doesn't support the
                // preferable backend, to prevent fusion across layers of
                // different backends.
                baseIt = it;
                continue;
            }
            // Try to do layers fusion.
            LayerData &ldBot = baseIt->second;
            Ptr<Layer> layerBot = ldBot.layerInstance;
            // 1. Check that bottom and top are from the same backend.
            if (it != layers.begin() && layerBot->supportBackend(preferableBackend))
            {
                // 2. Check that current layer works in-place.
                bool inPlace = ldTop.inputBlobs.size() == 1 &&
                               ldBot.outputBlobs.size() == 1 &&
                               ldTop.inputBlobs[0]->data ==
                               ldBot.outputBlobs[0].data;
                if (inPlace)
                {
                    // 3. Try to attach node.
                    CV_Assert(!ldBot.backendNodes[preferableBackend].empty());
                    Ptr<BackendNode> fusedNode =
                        layerTop->tryAttach(ldBot.backendNodes[preferableBackend]);
                    if (!fusedNode.empty())
                    {
                        ldTop.skip = true;
                        ldBot.backendNodes[preferableBackend] = fusedNode;
                        ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers;
                        continue;
                    }
                }
            }
            // No layers fusion.
            ldTop.skip = false;
            ldTop.backendNodes[DNN_BACKEND_HALIDE] =
                layerTop->initHalide(ldTop.inputBlobsWrappers);
            baseIt = it;
        }
    }

#ifdef HAVE_INF_ENGINE
    // Before launching the Inference Engine graph we need to specify its
    // output blobs. This function requests output blobs based on input
    // references of layers from the default backend or from different graphs.
    void addInfEngineNetOutputs(LayerData &ld)
    {
        Ptr<InfEngineBackendNet> layerNet;
        if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end())
        {
            Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
            if (!node.empty())
            {
                Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
                CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
                layerNet = ieNode->net;
            }
        }
        // For every input reference, check whether it belongs to one of the
        // Inference Engine backend graphs; if so, request an output blob.
        // Do nothing if the layer's input is from the same graph.
        for (int i = 0; i < ld.inputBlobsId.size(); ++i)
        {
            LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
            Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
            if (!inpNode.empty())
            {
                Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
                CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
                if (layerNet != ieInpNode->net)
                {
                    // layerNet is empty or nodes are from different graphs.
                    ieInpNode->net->addOutput(ieInpNode->layer.getName());
                }
            }
        }
    }
#endif // HAVE_INF_ENGINE
    void initVkComBackend()
    {
        CV_TRACE_FUNCTION();
        CV_Assert(preferableBackend == DNN_BACKEND_VKCOM);

        MapIdToLayerData::iterator it = layers.begin();
        for (; it != layers.end(); it++)
        {
            LayerData &ld = it->second;
            Ptr<Layer> layer = ld.layerInstance;
            if (!layer->supportBackend(preferableBackend))
                continue;

            try
            {
                ld.backendNodes[DNN_BACKEND_VKCOM] =
                    layer->initVkCom(ld.inputBlobsWrappers);
            }
            catch (const cv::Exception& e)
            {
                CV_LOG_ERROR(NULL, "initVkCom failed, fallback to CPU implementation. " << e.what());
                ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr<BackendNode>();
            }
        }
    }
    void initInfEngineBackend()
    {
        CV_TRACE_FUNCTION();
        CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE, haveInfEngine());
#ifdef HAVE_INF_ENGINE
        MapIdToLayerData::iterator it;
        Ptr<InfEngineBackendNet> net;

        for (it = layers.begin(); it != layers.end(); ++it)
        {
            LayerData &ld = it->second;
            if (ld.id == 0)
            {
                CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
                          (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                {
                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
                    dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
#else
                    dataPtr->setName(netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]);
#endif
                }
            }
            else
            {
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                {
                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
                    dataPtr->name = ld.name;
#else
                    dataPtr->setName(ld.name);
#endif
                }
            }
        }

        if (skipInfEngineInit)
        {
            Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
            CV_Assert(!node.empty());

            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
            CV_Assert(!ieNode.empty());

            for (it = layers.begin(); it != layers.end(); ++it)
            {
                LayerData &ld = it->second;
                if (ld.id == 0)
                {
                    for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
                    {
                        InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
                        dataPtr->name = netInputLayer->outNames[i];
#else
                        dataPtr->setName(netInputLayer->outNames[i]);
#endif
                    }
                }
                else
                {
                    for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                    {
                        InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
                        dataPtr->name = ld.name;
#else
                        dataPtr->setName(ld.name);
#endif
                    }
                }
                ieNode->net->addBlobs(ld.inputBlobsWrappers);
                ieNode->net->addBlobs(ld.outputBlobsWrappers);
                ld.skip = true;
            }
            layers[lastLayerId].skip = false;
            ieNode->net->init(preferableTarget);
            return;
        }

        // Build Inference Engine networks from sets of layers that support this
        // backend. Split the whole model into several Inference Engine networks
        // if some of the layers are not implemented.

        // Set of all input and output blobs wrappers for current network.
        std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
        for (it = layers.begin(); it != layers.end(); ++it)
        {
            LayerData &ld = it->second;
            if (ld.id == 0 && ld.skip)
                continue;
            bool fused = ld.skip;

            Ptr<Layer> layer = ld.layerInstance;
            if (!fused && !layer->supportBackend(preferableBackend))
            {
                bool customizable = ld.id != 0 && ld.outputBlobs.size() == 1 &&
                                    INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2);
                // TODO: there is a bug in Myriad plugin with custom layers shape infer.
                if (preferableTarget == DNN_TARGET_MYRIAD)
                {
                    for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
                    {
                        customizable = ld.inputBlobs[i]->size[0] == 1;
                    }
                }

                // TODO: fix these workarounds
                if (preferableTarget == DNN_TARGET_MYRIAD ||
                    preferableTarget == DNN_TARGET_OPENCL ||
                    preferableTarget == DNN_TARGET_OPENCL_FP16)
                    customizable &= ld.type != "Concat";

                if (preferableTarget == DNN_TARGET_OPENCL ||
                    preferableTarget == DNN_TARGET_OPENCL_FP16)
                    customizable &= ld.type != "Power";

                if (preferableTarget == DNN_TARGET_OPENCL)
                    customizable &= ld.type != "Eltwise";

                if (!customizable)
                {
                    addInfEngineNetOutputs(ld);
                    net = Ptr<InfEngineBackendNet>();
                    netBlobsWrappers.clear();  // Is not used for R5 release but we don't wrap it to #ifdef.
                    layer->preferableTarget = DNN_TARGET_CPU;
                    continue;
                }
            }
            ld.skip = true;  // Initially skip all Inference Engine supported layers.

            // Create a new network if one of the inputs comes from a different
            // Inference Engine graph.
            for (int i = 0; i < ld.inputBlobsId.size(); ++i)
            {
                LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
                Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
                if (!inpNode.empty())
                {
                    Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
                    CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
                    if (ieInpNode->net != net)
                    {
                        net = Ptr<InfEngineBackendNet>();
                        netBlobsWrappers.clear();  // Is not used for R5 release but we don't wrap it to #ifdef.
                        break;
                    }
                }
            }

            Ptr<BackendNode> node;
            if (!net.empty())
            {
                if (fused)
                {
                    bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
                                   ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
                    CV_Assert(inPlace);
                    node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
                    ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
                }
            }
            else
                net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet());

            if (!fused)
            {
                if (layer->supportBackend(preferableBackend))
                    node = layer->initInfEngine(ld.inputBlobsWrappers);
                else
                {
                    node = Ptr<BackendNode>(new InfEngineBackendNode(
                        ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
                }
            }
            else if (node.empty())
                continue;

            CV_Assert(!node.empty());
            ld.backendNodes[preferableBackend] = node;

            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
            CV_Assert(!ieNode.empty());
            ieNode->net = net;

            // Convert weights to FP16 for specific targets.
            if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
                 preferableTarget == DNN_TARGET_MYRIAD ||
                 preferableTarget == DNN_TARGET_FPGA) && !fused)
            {
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
                for (const std::string& name : {"weights", "biases"})
                {
                    auto it = ieNode->layer.getParameters().find(name);
                    if (it != ieNode->layer.getParameters().end())
                    {
                        InferenceEngine::Blob::Ptr bp = it->second.as<InferenceEngine::Blob::Ptr>();
                        it->second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(bp));
                    }
                }
#else
                auto& blobs = ieNode->layer.getConstantData();
                if (blobs.empty())
                {
                    // In the case of a non-weightable layer we have to specify
                    // its precision by adding a dummy blob.
                    auto blob = InferenceEngine::make_shared_blob<int16_t>(
                                    InferenceEngine::Precision::FP16,
                                    InferenceEngine::Layout::C, {1});
                    blob->allocate();
                    blobs[""] = blob;
                }
                else
                {
                    for (auto& it : blobs)
                        it.second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(it.second));
                }
#endif
            }

            if (!fused)
                net->addLayer(ieNode->layer);

            net->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers, ieNode->layer.getName());
            net->addBlobs(ld.inputBlobsWrappers);
            net->addBlobs(ld.outputBlobsWrappers);
            addInfEngineNetOutputs(ld);
        }

        // Initialize all networks.
        for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
        {
            LayerData &ld = it->second;
            if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end())
                continue;

            Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
            if (node.empty())
                continue;

            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
            if (ieNode.empty())
                continue;

            CV_Assert(!ieNode->net.empty());

            if (!ieNode->net->isInitialized())
            {
                ieNode->net->init(preferableTarget);
                ld.skip = false;
            }
        }
#endif // HAVE_INF_ENGINE
    }
    void allocateLayer(int lid, const LayersShapesMap& layersShapes)
    {
        CV_TRACE_FUNCTION();

        LayerData &ld = layers[lid];

        // Already allocated.
        if (ld.flag)
            return;

        size_t ninputs = ld.inputBlobsId.size();
#if 0
        printf("layer %s:", ld.name.c_str());
        for (size_t i = 0; i < ninputs; i++)
        {
            int inp_lid = ld.inputBlobsId[i].lid;
            LayerData &inp_ld = layers[inp_lid];
            int inp_outputs = (int)inp_ld.outputBlobs.size();
            std::cout << " " << inp_ld.name << "(" << inp_outputs;

            for( int j = 0; j < inp_outputs; j++ )
            {
                std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size;
            }
            std::cout << ")";
        }
        printf("\n");
#endif

        // Determine parent layers.
        for (size_t i = 0; i < ninputs; i++)
            ld.inputLayersId.insert(ld.inputBlobsId[i].lid);

        // Allocate parents.
        for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
            allocateLayer(*i, layersShapes);

        // Bind inputs.
        if (ld.id == 0)  // DataLayer
        {
            ninputs = netInputLayer->inputsData.size();
            ld.inputBlobsWrappers.resize(ninputs);
            for (size_t i = 0; i < ninputs; i++)
            {
                ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
            }
        }
        else
        {
            ld.inputBlobs.resize(ninputs);
            ld.inputBlobsWrappers.resize(ninputs);
            for (size_t i = 0; i < ninputs; i++)
            {
                LayerPin from = ld.inputBlobsId[i];
                CV_Assert(from.valid());
                CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
                ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
                ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
            }
        }

        LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);

        CV_Assert(layerShapesIt != layersShapes.end());

        std::vector<LayerPin> pinsForInternalBlobs;
        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
                                          preferableBackend == DNN_BACKEND_OPENCV &&
                                          preferableTarget == DNN_TARGET_OPENCL_FP16);
        ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
        for (int i = 0; i < ld.outputBlobs.size(); ++i)
        {
            ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
        }
        ld.internalBlobsWrappers.resize(ld.internals.size());
        for (int i = 0; i < ld.internals.size(); ++i)
        {
            ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
        }

        Ptr<Layer> layerPtr = ld.getLayerInstance();
        {
            std::vector<Mat> inps(ld.inputBlobs.size());
            for (int i = 0; i < ld.inputBlobs.size(); ++i)
            {
                inps[i] = *ld.inputBlobs[i];
            }
            layerPtr->finalize(inps, ld.outputBlobs);
            layerPtr->preferableTarget = preferableTarget;
#if 0
            std::cout << "\toutputs:";
            size_t noutputs = ld.outputBlobs.size();
            for (size_t j = 0; j < noutputs; j++)
            {
                std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size;
            }
            std::cout << "\n";
#endif
        }

        // After allocating the layer, decrease the reference counters of its input blobs.
        blobManager.releaseReferences(ld.inputBlobsId);
        blobManager.releaseReferences(pinsForInternalBlobs);

        ld.flag = 1;
    }

#if 0
#define printf_(args) printf args
#else
#define printf_(args)
#endif
    void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
    {
        if( !fusion || (preferableBackend != DNN_BACKEND_OPENCV &&
                        preferableBackend != DNN_BACKEND_INFERENCE_ENGINE))
            return;

        CV_TRACE_FUNCTION();

        // Scan through all the layers. If there is a convolution layer followed
        // by an activation layer, we try to embed the activation into the
        // convolution and disable separate execution of the activation.
        std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
                                      blobsToKeep_.end());
        MapIdToLayerData::iterator it;
        for (it = layers.begin(); it != layers.end(); it++)
        {
            int lid = it->first;
            LayerData& ld = layers[lid];
            if( ld.skip )
            {
                printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
                continue;
            }
            printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));

            // Optimization #1: try to fuse batch norm, scaling and/or activation
            // layers with the current layer if they follow it. Normally they are
            // fused with the convolution layer, but some of them (like activation)
            // may be fused with fully-connected, elementwise (+) and some other layers.
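            // For example (illustrative): a Conv -> BatchNorm -> ReLU chain
            // collapses into the single Conv node. The BatchNorm and ReLU
            // LayerData entries get skip = true, and the Conv layer's output
            // blobs are rebound to the last fused layer's outputs below.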
            Ptr<Layer>& currLayer = ld.layerInstance;
            if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
            {
                LayerData* nextData = &layers[ld.consumers[0].lid];
                LayerPin lpNext(ld.consumers[0].lid, 0);
                while (nextData)
                {
                    Ptr<Layer> nextLayer = nextData->layerInstance;
                    if (currLayer->tryFuse(nextLayer))
                    {
                        printf_(("\tfused with %s\n", nextLayer->name.c_str()));
                        nextData->skip = true;
                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                        ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
                        if (nextData->consumers.size() == 1)
                        {
                            int nextLayerId = nextData->consumers[0].lid;
                            nextData = &layers[nextLayerId];
                            lpNext = LayerPin(nextLayerId, 0);
                        }
                        else
                        {
                            nextData = 0;
                            break;
                        }
                    }
                    else
                        break;
                }

                if (preferableBackend != DNN_BACKEND_OPENCV)
                    continue;  // Go to the next layer.

                // TODO: the OpenCL target could support more fusion styles.
                if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
                     (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
                     ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
                     ld.layerInstance->type != "Concat")) )
                    continue;

                while (nextData)
                {
                    // For now, the OpenCL target supports fusion only with the
                    // ReLU/ChannelsPReLU/ReLU6/TanH/Power activations.
                    if (IS_DNN_OPENCL_TARGET(preferableTarget) &&
                        nextData->type != "ReLU" &&
                        nextData->type != "ChannelsPReLU" &&
                        nextData->type != "ReLU6" &&
                        nextData->type != "TanH" &&
                        nextData->type != "Power")
                        break;

                    Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
                    if (nextActivLayer.empty())
                        break;

                    if (currLayer->setActivation(nextActivLayer))
                    {
                        printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                        nextData->skip = true;
                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                        ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
                        if (nextData->consumers.size() == 1)
                        {
                            int nextLayerId = nextData->consumers[0].lid;
                            nextData = &layers[nextLayerId];
                            lpNext = LayerPin(nextLayerId, 0);
                        }
                        else
                        {
                            nextData = 0;
                            break;
                        }
                    }
                    else
                        break;
                }

                // Fuse a convolution layer followed by eltwise + relu.
                if ( IS_DNN_OPENCL_TARGET(preferableTarget) && ld.layerInstance->type == "Convolution" )
                {
                    Ptr<EltwiseLayer> nextEltwiseLayer;
                    if (nextData)
                        nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();

                    if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
                        nextData && nextData->inputBlobsId.size() == 2 )
                    {
                        LayerData *eltwiseData = nextData;

                        // The eltwise layer has two inputs. We need to determine
                        // which is the base convolution layer and which could be
                        // used as its bias.
                        LayerData* biasLayerData = 0;
                        for (int i = 0; i < 2; ++i)
                        {
                            LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid];
                            CV_Assert(downLayerData);
                            while (downLayerData->skip)
                            {
                                if (downLayerData->inputBlobsId.size() == 1)
                                    downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
                                else
                                {
                                    downLayerData = 0;
                                    break;
                                }
                            }
                            if (downLayerData && ld.id == downLayerData->id)
                            {
                                biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid];
                                break;
                            }
                        }
                        CV_Assert(biasLayerData);
                        {
                            if( eltwiseData->consumers.size() == 1 )
                            {
                                // Fuse eltwise + activation layer.
                                if (biasLayerData->id < ld.id)
                                {
                                    nextData = &layers[eltwiseData->consumers[0].lid];
                                    lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
                                    Ptr<ActivationLayer> nextActivLayer;
                                    if (nextData)
                                        nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();

                                    if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
                                            (!nextData->type.compare("ReLU") ||
                                             !nextData->type.compare("ChannelsPReLU") ||
                                             !nextData->type.compare("Power")) &&
                                            currLayer->setActivation(nextActivLayer) )
                                    {
                                        CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
                                        ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
                                        printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
                                        printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                                        eltwiseData->skip = true;
                                        nextData->skip = true;
                                        // This optimization is for cases like:
                                        //
                                        //   some_layer   conv
                                        //       |          |
                                        //       +- eltwise-+
                                        //            |
                                        //          activ
                                        //
                                        // This way all the element-wise computations
                                        // (i.e. some_layer+conv or some_layer*conv)
                                        // would be done at the [conv] layer. So we need to
                                        // replace [conv]'s output blob with [eltwise]'s one,
                                        // considering that [activ] is an in-place layer.
                                        // Also we need to move all the consumers' references.
                                        // To prevent memory collisions (i.e. when the input of
                                        // [conv] and the output of [eltwise] is the same blob)
                                        // we allocate a new blob.
                                        CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
                                        ld.outputBlobs[0] = ld.outputBlobs[0].clone();
                                        ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);

                                        eltwiseData->outputBlobs = ld.outputBlobs;
                                        nextData->outputBlobs = ld.outputBlobs;
                                        eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
                                        nextData->outputBlobsWrappers = ld.outputBlobsWrappers;

                                        // Move references of [activ] layer consumers to the newly allocated blob.
                                        for (int i = 0; i < nextData->consumers.size(); ++i)
                                        {
                                            LayerData& consumer = layers[nextData->consumers[i].lid];
                                            for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
                                            {
                                                if (consumer.inputBlobsId[j].lid == lpNext.lid)
                                                {
                                                    consumer.inputBlobs[j] = &ld.outputBlobs[0];
                                                    consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
                                                    break;
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }

            if (preferableBackend != DNN_BACKEND_OPENCV)
                continue;  // Go to the next layer.

            // Optimization #2: if there is a concat layer that concatenates
            // channels from the inputs together (i.e. axis == 1), then we make
            // the inputs of the concat layer write directly into the
            // concatenation output buffer (and so we eliminate the concatenation
            // layer, because the channels are concatenated implicitly).
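            // Illustrative example: Concat(axis == 1) over inputs of shapes
            // [1x64xHxW] and [1x32xHxW] allocates one [1x96xHxW] output and
            // makes the producers write into channel ranges [0, 64) and
            // [64, 96) of it, turning the Concat layer itself into a no-op.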
2112 Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
2113 if( !concatLayer.empty() && concatLayer->axis == 1 && !concatLayer->padding &&
2114 ld.outputBlobs.size() == 1 )
2116 Mat& output = ld.outputBlobs[0];
2118 if (!ld.outputBlobsWrappers.empty() &&
2119 (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)))
2121 size_t i, ninputs = ld.inputBlobsId.size();
2122 bool conv_layer = true;
2123 for( i = 0; i < ninputs; i++ )
2125 LayerPin pin = ld.inputBlobsId[i];
2126 LayerData* inp_i_data = &layers[pin.lid];
2127 while(inp_i_data->skip &&
2128 inp_i_data->inputBlobsId.size() == 1 &&
2129 inp_i_data->consumers.size() == 1)
2131 pin = inp_i_data->inputBlobsId[0];
2132 inp_i_data = &layers[pin.lid];
2134 conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution");
2138 std::vector<UMat> umat_outputBlobs;
2139 umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2140 umat_output = umat_outputBlobs[0];
2143 // TODO: in general, this optimization can always be done, but
2144 // many layers currently check that the input/output blobs are
2145 // continuous arrays. Unfortunately, this is not true when
2146 // the concatenation optimization is applied with batch_size > 1.
2147 // So, for now, we only apply this optimization in the most popular
2148 // case, batch_size == 1.
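// For instance (an assumed shape): for an output of shape [2, 192, 56, 56],
// the channel slice [2, 64, 56, 56] written by one input is not a continuous
// array, because the two batch items of the slice are separated in memory by
// the other inputs' channels, so layers requiring Mat::isContinuous() would break.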
2149 if( output.dims == 4 && output.size[0] == 1 )
2151 size_t i, ninputs = ld.inputBlobsId.size();
2152 std::vector<LayerPin> realinputs(ninputs);
2153 for( i = 0; i < ninputs; i++ )
2155 LayerPin pin = ld.inputBlobsId[i];
2156 LayerData* inp_i_data = &layers[pin.lid];
2157 while(inp_i_data->skip &&
2158 inp_i_data->inputBlobsId.size() == 1 &&
2159 inp_i_data->consumers.size() == 1)
2161 pin = inp_i_data->inputBlobsId[0];
2162 inp_i_data = &layers[pin.lid];
2164 printf_(("\treal input for %s is %s\n",
2165 layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
2166 inp_i_data->getLayerInstance()->name.c_str()));
2168 if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
2170 realinputs[i] = pin;
2175 // Allocate new memory to prevent collisions during memory
2176 // reusing (see https://github.com/opencv/opencv/pull/10456).
2177 output = output.clone();
2178 if (preferableBackend == DNN_BACKEND_OPENCV &&
2179 IS_DNN_OPENCL_TARGET(preferableTarget))
2181 std::vector<UMat> umats(1);
2182 umat_output = umat_output.clone();
2183 umats[0] = umat_output;
2184 OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
2186 Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
2188 for( i = 0; i < ninputs; i++ )
2190 LayerPin pin = realinputs[i];
2191 LayerData* inp_i_data = &layers[pin.lid];
2192 int channels_i = ld.inputBlobs[i]->size[1];
2193 chrange[1] = Range(ofs, ofs + channels_i);
2194 printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
2195 pin.oid, ofs, ofs + channels_i));
2197 Mat output_slice = output(chrange);
2198 Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
2199 CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
2200 Mat* oldPtr = &curr_output;
2201 curr_output = output_slice;
2202 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
2204 std::vector<UMat> umats(inp_i_data->outputBlobsWrappers.size());
2205 umats[pin.oid] = umat_output(chrange);
2206 OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats);
2208 // Layers that referred to the old input Mat will now refer to the
2209 // new data through the same Mat object.
2210 CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output);
2213 printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
2220 void allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
2222 CV_TRACE_FUNCTION();
2224 MapIdToLayerData::iterator it;
2225 for (it = layers.begin(); it != layers.end(); it++)
2226 it->second.flag = 0;
2228 CV_Assert(!layers[0].outputBlobs.empty());
2229 ShapesVec inputShapes;
2230 for(int i = 0; i < layers[0].outputBlobs.size(); i++)
2232 Mat& inp = layers[0].outputBlobs[i];
2233 CV_Assert(inp.total());
2234 if (preferableBackend == DNN_BACKEND_OPENCV &&
2235 preferableTarget == DNN_TARGET_OPENCL_FP16)
2237 layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
2239 inputShapes.push_back(shape(inp));
2241 LayersShapesMap layersShapes;
2242 getLayersShapes(inputShapes, layersShapes);
2244 blobManager.reset();
2245 backendWrappers.clear();
2246 // Fake references to input blobs.
2247 for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
2248 blobManager.addReference(LayerPin(0, i));
2249 for (it = layers.begin(); it != layers.end(); ++it)
2251 const LayerData& ld = it->second;
2252 blobManager.addReferences(ld.inputBlobsId);
2255 for (int i = 0; i < blobsToKeep_.size(); i++)
2257 blobManager.addReference(blobsToKeep_[i]);
2260 for (it = layers.begin(); it != layers.end(); it++)
2262 int lid = it->first;
2263 allocateLayer(lid, layersShapes);
2266 layersTimings.resize(lastLayerId + 1, 0);
2267 fuseLayers(blobsToKeep_);
2270 void forwardLayer(LayerData &ld)
2272 CV_TRACE_FUNCTION();
2274 Ptr<Layer> layer = ld.layerInstance;
2281 std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
2282 if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
2285 CV_Error(Error::StsNotImplemented, "Default implementation fallback is not supported in asynchronous mode");
2287 if (!layer->supportBackend(DNN_BACKEND_OPENCV))
2288 CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" is not supported by the OpenCV backend",
2289 ld.name.c_str(), ld.type.c_str()));
2291 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
2293 std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
2294 std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2295 std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
2296 layer->forward(umat_inputBlobs,
2298 umat_internalBlobs);
2299 if (DNN_CHECK_NAN_INF)
2302 for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
2304 UMat& u = umat_outputBlobs[i];
2306 if (u.depth() == CV_16S) // FP16
2309 m = u.getMat(ACCESS_READ);
2312 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2313 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2316 else if (!checkRange(m, true, NULL, -1e6, 1e6))
2318 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2319 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2325 for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
2327 UMat& u = umat_inputBlobs[i];
2329 if (u.depth() == CV_16S) // FP16
2332 m = u.getMat(ACCESS_READ);
2333 std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
2334 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2336 for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
2338 UMat& u = umat_outputBlobs[i];
2340 if (u.depth() == CV_16S) // FP16
2343 m = u.getMat(ACCESS_READ);
2344 std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
2345 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2347 for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
2349 UMat& u = umat_internalBlobs[i];
2351 if (u.depth() == CV_16S) // FP16
2354 m = u.getMat(ACCESS_READ);
2355 std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
2356 if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
2358 if (DNN_CHECK_NAN_INF_RAISE_ERROR)
2362 OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
2366 for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
2368 if (!ld.inputBlobsWrappers[i].empty())
2369 ld.inputBlobsWrappers[i]->copyToHost();
2372 std::vector<Mat> inps(ld.inputBlobs.size());
2373 for (int i = 0; i < ld.inputBlobs.size(); ++i)
2375 inps[i] = *ld.inputBlobs[i];
2377 layer->forward(inps, ld.outputBlobs, ld.internals);
2379 if (DNN_CHECK_NAN_INF)
2382 for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
2384 const Mat& m = ld.outputBlobs[i];
2387 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2388 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2391 else if (!checkRange(m, true, NULL, -1e6, 1e6))
2393 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
2394 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
2400 for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
2402 const Mat* pM = ld.inputBlobs[i];
2405 std::cout << "INPUT " << i << " is NULL" << std::endl;
2409 std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
2410 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2412 for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
2414 const Mat& m = ld.outputBlobs[i];
2415 std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
2416 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2418 for (size_t i = 0; i < ld.internals.size(); ++i)
2420 const Mat& m = ld.internals[i];
2421 std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
2422 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
2424 if (DNN_CHECK_NAN_INF_RAISE_ERROR)
2429 for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
2431 if (!ld.outputBlobsWrappers[i].empty())
2432 ld.outputBlobsWrappers[i]->setHostDirty();
2438 Ptr<BackendNode> node = it->second;
2439 CV_Assert(!node.empty());
2440 if (preferableBackend == DNN_BACKEND_HALIDE)
2442 forwardHalide(ld.outputBlobsWrappers, node);
2444 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
2446 forwardInfEngine(ld.outputBlobsWrappers, node, isAsync);
2448 else if (preferableBackend == DNN_BACKEND_VKCOM)
2452 forwardVkCom(ld.outputBlobsWrappers, node);
2454 catch (const cv::Exception& e)
2456 CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. " << e.what());
2457 it->second = Ptr<BackendNode>();
2463 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
2471 layersTimings[ld.id] = tm.getTimeTicks();
2476 void forwardToLayer(LayerData &ld, bool clearFlags = true)
2478 CV_TRACE_FUNCTION();
2482 MapIdToLayerData::iterator it;
2483 for (it = layers.begin(); it != layers.end(); it++)
2484 it->second.flag = 0;
2487 // already forwarded
2492 MapIdToLayerData::iterator it;
2493 for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
2495 LayerData &ld = it->second;
2505 void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
2507 std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
2509 if (inOutShapes[0].in[0].empty() && !layers[0].outputBlobs.empty())
2512 for (int i = 0; i < layers[0].outputBlobs.size(); i++)
2514 Mat& inp = layers[0].outputBlobs[i];
2515 CV_Assert(inp.total());
2516 shapes.push_back(shape(inp));
2518 inOutShapes[0].in = shapes;
2521 if (inOutShapes[id].in.empty())
2523 for(int i = 0; i < inputLayerIds.size(); i++)
2525 int layerId = inputLayerIds[i].lid;
2526 LayersShapesMap::iterator it =
2527 inOutShapes.find(layerId);
2528 if(it == inOutShapes.end() ||
2529 it->second.out.empty())
2531 getLayerShapesRecursively(layerId, inOutShapes);
2533 const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
2534 inOutShapes[id].in.push_back(shape);
2537 const ShapesVec& is = inOutShapes[id].in;
2538 ShapesVec& os = inOutShapes[id].out;
2539 ShapesVec& ints = inOutShapes[id].internal;
2540 int requiredOutputs = layers[id].requiredOutputs.size();
2541 inOutShapes[id].supportInPlace =
2542 layers[id].getLayerInstance()->getMemoryShapes(is, requiredOutputs, os, ints);
2545 void getLayersShapes(const ShapesVec& netInputShapes,
2546 LayersShapesMap& inOutShapes)
2548 inOutShapes.clear();
2550 inOutShapes[0].in = netInputShapes; //insert shape for first input layer
2551 for (MapIdToLayerData::iterator it = layers.begin();
2552 it != layers.end(); it++)
2554 getLayerShapesRecursively(it->first, inOutShapes);
2558 void getLayerShapes(const ShapesVec& netInputShapes,
2560 LayerShapes& shapes)
2562 LayersShapesMap inOutShapes;
2563 inOutShapes[0].in = netInputShapes; //insert shape for first input layer
2564 getLayerShapesRecursively(layerId, inOutShapes);
2565 shapes = inOutShapes[layerId];
2568 LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
2570 return *std::max_element(pins.begin(), pins.end());
2573 Mat getBlob(const LayerPin& pin)
2575 CV_TRACE_FUNCTION();
2578 CV_Error(Error::StsObjectNotFound, "Requested blob not found");
2580 LayerData &ld = layers[pin.lid];
2581 if ((size_t)pin.oid >= ld.outputBlobs.size())
2583 CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produces only %zu outputs, "
2584 "but output #%d was requested", ld.name.c_str(),
2585 ld.outputBlobs.size(), pin.oid));
2587 if (preferableTarget != DNN_TARGET_CPU)
2589 CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
2590 // Transfer data to CPU if it's required.
2591 ld.outputBlobsWrappers[pin.oid]->copyToHost();
2594 if (ld.outputBlobs[pin.oid].depth() == CV_16S)
2596 convertFp16(ld.outputBlobs[pin.oid], output_blob);
2600 return ld.outputBlobs[pin.oid];
2603 Mat getBlob(String outputName)
2605 return getBlob(getPinByAlias(outputName));
2609 AsyncArray getBlobAsync(const LayerPin& pin)
2611 CV_TRACE_FUNCTION();
2612 #ifdef HAVE_INF_ENGINE
2614 CV_Error(Error::StsObjectNotFound, "Requested blob not found");
2616 LayerData &ld = layers[pin.lid];
2617 if ((size_t)pin.oid >= ld.outputBlobs.size())
2619 CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produces only %d outputs, "
2620 "but output #%d was requested", ld.name.c_str(),
2621 (int)ld.outputBlobs.size(), (int)pin.oid));
2623 if (preferableTarget != DNN_TARGET_CPU)
2625 CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
2626 // Transfer data to CPU if it's required.
2627 ld.outputBlobsWrappers[pin.oid]->copyToHost();
2629 CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE);
2631 Ptr<InfEngineBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<InfEngineBackendWrapper>();
2632 return std::move(wrapper->futureMat);
2634 CV_Error(Error::StsNotImplemented, "DNN_BACKEND_INFERENCE_ENGINE backend is required");
2638 AsyncArray getBlobAsync(String outputName)
2640 return getBlobAsync(getPinByAlias(outputName));
2645 Net::Net() : impl(new Net::Impl)
2649 Net Net::readFromModelOptimizer(const String& xml, const String& bin)
2651 #ifndef HAVE_INF_ENGINE
2652 CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
2654 InferenceEngine::CNNNetReader reader;
2655 reader.ReadNetwork(xml);
2656 reader.ReadWeights(bin);
2658 InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
2660 std::vector<String> inputsNames;
2661 std::vector<MatShape> inp_shapes;
2662 for (auto& it : ieNet.getInputsInfo())
2664 inputsNames.push_back(it.first);
2665 std::vector<size_t> dims = it.second->getTensorDesc().getDims();
2666 inp_shapes.push_back(std::vector<int>(dims.begin(), dims.end()));
2670 cvNet.setInputsNames(inputsNames);
2672 // set empty input to determine input shapes
2673 for (int inp_id = 0; inp_id < inputsNames.size(); ++inp_id)
2675 cvNet.setInput(Mat(inp_shapes[inp_id], CV_32F), inputsNames[inp_id]);
2678 Ptr<InfEngineBackendNode> backendNode(new InfEngineBackendNode(InferenceEngine::Builder::Layer("")));
2679 backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
2680 for (auto& it : ieNet.getOutputsInfo())
2682 Ptr<Layer> cvLayer(new InfEngineBackendLayer(ieNet));
2683 InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str());
2687 int lid = cvNet.addLayer(it.first, "", lp);
2689 LayerData& ld = cvNet.impl->layers[lid];
2690 cvLayer->name = it.first;
2691 cvLayer->type = ieLayer->type;
2692 ld.layerInstance = cvLayer;
2693 ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode;
2695 for (int i = 0; i < inputsNames.size(); ++i)
2696 cvNet.connect(0, i, lid, i);
2698 cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
2700 cvNet.impl->skipInfEngineInit = true;
2702 #endif // HAVE_INF_ENGINE
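// A minimal usage sketch (hypothetical file names), assuming OpenCV was
// built with Inference Engine support:
//
//     Net net = Net::readFromModelOptimizer("model.xml", "model.bin");
//     net.setInput(blob);
//     Mat out = net.forward();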
2709 int Net::addLayer(const String &name, const String &type, LayerParams ¶ms)
2711 CV_TRACE_FUNCTION();
2713 if (impl->getLayerId(name) >= 0)
2715 CV_Error(Error::StsBadArg, "Layer \"" + name + "\" is already in the net");
2719 int id = ++impl->lastLayerId;
2720 impl->layerNameToId.insert(std::make_pair(name, id));
2721 impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));
2726 int Net::addLayerToPrev(const String &name, const String &type, LayerParams ¶ms)
2728 CV_TRACE_FUNCTION();
2730 int prvLid = impl->lastLayerId;
2731 int newLid = this->addLayer(name, type, params);
2732 this->connect(prvLid, 0, newLid, 0);
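// Manual graph construction sketch (the layer name, type, and parameter are
// illustrative; "Convolution" must be a registered layer type):
//
//     Net net;
//     net.setInputsNames(std::vector<String>(1, "data"));
//     LayerParams lp;
//     lp.set("num_output", 64);
//     int id = net.addLayer("conv1", "Convolution", lp);
//     net.connect(0, 0, id, 0);  // wire the input layer (id 0) to conv1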
2736 void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum)
2738 CV_TRACE_FUNCTION();
2740 impl->connect(outLayerId, outNum, inpLayerId, inpNum);
2743 void Net::connect(String _outPin, String _inPin)
2745 CV_TRACE_FUNCTION();
2747 LayerPin outPin = impl->getPinByAlias(_outPin);
2748 LayerPin inpPin = impl->getPinByAlias(_inPin);
2750 CV_Assert(outPin.valid() && inpPin.valid());
2752 impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
2755 Mat Net::forward(const String& outputName)
2757 CV_TRACE_FUNCTION();
2759 String layerName = outputName;
2761 if (layerName.empty())
2762 layerName = getLayerNames().back();
2764 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
2765 impl->setUpNet(pins);
2766 impl->forwardToLayer(impl->getLayerData(layerName));
2768 return impl->getBlob(layerName);
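// Single-output inference sketch ("prob" is an assumed output layer name;
// an empty name selects the last layer, as implemented above):
//
//     net.setInput(blob);
//     Mat prob = net.forward("prob");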
2771 AsyncArray Net::forwardAsync(const String& outputName)
2773 CV_TRACE_FUNCTION();
2775 String layerName = outputName;
2777 if (layerName.empty())
2778 layerName = getLayerNames().back();
2780 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
2781 impl->setUpNet(pins);
2783 if (impl->preferableBackend != DNN_BACKEND_INFERENCE_ENGINE)
2784 CV_Error(Error::StsNotImplemented, "Asynchronous forward is supported only by the DNN_BACKEND_INFERENCE_ENGINE backend");
2786 impl->isAsync = true;
2787 impl->forwardToLayer(impl->getLayerData(layerName));
2788 impl->isAsync = false;
2790 return impl->getBlobAsync(layerName);
2792 CV_Error(Error::StsNotImplemented, "Asynchronous forward requires a build with C++11 support");
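// Asynchronous usage sketch (requires the Inference Engine backend):
//
//     net.setInput(blob);
//     AsyncArray futureOut = net.forwardAsync();
//     Mat out;
//     futureOut.get(out);  // blocks until the result is ready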
2796 void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
2798 CV_TRACE_FUNCTION();
2800 String layerName = outputName;
2802 if (layerName.empty())
2803 layerName = getLayerNames().back();
2805 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
2806 impl->setUpNet(pins);
2807 impl->forwardToLayer(impl->getLayerData(layerName));
2809 LayerPin pin = impl->getPinByAlias(layerName);
2810 LayerData &ld = impl->layers[pin.lid];
2812 if (outputBlobs.isUMat())
2814 impl->getBlob(layerName).copyTo(outputBlobs);
2816 else if (outputBlobs.isMat())
2818 outputBlobs.assign(impl->getBlob(layerName));
2820 else if (outputBlobs.isMatVector())
2822 if (impl->preferableTarget != DNN_TARGET_CPU)
2824 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2826 CV_Assert(!ld.outputBlobsWrappers[i].empty());
2827 ld.outputBlobsWrappers[i]->copyToHost();
2830 if (ld.outputBlobs[0].depth() == CV_32F)
2832 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
2833 outputvec = ld.outputBlobs;
2835 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
2836 outputvec.resize(ld.outputBlobs.size());
2837 for (int i = 0; i < outputvec.size(); i++)
2838 convertFp16(ld.outputBlobs[i], outputvec[i]);
2841 else if (outputBlobs.isUMatVector())
2843 std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
2845 if (impl->preferableBackend == DNN_BACKEND_OPENCV &&
2846 IS_DNN_OPENCL_TARGET(impl->preferableTarget))
2848 if (impl->preferableTarget == DNN_TARGET_OPENCL)
2849 outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2850 else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
2852 std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2853 outputvec.resize(out_vec.size());
2854 for (int i = 0; i < out_vec.size(); i++)
2855 convertFp16(out_vec[i], outputvec[i]);
2860 outputvec.resize(ld.outputBlobs.size());
2861 for (int i = 0; i < outputvec.size(); ++i)
2862 ld.outputBlobs[i].copyTo(outputvec[i]);
2867 void Net::forward(OutputArrayOfArrays outputBlobs,
2868 const std::vector<String>& outBlobNames)
2870 CV_TRACE_FUNCTION();
2872 std::vector<LayerPin> pins;
2873 for (int i = 0; i < outBlobNames.size(); i++)
2875 pins.push_back(impl->getPinByAlias(outBlobNames[i]));
2878 impl->setUpNet(pins);
2880 LayerPin out = impl->getLatestLayerPin(pins);
2882 impl->forwardToLayer(impl->getLayerData(out.lid));
2884 std::vector<Mat> matvec;
2885 for (int i = 0; i < pins.size(); i++)
2887 matvec.push_back(impl->getBlob(pins[i]));
2890 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
2894 void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
2895 const std::vector<String>& outBlobNames)
2897 CV_TRACE_FUNCTION();
2899 std::vector<LayerPin> pins;
2900 for (int i = 0; i < outBlobNames.size(); i++)
2902 pins.push_back(impl->getPinByAlias(outBlobNames[i]));
2905 impl->setUpNet(pins);
2907 LayerPin out = impl->getLatestLayerPin(pins);
2909 impl->forwardToLayer(impl->getLayerData(out.lid));
2911 outputBlobs.resize(outBlobNames.size());
2912 for (int i = 0; i < outBlobNames.size(); i++)
2914 std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
2915 outputBlobs[i].resize(lp.size());
2916 for (int j = 0; j < lp.size(); j++)
2918 outputBlobs[i][j] = impl->getBlob(lp[j]);
2923 void Net::setPreferableBackend(int backendId)
2925 CV_TRACE_FUNCTION();
2926 CV_TRACE_ARG(backendId);
2928 if( impl->preferableBackend != backendId )
2930 impl->preferableBackend = backendId;
2931 impl->netWasAllocated = false;
2936 void Net::setPreferableTarget(int targetId)
2938 CV_TRACE_FUNCTION();
2939 CV_TRACE_ARG(targetId);
2941 if( impl->preferableTarget != targetId )
2943 impl->preferableTarget = targetId;
2944 if (IS_DNN_OPENCL_TARGET(targetId))
2947 #ifdef HAVE_INF_ENGINE
2948 if (impl->preferableBackend == DNN_BACKEND_OPENCV)
2950 if (impl->preferableBackend == DNN_BACKEND_DEFAULT ||
2951 impl->preferableBackend == DNN_BACKEND_OPENCV)
2952 #endif // HAVE_INF_ENGINE
2953 impl->preferableTarget = DNN_TARGET_CPU;
2955 bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
2956 if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
2957 impl->preferableTarget = DNN_TARGET_OPENCL;
2960 impl->netWasAllocated = false;
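// Typical backend/target selection (a sketch; availability depends on the
// build options and installed hardware, and the target may be adjusted by
// the checks above):
//
//     net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
//     net.setPreferableTarget(DNN_TARGET_MYRIAD);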
2965 void Net::setInputsNames(const std::vector<String> &inputBlobNames)
2967 CV_TRACE_FUNCTION();
2969 impl->netInputLayer->setNames(inputBlobNames);
2972 void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
2974 CV_TRACE_FUNCTION();
2975 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
2979 pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name);
2982 CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");
2984 LayerData &ld = impl->layers[pin.lid];
2985 const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size());
2986 ld.outputBlobs.resize(numInputs);
2987 ld.outputBlobsWrappers.resize(numInputs);
2988 impl->netInputLayer->inputsData.resize(numInputs);
2989 impl->netInputLayer->scaleFactors.resize(numInputs);
2990 impl->netInputLayer->means.resize(numInputs);
2992 MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
2993 Mat blob_ = blob.getMat();
2994 bool oldShape = prevShape == shape(blob_);
2997 blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
3001 ld.outputBlobs[pin.oid] = blob_.clone();
3002 impl->netInputLayer->inputsData[pin.oid] = ld.outputBlobs[pin.oid];
3005 if (!ld.outputBlobsWrappers[pin.oid].empty())
3007 ld.outputBlobsWrappers[pin.oid]->setHostDirty();
3009 impl->netInputLayer->scaleFactors[pin.oid] = scalefactor;
3010 impl->netInputLayer->means[pin.oid] = mean;
3011 impl->netWasAllocated = impl->netWasAllocated && oldShape;
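// Usage sketch with blobFromImage (the scale, size, and input name are
// illustrative and must match the loaded model):
//
//     Mat blob = blobFromImage(img, 1.0 / 255.0, Size(224, 224), Scalar(), true, false);
//     net.setInput(blob, "data");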
3014 Mat Net::getParam(LayerId layer, int numParam)
3016 LayerData &ld = impl->getLayerData(layer);
3017 std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
3018 CV_Assert(numParam < (int)layerBlobs.size());
3019 return layerBlobs[numParam];
3022 void Net::setParam(LayerId layer, int numParam, const Mat &blob)
3024 LayerData &ld = impl->getLayerData(layer);
3026 std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
3027 CV_Assert(numParam < (int)layerBlobs.size());
3028 // We don't make strong checks here; use this function carefully.
3029 layerBlobs[numParam] = blob;
3032 int Net::getLayerId(const String &layer)
3034 return impl->getLayerId(layer);
3037 String parseLayerParams(const String& name, const LayerParams& lp) {
3038 DictValue param = lp.get(name);
3039 std::ostringstream out;
3041 switch (param.size()) {
3042 case 1: out << ": "; break;
3043 case 2: out << "(HxW): "; break;
3044 case 3: out << "(DxHxW): "; break;
3045 default: CV_Error(Error::StsNotImplemented, format("Unsupported %s size = %d", name.c_str(), param.size()));
3047 for (size_t i = 0; i < param.size() - 1; i++) {
3048 out << param.get<int>(i) << " x ";
3050 out << param.get<int>(param.size() - 1) << "\\l";
3056 CV_Assert(!empty());
3058 if (impl->netInputLayer->inputsData.empty())
3059 CV_Error(Error::StsError, "Network input was not set; call Net::setInput() first");
3061 if (!impl->netWasAllocated)
3064 std::ostringstream out;
3065 std::map<int, LayerData>& map = impl->layers;
3066 int prefBackend = impl->preferableBackend;
3067 std::vector<std::vector<int> > skippedLayers;
3068 std::vector<int> skipId;
3069 std::vector<int> allLayers(map.size(), -1);
3071 Ptr<BackendNode> prevNode;
3072 for (std::map<int, LayerData>::reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit)
3074 std::map<int, Ptr<BackendNode> >::iterator itBackend = rit->second.backendNodes.find(prefBackend);
3075 if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() ||
3076 itBackend->second.empty())
3078 if (rit->second.skip)
3079 skipId.push_back(rit->first);
3080 else if (!skipId.empty())
3082 if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty())
3083 skipId.push_back(rit->first);
3084 else if (idPrev != -1)
3085 skipId.push_back(idPrev);
3087 std::sort(skipId.begin(), skipId.end());
3088 for (int i = 0; i < skipId.size(); i++) {
3089 allLayers[skipId[i]] = skippedLayers.size();
3091 skippedLayers.push_back(skipId);
3097 if (itBackend->second == prevNode)
3098 skipId.push_back(idPrev);
3099 else if (!skipId.empty())
3101 skipId.push_back(idPrev);
3102 std::sort(skipId.begin(), skipId.end());
3103 for (int i = 0; i < skipId.size(); i++) {
3104 allLayers[skipId[i]] = skippedLayers.size();
3106 skippedLayers.push_back(skipId);
3109 idPrev = rit->first;
3110 prevNode = itBackend->second;
3113 String colors[] = {"#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462"};
3115 switch (prefBackend) {
3116 case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break;
3117 case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break;
3118 case DNN_BACKEND_INFERENCE_ENGINE: backend = "DLIE/"; break;
3119 case DNN_BACKEND_OPENCV: backend = "OCV/"; break;
3121 out << "digraph G {" << '\n';
3123 for (std::map<int, LayerData>::iterator it = map.begin(); it != map.end(); ++it)
3125 String name = it->second.params.name;
3126 if (allLayers[it->first] == -1 && !name.empty()) {
3127 out << " " << "\"" << name << "\"" << " [label=\"";
3129 skipId.push_back(it->first);
3131 else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0])
3133 else { // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0]
3134 int cluster = allLayers[it->first];
3135 out << " " << "\"" << "cluster_" << cluster << "\"" << " [label=\"{";
3136 skipId = skippedLayers[allLayers[it->first]]; // vertices in current cluster
3138 for (int i = 0; i < skipId.size(); i++)
3140 LayerParams& lp = map[skipId[i]].params;
3141 if (!lp.name.empty()) {
3145 out << lp.name << "\\n" << lp.type << "\\n";
3146 if (lp.has("kernel_size")) {
3147 String kernel = parseLayerParams("kernel_size", lp);
3149 } else if (lp.has("kernel_h") && lp.has("kernel_w")) {
3150 DictValue h = lp.get("kernel_h");
3151 DictValue w = lp.get("kernel_w");
3152 out << "kernel (HxW): " << h << " x " << w << "\\l";
3154 if (lp.has("stride")) {
3155 String stride = parseLayerParams("stride", lp);
3157 } else if (lp.has("stride_h") && lp.has("stride_w")) {
3158 DictValue h = lp.get("stride_h");
3159 DictValue w = lp.get("stride_w");
3160 out << "stride (HxW): " << h << " x " << w << "\\l";
3162 if (lp.has("dilation")) {
3163 String dilation = parseLayerParams("dilation", lp);
3165 } else if (lp.has("dilation_h") && lp.has("dilation_w")) {
3166 DictValue h = lp.get("dilation_h");
3167 DictValue w = lp.get("dilation_w");
3168 out << "dilation (HxW): " << h << " x " << w << "\\l";
3170 if (lp.has("pad")) {
3171 DictValue pad = lp.get("pad");
3173 switch (pad.size()) {
3174 case 1: out << ": " << pad << "\\l"; break;
3175 case 2: out << "(HxW): (" << pad.get<int>(0) << " x " << pad.get<int>(1) << ")" << "\\l"; break;
3176 case 4: out << "(HxW): (" << pad.get<int>(0) << ", " << pad.get<int>(2) << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(3) << ")" << "\\l"; break;
3177 case 6: out << "(DxHxW): (" << pad.get<int>(0) << ", " << pad.get<int>(3) << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(4)
3178 << ") x (" << pad.get<int>(2) << ", " << pad.get<int>(5) << ")" << "\\l"; break;
3179 default: CV_Error(Error::StsNotImplemented, format("Unsupported pad size = %d", pad.size()));
3181 } else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b")) {
3182 DictValue l = lp.get("pad_l");
3183 DictValue t = lp.get("pad_t");
3184 DictValue r = lp.get("pad_r");
3185 DictValue b = lp.get("pad_b");
3186 out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")" << "\\l";
3188 else if (lp.has("pooled_w") || lp.has("pooled_h")) {
3189 DictValue h = lp.get("pooled_h");
3190 DictValue w = lp.get("pooled_w");
3191 out << "pooled (HxW): " << h << " x " << w << "\\l";
3193 if (lp.has("pool")) {
3194 out << "pool: " << lp.get("pool") << "\\l";
3196 if (lp.has("global_pooling")) {
3197 out << "global_pooling: " << lp.get("global_pooling") << "\\l";
3199 if (lp.has("group")) {
3200 out << "group: " << lp.get("group") << "\\l";
3204 if (!it->second.outputBlobs.empty())
3205 out << "output: " << it->second.outputBlobs[0].size << "\\l";
3207 Ptr<BackendNode> layerBackend = it->second.backendNodes[prefBackend];
3208 out << (!layerBackend.empty() ? backend : "OCV/");
3210 switch (it->second.layerInstance->preferableTarget) {
3211 case DNN_TARGET_CPU: out << "CPU\\n"; colorId = layerBackend.empty() ? 0 : 5; break;
3212 case DNN_TARGET_OPENCL: out << "OCL\\n"; colorId = 1; break;
3213 case DNN_TARGET_OPENCL_FP16: out << "OCL_FP16\\n"; colorId = 2; break;
3214 case DNN_TARGET_MYRIAD: out << "MYRIAD\\n"; colorId = 3; break;
3215 case DNN_TARGET_FPGA: out << "FPGA\\n"; colorId = 4; break;
3217 out << ((skipId.size() == 1)? "\" " : " }\" ");
3218 out << "fillcolor=\"" << colors[colorId] << "\" ";
3219 out << "style=filled ";
3220 out << "shape=" << ((skipId.size() == 1)? "box" : "record") << "]" << '\n';
3224 int inputsSize = impl->netInputLayer->outNames.size();
3225 for (std::map<int, LayerData>::iterator it = map.begin(); it != map.end(); ++it)
3227 if (allLayers[it->first] == -1) // node
3229 for (int i = 0; i < it->second.consumers.size(); i++)
3231 int outId = it->second.consumers[i].lid;
3232 if (it == map.begin() && inputsSize > 1)
3233 out << " " << "\"" << it->second.name << "_" << i << "\"" << " -> ";
3235 out << " " << "\"" << it->second.name << "\"" << " -> ";
3236 if (allLayers[outId] == -1) // node
3237 out << "\"" << map[outId].name << "\"" << '\n';
3239 out << "\"" << "cluster_" << allLayers[outId] << "\"" << '\n';
3242 else if (it->first == skippedLayers[allLayers[it->first]].back()) // edges from last layer in cluster
3244 for (int i = 0; i < it->second.consumers.size(); i++)
3246 int outId = it->second.consumers[i].lid;
3247 if (allLayers[outId] == -1) { // node
3248 out << " " << "\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
3249 out << "\"" << map[outId].name << "\"" << '\n';
3251 else if (allLayers[outId] != allLayers[it->first]) { // another cluster
3252 out << " " << "\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
3253 out << "\"" << "cluster_" << allLayers[outId] << "\"" << '\n';
3262 void Net::dumpToFile(const String& path) {
3263 std::ofstream file(path.c_str());
3268 Ptr<Layer> Net::getLayer(LayerId layerId)
3270 LayerData &ld = impl->getLayerData(layerId);
3271 return ld.getLayerInstance();
3274 std::vector<Ptr<Layer> > Net::getLayerInputs(LayerId layerId)
3276 LayerData &ld = impl->getLayerData(layerId);
3277 if (!ld.layerInstance)
3278 CV_Error(Error::StsNullPtr, format("Requested layer \"%s\" was not initialized", ld.name.c_str()));
3280 std::vector<Ptr<Layer> > inputLayers;
3281 inputLayers.reserve(ld.inputLayersId.size());
3282 std::set<int>::iterator it;
3283 for (it = ld.inputLayersId.begin(); it != ld.inputLayersId.end(); ++it) {
3284 inputLayers.push_back(getLayer(*it));
3289 std::vector<String> Net::getLayerNames() const
3291 std::vector<String> res;
3292 res.reserve(impl->layers.size());
3294 Impl::MapIdToLayerData::iterator it;
3295 for (it = impl->layers.begin(); it != impl->layers.end(); it++)
3297 if (it->second.id) //skip Data layer
3298 res.push_back(it->second.name);
3304 bool Net::empty() const
3306 return impl->layers.size() <= 1; //first layer is default Data layer
3309 std::vector<int> Net::getUnconnectedOutLayers() const
3311 std::vector<int> layersIds;
3313 Impl::MapIdToLayerData::iterator it;
3314 for (it = impl->layers.begin(); it != impl->layers.end(); it++)
3316 int lid = it->first;
3317 LayerData &ld = it->second;
3319 if (ld.requiredOutputs.size() == 0)
3320 layersIds.push_back(lid);
3326 std::vector<String> Net::getUnconnectedOutLayersNames() const
3328 std::vector<int> ids = getUnconnectedOutLayers();
3329 const size_t n = ids.size();
3330 std::vector<String> names(n);
3331 for (size_t i = 0; i < n; ++i)
3333 names[i] = impl->layers[ids[i]].name;
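// Common pattern for multi-output models such as detectors (a sketch):
//
//     std::vector<String> outNames = net.getUnconnectedOutLayersNames();
//     std::vector<Mat> outs;
//     net.forward(outs, outNames);  // one output blob per unconnected output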
3338 void Net::getLayersShapes(const ShapesVec& netInputShapes,
3339 std::vector<int>& layersIds,
3340 std::vector<ShapesVec>& inLayersShapes,
3341 std::vector<ShapesVec>& outLayersShapes) const
3344 inLayersShapes.clear();
3345 outLayersShapes.clear();
3347 Impl::LayersShapesMap inOutShapes;
3348 impl->getLayersShapes(netInputShapes, inOutShapes);
3350 for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin();
3351 it != inOutShapes.end(); it++)
3353 layersIds.push_back(it->first);
3354 inLayersShapes.push_back(it->second.in);
3355 outLayersShapes.push_back(it->second.out);
3359 void Net::getLayersShapes(const MatShape& netInputShape,
3360 std::vector<int>& layerIds,
3361 std::vector<ShapesVec>& inLayersShapes,
3362 std::vector<ShapesVec>& outLayersShapes) const
3364 getLayersShapes(ShapesVec(1, netInputShape),
3365 layerIds, inLayersShapes, outLayersShapes);
3368 void Net::getLayerShapes(const MatShape& netInputShape,
3370 ShapesVec& inLayerShapes,
3371 ShapesVec& outLayerShapes) const
3373 getLayerShapes(ShapesVec(1, netInputShape),
3374 layerId, inLayerShapes, outLayerShapes);
3378 void Net::getLayerShapes(const ShapesVec& netInputShapes,
3380 ShapesVec& inLayerShapes,
3381 ShapesVec& outLayerShapes) const
3384 impl->getLayerShapes(netInputShapes, layerId, shapes);
3385 inLayerShapes = shapes.in;
3386 outLayerShapes = shapes.out;
3389 int64 Net::getFLOPS(const std::vector<MatShape>& netInputShapes) const
3391 CV_TRACE_FUNCTION();
3394 std::vector<int> ids;
3395 std::vector<std::vector<MatShape> > inShapes, outShapes;
3396 getLayersShapes(netInputShapes, ids, inShapes, outShapes);
3397 CV_Assert(inShapes.size() == outShapes.size());
3398 CV_Assert(inShapes.size() == ids.size());
3400 for(int i = 0; i < ids.size(); i++)
3402 flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i],
3409 int64 Net::getFLOPS(const MatShape& netInputShape) const
3411 return getFLOPS(std::vector<MatShape>(1, netInputShape));
3414 int64 Net::getFLOPS(const int layerId,
3415 const std::vector<MatShape>& netInputShapes) const
3417 Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
3418 CV_Assert(layer != impl->layers.end());
3421 impl->getLayerShapes(netInputShapes, layerId, shapes);
3423 return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
3426 int64 Net::getFLOPS(const int layerId,
3427 const MatShape& netInputShape) const
3429 return getFLOPS(layerId, std::vector<MatShape>(1, netInputShape));
3432 void Net::getLayerTypes(std::vector<String>& layersTypes) const
3434 layersTypes.clear();
3436 std::map<String, int> layers;
3437 for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
3438 it != impl->layers.end(); it++)
3440 if (layers.find(it->second.type) == layers.end())
3441 layers[it->second.type] = 0;
3442 layers[it->second.type]++;
3445 for (std::map<String, int>::iterator it = layers.begin();
3446 it != layers.end(); it++)
3448 layersTypes.push_back(it->first);
3452 int Net::getLayersCount(const String& layerType) const
3455 for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
3456 it != impl->layers.end(); it++)
3458 if (it->second.type == layerType)
3464 void Net::getMemoryConsumption(const int layerId,
3465 const std::vector<MatShape>& netInputShapes,
3466 size_t& weights, size_t& blobs) const
3468 CV_TRACE_FUNCTION();
3470 Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
3471 CV_Assert(layer != impl->layers.end());
3473 weights = blobs = 0;
3475 for(int i = 0; i < layer->second.params.blobs.size(); i++)
3477 const Mat& weightsBlob = layer->second.params.blobs[i];
3478 weights += weightsBlob.total()*weightsBlob.elemSize();
3481 ShapesVec inLayerShapes, outLayerShapes;
3482 getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes);
3483 for(int i = 0; i < outLayerShapes.size(); i++)
3485 blobs += total(outLayerShapes[i]) * sizeof(float);
3489 void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
3490 size_t& weights, size_t& blobs) const
3492 CV_TRACE_FUNCTION();
3494 std::vector<int> layerIds;
3495 std::vector<size_t> w, b;
3496 getMemoryConsumption(netInputShapes, layerIds, w, b);
3498 weights = blobs = 0;
3499 for(int i = 0; i < layerIds.size(); i++)
3506 void Net::getMemoryConsumption(const int layerId,
3507 const MatShape& netInputShape,
3508 size_t& weights, size_t& blobs) const
3510 getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
3514 void Net::getMemoryConsumption(const MatShape& netInputShape,
3515 size_t& weights, size_t& blobs) const
3517 getMemoryConsumption(std::vector<MatShape>(1, netInputShape),
3521 void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
3522 std::vector<int>& layerIds, std::vector<size_t>& weights,
3523 std::vector<size_t>& blobs) const
3525 CV_TRACE_FUNCTION();
3531 std::vector<std::vector<MatShape> > inLayerShapes, outLayerShapes;
3533 getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes);
3535 for(int i = 0; i < layerIds.size(); i++)
3538 Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]);
3539 CV_Assert(layer != impl->layers.end());
3541 for(int j = 0; j < layer->second.params.blobs.size(); j++)
3543 const Mat& weightsBlob = layer->second.params.blobs[j];
3544 w += weightsBlob.total()*weightsBlob.elemSize();
3547 for(int j = 0; j < outLayerShapes[i].size(); j++)
3549 b += total(outLayerShapes[i][j]) * sizeof(float);
3552 weights.push_back(w);
3557 void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>& layerIds,
3558 std::vector<size_t>& weights, std::vector<size_t>& blobs) const
3560 getMemoryConsumption(std::vector<MatShape>(1, netInputShape), layerIds,
3564 void Net::enableFusion(bool fusion)
3566 if( impl->fusion != fusion )
3568 impl->fusion = fusion;
3569 impl->netWasAllocated = false;
3574 void Net::setHalideScheduler(const String& scheduler)
3576 CV_TRACE_FUNCTION();
3577 CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str());
3579 impl->halideConfigFile = scheduler;
3582 int64 Net::getPerfProfile(std::vector<double>& timings)
3584 timings = std::vector<double>(impl->layersTimings.begin() + 1, impl->layersTimings.end());
3585 int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0);
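// Note: the returned total and the per-layer timings are measured in clock
// ticks; convert to milliseconds with getTickFrequency(), e.g. (a sketch):
//
//     std::vector<double> timings;
//     double ms = net.getPerfProfile(timings) * 1000.0 / getTickFrequency();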
3589 //////////////////////////////////////////////////////////////////////////
3591 Layer::Layer() { preferableTarget = DNN_TARGET_CPU; }
3593 Layer::Layer(const LayerParams ¶ms)
3594 : blobs(params.blobs), name(params.name), type(params.type)
3596 preferableTarget = DNN_TARGET_CPU;
3599 void Layer::setParamsFrom(const LayerParams ¶ms)
3601 blobs = params.blobs;
3606 int Layer::inputNameToIndex(String)
3611 int Layer::outputNameToIndex(const String&)
3616 bool Layer::supportBackend(int backendId)
3618 return backendId == DNN_BACKEND_OPENCV;
3621 Ptr<BackendNode> Layer::initVkCom(const std::vector<Ptr<BackendWrapper> > &)
3623 CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type +
3624 " layers is not defined.");
3625 return Ptr<BackendNode>();
3628 Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
3630 CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type +
3631 " layers is not defined.");
3632 return Ptr<BackendNode>();
3635 Ptr<BackendNode> Layer::initInfEngine(const std::vector<Ptr<BackendWrapper> > &)
3637 CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
3638 " layers is not defined.");
3639 return Ptr<BackendNode>();
3642 void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*> &inputs,
3643 const std::vector<Mat> &outputs, int targetId) const
3646 CV_TRACE_FUNCTION();
3648 Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"),
3649 xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile");
3650 Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();
3652 int outW, outH, outC, outN;
3653 getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);
3655 if (targetId == DNN_TARGET_CPU)
3657 if (outW == 1 && outH == 1)
3659 if (outC + outN == 1)
3663 top.split(c, co, ci, 8)
3664 .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
3668 top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile)
3675 top.reorder(x, c, y)
3676 .split(y, yo, yi, 2)
3680 .vectorize(x, outW >= 16 ? 16 : outW);
3684 else if (targetId == DNN_TARGET_OPENCL)
3686 if (outW == 1 && outH == 1)
3688 int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
3689 top.split(c, co, ci, c_split)
3690 .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
3696 int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW;
3697 int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH;
3698 // Supported vectorization widths: 2, 3, 4, 8, 16
3699 int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC);
3700 top.split(x, xo, xi, x_split).split(y, yo, yi, y_split)
3701 .split(c, co, ci, c_split)
3702 .gpu_blocks(xo, yo, co)
3703 .gpu_threads(xi, yi)
3704 .reorder(xi, yi, ci, xo, yo, co)
3709 CV_Error(Error::StsNotImplemented, "Unknown target identifier");
3710 #endif // HAVE_HALIDE
3713 Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
3715 return Ptr<BackendNode>();
3718 bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
3719 bool Layer::tryFuse(Ptr<Layer>&) { return false; }
3720 void Layer::getScaleShift(Mat& scale, Mat& shift) const
3726 void Layer::unsetAttached()
3728 setActivation(Ptr<ActivationLayer>());
3731 template <typename T>
3732 static void vecToPVec(const std::vector<T> &v, std::vector<T*> &pv)
3734 pv.resize(v.size());
3735 for (size_t i = 0; i < v.size(); i++)
3736 pv[i] = const_cast<T*>(&v[i]);
3739 void Layer::finalize(const std::vector<Mat> &inputs, std::vector<Mat> &outputs)
3741 CV_TRACE_FUNCTION();
3742 this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
3745 void Layer::finalize(const std::vector<Mat*> &input, std::vector<Mat> &output)
3747 CV_UNUSED(input); CV_UNUSED(output);
3750 void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
3752 CV_TRACE_FUNCTION();
3753 std::vector<Mat> inputs, outputs;
3754 inputs_arr.getMatVector(inputs);
3755 outputs_arr.getMatVector(outputs);
3757 std::vector<Mat*> inputsp;
3758 vecToPVec(inputs, inputsp);
3759 this->finalize(inputsp, outputs);
3762 std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
3764 CV_TRACE_FUNCTION();
3766 std::vector<Mat> outputs;
3767 this->finalize(inputs, outputs);
3771 void Layer::forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
3773 // We kept this method for compatibility. DNN now calls it only to support users' implementations.
3776 void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
3778 CV_TRACE_FUNCTION();
3779 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
3781 Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
3784 void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
3786 CV_TRACE_FUNCTION();
3787 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
3789 if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
3791 std::vector<UMat> inputs;
3792 std::vector<UMat> outputs;
3793 std::vector<UMat> internals;
3795 std::vector<UMat> orig_inputs;
3796 std::vector<UMat> orig_outputs;
3797 std::vector<UMat> orig_internals;
3799 inputs_arr.getUMatVector(orig_inputs);
3800 outputs_arr.getUMatVector(orig_outputs);
3801 internals_arr.getUMatVector(orig_internals);
3803 inputs.resize(orig_inputs.size());
3804 for (size_t i = 0; i < orig_inputs.size(); i++)
3805 convertFp16(orig_inputs[i], inputs[i]);
3807 outputs.resize(orig_outputs.size());
3808 for (size_t i = 0; i < orig_outputs.size(); i++)
3809 outputs[i].create(shape(orig_outputs[i]), CV_32F);
3811 internals.resize(orig_internals.size());
3812 for (size_t i = 0; i < orig_internals.size(); i++)
3813 internals[i].create(shape(orig_internals[i]), CV_32F);
3815 forward(inputs, outputs, internals);
3817 for (size_t i = 0; i < outputs.size(); i++)
3818 convertFp16(outputs[i], orig_outputs[i]);
3820 // sync results back
3821 outputs_arr.assign(orig_outputs);
3822 internals_arr.assign(orig_internals);
3825 std::vector<Mat> inpvec;
3826 std::vector<Mat> outputs;
3827 std::vector<Mat> internals;
3829 inputs_arr.getMatVector(inpvec);
3830 outputs_arr.getMatVector(outputs);
3831 internals_arr.getMatVector(internals);
3833 std::vector<Mat*> inputs(inpvec.size());
3834 for (int i = 0; i < inpvec.size(); i++)
3835 inputs[i] = &inpvec[i];
3837 this->forward(inputs, outputs, internals);
3839 // sync results back
3840 outputs_arr.assign(outputs);
3841 internals_arr.assign(internals);
3844 void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
3846 CV_TRACE_FUNCTION();
3848 this->finalize(inputs, outputs);
3849 this->forward(inputs, outputs, internals);
3854 bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
3855 const int requiredOutputs,
3856 std::vector<MatShape> &outputs,
3857 std::vector<MatShape> &internals) const
3859 CV_Assert(inputs.size());
3860 outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
3864 //////////////////////////////////////////////////////////////////////////
3866 static Mutex& getLayerFactoryMutex()
3868 static Mutex* volatile instance = NULL;
3869 if (instance == NULL)
3871 cv::AutoLock lock(getInitializationMutex());
3872 if (instance == NULL)
3873 instance = new Mutex();
3878 typedef std::map<String, std::vector<LayerFactory::Constructor> > LayerFactory_Impl;
3880 static LayerFactory_Impl& getLayerFactoryImpl_()
3882 static LayerFactory_Impl impl;
3886 static LayerFactory_Impl& getLayerFactoryImpl()
3888 static LayerFactory_Impl* volatile instance = NULL;
3889 if (instance == NULL)
3891 cv::AutoLock lock(getLayerFactoryMutex());
3892 if (instance == NULL)
3894 instance = &getLayerFactoryImpl_();
3895 initializeLayerFactory();
3901 void LayerFactory::registerLayer(const String &type, Constructor constructor)
3903 CV_TRACE_FUNCTION();
3904 CV_TRACE_ARG_VALUE(type, "type", type.c_str());
3906 cv::AutoLock lock(getLayerFactoryMutex());
3907 String type_ = toLowerCase(type);
3908 LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);
3910 if (it != getLayerFactoryImpl().end())
3912 if (it->second.back() == constructor)
3913 CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" was already registered");
3914 it->second.push_back(constructor);
3916 getLayerFactoryImpl().insert(std::make_pair(type_, std::vector<Constructor>(1, constructor)));
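// Registration sketch for a custom layer (MyLayer is hypothetical; its
// create() must match LayerFactory::Constructor, i.e. Ptr<Layer>(*)(LayerParams&)):
//
//     LayerFactory::registerLayer("MyType", MyLayer::create);
//     // or, via the convenience macro where available:
//     CV_DNN_REGISTER_LAYER_CLASS(MyType, MyLayer);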
3919 void LayerFactory::unregisterLayer(const String &type)
3921 CV_TRACE_FUNCTION();
3922 CV_TRACE_ARG_VALUE(type, "type", type.c_str());
3924 cv::AutoLock lock(getLayerFactoryMutex());
3925 String type_ = toLowerCase(type);
3927 LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);
3928 if (it != getLayerFactoryImpl().end())
3930 if (it->second.size() > 1)
3931 it->second.pop_back();
3933 getLayerFactoryImpl().erase(it);
3937 Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
3939 CV_TRACE_FUNCTION();
3940 CV_TRACE_ARG_VALUE(type, "type", type.c_str());
3942 cv::AutoLock lock(getLayerFactoryMutex());
3943 String type_ = toLowerCase(type);
3944 LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type_);
3946 if (it != getLayerFactoryImpl().end())
3948 CV_Assert(!it->second.empty());
3949 return it->second.back()(params);
3953 return Ptr<Layer>(); //NULL
3957 BackendNode::BackendNode(int backendId) : backendId(backendId) {}
3959 BackendNode::~BackendNode() {}
3961 BackendWrapper::BackendWrapper(int backendId, int targetId)
3962 : backendId(backendId), targetId(targetId) {}
3964 BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m)
3966 CV_Error(Error::StsNotImplemented,
3967 "Constructor of backend wrapper must be implemented");
3970 BackendWrapper::BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape)
3972 CV_Error(Error::StsNotImplemented,
3973 "Constructor of backend wrapper must be implemented");
3976 BackendWrapper::~BackendWrapper() {}
3978 Net readNet(const String& _model, const String& _config, const String& _framework)
3980 String framework = toLowerCase(_framework);
3981 String model = _model;
3982 String config = _config;
3983 const std::string modelExt = model.substr(model.rfind('.') + 1);
3984 const std::string configExt = config.substr(config.rfind('.') + 1);
3985 if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" ||
3986 modelExt == "prototxt" || configExt == "prototxt")
3988 if (modelExt == "prototxt" || configExt == "caffemodel")
3989 std::swap(model, config);
3990 return readNetFromCaffe(config, model);
3992 if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" ||
3993 modelExt == "pbtxt" || configExt == "pbtxt")
3995 if (modelExt == "pbtxt" || configExt == "pb")
3996 std::swap(model, config);
3997 return readNetFromTensorflow(model, config);
3999 if (framework == "torch" || modelExt == "t7" || modelExt == "net" ||
4000 configExt == "t7" || configExt == "net")
4002 return readNetFromTorch(model.empty() ? config : model);
4004 if (framework == "darknet" || modelExt == "weights" || configExt == "weights" ||
4005 modelExt == "cfg" || configExt == "cfg")
4007 if (modelExt == "cfg" || configExt == "weights")
4008 std::swap(model, config);
4009 return readNetFromDarknet(config, model);
4011 if (framework == "dldt" || modelExt == "bin" || configExt == "bin" ||
4012 modelExt == "xml" || configExt == "xml")
4014 if (modelExt == "xml" || configExt == "bin")
4015 std::swap(model, config);
4016 return readNetFromModelOptimizer(config, model);
4018 if (framework == "onnx" || modelExt == "onnx")
4020 return readNetFromONNX(model);
4022 CV_Error(Error::StsError, "Cannot determine the origin framework of files: " +
4023 model + (config.empty() ? "" : ", " + config));
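// Dispatch examples (hypothetical file names; the framework is inferred
// from the extensions as implemented above):
//
//     Net a = readNet("model.caffemodel", "model.prototxt");  // Caffe
//     Net b = readNet("frozen_graph.pb");                     // TensorFlow
//     Net c = readNet("yolov3.weights", "yolov3.cfg");        // Darknet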
4026 Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
4027 const std::vector<uchar>& bufferConfig)
4029 String framework = toLowerCase(_framework);
4030 if (framework == "caffe")
4031 return readNetFromCaffe(bufferConfig, bufferModel);
4032 else if (framework == "tensorflow")
4033 return readNetFromTensorflow(bufferModel, bufferConfig);
4034 else if (framework == "darknet")
4035 return readNetFromDarknet(bufferConfig, bufferModel);
4036 else if (framework == "torch")
4037 CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
4038 else if (framework == "dldt")
4039 CV_Error(Error::StsNotImplemented, "Reading Intel's Model Optimizer models from buffers");
4040 CV_Error(Error::StsError, "Cannot determine the origin framework from the name " + framework);
4043 Net readNetFromModelOptimizer(const String &xml, const String &bin)
4045 return Net::readFromModelOptimizer(xml, bin);
4048 CV__DNN_INLINE_NS_END