1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
// * Redistributions of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "precomp.hpp"
43 #include "op_halide.hpp"
44 #include "op_inf_engine.hpp"
45 #include "halide_scheduler.hpp"
52 #include <opencv2/dnn/shape_utils.hpp>
53 #include <opencv2/imgproc.hpp>
55 #include <opencv2/core/utils/configuration.private.hpp>
56 #include <opencv2/core/utils/logger.hpp>
60 CV__DNN_EXPERIMENTAL_NS_BEGIN
// This option is useful for running memory error detection with valgrind.
63 static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false);
66 static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false);
69 static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
70 #ifdef HAVE_INF_ENGINE
    (size_t)DNN_BACKEND_INFERENCE_ENGINE
#else
    (size_t)DNN_BACKEND_OPENCV
#endif
);
// Additional checks (slows down execution!)
78 static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
79 static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
80 static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);
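// These checks are toggled through OpenCV configuration parameters, which are
// read from environment variables of the same name. A minimal usage sketch
// (the application name is illustrative):
//     OPENCV_DNN_CHECK_NAN_INF=1 OPENCV_DNN_CHECK_NAN_INF_DUMP=1 ./my_dnn_app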
89 typedef std::vector<MatShape> ShapesVec;
93 ShapesVec in, out, internal;
// There is no guarantee that a layer which supports in-place computation
// will actually be computed in-place (input.data_ptr == output.data_ptr).
// If a layer reports that it can work in-place and the layers after it
// no longer use the input blob, we set output = input.
bool supportInPlace;
99 LayerShapes() {supportInPlace = false;}
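// A minimal usage sketch for the blobFromImage() family below; the input size,
// scale and mean values are illustrative, not defaults:
//     Mat img = imread("input.jpg");                        // HWC, BGR, CV_8U
//     Mat blob = blobFromImage(img, 1.0 / 255.0, Size(224, 224),
//                              Scalar(104, 117, 123), /*swapRB=*/true,
//                              /*crop=*/false, CV_32F);     // NCHW, CV_32F blob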
103 Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
104 const Scalar& mean, bool swapRB, bool crop, int ddepth)
108 blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth);
112 void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
113 const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
116 std::vector<Mat> images(1, image.getMat());
117 blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
120 Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
121 const Scalar& mean, bool swapRB, bool crop, int ddepth)
125 blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
129 void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
130 Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
133 CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
136 CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
137 CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
140 std::vector<Mat> images;
141 images_.getMatVector(images);
142 CV_Assert(!images.empty());
143 for (int i = 0; i < images.size(); i++)
145 Size imgSize = images[i].size();
152 float resizeFactor = std::max(size.width / (float)imgSize.width,
153 size.height / (float)imgSize.height);
154 resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
Rect crop(Point(0.5 * (images[i].cols - size.width),
                0.5 * (images[i].rows - size.height)),
          size);
158 images[i] = images[i](crop);
161 resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
163 if(images[i].depth() == CV_8U && ddepth == CV_32F)
164 images[i].convertTo(images[i], CV_32F);
167 std::swap(mean[0], mean[2]);
170 images[i] *= scalefactor;
173 size_t i, nimages = images.size();
174 Mat image0 = images[0];
175 int nch = image0.channels();
176 CV_Assert(image0.dims == 2);
178 if (nch == 3 || nch == 4)
180 int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
181 blob_.create(4, sz, ddepth);
182 Mat blob = blob_.getMat();
185 for( i = 0; i < nimages; i++ )
Mat image = images[i];
CV_Assert(image.depth() == blob_.depth());
189 nch = image.channels();
190 CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
191 CV_Assert(image.size() == image0.size());
Mat ch[4];
for( int j = 0; j < nch; j++ )
194 ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
if(swapRB)
    std::swap(ch[0], ch[2]);
split(image, ch);
203 int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
204 blob_.create(4, sz, ddepth);
205 Mat blob = blob_.getMat();
207 for( i = 0; i < nimages; i++ )
209 Mat image = images[i];
210 CV_Assert(image.depth() == blob_.depth());
211 nch = image.channels();
212 CV_Assert(image.dims == 2 && (nch == 1));
213 CV_Assert(image.size() == image0.size());
215 image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
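// A minimal round-trip sketch for imagesFromBlob() below: it splits an NCHW
// CV_32F blob back into one 2D image per batch item (variable names are
// illustrative):
//     std::vector<Mat> images;
//     imagesFromBlob(blob, images);   // images.size() == blob.size[0]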
220 void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)
// A blob is a 4-dimensional matrix in floating-point precision.
// blob_.size[0] = batch size (number of images)
// blob_.size[1] = number of channels
229 CV_Assert(blob_.depth() == CV_32F);
230 CV_Assert(blob_.dims == 4);
232 images_.create(cv::Size(1, blob_.size[0]), blob_.depth());
234 std::vector<Mat> vectorOfChannels(blob_.size[1]);
235 for (int n = 0; n < blob_.size[0]; ++n)
237 for (int c = 0; c < blob_.size[1]; ++c)
239 vectorOfChannels[c] = getPlane(blob_, n, c);
241 cv::merge(vectorOfChannels, images_.getMatRef(n));
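// A minimal lifecycle sketch for the OpenCL wrapper below (hypothetical direct
// usage; in practice these wrappers are created internally when the target is
// DNN_TARGET_OPENCL):
//     Mat host(1, 10, CV_32F);
//     Ptr<BackendWrapper> w = OpenCLBackendWrapper::create(host);
//     w->setHostDirty();   // host data changed; upload before the next use
//     w->copyToHost();     // download device results back into 'host'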
245 class OpenCLBackendWrapper : public BackendWrapper
248 OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
255 OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
256 : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
258 Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
259 CV_Assert(!base.empty());
263 int shape[] = {1, (int)base->umat.total()};
264 umat = base->umat.reshape(1, 2, &shape[0])
265 .colRange(0, host->total())
266 .reshape(1, host->dims, &host->size[0]);
270 static Ptr<BackendWrapper> create(Mat& m)
272 return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));
275 static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
277 return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));
280 static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper> >& wrappers)
282 const int numWrappers = wrappers.size();
283 std::vector<UMat> mats(wrappers.size());
284 for (int i = 0; i < numWrappers; ++i)
286 Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
287 CV_Assert(!umatWrapper.empty());
288 umatWrapper->copyToDevice();
289 mats[i] = umatWrapper->umat;
// Replaces all umats in the wrappers with the given ones.
295 static void update(const std::vector<Ptr<BackendWrapper> >& wrappers,
296 const std::vector<UMat>& umats)
298 CV_Assert(wrappers.size() == umats.size());
299 for (int i = 0, n = umats.size(); i < n; ++i)
301 Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
302 CV_Assert(!umatWrapper.empty());
303 umatWrapper->umat = umats[i];
307 ~OpenCLBackendWrapper() {}
// Copies data from device to host memory.
310 virtual void copyToHost() CV_OVERRIDE
315 virtual void setHostDirty() CV_OVERRIDE
340 LayerPin(int layerId = -1, int outputId = -1)
341 : lid(layerId), oid(outputId) {}
345 return (lid >= 0 && oid >= 0);
348 bool equal(const LayerPin &r) const
350 return (lid == r.lid && oid == r.oid);
353 bool operator<(const LayerPin &r) const
return lid < r.lid || (lid == r.lid && oid < r.oid);
358 bool operator ==(const LayerPin &r) const
360 return lid == r.lid && oid == r.oid;
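// LayerPin's strict weak ordering (operator< above) makes it usable as a key
// in std::map/std::set; a brief sketch of the intended usage:
//     std::map<LayerPin, int> refCounter;
//     refCounter[LayerPin(3, 0)]++;  // count a reference to output #0 of layer #3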
366 LayerData() : id(-1), skip(false), flag(0) {}
367 LayerData(int _id, const String &_name, const String &_type, LayerParams &_params)
368 : id(_id), name(_name), type(_type), params(_params), skip(false), flag(0)
382 std::vector<LayerPin> inputBlobsId;
383 std::set<int> inputLayersId;
384 std::set<int> requiredOutputs;
385 std::vector<LayerPin> consumers;
386 std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
387 std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
388 std::vector<Ptr<BackendWrapper> > internalBlobsWrappers;
390 Ptr<Layer> layerInstance;
391 std::vector<Mat> outputBlobs;
392 std::vector<Mat*> inputBlobs;
393 std::vector<Mat> internals;
394 // Computation nodes of implemented backends (except DEFAULT).
395 std::map<int, Ptr<BackendNode> > backendNodes;
// Flag to skip this layer's computation for a specific backend.
401 Ptr<Layer> getLayerInstance()
404 CV_TRACE_ARG_VALUE(type, "type", type.c_str());
407 return layerInstance;
409 layerInstance = LayerFactory::createLayerInstance(type, params);
412 CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\"");
415 return layerInstance;
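// getLayerInstance() above resolves the layer type through LayerFactory. A
// brief sketch of how a custom type becomes creatable this way ("MyType" and
// MyLayer are hypothetical):
//     CV_DNN_REGISTER_LAYER_CLASS(MyType, MyLayer);
//     // afterwards LayerFactory::createLayerInstance("MyType", params)
//     // returns a new MyLayer instance.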
// Fake layer that holds the network's input blobs.
420 struct DataLayer : public Layer
422 DataLayer() : Layer()
427 virtual bool supportBackend(int backendId) CV_OVERRIDE
return backendId == DNN_BACKEND_OPENCV ||
       (backendId == DNN_BACKEND_INFERENCE_ENGINE && inputsData.size() == 1);
433 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
436 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
438 CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
439 forward_ocl(inputs_arr, outputs_arr, internals_arr))
441 if (outputs_arr.depth() == CV_16S)
443 forward_fallback(inputs_arr, outputs_arr, internals_arr);
447 std::vector<Mat> outputs, internals;
448 outputs_arr.getMatVector(outputs);
449 internals_arr.getMatVector(internals);
// Supported input/output combinations: fp32 -> fp32 and uint8 -> fp32.
455 for (int i = 0; i < inputsData.size(); ++i)
457 double scale = scaleFactors[i];
458 Scalar& mean = means[i];
459 CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
460 CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");
462 bool singleMean = true;
463 for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
465 singleMean = mean[j] == mean[j - 1];
if (singleMean)
    inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
else
474 for (int n = 0; n < inputsData[i].size[0]; ++n)
475 for (int c = 0; c < inputsData[i].size[1]; ++c)
477 Mat inp = getPlane(inputsData[i], n, c);
478 Mat out = getPlane(outputs[i], n, c);
479 inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
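// The convertTo() calls above compute out = scale*inp - scale*mean, which
// equals (inp - mean) * scale: per-channel mean subtraction followed by scaling.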
486 std::vector<Mat> tmp_expressions;
487 bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
// Supported input/output combinations: fp32/uint8 inputs produce fp32
// outputs, or fp16 (CV_16S storage) outputs when the target runs in FP16.
494 std::vector<UMat> outputs;
495 outputs_.getUMatVector(outputs);
497 tmp_expressions.clear();
498 for (int i = 0; i < inputsData.size(); ++i)
500 Mat inputData = inputsData[i];
502 double scale = scaleFactors[i];
503 Scalar& mean = means[i];
505 CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
506 bool singleMean = true;
507 for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
509 singleMean = mean[j] == mean[j - 1];
512 if (outputs_.depth() == CV_16S)
if (singleMean)
{
    tmp_expressions.push_back(Mat(scale * (inputsData[i] - mean[0])));
    convertFp16(tmp_expressions.back(), outputs[i]);
}
else
521 for (int n = 0; n < inputsData[i].size[0]; ++n)
522 for (int c = 0; c < inputsData[i].size[1]; ++c)
524 Mat inp = getPlane(inputsData[i], n, c);
526 std::vector<cv::Range> plane(4, Range::all());
527 plane[0] = Range(n, n + 1);
528 plane[1] = Range(c, c + 1);
529 UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);
531 tmp_expressions.push_back(scale * (inp - mean[c]));
532 convertFp16(tmp_expressions.back(), out);
538 CV_Assert(outputs_.depth() == CV_32F);
if (singleMean)
    inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
else
545 for (int n = 0; n < inputsData[i].size[0]; ++n)
546 for (int c = 0; c < inputsData[i].size[1]; ++c)
548 Mat inp = getPlane(inputsData[i], n, c);
550 std::vector<cv::Range> plane(4, Range::all());
551 plane[0] = Range(n, n + 1);
552 plane[1] = Range(c, c + 1);
553 UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);
555 inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
564 int outputNameToIndex(const String& tgtName) CV_OVERRIDE
566 int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin());
567 return (idx < (int)outNames.size()) ? idx : -1;
570 void setNames(const std::vector<String> &names)
572 outNames.assign(names.begin(), names.end());
575 bool getMemoryShapes(const std::vector<MatShape> &inputs,
576 const int requiredOutputs,
577 std::vector<MatShape> &outputs,
578 std::vector<MatShape> &internals) const CV_OVERRIDE
580 CV_Assert(inputs.size() == requiredOutputs);
581 outputs.assign(inputs.begin(), inputs.end());
585 virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
587 std::vector<Mat> outputs;
588 outputs_arr.getMatVector(outputs);
590 CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(),
591 inputsData.size() == outputs.size());
skip = true;
for (int i = 0; skip && i < inputsData.size(); ++i)
{
    if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar())
        skip = false;
}
600 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
602 #ifdef HAVE_INF_ENGINE
603 InferenceEngine::LayerParams lp;
605 lp.type = "ScaleShift";
606 lp.precision = InferenceEngine::Precision::FP32;
607 std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));
609 CV_CheckEQ(inputsData.size(), (size_t)1, "");
610 CV_CheckEQ(inputsData[0].dims, 4, "");
611 const size_t numChannels = inputsData[0].size[1];
612 CV_Assert(numChannels <= 4);
615 auto weights = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
618 weights->set(std::vector<float>(numChannels, scaleFactors[0]));
619 ieLayer->_weights = weights;
622 auto biases = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
625 std::vector<float> biasesVec(numChannels);
626 for (int i = 0; i < numChannels; ++i)
628 biasesVec[i] = -means[0][i] * scaleFactors[0];
630 biases->set(biasesVec);
631 ieLayer->_biases = biases;
633 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
634 #endif // HAVE_INF_ENGINE
635 return Ptr<BackendNode>();
638 std::vector<String> outNames;
639 // Preprocessing parameters for each network's input.
640 std::vector<double> scaleFactors;
641 std::vector<Scalar> means;
642 std::vector<Mat> inputsData;
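// The per-input scale factors and means above are filled in from
// Net::setInput(). A minimal usage sketch (model file names are illustrative):
//     Net net = readNetFromCaffe("deploy.prototxt", "model.caffemodel");
//     net.setInput(blob, "data", /*scalefactor=*/1.0, /*mean=*/Scalar());
//     Mat out = net.forward();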
// Increase the reference counter of a layer output.
650 void addReference(const LayerPin& lp)
652 std::map<LayerPin, int>::iterator it = refCounter.find(lp);
653 if (it == refCounter.end())
659 void addReferences(const std::vector<LayerPin>& pins)
661 for (int i = 0; i < pins.size(); i++)
663 addReference(pins[i]);
// Returns the number of references to the allocated memory used by a
// specific layer blob.
669 int numReferences(const LayerPin& lp)
671 std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
672 CV_Assert(mapIt != reuseMap.end());
673 LayerPin memHost = mapIt->second;
675 std::map<LayerPin, int>::iterator refIt = refCounter.find(memHost);
676 CV_Assert(refIt != refCounter.end());
677 return refIt->second;
680 // Reuse data allocated in <host> inside the <user> blob.
681 void reuse(const LayerPin& host, const LayerPin& user)
683 CV_Assert(reuseMap.find(user) == reuseMap.end());
684 CV_Assert(reuseMap.find(host) != reuseMap.end());
685 LayerPin memHost = reuseMap[host];
686 reuseMap[user] = memHost;
687 if (refCounter.find(memHost) != refCounter.end())
689 std::map<LayerPin, int>::iterator userRefIt = refCounter.find(user);
690 if (userRefIt != refCounter.end())
692 refCounter[memHost] += userRefIt->second;
693 refCounter.erase(userRefIt);
696 refCounter[memHost] += 1;
// Decrease the reference counter of the memory allocated for a specific blob.
701 void releaseReference(const LayerPin& lp)
703 std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
704 CV_Assert(mapIt != reuseMap.end());
706 std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
707 CV_Assert(refIt != refCounter.end());
708 CV_Assert(refIt->second > 0);
712 void releaseReferences(const std::vector<LayerPin>& pins)
714 for (int i = 0; i < pins.size(); i++)
716 releaseReference(pins[i]);
720 void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half)
722 if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
725 LayerPin bestBlobPin;
727 std::map<LayerPin, Mat>::iterator hostIt;
728 std::map<LayerPin, int>::iterator refIt;
730 const int targetTotal = total(shape);
731 int bestBlobTotal = INT_MAX;
733 for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
735 refIt = refCounter.find(hostIt->first);
// Use only blobs that were referenced before; a blob that was never
// referenced might be a network output.
738 if (refIt != refCounter.end() && refIt->second == 0)
740 Mat& unusedBlob = hostIt->second;
741 if (unusedBlob.total() >= targetTotal &&
742 unusedBlob.total() < bestBlobTotal)
744 bestBlobPin = hostIt->first;
745 bestBlob = unusedBlob;
746 bestBlobTotal = unusedBlob.total();
750 if (!bestBlob.empty())
752 reuse(bestBlobPin, lp);
753 dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
// If dst has already been allocated with total(shape) elements, it is not
// recreated and dst.data keeps pointing to the same memory.
761 dst.create(shape, use_half ? CV_16S : CV_32F);
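// A worked sketch of the reuse policy above: if some earlier blob of 4096
// floats has dropped to zero references and the next layer needs only 1000
// floats, reuseOrCreate() hands out a reshaped view over that memory (the
// smallest sufficient unused blob wins) instead of allocating a new one.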
766 void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
767 std::vector<LayerPin>& pinsForInternalBlobs,
768 bool use_half = false)
772 pinsForInternalBlobs.clear();
774 std::vector<Mat>& outputBlobs = ld.outputBlobs,
775 &internalBlobs = ld.internals;
777 const ShapesVec& outShapes = layerShapes.out,
778 internalShapes = layerShapes.internal;
outputBlobs.resize(std::max((size_t)1, outShapes.size())); // a layer produces at least one output blob
781 internalBlobs.resize(internalShapes.size());
783 CV_Assert(ld.requiredOutputs.size() <= outShapes.size());
785 // Check that layer could work in-place.
786 bool inPlace = false;
787 if (layerShapes.supportInPlace)
789 if (ld.inputBlobs.size() == 1)
791 // Get number of references to the input memory.
792 int numRef = numReferences(ld.inputBlobsId[0]);
// In-place is possible only if the current layer is the sole consumer of this blob.
794 inPlace = numRef == 1;
798 ShapesVec shapes(outShapes);
799 shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
800 std::vector<Mat*> blobs;
801 for(int i = 0; i < outputBlobs.size(); i++)
803 blobs.push_back(&outputBlobs[i]);
806 for(int i = 0; i < internalBlobs.size(); i++)
808 blobs.push_back(&internalBlobs[i]);
809 if (total(internalShapes[i]))
811 pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
815 addReferences(pinsForInternalBlobs);
817 std::map<int, std::vector<int> > idxSizes;
818 for(int i = 0; i < shapes.size(); i++)
820 idxSizes[total(shapes[i])].push_back(i);
823 std::map<int, std::vector<int> >::reverse_iterator it;
824 for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
826 for(int j = 0; j < it->second.size(); j++)
828 int index = it->second[j];
829 if (total(shapes[index]))
831 LayerPin blobPin(ld.id, index);
832 if (index < outShapes.size() && inPlace)
834 CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
835 ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
836 reuse(ld.inputBlobsId[0], blobPin);
839 reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half);
// Clear internal state. Called before every reallocation.
856 // Register allocated memory.
857 void addHost(const LayerPin& lp, const Mat& mat)
859 CV_Assert(memHosts.find(lp) == memHosts.end());
864 std::map<LayerPin, int> refCounter;
// Maps a pin to its origin blob (the one for which memory was allocated first).
// For origin blobs, key == value.
867 std::map<LayerPin, LayerPin> reuseMap;
868 std::map<LayerPin, Mat> memHosts;
871 static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
873 if (backendId == DNN_BACKEND_OPENCV)
875 if (targetId == DNN_TARGET_CPU)
876 return Ptr<BackendWrapper>();
877 else if (IS_DNN_OPENCL_TARGET(targetId))
878 return OpenCLBackendWrapper::create(m);
880 CV_Error(Error::StsNotImplemented, "Unknown target identifier");
882 else if (backendId == DNN_BACKEND_HALIDE)
884 CV_Assert(haveHalide());
886 return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
887 #endif // HAVE_HALIDE
889 else if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
891 CV_Assert(haveInfEngine());
892 #ifdef HAVE_INF_ENGINE
893 return Ptr<BackendWrapper>(new InfEngineBackendWrapper(targetId, m));
894 #endif // HAVE_INF_ENGINE
897 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
898 return Ptr<BackendWrapper>();
903 typedef std::map<int, LayerShapes> LayersShapesMap;
904 typedef std::map<int, LayerData> MapIdToLayerData;
// Allocate the fake net input layer.
909 netInputLayer = Ptr<DataLayer>(new DataLayer());
910 LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
912 netInputLayer->name = inpl.name = "_input";
913 inpl.type = "__NetInputLayer__";
914 inpl.layerInstance = netInputLayer;
915 layerNameToId.insert(std::make_pair(inpl.name, inpl.id));
918 netWasAllocated = false;
920 preferableBackend = DNN_BACKEND_DEFAULT;
921 preferableTarget = DNN_TARGET_CPU;
922 skipInfEngineInit = false;
925 Ptr<DataLayer> netInputLayer;
926 std::vector<LayerPin> blobsToKeep;
927 MapIdToLayerData layers;
928 std::map<String, int> layerNameToId;
929 BlobManager blobManager;
930 int preferableBackend;
931 int preferableTarget;
932 String halideConfigFile;
933 bool skipInfEngineInit;
934 // Map host data to backend specific wrapper.
935 std::map<void*, Ptr<BackendWrapper> > backendWrappers;
939 bool netWasAllocated;
941 std::vector<int64> layersTimings;
944 Ptr<BackendWrapper> wrap(Mat& host)
946 if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU)
947 return Ptr<BackendWrapper>();
949 MatShape shape(host.dims);
950 for (int i = 0; i < host.dims; ++i)
951 shape[i] = host.size[i];
953 void* data = host.data;
954 if (backendWrappers.find(data) != backendWrappers.end())
956 Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
957 if (preferableBackend == DNN_BACKEND_OPENCV)
959 CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
960 return OpenCLBackendWrapper::create(baseBuffer, host);
962 else if (preferableBackend == DNN_BACKEND_HALIDE)
964 CV_Assert(haveHalide());
966 return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
967 #endif // HAVE_HALIDE
969 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
971 return wrapMat(preferableBackend, preferableTarget, host);
974 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
977 Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
978 backendWrappers[data] = wrapper;
987 CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);
989 HalideScheduler scheduler(halideConfigFile);
990 std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
991 for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
993 LayerData &ld = it->second;
994 Ptr<Layer> layer = ld.layerInstance;
995 if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip)
997 CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty());
998 bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]);
1001 // Use automatic scheduling provided by layer.
1002 layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE],
1003 ld.inputBlobs, ld.outputBlobs,
1006 compileList.emplace_back(ld);
1009 std::atomic<int> progress(0);
1010 auto fn = ([&] () -> void
1014 int id = progress.fetch_add(1);
1015 if ((size_t)id >= compileList.size())
1017 const LayerData& ld = compileList[id].get();
1018 Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
1019 dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
1022 size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
1023 num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
1024 std::vector<std::thread> threads(num_threads - 1);
1025 for (auto& t: threads) t = std::thread(fn);
1026 fn(); // process own tasks
1027 for (auto& t: threads) t.join();
1033 CV_TRACE_FUNCTION();
1035 MapIdToLayerData::iterator it;
1036 for (it = layers.begin(); it != layers.end(); it++)
1038 if (it->second.id != 0) {
1039 it->second.inputBlobs.clear();
1040 it->second.outputBlobs.clear();
1041 it->second.internals.clear();
1043 it->second.skip = false;
1044 //it->second.consumers.clear();
1045 Ptr<Layer> currLayer = it->second.layerInstance;
1047 if( currLayer.empty() )
1050 currLayer->unsetAttached();
1052 Ptr<PoolingLayer> poolingLayer = currLayer.dynamicCast<PoolingLayer>();
1053 if( !poolingLayer.empty() )
1055 poolingLayer->computeMaxIdx = true;
1059 layersTimings.clear();
1062 void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>())
1064 CV_TRACE_FUNCTION();
1066 if (preferableBackend == DNN_BACKEND_DEFAULT)
1067 preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT;
1069 CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
1070 preferableTarget == DNN_TARGET_CPU ||
1071 preferableTarget == DNN_TARGET_OPENCL ||
1072 preferableTarget == DNN_TARGET_OPENCL_FP16);
1073 CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
1074 preferableTarget == DNN_TARGET_CPU ||
1075 preferableTarget == DNN_TARGET_OPENCL);
1076 CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE ||
1077 preferableTarget == DNN_TARGET_CPU ||
1078 preferableTarget == DNN_TARGET_OPENCL ||
1079 preferableTarget == DNN_TARGET_OPENCL_FP16 ||
1080 preferableTarget == DNN_TARGET_MYRIAD);
1081 if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
1083 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
1086 CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
1087 preferableTarget = DNN_TARGET_CPU;
1091 if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
1093 // Current implementation is only valid for GPU (#11494)
1094 if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
1096 CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
1097 preferableTarget = DNN_TARGET_CPU;
1099 else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
1101 CV_LOG_WARNING(NULL,
1102 "DNN: OpenCL target with fp16 precision is not supported "
1103 "with current OpenCL device (tested with Intel GPUs only), "
1104 "switching to OpenCL with fp32 precision.");
1105 preferableTarget = DNN_TARGET_OPENCL;
1112 allocateLayers(blobsToKeep_);
1114 MapIdToLayerData::iterator it = layers.find(0);
1115 CV_Assert(it != layers.end());
1116 it->second.skip = netInputLayer->skip;
1120 if (!netWasAllocated )
1123 if (preferableBackend == DNN_BACKEND_HALIDE)
1126 CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
1130 netWasAllocated = true;
1131 this->blobsToKeep = blobsToKeep_;
1135 int getLayerId(const String &layerName)
1137 std::map<String, int>::iterator it = layerNameToId.find(layerName);
1138 return (it != layerNameToId.end()) ? it->second : -1;
1141 int getLayerId(int id)
1143 MapIdToLayerData::iterator it = layers.find(id);
1144 return (it != layers.end()) ? id : -1;
1147 int getLayerId(DictValue &layerDesc)
1149 if (layerDesc.isInt())
1150 return getLayerId(layerDesc.get<int>());
1151 else if (layerDesc.isString())
1152 return getLayerId(layerDesc.get<String>());
1154 CV_Assert(layerDesc.isInt() || layerDesc.isString());
1158 String getLayerName(int id)
1160 MapIdToLayerData::iterator it = layers.find(id);
1161 return (it != layers.end()) ? it->second.name : "(unknown layer)";
1164 LayerData& getLayerData(int id)
1166 MapIdToLayerData::iterator it = layers.find(id);
1168 if (it == layers.end())
1169 CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));
1174 LayerData& getLayerData(const String &layerName)
1176 int id = getLayerId(layerName);
1179 CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found");
1181 return getLayerData(id);
1184 LayerData& getLayerData(const DictValue &layerDesc)
1186 CV_Assert(layerDesc.isInt() || layerDesc.isString());
1187 if (layerDesc.isInt())
1188 return getLayerData(layerDesc.get<int>());
1189 else /*if (layerDesc.isString())*/
1190 return getLayerData(layerDesc.get<String>());
1193 static void addLayerInput(LayerData &ld, int inNum, LayerPin from)
1195 if ((int)ld.inputBlobsId.size() <= inNum)
1197 ld.inputBlobsId.resize(inNum + 1);
1201 LayerPin storedFrom = ld.inputBlobsId[inNum];
1202 if (storedFrom.valid() && !storedFrom.equal(from))
1203 CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected",
1204 inNum, ld.name.c_str()));
1207 ld.inputBlobsId[inNum] = from;
1210 int resolvePinOutputName(LayerData &ld, const String &outName)
if (outName.empty())
    return 0;
1214 return ld.getLayerInstance()->outputNameToIndex(outName);
1217 LayerPin getPinByAlias(const String &layerName)
1220 pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);
if (pin.lid >= 0)
    pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);
1228 std::vector<LayerPin> getLayerOutPins(const String &layerName)
1230 int lid = (layerName.empty()) ? 0 : getLayerId(layerName);
1232 std::vector<LayerPin> pins;
1234 for (int i = 0; i < layers[lid].outputBlobs.size(); i++)
1236 pins.push_back(LayerPin(lid, i));
1242 void connect(int outLayerId, int outNum, int inLayerId, int inNum)
1244 CV_Assert(outLayerId < inLayerId);
1245 LayerData &ldOut = getLayerData(outLayerId);
1246 LayerData &ldInp = getLayerData(inLayerId);
1248 addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum));
1249 ldOut.requiredOutputs.insert(outNum);
1250 ldOut.consumers.push_back(LayerPin(inLayerId, outNum));
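// The public Net::connect() resolves string pins to these numeric ids. A
// minimal sketch ("conv1" and "relu1" are illustrative layer names):
//     net.connect("conv1", "relu1");      // output #0 of conv1 -> input #0 of relu1
//     net.connect("conv1.0", "relu1.0");  // the same, with explicit pin indices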
1255 CV_TRACE_FUNCTION();
1256 if (preferableBackend == DNN_BACKEND_OPENCV)
1257 CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
1258 else if (preferableBackend == DNN_BACKEND_HALIDE)
1259 initHalideBackend();
1260 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
1261 initInfEngineBackend();
1263 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
1266 void initHalideBackend()
1268 CV_TRACE_FUNCTION();
1269 CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide());
1271 // Iterator to current layer.
1272 MapIdToLayerData::iterator it = layers.begin();
// Iterator to the base layer for fusion. For example, in the case of
// conv+bn+relu, it will be the conv layer.
1275 MapIdToLayerData::iterator baseIt = layers.begin();
1276 for (; it != layers.end(); it++)
1278 LayerData &ldTop = it->second;
1279 Ptr<Layer> layerTop = ldTop.layerInstance;
1280 if (!layerTop->supportBackend(preferableBackend))
// Move the base iterator to a layer that doesn't support the preferable
// backend, to prevent fusion across layers of different backends.
// Try to perform layer fusion.
1288 LayerData &ldBot = baseIt->second;
1289 Ptr<Layer> layerBot = ldBot.layerInstance;
// 1. Check that the bottom and top layers use the same backend.
1291 if (it != layers.begin() && layerBot->supportBackend(preferableBackend))
1293 // 2. Check that current layer works in-place.
1294 bool inPlace = ldTop.inputBlobs.size() == 1 &&
1295 ldBot.outputBlobs.size() == 1 &&
1296 ldTop.inputBlobs[0]->data ==
1297 ldBot.outputBlobs[0].data;
1300 // 3. Try to attach node.
1301 CV_Assert(!ldBot.backendNodes[preferableBackend].empty());
1302 Ptr<BackendNode> fusedNode =
1303 layerTop->tryAttach(ldBot.backendNodes[preferableBackend]);
1304 if (!fusedNode.empty())
1307 ldBot.backendNodes[preferableBackend] = fusedNode;
1308 ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers;
// No layer fusion performed.
1315 ldTop.backendNodes[DNN_BACKEND_HALIDE] =
1316 layerTop->initHalide(ldTop.inputBlobsWrappers);
1321 #ifdef HAVE_INF_ENGINE
// Before launching an Inference Engine graph we need to specify its output
// blobs. This function requests output blobs based on input references from
// layers of the default backend or layers from different graphs.
1325 void addInfEngineNetOutputs(LayerData &ld)
1327 Ptr<InfEngineBackendNet> layerNet;
1328 if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end())
1330 Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1333 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1334 CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
1335 layerNet = ieNode->net;
// For every input reference we check whether it belongs to one of the
// Inference Engine backend graphs, and request an output blob if so.
// Do nothing if the layer's input comes from the same graph.
1341 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1343 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1344 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1345 if (!inpNode.empty())
1347 Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1348 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1349 if (layerNet != ieInpNode->net)
1351 // layerNet is empty or nodes are from different graphs.
1352 ieInpNode->net->addOutput(ieInpNode->layer->name);
1357 #endif // HAVE_INF_ENGINE
1359 void initInfEngineBackend()
1361 CV_TRACE_FUNCTION();
1362 CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE, haveInfEngine());
1363 #ifdef HAVE_INF_ENGINE
1364 MapIdToLayerData::iterator it;
1365 Ptr<InfEngineBackendNet> net;
1367 for (it = layers.begin(); it != layers.end(); ++it)
1369 LayerData &ld = it->second;
1372 CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
1373 (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
1374 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1376 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1377 dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
1382 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1384 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1385 dataPtr->name = ld.name;
1390 if (skipInfEngineInit)
1392 Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
1393 CV_Assert(!node.empty());
1395 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1396 CV_Assert(!ieNode.empty());
1398 for (it = layers.begin(); it != layers.end(); ++it)
1400 LayerData &ld = it->second;
1403 for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
1405 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
1406 dataPtr->name = netInputLayer->outNames[i];
1411 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1413 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1414 dataPtr->name = ld.name;
1417 ieNode->net->addBlobs(ld.inputBlobsWrappers);
1418 ieNode->net->addBlobs(ld.outputBlobsWrappers);
1421 layers[lastLayerId].skip = false;
1422 ieNode->net->init(preferableTarget);
// Build Inference Engine networks from sets of layers that support this
// backend. Split the whole model into several Inference Engine networks if
// some of the layers are not implemented.
// Set of all input and output blob wrappers for the current network.
1431 std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
1432 for (it = layers.begin(); it != layers.end(); ++it)
1434 LayerData &ld = it->second;
1435 if (ld.id == 0 && ld.skip)
1437 bool fused = ld.skip;
1439 Ptr<Layer> layer = ld.layerInstance;
1440 if (!fused && !layer->supportBackend(preferableBackend))
1442 addInfEngineNetOutputs(ld);
1443 net = Ptr<InfEngineBackendNet>();
1444 netBlobsWrappers.clear();
1445 layer->preferableTarget = DNN_TARGET_CPU;
1448 ld.skip = true; // Initially skip all Inference Engine supported layers.
// Create a new network if one of the inputs comes from a different Inference Engine graph.
1451 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1453 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1454 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1455 if (!inpNode.empty())
1457 Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1458 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1459 if (ieInpNode->net != net)
1461 net = Ptr<InfEngineBackendNet>();
1462 netBlobsWrappers.clear();
// The same blob wrappers cannot be shared between two Inference Engine
// networks because of the explicit references between layers and blobs,
// so we need to rewrap all the external blobs.
1471 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1473 LayerPin inPin = ld.inputBlobsId[i];
1474 auto it = netBlobsWrappers.find(inPin);
1475 if (it == netBlobsWrappers.end())
1477 ld.inputBlobsWrappers[i] = InfEngineBackendWrapper::create(ld.inputBlobsWrappers[i]);
1478 netBlobsWrappers[inPin] = ld.inputBlobsWrappers[i];
1481 ld.inputBlobsWrappers[i] = it->second;
1483 netBlobsWrappers[LayerPin(ld.id, 0)] = ld.outputBlobsWrappers[0];
1485 Ptr<BackendNode> node;
1490 bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
1491 ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
1493 node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
1494 ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
1498 net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet());
1502 node = layer->initInfEngine(ld.inputBlobsWrappers);
1504 else if (node.empty())
1507 CV_Assert(!node.empty());
1508 ld.backendNodes[preferableBackend] = node;
1510 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1511 CV_Assert(!ieNode.empty());
1514 auto weightableLayer = std::dynamic_pointer_cast<InferenceEngine::WeightableLayer>(ieNode->layer);
1515 if ((preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD) && !fused)
1517 ieNode->layer->precision = InferenceEngine::Precision::FP16;
1518 if (weightableLayer)
1520 if (weightableLayer->_weights)
1521 weightableLayer->_weights = convertFp16(weightableLayer->_weights);
1522 if (weightableLayer->_biases)
1523 weightableLayer->_biases = convertFp16(weightableLayer->_biases);
1527 for (const auto& weights : {"weights", "biases"})
1529 auto it = ieNode->layer->blobs.find(weights);
1530 if (it != ieNode->layer->blobs.end())
1531 it->second = convertFp16(it->second);
1535 if (weightableLayer)
1537 if (weightableLayer->_weights)
1538 weightableLayer->blobs["weights"] = weightableLayer->_weights;
1539 if (weightableLayer->_biases)
1540 weightableLayer->blobs["biases"] = weightableLayer->_biases;
1542 ieNode->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers);
1543 net->addBlobs(ld.inputBlobsWrappers);
1544 net->addBlobs(ld.outputBlobsWrappers);
1547 net->addLayer(ieNode->layer);
1548 addInfEngineNetOutputs(ld);
1551 // Initialize all networks.
1552 std::set<InfEngineBackendNet> initializedNets;
1553 for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
1555 LayerData &ld = it->second;
1556 if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end())
1559 Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1563 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1567 CV_Assert(!ieNode->net.empty());
1569 if (!ieNode->net->isInitialized())
1571 ieNode->net->init(preferableTarget);
1575 #endif // HAVE_INF_ENGINE
1578 void allocateLayer(int lid, const LayersShapesMap& layersShapes)
1580 CV_TRACE_FUNCTION();
1582 LayerData &ld = layers[lid];
1588 size_t ninputs = ld.inputBlobsId.size();
1590 printf("layer %s:", ld.name.c_str());
1591 for (size_t i = 0; i < ninputs; i++)
1593 int inp_lid = ld.inputBlobsId[i].lid;
1594 LayerData &inp_ld = layers[inp_lid];
1595 int inp_outputs = (int)inp_ld.outputBlobs.size();
1596 std::cout << " " << inp_ld.name << "(" << inp_outputs;
1598 for( int j = 0; j < inp_outputs; j++ )
1600 std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size;
// Determine parent layers.
1608 for (size_t i = 0; i < ninputs; i++)
1609 ld.inputLayersId.insert(ld.inputBlobsId[i].lid);
1612 for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
1613 allocateLayer(*i, layersShapes);
1616 if (ld.id == 0) // DataLayer
1618 ninputs = netInputLayer->inputsData.size();
1619 ld.inputBlobsWrappers.resize(ninputs);
1620 for (size_t i = 0; i < ninputs; i++)
1622 ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
1627 ld.inputBlobs.resize(ninputs);
1628 ld.inputBlobsWrappers.resize(ninputs);
1629 for (size_t i = 0; i < ninputs; i++)
1631 LayerPin from = ld.inputBlobsId[i];
1632 CV_Assert(from.valid());
1633 CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
1634 ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
1635 ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
1639 LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);
1641 CV_Assert(layerShapesIt != layersShapes.end());
1643 std::vector<LayerPin> pinsForInternalBlobs;
1644 blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
1645 preferableBackend == DNN_BACKEND_OPENCV &&
1646 preferableTarget == DNN_TARGET_OPENCL_FP16);
1647 ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
1648 for (int i = 0; i < ld.outputBlobs.size(); ++i)
1650 ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
1652 ld.internalBlobsWrappers.resize(ld.internals.size());
1653 for (int i = 0; i < ld.internals.size(); ++i)
1655 ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
1658 Ptr<Layer> layerPtr = ld.getLayerInstance();
1660 std::vector<Mat> inps(ld.inputBlobs.size());
1661 for (int i = 0; i < ld.inputBlobs.size(); ++i)
1663 inps[i] = *ld.inputBlobs[i];
1665 layerPtr->finalize(inps, ld.outputBlobs);
1666 layerPtr->preferableTarget = preferableTarget;
1668 std::cout << "\toutputs:";
1669 size_t noutputs = ld.outputBlobs.size();
1670 for (size_t j = 0; j < noutputs; j++)
1672 std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size;
// After allocating the layer, decrease the reference counters of its input blobs.
1679 blobManager.releaseReferences(ld.inputBlobsId);
1680 blobManager.releaseReferences(pinsForInternalBlobs);
1686 #define printf_(args) printf args
1688 #define printf_(args)
1691 void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
if( !fusion || (preferableBackend != DNN_BACKEND_OPENCV &&
                preferableBackend != DNN_BACKEND_INFERENCE_ENGINE))
    return;
1697 CV_TRACE_FUNCTION();
// Scan through all the layers. If a convolution layer is followed by an
// activation layer, we try to embed the activation into the convolution
// and disable separate execution of the activation.
1701 std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
1702 blobsToKeep_.end());
1703 MapIdToLayerData::iterator it;
1704 for (it = layers.begin(); it != layers.end(); it++)
1706 int lid = it->first;
1707 LayerData& ld = layers[lid];
1710 printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
1713 printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
// Optimization #1: try to fuse batch norm, scaling and/or activation layers
// with the current layer if they follow it. Normally they are fused with a
// convolution layer, but some of them (like activation) may be fused with
// fully-connected, elementwise (+) and some other layers.
1719 Ptr<Layer>& currLayer = ld.layerInstance;
1720 if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
1722 LayerData* nextData = &layers[ld.consumers[0].lid];
1723 LayerPin lpNext(ld.consumers[0].lid, 0);
1726 Ptr<Layer> nextLayer = nextData->layerInstance;
1727 if (currLayer->tryFuse(nextLayer))
1729 printf_(("\tfused with %s\n", nextLayer->name.c_str()));
1730 nextData->skip = true;
1731 ld.outputBlobs = layers[lpNext.lid].outputBlobs;
1732 ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
1733 if (nextData->consumers.size() == 1)
1735 int nextLayerId = nextData->consumers[0].lid;
1736 nextData = &layers[nextLayerId];
1737 lpNext = LayerPin(nextLayerId, 0);
1749 if (preferableBackend != DNN_BACKEND_OPENCV)
1750 continue; // Go to the next layer.
// TODO: support more fusion styles on the OpenCL target.
1753 if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
1754 (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
1755 ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
1756 ld.layerInstance->type != "Concat")) )
// For now, the OpenCL target supports fusion only with ReLU/ReLU6/ChannelsPReLU/TanH/Power activations.
1762 if (IS_DNN_OPENCL_TARGET(preferableTarget) &&
1763 nextData->type != "ReLU" &&
1764 nextData->type != "ChannelsPReLU" &&
1765 nextData->type != "ReLU6" &&
1766 nextData->type != "TanH" &&
1767 nextData->type != "Power")
1770 Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
1771 if (nextActivLayer.empty())
1774 if (currLayer->setActivation(nextActivLayer))
1776 printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
1777 nextData->skip = true;
1778 ld.outputBlobs = layers[lpNext.lid].outputBlobs;
1779 ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
1780 if (nextData->consumers.size() == 1)
1782 int nextLayerId = nextData->consumers[0].lid;
1783 nextData = &layers[nextLayerId];
1784 lpNext = LayerPin(nextLayerId, 0);
1796 // fuse convolution layer followed by eltwise + relu
1797 if ( IS_DNN_OPENCL_TARGET(preferableTarget) && ld.layerInstance->type == "Convolution" )
1799 Ptr<EltwiseLayer> nextEltwiseLayer;
1801 nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
1803 if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
1804 nextData && nextData->inputBlobsId.size() == 2 )
1806 LayerData *eltwiseData = nextData;
// The Eltwise layer has two inputs. We need to determine which is the
// base convolution layer and which could be used as its bias.
1810 LayerData* biasLayerData = 0;
1811 for (int i = 0; i < 2; ++i)
1813 LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid];
1814 CV_Assert(downLayerData);
1815 while (downLayerData->skip)
1817 if (downLayerData->inputBlobsId.size() == 1)
1818 downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
1825 if (downLayerData && ld.id == downLayerData->id)
1827 biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid];
1831 CV_Assert(biasLayerData);
1833 if( eltwiseData->consumers.size() == 1 )
1835 // fuse eltwise + activation layer
1836 if (biasLayerData->id < ld.id)
1838 nextData = &layers[eltwiseData->consumers[0].lid];
1839 lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
1840 Ptr<ActivationLayer> nextActivLayer;
1842 nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
1844 if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
1845 (!nextData->type.compare("ReLU") ||
1846 !nextData->type.compare("ChannelsPReLU") ||
1847 !nextData->type.compare("Power")) &&
1848 currLayer->setActivation(nextActivLayer) )
1850 CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
1851 ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
1852 printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
1853 printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
1854 eltwiseData->skip = true;
1855 nextData->skip = true;
// This optimization is for patterns where [some_layer] and [conv] feed
// an [eltwise] layer that is followed by an in-place [activ] layer.
1862 // This way all the element-wise computations
1863 // (i.e. some_layer+conv or some_layer*conv)
// would be done at the [conv] layer. So we need to
// replace [conv]'s output blob with [eltwise]'s one,
1866 // considering that [activ] is an in-place layer.
1867 // Also we need to move all the consumers' references.
1868 // To prevent memory collisions (i.e. when input of
1869 // [conv] and output of [eltwise] is the same blob)
1870 // we allocate a new blob.
1871 CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
1872 ld.outputBlobs[0] = ld.outputBlobs[0].clone();
1873 ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
1875 eltwiseData->outputBlobs = ld.outputBlobs;
1876 nextData->outputBlobs = ld.outputBlobs;
1877 eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
1878 nextData->outputBlobsWrappers = ld.outputBlobsWrappers;
1880 // Move references of [activ] layer consumers to the newly allocated blob.
1881 for (int i = 0; i < nextData->consumers.size(); ++i)
1883 LayerData& consumer = layers[nextData->consumers[i].lid];
1884 for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
1886 if (consumer.inputBlobsId[j].lid == lpNext.lid)
1888 consumer.inputBlobs[j] = &ld.outputBlobs[0];
1889 consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
1902 if (preferableBackend != DNN_BACKEND_OPENCV)
1903 continue; // Go to the next layer.
// Optimization #2: if there is no layer that takes the max pooling layer's
// computed max indices (only some semantic segmentation networks might need
// them; many others only take the maximum values), then we switch the max
// pooling layer to the faster operating mode.
1909 Ptr<PoolingLayer> poolingLayer = ld.layerInstance.dynamicCast<PoolingLayer>();
1910 if( !poolingLayer.empty() && !ld.consumers.empty() )
1912 size_t i = 0, nconsumers = ld.consumers.size();
1913 for( ; i < nconsumers; i++ )
1914 if( ld.consumers[i].oid > 0 )
// If no layer takes the second output pin of the pooling layer as input,
// then we don't need to compute the indices.
1918 if( i >= nconsumers )
1920 poolingLayer->computeMaxIdx = false;
1921 printf_(("\tsimplified pooling layer %s\n", poolingLayer->name.c_str()));
// Optimization #3: if there is a concat layer that concatenates channels
// from its inputs (i.e. axis == 1), we make the concat layer's inputs
// write directly to the concatenation output buffer (thereby eliminating
// the concat layer, because the channels are concatenated implicitly).
1930 Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
1931 if( !concatLayer.empty() && concatLayer->axis == 1 && !concatLayer->padding &&
1932 ld.outputBlobs.size() == 1 )
1934 Mat& output = ld.outputBlobs[0];
1936 if (!ld.outputBlobsWrappers.empty() &&
1937 (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)))
1939 size_t i, ninputs = ld.inputBlobsId.size();
1940 bool conv_layer = true;
1941 for( i = 0; i < ninputs; i++ )
1943 LayerPin pin = ld.inputBlobsId[i];
1944 LayerData* inp_i_data = &layers[pin.lid];
1945 while(inp_i_data->skip &&
1946 inp_i_data->inputBlobsId.size() == 1 &&
1947 inp_i_data->consumers.size() == 1)
1949 pin = inp_i_data->inputBlobsId[0];
1950 inp_i_data = &layers[pin.lid];
1952 conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution");
1956 std::vector<UMat> umat_outputBlobs;
1957 umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
1958 umat_output = umat_outputBlobs[0];
1961 // TODO: in general, this optimization can always be done, but
1962 // many layers currently check that the input/output blobs are
1963 // continuous arrays. Unfortunately, this is not true when
1964 // the concatenation optimization is applied with batch_size > 1.
// So, for now, we only apply this optimization in the most popular
1966 // case batch_size == 1.
1967 if( output.dims == 4 && output.size[0] == 1 )
1969 size_t i, ninputs = ld.inputBlobsId.size();
1970 std::vector<LayerPin> realinputs(ninputs);
1971 for( i = 0; i < ninputs; i++ )
1973 LayerPin pin = ld.inputBlobsId[i];
1974 LayerData* inp_i_data = &layers[pin.lid];
1975 while(inp_i_data->skip &&
1976 inp_i_data->inputBlobsId.size() == 1 &&
1977 inp_i_data->consumers.size() == 1)
1979 pin = inp_i_data->inputBlobsId[0];
1980 inp_i_data = &layers[pin.lid];
1982 printf_(("\treal input for %s is %s\n",
1983 layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
1984 inp_i_data->getLayerInstance()->name.c_str()));
1986 if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
1988 realinputs[i] = pin;
// Allocate new memory to prevent collisions during memory
// reuse (see https://github.com/opencv/opencv/pull/10456).
1995 output = output.clone();
1996 if (preferableBackend == DNN_BACKEND_OPENCV &&
1997 IS_DNN_OPENCL_TARGET(preferableTarget))
1999 std::vector<UMat> umats(1);
2000 umat_output = umat_output.clone();
2001 umats[0] = umat_output;
2002 OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
2004 Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
2006 for( i = 0; i < ninputs; i++ )
2008 LayerPin pin = realinputs[i];
2009 LayerData* inp_i_data = &layers[pin.lid];
2010 int channels_i = ld.inputBlobs[i]->size[1];
2011 chrange[1] = Range(ofs, ofs + channels_i);
2012 printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
2013 pin.oid, ofs, ofs + channels_i));
2015 Mat output_slice = output(chrange);
2016 Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
2017 CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
2018 Mat* oldPtr = &curr_output;
2019 curr_output = output_slice;
2020 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
2022 std::vector<UMat> umats(inp_i_data->outputBlobsWrappers.size());
2023 umats[pin.oid] = umat_output(chrange);
2024 OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats);
// Layers that referred to the old input Mat will now see the new data
// through the same Mat object.
2028 CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output);
2031 printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
    void allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
    {
        CV_TRACE_FUNCTION();

        MapIdToLayerData::iterator it;
        for (it = layers.begin(); it != layers.end(); it++)
            it->second.flag = 0;

        CV_Assert(!layers[0].outputBlobs.empty());
        ShapesVec inputShapes;
        for(int i = 0; i < layers[0].outputBlobs.size(); i++)
        {
            Mat& inp = layers[0].outputBlobs[i];
            CV_Assert(inp.total());
            if (preferableBackend == DNN_BACKEND_OPENCV &&
                preferableTarget == DNN_TARGET_OPENCL_FP16)
            {
                layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
            }
            inputShapes.push_back(shape(inp));
        }
        LayersShapesMap layersShapes;
        getLayersShapes(inputShapes, layersShapes);

        blobManager.reset();
        backendWrappers.clear();
        // Fake references to input blobs.
        for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
            blobManager.addReference(LayerPin(0, i));
        for (it = layers.begin(); it != layers.end(); ++it)
        {
            const LayerData& ld = it->second;
            blobManager.addReferences(ld.inputBlobsId);
        }

        for (int i = 0; i < blobsToKeep_.size(); i++)
        {
            blobManager.addReference(blobsToKeep_[i]);
        }

        for (it = layers.begin(); it != layers.end(); it++)
        {
            int lid = it->first;
            allocateLayer(lid, layersShapes);
        }

        layersTimings.resize(lastLayerId + 1, 0);
        fuseLayers(blobsToKeep_);
    }
    void forwardLayer(LayerData &ld)
    {
        CV_TRACE_FUNCTION();

        Ptr<Layer> layer = ld.layerInstance;

        TickMeter tm;
        tm.start();

        if( !ld.skip )
        {
            std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
            if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
            {
                if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
                {
                    std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
                    std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                    std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
                    layer->forward(umat_inputBlobs, umat_outputBlobs, umat_internalBlobs);
                    if (DNN_CHECK_NAN_INF)
                    {
                        bool fail = false;
                        for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
                        {
                            UMat& u = umat_outputBlobs[i];
                            Mat m;
                            if (u.depth() == CV_16S) // FP16
                                convertFp16(u, m);
                            else
                                m = u.getMat(ACCESS_READ);
                            if (!checkRange(m))
                            {
                                std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                                std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                                fail = true;
                            }
                            else if (!checkRange(m, true, NULL, -1e6, 1e6))
                            {
                                std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                                std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                                fail = true;
                            }
                        }
                        if (fail)
                        {
                            for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
                            {
                                UMat& u = umat_inputBlobs[i];
                                Mat m;
                                if (u.depth() == CV_16S) // FP16
                                    convertFp16(u, m);
                                else
                                    m = u.getMat(ACCESS_READ);
                                std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
                                if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                            }
                            for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
                            {
                                UMat& u = umat_outputBlobs[i];
                                Mat m;
                                if (u.depth() == CV_16S) // FP16
                                    convertFp16(u, m);
                                else
                                    m = u.getMat(ACCESS_READ);
                                std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
                                if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                            }
                            for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
                            {
                                UMat& u = umat_internalBlobs[i];
                                Mat m;
                                if (u.depth() == CV_16S) // FP16
                                    convertFp16(u, m);
                                else
                                    m = u.getMat(ACCESS_READ);
                                std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
                                if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
                            }
                            if (DNN_CHECK_NAN_INF_RAISE_ERROR)
                                CV_Assert(!fail);
                        }
                    }
                    OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
                }
                else
                {
                    for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
                    {
                        if (!ld.inputBlobsWrappers[i].empty())
                            ld.inputBlobsWrappers[i]->copyToHost();
                    }

                    std::vector<Mat> inps(ld.inputBlobs.size());
                    for (int i = 0; i < ld.inputBlobs.size(); ++i)
                    {
                        inps[i] = *ld.inputBlobs[i];
                    }
                    layer->forward(inps, ld.outputBlobs, ld.internals);

                    if (DNN_CHECK_NAN_INF)
                    {
                        bool fail = false;
                        for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
                        {
                            const Mat& m = ld.outputBlobs[i];
                            if (!checkRange(m))
                            {
                                std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                                std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                                fail = true;
                            }
                            else if (!checkRange(m, true, NULL, -1e6, 1e6))
                            {
                                std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                                std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                                fail = true;
                            }
                        }
                        if (fail)
                        {
                            for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
                            {
                                const Mat* pM = ld.inputBlobs[i];
                                if (!pM)
                                {
                                    std::cout << "INPUT " << i << " is NULL" << std::endl;
                                    continue;
                                }
                                const Mat& m = *pM;
                                std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                                if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                            }
                            for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
                            {
                                const Mat& m = ld.outputBlobs[i];
                                std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                                if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                            }
                            for (size_t i = 0; i < ld.internals.size(); ++i)
                            {
                                const Mat& m = ld.internals[i];
                                std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                                if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                            }
                            if (DNN_CHECK_NAN_INF_RAISE_ERROR)
                                CV_Assert(!fail);
                        }
                    }

                    for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
                    {
                        if (!ld.outputBlobsWrappers[i].empty())
                            ld.outputBlobsWrappers[i]->setHostDirty();
                    }
                }
            }
            else
            {
                Ptr<BackendNode> node = it->second;
                CV_Assert(!node.empty());
                if (preferableBackend == DNN_BACKEND_HALIDE)
                {
                    forwardHalide(ld.outputBlobsWrappers, node);
                }
                else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
                {
                    forwardInfEngine(node);
                }
                else
                {
                    CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
                }
            }
        }
        else
            tm.reset();

        tm.stop();
        layersTimings[ld.id] = tm.getTimeTicks();

        ld.flag = 1;
    }
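    // Note: the DNN_CHECK_NAN_INF* diagnostics above rely on cv::checkRange:
    // checkRange(m) fails when m contains NaN or +/-Inf, while
    // checkRange(m, true, NULL, -1e6, 1e6) additionally bounds the values.
    // An equivalent standalone check (illustrative sketch):
    //
    //     bool hasNanOrInf = !checkRange(m);
    //     bool outOfRange  = !checkRange(m, /*quiet=*/true, /*pos=*/NULL, -1e6, 1e6);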
    void forwardToLayer(LayerData &ld, bool clearFlags = true)
    {
        CV_TRACE_FUNCTION();

        if (clearFlags)
        {
            MapIdToLayerData::iterator it;
            for (it = layers.begin(); it != layers.end(); it++)
                it->second.flag = 0;
        }

        // already forwarded
        if (ld.flag)
            return;

        // forward parents
        MapIdToLayerData::iterator it;
        for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
        {
            LayerData &ld = it->second;
            if (ld.flag)
                continue;
            forwardLayer(ld);
        }

        // forward the layer itself
        forwardLayer(ld);
    }

    void forwardAll()
    {
        CV_TRACE_FUNCTION();

        MapIdToLayerData::reverse_iterator last_layer = layers.rbegin();
        CV_Assert(last_layer != layers.rend());
        forwardToLayer(last_layer->second, true);
    }
    void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
    {
        std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;

        if (inOutShapes[id].in.empty())
        {
            for(int i = 0; i < inputLayerIds.size(); i++)
            {
                int layerId = inputLayerIds[i].lid;
                LayersShapesMap::iterator it =
                        inOutShapes.find(layerId);
                if(it == inOutShapes.end() ||
                        it->second.out.empty())
                {
                    getLayerShapesRecursively(layerId, inOutShapes);
                }
                const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
                inOutShapes[id].in.push_back(shape);
            }
        }
        const ShapesVec& is = inOutShapes[id].in;
        ShapesVec& os = inOutShapes[id].out;
        ShapesVec& ints = inOutShapes[id].internal;
        int requiredOutputs = layers[id].requiredOutputs.size();
        inOutShapes[id].supportInPlace =
            layers[id].getLayerInstance()->getMemoryShapes(is, requiredOutputs, os, ints);
    }
    void getLayersShapes(const ShapesVec& netInputShapes,
                         LayersShapesMap& inOutShapes)
    {
        inOutShapes.clear();

        inOutShapes[0].in = netInputShapes; // insert shape for the first input layer
        for (MapIdToLayerData::iterator it = layers.begin();
             it != layers.end(); it++)
        {
            getLayerShapesRecursively(it->first, inOutShapes);
        }
    }

    void getLayerShapes(const ShapesVec& netInputShapes,
                        const int layerId,
                        LayerShapes& shapes)
    {
        LayersShapesMap inOutShapes;
        inOutShapes[0].in = netInputShapes; // insert shape for the first input layer
        getLayerShapesRecursively(layerId, inOutShapes);
        shapes = inOutShapes[layerId];
    }
    LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
    {
        return *std::max_element(pins.begin(), pins.end());
    }

    Mat getBlob(const LayerPin& pin)
    {
        CV_TRACE_FUNCTION();

        if (!pin.valid())
            CV_Error(Error::StsObjectNotFound, "Requested blob not found");

        LayerData &ld = layers[pin.lid];
        if ((size_t)pin.oid >= ld.outputBlobs.size())
        {
            CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produces only %d outputs, "
                                           "the #%d was requested", ld.name.c_str(),
                                           (int)ld.outputBlobs.size(), pin.oid));
        }
        if (preferableTarget != DNN_TARGET_CPU)
        {
            CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
            // Transfer data to CPU if it's required.
            ld.outputBlobsWrappers[pin.oid]->copyToHost();
        }

        if (ld.outputBlobs[pin.oid].depth() == CV_16S)
        {
            convertFp16(ld.outputBlobs[pin.oid], output_blob);
            return output_blob;
        }
        else
            return ld.outputBlobs[pin.oid];
    }

    Mat getBlob(String outputName)
    {
        return getBlob(getPinByAlias(outputName));
    }
};
Net::Net() : impl(new Net::Impl)
{
}

Net Net::readFromModelOptimizer(const String& xml, const String& bin)
{
#ifndef HAVE_INF_ENGINE
    CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
#else
    InferenceEngine::CNNNetReader reader;
    reader.ReadNetwork(xml);
    reader.ReadWeights(bin);

    InferenceEngine::CNNNetwork ieNet = reader.getNetwork();

    std::vector<String> inputsNames;
    for (auto& it : ieNet.getInputsInfo())
    {
        inputsNames.push_back(it.first);
    }

    Net cvNet;
    cvNet.setInputsNames(inputsNames);

    Ptr<InfEngineBackendNode> backendNode(new InfEngineBackendNode(0));
    backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
    for (auto& it : ieNet.getOutputsInfo())
    {
        Ptr<Layer> cvLayer(new InfEngineBackendLayer(it.second));
        InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str());
        CV_Assert(ieLayer);

        LayerParams lp;
        int lid = cvNet.addLayer(it.first, "", lp);

        LayerData& ld = cvNet.impl->layers[lid];
        cvLayer->name = it.first;
        cvLayer->type = ieLayer->type;
        ld.layerInstance = cvLayer;
        ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode;

        for (int i = 0; i < inputsNames.size(); ++i)
            cvNet.connect(0, i, lid, i);
    }
    cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);

    cvNet.impl->skipInfEngineInit = true;
    return cvNet;
#endif // HAVE_INF_ENGINE
}
int Net::addLayer(const String &name, const String &type, LayerParams &params)
{
    CV_TRACE_FUNCTION();

    if (impl->getLayerId(name) >= 0)
    {
        CV_Error(Error::StsBadArg, "Layer \"" + name + "\" is already in the net");
        return -1;
    }

    int id = ++impl->lastLayerId;
    impl->layerNameToId.insert(std::make_pair(name, id));
    impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));

    return id;
}

int Net::addLayerToPrev(const String &name, const String &type, LayerParams &params)
{
    CV_TRACE_FUNCTION();

    int prvLid = impl->lastLayerId;
    int newLid = this->addLayer(name, type, params);
    this->connect(prvLid, 0, newLid, 0);
    return newLid;
}

void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum)
{
    CV_TRACE_FUNCTION();

    impl->connect(outLayerId, outNum, inpLayerId, inpNum);
}

void Net::connect(String _outPin, String _inPin)
{
    CV_TRACE_FUNCTION();

    LayerPin outPin = impl->getPinByAlias(_outPin);
    LayerPin inpPin = impl->getPinByAlias(_inPin);

    CV_Assert(outPin.valid() && inpPin.valid());

    impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
}
Mat Net::forward(const String& outputName)
{
    CV_TRACE_FUNCTION();

    String layerName = outputName;

    if (layerName.empty())
        layerName = getLayerNames().back();

    std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
    impl->setUpNet(pins);
    impl->forwardToLayer(impl->getLayerData(layerName));

    return impl->getBlob(layerName);
}
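// A minimal end-to-end sketch of the API above (file names, input name and
// output name are illustrative, not fixed by this module):
//
//     Net net = readNetFromCaffe("deploy.prototxt", "model.caffemodel");
//     Mat img = imread("image.jpg");
//     net.setInput(blobFromImage(img, 1.0, Size(224, 224), Scalar()), "data");
//     Mat prob = net.forward("prob");  // runs all layers up to "prob"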
void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
{
    CV_TRACE_FUNCTION();

    String layerName = outputName;

    if (layerName.empty())
        layerName = getLayerNames().back();

    std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
    impl->setUpNet(pins);
    impl->forwardToLayer(impl->getLayerData(layerName));

    LayerPin pin = impl->getPinByAlias(layerName);
    LayerData &ld = impl->layers[pin.lid];

    if (outputBlobs.isUMat())
    {
        impl->getBlob(layerName).copyTo(outputBlobs);
    }
    else if (outputBlobs.isMat())
    {
        outputBlobs.assign(impl->getBlob(layerName));
    }
    else if (outputBlobs.isMatVector())
    {
        if (impl->preferableTarget != DNN_TARGET_CPU)
        {
            for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
            {
                CV_Assert(!ld.outputBlobsWrappers[i].empty());
                ld.outputBlobsWrappers[i]->copyToHost();
            }
        }
        if (ld.outputBlobs[0].depth() == CV_32F)
        {
            std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
            outputvec = ld.outputBlobs;
        }
        else
        {
            std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
            outputvec.resize(ld.outputBlobs.size());
            for (int i = 0; i < outputvec.size(); i++)
                convertFp16(ld.outputBlobs[i], outputvec[i]);
        }
    }
    else if (outputBlobs.isUMatVector())
    {
        std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();

        if (impl->preferableBackend == DNN_BACKEND_OPENCV &&
            IS_DNN_OPENCL_TARGET(impl->preferableTarget))
        {
            if (impl->preferableTarget == DNN_TARGET_OPENCL)
                outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
            else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
            {
                std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                outputvec.resize(out_vec.size());
                for (int i = 0; i < out_vec.size(); i++)
                    convertFp16(out_vec[i], outputvec[i]);
            }
        }
        else
        {
            outputvec.resize(ld.outputBlobs.size());
            for (int i = 0; i < outputvec.size(); ++i)
                ld.outputBlobs[i].copyTo(outputvec[i]);
        }
    }
}
void Net::forward(OutputArrayOfArrays outputBlobs,
                  const std::vector<String>& outBlobNames)
{
    CV_TRACE_FUNCTION();

    std::vector<LayerPin> pins;
    for (int i = 0; i < outBlobNames.size(); i++)
    {
        pins.push_back(impl->getPinByAlias(outBlobNames[i]));
    }

    impl->setUpNet(pins);

    LayerPin out = impl->getLatestLayerPin(pins);

    impl->forwardToLayer(impl->getLayerData(out.lid));

    std::vector<Mat> matvec;
    for (int i = 0; i < pins.size(); i++)
    {
        matvec.push_back(impl->getBlob(pins[i]));
    }

    std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
    outputvec = matvec;
}
void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
                  const std::vector<String>& outBlobNames)
{
    CV_TRACE_FUNCTION();

    std::vector<LayerPin> pins;
    for (int i = 0; i < outBlobNames.size(); i++)
    {
        std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
        pins.insert(pins.end(), lp.begin(), lp.end());
    }

    impl->setUpNet(pins);

    LayerPin out = impl->getLatestLayerPin(pins);

    impl->forwardToLayer(impl->getLayerData(out.lid));

    outputBlobs.resize(outBlobNames.size());
    for (int i = 0; i < outBlobNames.size(); i++)
    {
        std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
        for (int j = 0; j < lp.size(); j++)  // separate index: 'i' addresses the output group
        {
            outputBlobs[i].push_back(impl->getBlob(lp[j]));
        }
    }
}
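// For networks with several outputs (e.g. detection models) the multi-output
// overloads are typically combined with getUnconnectedOutLayersNames();
// a sketch:
//
//     std::vector<Mat> outs;
//     net.forward(outs, net.getUnconnectedOutLayersNames());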
void Net::setPreferableBackend(int backendId)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG(backendId);

    if( impl->preferableBackend != backendId )
    {
        impl->preferableBackend = backendId;
        impl->netWasAllocated = false;
        impl->clear();
    }
}

void Net::setPreferableTarget(int targetId)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG(targetId);

    if( impl->preferableTarget != targetId )
    {
        impl->preferableTarget = targetId;
        if (IS_DNN_OPENCL_TARGET(targetId))
        {
#ifndef HAVE_OPENCL
#ifdef HAVE_INF_ENGINE
            if (impl->preferableBackend == DNN_BACKEND_OPENCV)
#else
            if (impl->preferableBackend == DNN_BACKEND_DEFAULT ||
                impl->preferableBackend == DNN_BACKEND_OPENCV)
#endif // HAVE_INF_ENGINE
                impl->preferableTarget = DNN_TARGET_CPU;
#else
            bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
            if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
                impl->preferableTarget = DNN_TARGET_OPENCL;
#endif
        }
        impl->netWasAllocated = false;
        impl->clear();
    }
}
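// A typical configuration sketch: request the OpenCL FP16 target and rely on
// the fallbacks above (to DNN_TARGET_OPENCL when the device lacks
// cl_khr_fp16, or to DNN_TARGET_CPU when OpenCV was built without OpenCL):
//
//     net.setPreferableBackend(DNN_BACKEND_OPENCV);
//     net.setPreferableTarget(DNN_TARGET_OPENCL_FP16);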
void Net::setInputsNames(const std::vector<String> &inputBlobNames)
{
    CV_TRACE_FUNCTION();

    impl->netInputLayer->setNames(inputBlobNames);
}

void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    LayerPin pin;
    pin.lid = 0;
    pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name);

    if (!pin.valid())
        CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");

    LayerData &ld = impl->layers[pin.lid];
    const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size());
    ld.outputBlobs.resize(numInputs);
    ld.outputBlobsWrappers.resize(numInputs);
    impl->netInputLayer->inputsData.resize(numInputs);
    impl->netInputLayer->scaleFactors.resize(numInputs);
    impl->netInputLayer->means.resize(numInputs);

    MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
    Mat blob_ = blob.getMat();
    bool oldShape = prevShape == shape(blob_);
    if (oldShape)
    {
        blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
    }
    else
    {
        ld.outputBlobs[pin.oid] = blob_.clone();
        impl->netInputLayer->inputsData[pin.oid] = ld.outputBlobs[pin.oid];
    }

    if (!ld.outputBlobsWrappers[pin.oid].empty())
    {
        ld.outputBlobsWrappers[pin.oid]->setHostDirty();
    }
    impl->netInputLayer->scaleFactors[pin.oid] = scalefactor;
    impl->netInputLayer->means[pin.oid] = mean;
    impl->netWasAllocated = impl->netWasAllocated && oldShape;
}
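// setInput() expects a 4-dimensional NCHW blob, usually produced by
// blobFromImage(); the scalefactor and mean are applied by the input layer
// itself. A sketch (the preprocessing constants are illustrative):
//
//     Mat inputBlob = blobFromImage(img, 1.0, Size(300, 300));
//     net.setInput(inputBlob, "data", 1.0 / 127.5, Scalar(127.5, 127.5, 127.5));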
Mat Net::getParam(LayerId layer, int numParam)
{
    LayerData &ld = impl->getLayerData(layer);
    std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
    CV_Assert(numParam < (int)layerBlobs.size());
    return layerBlobs[numParam];
}

void Net::setParam(LayerId layer, int numParam, const Mat &blob)
{
    LayerData &ld = impl->getLayerData(layer);

    std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
    CV_Assert(numParam < (int)layerBlobs.size());
    // we don't make strong checks; use this function carefully
    layerBlobs[numParam] = blob;
}

int Net::getLayerId(const String &layer)
{
    return impl->getLayerId(layer);
}

Ptr<Layer> Net::getLayer(LayerId layerId)
{
    LayerData &ld = impl->getLayerData(layerId);
    return ld.getLayerInstance();
}
std::vector<Ptr<Layer> > Net::getLayerInputs(LayerId layerId)
{
    LayerData &ld = impl->getLayerData(layerId);
    if (!ld.layerInstance)
        CV_Error(Error::StsNullPtr, format("Requested layer \"%s\" was not initialized", ld.name.c_str()));

    std::vector<Ptr<Layer> > inputLayers;
    inputLayers.reserve(ld.inputLayersId.size());
    std::set<int>::iterator it;
    for (it = ld.inputLayersId.begin(); it != ld.inputLayersId.end(); ++it) {
        inputLayers.push_back(getLayer(*it));
    }

    return inputLayers;
}

std::vector<String> Net::getLayerNames() const
{
    std::vector<String> res;
    res.reserve(impl->layers.size());

    Impl::MapIdToLayerData::iterator it;
    for (it = impl->layers.begin(); it != impl->layers.end(); it++)
    {
        if (it->second.id) // skip the Data layer
            res.push_back(it->second.name);
    }

    return res;
}
bool Net::empty() const
{
    return impl->layers.size() <= 1; // the first layer is the default Data layer
}

std::vector<int> Net::getUnconnectedOutLayers() const
{
    std::vector<int> layersIds;

    Impl::MapIdToLayerData::iterator it;
    for (it = impl->layers.begin(); it != impl->layers.end(); it++)
    {
        int lid = it->first;
        LayerData &ld = it->second;

        if (ld.requiredOutputs.size() == 0)
            layersIds.push_back(lid);
    }

    return layersIds;
}

std::vector<String> Net::getUnconnectedOutLayersNames() const
{
    std::vector<int> ids = getUnconnectedOutLayers();
    const size_t n = ids.size();
    std::vector<String> names(n);
    for (size_t i = 0; i < n; ++i)
    {
        names[i] = impl->layers[ids[i]].name;
    }
    return names;
}
void Net::getLayersShapes(const ShapesVec& netInputShapes,
                          std::vector<int>& layersIds,
                          std::vector<ShapesVec>& inLayersShapes,
                          std::vector<ShapesVec>& outLayersShapes) const
{
    layersIds.clear();
    inLayersShapes.clear();
    outLayersShapes.clear();

    Impl::LayersShapesMap inOutShapes;
    impl->getLayersShapes(netInputShapes, inOutShapes);

    for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin();
        it != inOutShapes.end(); it++)
    {
        layersIds.push_back(it->first);
        inLayersShapes.push_back(it->second.in);
        outLayersShapes.push_back(it->second.out);
    }
}

void Net::getLayersShapes(const MatShape& netInputShape,
                          std::vector<int>& layerIds,
                          std::vector<ShapesVec>& inLayersShapes,
                          std::vector<ShapesVec>& outLayersShapes) const
{
    getLayersShapes(ShapesVec(1, netInputShape),
                    layerIds, inLayersShapes, outLayersShapes);
}

void Net::getLayerShapes(const MatShape& netInputShape,
                         const int layerId,
                         ShapesVec& inLayerShapes,
                         ShapesVec& outLayerShapes) const
{
    getLayerShapes(ShapesVec(1, netInputShape),
                   layerId, inLayerShapes, outLayerShapes);
}

void Net::getLayerShapes(const ShapesVec& netInputShapes,
                         const int layerId,
                         ShapesVec& inLayerShapes,
                         ShapesVec& outLayerShapes) const
{
    LayerShapes shapes;
    impl->getLayerShapes(netInputShapes, layerId, shapes);
    inLayerShapes = shapes.in;
    outLayerShapes = shapes.out;
}
int64 Net::getFLOPS(const std::vector<MatShape>& netInputShapes) const
{
    CV_TRACE_FUNCTION();

    int64 flops = 0;
    std::vector<int> ids;
    std::vector<std::vector<MatShape> > inShapes, outShapes;
    getLayersShapes(netInputShapes, ids, inShapes, outShapes);
    CV_Assert(inShapes.size() == outShapes.size());
    CV_Assert(inShapes.size() == ids.size());

    for(int i = 0; i < ids.size(); i++)
    {
        flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i],
                                                                   outShapes[i]);
    }

    return flops;
}

int64 Net::getFLOPS(const MatShape& netInputShape) const
{
    return getFLOPS(std::vector<MatShape>(1, netInputShape));
}

int64 Net::getFLOPS(const int layerId,
                    const std::vector<MatShape>& netInputShapes) const
{
    Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
    CV_Assert(layer != impl->layers.end());

    LayerShapes shapes;
    impl->getLayerShapes(netInputShapes, layerId, shapes);

    return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
}

int64 Net::getFLOPS(const int layerId,
                    const MatShape& netInputShape) const
{
    return getFLOPS(layerId, std::vector<MatShape>(1, netInputShape));
}
void Net::getLayerTypes(std::vector<String>& layersTypes) const
{
    layersTypes.clear();

    std::map<String, int> layers;
    for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
         it != impl->layers.end(); it++)
    {
        if (layers.find(it->second.type) == layers.end())
            layers[it->second.type] = 0;
        layers[it->second.type]++;
    }

    for (std::map<String, int>::iterator it = layers.begin();
         it != layers.end(); it++)
    {
        layersTypes.push_back(it->first);
    }
}

int Net::getLayersCount(const String& layerType) const
{
    int count = 0;
    for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
         it != impl->layers.end(); it++)
    {
        if (it->second.type == layerType)
            count++;
    }
    return count;
}
void Net::getMemoryConsumption(const int layerId,
                               const std::vector<MatShape>& netInputShapes,
                               size_t& weights, size_t& blobs) const
{
    CV_TRACE_FUNCTION();

    Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
    CV_Assert(layer != impl->layers.end());

    weights = blobs = 0;
    for(int i = 0; i < layer->second.params.blobs.size(); i++)
    {
        const Mat& weightsBlob = layer->second.params.blobs[i];
        weights += weightsBlob.total()*weightsBlob.elemSize();
    }

    ShapesVec inLayerShapes, outLayerShapes;
    getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes);
    for(int i = 0; i < outLayerShapes.size(); i++)
    {
        blobs += total(outLayerShapes[i]) * sizeof(float);
    }
}

void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
                               size_t& weights, size_t& blobs) const
{
    CV_TRACE_FUNCTION();

    std::vector<int> layerIds;
    std::vector<size_t> w, b;
    getMemoryConsumption(netInputShapes, layerIds, w, b);

    weights = blobs = 0;
    for(int i = 0; i < layerIds.size(); i++)
    {
        weights += w[i];
        blobs += b[i];
    }
}

void Net::getMemoryConsumption(const int layerId,
                               const MatShape& netInputShape,
                               size_t& weights, size_t& blobs) const
{
    getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
                         weights, blobs);
}

void Net::getMemoryConsumption(const MatShape& netInputShape,
                               size_t& weights, size_t& blobs) const
{
    getMemoryConsumption(std::vector<MatShape>(1, netInputShape),
                         weights, blobs);
}

void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
                               std::vector<int>& layerIds, std::vector<size_t>& weights,
                               std::vector<size_t>& blobs) const
{
    CV_TRACE_FUNCTION();

    layerIds.clear();
    weights.clear();
    blobs.clear();

    std::vector<std::vector<MatShape> > inLayerShapes, outLayerShapes;
    getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes);

    for(int i = 0; i < layerIds.size(); i++)
    {
        size_t w = 0, b = 0;
        Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]);
        CV_Assert(layer != impl->layers.end());

        for(int j = 0; j < layer->second.params.blobs.size(); j++)
        {
            const Mat& weightsBlob = layer->second.params.blobs[j];
            w += weightsBlob.total()*weightsBlob.elemSize();
        }

        for(int j = 0; j < outLayerShapes[i].size(); j++)
        {
            b += total(outLayerShapes[i][j]) * sizeof(float);
        }

        weights.push_back(w);
        blobs.push_back(b);
    }
}

void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>& layerIds,
                               std::vector<size_t>& weights, std::vector<size_t>& blobs) const
{
    getMemoryConsumption(std::vector<MatShape>(1, netInputShape), layerIds,
                         weights, blobs);
}
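// A sketch of estimating the model footprint before allocation (the input
// shape is illustrative):
//
//     size_t weights = 0, blobs = 0;
//     net.getMemoryConsumption(shape(1, 3, 224, 224), weights, blobs);
//     std::cout << (weights + blobs) / (1024 * 1024) << " MB" << std::endl;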
void Net::enableFusion(bool fusion)
{
    if( impl->fusion != fusion )
    {
        impl->fusion = fusion;
        impl->netWasAllocated = false;
        impl->clear();
    }
}

void Net::setHalideScheduler(const String& scheduler)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str());

    impl->halideConfigFile = scheduler;
}

int64 Net::getPerfProfile(std::vector<double>& timings)
{
    timings = std::vector<double>(impl->layersTimings.begin() + 1, impl->layersTimings.end());
    int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0);
    return total;
}
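// getPerfProfile() reports time in ticks; divide by getTickFrequency() to get
// seconds. A sketch:
//
//     std::vector<double> layerTimings;
//     int64 t = net.getPerfProfile(layerTimings);
//     double totalMs = t * 1000.0 / getTickFrequency();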
//////////////////////////////////////////////////////////////////////////

Layer::Layer() { preferableTarget = DNN_TARGET_CPU; }

Layer::Layer(const LayerParams &params)
    : blobs(params.blobs), name(params.name), type(params.type)
{
    preferableTarget = DNN_TARGET_CPU;
}

void Layer::setParamsFrom(const LayerParams &params)
{
    blobs = params.blobs;
    name = params.name;
    type = params.type;
}

int Layer::inputNameToIndex(String)
{
    return -1;
}

int Layer::outputNameToIndex(const String&)
{
    return 0;
}

bool Layer::supportBackend(int backendId)
{
    return backendId == DNN_BACKEND_OPENCV;
}
Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
{
    CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type +
                                       " layers is not defined.");
    return Ptr<BackendNode>();
}

Ptr<BackendNode> Layer::initInfEngine(const std::vector<Ptr<BackendWrapper> > &)
{
    CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
                                       " layers is not defined.");
    return Ptr<BackendNode>();
}
void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*> &inputs,
                                 const std::vector<Mat> &outputs, int targetId) const
{
#ifdef HAVE_HALIDE
    CV_TRACE_FUNCTION();

    Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"),
                xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile");
    Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();

    int outW, outH, outC, outN;
    getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);

    if (targetId == DNN_TARGET_CPU)
    {
        if (outW == 1 && outH == 1)
        {
            if (outC + outN == 1)
                return;

            if (outC > 8)
                top.split(c, co, ci, 8)
                   .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
                   .parallel(tile)
                   .vectorize(ci, 8);
            else
                top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile)
                   .parallel(tile);
        }
        else
        {
            if (outH > 2)
            {
                top.reorder(x, c, y)
                   .split(y, yo, yi, 2)
                   .fuse(yo, n, tile)
                   .parallel(tile)
                   .unroll(yi)
                   .vectorize(x, outW >= 16 ? 16 : outW);
            }
        }
    }
    else if (targetId == DNN_TARGET_OPENCL)
    {
        if (outW == 1 && outH == 1)
        {
            int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
            top.split(c, co, ci, c_split)
               .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
               .gpu_blocks(tile)
               .gpu_threads(ci);
        }
        else
        {
            int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW;
            int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH;
            // Supported vectorization widths: 2, 3, 4, 8, 16
            int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC);
            top.split(x, xo, xi, x_split).split(y, yo, yi, y_split)
               .split(c, co, ci, c_split)
               .gpu_blocks(xo, yo, co)
               .gpu_threads(xi, yi)
               .reorder(xi, yi, ci, xo, yo, co)
               .vectorize(ci);
        }
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown target identifier");
#endif // HAVE_HALIDE
}
Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
{
    return Ptr<BackendNode>();
}

bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
bool Layer::tryFuse(Ptr<Layer>&) { return false; }
void Layer::getScaleShift(Mat& scale, Mat& shift) const
{
    scale = Mat();
    shift = Mat();
}

void Layer::unsetAttached()
{
    setActivation(Ptr<ActivationLayer>());
}
template <typename T>
static void vecToPVec(const std::vector<T> &v, std::vector<T*> &pv)
{
    pv.resize(v.size());
    for (size_t i = 0; i < v.size(); i++)
        pv[i] = const_cast<T*>(&v[i]);
}

void Layer::finalize(const std::vector<Mat> &inputs, std::vector<Mat> &outputs)
{
    CV_TRACE_FUNCTION();
    this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
}

void Layer::finalize(const std::vector<Mat*> &input, std::vector<Mat> &output)
{
    CV_UNUSED(input); CV_UNUSED(output);
}

void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
{
    CV_TRACE_FUNCTION();
    std::vector<Mat> inputs, outputs;
    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);

    std::vector<Mat*> inputsp;
    vecToPVec(inputs, inputsp);
    this->finalize(inputsp, outputs);
}

std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
{
    CV_TRACE_FUNCTION();

    std::vector<Mat> outputs;
    this->finalize(inputs, outputs);
    return outputs;
}
void Layer::forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
{
    // We kept this method for compatibility. DNN calls it now only to support users' implementations.
    CV_UNUSED(input); CV_UNUSED(output); CV_UNUSED(internals);
}

void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}

void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        std::vector<UMat> orig_inputs;
        std::vector<UMat> orig_outputs;
        std::vector<UMat> orig_internals;

        inputs_arr.getUMatVector(orig_inputs);
        outputs_arr.getUMatVector(orig_outputs);
        internals_arr.getUMatVector(orig_internals);

        inputs.resize(orig_inputs.size());
        for (size_t i = 0; i < orig_inputs.size(); i++)
            convertFp16(orig_inputs[i], inputs[i]);

        outputs.resize(orig_outputs.size());
        for (size_t i = 0; i < orig_outputs.size(); i++)
            outputs[i].create(shape(orig_outputs[i]), CV_32F);

        internals.resize(orig_internals.size());
        for (size_t i = 0; i < orig_internals.size(); i++)
            internals[i].create(shape(orig_internals[i]), CV_32F);

        forward(inputs, outputs, internals);

        for (size_t i = 0; i < outputs.size(); i++)
            convertFp16(outputs[i], orig_outputs[i]);

        // sync results back
        outputs_arr.assign(orig_outputs);
        internals_arr.assign(orig_internals);
        return;
    }

    std::vector<Mat> inpvec;
    std::vector<Mat> outputs;
    std::vector<Mat> internals;

    inputs_arr.getMatVector(inpvec);
    outputs_arr.getMatVector(outputs);
    internals_arr.getMatVector(internals);

    std::vector<Mat*> inputs(inpvec.size());
    for (int i = 0; i < inpvec.size(); i++)
        inputs[i] = &inpvec[i];

    this->forward(inputs, outputs, internals);

    // sync results back
    outputs_arr.assign(outputs);
    internals_arr.assign(internals);
}
void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
    CV_TRACE_FUNCTION();

    this->finalize(inputs, outputs);
    this->forward(inputs, outputs, internals);
}

Layer::~Layer() {}

bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
                            const int requiredOutputs,
                            std::vector<MatShape> &outputs,
                            std::vector<MatShape> &internals) const
{
    CV_Assert(inputs.size());
    outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
    return false;
}
//////////////////////////////////////////////////////////////////////////

static Mutex& getLayerFactoryMutex()
{
    static Mutex* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getInitializationMutex());
        if (instance == NULL)
            instance = new Mutex();
    }
    return *instance;
}

typedef std::map<String, std::vector<LayerFactory::Constructor> > LayerFactory_Impl;

static LayerFactory_Impl& getLayerFactoryImpl_()
{
    static LayerFactory_Impl impl;
    return impl;
}

static LayerFactory_Impl& getLayerFactoryImpl()
{
    static LayerFactory_Impl* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getLayerFactoryMutex());
        if (instance == NULL)
        {
            instance = &getLayerFactoryImpl_();
            initializeLayerFactory();
        }
    }
    return *instance;
}
void LayerFactory::registerLayer(const String &type, Constructor constructor)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    String type_ = type.toLowerCase();
    LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);

    if (it != getLayerFactoryImpl().end())
    {
        if (it->second.back() == constructor)
            CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" was already registered");
        it->second.push_back(constructor);
    }
    getLayerFactoryImpl().insert(std::make_pair(type_, std::vector<Constructor>(1, constructor)));
}

void LayerFactory::unregisterLayer(const String &type)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    String type_ = type.toLowerCase();

    LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);
    if (it != getLayerFactoryImpl().end())
    {
        if (it->second.size() > 1)
            it->second.pop_back();
        else
            getLayerFactoryImpl().erase(it);
    }
}

Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    String type_ = type.toLowerCase();
    LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type_);

    if (it != getLayerFactoryImpl().end())
    {
        CV_Assert(!it->second.empty());
        return it->second.back()(params);
    }
    else
    {
        return Ptr<Layer>(); //NULL
    }
}
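// Custom layers enter the library through this factory. A minimal sketch
// (MyLayer is a hypothetical user class; the CV_DNN_REGISTER_LAYER_CLASS
// macro wraps the same registration):
//
//     class MyLayer : public Layer
//     {
//     public:
//         MyLayer(const LayerParams& params) : Layer(params) {}
//         static Ptr<Layer> create(LayerParams& params)
//         {
//             return Ptr<Layer>(new MyLayer(params));
//         }
//     };
//     LayerFactory::registerLayer("MyType", MyLayer::create);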
BackendNode::BackendNode(int backendId) : backendId(backendId) {}

BackendNode::~BackendNode() {}

BackendWrapper::BackendWrapper(int backendId, int targetId)
    : backendId(backendId), targetId(targetId) {}

BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::~BackendWrapper() {}
Net readNet(const String& _model, const String& _config, const String& _framework)
{
    String framework = _framework.toLowerCase();
    String model = _model;
    String config = _config;
    const std::string modelExt = model.substr(model.rfind('.') + 1);
    const std::string configExt = config.substr(config.rfind('.') + 1);
    if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" ||
        modelExt == "prototxt" || configExt == "prototxt")
    {
        if (modelExt == "prototxt" || configExt == "caffemodel")
            std::swap(model, config);
        return readNetFromCaffe(config, model);
    }
    if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" ||
        modelExt == "pbtxt" || configExt == "pbtxt")
    {
        if (modelExt == "pbtxt" || configExt == "pb")
            std::swap(model, config);
        return readNetFromTensorflow(model, config);
    }
    if (framework == "torch" || modelExt == "t7" || modelExt == "net" ||
        configExt == "t7" || configExt == "net")
    {
        return readNetFromTorch(model.empty() ? config : model);
    }
    if (framework == "darknet" || modelExt == "weights" || configExt == "weights" ||
        modelExt == "cfg" || configExt == "cfg")
    {
        if (modelExt == "cfg" || configExt == "weights")
            std::swap(model, config);
        return readNetFromDarknet(config, model);
    }
    if (framework == "dldt" || modelExt == "bin" || configExt == "bin" ||
        modelExt == "xml" || configExt == "xml")
    {
        if (modelExt == "xml" || configExt == "bin")
            std::swap(model, config);
        return readNetFromModelOptimizer(config, model);
    }
    if (framework == "onnx" || modelExt == "onnx")
    {
        return readNetFromONNX(model);
    }
    CV_Error(Error::StsError, "Cannot determine an origin framework of files: " +
                              model + (config.empty() ? "" : ", " + config));
}
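// readNet() dispatches on the explicit framework tag first and then on file
// extensions; for the pair-based formats the model/config order does not
// matter because of the std::swap calls above. Both calls below are
// equivalent (file names are illustrative):
//
//     Net a = readNet("model.caffemodel", "deploy.prototxt");
//     Net b = readNet("deploy.prototxt", "model.caffemodel");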
Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
            const std::vector<uchar>& bufferConfig)
{
    String framework = _framework.toLowerCase();
    if (framework == "caffe")
        return readNetFromCaffe(bufferConfig, bufferModel);
    else if (framework == "tensorflow")
        return readNetFromTensorflow(bufferModel, bufferConfig);
    else if (framework == "darknet")
        return readNetFromDarknet(bufferConfig, bufferModel);
    else if (framework == "torch")
        CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
    else if (framework == "dldt")
        CV_Error(Error::StsNotImplemented, "Reading Intel's Model Optimizer models from buffers");
    CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework);
}
Net readNetFromModelOptimizer(const String &xml, const String &bin)
{
    return Net::readFromModelOptimizer(xml, bin);
}

CV__DNN_EXPERIMENTAL_NS_END
}} // namespace cv::dnn