Merge remote-tracking branch 'upstream/3.4' into merge-3.4

author Alexander Alekhin <alexander.a.alekhin@gmail.com>

Sat, 26 Mar 2022 16:00:11 +0000 (16:00 +0000)

committer Alexander Alekhin <alexander.a.alekhin@gmail.com>

Sat, 26 Mar 2022 16:00:28 +0000 (16:00 +0000)
author Alexander Alekhin <alexander.a.alekhin@gmail.com>
Sat, 26 Mar 2022 16:00:11 +0000 (16:00 +0000)
committer Alexander Alekhin <alexander.a.alekhin@gmail.com>
Sat, 26 Mar 2022 16:00:28 +0000 (16:00 +0000)
diff --cc modules/core/include/opencv2/core/matx.hpp
Simple merge
diff --cc modules/core/include/opencv2/core/operations.hpp
Simple merge
diff --cc modules/core/src/mathfuncs.cpp
Simple merge
diff --cc modules/core/test/test_arithm.cpp
Simple merge
diff --cc modules/core/test/test_mat.cpp

index e32d978,a264330..6c885a4
--- 1/modules/core/test/test_mat.cpp
--- 2/modules/core/test/test_mat.cpp
+++ b/modules/core/test/test_mat.cpp
@@@ -2371,82 -2381,14 +2371,95 @@@ TEST(Mat, ptrVecni_20044
       EXPECT_EQ(int(6), *(ci));
   }
   
++
+ // Compile-only regression test: each helper invoked through cv::Vec3b
+ // (ones, zeros, randn, randu) must produce a value that can be assigned
+ // to a cv::Vec3b. The body contains no runtime expectations.
+ TEST(Mat, VecMatx_4650)
+ {
+   // Makes sure the following compiles.
+   cv::Vec3b a;
+   a = cv::Vec3b::ones();
+   a = cv::Vec3b::zeros();
+   a = cv::Vec3b::randn(0, 10);
+   a = cv::Vec3b::randu(0, 10);
+ }
+ 
++
+ +// Checks the reverse iterators (rbegin/rend) of cv::Mat and cv::Mat_<uchar>:
+ +//  * an empty matrix yields rbegin == rend and neither call throws;
+ +//  * for 1D, 2D and 3D matrices wrapping a std::vector<uchar>, walking the matrix
+ +//    backwards visits the same values as walking the vector backwards;
+ +//  * reverse iterators obtained from const matrices dereference to something
+ +//    that cannot be assigned a uchar.
+ +TEST(Mat, reverse_iterator_19967)
+ +{
+ +    // empty iterator (#16855)
+ +    cv::Mat m_empty;
+ +    EXPECT_NO_THROW(m_empty.rbegin<uchar>());
+ +    EXPECT_NO_THROW(m_empty.rend<uchar>());
+ +    EXPECT_TRUE(m_empty.rbegin<uchar>() == m_empty.rend<uchar>());
+ +
+ +    // 1D test
+ +    std::vector<uchar> data{0, 1, 2, 3};
+ +    const std::vector<int> sizes_1d{4};
+ +
+ +    //Base class
+ +    cv::Mat m_1d(sizes_1d, CV_8U, data.data());
+ +    auto mismatch_it_pair_1d = std::mismatch(data.rbegin(), data.rend(), m_1d.rbegin<uchar>());
+ +    EXPECT_EQ(mismatch_it_pair_1d.first, data.rend());  // expect no mismatch
+ +    EXPECT_EQ(mismatch_it_pair_1d.second, m_1d.rend<uchar>());
+ +
+ +    //Templated derived class
+ +    cv::Mat_<uchar> m_1d_t(static_cast<int>(sizes_1d.size()), sizes_1d.data(), data.data());
+ +    auto mismatch_it_pair_1d_t = std::mismatch(data.rbegin(), data.rend(), m_1d_t.rbegin());
+ +    EXPECT_EQ(mismatch_it_pair_1d_t.first, data.rend());  // expect no mismatch
+ +    EXPECT_EQ(mismatch_it_pair_1d_t.second, m_1d_t.rend());
+ +
+ +
+ +    // 2D test (same four values viewed as a 2x2 matrix)
+ +    const std::vector<int> sizes_2d{2, 2};
+ +
+ +    //Base class
+ +    cv::Mat m_2d(sizes_2d, CV_8U, data.data());
+ +    auto mismatch_it_pair_2d = std::mismatch(data.rbegin(), data.rend(), m_2d.rbegin<uchar>());
+ +    EXPECT_EQ(mismatch_it_pair_2d.first, data.rend());
+ +    EXPECT_EQ(mismatch_it_pair_2d.second, m_2d.rend<uchar>());
+ +
+ +    //Templated derived class
+ +    cv::Mat_<uchar> m_2d_t(static_cast<int>(sizes_2d.size()),sizes_2d.data(), data.data());
+ +    auto mismatch_it_pair_2d_t = std::mismatch(data.rbegin(), data.rend(), m_2d_t.rbegin());
+ +    EXPECT_EQ(mismatch_it_pair_2d_t.first, data.rend());
+ +    EXPECT_EQ(mismatch_it_pair_2d_t.second, m_2d_t.rend());
+ +
+ +    // 3D test
+ +    std::vector<uchar> data_3d{0, 1, 2, 3, 4, 5, 6, 7};
+ +    const std::vector<int> sizes_3d{2, 2, 2};
+ +
+ +    //Base class
+ +    cv::Mat m_3d(sizes_3d, CV_8U, data_3d.data());
+ +    auto mismatch_it_pair_3d = std::mismatch(data_3d.rbegin(), data_3d.rend(), m_3d.rbegin<uchar>());
+ +    EXPECT_EQ(mismatch_it_pair_3d.first, data_3d.rend());
+ +    EXPECT_EQ(mismatch_it_pair_3d.second, m_3d.rend<uchar>());
+ +
+ +    //Templated derived class
+ +    cv::Mat_<uchar> m_3d_t(static_cast<int>(sizes_3d.size()),sizes_3d.data(), data_3d.data());
+ +    auto mismatch_it_pair_3d_t = std::mismatch(data_3d.rbegin(), data_3d.rend(), m_3d_t.rbegin());
+ +    EXPECT_EQ(mismatch_it_pair_3d_t.first, data_3d.rend());
+ +    EXPECT_EQ(mismatch_it_pair_3d_t.second, m_3d_t.rend());
+ +
+ +    // const test base class
+ +    const cv::Mat m_1d_const(sizes_1d, CV_8U, data.data());
+ +
+ +    auto mismatch_it_pair_1d_const = std::mismatch(data.rbegin(), data.rend(), m_1d_const.rbegin<uchar>());
+ +    EXPECT_EQ(mismatch_it_pair_1d_const.first, data.rend());  // expect no mismatch
+ +    EXPECT_EQ(mismatch_it_pair_1d_const.second, m_1d_const.rend<uchar>());
+ +
+ +    // The constness check is made on the type produced by dereferencing the iterator
+ +    // (decltype is an unevaluated context, so rend() is never really dereferenced).
+ +    // Checking the iterator type itself would be vacuous: a uchar can never be
+ +    // assigned to an iterator object, const or not.
+ +    EXPECT_FALSE((std::is_assignable<decltype(*m_1d_const.rend<uchar>()), uchar>::value)) << "Constness of const iterator violated.";
+ +    EXPECT_FALSE((std::is_assignable<decltype(*m_1d_const.rbegin<uchar>()), uchar>::value)) << "Constness of const iterator violated.";
+ +
+ +    // const test templated derived class
+ +    const cv::Mat_<uchar> m_1d_const_t(static_cast<int>(sizes_1d.size()), sizes_1d.data(), data.data());
+ +
+ +    auto mismatch_it_pair_1d_const_t = std::mismatch(data.rbegin(), data.rend(), m_1d_const_t.rbegin());
+ +    EXPECT_EQ(mismatch_it_pair_1d_const_t.first, data.rend());  // expect no mismatch
+ +    EXPECT_EQ(mismatch_it_pair_1d_const_t.second, m_1d_const_t.rend());
+ +
+ +    // Same element-level constness check for the templated matrix.
+ +    EXPECT_FALSE((std::is_assignable<decltype(*m_1d_const_t.rend()), uchar>::value)) << "Constness of const iterator violated.";
+ +    EXPECT_FALSE((std::is_assignable<decltype(*m_1d_const_t.rbegin()), uchar>::value)) << "Constness of const iterator violated.";
+ +
+ +}
+ +
++
   }} // namespace
diff --cc modules/dnn/src/layers/recurrent_layers.cpp
Simple merge
diff --cc modules/dnn/src/net_impl.cpp

index 24f9c8c,0000000..315de13

mode 100644,000000..100644
--- 1/modules/dnn/src/net_impl.cpp
--- /dev/null
+++ b/modules/dnn/src/net_impl.cpp
@@@ -1,2087 -1,0 +1,2091 @@@
-             const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
+ +// This file is part of OpenCV project.
+ +// It is subject to the license terms in the LICENSE file found in the top-level directory
+ +// of this distribution and at http://opencv.org/license.html.
+ +
+ +#include "precomp.hpp"
+ +
+ +#include "net_impl.hpp"
+ +
+ +namespace cv {
+ +namespace dnn {
+ +CV__DNN_INLINE_NS_BEGIN
+ +
+ +
+ +static int g_networkId = 0;
+ +
+ +
+ +// Initializes the shared base state: the network id is taken from CV_XADD on the
+ +// file-level g_networkId counter, the dump counter starts at zero and the dump
+ +// level is read from getParam_DNN_NETWORK_DUMP().
+ +detail::NetImplBase::NetImplBase()
+ +    : networkId(CV_XADD(&g_networkId, 1)),
+ +      networkDumpCounter(0),
+ +      dumpLevel(getParam_DNN_NETWORK_DUMP())
+ +{
+ +}
+ +
+ +
+ +// Builds the base name of the next dump file from the network id and the current
+ +// dump counter. The counter is post-incremented on every call.
+ +std::string detail::NetImplBase::getDumpFileNameBase() const
+ +{
+ +    const auto dumpIndex = networkDumpCounter++;
+ +    return cv::format("ocv_dnn_net_%05d_%02d", networkId, dumpIndex);
+ +}
+ +
+ +
+ +// Sets up an empty network: default flags and backend/target, plus the fake
+ +// input layer registered under id 0 and the name "_input".
+ +Net::Impl::Impl()
+ +{
+ +    // bookkeeping flags
+ +    lastLayerId = 0;
+ +    netWasAllocated = false;
+ +    netWasQuantized = false;
+ +    hasDynamicShapes = false;
+ +    skipInfEngineInit = false;
+ +
+ +    // execution defaults
+ +    fusion = true;
+ +    isAsync = false;
+ +    preferableBackend = DNN_BACKEND_DEFAULT;
+ +    preferableTarget = DNN_TARGET_CPU;
+ +
+ +    // fake net input layer, stored as layer #0
+ +    netInputLayer.reset(new DataLayer());
+ +    LayerData& inputLayer = layers[0];
+ +    inputLayer.id = 0;
+ +    inputLayer.name = "_input";
+ +    inputLayer.type = "__NetInputLayer__";
+ +    inputLayer.layerInstance = netInputLayer;
+ +    netInputLayer->name = inputLayer.name;
+ +    layerNameToId.insert(std::make_pair(inputLayer.name, inputLayer.id));
+ +}
+ +
+ +
+ +// A network counts as empty while it holds nothing beyond the default Data layer.
+ +bool Net::Impl::empty() const
+ +{
+ +    const bool hasUserLayers = layers.size() > 1;
+ +    return !hasUserLayers;
+ +}
+ +
+ +
+ +// Drops per-layer runtime state: blobs of every layer except layer #0 are cleared,
+ +// the skip flag is reset, and unsetAttached() is called on every layer that has an
+ +// instance. The network is then marked as not allocated and timings are cleared.
+ +void Net::Impl::clear()
+ +{
+ +    CV_TRACE_FUNCTION();
+ +
+ +    for (auto& entry : layers)
+ +    {
+ +        LayerData& ld = entry.second;
+ +
+ +        const bool isNetInput = (ld.id == 0);
+ +        if (!isNetInput)
+ +        {
+ +            ld.inputBlobs.clear();
+ +            ld.outputBlobs.clear();
+ +            ld.internals.clear();
+ +        }
+ +        ld.skip = false;
+ +        // ld.consumers.clear();
+ +
+ +        Ptr<Layer> instance = ld.layerInstance;
+ +        if (!instance.empty())
+ +            instance->unsetAttached();
+ +    }
+ +
+ +    netWasAllocated = false;
+ +    layersTimings.clear();
+ +}
+ +
+ +
+ +// Prepares the network for execution.
+ +// First resolves the default backend and asserts that the chosen backend/target
+ +// pair is one of the accepted combinations. Then, if the network has not been
+ +// allocated yet or the requested set of blobs to keep differs from the stored one,
+ +// it applies the CPU/OpenCL fallbacks, clears the current state, allocates all
+ +// layers and initializes the backend.
+ +// @param blobsToKeep_ pins forwarded to allocateLayers() and initBackend(); also
+ +//                     stored in this->blobsToKeep for the next comparison.
+ +void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
+ +{
+ +    CV_TRACE_FUNCTION();
+ +
+ +    // dump the network once before any setup (only while the dump counter is still zero)
+ +    if (dumpLevel && networkDumpCounter == 0)
+ +    {
+ +        dumpNetworkToFile();
+ +    }
+ +
+ +    if (preferableBackend == DNN_BACKEND_DEFAULT)
+ +        preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT();
+ +#ifdef HAVE_INF_ENGINE
+ +    if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
+ +        preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;  // = getInferenceEngineBackendTypeParam();
+ +#endif
+ +
+ +    // each assertion has the form "backend is not X, or the target is allowed for X"
+ +    CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
+ +              preferableTarget == DNN_TARGET_CPU ||
+ +              preferableTarget == DNN_TARGET_OPENCL ||
+ +              preferableTarget == DNN_TARGET_OPENCL_FP16);
+ +    CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
+ +              preferableTarget == DNN_TARGET_CPU ||
+ +              preferableTarget == DNN_TARGET_OPENCL);
+ +#ifdef HAVE_INF_ENGINE
+ +    if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+ +    {
+ +        CV_Assert(
+ +              (preferableTarget == DNN_TARGET_CPU && (!isArmComputePlugin() || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) ||
+ +              preferableTarget == DNN_TARGET_OPENCL ||
+ +              preferableTarget == DNN_TARGET_OPENCL_FP16 ||
+ +              preferableTarget == DNN_TARGET_MYRIAD ||
+ +              preferableTarget == DNN_TARGET_HDDL ||
+ +              preferableTarget == DNN_TARGET_FPGA
+ +        );
+ +    }
+ +#endif
+ +#ifdef HAVE_WEBNN
+ +    if (preferableBackend == DNN_BACKEND_WEBNN)
+ +    {
+ +        CV_Assert(preferableTarget == DNN_TARGET_CPU ||
+ +                  preferableTarget == DNN_TARGET_OPENCL);
+ +    }
+ +#endif
+ +    CV_Assert(preferableBackend != DNN_BACKEND_VKCOM ||
+ +              preferableTarget == DNN_TARGET_VULKAN);
+ +    CV_Assert(preferableBackend != DNN_BACKEND_CUDA ||
+ +              IS_DNN_CUDA_TARGET(preferableTarget));
+ +    // (re)allocation is needed on first use or when the set of kept blobs changed
+ +    if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
+ +    {
+ +        // the preprocessor selects which of the two brace blocks below becomes the body of this `if`
+ +        if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
+ +#ifndef HAVE_OPENCL
+ +        {
+ +            CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
+ +            preferableTarget = DNN_TARGET_CPU;
+ +        }
+ +#else
+ +        {
+ +            if (!getParam_DNN_OPENCL_ALLOW_ALL_DEVICES())
+ +            {
+ +                // Current implementation is only valid for GPU (#11494)
+ +                if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
+ +                {
+ +                    CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
+ +                    preferableTarget = DNN_TARGET_CPU;
+ +                }
+ +                else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
+ +                {
+ +                    CV_LOG_WARNING(NULL,
+ +                            "DNN: OpenCL target with fp16 precision is not supported "
+ +                            "with current OpenCL device (tested with Intel GPUs only), "
+ +                            "switching to OpenCL with fp32 precision.");
+ +                    preferableTarget = DNN_TARGET_OPENCL;
+ +                }
+ +            }
+ +        }
+ +#endif
+ +        // Vulkan requested but not available: fall back to the OpenCV backend on CPU (no warning is logged here)
+ +        if (preferableBackend == DNN_BACKEND_VKCOM && !haveVulkan())
+ +        {
+ +            preferableBackend = DNN_BACKEND_OPENCV;
+ +            preferableTarget = DNN_TARGET_CPU;
+ +        }
+ +
+ +        // CUDA requested but not usable: warn (message depends on the build) and fall back to CPU
+ +        if (preferableBackend == DNN_BACKEND_CUDA && !haveCUDA())
+ +        {
+ +#ifdef HAVE_CUDA
+ +            CV_LOG_WARNING(NULL, "unable to use CUDA backend; switching to CPU");
+ +#else
+ +            CV_LOG_WARNING(NULL, "DNN module was not built with CUDA backend; switching to CPU");
+ +#endif
+ +            preferableBackend = DNN_BACKEND_OPENCV;
+ +            preferableTarget = DNN_TARGET_CPU;
+ +        }
+ +
+ +        clear();
+ +
+ +        if (hasDynamicShapes)
+ +        {
+ +            updateLayersShapes();
+ +        }
+ +
+ +        this->blobsToKeep = blobsToKeep_;
+ +
+ +        allocateLayers(blobsToKeep_);
+ +
+ +        // propagate the skip flag of the input layer instance to layer #0's data
+ +        MapIdToLayerData::iterator it = layers.find(0);
+ +        CV_Assert(it != layers.end());
+ +        it->second.skip = netInputLayer->skip;
+ +
+ +        initBackend(blobsToKeep_);
+ +
+ +        // clear() above reset netWasAllocated, so this branch is taken on every (re)allocation
+ +        if (!netWasAllocated)
+ +        {
+ +#ifdef HAVE_HALIDE
+ +            if (preferableBackend == DNN_BACKEND_HALIDE)
+ +                compileHalide();
+ +#else
+ +            CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
+ +#endif
+ +        }
+ +
+ +        netWasAllocated = true;
+ +
+ +        // dump again after allocation so the prepared network is recorded as well
+ +        if (dumpLevel)
+ +        {
+ +            dumpNetworkToFile();
+ +        }
+ +    }
+ +}
+ +
+ +
+ +// Returns the layer instance of the layer with the given numeric id.
+ +// The lookup goes through getLayerData(), which raises an error for an unknown id.
+ +Ptr<Layer> Net::Impl::getLayer(int layerId) const
+ +{
+ +    return getLayerData(layerId).getLayerInstance();
+ +}
+ +
+ +
+ +// Returns the layer instance of the layer described by `layerId`; the descriptor
+ +// is resolved by getLayerData().
+ +Ptr<Layer> Net::Impl::getLayer(const LayerId& layerId) const
+ +{
+ +    return getLayerData(layerId).getLayerInstance();
+ +}
+ +
+ +
+ +// Maps a layer name to its id; returns -1 when the name is not registered.
+ +int Net::Impl::getLayerId(const String& layerName) const
+ +{
+ +    const auto found = layerNameToId.find(layerName);
+ +    if (found == layerNameToId.end())
+ +        return -1;
+ +    return found->second;
+ +}
+ +
+ +
+ +// Validates a numeric layer id: returns it unchanged when such a layer exists,
+ +// otherwise -1.
+ +int Net::Impl::getLayerId(int id) const
+ +{
+ +    const bool known = layers.find(id) != layers.end();
+ +    if (!known)
+ +        return -1;
+ +    return id;
+ +}
+ +
+ +
+ +// Resolves a layer descriptor holding either an integer id or a string name by
+ +// forwarding to the matching overload. Any other descriptor fails the assertion.
+ +int Net::Impl::getLayerId(DictValue& layerDesc) const
+ +{
+ +    if (layerDesc.isInt())
+ +    {
+ +        return getLayerId(layerDesc.get<int>());
+ +    }
+ +    if (layerDesc.isString())
+ +    {
+ +        return getLayerId(layerDesc.get<String>());
+ +    }
+ +
+ +    // reached only for a descriptor that is neither an int nor a string
+ +    CV_Assert(layerDesc.isInt() || layerDesc.isString());
+ +    return -1;
+ +}
+ +
+ +
+ +// Returns the name of the layer with the given id, or the placeholder
+ +// "(unknown layer)" when no such layer exists.
+ +String Net::Impl::getLayerName(int id) const
+ +{
+ +    const auto found = layers.find(id);
+ +    if (found == layers.end())
+ +        return "(unknown layer)";
+ +    return found->second.name;
+ +}
+ +
+ +
+ +// Looks up the layer data for a numeric id and raises StsObjectNotFound when the
+ +// id is unknown. The method is const, so constness is cast away from the stored
+ +// entry to hand out a mutable reference.
+ +LayerData& Net::Impl::getLayerData(int id) const
+ +{
+ +    const auto found = layers.find(id);
+ +    if (found == layers.end())
+ +    {
+ +        CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));
+ +    }
+ +
+ +    const LayerData& entry = found->second;
+ +    return const_cast<LayerData&>(entry);
+ +}
+ +
+ +
+ +// Looks up the layer data by layer name; raises StsError when the name does not
+ +// resolve to a layer id.
+ +LayerData& Net::Impl::getLayerData(const String& layerName) const
+ +{
+ +    const int resolvedId = getLayerId(layerName);
+ +    const bool missing = resolvedId < 0;
+ +    if (missing)
+ +    {
+ +        CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found");
+ +    }
+ +
+ +    return getLayerData(resolvedId);
+ +}
+ +
+ +
+ +// Looks up the layer data from a descriptor that must hold an integer id or a
+ +// string name (asserted), forwarding to the matching overload.
+ +LayerData& Net::Impl::getLayerData(const DictValue& layerDesc) const
+ +{
+ +    CV_Assert(layerDesc.isInt() || layerDesc.isString());
+ +
+ +    // past the assertion, anything that is not an int is a string
+ +    return layerDesc.isInt() ? getLayerData(layerDesc.get<int>())
+ +                             : getLayerData(layerDesc.get<String>());
+ +}
+ +
+ +
+ +/*static*/
+ +// Records `from` as the source pin of input slot `inNum` of layer `ld`.
+ +// The slot list grows when the slot does not exist yet. An existing slot that is
+ +// already bound to a different valid pin raises StsError.
+ +void Net::Impl::addLayerInput(LayerData& ld, int inNum, LayerPin from)
+ +{
+ +    auto& slots = ld.inputBlobsId;
+ +
+ +    const bool slotExists = inNum < (int)slots.size();
+ +    if (slotExists)
+ +    {
+ +        LayerPin current = slots[inNum];
+ +        const bool conflict = current.valid() && !current.equal(from);
+ +        if (conflict)
+ +            CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected",
+ +                                             inNum, ld.name.c_str()));
+ +    }
+ +    else
+ +    {
+ +        slots.resize(inNum + 1);
+ +    }
+ +
+ +    slots[inNum] = from;
+ +}
+ +
+ +
+ +// Translates an output name of layer `ld` into an output index. An empty name
+ +// means output 0; otherwise the layer instance is asked via outputNameToIndex().
+ +int Net::Impl::resolvePinOutputName(LayerData& ld, const String& outName) const
+ +{
+ +    return outName.empty() ? 0
+ +                           : ld.getLayerInstance()->outputNameToIndex(outName);
+ +}
+ +
+ +
+ +// Builds a pin from a layer name. An empty name refers to layer 0. When the name
+ +// cannot be resolved (negative id) the pin is returned with its output index left
+ +// as default-constructed.
+ +LayerPin Net::Impl::getPinByAlias(const String& layerName) const
+ +{
+ +    LayerPin pin;
+ +    if (layerName.empty())
+ +        pin.lid = 0;
+ +    else
+ +        pin.lid = getLayerId(layerName);
+ +
+ +    if (pin.lid < 0)
+ +        return pin;
+ +
+ +    pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);
+ +    return pin;
+ +}
+ +
+ +
+ +// Lists one pin per output blob of the named layer (an empty name selects layer 0).
+ +// @param layerName name of the layer whose outputs are enumerated.
+ +// @return pins (lid, 0), (lid, 1), ... in output order.
+ +// Raises StsOutOfRange when the name does not resolve to an existing layer.
+ +std::vector<LayerPin> Net::Impl::getLayerOutPins(const String& layerName) const
+ +{
+ +    const int lid = (layerName.empty()) ? 0 : getLayerId(layerName);
+ +
+ +    MapIdToLayerData::const_iterator it = layers.find(lid);
+ +    if (it == layers.end())
+ +        CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid", lid));
+ +    const size_t nOutputs = it->second.outputBlobs.size();
+ +
+ +    std::vector<LayerPin> pins;
+ +    pins.reserve(nOutputs);
+ +    // iterate with the container's own size type to avoid a signed/unsigned comparison;
+ +    // the pin constructor takes int indices, hence the explicit cast
+ +    for (size_t i = 0; i < nOutputs; i++)
+ +    {
+ +        pins.push_back(LayerPin(lid, static_cast<int>(i)));
+ +    }
+ +
+ +    return pins;
+ +}
+ +
+ +
+ +// FIXIT remove dtype
+ +// Registers a new layer under the next free id and returns that id.
+ +// If the name is already taken, the call raises StsBadArg, except in a diagnostics
+ +// run with type "NotImplemented": then the existing layer's type and params are
+ +// overwritten and -1 is returned.
+ +// Side effects on success: hasDynamicShapes is set when the params carry a true
+ +// "has_dynamic_shapes", and netWasQuantized is set when dtype is CV_8S.
+ +int Net::Impl::addLayer(const String& name, const String& type, const int& dtype, LayerParams& params)
+ +{
+ +    const int existingId = getLayerId(name);
+ +    if (existingId >= 0)
+ +    {
+ +        const bool overrideAllowed = DNN_DIAGNOSTICS_RUN && type == "NotImplemented";
+ +        if (!overrideAllowed)
+ +        {
+ +            CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net");
+ +            return -1;
+ +        }
+ +
+ +        LayerData& existing = layers.find(existingId)->second;
+ +        existing.type = type;
+ +        existing.params = params;
+ +        return -1;
+ +    }
+ +
+ +    const int newId = ++lastLayerId;
+ +    layerNameToId.insert(std::make_pair(name, newId));
+ +    layers.insert(std::make_pair(newId, LayerData(newId, name, type, dtype, params)));
+ +
+ +    if (params.get<bool>("has_dynamic_shapes", false))
+ +    {
+ +        hasDynamicShapes = true;
+ +    }
+ +    if (dtype == CV_8S)
+ +    {
+ +        netWasQuantized = true;
+ +    }
+ +
+ +    return newId;
+ +}
+ +
+ +
+ +// Adds a layer and wires output 0 of the layer that was last added before this
+ +// call to input 0 of the new layer. Returns the id reported by addLayer().
+ +int Net::Impl::addLayerToPrev(const String& name, const String& type, const int& dtype, LayerParams& params)
+ +{
+ +    // must be captured before addLayer() advances lastLayerId
+ +    const int previousId = lastLayerId;
+ +    const int createdId = addLayer(name, type, dtype, params);
+ +
+ +    connect(previousId, 0, createdId, 0);
+ +    return createdId;
+ +}
+ +
+ +
+ +// Connects output `outNum` of layer `outLayerId` to input `inNum` of layer
+ +// `inLayerId`. The producing layer must have the smaller id (asserted). Both
+ +// layers must exist, since getLayerData() raises otherwise.
+ +void Net::Impl::connect(int outLayerId, int outNum, int inLayerId, int inNum)
+ +{
+ +    CV_Assert(outLayerId < inLayerId);
+ +
+ +    LayerData& producer = getLayerData(outLayerId);
+ +    LayerData& consumer = getLayerData(inLayerId);
+ +
+ +    // bind the consumer's input slot first: this step may raise on a conflicting connection
+ +    const LayerPin sourcePin(outLayerId, outNum);
+ +    addLayerInput(consumer, inNum, sourcePin);
+ +
+ +    // then record the link on the producer side
+ +    producer.requiredOutputs.insert(outNum);
+ +    const LayerPin consumerPin(inLayerId, outNum);
+ +    producer.consumers.push_back(consumerPin);
+ +
+ +    CV_LOG_VERBOSE(NULL, 0, "DNN: connect(" << outLayerId << ":" << outNum << " ==> " << inLayerId << ":" << inNum << ")");
+ +}
+ +
+ +
+ +// Registers `outputName` as a named network output fed by `layerId:outputPort`.
+ +// If a layer with that name already exists, it is reused only when it is `layerId`
+ +// itself and `outputPort` is 0; any other name clash raises StsBadArg.
+ +// Otherwise a new "Identity" layer named `outputName` is added and connected to
+ +// the requested pin.
+ +// @return id of the layer now associated with the output name.
+ +int Net::Impl::registerOutput(const std::string& outputName, int layerId, int outputPort)
+ +{
+ +    int checkLayerId = getLayerId(outputName);
+ +    if (checkLayerId >= 0)
+ +    {
+ +        if (checkLayerId == layerId)
+ +        {
+ +            if (outputPort == 0)
+ +            {
+ +                // layer name correlates with its output name
+ +                CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "': reuse layer with the same name and id=" << layerId << " to be linked");
+ +                outputNameToId.insert(std::make_pair(outputName, layerId));
+ +                return checkLayerId;
+ +            }
+ +        }
+ +        // name is taken by another layer, or by the same layer with a non-zero port
+ +        CV_Error_(Error::StsBadArg, ("Layer with name='%s' already exists id=%d (to be linked with %d:%d)", outputName.c_str(), checkLayerId, layerId, outputPort));
+ +    }
+ +    // NOTE(review): the disabled fragment below is an `if` without a body; enabling it as-is
+ +    // would make the following declaration its controlled statement.
+ +#if 0  // TODO
+ +    if (outputPort == 0)
+ +        // make alias only, need to adopt getUnconnectedOutLayers() call
+ +#endif
+ +    LayerParams outputLayerParams;
+ +    outputLayerParams.name = outputName;
+ +    outputLayerParams.type = "Identity";
+ +    int dtype = CV_32F;  // FIXIT remove
+ +    int outputLayerId = addLayer(outputLayerParams.name, outputLayerParams.type, dtype, outputLayerParams);
+ +    connect(layerId, outputPort, outputLayerId, 0);
+ +    CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "' id=" << outputLayerId << " defined as " << layerId << ":" << outputPort);
+ +    outputNameToId.insert(std::make_pair(outputName, outputLayerId));
+ +    return outputLayerId;
+ +}
+ +
+ +
+ +// Allocates blobs and backend wrappers for layer `lid`, after recursively
+ +// allocating every layer it takes input from, and then finalizes the layer.
+ +// Returns immediately when the layer's flag is already set. The flag is set at
+ +// the very end, so each layer is processed once per allocation pass.
+ +// @param lid          id of the layer to allocate.
+ +// @param layersShapes shapes per layer id; must contain an entry for `lid` (asserted).
+ +void Net::Impl::allocateLayer(int lid, const LayersShapesMap& layersShapes)
+ +{
+ +    CV_TRACE_FUNCTION();
+ +
+ +    LayerData& ld = layers[lid];
+ +
+ +    // already allocated
+ +    if (ld.flag)
+ +        return;
+ +
+ +    size_t ninputs = ld.inputBlobsId.size();
+ +    // disabled debug print of the producing layers and their output sizes
+ +#if 0
+ +    printf("layer %s:", ld.name.c_str());
+ +    for (size_t i = 0; i < ninputs; i++)
+ +    {
+ +        int inp_lid = ld.inputBlobsId[i].lid;
+ +        LayerData &inp_ld = layers[inp_lid];
+ +        int inp_outputs = (int)inp_ld.outputBlobs.size();
+ +        std::cout << " " << inp_ld.name << "(" << inp_outputs;
+ +
+ +        for( int j = 0; j < inp_outputs; j++ )
+ +        {
+ +            std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size;
+ +        }
+ +        std::cout << ")";
+ +    }
+ +    printf("\n");
+ +#endif
+ +
+ +    // determine parent layers
+ +    for (size_t i = 0; i < ninputs; i++)
+ +        ld.inputLayersId.insert(ld.inputBlobsId[i].lid);
+ +
+ +    // allocate parents
+ +    for (std::set<int>::const_iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
+ +        allocateLayer(*i, layersShapes);
+ +
+ +    // bind inputs
+ +    if (ld.id == 0)  // DataLayer
+ +    {
+ +        // the input layer wraps the data held by netInputLayer; ninputs is re-derived from it
+ +        ninputs = netInputLayer->inputsData.size();
+ +        ld.inputBlobsWrappers.resize(ninputs);
+ +        for (size_t i = 0; i < ninputs; i++)
+ +            ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
+ +    }
+ +    else
+ +    {
+ +        // every other layer points at the output blobs/wrappers of its producers
+ +        ld.inputBlobs.resize(ninputs);
+ +        ld.inputBlobsWrappers.resize(ninputs);
+ +        for (size_t i = 0; i < ninputs; i++)
+ +        {
+ +            LayerPin from = ld.inputBlobsId[i];
+ +            CV_Assert(from.valid());
+ +            CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
+ +            ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
+ +            ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
+ +        }
+ +    }
+ +
+ +    LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);
+ +
+ +    CV_Assert(layerShapesIt != layersShapes.end());
+ +
+ +    // OpenCV backend with the OpenCL FP16 target: CV_32F layers are switched to CV_16S
+ +    if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_OPENCL_FP16 && ld.dtype == CV_32F)
+ +        ld.dtype = CV_16S;
+ +
+ +    std::vector<LayerPin> pinsForInternalBlobs;
+ +    blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
+ +    ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
+ +    for (int i = 0; i < ld.outputBlobs.size(); ++i)
+ +        ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
+ +
+ +    /* CUDA backend has its own system for internal blobs; we don't need these */
+ +    ld.internalBlobsWrappers.resize((preferableBackend == DNN_BACKEND_CUDA) ? 0 : ld.internals.size());
+ +    for (int i = 0; i < ld.internalBlobsWrappers.size(); ++i)
+ +        ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
+ +
+ +    Ptr<Layer> layerPtr = ld.getLayerInstance();
+ +    {
+ +        // finalize() receives the input Mats by value (dereferenced from the stored pointers)
+ +        std::vector<Mat> inps(ld.inputBlobs.size());
+ +        for (int i = 0; i < ld.inputBlobs.size(); ++i)
+ +        {
+ +            inps[i] = *ld.inputBlobs[i];
+ +        }
+ +        layerPtr->finalize(inps, ld.outputBlobs);
+ +        layerPtr->preferableTarget = preferableTarget;
+ +        // disabled debug print of the output blob sizes
+ +#if 0
+ +        std::cout << "\toutputs:";
+ +        size_t noutputs = ld.outputBlobs.size();
+ +        for (size_t j = 0; j < noutputs; j++)
+ +        {
+ +            std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size;
+ +        }
+ +        std::cout << "\n";
+ +#endif
+ +    }
+ +
+ +    // After allocation of layer, we decrease counters to it's input blobs.
+ +    blobManager.releaseReferences(ld.inputBlobsId);
+ +    blobManager.releaseReferences(pinsForInternalBlobs);
+ +
+ +    // mark as allocated so later calls for this layer return early
+ +    ld.flag = 1;
+ +}
+ +
+ +
+ +// Allocates every layer of the network.
+ +// Steps: reset the per-layer "allocated" flags, collect the input shapes from the
+ +// output blobs of layer 0, compute the shapes of all layers, reset the blob
+ +// manager and wrappers, register blob references, allocate each layer, size the
+ +// timings vector and finally run layer fusion.
+ +// @param blobsToKeep_ pins that get an extra blob-manager reference; also passed
+ +//                     on to fuseLayers().
+ +void Net::Impl::allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
+ +{
+ +    CV_TRACE_FUNCTION();
+ +
+ +    // clear the "already allocated" markers checked by allocateLayer()
+ +    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
+ +        it->second.flag = 0;
+ +
+ +    CV_Assert(!layers[0].outputBlobs.empty());
+ +    ShapesVec inputShapes;
+ +    for (int i = 0; i < layers[0].outputBlobs.size(); i++)
+ +    {
+ +        Mat& inp = layers[0].outputBlobs[i];
+ +        CV_Assert(inp.total());
+ +        if (preferableBackend == DNN_BACKEND_OPENCV &&
+ +            preferableTarget == DNN_TARGET_OPENCL_FP16 &&
+ +            layers[0].dtype == CV_32F)
+ +        {
+ +            // `inp` refers to this same blob, so create() is called with the blob's own dims/size and type CV_16S
+ +            layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
+ +        }
+ +        inputShapes.push_back(shape(inp));
+ +    }
+ +    LayersShapesMap layersShapes;
+ +    getLayersShapes(inputShapes, layersShapes);
+ +
+ +    blobManager.reset();
+ +    backendWrappers.clear();
+ +
+ +    // drop wrappers left over from a previous allocation
+ +    for (auto& layer : layers)
+ +    {
+ +        auto& ld = layer.second;
+ +        ld.inputBlobsWrappers.clear();
+ +        ld.outputBlobsWrappers.clear();
+ +        ld.internalBlobsWrappers.clear();
+ +    }
+ +
+ +    // Fake references to input blobs.
+ +    for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
+ +        blobManager.addReference(LayerPin(0, i));
+ +    // one reference per input pin of every layer
+ +    for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it)
+ +    {
+ +        const LayerData& ld = it->second;
+ +        blobManager.addReferences(ld.inputBlobsId);
+ +    }
+ +
+ +    // extra reference for each blob the caller asked to keep
+ +    for (int i = 0; i < blobsToKeep_.size(); i++)
+ +    {
+ +        blobManager.addReference(blobsToKeep_[i]);
+ +    }
+ +
+ +    // allocateLayer() allocates a layer's inputs first and skips layers whose flag is already set
+ +    for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
+ +    {
+ +        int lid = it->first;
+ +        allocateLayer(lid, layersShapes);
+ +    }
+ +
+ +    layersTimings.resize(lastLayerId + 1, 0);
+ +    fuseLayers(blobsToKeep_);
+ +}
+ +
+ +
+ +void Net::Impl::forwardLayer(LayerData& ld)
+ +{
+ +    CV_TRACE_FUNCTION();
+ +
+ +    Ptr<Layer> layer = ld.layerInstance;
+ +
+ +    if (!ld.skip)
+ +    {
+ +        TickMeter tm;
+ +        tm.start();
+ +
+ +#ifndef HAVE_VULKAN
+ +        std::map<int, Ptr<BackendNode>>::const_iterator it = ld.backendNodes.find(preferableBackend);
+ +#else
+ +        std::map<int, Ptr<BackendNode>>::iterator it = ld.backendNodes.find(preferableBackend);
+ +#endif
+ +        if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
+ +        {
+ +            if (isAsync)
+ +                CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode");
+ +
+ +            if (!layer->supportBackend(DNN_BACKEND_OPENCV))
+ +                CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" unsupported on OpenCV backend",
+ +                                                   ld.name.c_str(), ld.type.c_str()));
+ +
+ +#ifdef HAVE_OPENCL
+ +            if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
+ +            {
+ +                std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
+ +                std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+ +                std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
+ +                layer->forward(umat_inputBlobs,
+ +                               umat_outputBlobs,
+ +                               umat_internalBlobs);
+ +                if (getParam_DNN_CHECK_NAN_INF())
+ +                {
+ +                    bool fail = false;
+ +                    for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
+ +                    {
+ +                        UMat& u = umat_outputBlobs[i];
+ +                        Mat m;
+ +                        if (u.depth() == CV_16S)  // FP16
+ +                            convertFp16(u, m);
+ +                        else
+ +                            m = u.getMat(ACCESS_READ);
+ +                        if (!checkRange(m))
+ +                        {
+ +                            std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
+ +                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
+ +                            fail = true;
+ +                        }
+ +                        else if (!checkRange(m, true, NULL, -1e6, 1e6))
+ +                        {
+ +                            std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
+ +                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
+ +                            fail = true;
+ +                        }
+ +                    }
+ +                    if (fail)
+ +                    {
+ +                        for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
+ +                        {
+ +                            UMat& u = umat_inputBlobs[i];
+ +                            Mat m;
+ +                            if (u.depth() == CV_16S)  // FP16
+ +                                convertFp16(u, m);
+ +                            else
+ +                                m = u.getMat(ACCESS_READ);
+ +                            std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
+ +                            if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl;
+ +                        }
+ +                        for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
+ +                        {
+ +                            UMat& u = umat_outputBlobs[i];
+ +                            Mat m;
+ +                            if (u.depth() == CV_16S)  // FP16
+ +                                convertFp16(u, m);
+ +                            else
+ +                                m = u.getMat(ACCESS_READ);
+ +                            std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
+ +                            if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl;
+ +                        }
+ +                        for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
+ +                        {
+ +                            UMat& u = umat_internalBlobs[i];
+ +                            Mat m;
+ +                            if (u.depth() == CV_16S)  // FP16
+ +                                convertFp16(u, m);
+ +                            else
+ +                                m = u.getMat(ACCESS_READ);
+ +                            std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
+ +                            if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
+ +                        }
+ +                        if (getParam_DNN_CHECK_NAN_INF_RAISE_ERROR())
+ +                            CV_Assert(!fail);
+ +                    }
+ +                }
+ +                OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
+ +            }
+ +            else
+ +#endif
+ +            {
+ +                for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
+ +                {
+ +                    if (!ld.inputBlobsWrappers[i].empty())
+ +                        ld.inputBlobsWrappers[i]->copyToHost();
+ +                }
+ +
+ +                std::vector<Mat> inps(ld.inputBlobs.size());
+ +                for (int i = 0; i < ld.inputBlobs.size(); ++i)
+ +                {
+ +                    inps[i] = *ld.inputBlobs[i];
+ +                }
+ +                layer->forward(inps, ld.outputBlobs, ld.internals);
+ +
+ +                if (getParam_DNN_CHECK_NAN_INF())
+ +                {
+ +                    bool fail = false;
+ +                    for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
+ +                    {
+ +                        const Mat& m = ld.outputBlobs[i];
+ +                        if (!checkRange(m))
+ +                        {
+ +                            std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
+ +                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
+ +                            fail = true;
+ +                        }
+ +                        else if (!checkRange(m, true, NULL, -1e6, 1e6))
+ +                        {
+ +                            std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
+ +                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
+ +                            fail = true;
+ +                        }
+ +                    }
+ +                    if (fail)
+ +                    {
+ +                        for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
+ +                        {
+ +                            const Mat* pM = ld.inputBlobs[i];
+ +                            if (!pM)
+ +                            {
+ +                                std::cout << "INPUT " << i << " is NULL" << std::endl;
+ +                                continue;
+ +                            }
+ +                            const Mat& m = *pM;
+ +                            std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
+ +                            if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl;
+ +                        }
+ +                        for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
+ +                        {
+ +                            const Mat& m = ld.outputBlobs[i];
+ +                            std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
+ +                            if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl;
+ +                        }
+ +                        for (size_t i = 0; i < ld.internals.size(); ++i)
+ +                        {
+ +                            const Mat& m = ld.internals[i];
+ +                            std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
+ +                            if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl;
+ +                        }
+ +                        if (getParam_DNN_CHECK_NAN_INF_RAISE_ERROR())
+ +                            CV_Assert(!fail);
+ +                    }
+ +                }
+ +
+ +                for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
+ +                {
+ +                    if (!ld.outputBlobsWrappers[i].empty())
+ +                        ld.outputBlobsWrappers[i]->setHostDirty();
+ +                }
+ +            }
+ +        }
+ +        else
+ +        {
+ +            Ptr<BackendNode> node = it->second;
+ +            CV_Assert(!node.empty());
+ +            if (preferableBackend == DNN_BACKEND_CUDA)
+ +            {
+ +                CV_Assert(haveCUDA());
+ +
+ +#ifdef HAVE_CUDA
+ +                Ptr<CUDABackendNode> cudaNode = node.dynamicCast<CUDABackendNode>();
+ +                CV_Assert(!cudaNode.empty());
+ +
+ +                cudaNode->forward(ld.inputBlobsWrappers, ld.outputBlobsWrappers, cudaInfo->workspace);
+ +
+ +                for (auto id : ld.cudaD2HBackgroundTransfers)
+ +                {
+ +                    auto wrapper = ld.outputBlobsWrappers[id].dynamicCast<CUDABackendWrapper>();
+ +                    wrapper->copyToHostInBackground();
+ +                }
+ +#endif
+ +            }
+ +            else if (preferableBackend == DNN_BACKEND_HALIDE)
+ +            {
+ +                forwardHalide(ld.outputBlobsWrappers, node);
+ +            }
+ +#ifdef HAVE_INF_ENGINE
+ +            else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+ +            {
+ +                forwardNgraph(ld.outputBlobsWrappers, node, isAsync);
+ +            }
+ +#endif
+ +            else if (preferableBackend == DNN_BACKEND_WEBNN)
+ +            {
+ +                forwardWebnn(ld.outputBlobsWrappers, node, isAsync);
+ +            }
+ +#ifdef HAVE_VULKAN
+ +            else if (preferableBackend == DNN_BACKEND_VKCOM)
+ +            {
+ +                try
+ +                {
+ +                    forwardVkCom(ld.outputBlobsWrappers, node);
+ +                }
+ +                catch (const cv::Exception& e)
+ +                {
+ +                    CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. " << e.what());
+ +                    it->second = Ptr<BackendNode>();
+ +                    forwardLayer(ld);
+ +                }
+ +            }
+ +#endif
+ +            else
+ +            {
+ +                CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
+ +            }
+ +        }
+ +
+ +        tm.stop();
+ +        int64 t = tm.getTimeTicks();
+ +        layersTimings[ld.id] = (t > 0) ? t : t + 1;  // zero for skipped layers only
+ +    }
+ +    else
+ +    {
+ +        layersTimings[ld.id] = 0;
+ +    }
+ +
+ +    ld.flag = 1;
+ +}
+ +
+ +
+ +void Net::Impl::forwardToLayer(LayerData& ld, bool clearFlags)
+ +{
+ +    CV_TRACE_FUNCTION();
+ +
+ +    if (clearFlags)
+ +    {
+ +        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
+ +            it->second.flag = 0;
+ +    }
+ +
+ +    // already was forwarded
+ +    if (ld.flag)
+ +        return;
+ +
+ +    // forward parents
+ +    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
+ +    {
+ +        LayerData& ld = it->second;
+ +        if (ld.flag)
+ +            continue;
+ +        forwardLayer(ld);
+ +    }
+ +
+ +    // forward itself
+ +    forwardLayer(ld);
+ +
+ +#ifdef HAVE_CUDA
+ +    if (preferableBackend == DNN_BACKEND_CUDA)
+ +        cudaInfo->context.stream.synchronize();
+ +#endif
+ +}
+ +
+ +
+ +Mat Net::Impl::forward(const String& outputName)
+ +{
+ +    CV_Assert(!empty());
+ +    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+ +
+ +    String layerName = outputName;
+ +
+ +    if (layerName.empty())
+ +    {
+ +        std::vector<String> layerNames = getLayerNames();
+ +        CV_Assert(!layerNames.empty());
+ +        layerName = layerNames.back();
+ +    }
+ +
+ +    std::vector<LayerPin> pins(1, getPinByAlias(layerName));
+ +    setUpNet(pins);
+ +    forwardToLayer(getLayerData(layerName));
+ +
+ +    return getBlob(layerName);
+ +}
+ +
+ +
+ +AsyncArray Net::Impl::forwardAsync(const String& outputName)
+ +{
+ +    CV_Assert(!empty());
+ +    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+ +
+ +#ifdef CV_CXX11
+ +    String layerName = outputName;
+ +
+ +    if (layerName.empty())
+ +    {
+ +        std::vector<String> layerNames = getLayerNames();
+ +        CV_Assert(!layerNames.empty());
+ +        layerName = layerNames.back();
+ +    }
+ +
+ +    std::vector<LayerPin> pins(1, getPinByAlias(layerName));
+ +    setUpNet(pins);
+ +
+ +    if (preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+ +        CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backend only");
+ +
+ +    isAsync = true;
+ +    forwardToLayer(getLayerData(layerName));
+ +    isAsync = false;
+ +
+ +    return getBlobAsync(layerName);
+ +#else
+ +    CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward requires build with enabled C++11");
+ +#endif  // CV_CXX11
+ +}
+ +
+ +
+ +// Runs the network up to the layer named `outputName` and stores its outputs
+ +// into `outputBlobs`. An empty `outputName` selects the last entry returned by
+ +// getLayerNames().
+ +//
+ +// The way results are written depends on the kind of `outputBlobs`:
+ +//  - UMat / Mat      : the single blob returned by getBlob(layerName);
+ +//  - vector<Mat>     : all output blobs of the layer;
+ +//  - vector<UMat>    : all output blobs of the layer as UMat.
+ +// Any other kind of `outputBlobs` is left untouched.
+ +void Net::Impl::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
+ +{
+ +    CV_Assert(!empty());
+ +    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+ +
+ +    String layerName = outputName;
+ +
+ +    if (layerName.empty())
+ +    {
+ +        std::vector<String> layerNames = getLayerNames();
+ +        CV_Assert(!layerNames.empty());
+ +        layerName = layerNames.back();
+ +    }
+ +
+ +    std::vector<LayerPin> pins(1, getPinByAlias(layerName));
+ +    setUpNet(pins);
+ +    forwardToLayer(getLayerData(layerName));
+ +
+ +    LayerPin pin = getPinByAlias(layerName);
+ +    LayerData& ld = layers[pin.lid];
+ +
+ +    if (outputBlobs.isUMat())
+ +    {
+ +        getBlob(layerName).copyTo(outputBlobs);
+ +    }
+ +    else if (outputBlobs.isMat())
+ +    {
+ +        outputBlobs.assign(getBlob(layerName));
+ +    }
+ +    else if (outputBlobs.isMatVector())
+ +    {
+ +        // For non-CPU targets, bring every output back to host memory first.
+ +        if (preferableTarget != DNN_TARGET_CPU)
+ +        {
+ +            for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
+ +            {
+ +                CV_Assert(!ld.outputBlobsWrappers[i].empty());
+ +                ld.outputBlobsWrappers[i]->copyToHost();
+ +            }
+ +        }
+ +        // The depth of the first output decides the path for all outputs.
+ +        // NOTE(review): ld.outputBlobs[0] is read without checking that the
+ +        // layer has at least one output - confirm this cannot be empty here.
+ +        if (ld.outputBlobs[0].depth() == CV_16S)
+ +        {
+ +            // CV_16S blobs are passed through convertFp16() before being returned.
+ +            std::vector<Mat>& outputvec = *(std::vector<Mat>*)outputBlobs.getObj();
+ +            outputvec.resize(ld.outputBlobs.size());
+ +            for (int i = 0; i < outputvec.size(); i++)
+ +                convertFp16(ld.outputBlobs[i], outputvec[i]);
+ +        }
+ +        else
+ +        {
+ +            // Output depth can be CV_32F or CV_8S
+ +            std::vector<Mat>& outputvec = *(std::vector<Mat>*)outputBlobs.getObj();
+ +            outputvec = ld.outputBlobs;
+ +        }
+ +    }
+ +    else if (outputBlobs.isUMatVector())
+ +    {
+ +        std::vector<UMat>& outputvec = *(std::vector<UMat>*)outputBlobs.getObj();
+ +
+ +#ifdef HAVE_OPENCL
+ +        // OpenCV backend with an OpenCL target: take the UMats from the backend wrappers.
+ +        if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
+ +        {
+ +            if (preferableTarget == DNN_TARGET_OPENCL)
+ +                outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+ +            else if (preferableTarget == DNN_TARGET_OPENCL_FP16)
+ +            {
+ +                // FP16 target: each wrapper UMat goes through convertFp16().
+ +                std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+ +                outputvec.resize(out_vec.size());
+ +                for (int i = 0; i < out_vec.size(); i++)
+ +                    convertFp16(out_vec[i], outputvec[i]);
+ +            }
+ +        }
+ +        else
+ +#endif
+ +        {
+ +            // Otherwise copy the layer's Mat outputs into the UMat vector.
+ +            outputvec.resize(ld.outputBlobs.size());
+ +            for (int i = 0; i < outputvec.size(); ++i)
+ +                ld.outputBlobs[i].copyTo(outputvec[i]);
+ +        }
+ +    }
+ +}
+ +
+ +
+ +void Net::Impl::forward(OutputArrayOfArrays outputBlobs,
+ +        const std::vector<String>& outBlobNames)
+ +{
+ +    CV_Assert(!empty());
+ +    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+ +
+ +    std::vector<LayerPin> pins;
+ +    for (int i = 0; i < outBlobNames.size(); i++)
+ +    {
+ +        pins.push_back(getPinByAlias(outBlobNames[i]));
+ +    }
+ +
+ +    setUpNet(pins);
+ +
+ +    LayerPin out = getLatestLayerPin(pins);
+ +
+ +    forwardToLayer(getLayerData(out.lid));
+ +
+ +    std::vector<Mat> matvec;
+ +    for (int i = 0; i < pins.size(); i++)
+ +    {
+ +        matvec.push_back(getBlob(pins[i]));
+ +    }
+ +
+ +    outputBlobs.create((int)matvec.size(), 1, CV_32F/*FIXIT*/, -1);  // allocate vector
+ +    outputBlobs.assign(matvec);
+ +}
+ +
+ +
+ +void Net::Impl::forward(std::vector<std::vector<Mat>>& outputBlobs,
+ +        const std::vector<String>& outBlobNames)
+ +{
+ +    CV_Assert(!empty());
+ +    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+ +
+ +    std::vector<LayerPin> pins;
+ +    for (int i = 0; i < outBlobNames.size(); i++)
+ +    {
+ +        pins.push_back(getPinByAlias(outBlobNames[i]));
+ +    }
+ +
+ +    setUpNet(pins);
+ +
+ +    LayerPin out = getLatestLayerPin(pins);
+ +
+ +    forwardToLayer(getLayerData(out.lid));
+ +
+ +    outputBlobs.resize(outBlobNames.size());
+ +    for (int i = 0; i < outBlobNames.size(); i++)
+ +    {
+ +        std::vector<LayerPin> lp = getLayerOutPins(outBlobNames[i]);
+ +        outputBlobs[i].resize(lp.size());
+ +        for (int j = 0; j < lp.size(); j++)
+ +        {
+ +            outputBlobs[i][j] = getBlob(lp[j]);
+ +        }
+ +    }
+ +}
+ +
+ +
+ +void Net::Impl::getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
+ +{
+ +    CV_CheckGE(id, 0, "");
+ +    CV_CheckLT(id, (int)layers.size(), "");
+ +    LayerData& layerData = layers[id];
+ +    std::vector<LayerPin>& inputLayerIds = layerData.inputBlobsId;
+ +    LayerShapes& layerShapes = inOutShapes[id];
+ +
+ +    if (id == 0 && layerShapes.in[0].empty())
+ +    {
+ +        if (!layerData.outputBlobs.empty())
+ +        {
+ +            ShapesVec shapes;
+ +            for (int i = 0; i < layerData.outputBlobs.size(); i++)
+ +            {
+ +                Mat& inp = layerData.outputBlobs[i];
+ +                CV_Assert(!inp.empty());
+ +                shapes.push_back(shape(inp));
+ +            }
+ +            layerShapes.in = shapes;
+ +        }
+ +        else
+ +        {
+ +            const std::vector<MatShape>& inputShapes = netInputLayer->shapes;
+ +            bool none = true;
+ +            for (size_t i = 0; i < inputShapes.size(); i++)
+ +            {
+ +                if (!inputShapes[i].empty())
+ +                {
+ +                    none = false;
+ +                    break;
+ +                }
+ +            }
+ +            if (none)
+ +            {
+ +                layerShapes.out.clear();
+ +                return;
+ +            }
+ +            else
+ +            {
+ +                layerShapes.in = inputShapes;
+ +            }
+ +        }
+ +    }
+ +
+ +    if (layerShapes.in.empty())
+ +    {
+ +        for (int i = 0; i < inputLayerIds.size(); i++)
+ +        {
+ +            int layerId = inputLayerIds[i].lid;
+ +            LayersShapesMap::const_iterator it = inOutShapes.find(layerId);
+ +            if (it == inOutShapes.end() || it->second.out.empty())
+ +            {
+ +                getLayerShapesRecursively(layerId, inOutShapes);
++                it = inOutShapes.find(layerId);
++                CV_Assert(it != inOutShapes.end());
+ +            }
++            const int out_port = inputLayerIds[i].oid;
++            CV_CheckLT(out_port, (int)it->second.out.size(), "");
++            const MatShape& shape = it->second.out[out_port];
+ +            layerShapes.in.push_back(shape);
+ +        }
+ +    }
+ +    const ShapesVec& is = layerShapes.in;
+ +    ShapesVec& os = layerShapes.out;
+ +    ShapesVec& ints = layerShapes.internal;
+ +    int requiredOutputs = layerData.requiredOutputs.size();
+ +    Ptr<Layer> l = layerData.getLayerInstance();
+ +    CV_Assert(l);
+ +    bool layerSupportInPlace = false;
+ +    try
+ +    {
+ +        layerSupportInPlace = l->getMemoryShapes(is, requiredOutputs, os, ints);
+ +    }
+ +    catch (const cv::Exception& e)
+ +    {
+ +        CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() throws exception." <<
+ +                " inputs=" << is.size() <<
+ +                " outputs=" << os.size() << "/" << requiredOutputs <<
+ +                " blobs=" << l->blobs.size());
+ +        for (size_t i = 0; i < is.size(); ++i)
+ +        {
+ +            CV_LOG_ERROR(NULL, "    input[" << i << "] = " << toString(is[i]));
+ +        }
+ +        for (size_t i = 0; i < os.size(); ++i)
+ +        {
+ +            CV_LOG_ERROR(NULL, "    output[" << i << "] = " << toString(os[i]));
+ +        }
+ +        for (size_t i = 0; i < l->blobs.size(); ++i)
+ +        {
+ +            CV_LOG_ERROR(NULL, "    blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i])));
+ +        }
+ +        CV_LOG_ERROR(NULL, "Exception message: " << e.what());
+ +        throw;
+ +    }
+ +    layerShapes.supportInPlace = layerSupportInPlace;
+ +
+ +    try
+ +    {
+ +        for (int i = 0; i < ints.size(); i++)
+ +            CV_CheckGT(total(ints[i]), 0, "");
+ +
+ +        for (int i = 0; i < os.size(); i++)
+ +            CV_CheckGT(total(os[i]), 0, "");
+ +    }
+ +    catch (const cv::Exception& e)
+ +    {
+ +        CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() post validation failed." <<
+ +                " inputs=" << is.size() <<
+ +                " outputs=" << os.size() << "/" << requiredOutputs <<
+ +                " blobs=" << l->blobs.size() <<
+ +                " inplace=" << layerSupportInPlace);
+ +        for (size_t i = 0; i < is.size(); ++i)
+ +        {
+ +            CV_LOG_ERROR(NULL, "    input[" << i << "] = " << toString(is[i]));
+ +        }
+ +        for (size_t i = 0; i < os.size(); ++i)
+ +        {
+ +            CV_LOG_ERROR(NULL, "    output[" << i << "] = " << toString(os[i]));
+ +        }
+ +        for (size_t i = 0; i < l->blobs.size(); ++i)
+ +        {
+ +            CV_LOG_ERROR(NULL, "    blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i])));
+ +        }
+ +        CV_LOG_ERROR(NULL, "Exception message: " << e.what());
+ +        throw;
+ +    }
+ +}
+ +
+ +void Net::Impl::getLayersShapes(
+ +        const ShapesVec& netInputShapes,
+ +        std::vector<int>& layersIds,
+ +        std::vector<ShapesVec>& inLayersShapes,
+ +        std::vector<ShapesVec>& outLayersShapes) /*const*/
+ +{
+ +    layersIds.clear();
+ +    inLayersShapes.clear();
+ +    outLayersShapes.clear();
+ +
+ +    Impl::LayersShapesMap inOutShapes;
+ +    getLayersShapes(netInputShapes, inOutShapes);
+ +
+ +    for (Impl::LayersShapesMap::const_iterator it = inOutShapes.begin();
+ +            it != inOutShapes.end(); it++)
+ +    {
+ +        layersIds.push_back(it->first);
+ +        inLayersShapes.push_back(it->second.in);
+ +        outLayersShapes.push_back(it->second.out);
+ +    }
+ +}
+ +
+ +
+ +void Net::Impl::getLayersShapes(const ShapesVec& netInputShapes,
+ +        LayersShapesMap& inOutShapes)
+ +{
+ +    inOutShapes.clear();
+ +
+ +    inOutShapes[0].in = netInputShapes;  // insert shape for first input layer
+ +    for (MapIdToLayerData::const_iterator it = layers.begin();
+ +            it != layers.end(); it++)
+ +    {
+ +        getLayerShapesRecursively(it->first, inOutShapes);
+ +    }
+ +}
+ +
+ +// Computes the shapes of the single layer `layerId` for the given network
+ +// input shapes and copies them into `shapes`.
+ +void Net::Impl::getLayerShapes(const ShapesVec& netInputShapes,
+ +        const int layerId,
+ +        LayerShapes& shapes)
+ +{
+ +    LayersShapesMap shapesByLayer;
+ +
+ +    // insert shape for first input layer
+ +    LayerShapes& firstLayerShapes = shapesByLayer[0];
+ +    firstLayerShapes.in = netInputShapes;
+ +
+ +    getLayerShapesRecursively(layerId, shapesByLayer);
+ +
+ +    const LayerShapes& requested = shapesByLayer[layerId];
+ +    shapes = requested;
+ +}
+ +
+ +void Net::Impl::updateLayersShapes()
+ +{
+ +    CV_LOG_DEBUG(NULL, "updateLayersShapes() with layers.size=" << layers.size());
+ +    CV_Assert(netInputLayer);
+ +    DataLayer& inputLayer = *netInputLayer;
+ +    LayerData& inputLayerData = layers[0];
+ +    CV_Assert(inputLayerData.layerInstance.get() == &inputLayer);
+ +    CV_Assert(!inputLayerData.outputBlobs.empty());
+ +    ShapesVec inputShapes;
+ +    for (int i = 0; i < inputLayerData.outputBlobs.size(); i++)
+ +    {
+ +        Mat& inp = inputLayerData.outputBlobs[i];
+ +        CV_Assert(!inp.empty());
+ +        if (preferableBackend == DNN_BACKEND_OPENCV &&  // FIXIT: wrong place for output allocation
+ +            preferableTarget == DNN_TARGET_OPENCL_FP16 &&
+ +            inputLayerData.dtype == CV_32F)
+ +        {
+ +            inp.create(inp.dims, inp.size, CV_16S);
+ +        }
+ +        inputShapes.push_back(shape(inp));
+ +    }
+ +    CV_LOG_DEBUG(NULL, toString(inputShapes, "Network input shapes"));
+ +    LayersShapesMap layersShapes;
+ +    layersShapes[0].in = inputShapes;
+ +    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
+ +    {
+ +        int layerId = it->first;
+ +        LayerData& layerData = it->second;
+ +        const std::vector<LayerPin>& inputLayerIds = layerData.inputBlobsId;
+ +        LayerShapes& layerShapes = layersShapes[layerId];
+ +        CV_LOG_DEBUG(NULL, "layer " << layerId << ": [" << layerData.type << "]:(" << layerData.name << ") with inputs.size=" << inputLayerIds.size());
+ +        if (layerShapes.in.empty())
+ +        {
+ +            for (int i = 0; i < inputLayerIds.size(); i++)
+ +            {
+ +                const LayerPin& inputPin = inputLayerIds[i];
+ +                int inputLayerId = inputPin.lid;
+ +                CV_LOG_DEBUG(NULL, "    input[" << i << "] " << inputLayerId << ":" << inputPin.oid << " as [" << layers[inputLayerId].type << "]:(" << layers[inputLayerId].name << ")");
+ +                LayersShapesMap::const_iterator inputIt = layersShapes.find(inputLayerId);
+ +                if (inputIt == layersShapes.end() || inputIt->second.out.empty())
+ +                {
+ +                    getLayerShapesRecursively(inputLayerId, layersShapes);
+ +                }
+ +                const MatShape& shape = layersShapes[inputLayerId].out[inputPin.oid];
+ +                layerShapes.in.push_back(shape);
+ +            }
+ +            layerData.getLayerInstance()->updateMemoryShapes(layerShapes.in);
+ +        }
+ +        CV_LOG_DEBUG(NULL, "Layer " << layerId << ": " << toString(layerShapes.in, "input shapes"));
+ +        CV_LOG_IF_DEBUG(NULL, !layerShapes.out.empty(), "Layer " << layerId << ": " << toString(layerShapes.out, "output shapes"));
+ +        CV_LOG_IF_DEBUG(NULL, !layerShapes.internal.empty(), "Layer " << layerId << ": " << toString(layerShapes.internal, "internal shapes"));
+ +    }
+ +    CV_LOG_DEBUG(NULL, "updateLayersShapes() - DONE");
+ +}
+ +
+ +
+ +// Returns the greatest pin of `pins` according to LayerPin's ordering.
+ +// `pins` must not be empty: std::max_element() returns the end iterator for an
+ +// empty range, which must not be dereferenced.
+ +LayerPin Net::Impl::getLatestLayerPin(const std::vector<LayerPin>& pins) const
+ +{
+ +    CV_Assert(!pins.empty());
+ +    return *std::max_element(pins.begin(), pins.end());
+ +}
+ +
+ +Mat Net::Impl::getBlob(const LayerPin& pin) const
+ +{
+ +    CV_TRACE_FUNCTION();
+ +
+ +    if (!pin.valid())
+ +        CV_Error(Error::StsObjectNotFound, "Requested blob not found");
+ +
+ +    MapIdToLayerData::const_iterator it = layers.find(pin.lid);
+ +    if (it == layers.end())
+ +        CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid (output #%d requested)", pin.lid, pin.oid));
+ +
+ +    const LayerData& ld = it->second;
+ +    if ((size_t)pin.oid >= ld.outputBlobs.size())
+ +    {
+ +        CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %zu outputs, "
+ +                                              "the #%d was requested",
+ +                                               ld.name.c_str(), ld.outputBlobs.size(), pin.oid));
+ +    }
+ +    if (preferableTarget != DNN_TARGET_CPU)
+ +    {
+ +        CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
+ +        // Transfer data to CPU if it's require.
+ +        ld.outputBlobsWrappers[pin.oid]->copyToHost();
+ +    }
+ +
+ +    if (ld.outputBlobs[pin.oid].depth() == CV_16S)
+ +    {
+ +        Mat output_blob;
+ +        convertFp16(ld.outputBlobs[pin.oid], output_blob);
+ +        return output_blob;
+ +    }
+ +    else
+ +        return ld.outputBlobs[pin.oid];
+ +}
+ +
+ +// Convenience overload: resolves `outputName` to a pin via getPinByAlias() and
+ +// returns the blob addressed by that pin.
+ +Mat Net::Impl::getBlob(String outputName) const
+ +{
+ +    const LayerPin resolvedPin = getPinByAlias(outputName);
+ +    return getBlob(resolvedPin);
+ +}
+ +
+ +
+ +AsyncArray Net::Impl::getBlobAsync(const LayerPin& pin)
+ +{
+ +    CV_TRACE_FUNCTION();
+ +#ifdef HAVE_INF_ENGINE
+ +    if (!pin.valid())
+ +        CV_Error(Error::StsObjectNotFound, "Requested blob not found");
+ +
+ +    LayerData& ld = layers[pin.lid];
+ +    if ((size_t)pin.oid >= ld.outputBlobs.size())
+ +    {
+ +        CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, "
+ +                                              "the #%d was requested",
+ +                                               ld.name.c_str(), (int)ld.outputBlobs.size(), (int)pin.oid));
+ +    }
+ +    if (preferableTarget != DNN_TARGET_CPU)
+ +    {
+ +        CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
+ +        // Transfer data to CPU if it's require.
+ +        ld.outputBlobsWrappers[pin.oid]->copyToHost();
+ +    }
+ +    CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
+ +
+ +    Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
+ +    return std::move(wrapper->futureMat);
+ +#else
+ +    CV_Error(Error::StsNotImplemented, "DNN: OpenVINO/nGraph backend is required");
+ +#endif  // HAVE_INF_ENGINE
+ +}
+ +
+ +
+ +// Convenience overload: resolves `outputName` to a pin via getPinByAlias() and
+ +// returns the asynchronous result for that pin.
+ +AsyncArray Net::Impl::getBlobAsync(String outputName)
+ +{
+ +    const LayerPin resolvedPin = getPinByAlias(outputName);
+ +    return getBlobAsync(resolvedPin);
+ +}
+ +
+ +
+ +// Forwards the list of input blob names to the network input layer.
+ +// The input layer must exist.
+ +void Net::Impl::setInputsNames(const std::vector<String>& inputBlobNames)
+ +{
+ +    CV_Assert(netInputLayer);
+ +    DataLayer& inputLayer = *netInputLayer;
+ +    inputLayer.setNames(inputBlobNames);
+ +}
+ +
+ +
+ +// Forwards the shape declared for input `inputName` to the network input layer.
+ +// The input layer must exist.
+ +void Net::Impl::setInputShape(const String& inputName, const MatShape& shape)
+ +{
+ +    CV_Assert(netInputLayer);
+ +    DataLayer& inputLayer = *netInputLayer;
+ +    inputLayer.setInputShape(inputName, shape);
+ +}
+ +
+ +
+ +// Stores `blob` as the network input called `name`, together with the scale
+ +// factor and mean recorded for that input.
+ +//
+ +// blob        - new input data; it is copied into the input layer's storage.
+ +// name        - input name, resolved to an output index of layer 0.
+ +// scalefactor - value stored in the input layer's scaleFactors for this input.
+ +// mean        - value stored in the input layer's means for this input.
+ +//
+ +// Raises Error::StsObjectNotFound when `name` cannot be resolved.
+ +// When the shape of the new blob differs from the previously stored one,
+ +// netWasAllocated is reset to false.
+ +void Net::Impl::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
+ +{
+ +    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+ +
+ +    // Inputs are addressed as outputs of layer 0.
+ +    LayerPin pin;
+ +    pin.lid = 0;
+ +    pin.oid = resolvePinOutputName(getLayerData(pin.lid), name);
+ +
+ +    if (!pin.valid())
+ +        CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");
+ +
+ +    Mat blob_ = blob.getMat();  // can't use InputArray directly due MatExpr stuff
+ +    MatShape blobShape = shape(blob_);
+ +
+ +    // pin.lid was set to 0 above, so this branch is always taken.
+ +    if (pin.lid == 0)
+ +    {
+ +        CV_Assert(!netInputLayer.empty());
+ +        // Local reference that shadows the member of the same name inside this scope.
+ +        const DataLayer& netInputLayer = *(this->netInputLayer);
+ +        if (!netInputLayer.shapes.empty())
+ +        {
+ +            CV_CheckLT(pin.oid, (int)netInputLayer.shapes.size(), "");
+ +            const MatShape& inputShapeLimitation = netInputLayer.shapes[pin.oid];
+ +            if (!inputShapeLimitation.empty())
+ +            {
+ +                // Only the number of dimensions is enforced; the per-dimension
+ +                // comparison below is compiled out.
+ +                CV_CheckEQ(inputShapeLimitation.size(), blobShape.size(), "");
+ +#if 0  // TODO: DNNTestNetwork.MobileNet_SSD_Caffe_Different_Width_Height/0
+ +                const size_t dims = inputShapeLimitation.size();
+ +                for (size_t dim = 0; dim < dims; dim++)
+ +                {
+ +                    if (dims >= 3 && dim == 0 && inputShapeLimitation[0] == 1)
+ +                        continue;  // don't limit batch
+ +                    CV_CheckEQ(inputShapeLimitation[dim], blobShape[dim], "");
+ +                }
+ +#endif
+ +            }
+ +        }
+ +    }
+ +
+ +    // Make sure every per-input container has a slot for this input.
+ +    LayerData& ld = layers[pin.lid];
+ +    const int numInputs = std::max(pin.oid + 1, (int)ld.requiredOutputs.size());
+ +    ld.outputBlobs.resize(numInputs);
+ +    ld.outputBlobsWrappers.resize(numInputs);
+ +    netInputLayer->inputsData.resize(numInputs);
+ +    netInputLayer->scaleFactors.resize(numInputs);
+ +    netInputLayer->means.resize(numInputs);
+ +
+ +    // Compare against the previously stored input before it is overwritten.
+ +    MatShape prevShape = shape(netInputLayer->inputsData[pin.oid]);
+ +    bool oldShape = prevShape == blobShape;
+ +
+ +    blob_.copyTo(netInputLayer->inputsData[pin.oid]);
+ +    // On a shape change, point the layer's output blob at the newly stored input.
+ +    if (!oldShape)
+ +        ld.outputBlobs[pin.oid] = netInputLayer->inputsData[pin.oid];
+ +
+ +    // Tell an existing backend wrapper that the host data has changed.
+ +    if (!ld.outputBlobsWrappers[pin.oid].empty())
+ +    {
+ +        ld.outputBlobsWrappers[pin.oid]->setHostDirty();
+ +    }
+ +    netInputLayer->scaleFactors[pin.oid] = scalefactor;
+ +    netInputLayer->means[pin.oid] = mean;
+ +    // The allocated state only survives when the input shape is unchanged.
+ +    netWasAllocated = netWasAllocated && oldShape;
+ +}
+ +
+ +
+ +// Returns the learned-parameter blob number `numParam` of layer `layer`.
+ +// `numParam` must be a valid index into the layer's `blobs` vector
+ +// (0 <= numParam < blobs.size()); a negative index is rejected as well.
+ +Mat Net::Impl::getParam(int layer, int numParam) const
+ +{
+ +    LayerData& ld = getLayerData(layer);
+ +    std::vector<Mat>& layerBlobs = ld.getLayerInstance()->blobs;
+ +    CV_Assert(numParam >= 0 && numParam < (int)layerBlobs.size());
+ +    return layerBlobs[numParam];
+ +}
+ +
+ +// Replaces the learned-parameter blob number `numParam` of layer `layer` with `blob`.
+ +// `numParam` must be a valid index into the layer's `blobs` vector
+ +// (0 <= numParam < blobs.size()); a negative index is rejected as well.
+ +void Net::Impl::setParam(int layer, int numParam, const Mat& blob)
+ +{
+ +    LayerData& ld = getLayerData(layer);
+ +
+ +    std::vector<Mat>& layerBlobs = ld.getLayerInstance()->blobs;
+ +    CV_Assert(numParam >= 0 && numParam < (int)layerBlobs.size());
+ +    // we don't make strong checks (type/shape of the new blob), use this function carefully
+ +    layerBlobs[numParam] = blob;
+ +}
+ +
+ +
+ +static
+ +string dumpLayerParameterSize(const string& name, const LayerParams& lp)
+ +{
+ +    std::ostringstream out(name, std::ios::ate);
+ +    DictValue param = lp.get(name);
+ +    switch (param.size())
+ +    {
+ +    case 1: out << " : "; break;
+ +    case 2: out << " (HxW): "; break;
+ +    case 3: out << " (DxHxW): "; break;
+ +    default:
+ +        CV_LOG_INFO(NULL, format("DNN/dumpLayerParameterSize(): Unsupported '%s' size = %d", name.c_str(), param.size()));
+ +        out << ": ";
+ +    }
+ +    for (size_t i = 0; i < param.size(); i++)
+ +    {
+ +        if (i > 0)
+ +            out << " x ";
+ +        out << param.get<int>(i);
+ +    }
+ +    return out.str();
+ +}
+ +
+ +string Net::Impl::dump(bool forceAllocation) const
+ +{
+ +    bool hasInput = !netInputLayer->inputsData.empty();
+ +    if (forceAllocation)
+ +    {
+ +        if (!netWasAllocated)
+ +            const_cast<Net::Impl*>(this)->setUpNet();
+ +    }
+ +
+ +    std::ostringstream out;
+ +    const std::map<int, LayerData>& map = layers;
+ +
+ +    Backend prefBackend = (Backend)preferableBackend;
+ +    std::vector<std::vector<int>> skippedLayers;
+ +    std::vector<int> skipId;
+ +    std::vector<int> allLayers(map.size(), -1);
+ +    int idPrev = -1;
+ +    Ptr<BackendNode> prevNode;
+ +    for (std::map<int, LayerData>::const_reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit)
+ +    {
+ +        std::map<int, Ptr<BackendNode>>::const_iterator itBackend = rit->second.backendNodes.find(prefBackend);
+ +        if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() || itBackend->second.empty())
+ +        {
+ +            if (rit->second.skip)
+ +                skipId.push_back(rit->first);
+ +            else if (!skipId.empty())
+ +            {
+ +                if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty())
+ +                    skipId.push_back(rit->first);
+ +                else if (idPrev != -1)
+ +                    skipId.push_back(idPrev);
+ +
+ +                std::sort(skipId.begin(), skipId.end());
+ +                for (int i = 0; i < skipId.size(); i++)
+ +                {
+ +                    allLayers[skipId[i]] = skippedLayers.size();
+ +                }
+ +                skippedLayers.push_back(skipId);
+ +                skipId.clear();
+ +            }
+ +        }
+ +        else
+ +        {
+ +            if (itBackend->second == prevNode)
+ +                skipId.push_back(idPrev);
+ +            else if (!skipId.empty())
+ +            {
+ +                skipId.push_back(idPrev);
+ +                std::sort(skipId.begin(), skipId.end());
+ +                for (int i = 0; i < skipId.size(); i++)
+ +                {
+ +                    allLayers[skipId[i]] = skippedLayers.size();
+ +                }
+ +                skippedLayers.push_back(skipId);
+ +                skipId.clear();
+ +            }
+ +            idPrev = rit->first;
+ +            prevNode = itBackend->second;
+ +        }
+ +    }
+ +    std::vector<string> colors = { "#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462", "#ff4848", "#b35151", "#b266ff" };
+ +    string backend;
+ +    switch (prefBackend)
+ +    {
+ +    case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break;
+ +    case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break;
+ +    case DNN_BACKEND_INFERENCE_ENGINE:  // fallthru
+ +    case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019:  // fallthru
+ +    case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "OpenVINO/"; break;
+ +    case DNN_BACKEND_OPENCV: backend = "OCV/"; break;
+ +    case DNN_BACKEND_VKCOM: backend = "VULKAN/"; break;
+ +    case DNN_BACKEND_CUDA: backend = "CUDA/"; break;
+ +    case DNN_BACKEND_WEBNN:
+ +        backend = "WEBNN/";
+ +        break;
+ +        // don't use default:
+ +    }
+ +    out << "digraph G {\n";
+ +    // Add nodes
+ +    for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
+ +    {
+ +        const LayerData& ld = it->second;
+ +        string name = ld.params.name;
+ +        std::vector<int> clusterIds(1, it->first);
+ +        if (allLayers[it->first] == -1 && !name.empty())
+ +        {
+ +            out << "\t\"" << name << "\" [label=\"";
+ +        }
+ +        else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0])
+ +        {
+ +            continue;
+ +        }
+ +        else  // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0]
+ +        {
+ +            int cluster = allLayers[it->first];
+ +            out << "\t\""
+ +                << "cluster_" << cluster << "\" [label=\"{";
+ +            clusterIds = skippedLayers[allLayers[it->first]];  // vertices in current cluster
+ +        }
+ +        for (int i = 0; i < clusterIds.size(); i++)
+ +        {
+ +            CV_DbgAssert(map.find(clusterIds[i]) != map.end());
+ +            const LayerParams& lp = map.find(clusterIds[i])->second.params;
+ +            if (!lp.name.empty())
+ +            {
+ +                if (i > 0)
+ +                {
+ +                    out << " | ";
+ +                }
+ +                out << lp.name << "\\n"
+ +                    << lp.type << "\\n";  // align center
+ +                if (lp.has("kernel_size"))
+ +                {
+ +                    string kernel = dumpLayerParameterSize("kernel_size", lp);
+ +                    out << kernel;
+ +                    out << "\\l";  // align left
+ +                }
+ +                else if (lp.has("kernel_h") && lp.has("kernel_w"))
+ +                {
+ +                    DictValue h = lp.get("kernel_h");
+ +                    DictValue w = lp.get("kernel_w");
+ +                    out << "kernel (HxW): " << h << " x " << w;
+ +                    out << "\\l";  // align left
+ +                }
+ +                if (lp.has("stride"))
+ +                {
+ +                    string stride = dumpLayerParameterSize("stride", lp);
+ +                    out << stride;
+ +                    out << "\\l";  // align left
+ +                }
+ +                else if (lp.has("stride_h") && lp.has("stride_w"))
+ +                {
+ +                    DictValue h = lp.get("stride_h");
+ +                    DictValue w = lp.get("stride_w");
+ +                    out << "stride (HxW): " << h << " x " << w;
+ +                    out << "\\l";  // align left
+ +                }
+ +                if (lp.has("dilation"))
+ +                {
+ +                    string dilation = dumpLayerParameterSize("dilation", lp);
+ +                    out << dilation;
+ +                    out << "\\l";  // align left
+ +                }
+ +                else if (lp.has("dilation_h") && lp.has("dilation_w"))
+ +                {
+ +                    DictValue h = lp.get("dilation_h");
+ +                    DictValue w = lp.get("dilation_w");
+ +                    out << "dilation (HxW): " << h << " x " << w;
+ +                    out << "\\l";  // align left
+ +                }
+ +                if (lp.has("pad"))
+ +                {
+ +                    DictValue pad = lp.get("pad");
+ +                    out << "pad ";
+ +                    switch (pad.size())
+ +                    {
+ +                    case 1: out << ": " << pad; break;
+ +                    case 2:
+ +                        out << "(HxW): (" << pad.get<int>(0) << " x " << pad.get<int>(1) << ")";
+ +                        break;
+ +                    case 4:
+ +                        out << "(HxW): (" << pad.get<int>(0) << ", " << pad.get<int>(2)
+ +                            << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(3) << ")";
+ +                        break;
+ +                    case 6:
+ +                        out << "(DxHxW): (" << pad.get<int>(0) << ", " << pad.get<int>(3)
+ +                            << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(4)
+ +                            << ") x (" << pad.get<int>(2) << ", " << pad.get<int>(5) << ")";
+ +                        break;
+ +                    default: CV_Error(Error::StsNotImplemented, format("Unsupported pad size = %d", pad.size()));
+ +                    }
+ +                    out << "\\l";  // align left
+ +                }
+ +                else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b"))
+ +                {
+ +                    DictValue l = lp.get("pad_l");
+ +                    DictValue t = lp.get("pad_t");
+ +                    DictValue r = lp.get("pad_r");
+ +                    DictValue b = lp.get("pad_b");
+ +                    out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")";
+ +                    out << "\\l";  // align left
+ +                }
+ +                else if (lp.has("pooled_w") || lp.has("pooled_h"))
+ +                {
+ +                    DictValue h = lp.get("pooled_h");
+ +                    DictValue w = lp.get("pooled_w");
+ +                    out << "pad pooled (HxW): " << h << " x " << w;
+ +                    out << "\\l";  // align left
+ +                }
+ +                if (lp.has("pool"))
+ +                {
+ +                    out << "pool: " << lp.get("pool");
+ +                    out << "\\l";  // align left
+ +                }
+ +                if (lp.has("global_pooling"))
+ +                {
+ +                    out << "global_pooling: " << lp.get("global_pooling");
+ +                    out << "\\l";  // align left
+ +                }
+ +                if (lp.has("group"))
+ +                {
+ +                    out << "group: " << lp.get("group");
+ +                    out << "\\l";  // align left
+ +                }
+ +            }
+ +        }
+ +        if (!ld.outputBlobs.empty())
+ +        {
+ +            out << "output: " << ld.outputBlobs[0].size;
+ +            out << "\\l";  // align left
+ +        }
+ +
+ +        Ptr<BackendNode> layerBackend;
+ +        std::map<int, Ptr<BackendNode>>::const_iterator ibn = ld.backendNodes.find(prefBackend);
+ +        if (ibn != ld.backendNodes.end())
+ +            layerBackend = ibn->second;
+ +        out << (!layerBackend.empty() ? backend : "OCV/");
+ +        int colorId = 0;
+ +        const Target target = ld.layerInstance.empty()
+ +                ? DNN_TARGET_CPU
+ +                : (Target)(ld.layerInstance->preferableTarget);  // TODO fix preferableTarget type
+ +        switch (target)
+ +        {
+ +        case DNN_TARGET_CPU:
+ +            out << "CPU";
+ +            colorId = layerBackend.empty() ? 0 : 5;
+ +            break;
+ +        case DNN_TARGET_OPENCL:
+ +            out << "OCL";
+ +            colorId = 1;
+ +            break;
+ +        case DNN_TARGET_OPENCL_FP16:
+ +            out << "OCL_FP16";
+ +            colorId = 2;
+ +            break;
+ +        case DNN_TARGET_MYRIAD:
+ +            out << "MYRIAD";
+ +            colorId = 3;
+ +            break;
+ +        case DNN_TARGET_HDDL:
+ +            out << "HDDL";
+ +            colorId = 8;
+ +            break;
+ +        case DNN_TARGET_VULKAN:
+ +            out << "VULKAN";
+ +            colorId = 7;
+ +            break;
+ +        case DNN_TARGET_FPGA:
+ +            out << "FPGA";
+ +            colorId = 4;
+ +            break;
+ +        case DNN_TARGET_CUDA:
+ +            out << "CUDA";
+ +            colorId = 5;
+ +            break;
+ +        case DNN_TARGET_CUDA_FP16:
+ +            out << "CUDA_FP16";
+ +            colorId = 6;
+ +            break;
+ +            // don't use default:
+ +        }
+ +        CV_Assert(colorId < colors.size());
+ +        out << "\\n";  // align center
+ +        out << ((clusterIds.size() == 1) ? "\" " : " }\" ");
+ +        out << "fillcolor=\"" << colors[colorId] << "\" ";
+ +        out << "style=filled ";
+ +        out << "shape=" << ((clusterIds.size() == 1) ? "box" : "record") << "]\n";
+ +    }
+ +    out << '\n';
+ +    // Add edges
+ +    int inputsSize = hasInput ? netInputLayer->outNames.size() : 0;
+ +    for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
+ +    {
+ +        const LayerData& ld = it->second;
+ +        if (allLayers[it->first] == -1)  // node
+ +        {
+ +            for (int i = 0; i < ld.consumers.size(); i++)
+ +            {
+ +                int outId = ld.consumers[i].lid;
+ +                if (it == map.begin() && inputsSize > 1)
+ +                    out << "\t\"" << ld.name << "_" << i << "\""
+ +                        << " -> ";
+ +                else
+ +                    out << "\t\"" << ld.name << "\""
+ +                        << " -> ";
+ +                if (allLayers[outId] == -1)  // node
+ +                {
+ +                    CV_DbgAssert(map.find(outId) != map.end());
+ +                    out << "\"" << map.find(outId)->second.name << "\"\n";
+ +                }
+ +                else  // cluster
+ +                {
+ +                    out << "\""
+ +                        << "cluster_" << allLayers[outId] << "\"\n";
+ +                }
+ +            }
+ +        }
+ +        else if (it->first == skippedLayers[allLayers[it->first]].back())  // edges from last layer in cluster
+ +        {
+ +            for (int i = 0; i < ld.consumers.size(); i++)
+ +            {
+ +                int outId = ld.consumers[i].lid;
+ +                if (allLayers[outId] == -1)  // node
+ +                {
+ +                    CV_DbgAssert(map.find(outId) != map.end());
+ +                    out << "\t\""
+ +                        << "cluster_" << allLayers[it->first] << "\""
+ +                        << " -> ";
+ +                    out << "\"" << map.find(outId)->second.name << "\"\n";
+ +                }
+ +                else if (allLayers[outId] != allLayers[it->first])
+ +                {  // another cluster
+ +                    out << "\t\""
+ +                        << "cluster_" << allLayers[it->first] << "\""
+ +                        << " -> ";
+ +                    out << "\""
+ +                        << "cluster_" << allLayers[outId] << "\"\n";
+ +                }
+ +            }
+ +        }
+ +    }
+ +    out << "}\n";
+ +    return out.str();
+ +}
+ +
+ +
+ +void Net::Impl::dumpNetworkToFile() const
+ +{
+ +#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
+ +    string dumpFileNameBase = getDumpFileNameBase();
+ +    string dumpFileName = dumpFileNameBase + ".dot";
+ +    try
+ +    {
+ +        string dumpStr = dump();
+ +        std::ofstream out(dumpFileName.c_str(), std::ios::out | std::ios::binary);
+ +        out << dumpStr;
+ +    }
+ +    catch (const std::exception& e)
+ +    {
+ +        std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
+ +        out << "Exception: " << e.what() << std::endl;
+ +    }
+ +    catch (...)
+ +    {
+ +        std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
+ +        out << "Can't dump: unknown exception" << std::endl;
+ +    }
+ +#endif
+ +}
+ +
+ +
+ +// Returns one layer per entry of inputBlobsId of layer `layerId`: for each input pin,
+ +// the layer obtained from getLayer(pin.lid), in pin order.
+ +std::vector<Ptr<Layer>> Net::Impl::getLayerInputs(int layerId) const
+ +{
+ +    LayerData& ld = getLayerData(layerId);
+ +    const size_t pinCount = ld.inputBlobsId.size();
+ +
+ +    std::vector<Ptr<Layer>> producers;
+ +    producers.reserve(pinCount);
+ +    for (size_t pin = 0; pin < pinCount; ++pin)
+ +        producers.push_back(getLayer(ld.inputBlobsId[pin].lid));
+ +    return producers;
+ +}
+ +
+ +// Collects the names of all registered layers, in map (id) order, leaving out the
+ +// layer whose id is 0 (the Data layer).
+ +std::vector<String> Net::Impl::getLayerNames() const
+ +{
+ +    std::vector<String> names;
+ +    names.reserve(layers.size());
+ +
+ +    for (const auto& idAndLayer : layers)
+ +    {
+ +        const LayerData& ld = idAndLayer.second;
+ +        if (ld.id == 0)
+ +            continue;  // skip Data layer
+ +        names.push_back(ld.name);
+ +    }
+ +    return names;
+ +}
+ +
+ +
+ +// FIXIT drop "unconnected" API
+ +std::vector<int> Net::Impl::getUnconnectedOutLayers() const
+ +{
+ +    std::vector<int> layersIds;
+ +
+ +    // registerOutput() flow
+ +    if (!outputNameToId.empty())
+ +    {
+ +        for (std::map<std::string, int>::const_iterator it = outputNameToId.begin(); it != outputNameToId.end(); ++it)
+ +        {
+ +            layersIds.push_back(it->second);
+ +        }
+ +        return layersIds;
+ +    }
+ +
+ +    Impl::MapIdToLayerData::const_iterator it;
+ +    for (it = layers.begin(); it != layers.end(); it++)
+ +    {
+ +        int lid = it->first;
+ +        const LayerData& ld = it->second;
+ +
+ +        if (ld.requiredOutputs.size() == 0)
+ +            layersIds.push_back(lid);
+ +    }
+ +
+ +    return layersIds;
+ +}
+ +
+ +
+ +// FIXIT drop "unconnected" API
+ +// Returns the names of the layers reported by getUnconnectedOutLayers(), in the same order.
+ +std::vector<String> Net::Impl::getUnconnectedOutLayersNames() /*const*/
+ +{
+ +    const std::vector<int> outIds = getUnconnectedOutLayers();
+ +
+ +    std::vector<String> names;
+ +    names.reserve(outIds.size());
+ +    for (size_t k = 0; k < outIds.size(); ++k)
+ +        names.push_back(layers[outIds[k]].name);
+ +    return names;
+ +}
+ +
+ +
+ +int64 Net::Impl::getFLOPS(const std::vector<MatShape>& netInputShapes) /*const*/
+ +{
+ +    int64 flops = 0;
+ +    std::vector<int> ids;
+ +    std::vector<std::vector<MatShape>> inShapes, outShapes;
+ +    getLayersShapes(netInputShapes, ids, inShapes, outShapes);
+ +    CV_Assert(inShapes.size() == outShapes.size());
+ +    CV_Assert(inShapes.size() == ids.size());
+ +
+ +    for (int i = 0; i < ids.size(); i++)
+ +    {
+ +        flops += layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i], outShapes[i]);
+ +    }
+ +
+ +    return flops;
+ +}
+ +
+ +
+ +// Returns Layer::getFLOPS() of the single layer `layerId`, evaluated on the input/output
+ +// shapes that getLayerShapes() derives from `netInputShapes`.
+ +// Asserts that `layerId` exists.
+ +int64 Net::Impl::getFLOPS(
+ +        const int layerId,
+ +        const std::vector<MatShape>& netInputShapes) /*const*/
+ +{
+ +    Impl::MapIdToLayerData::const_iterator layer = layers.find(layerId);
+ +    CV_Assert(layer != layers.end());
+ +
+ +    LayerShapes shapes;
+ +    getLayerShapes(netInputShapes, layerId, shapes);
+ +
+ +    // getLayerInstance() is non-const, hence the cast on the entry found through a const_iterator
+ +    LayerData& ld = const_cast<LayerData&>(layer->second);
+ +    return ld.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
+ +}
+ +
+ +
+ +void Net::Impl::getMemoryConsumption(
+ +        const int layerId,
+ +        const std::vector<MatShape>& netInputShapes,
+ +        size_t& weights, size_t& blobs) /*const*/
+ +{
+ +    Impl::MapIdToLayerData::const_iterator layer = layers.find(layerId);
+ +    CV_Assert(layer != layers.end());
+ +
+ +    weights = blobs = 0;
+ +
+ +    for (int i = 0; i < layer->second.params.blobs.size(); i++)
+ +    {
+ +        const Mat& weightsBlob = layer->second.params.blobs[i];
+ +        weights += weightsBlob.total() * weightsBlob.elemSize();
+ +    }
+ +
+ +    LayerShapes shapes;
+ +    getLayerShapes(netInputShapes, layerId, shapes);
+ +    const ShapesVec& outLayerShapes = shapes.out;
+ +
+ +    // FIXIT netWasQuantized check is not enough - per layer check should be done
+ +    size_t elemSize = netWasQuantized ? sizeof(char) : sizeof(float);
+ +    for (int i = 0; i < outLayerShapes.size(); i++)
+ +    {
+ +        blobs += total(outLayerShapes[i]) * elemSize;
+ +    }
+ +}
+ +
+ +
+ +void Net::Impl::getMemoryConsumption(
+ +        const std::vector<MatShape>& netInputShapes,
+ +        size_t& weights, size_t& blobs) /*const*/
+ +{
+ +    std::vector<int> layerIds;
+ +    std::vector<size_t> w, b;
+ +    getMemoryConsumption(netInputShapes, layerIds, w, b);
+ +
+ +    weights = blobs = 0;
+ +    for (int i = 0; i < layerIds.size(); i++)
+ +    {
+ +        weights += w[i];
+ +        blobs += b[i];
+ +    }
+ +}
+ +
+ +
+ +// Copies the per-layer timings into `timings` and returns their sum.
+ +//   timings - [out] layersTimings without its first entry
+ +//             (NOTE(review): presumably the slot of the Data layer, id 0 - confirm)
+ +// Returns the sum of the copied values, truncated to int64.
+ +// If no timings have been recorded yet, `timings` is left empty and 0 is returned.
+ +int64 Net::Impl::getPerfProfile(std::vector<double>& timings) const
+ +{
+ +    timings.clear();
+ +    // begin() + 1 on an empty vector would form an invalid range, so guard it
+ +    if (!layersTimings.empty())
+ +        timings.assign(layersTimings.begin() + 1, layersTimings.end());
+ +
+ +    int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0);
+ +    return total;
+ +}
+ +
+ +void Net::Impl::getMemoryConsumption(
+ +        const std::vector<MatShape>& netInputShapes,
+ +        std::vector<int>& layerIds, std::vector<size_t>& weights,
+ +        std::vector<size_t>& blobs) /*const*/
+ +{
+ +    layerIds.clear();
+ +    weights.clear();
+ +    blobs.clear();
+ +
+ +    std::vector<std::vector<MatShape>> inLayerShapes, outLayerShapes;
+ +
+ +    getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes);
+ +    // FIXIT netWasQuantized check is not enough - per layer check should be done
+ +    size_t elemSize = netWasQuantized ? sizeof(char) : sizeof(float);
+ +    for (int i = 0; i < layerIds.size(); i++)
+ +    {
+ +        int w = 0, b = 0;
+ +        Impl::MapIdToLayerData::const_iterator layer = layers.find(layerIds[i]);
+ +        CV_Assert(layer != layers.end());
+ +
+ +        for (int j = 0; j < layer->second.params.blobs.size(); j++)
+ +        {
+ +            const Mat& weightsBlob = layer->second.params.blobs[j];
+ +            w += weightsBlob.total() * weightsBlob.elemSize();
+ +        }
+ +
+ +        for (int j = 0; j < outLayerShapes[i].size(); j++)
+ +        {
+ +            b += total(outLayerShapes[i][j]) * elemSize;
+ +        }
+ +
+ +        weights.push_back(w);
+ +        blobs.push_back(b);
+ +    }
+ +}
+ +
+ +
+ +// TODO drop?
+ +void Net::Impl::getLayerTypes(std::vector<String>& layersTypes) const
+ +{
+ +    layersTypes.clear();
+ +
+ +    std::map<String, int> layers_type_map;
+ +    for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
+ +    {
+ +        if (layers_type_map.find(it->second.type) == layers_type_map.end())
+ +            layers_type_map[it->second.type] = 0;
+ +        layers_type_map[it->second.type]++;
+ +    }
+ +
+ +    for (std::map<String, int>::const_iterator it = layers_type_map.begin(); it != layers_type_map.end(); it++)
+ +    {
+ +        layersTypes.push_back(it->first);
+ +    }
+ +}
+ +
+ +
+ +// TODO drop?
+ +int Net::Impl::getLayersCount(const String& layerType) const
+ +{
+ +    int count = 0;
+ +    for (Impl::MapIdToLayerData::const_iterator it = layers.begin();
+ +            it != layers.end(); it++)
+ +    {
+ +        if (it->second.type == layerType)
+ +            count++;
+ +    }
+ +    return count;
+ +}
+ +
+ +
+ +CV__DNN_INLINE_NS_END
+ +}}  // namespace cv::dnn
diff --cc modules/dnn/src/onnx/onnx_importer.cpp

index 5713c02,e755226..b43bb5a
--- 1/modules/dnn/src/onnx/onnx_importer.cpp
--- 2/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@@ -79,9 -65,62 +79,17 @@@ class ONNXImporte
       void expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto,
                      const std::string& input, size_t n);
       void addNegation(const LayerParams& layerParams, opencv_onnx::NodeProto& node_proto, int input_id);
+     void lstm_extractConsts(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, size_t idx, int* blobShape_, int size);
+     void lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n);
+     std::string lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n);
+     std::string lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto,
+                               int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name,
+                               const int index);
+     void lstm_add_transform(int num_directions, int batch_size, int hidden_size,
+                             int index, const std::string& input_name, const std::string& output_name);
   public:
- -
- -    ONNXImporter(Net& net, const char *onnxFile)
- -        : dstNet(net), dispatch(buildDispatchMap())
- -        , onnx_opset(0)
- -    {
- -        hasDynamicShapes = false;
- -        CV_Assert(onnxFile);
- -        CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile);
- -
- -        std::fstream input(onnxFile, std::ios::in | std::ios::binary);
- -        if (!input)
- -        {
- -            CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", onnxFile));
- -        }
- -
- -        if (!model_proto.ParseFromIstream(&input))
- -        {
- -            CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX model: %s", onnxFile));
- -        }
- -
- -        populateNet();
- -    }
- -
- -    ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
- -        : dstNet(net), dispatch(buildDispatchMap())
- -        , onnx_opset(0)
- -    {
- -        hasDynamicShapes = false;
- -        CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");
- -
- -        struct _Buf : public std::streambuf
- -        {
- -            _Buf(const char* buffer, size_t sizeBuffer)
- -            {
- -                char* p = const_cast<char*>(buffer);
- -                setg(p, p, p + sizeBuffer);
- -            }
- -        };
- -
- -        _Buf buf(buffer, sizeBuffer);
- -        std::istream input(&buf);
- -
- -        if (!model_proto.ParseFromIstream(&input))
- -            CV_Error(Error::StsUnsupportedFormat, "Failed to parse onnx model from in-memory byte array.");
- -
- -        populateNet();
- -    }
+ +    ONNXImporter(Net& net, const char *onnxFile);
+ +    ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer);
   
       void populateNet();
   
@@@ -1627,72 -1354,245 +1614,286 @@@ void transformBlobs(std::vector<Mat>& b
       Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
       Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
   
+     blobs[0] = Wh;
+     blobs[1] = Wx;
+     blobs[2] = b.reshape(1, 1);
+     blobs[3] = h0;
+     blobs[4] = c0;
   
-     lstmParams.blobs.resize(5);
-     lstmParams.blobs[0] = Wh;
-     lstmParams.blobs[1] = Wx;
-     lstmParams.blobs[2] = b;
-     lstmParams.blobs[3] = h0;
-     lstmParams.blobs[4] = c0;
+     if (blobs.size() == 5) {
+         // so that future patch removing copies can leave all indexing as is
+         blobs.insert(blobs.begin(), cudaWorkaround.begin(), cudaWorkaround.end());
+         return;
+     }
   
-     // read direction attribute
-     lstmParams.set("reverse", lstmParams.get<String>("direction", "") == "reverse");
-     lstmParams.set("bidirectional", lstmParams.get<String>("direction", "") == "bidirectional");
+     Mat P = blobs[5];
+     blobs[5] = P.colRange(0, numHidden);
+     blobs[5] = blobs[5].clone().reshape(1, blobs[5].total());  // Single column.
+     blobs[5] = Mat::diag(blobs[5]);
   
-     node_proto.set_output(0, lstmParams.name);  // set different name so output shapes will be registered on that name
-     addLayer(lstmParams, node_proto);
+     blobs.push_back(P.colRange(numHidden, 2 * numHidden));
+     blobs[6] = blobs[6].clone().reshape(1, blobs[6].total());  // Single column.
+     blobs[6] = Mat::diag(blobs[6]);
   
-     MatShape lstmShape = outShapes[node_proto.output(0)];
+     blobs.push_back(P.colRange(2 * numHidden, 3 * numHidden));
+     blobs[7] = blobs[7].clone().reshape(1, blobs[7].total());  // Single column.
+     blobs[7] = Mat::diag(blobs[7]);
   
-     // Add fake 1 as it is done in ONNX
-     lstmShape.insert(lstmShape.begin() + 1, 1);
+     // so that future patch removing copies can leave all indexing as is
+     blobs.insert(blobs.begin(), cudaWorkaround.begin(), cudaWorkaround.end());
+ }
   
-     layerParams.type = "Reshape";
-     layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size()));
-     node_proto.set_input(0, lstmParams.name);  // redirect input to LSTM
-     node_proto.set_output(0, output_name);  // keep origin LSTM's name
-     addLayer(layerParams, node_proto);
+ void ONNXImporter::lstm_extractConsts(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, size_t idx, int* blobShape_, int size)
+ {
+         MatShape blobShape(blobShape_, blobShape_ + size);
+         Mat blob;
+         if (idx < lstm_proto.input_size() && !lstm_proto.input(idx).empty())
+         {
+             blob = getBlob(lstm_proto, idx);
+             CV_Assert(shape(blob) == blobShape);
+         }
+         else
+         {
+             blob = Mat(blobShape, CV_32FC1, 0.);
+         }
+         layerParams.blobs.push_back(blob);
+ };
+ 
+ void ONNXImporter::lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n)
+ {
+     LayerParams reshapeLp;
+     reshapeLp.name = cv::format("%s/reshape", input_name.c_str());
+     reshapeLp.type = "Reshape";
+     CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
+ 
+     reshapeLp.set("dim", DictValue::arrayInt(layerShape, n));
+ 
+     opencv_onnx::NodeProto reshape_proto;
+     reshape_proto.add_input(input_name);
+     reshape_proto.add_output(output_name);
+     addLayer(reshapeLp, reshape_proto);
+ };
+ 
+ std::string ONNXImporter::lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n)
+ {
+     LayerParams sliceLP;
+     sliceLP.name = cv::format("%s/slice_%d", input_name.c_str(), index);
+     sliceLP.type = "Slice";
+     CV_Assert(layer_id.find(sliceLP.name) == layer_id.end());
+ 
+     sliceLP.set("begin", DictValue::arrayInt(begin, n));
+     sliceLP.set("end", DictValue::arrayInt(end, n));
+     sliceLP.set("axis", 0);
+ 
+     opencv_onnx::NodeProto slice_proto;
+     slice_proto.add_input(input_name);
+     slice_proto.add_output(sliceLP.name);
+     addLayer(sliceLP, slice_proto);
+ 
+     return slice_proto.output(0);
+ };
+ 
+ std::string ONNXImporter::lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto,
+                                         int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name,
+                                         const int index)
+ {
+     std::string reshape_output = cv::format("%s/reshape_%d", layerParams.name.c_str(), index);
+ 
+     // reshape from Seq, Batch, Dirs*Hidden to Seq, Batch, Dirs, Hidden
+     // to not confuse reshape with dynamic first dimension, zero means 'leave unchanged'
+     int layerShape[] = {0, batch_size, num_directions, hidden_size};
+     lstm_add_reshape(lstm_proto.output(index), reshape_output, layerShape, sizeof(layerShape) / sizeof(layerShape[0]));
+ 
+     // permute from Seq, Batch, Dirs, Hidden to Seq, Dirs, Batch, Hidden
+     LayerParams permuteLP;
+     permuteLP.name = reshape_output + "/permute";
+     permuteLP.type = "Permute";
+     CV_Assert(layer_id.find(permuteLP.name) == layer_id.end());
+ 
+     int order[] = {0, 2, 1, 3};
+     permuteLP.set("order", DictValue::arrayInt(order, 4));
+ 
+     opencv_onnx::NodeProto permute_proto;
+     permute_proto.add_input(reshape_output);
+     permute_proto.add_output((need_y && index == 0) ? y_name : static_cast<std::string>(permuteLP.name));
+     addLayer(permuteLP, permute_proto);
+ 
+     return permute_proto.output(0);
+ };
+ 
+ void ONNXImporter::lstm_add_transform(int num_directions, int batch_size, int hidden_size,
+                                       int index, const std::string& input_name, const std::string& output_name)
+ {
+     if (num_directions == 1)
+     {
+         // Slice: Yh = Y[-1, :, :, :]
+         int begin[] = {-1}, end[] = {INT_MAX};
+         std::string slice_output = lstm_add_slice(index, input_name, begin, end, sizeof(begin) / sizeof(begin[0]));
+ 
+         // Reshape: 1x1xBxH -> 1xBxH
+         int layerShape[] = {1, batch_size, hidden_size};
+         lstm_add_reshape(slice_output, output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0]));
+     }
+     else
+     {
+         // Slice: SxDxBxH -> last sequence, first direction
+         int begin0[] = {-1, 0}, end0[] = {INT_MAX, 1};
+         std::string slice_0 = lstm_add_slice(0, input_name, begin0, end0, sizeof(begin0) / sizeof(begin0[0]));
+ 
+         // Slice: SxDxBxH -> first sequence, last direction
+         int begin1[] = {0, -1}, end1[] = {1, INT_MAX};
+         std::string slice_1 = lstm_add_slice(1, input_name, begin1, end1, sizeof(begin1) / sizeof(begin1[0]));
+ 
+         LayerParams concatLP;
+         concatLP.name = cv::format("%s/concat", input_name.c_str());
+         concatLP.type = "Concat";
+         CV_Assert(layer_id.find(concatLP.name) == layer_id.end());
+ 
+         concatLP.set("axis", 1); // 1x1xBxH -> 1x2xBxH
+ 
+         opencv_onnx::NodeProto concat_proto;
+         concat_proto.add_input(slice_0);
+         concat_proto.add_input(slice_1);
+         concat_proto.add_output(concatLP.name);
+         addLayer(concatLP, concat_proto);
+ 
+         // Reshape: 1x2xBxH -> 2xBxH
+         int layerShape[] = {2, batch_size, hidden_size};
+         lstm_add_reshape(concat_proto.output(0), output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0]));
+     }
+ };
+ 
+ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
+ {  // Imports an ONNX LSTM node: gathers the W, R, B, initial_h, initial_c (and optional P) blobs, adds the LSTM layer, then post-processes its first output.
+     opencv_onnx::NodeProto lstm_proto = node_proto_;  // working copy: its output list is rewritten below
+     layerParams.name += "/lstm";
+ 
+     // https://github.com/onnx/onnx/blob/main/docs/Operators.md#LSTM
+     CV_Assert(lstm_proto.input_size() >= 3);
+     for (size_t i = 1; i < 3; ++i)  // inputs 1 and 2 must be named and present in constBlobs
+     {
+         const std::string& name = lstm_proto.input(i);
+         CV_Assert(!name.empty() && constBlobs.count(name) == 1);
+     }
+ 
+     IterShape_t shapeIt = outShapes.find(lstm_proto.input(0));
+     CV_Assert(shapeIt != outShapes.end());
+     const MatShape x_shape = shapeIt->second;  // shape registered for input 0; its first three dims are read below
+ 
+     const int seq_length = x_shape[0];
+     const int batch_size = x_shape[1];
+     const int input_size = x_shape[2];
+     const int hidden_size = layerParams.get<int>("hidden_size");
+     const int num_directions = constBlobs[lstm_proto.input(1)].size[0];  // first dimension of the input-1 blob
+ 
+     int w_size[] = {num_directions, 4*hidden_size, input_size};
+     lstm_extractConsts(layerParams, lstm_proto, 1, w_size, sizeof(w_size) / sizeof(w_size[0])); // W
+ 
+     int r_size[] =  {num_directions, 4*hidden_size, hidden_size};
+     lstm_extractConsts(layerParams, lstm_proto, 2, r_size, sizeof(r_size) / sizeof(r_size[0])); // R
+ 
+     int b_size[] = {num_directions, 8*hidden_size};
+     lstm_extractConsts(layerParams, lstm_proto, 3, b_size, sizeof(b_size) / sizeof(b_size[0])); // B
+ 
+     if (4 < lstm_proto.input_size() && !lstm_proto.input(4).empty())  // input 4 (sequence_lens in the ONNX LSTM spec), when given
+     {
+         Mat blob = getBlob(lstm_proto, 4);
+         CV_Assert(blob.total() == batch_size);
+         for (MatIterator_<int32_t> it = blob.begin<int32_t>(); it != blob.end<int32_t>(); ++it)
+         {
+             CV_Assert(*it == seq_length);  // every entry must equal seq_length
+         }
+     }
+ 
+     int h_size[] = {num_directions, batch_size, hidden_size};
+     lstm_extractConsts(layerParams, lstm_proto, 5, h_size, sizeof(h_size) / sizeof(h_size[0])); // initial_h
+ 
+     int c_size[] = {num_directions, batch_size, hidden_size};
+     lstm_extractConsts(layerParams, lstm_proto, 6, c_size, sizeof(c_size) / sizeof(c_size[0])); // initial_c
+ 
+     if (lstm_proto.input_size() > 7 && !lstm_proto.input(7).empty())
+     {
+         layerParams.set("use_peephole", true);
+         int p_size[] = {num_directions, 3 * hidden_size};
+         lstm_extractConsts(layerParams, lstm_proto, 7, p_size, sizeof(p_size) / sizeof(p_size[0])); // P
+     }
+ 
+     transformBlobs(layerParams.blobs);
+ 
+     layerParams.set("is_onnx", true);
+     layerParams.set("reverse", layerParams.get<String>("direction", "") == "reverse");
+     layerParams.set("bidirectional", layerParams.get<String>("direction", "") == "bidirectional");
+ 
+     bool need_yc = lstm_proto.output_size() > 2 && !lstm_proto.output(2).empty();  // an output counts as requested when it exists and has a non-empty name
+     bool need_yh = lstm_proto.output_size() > 1 && !lstm_proto.output(1).empty();
+     bool need_y = lstm_proto.output_size() > 0 && !lstm_proto.output(0).empty();
+ 
+     const std::string y_name = need_y ? lstm_proto.output(0) : "";
+     const std::string yh_name = need_yh ? lstm_proto.output(1) : "";
+     const std::string yc_name = need_yc ? lstm_proto.output(2) : "";
+ 
+     layerParams.set("produce_cell_output", need_yc);
+ 
+     lstm_proto.clear_output();  // the original names were saved above; the output list is rebuilt here
+     if (need_y || need_yh)
+     {
+         // give random names to LSTMLayer's outputs because every output needs postprocessing
+         lstm_proto.add_output(cv::format("%s_y", layerParams.name.c_str()));
+     }
+     if (need_yc)
+     {
+         lstm_proto.add_output(yc_name);
+     }
+ 
- -    addLayer(layerParams, lstm_proto);
++    addLayer(layerParams, lstm_proto);
++
++    std::string y_output = lstm_fix_dims(layerParams, lstm_proto, batch_size, num_directions, hidden_size, need_y,
++                                         y_name, 0);
++    if (need_yh)
++    {
++        lstm_add_transform(num_directions, batch_size, hidden_size, 0, y_output, yh_name);  // yh_name is produced from the post-processed first output
++    }
+ +}
+ +
+ +void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
+ +{  // Imports an ONNX GRU node as a "<name>/gru" layer followed by a Reshape layer that keeps the node's original output name.
+ +    opencv_onnx::NodeProto node_proto = node_proto_;
+ +    const std::string output_name = node_proto.output(0);
+ +    LayerParams gruParams = layerParams;
+ +    gruParams.name += "/gru";
+ +
+ +    // https://pytorch.org/docs/stable/generated/torch.nn.GRU.html?highlight=gru#
+ +    CV_Assert(node_proto.input_size() == 6);  // exactly six inputs are required; blobs are read from inputs 1, 2, 3 and 5
+ +    Mat Wx = getBlob(node_proto, 1);
+ +    Mat Wh = getBlob(node_proto, 2);
+ +    Mat b = getBlob(node_proto, 3);
+ +    Mat h0 = getBlob(node_proto, 5);
+ +
+ +    Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);  // merge the first two dimensions into the row count
+ +    Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
+ +    h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
+ +    b = b.reshape(1, b.size[0]);
+ +
+ +    gruParams.blobs.resize(4);  // stored order: Wh, Wx, b, h0
+ +    gruParams.blobs[0] = Wh;
+ +    gruParams.blobs[1] = Wx;
+ +    gruParams.blobs[2] = b;
+ +    gruParams.blobs[3] = h0;
+ +    gruParams.set("bidirectional", gruParams.get<String>("direction", "") == "bidirectional");
+ +
+ +    node_proto.set_output(0, gruParams.name);  // set different name so output shapes will be registered on that name
+ +    addLayer(gruParams, node_proto);
+ +
+ +    MatShape gruShape = outShapes[node_proto.output(0)];
+ +
+ +    // Add fake 1 as it is done in ONNX
+ +    gruShape.insert(gruShape.begin() + 1, 1);
  
- -    std::string y_output = lstm_fix_dims(layerParams, lstm_proto, batch_size, num_directions, hidden_size, need_y,
- -                                         y_name, 0);
- -    if (need_yh)
- -    {
- -        lstm_add_transform(num_directions, batch_size, hidden_size, 0, y_output, yh_name);
- -    }
+ +    layerParams.type = "Reshape";  // the incoming layerParams object is reused for the trailing Reshape layer
+ +    layerParams.set("dim", DictValue::arrayInt(&gruShape[0], gruShape.size()));
+ +    node_proto.set_input(0, gruParams.name);  // redirect input to GRU
+ +    node_proto.set_output(0, output_name);  // keep origin GRU's name
+ +    addLayer(layerParams, node_proto);
  }
   
   void ONNXImporter::parseImageScaler(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
diff --cc modules/dnn/test/test_onnx_importer.cpp

index 4918c72,dda479c..be493c8
--- 1/modules/dnn/test/test_onnx_importer.cpp
--- 2/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@@ -1052,16 -787,13 +1054,23 @@@ TEST_P(Test_ONNX_layers, LSTM_hidden_bi
       testONNXModels("hidden_lstm_bi", npy, 0, 0, false, false);
   }
   
+ +TEST_P(Test_ONNX_layers, GRU)  // runs the "gru" ONNX test model through testONNXModels()
+ +{
+ +    testONNXModels("gru", npy, 0, 0, false, false);
+ +}
+ +
+ +TEST_P(Test_ONNX_layers, GRU_bidirectional)  // runs the "gru_bi" ONNX test model through testONNXModels()
+ +{
+ +    testONNXModels("gru_bi", npy, 0, 0, false, false);
+ +}
+ +
+ TEST_P(Test_ONNX_layers, LSTM_cell)  // runs three LSTM cell-output models: forward, bidirectional and with peepholes
+ {
+     testONNXModels("lstm_cell_forward", npy, 0, 0, false, false);
+     testONNXModels("lstm_cell_bidirectional", npy, 0, 0, false, false);
+     testONNXModels("lstm_cell_with_peepholes", npy, 0, 0, false, false);
+ }
+ 
   TEST_P(Test_ONNX_layers, Pad2d_Unfused)
   {
       testONNXModels("ReflectionPad2d");
diff --cc modules/ts/include/opencv2/ts/ocl_test.hpp
Simple merge
author	Alexander Alekhin <alexander.a.alekhin@gmail.com>
	Sat, 26 Mar 2022 16:00:11 +0000 (16:00 +0000)
committer	Alexander Alekhin <alexander.a.alekhin@gmail.com>
	Sat, 26 Mar 2022 16:00:28 +0000 (16:00 +0000)
		1	2
modules/core/include/opencv2/core/matx.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/core/include/opencv2/core/operations.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/core/src/mathfuncs.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/core/test/test_arithm.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/core/test/test_mat.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/dnn/src/layers/recurrent_layers.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/dnn/src/net_impl.cpp	patch \|	diff1 \|	\|	blob \| history
modules/dnn/src/onnx/onnx_importer.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/dnn/test/test_onnx_importer.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ts/include/opencv2/ts/ocl_test.hpp	patch \|	diff1 \|	diff2 \|	blob \| history