Merge remote-tracking branch 'upstream/3.4' into merge-3.4
author    Alexander Alekhin <alexander.a.alekhin@gmail.com>
Fri, 6 Mar 2020 20:00:55 +0000 (20:00 +0000)
committer Alexander Alekhin <alexander.a.alekhin@gmail.com>
Fri, 6 Mar 2020 20:41:30 +0000 (20:41 +0000)
18 files changed:
modules/core/include/opencv2/core/cvstd.inl.hpp
modules/core/src/norm.cpp
modules/core/src/ocl.cpp
modules/dnn/src/layers/blank_layer.cpp
modules/dnn/src/layers/const_layer.cpp
modules/dnn/src/layers/flatten_layer.cpp
modules/dnn/src/layers/normalize_bbox_layer.cpp
modules/dnn/src/layers/permute_layer.cpp
modules/dnn/src/layers/pooling_layer.cpp
modules/dnn/src/layers/prior_box_layer.cpp
modules/dnn/src/layers/reorg_layer.cpp
modules/dnn/src/layers/reshape_layer.cpp
modules/dnn/src/layers/resize_layer.cpp
modules/dnn/src/layers/scale_layer.cpp
modules/dnn/src/layers/slice_layer.cpp
modules/dnn/src/onnx/onnx_importer.cpp
modules/dnn/test/test_onnx_importer.cpp
modules/imgcodecs/src/grfmt_jpeg.cpp

@@@ -710,67 -710,58 +710,78 @@@ double cv::norm( InputArray _src, int n
      result;
      result.d = 0;
      NAryMatIterator it(arrays, ptrs);
-     int j, total = (int)it.size, blockSize = total;
-     bool blockSum = depth == CV_16F || (normType == NORM_L1 && depth <= CV_16S) ||
-             ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
-     int isum = 0;
-     int *ibuf = &result.i;
-     AutoBuffer<float> fltbuf_;
-     float* fltbuf = 0;
-     size_t esz = 0;
-     if( blockSum )
-     {
-         esz = src.elemSize();
+     CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
  
-         if( depth == CV_16F )
-         {
-             blockSize = std::min(blockSize, 1024);
-             fltbuf_.allocate(blockSize);
-             fltbuf = fltbuf_.data();
-         }
-         else
+     if ((normType == NORM_L1 && depth <= CV_16S) ||
+         ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
+     {
+         // special case to handle "integer" overflow in accumulator
+         const size_t esz = src.elemSize();
+         const int total = (int)it.size;
+         const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
+         const int blockSize = std::min(total, intSumBlockSize);
+         int isum = 0;
+         int count = 0;
+         for (size_t i = 0; i < it.nplanes; i++, ++it)
          {
-             int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
-             blockSize = std::min(blockSize, intSumBlockSize);
-             ibuf = &isum;
+             for (int j = 0; j < total; j += blockSize)
+             {
+                 int bsz = std::min(total - j, blockSize);
+                 func(ptrs[0], ptrs[1], (uchar*)&isum, bsz, cn);
+                 count += bsz;
+                 if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
+                 {
+                     result.d += isum;
+                     isum = 0;
+                     count = 0;
+                 }
+                 ptrs[0] += bsz*esz;
+                 if (ptrs[1])
+                     ptrs[1] += bsz;
+             }
          }
      }
-     for( size_t i = 0; i < it.nplanes; i++, ++it )
++    else if (depth == CV_16F)
 +    {
-         for( j = 0; j < total; j += blockSize )
++        const size_t esz = src.elemSize();
++        const int total = (int)it.size;
++        const int blockSize = std::min(total, divUp(1024, cn));
++        AutoBuffer<float, 1024> fltbuf(blockSize);
++        float* data0 = fltbuf.data();
++        for (size_t i = 0; i < it.nplanes; i++, ++it)
 +        {
-             int bsz = std::min(total - j, blockSize);
-             const uchar* data = ptrs[0];
-             if( depth == CV_16F )
-             {
-                 hal::cvt16f32f((const float16_t*)ptrs[0], fltbuf, bsz);
-                 data = (const uchar*)fltbuf;
-             }
-             func( data, ptrs[1], (uchar*)ibuf, bsz, cn );
-             if( blockSum && depth != CV_16F )
++            for (int j = 0; j < total; j += blockSize)
 +            {
-                 result.d += isum;
-                 isum = 0;
++                int bsz = std::min(total - j, blockSize);
++                hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
++                func((uchar*)data0, ptrs[1], (uchar*)&result.d, bsz, cn);
++                ptrs[0] += bsz*esz;
++                if (ptrs[1])
++                    ptrs[1] += bsz;
 +            }
-             ptrs[0] += bsz*esz;
-             if( ptrs[1] )
-                 ptrs[1] += bsz;
++        }
++    }
+     else
+     {
+         // generic implementation
+         for (size_t i = 0; i < it.nplanes; i++, ++it)
+         {
+             func(ptrs[0], ptrs[1], (uchar*)&result, (int)it.size, cn);
          }
      }
  
      if( normType == NORM_INF )
      {
--        if( depth == CV_64F )
-             ;
-         else if( depth == CV_32F )
-             result.d = result.f;
++        if(depth == CV_64F || depth == CV_16F)
+             return result.d;
 -        else if( depth == CV_32F )
++        else if (depth == CV_32F)
+             return result.f;
          else
-             result.d = result.i;
+             return result.i;
      }
      else if( normType == NORM_L2 )
-         result.d = std::sqrt(result.d);
+         return std::sqrt(result.d);
  
      return result.d;
  }
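
The hunk above keeps the L1/L2 partial sums of small integer types in an int and flushes them into the double result before the int can overflow. A minimal standalone sketch of that blocking idea follows; the helper name, block size, and data here are illustrative only and are not OpenCV code.

#include <algorithm>
#include <climits>
#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative sketch (not OpenCV code): compute an L1 sum of 8-bit values,
// flushing the int partial sum into a double every blockSize elements so the
// int accumulator can never overflow.
static double l1_norm_blocked(const std::vector<uint8_t>& data)
{
    const int blockSize = 1 << 23;      // 8388608 * 255 < INT_MAX, so isum stays safe
    double result = 0.0;
    int isum = 0;
    int count = 0;
    for (size_t i = 0; i < data.size(); i++)
    {
        isum += data[i];
        if (++count == blockSize)
        {
            result += isum;             // flush before the int can overflow
            isum = 0;
            count = 0;
        }
    }
    return result + isum;               // flush the remaining partial sum
}

int main()
{
    std::vector<uint8_t> big(30u * 1000 * 1000, 255);
    // A naive int sum would need 30e6 * 255 = 7.65e9, far above INT_MAX (~2.147e9).
    std::printf("%.0f\n", l1_norm_blocked(big));
    return 0;
}
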
@@@ -1186,70 -1177,59 +1197,82 @@@ double cv::norm( InputArray _src1, Inpu
      result;
      result.d = 0;
      NAryMatIterator it(arrays, ptrs);
-     int j, total = (int)it.size, blockSize = total;
-     bool blockSum = depth == CV_16F || (normType == NORM_L1 && depth <= CV_16S) ||
-             ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
-     unsigned isum = 0;
-     unsigned *ibuf = &result.u;
-     AutoBuffer<float> fltbuf_;
-     float* fltbuf = 0;
-     size_t esz = 0;
-     if( blockSum )
-     {
-         esz = src1.elemSize();
+     CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
  
-         if( depth == CV_16F )
-         {
-             blockSize = std::min(blockSize, 1024);
-             fltbuf_.allocate(blockSize*2);
-             fltbuf = fltbuf_.data();
-         }
-         else
+     if ((normType == NORM_L1 && depth <= CV_16S) ||
+         ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
+     {
+         // special case to handle "integer" overflow in accumulator
+         const size_t esz = src1.elemSize();
+         const int total = (int)it.size;
+         const int intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
+         const int blockSize = std::min(total, intSumBlockSize);
+         int isum = 0;
+         int count = 0;
+         for (size_t i = 0; i < it.nplanes; i++, ++it)
          {
-             int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
-             blockSize = std::min(blockSize, intSumBlockSize);
-             ibuf = &isum;
+             for (int j = 0; j < total; j += blockSize)
+             {
+                 int bsz = std::min(total - j, blockSize);
+                 func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&isum, bsz, cn);
+                 count += bsz;
+                 if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
+                 {
+                     result.d += isum;
+                     isum = 0;
+                     count = 0;
+                 }
+                 ptrs[0] += bsz*esz;
+                 ptrs[1] += bsz*esz;
+                 if (ptrs[2])
+                     ptrs[2] += bsz;
+             }
          }
      }
-     for( size_t i = 0; i < it.nplanes; i++, ++it )
++    else if (depth == CV_16F)
 +    {
-         for( j = 0; j < total; j += blockSize )
++        const size_t esz = src1.elemSize();
++        const int total = (int)it.size;
++        const int blockSize = std::min(total, divUp(512, cn));
++        AutoBuffer<float, 1024> fltbuf(blockSize * 2);
++        float* data0 = fltbuf.data();
++        float* data1 = fltbuf.data() + blockSize * cn;
++        for (size_t i = 0; i < it.nplanes; i++, ++it)
 +        {
-             int bsz = std::min(total - j, blockSize);
-             const uchar *data0 = ptrs[0], *data1 = ptrs[1];
-             if( depth == CV_16F )
-             {
-                 hal::cvt16f32f((const float16_t*)ptrs[0], fltbuf, bsz);
-                 hal::cvt16f32f((const float16_t*)ptrs[1], fltbuf + bsz, bsz);
-                 data0 = (const uchar*)fltbuf;
-                 data1 = (const uchar*)(fltbuf + bsz);
-             }
-             func( data0, data1, ptrs[2], (uchar*)ibuf, bsz, cn );
-             if( blockSum && depth != CV_16F )
++            for (int j = 0; j < total; j += blockSize)
 +            {
-                 result.d += isum;
-                 isum = 0;
++                int bsz = std::min(total - j, blockSize);
++                hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
++                hal::cvt16f32f((const float16_t*)ptrs[1], data1, bsz * cn);
++                func((uchar*)data0, (uchar*)data1, ptrs[2], (uchar*)&result.d, bsz, cn);
++                ptrs[0] += bsz*esz;
++                ptrs[1] += bsz*esz;
++                if (ptrs[2])
++                    ptrs[2] += bsz;
 +            }
-             ptrs[0] += bsz*esz;
-             ptrs[1] += bsz*esz;
-             if( ptrs[2] )
-                 ptrs[2] += bsz;
++        }
++    }
+     else
+     {
+         // generic implementation
+         for (size_t i = 0; i < it.nplanes; i++, ++it)
+         {
+             func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&result, (int)it.size, cn);
          }
      }
  
      if( normType == NORM_INF )
      {
--        if( depth == CV_64F )
-             ;
-         else if( depth == CV_32F )
-             result.d = result.f;
++        if (depth == CV_64F || depth == CV_16F)
+             return result.d;
 -        else if( depth == CV_32F )
++        else if (depth == CV_32F)
+             return result.f;
          else
-             result.d = result.u;
+             return result.u;
      }
      else if( normType == NORM_L2 )
-         result.d = std::sqrt(result.d);
+         return std::sqrt(result.d);
  
      return result.d;
  }
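
The two-array overload above applies the same blocked accumulation. A small usage sketch, assuming an OpenCV build that includes this change, shows the magnitudes involved: the exact L1 values below exceed INT_MAX, so an unblocked int accumulator would overflow. The matrix sizes are arbitrary, chosen only to push the sum past 2^31.

#include <opencv2/core.hpp>
#include <cstdio>

int main()
{
    cv::Mat a(6000, 6000, CV_8UC1, cv::Scalar(255));
    cv::Mat b(6000, 6000, CV_8UC1, cv::Scalar(0));

    // 6000 * 6000 * 255 = 9.18e9, well above INT_MAX (~2.147e9).
    double n1  = cv::norm(a, cv::NORM_L1);      // single-array overload
    double nd1 = cv::norm(a, b, cv::NORM_L1);   // two-array (difference) overload

    std::printf("L1(a) = %.0f, L1(a - b) = %.0f\n", n1, nd1);
    return 0;
}
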
Simple merge
@@@ -115,18 -108,6 +115,7 @@@ public
                  inputs[i].copyTo(outputs[i]);
      }
  
- #ifdef HAVE_CUDA
-     Ptr<BackendNode> initCUDA(
-         void *context_,
-         const std::vector<Ptr<BackendWrapper>>& inputs,
-         const std::vector<Ptr<BackendWrapper>>& outputs
-     ) override
-     {
-         auto context = reinterpret_cast<csl::CSLContext*>(context_);
-         return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
-     }
- #endif
 +
  #ifdef HAVE_DNN_IE_NN_BUILDER_2019
      virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
      {
          return Ptr<BackendNode>(new InfEngineNgraphNode(blank));
      }
  #endif  // HAVE_DNN_NGRAPH
++
++
++#ifdef HAVE_CUDA
++    Ptr<BackendNode> initCUDA(
++        void *context_,
++        const std::vector<Ptr<BackendWrapper>>& inputs,
++        const std::vector<Ptr<BackendWrapper>>& outputs
++    ) override
++    {
++        auto context = reinterpret_cast<csl::CSLContext*>(context_);
++        return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
++    }
++#endif
++
  };
  
  Ptr<Layer> BlankLayer::create(const LayerParams& params)
@@@ -75,6 -68,6 +75,7 @@@ public
          blobs[0].copyTo(outputs[0]);
      }
  
++
  #ifdef HAVE_DNN_IE_NN_BUILDER_2019
      virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
      {
                                                             blobs[0].data);
          return Ptr<BackendNode>(new InfEngineNgraphNode(node));
      }
- #endif  // HAVE_DNN_IE_NN_BUILDER_2019
 -#endif  // HAVE_NGRAPH
++#endif  // HAVE_DNN_NGRAPH
++
 +
 +#ifdef HAVE_CUDA
 +    Ptr<BackendNode> initCUDA(
 +        void *context_,
 +        const std::vector<Ptr<BackendWrapper>>& inputs,
 +        const std::vector<Ptr<BackendWrapper>>& outputs
 +    ) override
 +    {
 +        auto context = reinterpret_cast<csl::CSLContext*>(context_);
 +
 +        CV_Assert(blobs.size() == 1);
 +        return make_cuda_node<cuda4dnn::ConstOp>(preferableTarget, std::move(context->stream), blobs[0]);
 +    }
 +#endif
  
  };
  
@@@ -171,18 -164,6 +171,7 @@@ public
          }
      }
  
- #ifdef HAVE_CUDA
-     Ptr<BackendNode> initCUDA(
-         void *context_,
-         const std::vector<Ptr<BackendWrapper>>& inputs,
-         const std::vector<Ptr<BackendWrapper>>& outputs
-     ) override
-     {
-         auto context = reinterpret_cast<csl::CSLContext*>(context_);
-         return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
-     }
- #endif
 +
  #ifdef HAVE_DNN_IE_NN_BUILDER_2019
      virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
      {
      }
  #endif  // HAVE_DNN_IE_NN_BUILDER_2019
  
++
  #ifdef HAVE_DNN_NGRAPH
  virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                      const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
      }
  #endif  // HAVE_DNN_NGRAPH
  
++
++#ifdef HAVE_CUDA
++    Ptr<BackendNode> initCUDA(
++        void *context_,
++        const std::vector<Ptr<BackendWrapper>>& inputs,
++        const std::vector<Ptr<BackendWrapper>>& outputs
++    ) override
++    {
++        auto context = reinterpret_cast<csl::CSLContext*>(context_);
++        return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
++    }
++#endif
++
++
      int _startAxis;
      int _endAxis;
  };
@@@ -268,33 -261,6 +268,7 @@@ public
          }
      }
  
- #ifdef HAVE_CUDA
-     Ptr<BackendNode> initCUDA(
-         void *context_,
-         const std::vector<Ptr<BackendWrapper>>& inputs,
-         const std::vector<Ptr<BackendWrapper>>& outputs
-     ) override
-     {
-         auto context = reinterpret_cast<csl::CSLContext*>(context_);
-         if(pnorm != 1 && pnorm != 2)
-             CV_Error(Error::StsNotImplemented, "Unsupported normalization mode");
-         auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
-         auto input_shape = input_wrapper->getShape();
-         NormalizeConfiguration<float> config;
-         config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
-         config.axis_start = clamp(startAxis, input_shape.size());
-         config.axis_end = clamp(endAxis, input_shape.size()) + 1; /* +1 because NormalizeOp follows [start, end) convention */
-         config.norm = pnorm;
-         config.eps = epsilon;
-         const auto& weightsMat = blobs.empty() ? Mat() : blobs[0];
-         return make_cuda_node<cuda4dnn::NormalizeOp>(preferableTarget, std::move(context->stream), weightsMat, config);
-     }
- #endif
 +
  #ifdef HAVE_DNN_IE_NN_BUILDER_2019
      virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
      {
      }
  #endif  // HAVE_DNN_IE_NN_BUILDER_2019
  
++
  #ifdef HAVE_DNN_NGRAPH
      virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                          const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
      }
  #endif  // HAVE_DNN_NGRAPH
  
++
++#ifdef HAVE_CUDA
++    Ptr<BackendNode> initCUDA(
++        void *context_,
++        const std::vector<Ptr<BackendWrapper>>& inputs,
++        const std::vector<Ptr<BackendWrapper>>& outputs
++    ) override
++    {
++        auto context = reinterpret_cast<csl::CSLContext*>(context_);
++
++        if(pnorm != 1 && pnorm != 2)
++            CV_Error(Error::StsNotImplemented, "Unsupported normalization mode");
++
++        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
++        auto input_shape = input_wrapper->getShape();
++
++        NormalizeConfiguration<float> config;
++        config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
++        config.axis_start = clamp(startAxis, input_shape.size());
++        config.axis_end = clamp(endAxis, input_shape.size()) + 1; /* +1 because NormalizeOp follows [start, end) convention */
++        config.norm = pnorm;
++        config.eps = epsilon;
++
++        const auto& weightsMat = blobs.empty() ? Mat() : blobs[0];
++        return make_cuda_node<cuda4dnn::NormalizeOp>(preferableTarget, std::move(context->stream), weightsMat, config);
++    }
++#endif
++
++
  private:
      int startAxis, endAxis;
  };
@@@ -381,28 -371,6 +381,7 @@@ public
          }
      }
  
- #ifdef HAVE_CUDA
-     Ptr<BackendNode> initCUDA(
-         void *context_,
-         const std::vector<Ptr<BackendWrapper>>& inputs,
-         const std::vector<Ptr<BackendWrapper>>& outputs
-     ) override
-     {
-         auto context = reinterpret_cast<csl::CSLContext*>(context_);
-         return make_cuda_node<cuda4dnn::PermuteOp>(preferableTarget, std::move(context->stream), _order);
-     }
- #endif
-     virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
-     {
- #ifdef HAVE_VULKAN
-         CV_Assert(!_order.empty());
-         std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPermute(_order));
-         return Ptr<BackendNode>(new VkComBackendNode(input, op));
- #endif // HAVE_VULKAN
-         return Ptr<BackendNode>();
-     }
 +
  #ifdef HAVE_DNN_IE_NN_BUILDER_2019
      virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
      {
      }
  #endif  // HAVE_DNN_IE_NN_BUILDER_2019
  
++
  #ifdef HAVE_DNN_NGRAPH
      virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                          const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
      }
  #endif  // HAVE_DNN_NGRAPH
  
++
++#ifdef HAVE_CUDA
++    Ptr<BackendNode> initCUDA(
++        void *context_,
++        const std::vector<Ptr<BackendWrapper>>& inputs,
++        const std::vector<Ptr<BackendWrapper>>& outputs
++    ) override
++    {
++        auto context = reinterpret_cast<csl::CSLContext*>(context_);
++        return make_cuda_node<cuda4dnn::PermuteOp>(preferableTarget, std::move(context->stream), _order);
++    }
++#endif
++
++
++#ifdef HAVE_VULKAN
++    virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
++    {
++        CV_Assert(!_order.empty());
++        std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPermute(_order));
++        return Ptr<BackendNode>(new VkComBackendNode(input, op));
++    }
++#endif // HAVE_VULKAN
++
++
      size_t _count;
      std::vector<size_t> _order;
  
@@@ -184,12 -174,8 +184,12 @@@ public
  
      virtual bool supportBackend(int backendId) CV_OVERRIDE
      {
 +        if (backendId == DNN_BACKEND_CUDA)
 +        {
 +            return type == MAX || type == AVE || type == ROI;
 +        }
  #ifdef HAVE_DNN_IE_NN_BUILDER_2019
-         else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+         if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
          {
              if (computeMaxIdx)
                  return false;
          {
              return !computeMaxIdx && type != STOCHASTIC;
          }
-         else if (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_VKCOM)
 -        else if (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE)
++        if (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_VKCOM)
          {
              if (kernel_size.size() == 3)
                  return (backendId == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU);
          }
      }
  
- #ifdef HAVE_VULKAN
 +#ifdef HAVE_CUDA
 +    Ptr<BackendNode> initCUDA(
 +        void *context_,
 +        const std::vector<Ptr<BackendWrapper>>& inputs,
 +        const std::vector<Ptr<BackendWrapper>>& outputs
 +    ) override
 +    {
 +        auto context = reinterpret_cast<csl::CSLContext*>(context_);
 +        if (type == ROI)
 +            return make_cuda_node<cuda4dnn::ROIPoolingOp>(preferableTarget, std::move(context->stream), spatialScale);
 +
 +        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
 +        auto input_shape = input_wrapper->getShape();
 +
 +        /* storing max indices is a special case and we deal with it separately */
 +        if (computeMaxIdx) {
 +            CV_Assert(type == MAX);
 +
 +            cuda4dnn::MaxPoolingConfiguration config;
 +            config.window_size.assign(std::begin(kernel_size), std::end(kernel_size));
 +            config.strides.assign(std::begin(strides), std::end(strides));
 +
 +            if (padMode.empty())
 +            {
 +                config.padMode = MaxPoolingConfiguration::PaddingMode::MANUAL;
 +                config.pads_begin.assign(std::begin(pads_begin), std::end(pads_begin));
 +            }
 +            else if (padMode == "VALID")
 +            {
 +                config.padMode = MaxPoolingConfiguration::PaddingMode::VALID;
 +            }
 +            else if (padMode == "SAME")
 +            {
 +                config.padMode = MaxPoolingConfiguration::PaddingMode::SAME;
 +            }
 +            else
 +            {
 +                CV_Error(Error::StsNotImplemented, padMode + " padding mode not supported by PoolingLayer");
 +            }
 +
 +            config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
 +
 +            return make_cuda_node<cuda4dnn::MaxPoolingOp>(preferableTarget, std::move(context->stream), config);
 +        }
 +
 +        PoolingConfiguration config;
 +        if (type == MAX)
 +        {
 +            config.poolMode = PoolingConfiguration::PoolingMode::MAX;
 +        }
 +        else if (type == AVE && !avePoolPaddedArea)
 +        {
 +            config.poolMode = PoolingConfiguration::PoolingMode::AVERAGE_EXCLUDE_PADDING;
 +        }
 +        else if (type == AVE && avePoolPaddedArea)
 +        {
 +            config.poolMode = PoolingConfiguration::PoolingMode::AVERAGE_INCLUDE_PADDING;
 +        }
 +        else
 +        {
 +            CV_Error(Error::StsNotImplemented, "Unsupported pooling mode");
 +        }
 +
 +        config.window_size.assign(std::begin(kernel_size), std::end(kernel_size));
 +        config.strides.assign(std::begin(strides), std::end(strides));
 +
 +        if (padMode.empty())
 +        {
 +            config.padMode = PoolingConfiguration::PaddingMode::MANUAL;
 +            config.pads_begin.assign(std::begin(pads_begin), std::end(pads_begin));
 +            config.pads_end.assign(std::begin(pads_end), std::end(pads_end));
 +        }
 +        else if (padMode == "VALID")
 +        {
 +            config.padMode = PoolingConfiguration::PaddingMode::VALID;
 +        }
 +        else if (padMode == "SAME")
 +        {
 +            config.padMode = PoolingConfiguration::PaddingMode::SAME;
 +        }
 +        else
 +        {
 +            CV_Error(Error::StsNotImplemented, padMode + " padding mode not supported by PoolingLayer");
 +        }
 +
 +        if (ceilMode)
 +            config.roundMode = PoolingConfiguration::RoundingMode::CEIL;
 +        else
 +            config.roundMode = PoolingConfiguration::RoundingMode::FLOOR;
 +
 +        config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
 +
 +        return make_cuda_node<cuda4dnn::PoolingOp>(preferableTarget, std::move(context->cudnn_handle), config);
 +    }
 +#endif
 +
++
++#ifdef HAVE_VULKAN
 +    virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
 +    {
- #endif
-         return Ptr<BackendNode>();
 +        int padding_mode;
 +        vkcom::PoolType pool_type;
 +        int filter_size[2] = {kernel.height, kernel.width};
 +        int pad_size[2] = {pad.height, pad.width};
 +        int stride_size[2] = {stride.height, stride.width};
 +        pool_type = type == MAX ? vkcom::kPoolTypeMax:
 +                   (type == AVE ? vkcom::kPoolTypeAvg:
 +                            vkcom::kPoolTypeNum);
 +
 +        if (padMode.empty())
 +        {
 +            padding_mode = vkcom::kPaddingModeCaffe;
 +        }
 +        else if (padMode == "VALID")
 +        {
 +            padding_mode = vkcom::kPaddingModeValid;
 +        }
 +        else if (padMode == "SAME")
 +        {
 +            padding_mode = vkcom::kPaddingModeSame;
 +        }
 +        else
 +            CV_Error(Error::StsError, "Unsupported padding mode " + padMode);
 +
 +        std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPool(filter_size, pad_size,
 +                                                            stride_size, padding_mode,
 +                                                            pool_type, avePoolPaddedArea));
 +        return Ptr<BackendNode>(new VkComBackendNode(inputs, op));
 +    }
++#endif
++
 +
      virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
      {
          if (type == MAX)
  #endif  // HAVE_DNN_IE_NN_BUILDER_2019
  
  
--
  #ifdef HAVE_DNN_NGRAPH
--virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
--                                    const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
--{
--    CV_Assert_N((inputs.size() == 1 && (type == MAX || type == AVE)) || inputs.size() == 2, nodes.size() == inputs.size());
--    auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
--
--    ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
--    if (!padMode.empty())
--        pad_type = padMode == "VALID" ? ngraph::op::PadType::VALID : ngraph::op::PadType::SAME_UPPER;
--
--    auto rounding_type = ceilMode ? ngraph::op::RoundingType::CEIL : ngraph::op::RoundingType::FLOOR;
--    if (type == AVE) {
--        auto exclude_pad = !avePoolPaddedArea;
--        auto ave_pool = std::make_shared<ngraph::op::v1::AvgPool>(ieInpNode, ngraph::Strides(strides),
--                        ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
--                        exclude_pad, rounding_type, pad_type);
--        return Ptr<BackendNode>(new InfEngineNgraphNode(ave_pool));
--    }
--    else if (type == MAX) {
--        auto max_pool = std::make_shared<ngraph::op::v1::MaxPool>(ieInpNode, ngraph::Strides(strides),
--                        ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
--                        rounding_type, pad_type);
--        return Ptr<BackendNode>(new InfEngineNgraphNode(max_pool));
--    }
--    else if (type == ROI) {
--        auto& coords = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
--        auto roi = std::make_shared<ngraph::op::ROIPooling>(ieInpNode, coords,
--                   ngraph::Shape{(size_t)pooledSize.height, (size_t)pooledSize.width}, spatialScale, "max");
--        return Ptr<BackendNode>(new InfEngineNgraphNode(roi));
--    }
--    else if (type == PSROI) {
--        auto& coords = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
--        auto psroi = std::make_shared<ngraph::op::PSROIPooling>(ieInpNode, coords,
--                     (size_t)psRoiOutChannels, (size_t)pooledSize.width, spatialScale, 1, 1, "average");
--        return Ptr<BackendNode>(new InfEngineNgraphNode(psroi));
++    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
++                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
++    {
++        CV_Assert_N((inputs.size() == 1 && (type == MAX || type == AVE)) || inputs.size() == 2, nodes.size() == inputs.size());
++        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
++
++        ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
++        if (!padMode.empty())
++            pad_type = padMode == "VALID" ? ngraph::op::PadType::VALID : ngraph::op::PadType::SAME_UPPER;
++
++        auto rounding_type = ceilMode ? ngraph::op::RoundingType::CEIL : ngraph::op::RoundingType::FLOOR;
++        if (type == AVE) {
++            auto exclude_pad = !avePoolPaddedArea;
++            auto ave_pool = std::make_shared<ngraph::op::v1::AvgPool>(ieInpNode, ngraph::Strides(strides),
++                            ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
++                            exclude_pad, rounding_type, pad_type);
++            return Ptr<BackendNode>(new InfEngineNgraphNode(ave_pool));
++        }
++        else if (type == MAX) {
++            auto max_pool = std::make_shared<ngraph::op::v1::MaxPool>(ieInpNode, ngraph::Strides(strides),
++                            ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
++                            rounding_type, pad_type);
++            return Ptr<BackendNode>(new InfEngineNgraphNode(max_pool));
++        }
++        else if (type == ROI) {
++            auto& coords = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
++            auto roi = std::make_shared<ngraph::op::ROIPooling>(ieInpNode, coords,
++                       ngraph::Shape{(size_t)pooledSize.height, (size_t)pooledSize.width}, spatialScale, "max");
++            return Ptr<BackendNode>(new InfEngineNgraphNode(roi));
++        }
++        else if (type == PSROI) {
++            auto& coords = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
++            auto psroi = std::make_shared<ngraph::op::PSROIPooling>(ieInpNode, coords,
++                         (size_t)psRoiOutChannels, (size_t)pooledSize.width, spatialScale, 1, 1, "average");
++            return Ptr<BackendNode>(new InfEngineNgraphNode(psroi));
++        }
++        else
++            CV_Error(Error::StsNotImplemented, "Unsupported pooling type");
      }
--    else
--        CV_Error(Error::StsNotImplemented, "Unsupported pooling type");
--}
  #endif  // HAVE_DNN_NGRAPH
  
  
@@@ -504,57 -494,6 +504,7 @@@ public
          }
      }
  
- #ifdef HAVE_CUDA
-     Ptr<BackendNode> initCUDA(
-         void *context_,
-         const std::vector<Ptr<BackendWrapper>>& inputs,
-         const std::vector<Ptr<BackendWrapper>>& outputs
-     ) override
-     {
-         auto context = reinterpret_cast<csl::CSLContext*>(context_);
-         auto feature_map_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
-         auto feature_map_shape = feature_map_wrapper->getShape();
-         auto image_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
-         auto image_shape = image_wrapper->getShape();
-         PriorBoxConfiguration config;
-         config.feature_map_width = feature_map_shape.rbegin()[0];
-         config.feature_map_height = feature_map_shape.rbegin()[1];
-         config.image_width = image_shape.rbegin()[0];
-         config.image_height = image_shape.rbegin()[1];
-         config.num_priors = _numPriors;
-         config.box_widths = _boxWidths;
-         config.box_heights = _boxHeights;
-         config.offsets_x = _offsetsX;
-         config.offsets_y = _offsetsY;
-         config.stepX = _stepX;
-         config.stepY = _stepY;
-         config.variance = _variance;
-         config.clip = _clip;
-         config.normalize = _bboxesNormalized;
-         return make_cuda_node<cuda4dnn::PriorBoxOp>(preferableTarget, std::move(context->stream), config);
-     }
- #endif
-     virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
-     {
- #ifdef HAVE_VULKAN
-         std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPriorBox(_stepX, _stepY,
-                                                                 _clip, _numPriors,
-                                                                 _variance, _offsetsX,
-                                                                 _offsetsY, _boxWidths,
-                                                                 _boxHeights));
-         return Ptr<BackendNode>(new VkComBackendNode(input, op));
- #endif // HAVE_VULKAN
-         return Ptr<BackendNode>();
-     }
 +
  #ifdef HAVE_DNN_IE_NN_BUILDER_2019
      virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
      {
      }
  #endif  // HAVE_DNN_IE_NN_BUILDER_2019
  
++
  #ifdef HAVE_DNN_NGRAPH
      virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
      {
  #endif  // HAVE_DNN_NGRAPH
  
  
++#ifdef HAVE_CUDA
++    Ptr<BackendNode> initCUDA(
++        void *context_,
++        const std::vector<Ptr<BackendWrapper>>& inputs,
++        const std::vector<Ptr<BackendWrapper>>& outputs
++    ) override
++    {
++        auto context = reinterpret_cast<csl::CSLContext*>(context_);
++
++        auto feature_map_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
++        auto feature_map_shape = feature_map_wrapper->getShape();
++
++        auto image_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
++        auto image_shape = image_wrapper->getShape();
++
++        PriorBoxConfiguration config;
++        config.feature_map_width = feature_map_shape.rbegin()[0];
++        config.feature_map_height = feature_map_shape.rbegin()[1];
++        config.image_width = image_shape.rbegin()[0];
++        config.image_height = image_shape.rbegin()[1];
++
++        config.num_priors = _numPriors;
++        config.box_widths = _boxWidths;
++        config.box_heights = _boxHeights;
++        config.offsets_x = _offsetsX;
++        config.offsets_y = _offsetsY;
++        config.stepX = _stepX;
++        config.stepY = _stepY;
++
++        config.variance = _variance;
++
++        config.clip = _clip;
++        config.normalize = _bboxesNormalized;
++
++        return make_cuda_node<cuda4dnn::PriorBoxOp>(preferableTarget, std::move(context->stream), config);
++    }
++#endif
++
++
++#ifdef HAVE_VULKAN
++    virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
++    {
++        std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPriorBox(_stepX, _stepY,
++                                                                _clip, _numPriors,
++                                                                _variance, _offsetsX,
++                                                                _offsetsY, _boxWidths,
++                                                                _boxHeights));
++        return Ptr<BackendNode>(new VkComBackendNode(input, op));
++    }
++#endif // HAVE_VULKAN
++
++
      virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                             const std::vector<MatShape> &outputs) const CV_OVERRIDE
      {
@@@ -193,18 -185,6 +193,7 @@@ public
          permute->forward(inputs, outputs, internals_arr);
      }
  
- #ifdef HAVE_CUDA
-     Ptr<BackendNode> initCUDA(
-         void *context_,
-         const std::vector<Ptr<BackendWrapper>>& inputs,
-         const std::vector<Ptr<BackendWrapper>>& outputs
-     ) override
-     {
-         auto context = reinterpret_cast<csl::CSLContext*>(context_);
-         return make_cuda_node<cuda4dnn::ReorgOp>(preferableTarget, std::move(context->stream), reorgStride);
-     }
- #endif
 +
  #ifdef HAVE_DNN_IE_NN_BUILDER_2019
      virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
      {
      }
  #endif  // HAVE_DNN_IE_NN_BUILDER_2019
  
++
  #ifdef HAVE_DNN_NGRAPH
      virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
                                          const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
      }
  #endif  // HAVE_DNN_NGRAPH
  
++
++#ifdef HAVE_CUDA
++    Ptr<BackendNode> initCUDA(
++        void *context_,
++        const std::vector<Ptr<BackendWrapper>>& inputs,
++        const std::vector<Ptr<BackendWrapper>>& outputs
++    ) override
++    {
++        auto context = reinterpret_cast<csl::CSLContext*>(context_);
++        return make_cuda_node<cuda4dnn::ReorgOp>(preferableTarget, std::move(context->stream), reorgStride);
++    }
++#endif
++
++
      virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                             const std::vector<MatShape> &outputs) const CV_OVERRIDE
      {
@@@ -267,18 -260,6 +267,7 @@@ public
          }
      }
  
- #ifdef HAVE_CUDA
-     Ptr<BackendNode> initCUDA(
-         void *context_,
-         const std::vector<Ptr<BackendWrapper>>& inputs,
-         const std::vector<Ptr<BackendWrapper>>& outputs
-     ) override
-     {
-         auto context = reinterpret_cast<csl::CSLContext*>(context_);
-         return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
-     }
- #endif
 +
  #ifdef HAVE_DNN_IE_NN_BUILDER_2019
      virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
      {
      }
  #endif  // HAVE_DNN_IE_NN_BUILDER_2019
  
++
  #ifdef HAVE_DNN_NGRAPH
      virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                          const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
      }
  #endif  // HAVE_DNN_NGRAPH
  
++
++#ifdef HAVE_CUDA
++    Ptr<BackendNode> initCUDA(
++        void *context_,
++        const std::vector<Ptr<BackendWrapper>>& inputs,
++        const std::vector<Ptr<BackendWrapper>>& outputs
++    ) override
++    {
++        auto context = reinterpret_cast<csl::CSLContext*>(context_);
++        return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
++    }
++#endif
++
++
  private:
      std::vector<MatShape> outShapes;
  };
@@@ -170,27 -161,6 +170,7 @@@ public
              CV_Error(Error::StsNotImplemented, "Unknown interpolation: " + interpolation);
      }
  
- #ifdef HAVE_CUDA
-     Ptr<BackendNode> initCUDA(
-         void *context_,
-         const std::vector<Ptr<BackendWrapper>>& inputs,
-         const std::vector<Ptr<BackendWrapper>>& outputs
-     ) override
-     {
-         auto context = reinterpret_cast<csl::CSLContext*>(context_);
-         cuda4dnn::InterpolationType itype;
-         if (interpolation == "nearest")
-             itype = InterpolationType::NEAREST_NEIGHBOUR;
-         else if (interpolation == "bilinear")
-             itype = InterpolationType::BILINEAR;
-         else
-             CV_Error(Error::StsNotImplemented, "Requested interpolation mode is not available in resize layer.");
-         return make_cuda_node<cuda4dnn::ResizeOp>(preferableTarget, std::move(context->stream), itype, scaleHeight, scaleWidth);
-     }
- #endif
 +
  #ifdef HAVE_DNN_IE_NN_BUILDER_2019
      virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
      {
      }
  #endif  // HAVE_DNN_NGRAPH
  
++
++#ifdef HAVE_CUDA
++    Ptr<BackendNode> initCUDA(
++        void *context_,
++        const std::vector<Ptr<BackendWrapper>>& inputs,
++        const std::vector<Ptr<BackendWrapper>>& outputs
++    ) override
++    {
++        auto context = reinterpret_cast<csl::CSLContext*>(context_);
++
++        cuda4dnn::InterpolationType itype;
++        if (interpolation == "nearest")
++            itype = InterpolationType::NEAREST_NEIGHBOUR;
++        else if (interpolation == "bilinear")
++            itype = InterpolationType::BILINEAR;
++        else
++            CV_Error(Error::StsNotImplemented, "Requested interpolation mode is not available in resize layer.");
++
++        return make_cuda_node<cuda4dnn::ResizeOp>(preferableTarget, std::move(context->stream), itype, scaleHeight, scaleWidth);
++    }
++#endif
++
++
  protected:
      int outWidth, outHeight;
      const int zoomFactorWidth, zoomFactorHeight;
@@@ -273,28 -266,6 +273,7 @@@ public
          }
      }
  
- #ifdef HAVE_CUDA
-     Ptr<BackendNode> initCUDA(
-         void *context_,
-         const std::vector<Ptr<BackendWrapper>>& inputs,
-         const std::vector<Ptr<BackendWrapper>>& outputs
-     ) override
-     {
-         auto context = reinterpret_cast<csl::CSLContext*>(context_);
-         std::vector<std::vector<std::size_t>> offsets;
-         for (const auto& ranges : sliceRanges)
-         {
-             std::vector<std::size_t> offsets_i;
-             for (const auto& range : ranges)
-                 offsets_i.push_back(range.start);
-             offsets.push_back(std::move(offsets_i));
-         }
-         return make_cuda_node<cuda4dnn::SliceOp>(preferableTarget, std::move(context->stream), std::move(offsets));
-     }
- #endif
 +
  #ifdef HAVE_DNN_IE_NN_BUILDER_2019
  #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
      virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
  #endif
  #endif
  
++
  #ifdef HAVE_DNN_NGRAPH
      virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                          const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
      }
  #endif  // HAVE_DNN_NGRAPH
  
++
++#ifdef HAVE_CUDA
++    Ptr<BackendNode> initCUDA(
++        void *context_,
++        const std::vector<Ptr<BackendWrapper>>& inputs,
++        const std::vector<Ptr<BackendWrapper>>& outputs
++    ) override
++    {
++        auto context = reinterpret_cast<csl::CSLContext*>(context_);
++
++        std::vector<std::vector<std::size_t>> offsets;
++        for (const auto& ranges : sliceRanges)
++        {
++            std::vector<std::size_t> offsets_i;
++            for (const auto& range : ranges)
++                offsets_i.push_back(range.start);
++            offsets.push_back(std::move(offsets_i));
++        }
++
++        return make_cuda_node<cuda4dnn::SliceOp>(preferableTarget, std::move(context->stream), std::move(offsets));
++    }
++#endif
++
  };
  
  class CropLayerImpl CV_FINAL : public SliceLayerImpl
Simple merge