Merge remote-tracking branch 'upstream/3.4' into merge-3.4
author Alexander Alekhin <alexander.a.alekhin@gmail.com>
Mon, 9 Mar 2020 19:50:00 +0000 (19:50 +0000)
committer Alexander Alekhin <alexander.a.alekhin@gmail.com>
Mon, 9 Mar 2020 20:27:34 +0000 (20:27 +0000)
1  2 
CMakeLists.txt
modules/dnn/CMakeLists.txt
modules/dnn/src/layers/convolution_layer.cpp
samples/cpp/stitching_detailed.cpp

diff --cc CMakeLists.txt
@@@ -419,9 -432,9 +419,12 @@@ OCV_OPTION(WITH_IMGCODEC_PFM "Include P
  OCV_OPTION(WITH_QUIRC "Include library QR-code decoding" ON
    VISIBLE_IF TRUE
    VERIFY HAVE_QUIRC)
 +OCV_OPTION(WITH_ANDROID_MEDIANDK "Use Android Media NDK for Video I/O (Android)" (ANDROID_NATIVE_API_LEVEL GREATER 20)
 +  VISIBLE_IF ANDROID
 +  VERIFY HAVE_ANDROID_MEDIANDK)
+ OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF
+   VISIBLE_IF (ARM OR AARCH64) AND UNIX AND NOT ANDROID AND NOT IOS
+   VERIFY HAVE_TENGINE)
  
  # OpenCV build components
  # ===================================================
@@@ -90,20 -86,17 +93,32 @@@ else(
    set(sources_options EXCLUDE_OPENCL)
  endif()
  
 +if(OPENCV_DNN_CUDA AND HAVE_CUDA AND HAVE_CUBLAS AND HAVE_CUDNN)
 +  list(APPEND include_dirs ${CUDA_TOOLKIT_INCLUDE} ${CUDNN_INCLUDE_DIRS})
 +  set(CC_LIST ${CUDA_ARCH_BIN})
 +  separate_arguments(CC_LIST)
 +  foreach(cc ${CC_LIST})
 +    if(cc VERSION_LESS 3.0)
 +      message(FATAL_ERROR "CUDA backend for DNN module requires CC 3.0 or higher. Please remove unsupported architectures from CUDA_ARCH_BIN option or disable OPENCV_DNN_CUDA=OFF.")
 +    endif()
 +  endforeach()
 +  unset(CC_LIST)
 +else()
 +  set(sources_options ${sources_options} EXCLUDE_CUDA)
 +endif()
 +
++
+ if(HAVE_TENGINE)
+       list(APPEND include_dirs ${TENGINE_INCLUDE_DIRS})
+       if(EXISTS ${TENGINE_LIBRARIES})
+               list(APPEND libs ${TENGINE_LIBRARIES})
+       else()
+               ocv_add_dependencies(opencv_dnn tengine)
+               list(APPEND libs ${TENGINE_LIBRARIES})
+       endif()
+ endif()
  ocv_module_include_directories(${include_dirs})
  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override")  # GCC
  #include "opencl_kernels_dnn.hpp"
  using namespace cv::dnn::ocl4dnn;
  #endif
+ #ifdef HAVE_TENGINE
+ #include "../tengine4dnn/include/tengine_graph_convolution.hpp"
+ #endif
  
 +#ifdef HAVE_CUDA
 +#include "../cuda4dnn/primitives/convolution.hpp"
 +#include "../cuda4dnn/primitives/transpose_convolution.hpp"
 +using namespace cv::dnn::cuda4dnn;
 +#endif
 +
  namespace cv
  {
  namespace dnn
@@@ -1427,78 -1275,45 +1430,111 @@@ public
              }
          }
  
-         int nstripes = std::max(getNumThreads(), 1);
+ #ifdef HAVE_TENGINE
+         int inch = inputs[0].size[1];                 // inch
+         int in_h = inputs[0].size[2];                 // in_h
+         int in_w = inputs[0].size[3];                 // in_w
+         int out_b = outputs[0].size[0];     // out batch size
+         int outch = outputs[0].size[1];       // outch
+         int out_h = outputs[0].size[2];       // out_h
+         int out_w = outputs[0].size[3];       // out_w
+         float *input_  = inputs[0].ptr<float>();
+         float *output_ = outputs[0].ptr<float>();
+         float *kernel_ = weightsMat.ptr<float>();
+         float *teg_bias = &biasvec[0];
+         bool tengine_ret = tengine_forward(input_, inch, ngroups, in_h, in_w,
+                                     output_, out_b, outch, out_h, out_w,
+                                     kernel_, kernel_size.size(), kernel.height, kernel.width,
+                                     teg_bias, stride.height, stride.width,
+                                     pad.height,  pad.width, dilation.height, dilation.width,
+                                     weightsMat.step1(), padMode);
+         /* activation */
+         if((true == tengine_ret) && activ )
+         {
+             int out_cstep = out_h * out_w;        // out_cstep
  
-         ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope,
-                           kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
+             ActivationLayer* activ_ = activ.get();
+             activ_->forwardSlice(output_, output_, out_cstep, out_cstep, 0, outch);
+         }
+         if(false == tengine_ret)
+ #endif
+         {
+             int nstripes = std::max(getNumThreads(), 1);
+             ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope,
+                             kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
+         }
      }
  
 +#ifdef HAVE_CUDA
 +    Ptr<BackendNode> initCUDA(
 +        void *context_,
 +        const std::vector<Ptr<BackendWrapper>>& inputs,
 +        const std::vector<Ptr<BackendWrapper>>& outputs
 +    ) override
 +    {
 +        auto context = reinterpret_cast<csl::CSLContext*>(context_);
 +
 +        CV_Assert(inputs.size() == 1);
 +        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
 +        auto input_shape = input_wrapper->getShape();
 +
 +        CV_Assert(outputs.size() == 1);
 +        auto output_wrapper = outputs[0].dynamicCast<CUDABackendWrapper>();
 +        auto output_shape = output_wrapper->getShape();
 +
 +        const auto output_feature_maps = blobs[0].size[0];
 +        const auto input_feature_maps = input_shape[1];
 +        const auto input_feature_maps_per_group = blobs[0].size[1];
 +        const auto groups = input_feature_maps / input_feature_maps_per_group;
 +
 +        ConvolutionConfiguration config;
 +        config.kernel_size.assign(std::begin(kernel_size), std::end(kernel_size));
 +        config.dilations.assign(std::begin(dilations), std::end(dilations));
 +        config.strides.assign(std::begin(strides), std::end(strides));
 +
 +        if (padMode.empty())
 +        {
 +            config.padMode = ConvolutionConfiguration::PaddingMode::MANUAL;
 +            config.pads_begin.assign(std::begin(pads_begin), std::end(pads_begin));
 +            config.pads_end.assign(std::begin(pads_end), std::end(pads_end));
 +        }
 +        else if (padMode == "VALID")
 +        {
 +            config.padMode = ConvolutionConfiguration::PaddingMode::VALID;
 +        }
 +        else if (padMode == "SAME")
 +        {
 +            config.padMode = ConvolutionConfiguration::PaddingMode::SAME;
 +        }
 +        else
 +        {
 +            CV_Error(Error::StsNotImplemented, padMode + " padding mode not supported by ConvolutionLayer");
 +        }
 +
 +        config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
 +        config.output_shape.assign(std::begin(output_shape), std::end(output_shape));
 +        config.groups = groups;
 +
 +        config.activation_type = cudaActType;
 +        config.relu_negative_slope = cuda_relu_slope;
 +        config.crelu_floor = cuda_crelu_floor;
 +        config.crelu_ceil = cuda_crelu_ceil;
 +        config.power_exp = cuda_power_exp;
 +
 +        Mat filtersMat = fusedWeights ? weightsMat : blobs[0];
 +        Mat biasMat = (hasBias() || fusedBias) ? Mat(output_feature_maps, 1, CV_32F, biasvec.data()) : Mat();
 +        if (countNonZero(biasMat) == 0)
 +            biasMat = Mat();
 +
 +        return make_cuda_node<cuda4dnn::ConvolutionOp>(
 +            preferableTarget, std::move(context->stream), std::move(context->cudnn_handle), config, filtersMat, biasMat);
 +    }
 +#endif
 +
      virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                             const std::vector<MatShape> &outputs) const CV_OVERRIDE
      {
  #include "opencv2/stitching/detail/warpers.hpp"
  #include "opencv2/stitching/warpers.hpp"
  
 +#ifdef HAVE_OPENCV_XFEATURES2D
++#include "opencv2/xfeatures2d.hpp"
 +#include "opencv2/xfeatures2d/nonfree.hpp"
 +#endif
 +
  #define ENABLE_LOG 1
  #define LOG(msg) std::cout << msg
  #define LOGLN(msg) std::cout << msg << std::endl