Merge remote-tracking branch 'upstream/3.4' into merge-3.4
author Alexander Alekhin <alexander.a.alekhin@gmail.com>
Mon, 9 Mar 2020 19:50:00 +0000 (19:50 +0000)
committer Alexander Alekhin <alexander.a.alekhin@gmail.com>
Mon, 9 Mar 2020 20:27:34 +0000 (20:27 +0000)
1  2 
CMakeLists.txt
modules/dnn/CMakeLists.txt
modules/dnn/src/layers/convolution_layer.cpp
samples/cpp/stitching_detailed.cpp

diff --cc CMakeLists.txt
@@@ -419,9 -432,9 +419,12 @@@ OCV_OPTION(WITH_IMGCODEC_PFM "Include P
  OCV_OPTION(WITH_QUIRC "Include library QR-code decoding" ON
    VISIBLE_IF TRUE
    VERIFY HAVE_QUIRC)
 +OCV_OPTION(WITH_ANDROID_MEDIANDK "Use Android Media NDK for Video I/O (Android)" (ANDROID_NATIVE_API_LEVEL GREATER 20)
 +  VISIBLE_IF ANDROID
 +  VERIFY HAVE_ANDROID_MEDIANDK)
+ OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF
+   VISIBLE_IF (ARM OR AARCH64) AND UNIX AND NOT ANDROID AND NOT IOS
+   VERIFY HAVE_TENGINE)
  
  # OpenCV build components
  # ===================================================
@@@ -90,20 -86,17 +93,32 @@@ else(
    set(sources_options EXCLUDE_OPENCL)
  endif()
  
 +if(OPENCV_DNN_CUDA AND HAVE_CUDA AND HAVE_CUBLAS AND HAVE_CUDNN)
 +  list(APPEND include_dirs ${CUDA_TOOLKIT_INCLUDE} ${CUDNN_INCLUDE_DIRS})
 +  set(CC_LIST ${CUDA_ARCH_BIN})
 +  separate_arguments(CC_LIST)
 +  foreach(cc ${CC_LIST})
 +    if(cc VERSION_LESS 3.0)
 +      message(FATAL_ERROR "CUDA backend for DNN module requires CC 3.0 or higher. Please remove unsupported architectures from CUDA_ARCH_BIN option or disable OPENCV_DNN_CUDA=OFF.")
 +    endif()
 +  endforeach()
 +  unset(CC_LIST)
 +else()
 +  set(sources_options ${sources_options} EXCLUDE_CUDA)
 +endif()
 +
++
+ if(HAVE_TENGINE)
+       list(APPEND include_dirs ${TENGINE_INCLUDE_DIRS})
+       if(EXISTS ${TENGINE_LIBRARIES})
+               list(APPEND libs ${TENGINE_LIBRARIES})
+       else()
+               ocv_add_dependencies(opencv_dnn tengine)
+               list(APPEND libs ${TENGINE_LIBRARIES})
+       endif()
+ endif()
  ocv_module_include_directories(${include_dirs})
  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override")  # GCC
  #include "opencl_kernels_dnn.hpp"
  using namespace cv::dnn::ocl4dnn;
  #endif
+ #ifdef HAVE_TENGINE
+ #include "../tengine4dnn/include/tengine_graph_convolution.hpp"
+ #endif
  
 +#ifdef HAVE_CUDA
 +#include "../cuda4dnn/primitives/convolution.hpp"
 +#include "../cuda4dnn/primitives/transpose_convolution.hpp"
 +using namespace cv::dnn::cuda4dnn;
 +#endif
 +
  namespace cv
  {
  namespace dnn
@@@ -1427,78 -1275,45 +1430,111 @@@ public
              }
          }
  
-         int nstripes = std::max(getNumThreads(), 1);
+ #ifdef HAVE_TENGINE
+         int inch = inputs[0].size[1];                 // inch
+         int in_h = inputs[0].size[2];                 // in_h
+         int in_w = inputs[0].size[3];                 // in_w
+         int out_b = outputs[0].size[0];     // out batch size
+         int outch = outputs[0].size[1];       // outch
+         int out_h = outputs[0].size[2];       // out_h
+         int out_w = outputs[0].size[3];       // out_w
+         float *input_  = inputs[0].ptr<float>();
+         float *output_ = outputs[0].ptr<float>();
+         float *kernel_ = weightsMat.ptr<float>();
+         float *teg_bias = &biasvec[0];
+         bool tengine_ret = tengine_forward(input_, inch, ngroups, in_h, in_w,
+                                     output_, out_b, outch, out_h, out_w,
+                                     kernel_, kernel_size.size(), kernel.height, kernel.width,
+                                     teg_bias, stride.height, stride.width,
+                                     pad.height,  pad.width, dilation.height, dilation.width,
+                                     weightsMat.step1(), padMode);
+         /* activation */
+         if((true == tengine_ret) && activ )
+         {
+             int out_cstep = out_h * out_w;        // out_cstep
  
-         ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope,
-                           kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
+             ActivationLayer* activ_ = activ.get();
+             activ_->forwardSlice(output_, output_, out_cstep, out_cstep, 0, outch);
+         }
+         if(false == tengine_ret)
+ #endif
+         {
+             int nstripes = std::max(getNumThreads(), 1);
+             ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope,
+                             kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
+         }
      }
  
 +#ifdef HAVE_CUDA
 +    Ptr<BackendNode> initCUDA(
 +        void *context_,
 +        const std::vector<Ptr<BackendWrapper>>& inputs,
 +        const std::vector<Ptr<BackendWrapper>>& outputs
 +    ) override
 +    {
 +        auto context = reinterpret_cast<csl::CSLContext*>(context_);
 +
 +        CV_Assert(inputs.size() == 1);
 +        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
 +        auto input_shape = input_wrapper->getShape();
 +
 +        CV_Assert(outputs.size() == 1);
 +        auto output_wrapper = outputs[0].dynamicCast<CUDABackendWrapper>();
 +        auto output_shape = output_wrapper->getShape();
 +
 +        const auto output_feature_maps = blobs[0].size[0];
 +        const auto input_feature_maps = input_shape[1];
 +        const auto input_feature_maps_per_group = blobs[0].size[1];
 +        const auto groups = input_feature_maps / input_feature_maps_per_group;
 +
 +        ConvolutionConfiguration config;
 +        config.kernel_size.assign(std::begin(kernel_size), std::end(kernel_size));
 +        config.dilations.assign(std::begin(dilations), std::end(dilations));
 +        config.strides.assign(std::begin(strides), std::end(strides));
 +
 +        if (padMode.empty())
 +        {
 +            config.padMode = ConvolutionConfiguration::PaddingMode::MANUAL;
 +            config.pads_begin.assign(std::begin(pads_begin), std::end(pads_begin));
 +            config.pads_end.assign(std::begin(pads_end), std::end(pads_end));
 +        }
 +        else if (padMode == "VALID")
 +        {
 +            config.padMode = ConvolutionConfiguration::PaddingMode::VALID;
 +        }
 +        else if (padMode == "SAME")
 +        {
 +            config.padMode = ConvolutionConfiguration::PaddingMode::SAME;
 +        }
 +        else
 +        {
 +            CV_Error(Error::StsNotImplemented, padMode + " padding mode not supported by ConvolutionLayer");
 +        }
 +
 +        config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
 +        config.output_shape.assign(std::begin(output_shape), std::end(output_shape));
 +        config.groups = groups;
 +
 +        config.activation_type = cudaActType;
 +        config.relu_negative_slope = cuda_relu_slope;
 +        config.crelu_floor = cuda_crelu_floor;
 +        config.crelu_ceil = cuda_crelu_ceil;
 +        config.power_exp = cuda_power_exp;
 +
 +        Mat filtersMat = fusedWeights ? weightsMat : blobs[0];
 +        Mat biasMat = (hasBias() || fusedBias) ? Mat(output_feature_maps, 1, CV_32F, biasvec.data()) : Mat();
 +        if (countNonZero(biasMat) == 0)
 +            biasMat = Mat();
 +
 +        return make_cuda_node<cuda4dnn::ConvolutionOp>(
 +            preferableTarget, std::move(context->stream), std::move(context->cudnn_handle), config, filtersMat, biasMat);
 +    }
 +#endif
 +
      virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                             const std::vector<MatShape> &outputs) const CV_OVERRIDE
      {
  #include "opencv2/stitching/detail/warpers.hpp"
  #include "opencv2/stitching/warpers.hpp"
  
 +#ifdef HAVE_OPENCV_XFEATURES2D
++#include "opencv2/xfeatures2d.hpp"
 +#include "opencv2/xfeatures2d/nonfree.hpp"
 +#endif
 +
  #define ENABLE_LOG 1
  #define LOG(msg) std::cout << msg
  #define LOGLN(msg) std::cout << msg << std::endl