From: Alexander Alekhin
Date: Mon, 9 Mar 2020 19:50:00 +0000 (+0000)
Subject: Merge remote-tracking branch 'upstream/3.4' into merge-3.4
X-Git-Tag: submit/tizen/20210224.033012~2^2~280
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9b3be01b83045a33f1d42f8e8873cf3e8d144558;p=platform%2Fupstream%2Fopencv.git

Merge remote-tracking branch 'upstream/3.4' into merge-3.4
---

9b3be01b83045a33f1d42f8e8873cf3e8d144558
diff --cc CMakeLists.txt
index a967c56,6d36719..41a4306
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@@ -419,9 -432,9 +419,12 @@@ OCV_OPTION(WITH_IMGCODEC_PFM "Include P
  OCV_OPTION(WITH_QUIRC "Include library QR-code decoding" ON
    VISIBLE_IF TRUE
    VERIFY HAVE_QUIRC)
 +OCV_OPTION(WITH_ANDROID_MEDIANDK "Use Android Media NDK for Video I/O (Android)" (ANDROID_NATIVE_API_LEVEL GREATER 20)
 +  VISIBLE_IF ANDROID
 +  VERIFY HAVE_ANDROID_MEDIANDK)
+ OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF
+   VISIBLE_IF (ARM OR AARCH64) AND UNIX AND NOT ANDROID AND NOT IOS
+   VERIFY HAVE_TENGINE)
  
  # OpenCV build components
  # ===================================================
diff --cc modules/dnn/CMakeLists.txt
index 194324c,c9532ad..d784f0f
--- a/modules/dnn/CMakeLists.txt
+++ b/modules/dnn/CMakeLists.txt
@@@ -90,20 -86,17 +93,32 @@@ else(
    set(sources_options EXCLUDE_OPENCL)
  endif()
  
 +if(OPENCV_DNN_CUDA AND HAVE_CUDA AND HAVE_CUBLAS AND HAVE_CUDNN)
 +  list(APPEND include_dirs ${CUDA_TOOLKIT_INCLUDE} ${CUDNN_INCLUDE_DIRS})
 +  set(CC_LIST ${CUDA_ARCH_BIN})
 +  separate_arguments(CC_LIST)
 +  foreach(cc ${CC_LIST})
 +    if(cc VERSION_LESS 3.0)
 +      message(FATAL_ERROR "CUDA backend for DNN module requires CC 3.0 or higher. Please remove unsupported architectures from CUDA_ARCH_BIN option or disable OPENCV_DNN_CUDA=OFF.")
 +    endif()
 +  endforeach()
 +  unset(CC_LIST)
 +else()
 +  set(sources_options ${sources_options} EXCLUDE_CUDA)
 +endif()
 +
++
+ if(HAVE_TENGINE)
+   list(APPEND include_dirs ${TENGINE_INCLUDE_DIRS})
+   if(EXISTS ${TENGINE_LIBRARIES})
+     list(APPEND libs ${TENGINE_LIBRARIES})
+   else()
+     ocv_add_dependencies(opencv_dnn tengine)
+     list(APPEND libs ${TENGINE_LIBRARIES})
+   endif()
+ endif()
+ 
  ocv_module_include_directories(${include_dirs})
  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override")  # GCC
diff --cc modules/dnn/src/layers/convolution_layer.cpp
index 05ef360,6deebb1..87e0468
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@@ -57,13 -55,10 +57,16 @@@
  #include "opencl_kernels_dnn.hpp"
  using namespace cv::dnn::ocl4dnn;
  #endif
+ #ifdef HAVE_TENGINE
+ #include "../tengine4dnn/include/tengine_graph_convolution.hpp"
+ #endif
  
 +#ifdef HAVE_CUDA
 +#include "../cuda4dnn/primitives/convolution.hpp"
 +#include "../cuda4dnn/primitives/transpose_convolution.hpp"
 +using namespace cv::dnn::cuda4dnn;
 +#endif
 +
  namespace cv
  {
  namespace dnn
@@@ -1427,78 -1275,45 +1430,111 @@@ public
              }
          }
  
-         int nstripes = std::max(getNumThreads(), 1);
+ #ifdef HAVE_TENGINE
+         int inch = inputs[0].size[1];   // inch
+         int in_h = inputs[0].size[2];   // in_h
+         int in_w = inputs[0].size[3];   // in_w
+ 
+         int out_b = outputs[0].size[0]; // out batch size
+         int outch = outputs[0].size[1]; // outch
+         int out_h = outputs[0].size[2]; // out_h
+         int out_w = outputs[0].size[3]; // out_w
+ 
+         float *input_  = inputs[0].ptr<float>();
+         float *output_ = outputs[0].ptr<float>();
+         float *kernel_ = weightsMat.ptr<float>();
+         float *teg_bias = &biasvec[0];
+ 
+         bool tengine_ret = tengine_forward(input_, inch, ngroups, in_h, in_w,
+                                            output_, out_b, outch, out_h, out_w,
+                                            kernel_, kernel_size.size(), kernel.height, kernel.width,
+                                            teg_bias, stride.height, stride.width,
+                                            pad.height, pad.width, dilation.height, dilation.width,
+                                            weightsMat.step1(), padMode);
+         /* activation */
+         if((true == tengine_ret) && activ )
+         {
+             int out_cstep = out_h * out_w;  // out_cstep
- 
-         ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope,
-                           kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
+             ActivationLayer* activ_ = activ.get();
+             activ_->forwardSlice(output_, output_, out_cstep, out_cstep, 0, outch);
+         }
+         if(false == tengine_ret)
+ #endif
+         {
+             int nstripes = std::max(getNumThreads(), 1);
+ 
+             ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope,
+                               kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
+         }
      }
  
 +#ifdef HAVE_CUDA
 +    Ptr<BackendNode> initCUDA(
 +        void *context_,
 +        const std::vector<Ptr<BackendWrapper>>& inputs,
 +        const std::vector<Ptr<BackendWrapper>>& outputs
 +    ) override
 +    {
 +        auto context = reinterpret_cast<csl::CSLContext*>(context_);
 +
 +        CV_Assert(inputs.size() == 1);
 +        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
 +        auto input_shape = input_wrapper->getShape();
 +
 +        CV_Assert(outputs.size() == 1);
 +        auto output_wrapper = outputs[0].dynamicCast<CUDABackendWrapper>();
 +        auto output_shape = output_wrapper->getShape();
 +
 +        const auto output_feature_maps = blobs[0].size[0];
 +        const auto input_feature_maps = input_shape[1];
 +        const auto input_feature_maps_per_group = blobs[0].size[1];
 +        const auto groups = input_feature_maps / input_feature_maps_per_group;
 +
 +        ConvolutionConfiguration config;
 +        config.kernel_size.assign(std::begin(kernel_size), std::end(kernel_size));
 +        config.dilations.assign(std::begin(dilations), std::end(dilations));
 +        config.strides.assign(std::begin(strides), std::end(strides));
 +
 +        if (padMode.empty())
 +        {
 +            config.padMode = ConvolutionConfiguration::PaddingMode::MANUAL;
 +            config.pads_begin.assign(std::begin(pads_begin), std::end(pads_begin));
 +            config.pads_end.assign(std::begin(pads_end), std::end(pads_end));
 +        }
 +        else if (padMode == "VALID")
 +        {
 +            config.padMode = ConvolutionConfiguration::PaddingMode::VALID;
 +        }
 +        else if (padMode == "SAME")
 +        {
 +            config.padMode = ConvolutionConfiguration::PaddingMode::SAME;
 +        }
 +        else
 +        {
 +            CV_Error(Error::StsNotImplemented, padMode + " padding mode not supported by ConvolutionLayer");
 +        }
 +
 +        config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
 +        config.output_shape.assign(std::begin(output_shape), std::end(output_shape));
 +        config.groups = groups;
 +
 +        config.activation_type = cudaActType;
 +        config.relu_negative_slope = cuda_relu_slope;
 +        config.crelu_floor = cuda_crelu_floor;
 +        config.crelu_ceil = cuda_crelu_ceil;
 +        config.power_exp = cuda_power_exp;
 +
 +        Mat filtersMat = fusedWeights ? weightsMat : blobs[0];
 +        Mat biasMat = (hasBias() || fusedBias) ? Mat(output_feature_maps, 1, CV_32F, biasvec.data()) : Mat();
 +        if (countNonZero(biasMat) == 0)
 +            biasMat = Mat();
 +
 +        return make_cuda_node<cuda4dnn::ConvolutionOp>(
 +            preferableTarget, std::move(context->stream), std::move(context->cudnn_handle), config, filtersMat, biasMat);
 +    }
 +#endif
 +
      virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                             const std::vector<MatShape> &outputs) const CV_OVERRIDE
      {
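The convolution hunk above follows a try-then-fall-back pattern: tengine_forward() is attempted first, and the original ParallelConv::run() CPU path runs only when it returns false, so a Tengine failure changes the speed of the layer, never its result. Below is a minimal standalone C++ sketch of that control flow (activation handling omitted); accelerated_forward() and reference_forward() are hypothetical stand-ins for tengine_forward() and ParallelConv::run(), not OpenCV APIs.

    // Sketch only: the accelerated path reports whether it handled the job;
    // when it declines, the reference path produces the same output.
    #include <cstdio>
    #include <vector>

    // Stand-in for an accelerated backend; returns false for shapes it
    // does not support, signalling the caller to fall back.
    static bool accelerated_forward(const std::vector<float>& in, std::vector<float>& out)
    {
        if (in.size() % 4 != 0)
            return false;              // unsupported shape -> request fallback
        for (std::size_t i = 0; i < in.size(); ++i)
            out[i] = in[i] * 2.0f;     // pretend this ran on the accelerator
        return true;
    }

    // Stand-in for the generic CPU implementation; always succeeds.
    static void reference_forward(const std::vector<float>& in, std::vector<float>& out)
    {
        for (std::size_t i = 0; i < in.size(); ++i)
            out[i] = in[i] * 2.0f;
    }

    int main()
    {
        std::vector<float> input{1, 2, 3, 4, 5};
        std::vector<float> output(input.size());

        if (!accelerated_forward(input, output))   // same shape as the HAVE_TENGINE block
            reference_forward(input, output);      // fallback, as in the diff

        for (float v : output)
            std::printf("%g ", v);
        std::printf("\n");
        return 0;
    }

Compiled on its own, the program prints the same doubled values whether or not the accelerated path accepts the input size, which is exactly the property the diff relies on.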
diff --cc samples/cpp/stitching_detailed.cpp
index 15534e7,98a52b3..6090998
--- a/samples/cpp/stitching_detailed.cpp
+++ b/samples/cpp/stitching_detailed.cpp
@@@ -17,10 -17,6 +17,11 @@@
  #include "opencv2/stitching/detail/warpers.hpp"
  #include "opencv2/stitching/warpers.hpp"
  
 +#ifdef HAVE_OPENCV_XFEATURES2D
++#include "opencv2/xfeatures2d.hpp"
 +#include "opencv2/xfeatures2d/nonfree.hpp"
 +#endif
 +
  #define ENABLE_LOG 1
  #define LOG(msg) std::cout << msg
  #define LOGLN(msg) std::cout << msg << std::endl
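The stitching_detailed.cpp hunk guards the opencv_contrib headers so the sample still builds when xfeatures2d is absent. As a rough illustration of how such a guard is typically consumed, the sketch below assumes an OpenCV build where CMake defines HAVE_OPENCV_XFEATURES2D when the module is enabled (and OPENCV_ENABLE_NONFREE for SURF); it picks SURF when present and falls back to ORB from the main modules:

    #include "opencv2/features2d.hpp"
    #ifdef HAVE_OPENCV_XFEATURES2D
    #include "opencv2/xfeatures2d/nonfree.hpp"
    #endif

    // Choose a feature finder that is available in the current build.
    static cv::Ptr<cv::Feature2D> makeFinder()
    {
    #ifdef HAVE_OPENCV_XFEATURES2D
        // SURF lives in opencv_contrib and needs the nonfree option.
        return cv::xfeatures2d::SURF::create();
    #else
        // ORB ships with the main modules, so this path always builds.
        return cv::ORB::create();
    #endif
    }

    int main()
    {
        cv::Ptr<cv::Feature2D> finder = makeFinder();
        // finder->detectAndCompute(...) would be called per input image.
        return finder.empty() ? 1 : 0;
    }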