From 11d492b0b90a8b2fd3fedc2b99e6750fc2babffa Mon Sep 17 00:00:00 2001
From: zoom
Date: Fri, 21 Oct 2022 13:11:22 +0800
Subject: [PATCH] Let part of the operators in nary_eltwise support CUDA

---
 modules/dnn/perf/perf_layer.cpp                | 12 ++++++++
 modules/dnn/src/layers/nary_eltwise_layers.cpp | 41 ++++++++++++++++++++++++++
 modules/dnn/src/op_cuda.cpp                    |  7 +++--
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/modules/dnn/perf/perf_layer.cpp b/modules/dnn/perf/perf_layer.cpp
index f169f4e..ffe0240 100644
--- a/modules/dnn/perf/perf_layer.cpp
+++ b/modules/dnn/perf/perf_layer.cpp
@@ -55,6 +55,8 @@ struct Layer_Slice : public TestBaseWithParam<tuple<Backend, Target> >
     }
 };
 
+static std::set<std::string> nary_eltwise_cuda_deny_ops = {"add", "equal", "greater", "less", "mean", "mul", "pow", "sub"};
+
 struct Layer_NaryEltwise : public TestBaseWithParam<tuple<Backend, Target> >
 {
     void test_layer(const std::vector<int>& a_shape, const std::vector<int>& b_shape, const String op, bool isRef = false)
@@ -62,6 +64,13 @@ struct Layer_NaryEltwise : public TestBaseWithParam<tuple<Backend, Target> >
         int backendId = get<0>(GetParam());
         int targetId = get<1>(GetParam());
 
+        if (!isRef && backendId == DNN_BACKEND_CUDA)
+        {
+            if (a_shape != b_shape)
+                throw SkipTestException("The test is skipped because inputs with different shapes are not supported.");
+            if (nary_eltwise_cuda_deny_ops.find(op) != nary_eltwise_cuda_deny_ops.end())
+                throw SkipTestException("The operator '" + op + "' is skipped because it is not supported with CUDA currently.");
+        }
         Mat a(a_shape, CV_32FC1);
         Mat b(b_shape, CV_32FC1);
 
@@ -410,6 +419,9 @@ PERF_TEST_P_(Layer_ScatterND, DISABLED_ScatterND_add)
 
 INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
+#ifdef HAVE_CUDA
+INSTANTIATE_TEST_CASE_P(CUDA, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)));
+#endif
 INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 
diff --git a/modules/dnn/src/layers/nary_eltwise_layers.cpp b/modules/dnn/src/layers/nary_eltwise_layers.cpp
index 9431ff2..ecd8b73 100644
--- a/modules/dnn/src/layers/nary_eltwise_layers.cpp
+++ b/modules/dnn/src/layers/nary_eltwise_layers.cpp
@@ -4,12 +4,18 @@
 
 #include "../precomp.hpp"
 #include "layers_common.hpp"
+#include "../op_cuda.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 
 #include <algorithm>
 #include <iterator>
 #include <numeric>
 
+#ifdef HAVE_CUDA
+#include "../cuda4dnn/primitives/eltwise.hpp"
+using namespace cv::dnn::cuda4dnn;
+#endif
+
 namespace cv
 {
 namespace dnn
@@ -91,6 +97,9 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+        if (op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM ||
+            op == OPERATION::PROD || op == OPERATION::DIV)
+            return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
         return backendId == DNN_BACKEND_OPENCV;
     }
 
@@ -641,6 +650,38 @@ public:
         };
     }
 
+#ifdef HAVE_CUDA
+    Ptr<BackendNode> initCUDA(
+        void *context_,
+        const std::vector<Ptr<BackendWrapper>>& inputs,
+        const std::vector<Ptr<BackendWrapper>>& outputs
+    ) override
+    {
+        auto context = reinterpret_cast<csl::CSLContext*>(context_);
+
+        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
+        for (int i = 1; i < inputs.size(); i++)
+        {
+            auto from_wrapper = inputs[i].dynamicCast<CUDABackendWrapper>();
+            if (input_wrapper->getShape() != from_wrapper->getShape())
+                return Ptr<BackendNode>();
+        }
+
+        auto op_ = [this] {
+            switch (op) {
+                case OPERATION::MAX: return cuda4dnn::EltwiseOpType::MAX;
+                case OPERATION::MIN: return cuda4dnn::EltwiseOpType::MIN;
+                case OPERATION::SUM: return cuda4dnn::EltwiseOpType::SUM;
+                case OPERATION::PROD: return cuda4dnn::EltwiseOpType::PRODUCT;
+                case OPERATION::DIV: return cuda4dnn::EltwiseOpType::DIV;
+                default: CV_Error(Error::StsNotImplemented, "Operators other than MAX, MIN, SUM, PRODUCT and DIV are not supported with CUDA.");
+            }
+        }();
+
+        return make_cuda_node<cuda4dnn::EltwiseOp>(preferableTarget, std::move(context->stream), op_, std::vector<float>());
+    }
+#endif
+
     virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
                              const std::vector<std::vector<float> > &zeropoints, LayerParams& params) CV_OVERRIDE
     {
diff --git a/modules/dnn/src/op_cuda.cpp b/modules/dnn/src/op_cuda.cpp
index a1b588e..46e68f7 100644
--- a/modules/dnn/src/op_cuda.cpp
+++ b/modules/dnn/src/op_cuda.cpp
@@ -86,8 +86,11 @@ void Net::Impl::initCUDABackend(const std::vector<LayerPin>& blobsToKeep_)
         auto node = layerInstance->initCUDA(&context, ld.inputBlobsWrappers, ld.outputBlobsWrappers);
         ld.backendNodes[DNN_BACKEND_CUDA] = node;
 
-        auto cudaNode = node.dynamicCast<CUDABackendNode>();
-        cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes());
+        if (!node.empty())
+        {
+            auto cudaNode = node.dynamicCast<CUDABackendNode>();
+            cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes());
+        }
     }
 
     if (blobsToKeep_.size() > 1)
-- 
2.7.4