Let part of the operators in nary_eltwise support cuda
author: zoom <zhongwl2018@mail.sustech.edu.cn>
Fri, 21 Oct 2022 05:11:22 +0000 (13:11 +0800)
committer: zoom <zhongwl2018@mail.sustech.edu.cn>
Wed, 2 Nov 2022 06:08:21 +0000 (14:08 +0800)
modules/dnn/perf/perf_layer.cpp
modules/dnn/src/layers/nary_eltwise_layers.cpp
modules/dnn/src/op_cuda.cpp

index f169f4e..ffe0240 100644 (file)
@@ -55,6 +55,8 @@ struct Layer_Slice : public TestBaseWithParam<tuple<Backend, Target> >
     }
 };
 
+static std::set<std::string> nary_eltwise_cuda_deny_ops = {"add", "equal", "greater", "less", "mean", "mul", "pow", "sub"};
+
 struct Layer_NaryEltwise : public TestBaseWithParam<tuple<Backend, Target> >
 {
     void test_layer(const std::vector<int>& a_shape, const std::vector<int>& b_shape, const String op, bool isRef = false)
@@ -62,6 +64,13 @@ struct Layer_NaryEltwise : public TestBaseWithParam<tuple<Backend, Target> >
         int backendId = get<0>(GetParam());
         int targetId = get<1>(GetParam());
 
+        if (!isRef && backendId == DNN_BACKEND_CUDA)
+        {
+            if (a_shape != b_shape)
+                throw SkipTestException("The test is skipped because inputs with different shapes are not supported.");
+            if (nary_eltwise_cuda_deny_ops.find(op) != nary_eltwise_cuda_deny_ops.end())
+                throw SkipTestException("The operator '" + op + "' is skipped because it is not supported with CUDA currently.");
+        }
         Mat a(a_shape, CV_32FC1);
         Mat b(b_shape, CV_32FC1);
 
@@ -410,6 +419,9 @@ PERF_TEST_P_(Layer_ScatterND, DISABLED_ScatterND_add)
 
 INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
+#ifdef HAVE_CUDA
+INSTANTIATE_TEST_CASE_P(CUDA, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)));
+#endif
 INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 
index 9431ff2..ecd8b73 100644 (file)
@@ -4,12 +4,18 @@
 
 #include "../precomp.hpp"
 #include "layers_common.hpp"
+#include "../op_cuda.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 
 #include <algorithm>
 #include <iterator>
 #include <numeric>
 
+#ifdef HAVE_CUDA
+#include "../cuda4dnn/primitives/eltwise.hpp"
+using namespace cv::dnn::cuda4dnn;
+#endif
+
 namespace cv
 {
 namespace dnn
@@ -91,6 +97,9 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+        if (op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM ||
+            op == OPERATION::PROD || op == OPERATION::DIV)
+            return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
         return backendId == DNN_BACKEND_OPENCV;
     }
 
@@ -641,6 +650,38 @@ public:
         };
     }
 
+#ifdef HAVE_CUDA
+    Ptr<BackendNode> initCUDA(
+        void *context_,
+        const std::vector<Ptr<BackendWrapper>>& inputs,
+        const std::vector<Ptr<BackendWrapper>>& outputs
+    ) override
+    {
+        auto context = reinterpret_cast<csl::CSLContext*>(context_);
+
+        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
+        for (int i = 1; i < inputs.size(); i++)
+        {
+            auto from_wrapper = inputs[i].dynamicCast<CUDABackendWrapper>();
+            if (input_wrapper->getShape() != from_wrapper->getShape())
+                return Ptr<BackendNode>();
+        }
+
+        auto op_ = [this] {
+            switch (op) {
+                case OPERATION::MAX: return cuda4dnn::EltwiseOpType::MAX;
+                case OPERATION::MIN: return cuda4dnn::EltwiseOpType::MIN;
+                case OPERATION::SUM: return cuda4dnn::EltwiseOpType::SUM;
+                case OPERATION::PROD: return cuda4dnn::EltwiseOpType::PRODUCT;
+                case OPERATION::DIV: return cuda4dnn::EltwiseOpType::DIV;
+                default: CV_Error(Error::StsNotImplemented, "Operators other than MAX, MIN, SUM, PRODUCT and DIV are not supported with CUDA.");
+            }
+        }();
+
+        return make_cuda_node<cuda4dnn::EltwiseOp>(preferableTarget, std::move(context->stream), op_, std::vector<float>());
+    }
+#endif
+
     virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
                              const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
     {
index a1b588e..46e68f7 100644 (file)
@@ -86,8 +86,11 @@ void Net::Impl::initCUDABackend(const std::vector<LayerPin>& blobsToKeep_)
         auto node = layerInstance->initCUDA(&context, ld.inputBlobsWrappers, ld.outputBlobsWrappers);
         ld.backendNodes[DNN_BACKEND_CUDA] = node;
 
-        auto cudaNode = node.dynamicCast<CUDABackendNode>();
-        cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes());
+        if(!node.empty())
+        {
+            auto cudaNode = node.dynamicCast<CUDABackendNode>();
+            cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes());
+        }
     }
 
     if (blobsToKeep_.size() > 1)