Add eltwise layer OCL implementation
author Li Peng <peng.li@intel.com>
Wed, 3 Jan 2018 18:21:04 +0000 (02:21 +0800)
committer Li Peng <peng.li@intel.com>
Fri, 5 Jan 2018 11:38:30 +0000 (19:38 +0800)
Signed-off-by: Li Peng <peng.li@intel.com>
modules/dnn/src/layers/eltwise_layer.cpp
modules/dnn/test/test_layers.cpp
modules/dnn/test/test_tf_importer.cpp
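
The patch adds a forward_ocl() path to the Eltwise layer (SUM, PROD and MAX ops, including coefficient-weighted sums) and wires it into forward() behind a CV_OCL_RUN guard. From the application side, the new path is opted into through the public dnn API. A minimal sketch (the model and image file names are placeholders; any Caffe net containing an Eltwise layer exercises the new code):

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>

    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        // Placeholder model files; any net with an Eltwise layer works.
        Net net = readNetFromCaffe("eltwise.prototxt", "eltwise.caffemodel");
        net.setPreferableBackend(DNN_BACKEND_DEFAULT);
        net.setPreferableTarget(DNN_TARGET_OPENCL);  // routes Eltwise to forward_ocl() when supported
        net.setInput(blobFromImage(imread("input.jpg")));
        Mat out = net.forward();
        return 0;
    }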

modules/dnn/src/layers/eltwise_layer.cpp
index 7e2214e..4037573 100644
@@ -259,11 +259,63 @@ public:
         }
     };
 
+#ifdef HAVE_OPENCL
+    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inputs_.getUMatVector(inputs);
+        outputs_.getUMatVector(outputs);
+
+        switch (op)
+        {
+            case SUM:
+                if (coeffs.empty())
+                {
+                    add(inputs[0], inputs[1], outputs[0]);
+                    for (int i = 2; i < inputs.size(); ++i)
+                        add(outputs[0], inputs[i], outputs[0]);
+                }
+                else
+                {
+                    UMat mul0, mul1;
+                    multiply(coeffs[0], inputs[0], mul0);
+                    multiply(coeffs[1], inputs[1], mul1);
+                    add(mul0, mul1, outputs[0]);
+                    for (int i = 2; i < inputs.size(); ++i)
+                    {
+                        multiply(coeffs[i], inputs[i], mul0);
+                        add(mul0, outputs[0], outputs[0]);
+                    }
+                }
+                break;
+            case PROD:
+                multiply(inputs[0], inputs[1], outputs[0]);
+                for (int i = 2; i < inputs.size(); ++i)
+                    multiply(inputs[i], outputs[0], outputs[0]);
+                break;
+            case MAX:
+                max(inputs[0], inputs[1], outputs[0]);
+                for (int i = 2; i < inputs.size(); ++i)
+                    max(inputs[i], outputs[0], outputs[0]);
+                break;
+            default:
+                return false;
+        }
+        return true;
+    }
+#endif
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
         Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
     }
 
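The CV_OCL_RUN guard above behaves roughly as follows (a conceptual sketch, not the literal macro expansion): the OpenCL kernel is tried first, and the CPU fallback runs only when OpenCL is unavailable or forward_ocl() returns false for an unsupported op:

    // Conceptual equivalent of the CV_OCL_RUN(...) line in forward().
    if (cv::ocl::useOpenCL() &&
        preferableTarget == DNN_TARGET_OPENCL &&
        cv::ocl::Device::getDefault().isIntel())
    {
        if (forward_ocl(inputs_arr, outputs_arr, internals_arr))
            return;  // OpenCL path handled this op (SUM/PROD/MAX)
    }
    Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
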
modules/dnn/test/test_layers.cpp
index a966e84..cd23541 100644
@@ -320,6 +320,11 @@ TEST(Layer_Test_Eltwise, Accuracy)
     testLayerUsingCaffeModels("layer_eltwise");
 }
 
+OCL_TEST(Layer_Test_Eltwise, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_eltwise", DNN_TARGET_OPENCL);
+}
+
 TEST(Layer_Test_PReLU, Accuracy)
 {
     testLayerUsingCaffeModels("layer_prelu", DNN_TARGET_CPU, true);
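
The same pattern extends to any layer test whose helper already takes a target id; for instance, a hypothetical OpenCL variant of the PReLU test above would read:

    OCL_TEST(Layer_Test_PReLU, Accuracy)
    {
        testLayerUsingCaffeModels("layer_prelu", DNN_TARGET_OPENCL, true);
    }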
modules/dnn/test/test_tf_importer.cpp
index 2cad882..04b4c9a 100644
@@ -76,7 +76,7 @@ static std::string path(const std::string& file)
     return findDataFile("dnn/tensorflow/" + file, false);
 }
 
-static void runTensorFlowNet(const std::string& prefix, bool hasText = false,
+static void runTensorFlowNet(const std::string& prefix, int targetId = DNN_TARGET_CPU, bool hasText = false,
                              double l1 = 1e-5, double lInf = 1e-4,
                              bool memoryLoad = false)
 {
@@ -104,6 +104,9 @@ static void runTensorFlowNet(const std::string& prefix, bool hasText = false,
 
     ASSERT_FALSE(net.empty());
 
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(targetId);
+
     cv::Mat input = blobFromNPY(inpPath);
     cv::Mat target = blobFromNPY(outPath);
 
@@ -132,6 +135,11 @@ TEST(Test_TensorFlow, eltwise_add_mul)
     runTensorFlowNet("eltwise_add_mul");
 }
 
+OCL_TEST(Test_TensorFlow, eltwise_add_mul)
+{
+    runTensorFlowNet("eltwise_add_mul", DNN_TARGET_OPENCL);
+}
+
 TEST(Test_TensorFlow, pad_and_concat)
 {
     runTensorFlowNet("pad_and_concat");
@@ -141,7 +149,7 @@ TEST(Test_TensorFlow, batch_norm)
 {
     runTensorFlowNet("batch_norm");
     runTensorFlowNet("fused_batch_norm");
-    runTensorFlowNet("batch_norm_text", true);
+    runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true);
 }
 
 TEST(Test_TensorFlow, pooling)
@@ -179,15 +187,15 @@ TEST(Test_TensorFlow, fp16)
 {
     const float l1 = 1e-3;
     const float lInf = 1e-2;
-    runTensorFlowNet("fp16_single_conv", false, l1, lInf);
-    runTensorFlowNet("fp16_deconvolution", false, l1, lInf);
-    runTensorFlowNet("fp16_max_pool_odd_same", false, l1, lInf);
-    runTensorFlowNet("fp16_padding_valid", false, l1, lInf);
-    runTensorFlowNet("fp16_eltwise_add_mul", false, l1, lInf);
-    runTensorFlowNet("fp16_max_pool_odd_valid", false, l1, lInf);
-    runTensorFlowNet("fp16_pad_and_concat", false, l1, lInf);
-    runTensorFlowNet("fp16_max_pool_even", false, l1, lInf);
-    runTensorFlowNet("fp16_padding_same", false, l1, lInf);
+    runTensorFlowNet("fp16_single_conv", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_deconvolution", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_max_pool_odd_same", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_padding_valid", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_eltwise_add_mul", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_max_pool_odd_valid", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_pad_and_concat", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_max_pool_even", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_padding_same", DNN_TARGET_CPU, false, l1, lInf);
 }
 
 TEST(Test_TensorFlow, quantized)
@@ -267,7 +275,7 @@ OCL_TEST(Test_TensorFlow, MobileNet_SSD)
 
 TEST(Test_TensorFlow, lstm)
 {
-    runTensorFlowNet("lstm", true);
+    runTensorFlowNet("lstm", DNN_TARGET_CPU, true);
 }
 
 TEST(Test_TensorFlow, split)
@@ -284,11 +292,11 @@ TEST(Test_TensorFlow, memory_read)
 {
     double l1 = 1e-5;
     double lInf = 1e-4;
-    runTensorFlowNet("lstm", true, l1, lInf, true);
+    runTensorFlowNet("lstm", DNN_TARGET_CPU, true, l1, lInf, true);
 
-    runTensorFlowNet("batch_norm", false, l1, lInf, true);
-    runTensorFlowNet("fused_batch_norm", false, l1, lInf, true);
-    runTensorFlowNet("batch_norm_text", true, l1, lInf, true);
+    runTensorFlowNet("batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
+    runTensorFlowNet("fused_batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
+    runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
 }
 
 }
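
For the coefficient branch of the new SUM path, the output is out = coeffs[0]*inputs[0] + coeffs[1]*inputs[1] + ... The snippet below exercises the same cv::multiply/cv::add T-API calls in isolation (a standalone sketch; on machines without a usable OpenCL device the UMat calls transparently fall back to the CPU):

    #include <opencv2/core.hpp>

    int main()
    {
        cv::UMat a = cv::UMat::ones(2, 3, CV_32F);
        cv::UMat b = cv::UMat::ones(2, 3, CV_32F);
        cv::UMat mul0, mul1, out;

        // out = 0.5*a + 2.0*b, mirroring the coeffs branch of forward_ocl()
        cv::multiply(0.5, a, mul0);
        cv::multiply(2.0, b, mul1);
        cv::add(mul0, mul1, out);

        // Every element should equal 2.5 exactly
        CV_Assert(cv::countNonZero(out.getMat(cv::ACCESS_READ) != 2.5) == 0);
        return 0;
    }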