Added weights fusion optimization for the deconvolution layer
author Alexander Nesterov <nesoldr@mail.ru>
Fri, 8 Feb 2019 14:12:33 +0000 (13:12 -0100)
committer Alexander Nesterov <nesoldr@mail.ru>
Tue, 5 Mar 2019 19:12:03 +0000 (18:12 -0100)
modules/dnn/src/layers/convolution_layer.cpp
modules/dnn/test/test_backends.cpp
modules/dnn/test/test_torch_importer.cpp

diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 90645d5..b872130 100644
@@ -61,6 +61,8 @@ namespace dnn
 class BaseConvolutionLayerImpl : public ConvolutionLayer
 {
 public:
+    bool newWeightAndBias;
+    std::vector<double> weightsMultipliers;
     BaseConvolutionLayerImpl(const LayerParams &params)
     {
         setParamsFrom(params);
@@ -84,6 +86,8 @@ public:
         CV_Assert(numOutput % ngroups == 0);
         CV_Assert(adjustPad.width < stride.width &&
                   adjustPad.height < stride.height);
+
+        newWeightAndBias = false;
     }
 
     void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
@@ -134,6 +138,20 @@ public:
         (dilation.height == 1 && dilation.width == 1);
     }
 
+    virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
+    {
+        Mat w, b;
+        top->getScaleShift(w, b);
+        if (!w.empty() || !b.empty())
+        {
+            fuseWeights(w, b);
+            return true;
+        }
+        return false;
+    }
+
+    virtual void fuseWeights(const Mat& w_, const Mat& b_) = 0;
+
     virtual void applyHalideScheduler(Ptr<BackendNode>& node,
                                       const std::vector<Mat*> &inputs,
                                       const std::vector<Mat> &outputs,
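
The hunks above hoist newWeightAndBias, weightsMultipliers and tryFuse from ConvolutionLayerImpl into BaseConvolutionLayerImpl and turn fuseWeights into a pure virtual hook, so the convolution and deconvolution implementations share one fusion entry point: the optimizer offers the following layer (top); if it reports a per-channel scale/shift via getScaleShift, the convolution absorbs it and the extra layer can be dropped. A minimal standalone sketch of that hand-shake (hypothetical Toy* classes, not the OpenCV types):

    // Sketch of the tryFuse/getScaleShift/fuseWeights hand-shake with toy
    // stand-in classes; OpenCV's Layer/Ptr/Mat types are intentionally not used.
    #include <memory>
    #include <vector>
    #include <cstdio>

    struct ToyLayer {
        // Layers that compute y = x * scale + shift report their parameters here;
        // anything else leaves both vectors empty.
        virtual void getScaleShift(std::vector<float>& scale,
                                   std::vector<float>& shift) const
        { scale.clear(); shift.clear(); }
        virtual ~ToyLayer() {}
    };

    struct ToyScaleShift : ToyLayer {
        std::vector<float> w, b;
        void getScaleShift(std::vector<float>& scale,
                           std::vector<float>& shift) const override
        { scale = w; shift = b; }
    };

    struct ToyConvBase : ToyLayer {
        // Mirrors the base-class tryFuse added in the patch: ask the next layer
        // for a scale/shift and delegate the actual folding to fuseWeights().
        bool tryFuse(const std::shared_ptr<ToyLayer>& top) {
            std::vector<float> w, b;
            top->getScaleShift(w, b);
            if (!w.empty() || !b.empty()) { fuseWeights(w, b); return true; }
            return false;
        }
        virtual void fuseWeights(const std::vector<float>& w,
                                 const std::vector<float>& b) = 0;
    };

    struct ToyConv : ToyConvBase {
        std::vector<float> bias{0.f, 0.f};
        // Toy fusion: folds scale/shift into the bias only; the real layer also
        // rescales its weight rows.
        void fuseWeights(const std::vector<float>& w,
                         const std::vector<float>& b) override {
            for (size_t i = 0; i < bias.size(); ++i)
                bias[i] = bias[i] * (w.empty() ? 1.f : w[i]) + (b.empty() ? 0.f : b[i]);
        }
    };

    int main() {
        auto bn = std::make_shared<ToyScaleShift>();
        bn->w = {2.f, 3.f};
        bn->b = {1.f, -1.f};
        ToyConv conv;
        bool fused = conv.tryFuse(bn);                     // bn folds into conv's bias
        std::printf("fused=%d bias0=%.1f\n", (int)fused, conv.bias[0]);  // fused=1 bias0=1.0
        return 0;
    }
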
@@ -184,11 +202,9 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
 public:
     enum { VEC_ALIGN = 8, DFT_TYPE = CV_32F };
     Mat weightsMat;
-    std::vector<double> weightsMultipliers;
     std::vector<float> biasvec;
     std::vector<float> reluslope;
     Ptr<ActivationLayer> activ;
-    bool newWeightAndBias;
     bool fusedBias;
 
 #ifdef HAVE_OPENCL
@@ -200,7 +216,6 @@ public:
 #endif
     ConvolutionLayerImpl(const LayerParams &params) : BaseConvolutionLayerImpl(params)
     {
-        newWeightAndBias = false;
         fusedBias = false;
 #ifdef HAVE_OPENCL
         newActiv = false;
@@ -346,19 +361,7 @@ public:
         return !activ.empty();
     }
 
-    virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
-    {
-        Mat w, b;
-        top->getScaleShift(w, b);
-        if (!w.empty() || !b.empty())
-        {
-            fuseWeights(w, b);
-            return true;
-        }
-        return false;
-    }
-
-    void fuseWeights(const Mat& w_, const Mat& b_)
+    void fuseWeights(const Mat& w_, const Mat& b_) CV_OVERRIDE
     {
         // Convolution weights have OIHW data layout. Parameters fusion in case of
         // (conv(I) + b1 ) * w + b2
@@ -1238,6 +1241,45 @@ public:
 
         pad.width = pad_l;
         pad.height = pad_t;
+
+        weightsMultipliers.assign(numOutput, 1.0);
+        if (weightsMat.empty())
+        {
+            transpose(blobs[0].reshape(1, blobs[0].size[0]), weightsMat);
+            biasesMat = hasBias() ? blobs[1].reshape(1, numOutput)
+                                  : Mat::zeros(numOutput, 1, CV_32F);
+        }
+    }
+
+    void fuseWeights(const Mat& w_, const Mat& b_) CV_OVERRIDE
+    {
+        Mat w = w_.total() == 1 ? Mat(1, numOutput, CV_32F, Scalar(w_.at<float>(0))) : w_;
+        Mat b = b_.total() == 1 ? Mat(1, numOutput, CV_32F, Scalar(b_.at<float>(0))) : b_;
+
+        CV_Assert_N(!weightsMat.empty(),
+                     w.empty() || numOutput == w.total(),
+                     b.empty() || numOutput == b.total());
+
+        if (!w.empty())
+        {
+            transpose(blobs[0].reshape(1, blobs[0].size[0]), weightsMat);
+            weightsMat = weightsMat.reshape(1, numOutput);
+            for (int i = 0; i < numOutput; ++i)
+            {
+                double wi = w.at<float>(i);
+                weightsMultipliers[i] *= wi;
+                cv::multiply(weightsMat.row(i), weightsMultipliers[i], weightsMat.row(i));
+                biasesMat.at<float>(i) *= wi;
+            }
+            weightsMat = weightsMat.reshape(1, weightsMat.total() / blobs[0].size[0]);
+        }
+
+        if (!b.empty())
+        {
+            cv::add(biasesMat, b.reshape(1, numOutput), biasesMat);
+        }
+
+        newWeightAndBias = !w.empty() || !b.empty();
     }
 
     class MatMulInvoker : public ParallelLoopBody
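
The deconvolution-side fuseWeights above rebuilds weightsMat from blobs[0], scales each output channel's row by the factor accumulated in weightsMultipliers, scales the existing bias by the same factor, and then adds the shift term; re-reading blobs[0] on every call and keeping the running product in weightsMultipliers means repeated fusions rescale the original weights rather than compounding earlier rescalings. The identity relied on, per output channel, is (W_i * x + b1_i) * w_i + b2_i = (w_i * W_i) * x + (w_i * b1_i + b2_i). A tiny numeric check of that identity (plain scalars, not the Mats used in the patch):

    // Verifies per-channel fusion: (W*x + b1)*w + b2 == (w*W)*x + (w*b1 + b2).
    // Plain scalar arithmetic only; a sketch of the math, not the layer code.
    #include <cassert>
    #include <cmath>
    #include <cstdio>

    int main() {
        const float W  = 0.5f, b1 = 0.25f;   // original per-channel weight and bias
        const float w  = 2.0f, b2 = -1.0f;   // scale/shift being fused in
        const float x  = 3.0f;               // arbitrary input value

        float separate = (W * x + b1) * w + b2;   // deconv followed by scale/shift
        float Wf = w * W;                         // fused weight (weightsMultipliers path)
        float bf = w * b1 + b2;                   // fused bias   (biasesMat path)
        float fused = Wf * x + bf;                // fused layer alone

        assert(std::fabs(separate - fused) < 1e-6f);
        std::printf("separate=%f fused=%f\n", separate, fused);
        return 0;
    }
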
@@ -1505,11 +1547,19 @@ public:
 
         if (umat_weights.empty())
         {
-            transpose(blobs[0].reshape(1, inpCn), umat_weights);
-            if (hasBias())
-                blobs[1].reshape(1, outCn).copyTo(umat_biases);
+            if (newWeightAndBias)
+            {
+                weightsMat.copyTo(umat_weights);
+                biasesMat.copyTo(umat_biases);
+            }
             else
-                umat_biases = UMat::zeros(outCn, 1, CV_32F);
+            {
+                transpose(blobs[0].reshape(1, inpCn), umat_weights);
+                if (hasBias())
+                    blobs[1].reshape(1, outCn).copyTo(umat_biases);
+                else
+                    umat_biases = UMat::zeros(outCn, 1, CV_32F);
+            }
         }
 
         String buildopt = format("-DT=%s ", ocl::typeToStr(inputs[0].type()));
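
On the OpenCL path above, umat_weights can no longer always be rebuilt from the untouched blobs[0]: once newWeightAndBias is set, the already-fused weightsMat/biasesMat are uploaded instead, and otherwise the old transpose-and-copy path is kept. A minimal illustration of that host-to-UMat upload (assumes an OpenCV build; without an OpenCL device, UMat transparently falls back to host memory):

    // Stand-ins for the fused weightsMat/biasesMat produced on the host side.
    #include <opencv2/core.hpp>
    #include <cstdio>

    int main() {
        cv::Mat fusedWeights = cv::Mat::ones(4, 9, CV_32F) * 0.5f;
        cv::Mat fusedBiases  = cv::Mat::zeros(4, 1, CV_32F);

        cv::UMat umatWeights, umatBiases;
        fusedWeights.copyTo(umatWeights);   // same upload call the patch uses
        fusedBiases.copyTo(umatBiases);

        std::printf("uploaded %dx%d weights\n", umatWeights.rows, umatWeights.cols);
        return 0;
    }
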
diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp
index 8485bbe..10f7b02 100644
@@ -305,9 +305,16 @@ TEST_P(DNNTestNetwork, DenseNet_121)
 TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)
 {
     if (backend == DNN_BACKEND_HALIDE ||
-        (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ||
         (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD))
         throw SkipTestException("");
+
+#if defined(INF_ENGINE_RELEASE)
+#if INF_ENGINE_RELEASE <= 2018050000
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
+        throw SkipTestException("");
+#endif
+#endif
+
     Mat img = imread(findDataFile("dnn/googlenet_1.png", false));
     Mat inp = blobFromImage(img, 1.0, Size(320, 240), Scalar(103.939, 116.779, 123.68), false, false);
     // Output image has values in range [-143.526, 148.539].
diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp
index 4e00b10..11e6ee4 100644
@@ -394,6 +394,14 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
 TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
 {
     checkBackend();
+
+#if defined(INF_ENGINE_RELEASE)
+#if INF_ENGINE_RELEASE <= 2018050000
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
+        throw SkipTestException("");
+#endif
+#endif
+
     std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
                             "dnn/fast_neural_style_instance_norm_feathers.t7"};
     std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};