Fuse tf.nn.l2_normalize layer
author    Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
          Wed, 4 Apr 2018 17:32:00 +0000 (20:32 +0300)
committer Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
          Tue, 10 Apr 2018 07:12:44 +0000 (10:12 +0300)
modules/dnn/include/opencv2/dnn/all_layers.hpp
modules/dnn/misc/quantize_face_detector.py
modules/dnn/src/layers/normalize_bbox_layer.cpp
modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
modules/dnn/src/tensorflow/tf_importer.cpp
modules/dnn/test/test_tf_importer.cpp
samples/dnn/face_detector/opencv_face_detector.pbtxt
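
TensorFlow has no single L2-normalization op: tf.nn.l2_normalize(x, dim) is serialized as a chain of five nodes, Square -> Sum -> Maximum -> Rsqrt -> Mul. This commit teaches the graph simplifier to match that chain and replace it with one fused L2Normalize node, and it generalizes the NormalizeBBox layer from the boolean across_spatial flag to a contiguous axis range [start_axis, end_axis] (across_spatial=true corresponds to start_axis=1, end_axis=-1; across_spatial=false to end_axis=start_axis). It also strips tensor-index suffixes such as ":0" when resolving input nodes and predicts a layer's output data layout from its data_format attribute up front.

A minimal TensorFlow 1.x sketch of the graph being fused (names and shapes are illustrative):

    import tensorflow as tf

    # tf.nn.l2_normalize computes x / sqrt(max(sum(x**2, dim), eps)); in the
    # serialized GraphDef this is a Square, Sum, Maximum, Rsqrt, Mul chain.
    inp = tf.placeholder(tf.float32, [1, 2, 3, 4], 'input')
    out = tf.nn.l2_normalize(inp, 3)  # reduce over the NHWC channels axis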

diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 2cdf700..9053842 100644
@@ -559,7 +559,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     {
     public:
         float pnorm, epsilon;
-        bool acrossSpatial;
+        CV_DEPRECATED bool acrossSpatial;
 
         static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
     };
diff --git a/modules/dnn/misc/quantize_face_detector.py b/modules/dnn/misc/quantize_face_detector.py
index 06acae9..a9348c2 100644
@@ -318,6 +318,7 @@ for node in graph_def.node:
         node.input.pop()
         node.input.pop()
         node.input.append(layer_256_1_relu1.name)
+        node.input.append('conv4_3_norm/l2_normalize/Sum/reduction_indices')
         break
 
 softmaxShape = NodeDef()
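
The fused L2Normalize node now takes the reduction axes as a second input, so the script rewires the node to reference the Sum's reduction_indices constant, which is assumed to survive in graph_def. A quick sanity-check sketch (not part of the script):

    assert any(n.name == 'conv4_3_norm/l2_normalize/Sum/reduction_indices'
               for n in graph_def.node)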
diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp
index 5772aad..580b6b3 100644
@@ -54,6 +54,9 @@ public:
         pnorm = params.get<float>("p", 2);
         epsilon = params.get<float>("eps", 1e-10f);
         acrossSpatial = params.get<bool>("across_spatial", true);
+        startAxis = params.get<int>("start_axis", 1);
+        CV_Assert(!params.has("across_spatial") || !params.has("end_axis"));
+        endAxis = params.get<int>("end_axis", acrossSpatial ? -1 : startAxis);
         CV_Assert(pnorm > 0);
     }
 
@@ -85,20 +88,26 @@ public:
 
         const UMat& inp0 = inputs[0];
         UMat& buffer = internals[0];
-        size_t num = inp0.size[0];
-        size_t channels = inp0.size[1];
-        size_t channelSize = inp0.total() / (num * channels);
+        startAxis = clamp(startAxis, inp0.dims);
+        endAxis = clamp(endAxis, inp0.dims);
+
+        size_t num = total(shape(inp0.size), 0, startAxis);
+        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
+        size_t planeSize = inp0.total() / (num * numPlanes);
+        MatShape s = shape(1, inputs[0].total());
+        UMat inp = inputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
+        UMat out = outputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
         for (size_t i = 0; i < num; ++i)
         {
-            MatShape s = shape(channels, channelSize);
-            UMat src = inputs[i].reshape(1, s.size(), &s[0]);
-            UMat dst = outputs[i].reshape(1, s.size(), &s[0]);
+            s = shape(numPlanes, planeSize);
+            UMat src = inp.row(i).reshape(1, s.size(), &s[0]);
+            UMat dst = out.row(i).reshape(1, s.size(), &s[0]);
 
             UMat abs_mat;
             absdiff(src, cv::Scalar::all(0), abs_mat);
             pow(abs_mat, pnorm, buffer);
 
-            if (acrossSpatial)
+            if (planeSize == 1)
             {
                 // add eps to avoid overflow
                 float absSum = sum(buffer)[0] + epsilon;
@@ -114,7 +123,7 @@ public:
                 // compute inverted norm to call multiply instead divide
                 cv::pow(norm, -1.0f / pnorm, norm);
 
-                repeat(norm, channels, 1, buffer);
+                repeat(norm, numPlanes, 1, buffer);
                 multiply(src, buffer, dst);
             }
 
@@ -130,7 +139,7 @@ public:
                 else
                 {
                     // scale: numPlanes x 1
-                    CV_Assert(scale.total() == channels);
+                    CV_Assert(scale.total() == numPlanes);
                     repeat(scale, 1, dst.cols, buffer);
                     multiply(dst, buffer, dst);
                 }
@@ -162,17 +171,22 @@ public:
 
         const Mat& inp0 = *inputs[0];
         Mat& buffer = internals[0];
-        size_t num = inp0.size[0];
-        size_t channels = inp0.size[1];
-        size_t channelSize = inp0.total() / (num * channels);
+        startAxis = clamp(startAxis, inp0.dims);
+        endAxis = clamp(endAxis, inp0.dims);
+
+        const float* inpData = inp0.ptr<float>();
+        float* outData = outputs[0].ptr<float>();
+
+        size_t num = total(shape(inp0.size), 0, startAxis);
+        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
+        size_t planeSize = inp0.total() / (num * numPlanes);
         for (size_t n = 0; n < num; ++n)
         {
-            Mat src = Mat(channels, channelSize, CV_32F, (void*)inp0.ptr<float>(n));
-            Mat dst = Mat(channels, channelSize, CV_32F, (void*)outputs[0].ptr<float>(n));
-
+            Mat src = Mat(numPlanes, planeSize, CV_32F, (void*)inpData);
+            Mat dst = Mat(numPlanes, planeSize, CV_32F, (void*)outData);
             cv::pow(abs(src), pnorm, buffer);
 
-            if (acrossSpatial)
+            if (planeSize == 1)
             {
                 // add eps to avoid overflow
                 float absSum = sum(buffer)[0] + epsilon;
@@ -188,7 +202,7 @@ public:
                 // compute inverted norm to call multiply instead divide
                 cv::pow(norm, -1.0f / pnorm, norm);
 
-                repeat(norm, channels, 1, buffer);
+                repeat(norm, numPlanes, 1, buffer);
                 multiply(src, buffer, dst);
             }
 
@@ -204,13 +218,18 @@ public:
                 else
                 {
                     // scale: numPlanes x 1
-                    CV_Assert(scale.total() == channels);
+                    CV_Assert(scale.total() == numPlanes);
                     repeat(scale, 1, dst.cols, buffer);
                     multiply(dst, buffer, dst);
                 }
             }
+            inpData += numPlanes * planeSize;
+            outData += numPlanes * planeSize;
         }
     }
+
+private:
+    int startAxis, endAxis;
 };
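
In effect the layer now reshapes the input to (num, numPlanes, planeSize) around the normalized range [start_axis, end_axis] and divides each group of planes by its p-norm. A rough numpy model of the updated forward pass (an illustration, not the OpenCV code; eps handling follows the planeSize == 1 branch):

    import numpy as np

    def normalize_bbox(x, p=2.0, start_axis=1, end_axis=1, eps=1e-10):
        start_axis %= x.ndim  # mimic the clamp() calls above
        end_axis %= x.ndim
        num = int(np.prod(x.shape[:start_axis]))
        planes = int(np.prod(x.shape[start_axis:end_axis + 1]))
        plane_size = x.size // (num * planes)
        # Flatten to (num, numPlanes, planeSize), normalize over the planes.
        y = x.reshape(num, planes, plane_size).astype(np.float32)
        norm = np.power(np.abs(y), p).sum(axis=1, keepdims=True) + eps
        return (y / np.power(norm, 1.0 / p)).reshape(x.shape)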
 
 
diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
index 2cfb42f..cfb472e 100644
@@ -80,14 +80,16 @@ public:
     {
         CV_Assert(inpId < node.input_size());
         std::string name = node.input(inpId);
+        // If an operation produces several tensors, they are specified by an
+        // index after the ':' character. For example, "input:0".
+        name = name.substr(0, name.rfind(':'));
         const int numNodes = net.node_size();
         for (int i = 0; i < numNodes; ++i)
         {
             if (net.node(i).name() == name)
                 return net.node(i);
         }
-        CV_Error(Error::StsParseError, "Input node with name " + name + " not found");
-        return net.node(0);  // just return something
+        CV_ErrorNoReturn(Error::StsParseError, "Input node with name " + name + " not found");
     }
 
     // Match TensorFlow subgraph starting from <nodeId> with a set of nodes to be fused.
@@ -400,6 +402,23 @@ private:
     int numOutDims;
 };
 
+class L2NormalizeSubgraph : public Subgraph
+{
+public:
+    L2NormalizeSubgraph()
+    {
+        int input = addNodeToMatch("");
+        int square = addNodeToMatch("Square", input);
+        int reductionIndices = addNodeToMatch("Const");
+        int sum = addNodeToMatch("Sum", square, reductionIndices);
+        int y = addNodeToMatch("Const");
+        int maximum = addNodeToMatch("Maximum", sum, y);
+        int rsqrt = addNodeToMatch("Rsqrt", maximum);
+        addNodeToMatch("Mul", input, rsqrt);
+        setFusedNode("L2Normalize", input, reductionIndices);
+    }
+};
+
 void simplifySubgraphs(tensorflow::GraphDef& net)
 {
     std::vector<Ptr<Subgraph> > subgraphs;
@@ -410,6 +429,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
     subgraphs.push_back(Ptr<Subgraph>(new SoftMaxKerasSubgraph()));
     subgraphs.push_back(Ptr<Subgraph>(new ReLU6KerasSubgraph()));
     subgraphs.push_back(Ptr<Subgraph>(new ReshapeKerasSubgraph(3)));
+    subgraphs.push_back(Ptr<Subgraph>(new L2NormalizeSubgraph()));
 
     int numNodes = net.node_size();
     std::vector<int> matchedNodesIds;
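
Continuing the TensorFlow sketch from the top of this commit, the op sequence that tf.nn.l2_normalize emits is exactly the pattern L2NormalizeSubgraph matches, with reduction_indices kept as an input of the fused node so the importer can read the axes:

    # Ops produced by the l2_normalize sketch above (output is indicative):
    print([n.op for n in tf.get_default_graph().as_graph_def().node])
    # ['Placeholder', 'Square', 'Const', 'Sum', 'Const', 'Maximum',
    #  'Rsqrt', 'Mul']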
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index 6ea9e96..f580916 100644
@@ -37,7 +37,13 @@ using ::google::protobuf::Reflection;
 namespace
 {
 
-static int toNCHW[] = {0, 2, 3, 1};
+static int toNCHW(int idx)
+{
+    CV_Assert(-4 <= idx && idx < 4);
+    if (idx == 0) return 0;
+    else if (idx > 0) return idx % 3 + 1;
+    else return (4 + idx) % 3 + 1;
+}
 
 // These values are used to indicate a layer output's data layout where possible.
 enum DataLayout
@@ -556,11 +562,23 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& cons
 // this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
 static int predictOutputDataLayout(const tensorflow::NodeDef& layer, const std::map<String, int>& data_layouts)
 {
+    if (hasLayerAttr(layer, "data_format"))
+    {
+        std::string format = getLayerAttr(layer, "data_format").s();
+        if (format == "NHWC" || format == "channels_last")
+            return DATA_LAYOUT_NHWC;
+        else if (format == "NCHW" || format == "channels_first")
+            return DATA_LAYOUT_NCHW;
+        else
+            CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
+    }
+
+    // Determine layout by layer's inputs
     int layout = DATA_LAYOUT_UNKNOWN;
     std::map<String, int>::const_iterator it;
     for (int i = 0, n = layer.input_size(); i < n; ++i)
     {
-        it = data_layouts.find(layer.input(i));
+        it = data_layouts.find(layer.input(i).substr(0, layer.input(i).rfind(':')));
         if (it != data_layouts.end())
         {
             if (it->second == DATA_LAYOUT_UNKNOWN)
@@ -708,17 +726,7 @@ void TFImporter::populateNet(Net dstNet)
             // one input only
             connect(layer_id, dstNet, parsePin(input), id, 0);
 
-            if (hasLayerAttr(layer, "data_format"))
-            {
-                std::string format = getLayerAttr(layer, "data_format").s();
-                if (format == "NHWC" || format == "channels_last")
-                    data_layouts[name] = DATA_LAYOUT_NHWC;
-                else if (format == "NCHW" || format == "channels_first")
-                    data_layouts[name] = DATA_LAYOUT_NCHW;
-                else
-                    CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
-            }
-            else
+            if (data_layouts[name] == DATA_LAYOUT_UNKNOWN)
                 data_layouts[name] = DATA_LAYOUT_NHWC;
         }
         else if (type == "BiasAdd" || type == "Add")
@@ -956,7 +964,7 @@ void TFImporter::populateNet(Net dstNet)
         {
             int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);
             int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
-            layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW[axis] : axis);
+            layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW(axis) : axis);
 
             int id = dstNet.addLayer(name, "Concat", layerParams);
             layer_id[name] = id;
@@ -1017,7 +1025,7 @@ void TFImporter::populateNet(Net dstNet)
             // num_split
             // 1st blob is dims tensor
             int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
-            layerParams.set("axis", toNCHW[axis]);
+            layerParams.set("axis", toNCHW(axis));
 
             int id = dstNet.addLayer(name, "Slice", layerParams);
             layer_id[name] = id;
@@ -1410,9 +1418,26 @@ void TFImporter::populateNet(Net dstNet)
         {
             // op: "L2Normalize"
             // input: "input"
-            CV_Assert(layer.input_size() == 1);
-            layerParams.set("across_spatial", false);
-            layerParams.set("channel_shared", false);
+            // input: "reduction_indices" (axis)
+            CV_Assert(layer.input_size() == 2);
+            Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));
+            CV_Assert(reductionIndices.type() == CV_32SC1);
+
+            const int numAxes = reductionIndices.total();
+            if (data_layouts[name] == DATA_LAYOUT_NHWC)
+                for (int i = 0; i < numAxes; ++i)
+                    reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));
+
+            cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);
+            for (int i = 1; i < numAxes; ++i)
+            {
+                CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);
+                // Axes have the same sign.
+                CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);
+            }
+            layerParams.set("start_axis", reductionIndices.at<int>(0));
+            layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));
+
             int id = dstNet.addLayer(name, "Normalize", layerParams);
             layer_id[name] = id;
             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
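
OpenCV stores blobs as NCHW while TensorFlow graphs are usually NHWC, so the reduction axes are remapped before they become start_axis/end_axis. A Python mirror of toNCHW() for illustration:

    def to_nchw(idx):
        # Batch stays at 0; NHWC 1 (H) -> 2, 2 (W) -> 3, 3 or -1 (C) -> 1.
        assert -4 <= idx < 4
        if idx == 0:
            return 0
        return (idx if idx > 0 else 4 + idx) % 3 + 1

    # tf.nn.l2_normalize(x, 3) on NHWC input therefore imports with
    # start_axis == end_axis == 1: per-pixel channel normalization.
    assert to_nchw(3) == 1 and to_nchw(-1) == 1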
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index ff21228..8d4f4b6 100644
@@ -193,6 +193,13 @@ TEST_P(Test_TensorFlow_layers, reshape)
     runTensorFlowNet("unfused_flatten_unknown_batch", targetId);
 }
 
+TEST_P(Test_TensorFlow_layers, l2_normalize)
+{
+    int targetId = GetParam();
+    runTensorFlowNet("l2_normalize", targetId);
+    runTensorFlowNet("l2_normalize_3d", targetId);
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_layers, availableDnnTargets());
 
 typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_nets;
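
An end-to-end usage sketch matching the new tests (the .pb file name is hypothetical): a frozen graph containing tf.nn.l2_normalize now loads as a single Normalize layer.

    import cv2 as cv
    import numpy as np

    net = cv.dnn.readNetFromTensorflow('l2_normalize_net.pb')
    net.setInput(np.random.randn(1, 3, 5, 5).astype(np.float32))
    out = net.forward()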
diff --git a/samples/dnn/face_detector/opencv_face_detector.pbtxt b/samples/dnn/face_detector/opencv_face_detector.pbtxt
index 78ba0bd..e537e00 100644
@@ -482,6 +482,7 @@ node {
   name: "conv4_3_norm/l2_normalize"
   op: "L2Normalize"
   input: "Relu_4:0"
+  input: "conv4_3_norm/l2_normalize/Sum/reduction_indices"
 }
 node {
   name: "conv4_3_norm/mul_1"