From: Dmitry Kurtaev
Date: Wed, 4 Apr 2018 17:32:00 +0000 (+0300)
Subject: Fuse tf.nn.l2_normalize layer
X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~1^2~634^2~7^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1ba72ca0d3b537c1a9b22808145fb40dc8373bc4;p=platform%2Fupstream%2Fopencv.git

Fuse tf.nn.l2_normalize layer
---

diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 2cdf700..9053842 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -559,7 +559,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     {
     public:
         float pnorm, epsilon;
-        bool acrossSpatial;
+        CV_DEPRECATED bool acrossSpatial;
 
         static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
     };
diff --git a/modules/dnn/misc/quantize_face_detector.py b/modules/dnn/misc/quantize_face_detector.py
index 06acae9..a9348c2 100644
--- a/modules/dnn/misc/quantize_face_detector.py
+++ b/modules/dnn/misc/quantize_face_detector.py
@@ -318,6 +318,7 @@ for node in graph_def.node:
         node.input.pop()
         node.input.pop()
         node.input.append(layer_256_1_relu1.name)
+        node.input.append('conv4_3_norm/l2_normalize/Sum/reduction_indices')
         break
 
 softmaxShape = NodeDef()
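The graph-transform script above and the opencv_face_detector.pbtxt change at the end of this patch both reference the constant 'conv4_3_norm/l2_normalize/Sum/reduction_indices', which tells the importer along which axis to normalize. A minimal sketch of how such an int32 Const node could be emitted into the GraphDef; the variable names and the use of tensor_util here are illustrative, not taken from quantize_face_detector.py:

    import tensorflow as tf
    from tensorflow.core.framework.node_def_pb2 import NodeDef
    from tensorflow.python.framework import tensor_util

    # Illustrative only: an int32 Const holding the reduction axis
    # (3 = channel axis in NHWC) under the name referenced above.
    reduction_indices = NodeDef()
    reduction_indices.name = 'conv4_3_norm/l2_normalize/Sum/reduction_indices'
    reduction_indices.op = 'Const'
    reduction_indices.attr['dtype'].type = tf.int32.as_datatype_enum
    reduction_indices.attr['value'].tensor.CopyFrom(
        tensor_util.make_tensor_proto(3, dtype=tf.int32))
    # graph_def.node.extend([reduction_indices]) would then append it to the graph.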
diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp
index 5772aad..580b6b3 100644
--- a/modules/dnn/src/layers/normalize_bbox_layer.cpp
+++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp
@@ -54,6 +54,9 @@ public:
         pnorm = params.get<float>("p", 2);
         epsilon = params.get<float>("eps", 1e-10f);
         acrossSpatial = params.get<bool>("across_spatial", true);
+        startAxis = params.get<int>("start_axis", 1);
+        CV_Assert(!params.has("across_spatial") || !params.has("end_axis"));
+        endAxis = params.get<int>("end_axis", acrossSpatial ? -1 : startAxis);
         CV_Assert(pnorm > 0);
     }
 
@@ -85,20 +88,26 @@ public:
         const UMat& inp0 = inputs[0];
         UMat& buffer = internals[0];
 
-        size_t num = inp0.size[0];
-        size_t channels = inp0.size[1];
-        size_t channelSize = inp0.total() / (num * channels);
+        startAxis = clamp(startAxis, inp0.dims);
+        endAxis = clamp(endAxis, inp0.dims);
+
+        size_t num = total(shape(inp0.size), 0, startAxis);
+        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
+        size_t planeSize = inp0.total() / (num * numPlanes);
+        MatShape s = shape(1, inputs[0].total());
+        UMat inp = inputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
+        UMat out = outputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
         for (size_t i = 0; i < num; ++i)
         {
-            MatShape s = shape(channels, channelSize);
-            UMat src = inputs[i].reshape(1, s.size(), &s[0]);
-            UMat dst = outputs[i].reshape(1, s.size(), &s[0]);
+            s = shape(numPlanes, planeSize);
+            UMat src = inp.row(i).reshape(1, s.size(), &s[0]);
+            UMat dst = out.row(i).reshape(1, s.size(), &s[0]);
 
             UMat abs_mat;
             absdiff(src, cv::Scalar::all(0), abs_mat);
             pow(abs_mat, pnorm, buffer);
 
-            if (acrossSpatial)
+            if (planeSize == 1)
             {
                 // add eps to avoid overflow
                 float absSum = sum(buffer)[0] + epsilon;
@@ -114,7 +123,7 @@ public:
                 // compute inverted norm to call multiply instead divide
                 cv::pow(norm, -1.0f / pnorm, norm);
 
-                repeat(norm, channels, 1, buffer);
+                repeat(norm, numPlanes, 1, buffer);
                 multiply(src, buffer, dst);
             }
 
@@ -130,7 +139,7 @@ public:
                 else
                 {
                     // _scale: _channels x 1
-                    CV_Assert(scale.total() == channels);
+                    CV_Assert(scale.total() == numPlanes);
                     repeat(scale, 1, dst.cols, buffer);
                     multiply(dst, buffer, dst);
                 }
@@ -162,17 +171,22 @@ public:
         const Mat& inp0 = *inputs[0];
         Mat& buffer = internals[0];
 
-        size_t num = inp0.size[0];
-        size_t channels = inp0.size[1];
-        size_t channelSize = inp0.total() / (num * channels);
+        startAxis = clamp(startAxis, inp0.dims);
+        endAxis = clamp(endAxis, inp0.dims);
+
+        const float* inpData = inp0.ptr<float>();
+        float* outData = outputs[0].ptr<float>();
+
+        size_t num = total(shape(inp0.size), 0, startAxis);
+        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
+        size_t planeSize = inp0.total() / (num * numPlanes);
         for (size_t n = 0; n < num; ++n)
        {
-            Mat src = Mat(channels, channelSize, CV_32F, (void*)inp0.ptr(n));
-            Mat dst = Mat(channels, channelSize, CV_32F, (void*)outputs[0].ptr(n));
-
+            Mat src = Mat(numPlanes, planeSize, CV_32F, (void*)inpData);
+            Mat dst = Mat(numPlanes, planeSize, CV_32F, (void*)outData);
             cv::pow(abs(src), pnorm, buffer);
 
-            if (acrossSpatial)
+            if (planeSize == 1)
             {
                 // add eps to avoid overflow
                 float absSum = sum(buffer)[0] + epsilon;
@@ -188,7 +202,7 @@ public:
                 // compute inverted norm to call multiply instead divide
                 cv::pow(norm, -1.0f / pnorm, norm);
 
-                repeat(norm, channels, 1, buffer);
+                repeat(norm, numPlanes, 1, buffer);
                 multiply(src, buffer, dst);
             }
 
@@ -204,13 +218,18 @@ public:
                 else
                 {
                     // _scale: _channels x 1
-                    CV_Assert(scale.total() == channels);
+                    CV_Assert(scale.total() == numPlanes);
                     repeat(scale, 1, dst.cols, buffer);
                     multiply(dst, buffer, dst);
                 }
             }
+            inpData += numPlanes * planeSize;
+            outData += numPlanes * planeSize;
         }
     }
 
+private:
+    int startAxis, endAxis;
 };
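With start_axis and end_axis, the Normalize layer now reduces over a contiguous range of axes instead of the old across_spatial/per-channel switch: num covers the axes before start_axis, numPlanes the axes being normalized together, and planeSize the trailing axes. A rough NumPy model of the computation, assuming the default p = 2 and eps added to the reduced sum as in the planeSize == 1 branch above, and ignoring the optional learned scale blob:

    import numpy as np

    def normalize(x, p=2, eps=1e-10, start_axis=1, end_axis=1):
        shape = x.shape
        num = int(np.prod(shape[:start_axis]))                      # leading axes
        num_planes = int(np.prod(shape[start_axis:end_axis + 1]))   # normalized axes
        plane_size = x.size // (num * num_planes)                   # trailing axes
        m = x.reshape(num, num_planes, plane_size)
        norm = ((np.abs(m) ** p).sum(axis=1, keepdims=True) + eps) ** (1.0 / p)
        return (m / norm).reshape(shape)

    # With start_axis == end_axis == 1 on an NCHW blob this is the classic SSD
    # per-position across-channel L2 normalization.
    x = np.random.rand(1, 8, 4, 4).astype(np.float32)
    y = normalize(x)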
diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
index 2cfb42f..cfb472e 100644
--- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
+++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
@@ -80,14 +80,16 @@ public:
     {
         CV_Assert(inpId < node.input_size());
         std::string name = node.input(inpId);
+        // If an operation produces several tensors, they are specified by index
+        // after the ':' character. For example, "input:0".
+        name = name.substr(0, name.rfind(':'));
         const int numNodes = net.node_size();
         for (int i = 0; i < numNodes; ++i)
         {
             if (net.node(i).name() == name)
                 return net.node(i);
         }
-        CV_Error(Error::StsParseError, "Input node with name " + name + " not found");
-        return net.node(0);  // just return something
+        CV_ErrorNoReturn(Error::StsParseError, "Input node with name " + name + " not found");
     }
 
     // Match TensorFlow subgraph starting from with a set of nodes to be fused.
@@ -400,6 +402,23 @@ private:
     int numOutDims;
 };
 
+class L2NormalizeSubgraph : public Subgraph
+{
+public:
+    L2NormalizeSubgraph()
+    {
+        int input = addNodeToMatch("");
+        int square = addNodeToMatch("Square", input);
+        int reductionIndices = addNodeToMatch("Const");
+        int sum = addNodeToMatch("Sum", square, reductionIndices);
+        int y = addNodeToMatch("Const");
+        int maximum = addNodeToMatch("Maximum", sum, y);
+        int rsqrt = addNodeToMatch("Rsqrt", maximum);
+        addNodeToMatch("Mul", input, rsqrt);
+        setFusedNode("L2Normalize", input, reductionIndices);
+    }
+};
+
 void simplifySubgraphs(tensorflow::GraphDef& net)
 {
     std::vector<Ptr<Subgraph> > subgraphs;
@@ -410,6 +429,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
     subgraphs.push_back(Ptr<Subgraph>(new SoftMaxKerasSubgraph()));
     subgraphs.push_back(Ptr<Subgraph>(new ReLU6KerasSubgraph()));
     subgraphs.push_back(Ptr<Subgraph>(new ReshapeKerasSubgraph(3)));
+    subgraphs.push_back(Ptr<Subgraph>(new L2NormalizeSubgraph()));
 
     int numNodes = net.node_size();
     std::vector<int> matchedNodesIds;
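The new L2NormalizeSubgraph matches the node chain that tf.nn.l2_normalize is lowered to, Square -> Sum -> Maximum -> Rsqrt -> Mul, and collapses it into a single L2Normalize node whose second input is the Sum's reduction_indices constant. A sketch of a graph containing exactly that pattern, assuming the TensorFlow 1.x API:

    import tensorflow as tf

    inp = tf.placeholder(tf.float32, [1, 5, 5, 8], name='input')
    # tf.nn.l2_normalize is not a single graph op; it expands to
    # Square -> Sum -> Maximum(eps Const) -> Rsqrt -> Mul in the GraphDef.
    l2norm = tf.nn.l2_normalize(inp, 3)  # normalize across the NHWC channel axis

    graph_def = tf.get_default_graph().as_graph_def()
    print([n.op for n in graph_def.node])
    # Expected to contain 'Square', 'Sum' (fed by a reduction_indices Const),
    # 'Maximum', 'Rsqrt' and 'Mul', which is what the matcher looks for.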
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index 6ea9e96..f580916 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -37,7 +37,13 @@ using ::google::protobuf::Reflection;
 namespace
 {
 
-static int toNCHW[] = {0, 2, 3, 1};
+static int toNCHW(int idx)
+{
+    CV_Assert(-4 <= idx && idx < 4);
+    if (idx == 0) return 0;
+    else if (idx > 0) return idx % 3 + 1;
+    else return (4 + idx) % 3 + 1;
+}
 
 // This values are used to indicate layer output's data layout where it's possible.
 enum DataLayout
@@ -556,11 +562,23 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& cons
 // this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
 static int predictOutputDataLayout(const tensorflow::NodeDef& layer, const std::map<String, int>& data_layouts)
 {
+    if (hasLayerAttr(layer, "data_format"))
+    {
+        std::string format = getLayerAttr(layer, "data_format").s();
+        if (format == "NHWC" || format == "channels_last")
+            return DATA_LAYOUT_NHWC;
+        else if (format == "NCHW" || format == "channels_first")
+            return DATA_LAYOUT_NCHW;
+        else
+            CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
+    }
+
+    // Determine layout by layer's inputs
     int layout = DATA_LAYOUT_UNKNOWN;
     std::map<String, int>::const_iterator it;
     for (int i = 0, n = layer.input_size(); i < n; ++i)
     {
-        it = data_layouts.find(layer.input(i));
+        it = data_layouts.find(layer.input(i).substr(0, layer.input(i).rfind(':')));
         if (it != data_layouts.end())
         {
             if (it->second == DATA_LAYOUT_UNKNOWN)
@@ -708,17 +726,7 @@ void TFImporter::populateNet(Net dstNet)
             // one input only
             connect(layer_id, dstNet, parsePin(input), id, 0);
 
-            if (hasLayerAttr(layer, "data_format"))
-            {
-                std::string format = getLayerAttr(layer, "data_format").s();
-                if (format == "NHWC" || format == "channels_last")
-                    data_layouts[name] = DATA_LAYOUT_NHWC;
-                else if (format == "NCHW" || format == "channels_first")
-                    data_layouts[name] = DATA_LAYOUT_NCHW;
-                else
-                    CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
-            }
-            else
+            if (data_layouts[name] == DATA_LAYOUT_UNKNOWN)
                 data_layouts[name] = DATA_LAYOUT_NHWC;
         }
         else if (type == "BiasAdd" || type == "Add")
@@ -956,7 +964,7 @@ void TFImporter::populateNet(Net dstNet)
         {
             int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);
             int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
-            layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW[axis] : axis);
+            layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW(axis) : axis);
 
             int id = dstNet.addLayer(name, "Concat", layerParams);
             layer_id[name] = id;
@@ -1017,7 +1025,7 @@ void TFImporter::populateNet(Net dstNet)
             // num_split
             // 1st blob is dims tensor
             int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
-            layerParams.set("axis", toNCHW[axis]);
+            layerParams.set("axis", toNCHW(axis));
 
             int id = dstNet.addLayer(name, "Slice", layerParams);
             layer_id[name] = id;
@@ -1410,9 +1418,26 @@ void TFImporter::populateNet(Net dstNet)
         {
             // op: "L2Normalize"
             // input: "input"
-            CV_Assert(layer.input_size() == 1);
-            layerParams.set("across_spatial", false);
-            layerParams.set("channel_shared", false);
+            // input: "reduction_indices" (axis)
+            CV_Assert(layer.input_size() == 2);
+            Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));
+            CV_Assert(reductionIndices.type() == CV_32SC1);
+
+            const int numAxes = reductionIndices.total();
+            if (data_layouts[name] == DATA_LAYOUT_NHWC)
+                for (int i = 0; i < numAxes; ++i)
+                    reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));
+
+            cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);
+            for (int i = 1; i < numAxes; ++i)
+            {
+                CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);
+                // Axes have the same sign.
+                CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);
+            }
+            layerParams.set("start_axis", reductionIndices.at<int>(0));
+            layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));
+
             int id = dstNet.addLayer(name, "Normalize", layerParams);
             layer_id[name] = id;
             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
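toNCHW is now a function rather than a lookup table so that negative TensorFlow axes can also be remapped before start_axis and end_axis are derived from the sorted reduction indices. An illustrative Python mirror of the mapping:

    # Python mirror of toNCHW(int), for illustration only:
    # NHWC index -> NCHW index, for both non-negative and negative axes.
    def to_nchw(idx):
        assert -4 <= idx < 4
        if idx == 0:
            return 0
        return idx % 3 + 1 if idx > 0 else (4 + idx) % 3 + 1

    # NHWC axes 0 1 2 3 (N H W C) map to NCHW axes 0 2 3 1,
    # and a negative channel axis such as -1 also lands on NCHW axis 1.
    assert [to_nchw(i) for i in range(4)] == [0, 2, 3, 1]
    assert to_nchw(-1) == 1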
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index ff21228..8d4f4b6 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -193,6 +193,13 @@ TEST_P(Test_TensorFlow_layers, reshape)
     runTensorFlowNet("unfused_flatten_unknown_batch", targetId);
 }
 
+TEST_P(Test_TensorFlow_layers, l2_normalize)
+{
+    int targetId = GetParam();
+    runTensorFlowNet("l2_normalize", targetId);
+    runTensorFlowNet("l2_normalize_3d", targetId);
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_layers, availableDnnTargets());
 
 typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_nets;
diff --git a/samples/dnn/face_detector/opencv_face_detector.pbtxt b/samples/dnn/face_detector/opencv_face_detector.pbtxt
index 78ba0bd..e537e00 100644
--- a/samples/dnn/face_detector/opencv_face_detector.pbtxt
+++ b/samples/dnn/face_detector/opencv_face_detector.pbtxt
@@ -482,6 +482,7 @@ node {
   name: "conv4_3_norm/l2_normalize"
   op: "L2Normalize"
   input: "Relu_4:0"
+  input: "conv4_3_norm/l2_normalize/Sum/reduction_indices"
 }
 node {
   name: "conv4_3_norm/mul_1"
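With the reduction_indices input listed in the .pbtxt, the face detector still loads through the regular TensorFlow importer. A short usage sketch; the weights file name and the mean values are the ones commonly used with this sample and are assumed here, not taken from this patch:

    import cv2 as cv
    import numpy as np

    net = cv.dnn.readNetFromTensorflow('opencv_face_detector_uint8.pb',
                                       'opencv_face_detector.pbtxt')
    img = np.zeros((300, 300, 3), dtype=np.uint8)   # placeholder input image
    blob = cv.dnn.blobFromImage(img, 1.0, (300, 300), (104, 177, 123))
    net.setInput(blob)
    detections = net.forward()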