1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 // Copyright (C) 2018, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
10 #include "tf_graph_simplifier.hpp"
12 namespace cv { namespace dnn {
13 CV__DNN_EXPERIMENTAL_NS_BEGIN
15 using ::google::protobuf::RepeatedField;
16 using ::google::protobuf::MapPair;
18 class Subgraph // Interface to match and replace TensorFlow subgraphs.
21 // Add a node to be matched in the origin graph. Specify ids of nodes that
22 // are expected to be inputs. Returns id of a newly added node.
23 // TODO: Replace inputs to std::vector<int> in C++11
24 int addNodeToMatch(const std::string& op, int input_0 = -1, int input_1 = -1,
25 int input_2 = -1, int input_3 = -1)
27 int nodeInputs[] = {input_0, input_1, input_2, input_3};
29 for (int i = 0; i < 4; ++i)
31 numInputs += (int)(nodeInputs[i] != -1);
33 return addNodeToMatch(op, std::vector<int>(&nodeInputs[0], &nodeInputs[0] + numInputs));
36 int addNodeToMatch(const std::string& op, const std::vector<int>& inputs_)
38 for (int i = 0; i < inputs_.size(); ++i)
40 CV_Assert(inputs_[i] < (int)nodes.size());
43 inputs.push_back(inputs_);
44 return nodes.size() - 1;
47 // Specify resulting node. All the matched nodes in subgraph excluding
48 // input nodes will be fused into this single node.
49 // TODO: Replace inputs to std::vector<int> in C++11
50 void setFusedNode(const std::string& op, int input_0 = -1, int input_1 = -1,
51 int input_2 = -1, int input_3 = -1, int input_4 = -1,
54 int nodeInputs[] = {input_0, input_1, input_2, input_3, input_4, input_5};
56 for (int i = 0; i < 6; ++i)
58 CV_Assert(nodeInputs[i] < (int)nodes.size());
59 numInputs += (int)(nodeInputs[i] != -1);
61 setFusedNode(op, std::vector<int>(&nodeInputs[0], &nodeInputs[0] + numInputs));
64 void setFusedNode(const std::string& op, const std::vector<int>& inputs_)
66 fusedNodeInputs = inputs_;
69 for (int i = 0; i < nodes.size(); ++i)
71 if (std::find(fusedNodeInputs.begin(), fusedNodeInputs.end(), i) == fusedNodeInputs.end() &&
73 nodesToFuse.push_back(i);
77 static const tensorflow::NodeDef& getInputNode(const tensorflow::GraphDef& net,
78 const tensorflow::NodeDef& node,
81 CV_Assert(inpId < node.input_size());
82 std::string name = node.input(inpId);
83 // If operation produces several tensors, they are specified by index
84 // after ':' character. In example, "input:0".
85 name = name.substr(0, name.rfind(':'));
86 const int numNodes = net.node_size();
87 for (int i = 0; i < numNodes; ++i)
89 if (net.node(i).name() == name)
92 CV_ErrorNoReturn(Error::StsParseError, "Input node with name " + name + " not found");
95 // Match TensorFlow subgraph starting from <nodeId> with a set of nodes to be fused.
96 // Const nodes are skipped during matching. Returns true if nodes are matched and can be fused.
97 virtual bool match(const tensorflow::GraphDef& net, int nodeId, std::vector<int>& matchedNodesIds)
99 matchedNodesIds.clear();
100 matchedNodesIds.reserve(nodesToFuse.size());
102 int numNodes = net.node_size();
103 for (int i = 0; i < nodesToFuse.size(); ++i)
105 while (nodeId < numNodes && net.node(nodeId).op() == "Const")
109 if (nodeId > numNodes - 1)
112 const tensorflow::NodeDef& node = net.node(nodeId);
114 if (node.op() != nodes[nodesToFuse[i]])
117 std::vector<int>& inputNodes = inputs[nodesToFuse[i]];
118 if (inputNodes.size() != node.input_size())
120 for (int j = 0; j < inputNodes.size(); ++j)
122 if (nodes[inputNodes[j]].empty()) // Unknown input node type.
124 const tensorflow::NodeDef& inpNode = getInputNode(net, node, j);
125 if (inpNode.op() != nodes[inputNodes[j]])
129 matchedNodesIds.push_back(nodeId);
135 // Fuse matched subgraph.
136 void replace(tensorflow::GraphDef& net, const std::vector<int>& matchedNodesIds)
138 // Extract names of input nodes.
139 std::vector<std::string> inputsNames(fusedNodeInputs.size());
140 for (int i = 0; i < fusedNodeInputs.size(); ++i)
143 // Find input node name looking at inputs of fused nodes.
144 for (int j = 0; j < matchedNodesIds.size() && inpName.empty(); ++j)
146 const tensorflow::NodeDef &node = net.node(matchedNodesIds[j]);
147 std::vector<int>& inpIndices = inputs[nodesToFuse[j]];
149 CV_Assert(node.input_size() == inpIndices.size());
150 for (int k = 0; k < inpIndices.size(); ++k)
152 if (inpIndices[k] == fusedNodeInputs[i])
154 inpName = node.input(k);
159 CV_Assert(!inpName.empty());
160 inputsNames[i] = inpName;
163 // Remove matched nodes except the last one. Indices in ascending order are expected.
164 tensorflow::NodeDef* node = net.mutable_node(matchedNodesIds.back());
165 for (int i = matchedNodesIds.size() - 2; i >= 0; --i)
166 net.mutable_node()->DeleteSubrange(matchedNodesIds[i], 1);
168 // Modify the last node to be a fused one.
169 node->set_op(fusedNodeOp);
171 for (int i = 0; i < inputsNames.size(); ++i)
173 node->add_input(inputsNames[i]);
176 std::vector<tensorflow::NodeDef*> inputNodes(inputsNames.size());
177 for (int i = 0; i < inputsNames.size(); ++i)
179 inputNodes[i] = (tensorflow::NodeDef*)&getInputNode(net, *node, i);
181 finalize(net, node, inputNodes);
184 virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef*,
185 std::vector<tensorflow::NodeDef*>&) {}
188 std::vector<std::string> nodes; // Nodes to be matched in the origin graph.
189 std::vector<std::vector<int> > inputs; // Connections of an every node to it's inputs.
191 std::string fusedNodeOp; // Operation name of resulting fused node.
192 std::vector<int> nodesToFuse; // Set of nodes to be fused.
193 std::vector<int> fusedNodeInputs; // Inputs of fused node.
196 class BatchNormSubgraph : public Subgraph
201 int input = addNodeToMatch("");
202 int epsilon = addNodeToMatch("Const");
203 int moving_variance = addNodeToMatch("Const");
204 int moving_mean = addNodeToMatch("Const");
205 int beta = addNodeToMatch("Const");
206 int gamma = addNodeToMatch("Const");
207 int add = addNodeToMatch("Add", moving_variance, epsilon);
208 int rsqrt = addNodeToMatch("Rsqrt", add);
209 int mul = addNodeToMatch("Mul", rsqrt, gamma);
210 int mul_1 = addNodeToMatch("Mul", input, mul);
211 int mul_2 = addNodeToMatch("Mul", moving_mean, mul);
212 int sub = addNodeToMatch("Sub", beta, mul_2);
213 addNodeToMatch("Add", mul_1, sub);
215 setFusedNode("FusedBatchNorm", input, gamma, beta, moving_mean, moving_variance, epsilon);
218 virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef* fusedNode,
219 std::vector<tensorflow::NodeDef*>& inputNodes) CV_OVERRIDE
221 Mat epsMat = getTensorContent(inputNodes.back()->attr().at("value").tensor());
222 CV_Assert(epsMat.total() == 1, epsMat.type() == CV_32FC1);
224 fusedNode->mutable_input()->RemoveLast();
225 fusedNode->clear_attr();
226 tensorflow::AttrValue epsilon;
227 epsilon.set_f(epsMat.at<float>(0));
228 fusedNode->mutable_attr()->insert(MapPair<std::string, tensorflow::AttrValue>("epsilon", epsilon));
232 class BatchNormNoGammaSubgraph : public Subgraph
235 BatchNormNoGammaSubgraph()
237 int input = addNodeToMatch("");
238 int epsilon = addNodeToMatch("Const");
239 int moving_variance = addNodeToMatch("Const");
240 int moving_mean = addNodeToMatch("Const");
241 int beta = addNodeToMatch("Const");
242 int add = addNodeToMatch("Add", moving_variance, epsilon);
243 int rsqrt = addNodeToMatch("Rsqrt", add);
244 int mul = addNodeToMatch("Mul", input, rsqrt);
245 int mul_1 = addNodeToMatch("Mul", moving_mean, rsqrt);
246 int sub = addNodeToMatch("Sub", beta, mul_1);
247 addNodeToMatch("Add", mul, sub);
249 // There is a fake reference to beta that will be replaced to a new gamma tensor.
250 setFusedNode("FusedBatchNorm", input, beta, beta, moving_mean, moving_variance, epsilon);
253 virtual void finalize(tensorflow::GraphDef& net, tensorflow::NodeDef* fusedNode,
254 std::vector<tensorflow::NodeDef*>& inputNodes) CV_OVERRIDE
256 Mat epsMat = getTensorContent(inputNodes.back()->attr().at("value").tensor());
257 CV_Assert(epsMat.total() == 1, epsMat.type() == CV_32FC1);
259 fusedNode->mutable_input()->RemoveLast();
260 fusedNode->clear_attr();
261 tensorflow::AttrValue epsilon;
262 epsilon.set_f(epsMat.at<float>(0));
263 fusedNode->mutable_attr()->insert(MapPair<std::string, tensorflow::AttrValue>("epsilon", epsilon));
265 tensorflow::NodeDef* gamma = net.add_node();
266 gamma->set_op("Const");
267 gamma->set_name(fusedNode->name() + "/gamma");
268 // Just put a single value to recognize this node as Const.
269 gamma->mutable_attr()->insert(MapPair<std::string, tensorflow::AttrValue>("value", epsilon));
270 fusedNode->set_input(1, gamma->name());
274 // tf.contrib.layers.flatten
275 class FlattenSubgraph : public Subgraph
280 int input = addNodeToMatch("");
281 int shape = addNodeToMatch("Const");
282 int stack = addNodeToMatch("Const");
283 int stack_1 = addNodeToMatch("Const");
284 int stack_2 = addNodeToMatch("Const");
285 int strided_slice = addNodeToMatch("StridedSlice", shape, stack, stack_1, stack_2);
286 int shape_pack = addNodeToMatch("Const");
287 int pack = addNodeToMatch("Pack", strided_slice, shape_pack);
288 addNodeToMatch("Reshape", input, pack);
290 setFusedNode("Flatten", input);
294 // tf.contrib.layers.flatten in case of unknown batch size
295 class FlattenShapeSubgraph : public Subgraph
298 FlattenShapeSubgraph()
300 int input = addNodeToMatch("");
301 int shape = addNodeToMatch("Shape", input);
302 int stack = addNodeToMatch("Const");
303 int stack_1 = addNodeToMatch("Const");
304 int stack_2 = addNodeToMatch("Const");
305 int strided_slice = addNodeToMatch("StridedSlice", shape, stack, stack_1, stack_2);
306 int shape_pack = addNodeToMatch("Const");
307 int pack = addNodeToMatch("Pack", strided_slice, shape_pack);
308 addNodeToMatch("Reshape", input, pack);
310 setFusedNode("Flatten", input);
315 class SoftMaxKerasSubgraph : public Subgraph
318 SoftMaxKerasSubgraph()
320 int input = addNodeToMatch("");
321 int maxReductionIndices = addNodeToMatch("Const");
322 int smMax = addNodeToMatch("Max", input, maxReductionIndices);
323 int smSub = addNodeToMatch("Sub", input, smMax);
324 int smExp = addNodeToMatch("Exp", smSub);
325 int sumReductionIndices = addNodeToMatch("Const");
326 int smSum = addNodeToMatch("Sum", smExp, sumReductionIndices);
327 addNodeToMatch("RealDiv", smExp, smSum);
329 setFusedNode("Softmax", input);
333 class ReLU6KerasSubgraph : public Subgraph
338 int input = addNodeToMatch("");
339 int relu = addNodeToMatch("Relu", input);
340 int maxValue = addNodeToMatch("Const");
341 int clipValue = addNodeToMatch("Const");
342 int minimum = addNodeToMatch("Minimum", relu, maxValue);
343 addNodeToMatch("Maximum", minimum, clipValue);
345 setFusedNode("Relu6", input);
348 virtual bool match(const tensorflow::GraphDef& net, int nodeId, std::vector<int>& matchedNodesIds) CV_OVERRIDE
350 if (!Subgraph::match(net, nodeId, matchedNodesIds))
352 Mat maxValue = getTensorContent(net.node(nodeId + 1).attr().at("value").tensor());
353 return maxValue.type() == CV_32FC1 && maxValue.total() == 1 && maxValue.at<float>(0) == 6;
357 // Keras' reshape stores output shape in separate Const nodes by one value.
358 // Need to merge them into a single Const node.
359 class ReshapeKerasSubgraph : public Subgraph
362 ReshapeKerasSubgraph(int _numOutDims) : numOutDims(_numOutDims)
364 int input = addNodeToMatch("");
365 int shape = addNodeToMatch("Shape", input);
366 int stack = addNodeToMatch("Const");
367 int stack_1 = addNodeToMatch("Const");
368 int stack_2 = addNodeToMatch("Const");
369 int strided_slice = addNodeToMatch("StridedSlice", shape, stack, stack_1, stack_2);
371 std::vector<int> ids(1 + numOutDims);
372 ids[0] = strided_slice;
373 for (int i = 0; i < numOutDims; ++i)
374 ids[1 + i] = addNodeToMatch("Const");
375 int pack = addNodeToMatch("Pack", ids);
376 addNodeToMatch("Reshape", input, pack);
379 setFusedNode("Reshape", ids);
382 virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef* fusedNode,
383 std::vector<tensorflow::NodeDef*>& inputNodes) CV_OVERRIDE
385 std::vector<int> shape(numOutDims + 1); // batch size in Keras is implicit.
387 for (int i = 0; i < numOutDims; ++i)
389 shape[1 + i] = inputNodes[1 + i]->attr().at("value").tensor().int_val(0);
391 tensorflow::TensorProto* shapeTensor = inputNodes[1]->mutable_attr()->at("value").mutable_tensor();
392 fusedNode->mutable_input()->DeleteSubrange(2, numOutDims - 1);
394 shapeTensor->clear_int_val();
395 for (int i = 0; i < shape.size(); ++i)
397 shapeTensor->add_int_val(shape[i]);
405 class L2NormalizeSubgraph : public Subgraph
408 L2NormalizeSubgraph()
410 int input = addNodeToMatch("");
411 int square = addNodeToMatch("Square", input);
412 int reductionIndices = addNodeToMatch("Const");
413 int sum = addNodeToMatch("Sum", square, reductionIndices);
414 int y = addNodeToMatch("Const");
415 int maximum = addNodeToMatch("Maximum", sum, y);
416 int rsqrt = addNodeToMatch("Rsqrt", maximum);
417 addNodeToMatch("Mul", input, rsqrt);
418 setFusedNode("L2Normalize", input, reductionIndices);
422 void simplifySubgraphs(tensorflow::GraphDef& net)
424 std::vector<Ptr<Subgraph> > subgraphs;
425 subgraphs.push_back(Ptr<Subgraph>(new BatchNormSubgraph()));
426 subgraphs.push_back(Ptr<Subgraph>(new BatchNormNoGammaSubgraph()));
427 subgraphs.push_back(Ptr<Subgraph>(new FlattenSubgraph()));
428 subgraphs.push_back(Ptr<Subgraph>(new FlattenShapeSubgraph()));
429 subgraphs.push_back(Ptr<Subgraph>(new SoftMaxKerasSubgraph()));
430 subgraphs.push_back(Ptr<Subgraph>(new ReLU6KerasSubgraph()));
431 subgraphs.push_back(Ptr<Subgraph>(new ReshapeKerasSubgraph(3)));
432 subgraphs.push_back(Ptr<Subgraph>(new L2NormalizeSubgraph()));
434 int numNodes = net.node_size();
435 std::vector<int> matchedNodesIds;
436 for (int i = 0; i < numNodes; ++i)
438 for (int j = 0; j < subgraphs.size(); ++j)
440 if (subgraphs[j]->match(net, i, matchedNodesIds))
442 subgraphs[j]->replace(net, matchedNodesIds);
443 numNodes -= matchedNodesIds.size() - 1; // #matchedNodes removed and one added.
450 void RemoveIdentityOps(tensorflow::GraphDef& net)
452 typedef std::map<String, String> IdentityOpsMap;
453 IdentityOpsMap identity_ops;
455 std::vector<int> identity_ops_idx;
457 int layersCount = net.node_size();
458 for (int li = 0; li < layersCount; li++)
460 const tensorflow::NodeDef &layer = net.node(li);
461 String type = layer.op();
463 if (type == "Identity" || type == "Dropout") {
464 identity_ops_idx.push_back(li);
465 identity_ops[layer.name()] = layer.input(0);
469 for (int li = 0; li < layersCount; li++)
471 tensorflow::NodeDef* layer = net.mutable_node(li);
472 for (int input_id = 0; input_id < layer->input_size(); input_id++) {
473 String input_op_name = layer->input(input_id);
474 IdentityOpsMap::iterator it = identity_ops.find(input_op_name);
476 if (it != identity_ops.end()) {
477 layer->set_input(input_id, it->second);
482 std::sort(identity_ops_idx.begin(), identity_ops_idx.end());
484 int removed_nodes = 0;
485 for(size_t i = 0; i < identity_ops_idx.size(); i++) {
486 int start_id = identity_ops_idx[i] - removed_nodes;
487 net.mutable_node()->DeleteSubrange(start_id, 1);
492 Mat getTensorContent(const tensorflow::TensorProto &tensor)
494 std::string content = tensor.tensor_content();
495 switch (tensor.dtype())
497 case tensorflow::DT_FLOAT:
499 if (!content.empty())
500 return Mat(1, content.size() / sizeof(float), CV_32FC1, (void*)content.c_str()).clone();
503 const RepeatedField<float>& field = tensor.float_val();
504 CV_Assert(!field.empty());
505 return Mat(1, field.size(), CV_32FC1, (void*)field.data()).clone();
508 case tensorflow::DT_DOUBLE:
510 if (!content.empty())
511 return Mat(1, content.size() / sizeof(double), CV_64FC1, (void*)content.c_str()).clone();
514 const RepeatedField<double>& field = tensor.double_val();
515 CV_Assert(!field.empty());
516 return Mat(1, field.size(), CV_64FC1, (void*)field.data()).clone();
519 case tensorflow::DT_INT32:
521 if (!content.empty())
522 return Mat(1, content.size() / sizeof(int32_t), CV_32SC1, (void*)content.c_str()).clone();
525 const RepeatedField<int32_t>& field = tensor.int_val();
526 CV_Assert(!field.empty());
527 return Mat(1, field.size(), CV_32SC1, (void*)field.data()).clone();
530 case tensorflow::DT_HALF:
533 if (!content.empty())
535 static const int kHalfSize = 2;
536 halfs = Mat(1, content.size() / kHalfSize, CV_16UC1, (void*)content.c_str());
540 const RepeatedField<int32_t>& field = tensor.half_val();
541 CV_Assert(!field.empty());
542 Mat ints(1, field.size(), CV_32SC1, (void*)field.data());
543 ints.convertTo(halfs, CV_16UC1);
545 // Reinterpret as a signed shorts just for a convertFp16 call.
546 Mat halfsSigned(halfs.size(), CV_16SC1, halfs.data);
547 Mat floats(halfs.size(), CV_32FC1);
548 convertFp16(halfsSigned, floats);
551 case tensorflow::DT_QUINT8:
553 CV_Assert(!content.empty());
554 return Mat(1, content.size(), CV_8UC1, (void*)content.c_str()).clone();
557 CV_Error(Error::StsError, "Tensor's data type is not supported");
563 CV__DNN_EXPERIMENTAL_NS_END
564 }} // namespace dnn, namespace cv
566 #endif // HAVE_PROTOBUF