modules/dnn/src/tensorflow/tf_graph_simplifier.cpp

   1 // This file is part of OpenCV project.
   2 // It is subject to the license terms in the LICENSE file found in the top-level directory
   3 // of this distribution and at http://opencv.org/license.html.
   4
   5 // Copyright (C) 2018, Intel Corporation, all rights reserved.
   6 // Third party copyrights are property of their respective owners.
   7
   8 #ifdef HAVE_PROTOBUF
   9
  10 #include "tf_graph_simplifier.hpp"
  11
  12 namespace cv { namespace dnn {
  13 CV__DNN_EXPERIMENTAL_NS_BEGIN
  14
  15 using ::google::protobuf::RepeatedField;
  16 using ::google::protobuf::MapPair;
  17
  18 class Subgraph  // Interface to match and replace TensorFlow subgraphs.
  19 {
  20 public:
  21     // Add a node to be matched in the origin graph. Specify ids of nodes that
  22     // are expected to be inputs. Returns id of a newly added node.
  23     // TODO: Replace inputs to std::vector<int> in C++11
  24     int addNodeToMatch(const std::string& op, int input_0 = -1, int input_1 = -1,
  25                        int input_2 = -1, int input_3 = -1)
  26     {
  27         int nodeInputs[] = {input_0, input_1, input_2, input_3};
  28         int numInputs = 0;
  29         for (int i = 0; i < 4; ++i)
  30         {
  31             numInputs += (int)(nodeInputs[i] != -1);
  32         }
  33         return addNodeToMatch(op, std::vector<int>(&nodeInputs[0], &nodeInputs[0] + numInputs));
  34     }
  35
  36     int addNodeToMatch(const std::string& op, const std::vector<int>& inputs_)
  37     {
  38         for (int i = 0; i < inputs_.size(); ++i)
  39         {
  40             CV_Assert(inputs_[i] < (int)nodes.size());
  41         }
  42         nodes.push_back(op);
  43         inputs.push_back(inputs_);
  44         return nodes.size() - 1;
  45     }
  46
  47     // Specify resulting node. All the matched nodes in subgraph excluding
  48     // input nodes will be fused into this single node.
  49     // TODO: Replace inputs to std::vector<int> in C++11
  50     void setFusedNode(const std::string& op, int input_0 = -1, int input_1 = -1,
  51                       int input_2 = -1, int input_3 = -1, int input_4 = -1,
  52                       int input_5 = -1)
  53     {
  54         int nodeInputs[] = {input_0, input_1, input_2, input_3, input_4, input_5};
  55         int numInputs = 0;
  56         for (int i = 0; i < 6; ++i)
  57         {
  58             CV_Assert(nodeInputs[i] < (int)nodes.size());
  59             numInputs += (int)(nodeInputs[i] != -1);
  60         }
  61         setFusedNode(op, std::vector<int>(&nodeInputs[0], &nodeInputs[0] + numInputs));
  62     }
  63
  64     void setFusedNode(const std::string& op, const std::vector<int>& inputs_)
  65     {
  66         fusedNodeInputs = inputs_;
  67         fusedNodeOp = op;
  68         nodesToFuse.clear();
  69         for (int i = 0; i < nodes.size(); ++i)
  70         {
  71             if (std::find(fusedNodeInputs.begin(), fusedNodeInputs.end(), i) == fusedNodeInputs.end() &&
  72                 nodes[i] != "Const")
  73                 nodesToFuse.push_back(i);
  74         }
  75     }
  76
  77     static const tensorflow::NodeDef& getInputNode(const tensorflow::GraphDef& net,
  78                                                    const tensorflow::NodeDef& node,
  79                                                    int inpId)
  80     {
  81         CV_Assert(inpId < node.input_size());
  82         std::string name = node.input(inpId);
  83         // If operation produces several tensors, they are specified by index
  84         // after ':' character. In example, "input:0".
  85         name = name.substr(0, name.rfind(':'));
  86         const int numNodes = net.node_size();
  87         for (int i = 0; i < numNodes; ++i)
  88         {
  89             if (net.node(i).name() == name)
  90                 return net.node(i);
  91         }
  92         CV_ErrorNoReturn(Error::StsParseError, "Input node with name " + name + " not found");
  93     }
  94
  95     // Match TensorFlow subgraph starting from <nodeId> with a set of nodes to be fused.
  96     // Const nodes are skipped during matching. Returns true if nodes are matched and can be fused.
  97     virtual bool match(const tensorflow::GraphDef& net, int nodeId, std::vector<int>& matchedNodesIds)
  98     {
  99         matchedNodesIds.clear();
 100         matchedNodesIds.reserve(nodesToFuse.size());
 101
 102         int numNodes = net.node_size();
 103         for (int i = 0; i < nodesToFuse.size(); ++i)
 104         {
 105             while (nodeId < numNodes && net.node(nodeId).op() == "Const")
 106             {
 107                 nodeId += 1;
 108             }
 109             if (nodeId > numNodes - 1)
 110                 return false;
 111
 112             const tensorflow::NodeDef& node = net.node(nodeId);
 113
 114             if (node.op() != nodes[nodesToFuse[i]])
 115                 return false;
 116
 117             std::vector<int>& inputNodes = inputs[nodesToFuse[i]];
 118             if (inputNodes.size() != node.input_size())
 119                 return false;
 120             for (int j = 0; j < inputNodes.size(); ++j)
 121             {
 122                 if (nodes[inputNodes[j]].empty())  // Unknown input node type.
 123                     continue;
 124                 const tensorflow::NodeDef& inpNode = getInputNode(net, node, j);
 125                 if (inpNode.op() != nodes[inputNodes[j]])
 126                     return false;
 127             }
 128
 129             matchedNodesIds.push_back(nodeId);
 130             nodeId += 1;
 131         }
 132         return true;
 133     }
 134
 135     // Fuse matched subgraph.
 136     void replace(tensorflow::GraphDef& net, const std::vector<int>& matchedNodesIds)
 137     {
 138         // Extract names of input nodes.
 139         std::vector<std::string> inputsNames(fusedNodeInputs.size());
 140         for (int i = 0; i < fusedNodeInputs.size(); ++i)
 141         {
 142             std::string inpName;
 143             // Find input node name looking at inputs of fused nodes.
 144             for (int j = 0; j < matchedNodesIds.size() && inpName.empty(); ++j)
 145             {
 146                 const tensorflow::NodeDef &node = net.node(matchedNodesIds[j]);
 147                 std::vector<int>& inpIndices = inputs[nodesToFuse[j]];
 148
 149                 CV_Assert(node.input_size() == inpIndices.size());
 150                 for (int k = 0; k < inpIndices.size(); ++k)
 151                 {
 152                     if (inpIndices[k] == fusedNodeInputs[i])
 153                     {
 154                         inpName = node.input(k);
 155                         break;
 156                     }
 157                 }
 158             }
 159             CV_Assert(!inpName.empty());
 160             inputsNames[i] = inpName;
 161         }
 162
 163         // Remove matched nodes except the last one. Indices in ascending order are expected.
 164         tensorflow::NodeDef* node = net.mutable_node(matchedNodesIds.back());
 165         for (int i = matchedNodesIds.size() - 2; i >= 0; --i)
 166             net.mutable_node()->DeleteSubrange(matchedNodesIds[i], 1);
 167
 168         // Modify the last node to be a fused one.
 169         node->set_op(fusedNodeOp);
 170         node->clear_input();
 171         for (int i = 0; i < inputsNames.size(); ++i)
 172         {
 173             node->add_input(inputsNames[i]);
 174         }
 175
 176         std::vector<tensorflow::NodeDef*> inputNodes(inputsNames.size());
 177         for (int i = 0; i < inputsNames.size(); ++i)
 178         {
 179             inputNodes[i] = (tensorflow::NodeDef*)&getInputNode(net, *node, i);
 180         }
 181         finalize(net, node, inputNodes);
 182     }
 183
 184     virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef*,
 185                           std::vector<tensorflow::NodeDef*>&) {}
 186
 187 private:
 188     std::vector<std::string> nodes;         // Nodes to be matched in the origin graph.
 189     std::vector<std::vector<int> > inputs;  // Connections of an every node to it's inputs.
 190
 191     std::string fusedNodeOp;           // Operation name of resulting fused node.
 192     std::vector<int> nodesToFuse;      // Set of nodes to be fused.
 193     std::vector<int> fusedNodeInputs;  // Inputs of fused node.
 194 };
 195
 196 class BatchNormSubgraph : public Subgraph
 197 {
 198 public:
 199     BatchNormSubgraph()
 200     {
 201         int input = addNodeToMatch("");
 202         int epsilon = addNodeToMatch("Const");
 203         int moving_variance = addNodeToMatch("Const");
 204         int moving_mean = addNodeToMatch("Const");
 205         int beta = addNodeToMatch("Const");
 206         int gamma = addNodeToMatch("Const");
 207         int add = addNodeToMatch("Add", moving_variance, epsilon);
 208         int rsqrt = addNodeToMatch("Rsqrt", add);
 209         int mul = addNodeToMatch("Mul", rsqrt, gamma);
 210         int mul_1 = addNodeToMatch("Mul", input, mul);
 211         int mul_2 = addNodeToMatch("Mul", moving_mean, mul);
 212         int sub = addNodeToMatch("Sub", beta, mul_2);
 213         addNodeToMatch("Add", mul_1, sub);
 214
 215         setFusedNode("FusedBatchNorm", input, gamma, beta, moving_mean, moving_variance, epsilon);
 216     }
 217
 218     virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef* fusedNode,
 219                           std::vector<tensorflow::NodeDef*>& inputNodes) CV_OVERRIDE
 220     {
 221         Mat epsMat = getTensorContent(inputNodes.back()->attr().at("value").tensor());
 222         CV_Assert(epsMat.total() == 1, epsMat.type() == CV_32FC1);
 223
 224         fusedNode->mutable_input()->RemoveLast();
 225         fusedNode->clear_attr();
 226         tensorflow::AttrValue epsilon;
 227         epsilon.set_f(epsMat.at<float>(0));
 228         fusedNode->mutable_attr()->insert(MapPair<std::string, tensorflow::AttrValue>("epsilon", epsilon));
 229     }
 230 };
 231
 232 class BatchNormNoGammaSubgraph : public Subgraph
 233 {
 234 public:
 235     BatchNormNoGammaSubgraph()
 236     {
 237         int input = addNodeToMatch("");
 238         int epsilon = addNodeToMatch("Const");
 239         int moving_variance = addNodeToMatch("Const");
 240         int moving_mean = addNodeToMatch("Const");
 241         int beta = addNodeToMatch("Const");
 242         int add = addNodeToMatch("Add", moving_variance, epsilon);
 243         int rsqrt = addNodeToMatch("Rsqrt", add);
 244         int mul = addNodeToMatch("Mul", input, rsqrt);
 245         int mul_1 = addNodeToMatch("Mul", moving_mean, rsqrt);
 246         int sub = addNodeToMatch("Sub", beta, mul_1);
 247         addNodeToMatch("Add", mul, sub);
 248
 249         // There is a fake reference to beta that will be replaced to a new gamma tensor.
 250         setFusedNode("FusedBatchNorm", input, beta, beta, moving_mean, moving_variance, epsilon);
 251     }
 252
 253     virtual void finalize(tensorflow::GraphDef& net, tensorflow::NodeDef* fusedNode,
 254                           std::vector<tensorflow::NodeDef*>& inputNodes) CV_OVERRIDE
 255     {
 256         Mat epsMat = getTensorContent(inputNodes.back()->attr().at("value").tensor());
 257         CV_Assert(epsMat.total() == 1, epsMat.type() == CV_32FC1);
 258
 259         fusedNode->mutable_input()->RemoveLast();
 260         fusedNode->clear_attr();
 261         tensorflow::AttrValue epsilon;
 262         epsilon.set_f(epsMat.at<float>(0));
 263         fusedNode->mutable_attr()->insert(MapPair<std::string, tensorflow::AttrValue>("epsilon", epsilon));
 264
 265         tensorflow::NodeDef* gamma = net.add_node();
 266         gamma->set_op("Const");
 267         gamma->set_name(fusedNode->name() + "/gamma");
 268         // Just put a single value to recognize this node as Const.
 269         gamma->mutable_attr()->insert(MapPair<std::string, tensorflow::AttrValue>("value", epsilon));
 270         fusedNode->set_input(1, gamma->name());
 271     }
 272 };
 273
 274 // tf.contrib.layers.flatten
 275 class FlattenSubgraph : public Subgraph
 276 {
 277 public:
 278     FlattenSubgraph()
 279     {
 280         int input = addNodeToMatch("");
 281         int shape = addNodeToMatch("Const");
 282         int stack = addNodeToMatch("Const");
 283         int stack_1 = addNodeToMatch("Const");
 284         int stack_2 = addNodeToMatch("Const");
 285         int strided_slice = addNodeToMatch("StridedSlice", shape, stack, stack_1, stack_2);
 286         int shape_pack = addNodeToMatch("Const");
 287         int pack = addNodeToMatch("Pack", strided_slice, shape_pack);
 288         addNodeToMatch("Reshape", input, pack);
 289
 290         setFusedNode("Flatten", input);
 291     }
 292 };
 293
 294 // tf.contrib.layers.flatten in case of unknown batch size
 295 class FlattenShapeSubgraph : public Subgraph
 296 {
 297 public:
 298     FlattenShapeSubgraph()
 299     {
 300         int input = addNodeToMatch("");
 301         int shape = addNodeToMatch("Shape", input);
 302         int stack = addNodeToMatch("Const");
 303         int stack_1 = addNodeToMatch("Const");
 304         int stack_2 = addNodeToMatch("Const");
 305         int strided_slice = addNodeToMatch("StridedSlice", shape, stack, stack_1, stack_2);
 306         int shape_pack = addNodeToMatch("Const");
 307         int pack = addNodeToMatch("Pack", strided_slice, shape_pack);
 308         addNodeToMatch("Reshape", input, pack);
 309
 310         setFusedNode("Flatten", input);
 311     }
 312 };
 313
 314 // K.layers.Softmax
 315 class SoftMaxKerasSubgraph : public Subgraph
 316 {
 317 public:
 318     SoftMaxKerasSubgraph()
 319     {
 320         int input = addNodeToMatch("");
 321         int maxReductionIndices = addNodeToMatch("Const");
 322         int smMax = addNodeToMatch("Max", input, maxReductionIndices);
 323         int smSub = addNodeToMatch("Sub", input, smMax);
 324         int smExp = addNodeToMatch("Exp", smSub);
 325         int sumReductionIndices = addNodeToMatch("Const");
 326         int smSum = addNodeToMatch("Sum", smExp, sumReductionIndices);
 327         addNodeToMatch("RealDiv", smExp, smSum);
 328
 329         setFusedNode("Softmax", input);
 330     }
 331 };
 332
 333 class ReLU6KerasSubgraph : public Subgraph
 334 {
 335 public:
 336     ReLU6KerasSubgraph()
 337     {
 338         int input = addNodeToMatch("");
 339         int relu = addNodeToMatch("Relu", input);
 340         int maxValue = addNodeToMatch("Const");
 341         int clipValue = addNodeToMatch("Const");
 342         int minimum = addNodeToMatch("Minimum", relu, maxValue);
 343         addNodeToMatch("Maximum", minimum, clipValue);
 344
 345         setFusedNode("Relu6", input);
 346     }
 347
 348     virtual bool match(const tensorflow::GraphDef& net, int nodeId, std::vector<int>& matchedNodesIds) CV_OVERRIDE
 349     {
 350         if (!Subgraph::match(net, nodeId, matchedNodesIds))
 351             return false;
 352         Mat maxValue = getTensorContent(net.node(nodeId + 1).attr().at("value").tensor());
 353         return maxValue.type() == CV_32FC1 && maxValue.total() == 1 && maxValue.at<float>(0) == 6;
 354     }
 355 };
 356
 357 // Keras' reshape stores output shape in separate Const nodes by one value.
 358 // Need to merge them into a single Const node.
 359 class ReshapeKerasSubgraph : public Subgraph
 360 {
 361 public:
 362     ReshapeKerasSubgraph(int _numOutDims) : numOutDims(_numOutDims)
 363     {
 364         int input = addNodeToMatch("");
 365         int shape = addNodeToMatch("Shape", input);
 366         int stack = addNodeToMatch("Const");
 367         int stack_1 = addNodeToMatch("Const");
 368         int stack_2 = addNodeToMatch("Const");
 369         int strided_slice = addNodeToMatch("StridedSlice", shape, stack, stack_1, stack_2);
 370
 371         std::vector<int> ids(1 + numOutDims);
 372         ids[0] = strided_slice;
 373         for (int i = 0; i < numOutDims; ++i)
 374             ids[1 + i] = addNodeToMatch("Const");
 375         int pack = addNodeToMatch("Pack", ids);
 376         addNodeToMatch("Reshape", input, pack);
 377
 378         ids[0] = input;
 379         setFusedNode("Reshape", ids);
 380     }
 381
 382     virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef* fusedNode,
 383                           std::vector<tensorflow::NodeDef*>& inputNodes) CV_OVERRIDE
 384     {
 385         std::vector<int> shape(numOutDims + 1);  // batch size in Keras is implicit.
 386         shape[0] = -1;
 387         for (int i = 0; i < numOutDims; ++i)
 388         {
 389             shape[1 + i] = inputNodes[1 + i]->attr().at("value").tensor().int_val(0);
 390         }
 391         tensorflow::TensorProto* shapeTensor = inputNodes[1]->mutable_attr()->at("value").mutable_tensor();
 392         fusedNode->mutable_input()->DeleteSubrange(2, numOutDims - 1);
 393
 394         shapeTensor->clear_int_val();
 395         for (int i = 0; i < shape.size(); ++i)
 396         {
 397             shapeTensor->add_int_val(shape[i]);
 398         }
 399     }
 400
 401 private:
 402     int numOutDims;
 403 };
 404
 405 class L2NormalizeSubgraph : public Subgraph
 406 {
 407 public:
 408     L2NormalizeSubgraph()
 409     {
 410         int input = addNodeToMatch("");
 411         int square = addNodeToMatch("Square", input);
 412         int reductionIndices = addNodeToMatch("Const");
 413         int sum = addNodeToMatch("Sum", square, reductionIndices);
 414         int y = addNodeToMatch("Const");
 415         int maximum = addNodeToMatch("Maximum", sum, y);
 416         int rsqrt = addNodeToMatch("Rsqrt", maximum);
 417         addNodeToMatch("Mul", input, rsqrt);
 418         setFusedNode("L2Normalize", input, reductionIndices);
 419     }
 420 };
 421
 422 void simplifySubgraphs(tensorflow::GraphDef& net)
 423 {
 424     std::vector<Ptr<Subgraph> > subgraphs;
 425     subgraphs.push_back(Ptr<Subgraph>(new BatchNormSubgraph()));
 426     subgraphs.push_back(Ptr<Subgraph>(new BatchNormNoGammaSubgraph()));
 427     subgraphs.push_back(Ptr<Subgraph>(new FlattenSubgraph()));
 428     subgraphs.push_back(Ptr<Subgraph>(new FlattenShapeSubgraph()));
 429     subgraphs.push_back(Ptr<Subgraph>(new SoftMaxKerasSubgraph()));
 430     subgraphs.push_back(Ptr<Subgraph>(new ReLU6KerasSubgraph()));
 431     subgraphs.push_back(Ptr<Subgraph>(new ReshapeKerasSubgraph(3)));
 432     subgraphs.push_back(Ptr<Subgraph>(new L2NormalizeSubgraph()));
 433
 434     int numNodes = net.node_size();
 435     std::vector<int> matchedNodesIds;
 436     for (int i = 0; i < numNodes; ++i)
 437     {
 438         for (int j = 0; j < subgraphs.size(); ++j)
 439         {
 440             if (subgraphs[j]->match(net, i, matchedNodesIds))
 441             {
 442                 subgraphs[j]->replace(net, matchedNodesIds);
 443                 numNodes -= matchedNodesIds.size() - 1;  // #matchedNodes removed and one added.
 444                 break;
 445             }
 446         }
 447     }
 448 }
 449
 450 void RemoveIdentityOps(tensorflow::GraphDef& net)
 451 {
 452     typedef std::map<String, String>  IdentityOpsMap;
 453     IdentityOpsMap identity_ops;
 454
 455     std::vector<int> identity_ops_idx;
 456
 457     int layersCount = net.node_size();
 458     for (int li = 0; li < layersCount; li++)
 459     {
 460         const tensorflow::NodeDef &layer = net.node(li);
 461         String type = layer.op();
 462
 463         if (type == "Identity" || type == "Dropout") {
 464             identity_ops_idx.push_back(li);
 465             identity_ops[layer.name()] = layer.input(0);
 466         }
 467     }
 468
 469     for (int li = 0; li < layersCount; li++)
 470     {
 471         tensorflow::NodeDef* layer = net.mutable_node(li);
 472         for (int input_id = 0; input_id < layer->input_size(); input_id++) {
 473             String input_op_name = layer->input(input_id);
 474             IdentityOpsMap::iterator it = identity_ops.find(input_op_name);
 475
 476             if (it != identity_ops.end()) {
 477                 layer->set_input(input_id, it->second);
 478             }
 479         }
 480     }
 481
 482     std::sort(identity_ops_idx.begin(), identity_ops_idx.end());
 483
 484     int removed_nodes = 0;
 485     for(size_t i = 0; i < identity_ops_idx.size(); i++) {
 486         int start_id = identity_ops_idx[i] - removed_nodes;
 487         net.mutable_node()->DeleteSubrange(start_id, 1);
 488         removed_nodes++;
 489     }
 490 }
 491
 492 Mat getTensorContent(const tensorflow::TensorProto &tensor)
 493 {
 494     std::string content = tensor.tensor_content();
 495     switch (tensor.dtype())
 496     {
 497         case tensorflow::DT_FLOAT:
 498         {
 499             if (!content.empty())
 500                 return Mat(1, content.size() / sizeof(float), CV_32FC1, (void*)content.c_str()).clone();
 501             else
 502             {
 503                 const RepeatedField<float>& field = tensor.float_val();
 504                 CV_Assert(!field.empty());
 505                 return Mat(1, field.size(), CV_32FC1, (void*)field.data()).clone();
 506             }
 507         }
 508         case tensorflow::DT_DOUBLE:
 509         {
 510             if (!content.empty())
 511                 return Mat(1, content.size() / sizeof(double), CV_64FC1, (void*)content.c_str()).clone();
 512             else
 513             {
 514                 const RepeatedField<double>& field = tensor.double_val();
 515                 CV_Assert(!field.empty());
 516                 return Mat(1, field.size(), CV_64FC1, (void*)field.data()).clone();
 517             }
 518         }
 519         case tensorflow::DT_INT32:
 520         {
 521             if (!content.empty())
 522                 return Mat(1, content.size() / sizeof(int32_t), CV_32SC1, (void*)content.c_str()).clone();
 523             else
 524             {
 525                 const RepeatedField<int32_t>& field = tensor.int_val();
 526                 CV_Assert(!field.empty());
 527                 return Mat(1, field.size(), CV_32SC1, (void*)field.data()).clone();
 528             }
 529         }
 530         case tensorflow::DT_HALF:
 531         {
 532             Mat halfs;
 533             if (!content.empty())
 534             {
 535                 static const int kHalfSize = 2;
 536                 halfs = Mat(1, content.size() / kHalfSize, CV_16UC1, (void*)content.c_str());
 537             }
 538             else
 539             {
 540                 const RepeatedField<int32_t>& field = tensor.half_val();
 541                 CV_Assert(!field.empty());
 542                 Mat ints(1, field.size(), CV_32SC1, (void*)field.data());
 543                 ints.convertTo(halfs, CV_16UC1);
 544             }
 545             // Reinterpret as a signed shorts just for a convertFp16 call.
 546             Mat halfsSigned(halfs.size(), CV_16SC1, halfs.data);
 547             Mat floats(halfs.size(), CV_32FC1);
 548             convertFp16(halfsSigned, floats);
 549             return floats;
 550         }
 551         case tensorflow::DT_QUINT8:
 552         {
 553             CV_Assert(!content.empty());
 554             return Mat(1, content.size(), CV_8UC1, (void*)content.c_str()).clone();
 555         }
 556         default:
 557             CV_Error(Error::StsError, "Tensor's data type is not supported");
 558             break;
 559     }
 560     return Mat();
 561 }
 562
 563 CV__DNN_EXPERIMENTAL_NS_END
 564 }}  // namespace dnn, namespace cv
 565
 566 #endif  // HAVE_PROTOBUF