1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 // Copyright (C) 2016, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
9 Implementation of Tensorflow models parser
12 #include "../precomp.hpp"
22 #include "tf_graph_simplifier.hpp"
27 CV__DNN_EXPERIMENTAL_NS_BEGIN
31 using ::google::protobuf::RepeatedField;
32 using ::google::protobuf::RepeatedPtrField;
33 using ::google::protobuf::Message;
34 using ::google::protobuf::Descriptor;
35 using ::google::protobuf::FieldDescriptor;
36 using ::google::protobuf::Reflection;
41 static int toNCHW(int idx)
43 CV_Assert(-4 <= idx && idx < 4);
44 if (idx == 0) return 0;
45 else if (idx > 0) return idx % 3 + 1;
46 else return (4 + idx) % 3 + 1;
49 // These values are used to indicate the layer output's data layout, where possible.
55 DATA_LAYOUT_PLANAR // 2-dimensional outputs (matmul, flatten, reshape to 2d)
// List of (layer name, node index) pairs — used to track graph consumers.
58 typedef std::vector<std::pair<String, int> > StrIntVector;
// Pin identifies one output blob of a node: producer name + output index.
// NOTE(review): the struct header and part of the constructors are on
// elided lines of this listing.
62 Pin(const std::string &_name, int _blobIndex = 0) :
63 name(_name), blobIndex(_blobIndex) {}
// Default pin: empty name, invalid (-1) blob index.
66 name(""), blobIndex(-1) {}
// Extracts a MatShape from a TensorProto: one int per declared dimension,
// a single-element shape {1} for rank-0 (scalar) tensors, and a hard error
// when the tensor carries no shape information at all.
// NOTE(review): braces and presumably a shape.resize(n) are on elided
// lines of this listing — confirm against the full source.
72 void blobShapeFromTensor(const tensorflow::TensorProto &tensor, MatShape& shape)
75 if (tensor.has_tensor_shape())
77 const tensorflow::TensorShapeProto &_shape = tensor.tensor_shape();
78 int i, n = _shape.dim_size();
// Copy each dimension, truncating the proto's int64 size to int.
83 for (i = 0; i < n; i++)
84 shape[i] = (int)_shape.dim(i).size();
// Rank-0 tensors are represented as a single-element shape.
87 shape.resize(1, 1); // Scalar.
91 CV_Error(Error::StsError, "Unknown shape of input tensor");
// Copies a TensorProto with elements of type T into dstBlob as CV_32F.
// 4-D tensors are physically reordered from TensorFlow's NHWC layout to
// OpenCV's NCHW; all other ranks are copied element-by-element.
// NOTE(review): the template<typename T> header, the local `shape`
// declaration and the dims == 4 branch structure are on elided lines.
96 void parseTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
99 blobShapeFromTensor(tensor, shape);
100 int dims = (int)shape.size();
104 // REORDER blob NHWC to NCHW
105 swap(shape[2], shape[3]); // NHCW
106 swap(shape[1], shape[2]); // NCHW
109 dstBlob.create(shape, CV_32F);
111 Mat tensorContent = getTensorContent(tensor);
112 int size = tensorContent.total();
113 CV_Assert(size == (int)dstBlob.total());
115 float *dstData = dstBlob.ptr<float>();
116 const T *data = reinterpret_cast<const T*>(tensorContent.data);
// NHWC -> NCHW shuffle: dst walks n,c,h,w order while src recomputes the
// channels-last (interleaved) offset of the same element.
120 int num = shape[0], channels = shape[1], height = shape[2], width = shape[3];
121 int total = num*channels*height*width;
122 for(int i_n = 0; i_n < shape[0]; i_n++) {
123 for(int i_c = 0; i_c < shape[1]; i_c++) {
124 for(int i_h = 0; i_h < shape[2]; i_h++) {
125 for(int i_w = 0; i_w < shape[3]; i_w++) {
126 int dst_i = channels*height*width*i_n + height*width*i_c + width*i_h + i_w;
127 int src_i = channels*height*width*i_n + i_c + channels*width*i_h + channels*i_w;
129 CV_Assert(dst_i < total);
130 CV_Assert(src_i < total);
132 dstData[dst_i] = data[src_i];
// Non-4D case: straight copy with T -> float conversion.
138 for (int i = 0; i < size; i++)
139 dstData[i] = data[i];
// Dispatches tensor parsing on dtype: FLOAT and HALF use the float path
// (HALF is presumably widened earlier, in getTensorContent — confirm),
// DOUBLE uses the double path; any other dtype is rejected.
// NOTE(review): the switch's break statements are on elided lines.
143 void blobFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
145 switch (tensor.dtype()) {
146 case tensorflow::DT_FLOAT:
147 case tensorflow::DT_HALF:
148 parseTensor<float>(tensor, dstBlob);
150 case tensorflow::DT_DOUBLE:
151 parseTensor<double>(tensor, dstBlob);
154 CV_Error(Error::StsError, "Tensor's data type is not supported");
// Debug helper: dumps the integer items of an attribute list to stdout.
159 void printList(const tensorflow::AttrValue::ListValue &val)
162 for (int i = 0; i < val.i_size(); i++)
163 std::cout << " " << val.i(i);
// Debug helper: prints each tensor dimension as "name:size" on one line.
167 void printTensorShape(const tensorflow::TensorShapeProto &shape)
170 for (int d = 0; d < shape.dim_size(); d++)
171 std::cout << shape.dim(d).name() <<
172 ":" << shape.dim(d).size() << " ";
// Debug helper: prints a tensor's shape and up to its first 10 elements.
// Only raw tensor_content of DT_FLOAT / DT_INT32 is handled; other dtypes
// raise, and tensors without raw content take an elided early-out branch.
176 void printTensor(const tensorflow::TensorProto &tensor)
178 printTensorShape(tensor.tensor_shape());
180 if (tensor.tensor_content().empty())
183 switch (tensor.dtype())
185 case tensorflow::DT_FLOAT:
187 const float *data = reinterpret_cast<const float*>(tensor.tensor_content().c_str());
188 int size = tensor.tensor_content().size() / sizeof(float);
189 for (int i = 0; i < std::min(10, size); i++)
190 std::cout << " " << data[i];
// NOTE(review): the "... N more" suffix is presumably guarded by
// size > 10 on an elided line — confirm.
192 std::cout << " ... " << size - 10 << " more";
195 case tensorflow::DT_INT32:
197 const int *data = reinterpret_cast<const int*>(tensor.tensor_content().c_str());
198 int size = tensor.tensor_content().size() / sizeof(int);
199 for (int i = 0; i < std::min(10, size); i++)
200 std::cout << " " << data[i];
202 std::cout << " ... " << size - 10 << " more";
206 CV_Error(Error::StsError, "Tensor type is not supported");
// Debug helper: prints a node's name, op, input list and a curated subset
// of its attributes (dtype/T, padding, transpose flags, strides/ksize,
// otherwise dumps the attribute as a tensor).
211 void printLayerAttr(const tensorflow::NodeDef &layer)
213 std::cout << std::endl << layer.name() << ":" << layer.op();
214 for (int ii = 0; ii < layer.input_size(); ii++)
215 std::cout << "(" << layer.input(ii) << ")";
216 std::cout << std::endl;
// NOTE(review): this copies the whole attribute map (the `= layer.attr();`
// initializer is on an elided line) — acceptable for a debug routine, but
// a const reference would avoid the copy.
217 google::protobuf::Map<std::string, tensorflow::AttrValue> attr
219 for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
220 ai != attr.end(); ++ai)
222 std::cout << ai->first << ":";
223 if (ai->first == "dtype" || ai->first == "T")
224 std::cout << ai->second.i();
225 else if (ai->first == "padding")
226 std::cout << ai->second.s();
227 else if (ai->first == "transpose_a" || ai->first == "transpose_b")
228 std::cout << ai->second.b();
229 // else if (ai->first == "shape")
230 //   printTensorShape(ai->second.shape());
231 else if (ai->first == "strides" || ai->first == "ksize")
232 printList(ai->second.list());
// Fallback: treat the attribute value as a tensor.
234 printTensor(ai->second.tensor());
235 std::cout << std::endl;
239 bool hasLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
241 google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
242 return attr.find(name) != attr.end();
245 const tensorflow::AttrValue& getLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
247 return layer.attr().at(name);
250 static int getDataLayout(const tensorflow::NodeDef& layer)
252 if (hasLayerAttr(layer, "data_format"))
254 std::string format = getLayerAttr(layer, "data_format").s();
255 if (format == "NHWC" || format == "channels_last")
256 return DATA_LAYOUT_NHWC;
257 else if (format == "NCHW" || format == "channels_first")
258 return DATA_LAYOUT_NCHW;
260 CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
262 return DATA_LAYOUT_UNKNOWN;
// Strips the output-port suffix from a tensor reference, e.g.
// "conv/BiasAdd:0" -> "conv/BiasAdd". Names without ':' pass through.
static inline std::string getNodeName(const std::string& tensorName)
{
    const size_t pos = tensorName.rfind(':');
    return pos == std::string::npos ? tensorName : tensorName.substr(0, pos);
}
270 static inline int getDataLayout(const std::string& layerName,
271 const std::map<String, int>& data_layouts)
273 std::map<String, int>::const_iterator it = data_layouts.find(getNodeName(layerName));
274 return it != data_layouts.end() ? it->second : DATA_LAYOUT_UNKNOWN;
277 void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
279 if (hasLayerAttr(layer, "strides"))
281 const tensorflow::AttrValue& val = getLayerAttr(layer, "strides");
282 int dimX, dimY, dimC;
283 int layout = getDataLayout(layer);
284 if (layout == DATA_LAYOUT_NCHW)
286 dimC = 1; dimY = 2; dimX = 3;
290 dimY = 1; dimX = 2; dimC = 3;
292 if (val.list().i_size() != 4 ||
293 val.list().i(0) != 1 || val.list().i(dimC) != 1)
294 CV_Error(Error::StsError, "Unsupported strides");
295 layerParams.set("stride_h", static_cast<int>(val.list().i(dimY)));
296 layerParams.set("stride_w", static_cast<int>(val.list().i(dimX)));
// Interprets a 1-D DT_INT32 constant tensor (e.g. a block_shape or dims
// argument) as a DictValue integer array.
// NOTE(review): the declaration of the local `shape` is on an elided line.
300 DictValue parseDims(const tensorflow::TensorProto &tensor) {
302 blobShapeFromTensor(tensor, shape);
303 int dims = (int)shape.size();
305 CV_Assert(tensor.dtype() == tensorflow::DT_INT32);
306 CV_Assert(dims == 1);
308 Mat values = getTensorContent(tensor);
309 CV_Assert(values.type() == CV_32SC1);
310 // TODO: add reordering shape if dims == 4
311 return DictValue::arrayInt((int*)values.data, values.total());
314 void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
316 if (hasLayerAttr(layer, "ksize"))
318 const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize");
319 int dimX, dimY, dimC;
320 int layout = getDataLayout(layer);
321 if (layout == DATA_LAYOUT_NCHW)
323 dimC = 1; dimY = 2; dimX = 3;
327 dimY = 1; dimX = 2; dimC = 3;
329 if (val.list().i_size() != 4 ||
330 val.list().i(0) != 1 || val.list().i(dimC) != 1)
331 CV_Error(Error::StsError, "Unsupported ksize");
332 layerParams.set("kernel_h", static_cast<int>(val.list().i(dimY)));
333 layerParams.set("kernel_w", static_cast<int>(val.list().i(dimX)));
337 layerParams.set("kernel_h", 1);
338 layerParams.set("kernel_w", 1);
342 void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer)
344 if (hasLayerAttr(layer, "padding"))
345 layerParams.set("pad_mode", getLayerAttr(layer, "padding").s());
// Parses a TensorFlow tensor reference "node:N" into a Pin (node name plus
// output blob index). Without a ':' the whole string is the node name.
// NOTE(review): construction of the local `pin` (presumably Pin pin(name))
// and the return statement are on elided lines.
348 Pin parsePin(const std::string &name)
352 size_t delimiter_pos = name.find_first_of(":");
353 if (delimiter_pos != std::string::npos)
355 pin.name = name.substr(0, delimiter_pos);
// Parse the numeric suffix after ':' as the output blob index.
356 std::istringstream(name.substr(delimiter_pos + 1)) >> pin.blobIndex;
// Collects (name, node index) of every node consuming any output of
// `layer_name`, optionally restricted to consumers whose op equals `type`
// (empty `type` matches everything).
// NOTE(review): the declaration of the result vector and the final return
// are on elided lines.
362 StrIntVector getNextLayers(const tensorflow::GraphDef& net, const String& layer_name, const String& type = "")
366 for (int li = 0; li < net.node_size(); li++)
368 const tensorflow::NodeDef& layer = net.node(li);
369 for (int input_id = 0; input_id < layer.input_size(); input_id++) {
// Compare against the producer name with any ":N" port suffix stripped.
370 String input_op_name = parsePin(layer.input(input_id)).name;
371 bool type_ok = type.empty() ? true : type == layer.op();
372 if (input_op_name == layer_name && type_ok)
373 layers.push_back(std::make_pair(layer.name(), li));
// Bypasses node `layer_index` in the graph: every consumer referencing it
// is rewired to the node's own input #input_blob_index, then the node is
// removed from the graph (skipped when remove_from_net is false — the
// guard is on an elided line).
// NOTE(review): unlike getNextLayers, the match here is an exact string
// comparison, so consumers referencing "name:N" are not rewired — confirm
// whether that is intentional.
380 void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int input_blob_index, bool remove_from_net = true) {
381 String layer_name = net.node(layer_index).name();
382 StrIntVector layers = getNextLayers(net, layer_name);
384 String removed_layer_input = net.node(layer_index).input(input_blob_index);
386 for (size_t i = 0; i < layers.size(); i++)
388 tensorflow::NodeDef* layer = net.mutable_node(layers[i].second);
389 for (int input_id = 0; input_id < layer->input_size(); input_id++) {
390 String input_op_name = layer->input(input_id);
392 if (input_op_name == layer_name) {
// Redirect the consumer straight to the excluded node's input.
393 layer->set_input(input_id, removed_layer_input);
399 net.mutable_node()->DeleteSubrange(layer_index, 1);
// --- TFImporter interface (class header and access specifiers are on
// elided lines of this listing) ---
// Construct from files on disk.
404 TFImporter(const char *model, const char *config = NULL);
// Construct from in-memory serialized graphs.
405 TFImporter(const char *dataModel, size_t lenModel,
406 const char *dataConfig = NULL, size_t lenConfig = 0);
// Build the cv::dnn network from the parsed graph(s).
408 void populateNet(Net dstNet);
// Convert an HWIO kernel tensor to an OIHW blob.
411 void kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob);
// Wire one producer pin into one consumer input slot.
413 void connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
414 const int input_layer_id, const int input_blob_id);
// Wire the same producer pin into every input slot of a consumer.
415 void connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
416 const int input_layer_id, const int input_blobs_count);
// Resolve the Const tensor feeding `layer` (see definition below).
417 const tensorflow::TensorProto& getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
418 int input_blob_index = -1, int* actual_inp_blob_idx = 0);
421 // Binary serialized TensorFlow graph includes weights.
422 tensorflow::GraphDef netBin;
423 // Optional text definition of TensorFlow graph. More flexible than binary format
424 // and may be used to build the network using binary format only as a weights storage.
425 // This approach is similar to Caffe's `.prototxt` and `.caffemodel`.
426 tensorflow::GraphDef netTxt;
// Names of the network's input nodes, in declaration order.
428 std::vector<String> netInputsNames;
// File-based constructor: `model` is the binary GraphDef (with weights),
// `config` an optional text GraphDef. NULL or empty paths are skipped;
// the ...OrDie readers fail hard on parse errors.
431 TFImporter::TFImporter(const char *model, const char *config)
433 if (model && model[0])
434 ReadTFNetParamsFromBinaryFileOrDie(model, &netBin);
435 if (config && config[0])
436 ReadTFNetParamsFromTextFileOrDie(config, &netTxt);
// Buffer-based constructor: same semantics as the file constructor but
// parses the binary and (optional) text graphs from in-memory buffers.
439 TFImporter::TFImporter(const char *dataModel, size_t lenModel,
440 const char *dataConfig, size_t lenConfig)
442 if (dataModel != NULL && lenModel > 0)
443 ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin);
444 if (dataConfig != NULL && lenConfig > 0)
445 ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt);
// Converts a 4-D TensorFlow convolution kernel from HWIO order (height,
// width, input channels, output channels) into OpenCV's OIHW — both the
// shape metadata and the element data are reordered.
// NOTE(review): the local `shape` declaration and several braces are on
// elided lines of this listing.
448 void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
451 blobShapeFromTensor(tensor, shape);
452 int dims = (int)shape.size();
454 // TODO: other blob types
455 CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT ||
456 tensor.dtype() == tensorflow::DT_HALF);
457 CV_Assert(dims == 4);
459 // REORDER kernel HWIO to OIHW
460 swap(shape[0], shape[2]); // IWHO
461 swap(shape[1], shape[3]); // IOHW
462 swap(shape[0], shape[1]); // OIHW
464 dstBlob.create(shape, CV_32F);
466 Mat tensorContent = getTensorContent(tensor);
467 int size = tensorContent.total();
468 CV_Assert(size == (int)dstBlob.total());
470 float *dstData = dstBlob.ptr<float>();
471 const float *data = reinterpret_cast<const float*>(tensorContent.data);
// dst index walks OIHW order; src recomputes the original HWIO offset of
// the same element.
473 int out_c = shape[0], input_c = shape[1], height = shape[2], width = shape[3];
474 int total = out_c*input_c*height*width;
475 for(int i_oc = 0; i_oc < out_c; i_oc++) {
476 for(int i_ic = 0; i_ic < input_c; i_ic++) {
477 for(int i_h = 0; i_h < height; i_h++) {
478 for(int i_w = 0; i_w < width; i_w++) {
479 int dst_i = input_c*height*width*i_oc + height*width*i_ic + width*i_h + i_w;
480 int src_i = out_c*input_c*width*i_h + out_c*input_c*i_w + out_c*i_ic + i_oc;
481 CV_Assert(dst_i < total);
482 CV_Assert(src_i < total);
483 dstData[dst_i] = data[src_i];
// Connects producer pin `outPin` to input slot input_blob_id of layer
// input_layer_id. If the producer is one of the network's named inputs,
// the blob index is remapped to its position in netInputsNames —
// presumably because all graph inputs are exposed by a single Input layer;
// confirm against populateNet's input handling.
// NOTE(review): the declaration of `blobIndex` and the `else` keyword are
// on elided lines.
490 void TFImporter::connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
491 const int input_layer_id, const int input_blob_id)
493 std::map<String, int>::const_iterator it = layers_name_id_map.find(outPin.name);
494 if (it == layers_name_id_map.end())
495 CV_Error(Error::StsError, "Input layer not found: " + outPin.name);
497 std::vector<String>::iterator inpNameIt = std::find(netInputsNames.begin(), netInputsNames.end(), outPin.name);
499 if (inpNameIt == netInputsNames.end())
500 blobIndex = outPin.blobIndex;
502 blobIndex = inpNameIt - netInputsNames.begin();
503 network.connect(it->second, blobIndex, input_layer_id, input_blob_id);
506 void TFImporter::connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
507 const int input_layer_id, const int input_blobs_count)
509 for (int input_blob_id = 0; input_blob_id < input_blobs_count; input_blob_id++)
510 connect(layer_id, network, outPin, input_layer_id, input_blob_id);
// Returns the TensorProto value of the Const node feeding `layer`.
// input_blob_index == -1 auto-detects the unique Const input (two Const
// inputs is an error); otherwise the given input is used. The selected
// index is reported through *actual_inp_blob_idx when non-null. The node
// is resolved against netBin first, then netTxt.
// NOTE(review): `const_layers` is passed by value, copying the whole map
// on every call — should be a const reference.
513 const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
514 int input_blob_index, int* actual_inp_blob_idx) {
515 if (input_blob_index == -1) {
516 for(int i = 0; i < layer.input_size(); i++) {
517 Pin input = parsePin(layer.input(i));
518 if (const_layers.find(input.name) != const_layers.end()) {
519 if (input_blob_index != -1)
520 CV_Error(Error::StsError, "More than one input is Const op");
522 input_blob_index = i;
527 if (input_blob_index == -1)
528 CV_Error(Error::StsError, "Const input blob for weights not found");
530 Pin kernel_inp = parsePin(layer.input(input_blob_index));
// Only output :0 of a Const producer can carry the value.
531 if (const_layers.find(kernel_inp.name) == const_layers.end())
532 CV_Error(Error::StsError, "Const kernel input not found");
533 if (kernel_inp.blobIndex != 0)
534 CV_Error(Error::StsError, "Unsupported kernel input");
536 if(actual_inp_blob_idx) {
537 *actual_inp_blob_idx = input_blob_index;
540 int nodeIdx = const_layers.at(kernel_inp.name);
541 if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name)
543 return netBin.node(nodeIdx).attr().at("value").tensor();
// Index did not match a netBin node: the constant lives in the text graph.
547 CV_Assert(nodeIdx < netTxt.node_size(),
548 netTxt.node(nodeIdx).name() == kernel_inp.name);
549 return netTxt.node(nodeIdx).attr().at("value").tensor();
// Indexes every Const node of `net` by name in const_layers and schedules
// it to be skipped during layer instantiation. "Dequantize" nodes using
// MIN_FIRST mode are folded: their DT_QUINT8 constant input is converted
// to DT_FLOAT in place and re-registered under the Dequantize node's name.
553 static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& const_layers,
554 std::set<String>& layers_to_ignore)
556 for (int li = 0; li < net.node_size(); li++)
558 const tensorflow::NodeDef &layer = net.node(li);
559 String name = layer.name();
560 String type = layer.op();
562 if (type == "Dequantize")
564 // Example of Dequantize node:
565 // name: "conv2d_1/bias"
567 // input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8)
568 // input: "conv2d_1/bias_quantized_min"
569 // input: "conv2d_1/bias_quantized_max"
570 // attr { key: "T" value { type: DT_QUINT8 } } (quantized type)
571 // attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique)
572 CV_Assert(layer.input_size() == 3);
// All three inputs (tensor, min, max) must already be known constants.
573 for (int i = 0; i < 3; ++i)
574 CV_Assert(const_layers.find(layer.input(i)) != const_layers.end());
575 CV_Assert(hasLayerAttr(layer, "mode") &&
576 getLayerAttr(layer, "mode").s() == "MIN_FIRST");
578 int tensorId = const_layers[layer.input(0)];
579 int minId = const_layers[layer.input(1)];
580 int maxId = const_layers[layer.input(2)];
582 tensorflow::TensorProto* tensor = net.mutable_node(tensorId)
583 ->mutable_attr()->at("value")
585 CV_Assert(tensor->dtype() == tensorflow::DT_QUINT8);
587 Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor());
588 Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor());
589 CV_Assert(qMin.total() == 1, qMin.type() == CV_32FC1,
590 qMax.total() == 1, qMax.type() == CV_32FC1);
592 Mat content = getTensorContent(*tensor);
// Dequantize q -> offset + rangeScale * q, where the offset is
// rangeScale * round(minVal / rangeScale) — presumably matching
// MIN_FIRST semantics of snapping min to the scale grid; confirm.
594 float minVal = qMin.at<float>(0);
595 float rangeScale = (qMax.at<float>(0) - minVal) / 255;
596 CV_Assert(rangeScale >= 0);
597 content.convertTo(content, CV_32FC1, rangeScale,
598 rangeScale * cvRound(minVal / rangeScale));
600 tensor->set_dtype(tensorflow::DT_FLOAT);
601 tensor->set_tensor_content(content.data, content.total() * content.elemSize1());
// Rename the constant node to the Dequantize node's name so downstream
// consumers resolve to the dequantized tensor.
603 net.mutable_node(tensorId)->set_name(name);
604 CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);
605 layers_to_ignore.insert(name);
608 else if (type != "Const")
609 continue; // only Const parameters are supported
// Register a plain Const node carrying a "value" attribute.
611 if (layer.attr().find("value") != layer.attr().end())
613 CV_Assert(const_layers.insert(std::make_pair(name, li)).second);
615 layers_to_ignore.insert(name);
619 // If all inputs of specific layer have the same data layout we can say that
620 // this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
621 static int predictOutputDataLayout(const tensorflow::GraphDef& net,
622 const tensorflow::NodeDef& layer,
623 const std::map<String, int>& data_layouts)
// An explicit data_format attribute on the node wins outright.
625 int layout = getDataLayout(layer);
626 if (layout != DATA_LAYOUT_UNKNOWN)
// NOTE(review): the elided line here presumably returns `layout` — confirm.
629 // Determine layout by layer's inputs
630 std::map<String, int>::const_iterator it;
631 for (int i = 0, n = layer.input_size(); i < n; ++i)
633 it = data_layouts.find(getNodeName(layer.input(i)));
634 if (it != data_layouts.end())
636 if (layout != DATA_LAYOUT_UNKNOWN)
// Two inputs with different known layouts -> give up.
638 if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN)
639 return DATA_LAYOUT_UNKNOWN;
// NOTE(review): the elided else-branch presumably adopts the first known
// input layout into `layout` — confirm.
646 if (layout != DATA_LAYOUT_UNKNOWN)
// NOTE(review): elided `return layout;` presumably follows here.
649 // Determine layout by layer's consumers recursively.
// Fall back to the layout pre-computed from consumers (populateNet's
// reverse pass over the graph fills this table); the entry must exist.
650 it = data_layouts.find(layer.name());
651 CV_Assert(it != data_layouts.end());
655 void TFImporter::populateNet(Net dstNet)
657 RemoveIdentityOps(netBin);
658 RemoveIdentityOps(netTxt);
660 if (!netTxt.ByteSize())
661 simplifySubgraphs(netBin);
663 std::set<String> layers_to_ignore;
665 tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin;
667 int layersSize = net.node_size();
669 std::map<String, int> data_layouts;
670 // Pre-fill data layouts where they are set explicitly.
671 // Assuming that nodes are in topological order
672 for (int i = net.node_size() - 1; i >= 0; --i)
674 const tensorflow::NodeDef& layer = net.node(i);
675 std::string name = layer.name();
677 int layout = getDataLayout(layer);
678 std::map<String, int>::iterator it = data_layouts.find(name);
679 if (it != data_layouts.end())
681 if (layout != DATA_LAYOUT_UNKNOWN)
683 if (it->second == DATA_LAYOUT_UNKNOWN)
685 else if (it->second != layout)
687 it->second = DATA_LAYOUT_UNKNOWN;
688 layout = DATA_LAYOUT_UNKNOWN;
695 data_layouts[name] = layout;
697 // Specify input layers to have the same data layout.
698 for (int j = 0; j < layer.input_size(); ++j)
700 name = getNodeName(layer.input(j));
701 it = data_layouts.find(name);
702 if (it != data_layouts.end())
704 if (layout != DATA_LAYOUT_UNKNOWN)
706 if (it->second == DATA_LAYOUT_UNKNOWN)
708 else if (it->second != layout)
709 it->second = DATA_LAYOUT_UNKNOWN;
713 data_layouts[name] = layout;
717 // find all Const layers for params
718 std::map<String, int> value_id;
719 addConstNodes(netBin, value_id, layers_to_ignore);
720 addConstNodes(netTxt, value_id, layers_to_ignore);
722 std::map<String, int> layer_id;
724 for (int li = 0; li < layersSize; li++)
726 tensorflow::NodeDef layer = net.node(li);
727 String name = layer.name();
728 String type = layer.op();
729 LayerParams layerParams;
731 if(layers_to_ignore.find(name) != layers_to_ignore.end())
734 int predictedLayout = predictOutputDataLayout(net, layer, data_layouts);
735 data_layouts[name] = predictedLayout;
737 if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative")
739 // The first node of dilated convolution subgraph.
740 // Extract input node, dilation rate and paddings.
741 std::string input = layer.input(0);
742 if (type == "SpaceToBatchND")
744 // op: "SpaceToBatchND"
746 // input: "SpaceToBatchND/block_shape"
747 // input: "SpaceToBatchND/paddings"
748 CV_Assert(layer.input_size() == 3);
750 DictValue dilation = parseDims(getConstBlob(layer, value_id, 1));
751 CV_Assert(dilation.size() == 2);
752 layerParams.set("dilation_h", dilation.get<int>(0));
753 layerParams.set("dilation_w", dilation.get<int>(1));
756 parseTensor<int>(getConstBlob(layer, value_id, 2), paddings);
758 // paddings is a 2x2 matrix: [[top, bot], [left, right]]
759 layerParams.set("pad_h", paddings.at<float>(0));
760 layerParams.set("pad_w", paddings.at<float>(2));
762 StrIntVector next_layers = getNextLayers(net, name, "Conv2D");
763 if (next_layers.empty())
765 next_layers = getNextLayers(net, name, "DepthwiseConv2dNative");
767 CV_Assert(next_layers.size() == 1);
768 layer = net.node(next_layers[0].second);
769 layers_to_ignore.insert(next_layers[0].first);
774 // For the object detection networks, TensorFlow Object Detection API
775 // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax)
776 // order. We can manage it at DetectionOutput layer parsing predictions
777 // or shuffle last convolution's weights.
778 bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") &&
779 getLayerAttr(layer, "loc_pred_transposed").b();
781 layerParams.set("bias_term", false);
782 layerParams.blobs.resize(1);
784 StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
785 if (next_layers.size() == 1) {
786 layerParams.set("bias_term", true);
787 layerParams.blobs.resize(2);
789 int weights_layer_index = next_layers[0].second;
791 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
792 ExcludeLayer(net, weights_layer_index, 0, false);
793 layers_to_ignore.insert(next_layers[0].first);
795 // Shuffle bias from yxYX to xyXY.
796 if (locPredTransposed)
798 const int numWeights = layerParams.blobs[1].total();
799 float* biasData = reinterpret_cast<float*>(layerParams.blobs[1].data);
800 CV_Assert(numWeights % 4 == 0);
801 for (int i = 0; i < numWeights; i += 2)
803 std::swap(biasData[i], biasData[i + 1]);
808 const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id);
809 kernelFromTensor(kernelTensor, layerParams.blobs[0]);
810 releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
811 int* kshape = layerParams.blobs[0].size.p;
812 const int outCh = kshape[0];
813 const int inCh = kshape[1];
814 const int height = kshape[2];
815 const int width = kshape[3];
816 if (type == "DepthwiseConv2dNative")
818 CV_Assert(!locPredTransposed);
819 const int chMultiplier = kshape[0];
821 Mat copy = layerParams.blobs[0].clone();
822 float* src = (float*)copy.data;
823 float* dst = (float*)layerParams.blobs[0].data;
824 for (int i = 0; i < chMultiplier; ++i)
825 for (int j = 0; j < inCh; ++j)
826 for (int s = 0; s < height * width; ++s)
828 int src_i = (i * inCh + j) * height * width + s;
829 int dst_i = (j * chMultiplier + i) * height* width + s;
830 dst[dst_i] = src[src_i];
832 // TODO Use reshape instead
833 kshape[0] = inCh * chMultiplier;
835 size_t* kstep = layerParams.blobs[0].step.p;
836 kstep[0] = kstep[1]; // fix steps too
838 layerParams.set("kernel_h", height);
839 layerParams.set("kernel_w", width);
840 layerParams.set("num_output", outCh);
842 // Shuffle output channels from yxYX to xyXY.
843 if (locPredTransposed)
845 const int slice = height * width * inCh;
846 for (int i = 0; i < outCh; i += 2)
848 cv::Mat src(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i));
849 cv::Mat dst(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i + 1));
850 std::swap_ranges(src.begin<float>(), src.end<float>(), dst.begin<float>());
854 setStrides(layerParams, layer);
855 setPadding(layerParams, layer);
857 // The final node of dilated convolution subgraph.
858 next_layers = getNextLayers(net, name, "BatchToSpaceND");
859 if (!next_layers.empty())
861 layerParams.set("pad_mode", ""); // We use padding values.
862 CV_Assert(next_layers.size() == 1);
863 ExcludeLayer(net, next_layers[0].second, 0, false);
864 layers_to_ignore.insert(next_layers[0].first);
867 int id = dstNet.addLayer(name, "Convolution", layerParams);
871 connect(layer_id, dstNet, parsePin(input), id, 0);
874 if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN)
875 data_layouts[name] = DATA_LAYOUT_NHWC;
877 else if (type == "BiasAdd" || type == "Add")
879 bool haveConst = false;
880 for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
882 Pin input = parsePin(layer.input(ii));
883 haveConst = value_id.find(input.name) != value_id.end();
885 CV_Assert(!haveConst || layer.input_size() == 2);
889 Mat values = getTensorContent(getConstBlob(layer, value_id));
890 CV_Assert(values.type() == CV_32FC1);
893 if (values.total() == 1) // is a scalar.
895 layerParams.set("shift", values.at<float>(0));
896 id = dstNet.addLayer(name, "Power", layerParams);
900 layerParams.blobs.resize(1, values);
901 id = dstNet.addLayer(name, "Shift", layerParams);
906 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
910 layerParams.set("operation", "sum");
911 int id = dstNet.addLayer(name, "Eltwise", layerParams);
914 for (int ii = 0; ii < layer.input_size(); ii++)
916 Pin inp = parsePin(layer.input(ii));
917 if (layer_id.find(inp.name) == layer_id.end())
918 CV_Error(Error::StsError, "Input layer not found: " + inp.name);
919 connect(layer_id, dstNet, inp, id, ii);
923 else if (type == "Sub")
925 bool haveConst = false;
926 for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
928 Pin input = parsePin(layer.input(ii));
929 haveConst = value_id.find(input.name) != value_id.end();
931 CV_Assert(haveConst);
933 Mat values = getTensorContent(getConstBlob(layer, value_id));
934 CV_Assert(values.type() == CV_32FC1);
938 if (values.total() == 1) // is a scalar.
940 layerParams.set("shift", values.at<float>(0));
941 id = dstNet.addLayer(name, "Power", layerParams);
945 layerParams.blobs.resize(1, values);
946 id = dstNet.addLayer(name, "Shift", layerParams);
951 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
953 else if (type == "MatMul")
955 CV_Assert(layer.input_size() == 2);
957 layerParams.set("bias_term", false);
958 layerParams.blobs.resize(1);
960 StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
961 if (next_layers.empty())
963 next_layers = getNextLayers(net, name, "Add");
965 if (next_layers.size() == 1) {
966 layerParams.set("bias_term", true);
967 layerParams.blobs.resize(2);
969 int weights_layer_index = next_layers[0].second;
970 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
971 ExcludeLayer(net, weights_layer_index, 0, false);
972 layers_to_ignore.insert(next_layers[0].first);
975 int kernel_blob_index = -1;
976 const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernel_blob_index);
977 blobFromTensor(kernelTensor, layerParams.blobs[0]);
978 releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
980 if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed
981 Mat data = layerParams.blobs[0].t();
982 layerParams.blobs[0] = data.clone();
985 layerParams.set("num_output", layerParams.blobs[0].size[0]);
987 int id = dstNet.addLayer(name, "InnerProduct", layerParams);
991 int input_blob_index = kernel_blob_index == 0 ? 1 : 0;
992 connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0);
993 data_layouts[name] = DATA_LAYOUT_PLANAR;
995 else if (type == "Reshape")
997 Pin inpId = parsePin(layer.input(0));
998 Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1));
1000 int inpLayout = getDataLayout(layer.input(0), data_layouts);
1001 if (newShape.total() != 4 && inpLayout == DATA_LAYOUT_NHWC)
1004 int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.
1005 permLP.set("order", DictValue::arrayInt<int*>(order, 4));
1007 std::string permName = name + "/nchw";
1008 CV_Assert(layer_id.find(permName) == layer_id.end());
1009 int permId = dstNet.addLayer(permName, "Permute", permLP);
1010 layer_id[permName] = permId;
1011 connect(layer_id, dstNet, inpId, permId, 0);
1012 inpId = Pin(permName);
1014 else if (newShape.total() == 4 && inpLayout == DATA_LAYOUT_NHWC)
1017 std::swap(*newShape.ptr<int32_t>(0, 2), *newShape.ptr<int32_t>(0, 3));
1018 std::swap(*newShape.ptr<int32_t>(0, 1), *newShape.ptr<int32_t>(0, 2));
1020 layerParams.set("dim", DictValue::arrayInt<int*>(newShape.ptr<int>(), newShape.total()));
1022 int id = dstNet.addLayer(name, "Reshape", layerParams);
1023 layer_id[name] = id;
1026 connect(layer_id, dstNet, inpId, id, 0);
1027 data_layouts[name] = newShape.total() == 2 ? DATA_LAYOUT_PLANAR : DATA_LAYOUT_UNKNOWN;
1029 else if (type == "Flatten" || type == "Squeeze")
1031 Pin inpId = parsePin(layer.input(0));
1032 int inpLayout = getDataLayout(layer.input(0), data_layouts);
1033 if (type == "Squeeze")
1035 CV_Assert(hasLayerAttr(layer, "squeeze_dims"));
1036 const tensorflow::AttrValue& dims = getLayerAttr(layer, "squeeze_dims");
1037 if (inpLayout == DATA_LAYOUT_NHWC)
1039 if (dims.list().i_size() != 2 || dims.list().i(0) != 1 || dims.list().i(1) != 2)
1040 CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
1042 else if (inpLayout == DATA_LAYOUT_NCHW)
1044 if (dims.list().i_size() != 2 || dims.list().i(0) != 2 || dims.list().i(1) != 3)
1045 CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
1048 CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
1050 if (inpLayout == DATA_LAYOUT_NHWC)
1053 int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.
1054 permLP.set("order", DictValue::arrayInt<int*>(order, 4));
1056 std::string permName = name + "/nchw";
1057 CV_Assert(layer_id.find(permName) == layer_id.end());
1058 int permId = dstNet.addLayer(permName, "Permute", permLP);
1059 layer_id[permName] = permId;
1060 connect(layer_id, dstNet, inpId, permId, 0);
1061 inpId = Pin(permName);
1063 int id = dstNet.addLayer(name, "Flatten", layerParams);
1064 layer_id[name] = id;
1065 connect(layer_id, dstNet, inpId, id, 0);
1066 data_layouts[name] = DATA_LAYOUT_PLANAR;
1068 else if (type == "Transpose")
1070 Mat perm = getTensorContent(getConstBlob(layer, value_id, 1));
1071 CV_Assert(perm.type() == CV_32SC1);
1072 int* permData = (int*)perm.data;
1073 if (perm.total() == 4)
1075 // Only NHWC <-> NCHW permutations are allowed. OpenCV is always
1076 // keep NCHW layout this way.
1077 int inpLayout = getDataLayout(layer.input(0), data_layouts);
1078 if (inpLayout == DATA_LAYOUT_NHWC)
1080 if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2)
1082 // in TensorFlow: NHWC->NCHW
1083 // in OpenCV: NCHW->NCHW
1084 data_layouts[name] = DATA_LAYOUT_NCHW;
1086 else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
1088 // in TensorFlow: NHWC->NHWC
1089 // in OpenCV: NCHW->NCHW
1090 data_layouts[name] = DATA_LAYOUT_NHWC;
1093 CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
1095 else if (inpLayout == DATA_LAYOUT_NCHW)
1097 if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1)
1099 // in TensorFlow: NCHW->NHWC
1100 // in OpenCV: NCHW->NCHW
1101 data_layouts[name] = DATA_LAYOUT_NHWC;
1103 else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
1105 // in TensorFlow: NCHW->NCHW
1106 // in OpenCV: NCHW->NCHW
1107 data_layouts[name] = DATA_LAYOUT_NCHW;
1110 CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
1112 int id = dstNet.addLayer(name, "Identity", layerParams);
1113 layer_id[name] = id;
1114 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1118 layerParams.set("order", DictValue::arrayInt<int*>(permData, perm.total()));
1120 int id = dstNet.addLayer(name, "Permute", layerParams);
1121 layer_id[name] = id;
1124 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1125 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1128 else if (type == "Const")
1131 else if (type == "LRN")
1133 if(hasLayerAttr(layer, "alpha")) {
1134 layerParams.set("alpha", getLayerAttr(layer, "alpha").f());
1136 if(hasLayerAttr(layer, "beta")) {
1137 layerParams.set("beta", getLayerAttr(layer, "beta").f());
1139 if(hasLayerAttr(layer, "depth_radius")) {
1140 int radius = (int)getLayerAttr(layer, "depth_radius").i();
1141 layerParams.set("local_size", 2*radius + 1);
1143 if(hasLayerAttr(layer, "bias")) {
1144 layerParams.set("bias", getLayerAttr(layer, "bias").f());
1146 layerParams.set("norm_by_size", false);
1148 int id = dstNet.addLayer(name, "LRN", layerParams);
1149 layer_id[name] = id;
1151 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1153 else if (type == "Concat" || type == "ConcatV2")
1155 int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);
1156 int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
1158 if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1159 axis = toNCHW(axis);
1160 layerParams.set("axis", axis);
1162 int id = dstNet.addLayer(name, "Concat", layerParams);
1163 layer_id[name] = id;
1166 int from = (type == "Concat" ? 1 : 0);
1167 int to = (type == "Concat" ? layer.input_size() : layer.input_size() - 1);
1169 // input(0) or input(n-1) is concat_dim
1170 for (int ii = from; ii < to; ii++)
1172 Pin inp = parsePin(layer.input(ii));
1173 if (layer_id.find(inp.name) == layer_id.end())
1174 CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1175 connect(layer_id, dstNet, inp, id, ii - from);
1178 else if (type == "MaxPool")
1180 layerParams.set("pool", "max");
1182 setKSize(layerParams, layer);
1183 setStrides(layerParams, layer);
1184 setPadding(layerParams, layer);
1186 int id = dstNet.addLayer(name, "Pooling", layerParams);
1187 layer_id[name] = id;
1189 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1191 else if (type == "AvgPool")
1193 layerParams.set("pool", "ave");
1194 layerParams.set("ave_pool_padded_area", false);
1196 setKSize(layerParams, layer);
1197 setStrides(layerParams, layer);
1198 setPadding(layerParams, layer);
1200 int id = dstNet.addLayer(name, "Pooling", layerParams);
1201 layer_id[name] = id;
1203 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1205 else if (type == "Placeholder")
1207 if (!hasLayerAttr(layer, "dtype") ||
1208 getLayerAttr(layer, "dtype").type() != tensorflow::DT_BOOL) // If input is not a train/test flag.
1210 netInputsNames.push_back(name);
1214 else if (type == "Split") {
1215 // TODO: determining axis index remapping by input dimensions order of input blob
1216 // TODO: slicing input may be Const op
1217 // TODO: slicing kernels for convolutions - in current implementation it is impossible
1218 // TODO: add parsing num of slices parameter
1219 CV_Assert(layer.input_size() == 2);
1221 // 1st blob is dims tensor
1222 int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
1223 layerParams.set("axis", toNCHW(axis));
1225 int id = dstNet.addLayer(name, "Slice", layerParams);
1226 layer_id[name] = id;
1229 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1231 else if (type == "Slice")
1234 // input: "input_node"
1235 // input: "Slice/begin"
1236 // input: "Slice/size"
1237 CV_Assert(layer.input_size() == 3);
1238 Mat begins = getTensorContent(getConstBlob(layer, value_id, 1));
1239 Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2));
1240 CV_Assert(!begins.empty(), !sizes.empty(), begins.type() == CV_32SC1,
1241 sizes.type() == CV_32SC1);
1243 if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1245 // Swap NHWC parameters' order to NCHW.
1246 std::swap(*begins.ptr<int32_t>(0, 2), *begins.ptr<int32_t>(0, 3));
1247 std::swap(*begins.ptr<int32_t>(0, 1), *begins.ptr<int32_t>(0, 2));
1248 std::swap(*sizes.ptr<int32_t>(0, 2), *sizes.ptr<int32_t>(0, 3));
1249 std::swap(*sizes.ptr<int32_t>(0, 1), *sizes.ptr<int32_t>(0, 2));
1251 layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total()));
1252 layerParams.set("size", DictValue::arrayInt((int*)sizes.data, sizes.total()));
1254 int id = dstNet.addLayer(name, "Slice", layerParams);
1255 layer_id[name] = id;
1257 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1259 else if (type == "Mul")
1261 bool haveConst = false;
1262 for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
1264 Pin input = parsePin(layer.input(ii));
1265 haveConst = value_id.find(input.name) != value_id.end();
1267 CV_Assert(!haveConst || layer.input_size() == 2);
1271 // Multiplication by constant.
1272 CV_Assert(layer.input_size() == 2);
1273 Mat scaleMat = getTensorContent(getConstBlob(layer, value_id));
1274 CV_Assert(scaleMat.type() == CV_32FC1);
1277 if (scaleMat.total() == 1) // is a scalar.
1279 // Try to match with a LeakyRelu:
1281 // name: "LeakyRelu/mul"
1283 // input: "LeakyRelu/alpha"
1287 // name: "LeakyRelu/Maximum"
1289 // input: "LeakyRelu/mul"
1292 StrIntVector next_layers = getNextLayers(net, name, "Maximum");
1293 if (!next_layers.empty())
1295 int maximumLayerIdx = next_layers[0].second;
1296 ExcludeLayer(net, maximumLayerIdx, 0, false);
1297 layers_to_ignore.insert(next_layers[0].first);
1299 layerParams.set("negative_slope", scaleMat.at<float>(0));
1300 id = dstNet.addLayer(name, "ReLU", layerParams);
1304 // Just a multiplication.
1305 layerParams.set("scale", scaleMat.at<float>(0));
1306 id = dstNet.addLayer(name, "Power", layerParams);
1311 layerParams.blobs.resize(1, scaleMat);
1313 StrIntVector next_layers = getNextLayers(net, name, "Add");
1314 if (!next_layers.empty())
1316 layerParams.set("bias_term", true);
1317 layerParams.blobs.resize(2);
1319 int weights_layer_index = next_layers[0].second;
1320 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back());
1321 ExcludeLayer(net, weights_layer_index, 0, false);
1322 layers_to_ignore.insert(next_layers[0].first);
1325 if (hasLayerAttr(layer, "axis"))
1326 layerParams.set("axis", getLayerAttr(layer, "axis").i());
1328 id = dstNet.addLayer(name, "Scale", layerParams);
1330 layer_id[name] = id;
1332 Pin inp0 = parsePin(layer.input(0));
1333 if (layer_id.find(inp0.name) != layer_id.end())
1334 // First operand is a constant.
1335 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1337 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1341 layerParams.set("operation", "prod");
1342 int id = dstNet.addLayer(name, "Eltwise", layerParams);
1343 layer_id[name] = id;
1345 for (int ii = 0; ii < layer.input_size(); ii++)
1347 Pin inp = parsePin(layer.input(ii));
1348 if (layer_id.find(inp.name) == layer_id.end())
1349 CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1350 connect(layer_id, dstNet, inp, id, ii);
1354 else if (type == "Pad")
1356 Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
1357 CV_Assert(paddings.type() == CV_32SC1);
1358 if (paddings.total() == 8)
1360 // Perhaps, we have NHWC padding dimensions order.
1363 std::swap(*paddings.ptr<int32_t>(0, 2), *paddings.ptr<int32_t>(0, 6));
1364 std::swap(*paddings.ptr<int32_t>(0, 3), *paddings.ptr<int32_t>(0, 7));
1367 std::swap(*paddings.ptr<int32_t>(0, 4), *paddings.ptr<int32_t>(0, 6));
1368 std::swap(*paddings.ptr<int32_t>(0, 5), *paddings.ptr<int32_t>(0, 7));
1372 layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));
1374 int id = dstNet.addLayer(name, "Padding", layerParams);
1375 layer_id[name] = id;
1377 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1379 else if (type == "FusedBatchNorm")
1381 // op: "FusedBatchNorm"
1383 // input: "BatchNorm/gamma"
1384 // input: "BatchNorm/beta"
1385 // input: "BatchNorm/moving_mean"
1386 // input: "BatchNorm/moving_variance"
1387 if (layer.input_size() != 5)
1388 CV_Error(Error::StsNotImplemented,
1389 "Expected gamma, beta, mean and std");
1390 Pin inpId = parsePin(layer.input(0));
1392 bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b();
1394 layerParams.blobs.resize(2);
1396 const tensorflow::TensorProto& gammaTensor = getConstBlob(layer, value_id, 1);
1397 if (!gammaTensor.tensor_content().empty())
1399 layerParams.blobs.resize(layerParams.blobs.size() + 1);
1400 layerParams.set("has_weight", true);
1401 blobFromTensor(gammaTensor, layerParams.blobs.back());
1404 layerParams.set("has_weight", false);
1406 const tensorflow::TensorProto& betaTensor = getConstBlob(layer, value_id, 2);
1407 if (!betaTensor.tensor_content().empty())
1409 layerParams.blobs.resize(layerParams.blobs.size() + 1);
1410 layerParams.set("has_bias", true);
1411 blobFromTensor(betaTensor, layerParams.blobs.back());
1414 layerParams.set("has_bias", false);
1419 if (layerParams.blobs.size() == 2)
1420 CV_Error(Error::StsNotImplemented, "Cannot determine number "
1421 "of parameters for batch normalization layer.");
1422 mean = Mat::zeros(1, layerParams.blobs[3].total(), CV_32F);
1423 std = Mat::ones(1, layerParams.blobs[3].total(), CV_32F);
1425 // Add an extra layer: Mean-Variance normalization
1426 LayerParams mvnParams;
1427 std::string mvnName = name + "/MVN";
1428 CV_Assert(layer_id.find(mvnName) == layer_id.end());
1429 int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);
1430 layer_id[mvnName] = mvnId;
1431 connect(layer_id, dstNet, inpId, mvnId, 0);
1432 inpId = Pin(mvnName);
1436 blobFromTensor(getConstBlob(layer, value_id, 3), mean);
1437 blobFromTensor(getConstBlob(layer, value_id, 4), std);
1439 layerParams.blobs[0] = mean;
1440 layerParams.blobs[1] = std;
1442 if (hasLayerAttr(layer, "epsilon"))
1443 layerParams.set("eps", getLayerAttr(layer, "epsilon").f());
1445 int id = dstNet.addLayer(name, "BatchNorm", layerParams);
1446 layer_id[name] = id;
1449 connect(layer_id, dstNet, inpId, id, 0);
1451 else if (type == "Conv2DBackpropInput")
1453 // op: "Conv2DBackpropInput"
1454 // input: "conv2d_transpose/output_shape"
1457 if (layer.input_size() != 3)
1458 CV_Error(Error::StsNotImplemented,
1459 "Expected output shape, weights and input nodes");
1461 layerParams.set("bias_term", false);
1462 layerParams.blobs.resize(1);
1464 StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
1465 if (next_layers.size() == 1)
1467 layerParams.set("bias_term", true);
1468 layerParams.blobs.resize(2);
1470 int weights_layer_index = next_layers[0].second;
1472 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
1473 ExcludeLayer(net, weights_layer_index, 0, false);
1474 layers_to_ignore.insert(next_layers[0].first);
1477 kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);
1479 const int* kshape = layerParams.blobs[0].size.p;
1480 const int kernelH = kshape[2];
1481 const int kernelW = kshape[3];
1482 layerParams.set("kernel_h", kernelH);
1483 layerParams.set("kernel_w", kernelW);
1484 layerParams.set("num_output", kshape[1]);
1486 setStrides(layerParams, layer);
1487 setPadding(layerParams, layer);
1489 // For convolution layer, output shape computes as
1490 // o = 1 + (i - k + 2*p) / s
1491 // i - input size, o - output size, k - kernel size, p - pad, s - stride
1492 // In TensorFlow, p == 0 is padMode == 'VALID' or p == (k - 1) / 2
1493 // considering that k is odd.
1494 // SAME: o = 1 + (i - 1) / s
1495 // VALID: o = 1 + i / s
1496 // Deconvolution's layer output shape computes as
1497 // SAME: o = 1 + (i - 1)*s
1498 // VALID: o = (i - 1)*s
1499 // If output_shape differs from formulas above then adjust padding is applied.
1501 const int strideY = layerParams.get<int>("stride_h");
1502 const int strideX = layerParams.get<int>("stride_w");
1503 Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
1504 const int outH = outShape.at<int>(1);
1505 const int outW = outShape.at<int>(2);
1506 if (layerParams.get<String>("pad_mode") == "SAME")
1508 layerParams.set("adj_w", (outW - 1) % strideX);
1509 layerParams.set("adj_h", (outH - 1) % strideY);
1511 else if (layerParams.get<String>("pad_mode") == "VALID")
1513 layerParams.set("adj_w", (outW - kernelW) % strideX);
1514 layerParams.set("adj_h", (outH - kernelH) % strideY);
1516 int id = dstNet.addLayer(name, "Deconvolution", layerParams);
1517 layer_id[name] = id;
1520 connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
1522 else if (type == "BlockLSTM")
1525 // input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps)
1527 // input: "lstm_block_wrapper/zeros" (ignore)
1528 // input: "lstm_block_wrapper/zeros" (ignore)
1529 // input: "lstm_block_wrapper/kernel"
1530 // input: "lstm_block_wrapper/w_i_diag"
1531 // input: "lstm_block_wrapper/w_f_diag"
1532 // input: "lstm_block_wrapper/w_o_diag"
1533 // input: "lstm_block_wrapper/bias"
1534 if (layer.input_size() != 9)
1535 CV_Error(Error::StsNotImplemented, "Unexpected number of input nodes");
1537 if (hasLayerAttr(layer, "forget_bias"))
1538 layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f());
1540 if (hasLayerAttr(layer, "forget_bias"))
1542 float cellClip = getLayerAttr(layer, "cell_clip").f();
1543 // Cell clip is disabled if it's negative. NOTE(review): the guard two lines above tests hasLayerAttr(layer, "forget_bias") but then reads the "cell_clip" attribute — it should test "cell_clip"; confirm and fix.
1546 layerParams.set("use_cell_clip", true);
1547 layerParams.set("cell_clip", cellClip);
1552 blobFromTensor(getConstBlob(layer, value_id, 4), W);
1553 blobFromTensor(getConstBlob(layer, value_id, 8), b);
1554 const int outSize = W.cols / 4;
1557 float* weightData = (float*)W.data;
1558 for (int i = 0; i < W.rows; ++i)
1559 for (int j = 0; j < outSize; ++j)
1561 std::swap(weightData[i * W.cols + 1 * outSize + j],
1562 weightData[i * W.cols + 2 * outSize + j]);
1563 std::swap(weightData[i * W.cols + 2 * outSize + j],
1564 weightData[i * W.cols + 3 * outSize + j]);
1566 Wx = W.rowRange(0, W.rows - outSize).t();
1567 Wh = W.rowRange(W.rows - outSize, W.rows).t();
1569 layerParams.blobs.resize(3);
1570 layerParams.blobs[0] = Wh;
1571 layerParams.blobs[1] = Wx;
1572 layerParams.blobs[2] = b;
1574 if (hasLayerAttr(layer, "use_peephole"))
1576 bool usePeephole = getLayerAttr(layer, "use_peephole").b();
1579 layerParams.set("use_peephole", true);
1580 layerParams.blobs.resize(6);
1581 for (int i = 0; i < 3; ++i)
1584 blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);
1585 w = w.reshape(1, w.total()); // Single column.
1586 w = Mat::diag(w); // Make a diagonal matrix.
1587 layerParams.blobs[3 + i] = w;
1592 int id = dstNet.addLayer(name, "LSTM", layerParams);
1593 layer_id[name] = id;
1596 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1597 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1599 else if (type == "ResizeNearestNeighbor" || type == "ResizeBilinear")
1601 if (layer.input_size() == 2)
1603 Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1));
1604 CV_Assert(outSize.type() == CV_32SC1, outSize.total() == 2);
1605 layerParams.set("height", outSize.at<int>(0, 0));
1606 layerParams.set("width", outSize.at<int>(0, 1));
1608 else if (layer.input_size() == 3)
1610 Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1));
1611 Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2));
1612 CV_Assert(factorHeight.type() == CV_32SC1, factorHeight.total() == 1,
1613 factorWidth.type() == CV_32SC1, factorWidth.total() == 1);
1614 layerParams.set("zoom_factor_x", factorWidth.at<int>(0));
1615 layerParams.set("zoom_factor_y", factorHeight.at<int>(0));
1618 CV_Assert(layer.input_size() == 2 || layer.input_size() == 3);
1620 if (type == "ResizeNearestNeighbor")
1621 layerParams.set("interpolation", "nearest");
1623 layerParams.set("interpolation", "bilinear");
1625 if (hasLayerAttr(layer, "align_corners"))
1626 layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b());
1628 int id = dstNet.addLayer(name, "Resize", layerParams);
1629 layer_id[name] = id;
1631 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1633 else if (type == "L2Normalize")
1635 // op: "L2Normalize"
1637 // input: "reduction_indices" (axis)
1638 CV_Assert(layer.input_size() == 2);
1639 Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));
1640 CV_Assert(reductionIndices.type() == CV_32SC1);
1642 const int numAxes = reductionIndices.total();
1643 if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1644 for (int i = 0; i < numAxes; ++i)
1645 reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));
1647 cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);
1648 for (int i = 1; i < numAxes; ++i)
1650 CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);
1651 // Axes have the same sign.
1652 CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);
1654 layerParams.set("start_axis", reductionIndices.at<int>(0));
1655 layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));
1657 int id = dstNet.addLayer(name, "Normalize", layerParams);
1658 layer_id[name] = id;
1659 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1661 else if (type == "PriorBox")
1663 if (hasLayerAttr(layer, "min_size"))
1664 layerParams.set("min_size", getLayerAttr(layer, "min_size").i());
1665 if (hasLayerAttr(layer, "max_size"))
1666 layerParams.set("max_size", getLayerAttr(layer, "max_size").i());
1667 if (hasLayerAttr(layer, "flip"))
1668 layerParams.set("flip", getLayerAttr(layer, "flip").b());
1669 if (hasLayerAttr(layer, "clip"))
1670 layerParams.set("clip", getLayerAttr(layer, "clip").b());
1671 if (hasLayerAttr(layer, "offset"))
1672 layerParams.set("offset", getLayerAttr(layer, "offset").f());
1673 if (hasLayerAttr(layer, "step"))
1674 layerParams.set("step", getLayerAttr(layer, "step").f());
1676 const std::string paramNames[] = {"variance", "aspect_ratio", "scales",
1678 for (int i = 0; i < 5; ++i)
1680 if (hasLayerAttr(layer, paramNames[i]))
1682 Mat values = getTensorContent(getLayerAttr(layer, paramNames[i]).tensor());
1683 layerParams.set(paramNames[i],
1684 DictValue::arrayReal<float*>((float*)values.data, values.total()));
1687 int id = dstNet.addLayer(name, "PriorBox", layerParams);
1688 layer_id[name] = id;
1689 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1690 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
1691 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1693 else if (type == "DetectionOutput")
1695 // op: "DetectionOutput"
1696 // input_0: "locations"
1697 // input_1: "classifications"
1698 // input_2: "prior_boxes"
1699 if (hasLayerAttr(layer, "num_classes"))
1700 layerParams.set("num_classes", getLayerAttr(layer, "num_classes").i());
1701 if (hasLayerAttr(layer, "share_location"))
1702 layerParams.set("share_location", getLayerAttr(layer, "share_location").b());
1703 if (hasLayerAttr(layer, "background_label_id"))
1704 layerParams.set("background_label_id", getLayerAttr(layer, "background_label_id").i());
1705 if (hasLayerAttr(layer, "nms_threshold"))
1706 layerParams.set("nms_threshold", getLayerAttr(layer, "nms_threshold").f());
1707 if (hasLayerAttr(layer, "top_k"))
1708 layerParams.set("top_k", getLayerAttr(layer, "top_k").i());
1709 if (hasLayerAttr(layer, "code_type"))
1710 layerParams.set("code_type", getLayerAttr(layer, "code_type").s());
1711 if (hasLayerAttr(layer, "keep_top_k"))
1712 layerParams.set("keep_top_k", getLayerAttr(layer, "keep_top_k").i());
1713 if (hasLayerAttr(layer, "confidence_threshold"))
1714 layerParams.set("confidence_threshold", getLayerAttr(layer, "confidence_threshold").f());
1715 if (hasLayerAttr(layer, "loc_pred_transposed"))
1716 layerParams.set("loc_pred_transposed", getLayerAttr(layer, "loc_pred_transposed").b());
1717 if (hasLayerAttr(layer, "clip"))
1718 layerParams.set("clip", getLayerAttr(layer, "clip").b());
1719 if (hasLayerAttr(layer, "variance_encoded_in_target"))
1720 layerParams.set("variance_encoded_in_target", getLayerAttr(layer, "variance_encoded_in_target").b());
1722 int id = dstNet.addLayer(name, "DetectionOutput", layerParams);
1723 layer_id[name] = id;
1724 for (int i = 0; i < 3; ++i)
1725 connect(layer_id, dstNet, parsePin(layer.input(i)), id, i);
1726 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1728 else if (type == "Softmax")
1730 if (hasLayerAttr(layer, "axis"))
1731 layerParams.set("axis", getLayerAttr(layer, "axis").i());
1733 int id = dstNet.addLayer(name, "Softmax", layerParams);
1734 layer_id[name] = id;
1735 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1737 else if (type == "CropAndResize")
1739 // op: "CropAndResize"
1743 CV_Assert(layer.input_size() == 3);
1745 Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2));
1746 CV_Assert(cropSize.type() == CV_32SC1, cropSize.total() == 2);
1748 layerParams.set("height", cropSize.at<int>(0));
1749 layerParams.set("width", cropSize.at<int>(1));
1751 int id = dstNet.addLayer(name, "CropAndResize", layerParams);
1752 layer_id[name] = id;
1754 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1755 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
1757 else if (type == "Mean")
1759 Mat indices = getTensorContent(getConstBlob(layer, value_id, 1));
1760 CV_Assert(indices.type() == CV_32SC1);
1762 if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
1763 CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation.");
1765 layerParams.set("pool", "ave");
1766 layerParams.set("global_pooling", true);
1768 int id = dstNet.addLayer(name, "Pooling", layerParams);
1769 layer_id[name] = id;
1771 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1773 // There are two attributes, "keepdims" and a deprecated "keep_dims".
1774 bool keepDims = false;
1775 if (hasLayerAttr(layer, "keepdims"))
1776 keepDims = getLayerAttr(layer, "keepdims").b();
1777 else if (hasLayerAttr(layer, "keep_dims"))
1778 keepDims = getLayerAttr(layer, "keep_dims").b();
1782 LayerParams flattenLp;
1783 std::string flattenName = name + "/flatten";
1784 CV_Assert(layer_id.find(flattenName) == layer_id.end());
1785 int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
1786 layer_id[flattenName] = flattenId;
1787 connect(layer_id, dstNet, Pin(name), flattenId, 0);
1790 else if (type == "ClipByValue")
1792 // op: "ClipByValue"
1796 CV_Assert(layer.input_size() == 3);
1798 Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1));
1799 Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2));
1800 CV_Assert(minValue.total() == 1, minValue.type() == CV_32F,
1801 maxValue.total() == 1, maxValue.type() == CV_32F);
1803 layerParams.set("min_value", minValue.at<float>(0));
1804 layerParams.set("max_value", maxValue.at<float>(0));
1806 int id = dstNet.addLayer(name, "ReLU6", layerParams);
1807 layer_id[name] = id;
1809 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1811 else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
1812 type == "Relu" || type == "Elu" ||
1813 type == "Identity" || type == "Relu6")
1815 std::string dnnType = type;
1816 if (type == "Abs") dnnType = "AbsVal";
1817 else if (type == "Tanh") dnnType = "TanH";
1818 else if (type == "Relu") dnnType = "ReLU";
1819 else if (type == "Relu6") dnnType = "ReLU6";
1820 else if (type == "Elu") dnnType = "ELU";
1822 int id = dstNet.addLayer(name, dnnType, layerParams);
1823 layer_id[name] = id;
1824 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1828 // Importer does not know how to map this TensorFlow's operation onto OpenCV's layer.
1829 // However we create a layer with the same type and rely that user defined a custom layer.
1831 // All the attributes are added to LayerParams.
1832 google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
1833 for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
1834 ai != attr.end(); ++ai)
1836 if (ai->second.value_case() == tensorflow::AttrValue::kS) // string
1837 layerParams.set(ai->first, ai->second.s());
1838 if (ai->second.value_case() == tensorflow::AttrValue::kI) // int64
1839 layerParams.set(ai->first, ai->second.i());
1840 if (ai->second.value_case() == tensorflow::AttrValue::kF) // float
1841 layerParams.set(ai->first, ai->second.f());
1842 if (ai->second.value_case() == tensorflow::AttrValue::kB) // bool
1843 layerParams.set(ai->first, ai->second.b());
1846 // All the Const input nodes are added to layer's blobs.
1847 std::vector<std::string> inputsNames;
1848 for (int i = 0; i < layer.input_size(); ++i)
1850 // Check if input is a Const node.
1851 if (value_id.find(layer.input(i)) != value_id.end())
1853 Mat blob = getTensorContent(getConstBlob(layer, value_id, i));
1854 layerParams.blobs.push_back(blob);
1857 inputsNames.push_back(layer.input(i));
1859 int id = dstNet.addLayer(name, type, layerParams);
1860 layer_id[name] = id;
1862 for (int i = 0; i < inputsNames.size(); ++i)
1864 connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i);
1868 dstNet.setInputsNames(netInputsNames);
1873 #endif //HAVE_PROTOBUF
1875 Net readNetFromTensorflow(const String &model, const String &config)
1877 TFImporter importer(model.c_str(), config.c_str());
1879 importer.populateNet(net);
1883 Net readNetFromTensorflow(const char* bufferModel, size_t lenModel,
1884 const char* bufferConfig, size_t lenConfig)
1886 TFImporter importer(bufferModel, lenModel, bufferConfig, lenConfig);
1888 importer.populateNet(net);
1892 Net readNetFromTensorflow(const std::vector<uchar>& bufferModel, const std::vector<uchar>& bufferConfig)
1894 const char* bufferModelPtr = reinterpret_cast<const char*>(&bufferModel[0]);
1895 const char* bufferConfigPtr = bufferConfig.empty() ? NULL :
1896 reinterpret_cast<const char*>(&bufferConfig[0]);
1897 return readNetFromTensorflow(bufferModelPtr, bufferModel.size(),
1898 bufferConfigPtr, bufferConfig.size());
1901 CV__DNN_EXPERIMENTAL_NS_END