1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 // Copyright (C) 2016, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
9 Implementation of Tensorflow models parser
12 #include "../precomp.hpp"
22 #include "tf_graph_simplifier.hpp"
27 CV__DNN_EXPERIMENTAL_NS_BEGIN
31 using ::google::protobuf::RepeatedField;
32 using ::google::protobuf::RepeatedPtrField;
33 using ::google::protobuf::Message;
34 using ::google::protobuf::Descriptor;
35 using ::google::protobuf::FieldDescriptor;
36 using ::google::protobuf::Reflection;
41 static int toNCHW(int idx)
43 CV_Assert(-4 <= idx && idx < 4);
44 if (idx == 0) return 0;
45 else if (idx > 0) return idx % 3 + 1;
46 else return (4 + idx) % 3 + 1;
49 // These values are used to indicate a layer output's data layout where it's possible.
55     DATA_LAYOUT_PLANAR // 2-dimensional outputs (matmul, flatten, reshape to 2d)
// (layer name, node index) pairs — used when scanning the graph for a node's consumers.
58 typedef std::vector<std::pair<String, int> > StrIntVector;
// Constructs a pin referring to output blob #_blobIndex of node _name.
62 Pin(const std::string &_name, int _blobIndex = 0) :
63 name(_name), blobIndex(_blobIndex) {}
// Default pin: empty name and blobIndex -1 mark an unresolved reference.
66 name(""), blobIndex(-1) {}
// Fills `shape` from the tensor's TensorShapeProto.
// A 0-dimensional tensor (scalar) is reported as the one-element shape {1}.
// Raises StsError when the proto carries no shape information at all.
72 void blobShapeFromTensor(const tensorflow::TensorProto &tensor, MatShape& shape)
75 if (tensor.has_tensor_shape())
77 const tensorflow::TensorShapeProto &_shape = tensor.tensor_shape();
78 int i, n = _shape.dim_size();
// NOTE(review): the branch that resizes `shape` to n entries before this loop
// is elided in this excerpt — confirm against the full source.
83 for (i = 0; i < n; i++)
84 shape[i] = (int)_shape.dim(i).size();
87 shape.resize(1, 1); // Scalar.
91 CV_Error(Error::StsError, "Unknown shape of input tensor");
// Copies tensor data into dstBlob as CV_32F. 4-D tensors are transposed from
// TensorFlow's NHWC order into OpenCV's NCHW order; lower-rank tensors are
// copied element-for-element. T is the source element type (float/double).
// NOTE(review): the `template <typename T>` header and the `MatShape shape;`
// local declaration are elided in this excerpt.
96 void parseTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
99 blobShapeFromTensor(tensor, shape);
100 int dims = (int)shape.size();
// Only the 4-D case needs the layout transpose (guard elided in excerpt).
104 // REORDER blob NHWC to NCHW
105 swap(shape[2], shape[3]); // NHCW
106 swap(shape[1], shape[2]); // NCHW
109 dstBlob.create(shape, CV_32F);
111 Mat tensorContent = getTensorContent(tensor);
112 int size = tensorContent.total();
113 CV_Assert(size == (int)dstBlob.total());
115 float *dstData = dstBlob.ptr<float>();
116 const T *data = reinterpret_cast<const T*>(tensorContent.data);
// Gather each NCHW destination element from its NHWC source position.
120 int num = shape[0], channels = shape[1], height = shape[2], width = shape[3];
121 int total = num*channels*height*width;
122 for(int i_n = 0; i_n < shape[0]; i_n++) {
123 for(int i_c = 0; i_c < shape[1]; i_c++) {
124 for(int i_h = 0; i_h < shape[2]; i_h++) {
125 for(int i_w = 0; i_w < shape[3]; i_w++) {
126 int dst_i = channels*height*width*i_n + height*width*i_c + width*i_h + i_w;
127 int src_i = channels*height*width*i_n + i_c + channels*width*i_h + channels*i_w;
129 CV_Assert(dst_i < total);
130 CV_Assert(src_i < total);
132 dstData[dst_i] = data[src_i];
// Non-4-D tensors: plain element-wise copy, no reordering needed.
138 for (int i = 0; i < size; i++)
139 dstData[i] = data[i];
// Dispatches tensor parsing by protobuf dtype. DT_FLOAT and DT_HALF both go
// through parseTensor<float> (half data is presumably widened to float by
// getTensorContent — TODO confirm); DT_DOUBLE parses as double; any other
// dtype raises StsError.
143 void blobFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
145 switch (tensor.dtype()) {
146 case tensorflow::DT_FLOAT:
147 case tensorflow::DT_HALF:
148 parseTensor<float>(tensor, dstBlob);
// NOTE(review): the `break;` statements between cases are elided in this excerpt.
150 case tensorflow::DT_DOUBLE:
151 parseTensor<double>(tensor, dstBlob);
// default: unsupported dtype.
154 CV_Error(Error::StsError, "Tensor's data type is not supported");
// Debug helper: prints the integer entries of an AttrValue list to stdout.
159 void printList(const tensorflow::AttrValue::ListValue &val)
162 for (int i = 0; i < val.i_size(); i++)
163 std::cout << " " << val.i(i);
// Debug helper: prints each dimension of the shape as "name:size" to stdout.
167 void printTensorShape(const tensorflow::TensorShapeProto &shape)
170 for (int d = 0; d < shape.dim_size(); d++)
171 std::cout << shape.dim(d).name() <<
172 ":" << shape.dim(d).size() << " ";
// Debug helper: prints the tensor's shape followed by up to the first 10
// elements of its raw content. Supports DT_FLOAT and DT_INT32 payloads only;
// anything else raises StsError. Tensors with empty content print shape only.
176 void printTensor(const tensorflow::TensorProto &tensor)
178 printTensorShape(tensor.tensor_shape());
180 if (tensor.tensor_content().empty())
183 switch (tensor.dtype())
185 case tensorflow::DT_FLOAT:
// Reinterpret the serialized bytes as the declared element type.
187 const float *data = reinterpret_cast<const float*>(tensor.tensor_content().c_str());
188 int size = tensor.tensor_content().size() / sizeof(float);
189 for (int i = 0; i < std::min(10, size); i++)
190 std::cout << " " << data[i];
// NOTE(review): the guard that only prints the "... N more" tail when
// size > 10 is elided in this excerpt — confirm against the full source.
192 std::cout << " ... " << size - 10 << " more";
195 case tensorflow::DT_INT32:
197 const int *data = reinterpret_cast<const int*>(tensor.tensor_content().c_str());
198 int size = tensor.tensor_content().size() / sizeof(int);
199 for (int i = 0; i < std::min(10, size); i++)
200 std::cout << " " << data[i];
202 std::cout << " ... " << size - 10 << " more";
// default: unsupported dtype.
206 CV_Error(Error::StsError, "Tensor type is not supported");
// Debug helper: dumps a node's name, op, inputs, and a best-effort rendering
// of each attribute to stdout.
211 void printLayerAttr(const tensorflow::NodeDef &layer)
213 std::cout << std::endl << layer.name() << ":" << layer.op();
214 for (int ii = 0; ii < layer.input_size(); ii++)
215 std::cout << "(" << layer.input(ii) << ")";
216 std::cout << std::endl;
// NOTE: this copies the whole attribute map; acceptable in a debug routine.
217 google::protobuf::Map<std::string, tensorflow::AttrValue> attr
219 for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
220 ai != attr.end(); ++ai)
222 std::cout << ai->first << ":";
// Render the value according to the attribute key's conventional type.
223 if (ai->first == "dtype" || ai->first == "T")
224 std::cout << ai->second.i();
225 else if (ai->first == "padding")
226 std::cout << ai->second.s();
227 else if (ai->first == "transpose_a" || ai->first == "transpose_b")
228 std::cout << ai->second.b();
229 // else if (ai->first == "shape")
230 // printTensorShape(ai->second.shape());
231 else if (ai->first == "strides" || ai->first == "ksize")
232 printList(ai->second.list());
// Fallback: treat the attribute value as an embedded tensor.
234 printTensor(ai->second.tensor());
235 std::cout << std::endl;
239 bool hasLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
241 google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
242 return attr.find(name) != attr.end();
245 const tensorflow::AttrValue& getLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
247 return layer.attr().at(name);
// Maps the node's optional "data_format" attribute onto our layout enum.
// Unrecognized strings raise StsParseError; a missing attribute yields
// DATA_LAYOUT_UNKNOWN.
250 static int getDataLayout(const tensorflow::NodeDef& layer)
252 if (hasLayerAttr(layer, "data_format"))
254 std::string format = getLayerAttr(layer, "data_format").s();
255 if (format == "NHWC" || format == "channels_last")
256 return DATA_LAYOUT_NHWC;
257 else if (format == "NCHW" || format == "channels_first")
258 return DATA_LAYOUT_NCHW;
// Any other value indicates a malformed or unsupported graph.
260 CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
262 return DATA_LAYOUT_UNKNOWN;
// Strips a trailing ":<output index>" suffix from a tensor reference,
// yielding the producing node's name. Names without ':' come back unchanged.
static inline std::string getNodeName(const std::string& tensorName)
{
    const size_t colonPos = tensorName.rfind(':');
    if (colonPos == std::string::npos)
        return tensorName;
    return tensorName.substr(0, colonPos);
}
270 static inline int getDataLayout(const std::string& layerName,
271 const std::map<String, int>& data_layouts)
273 std::map<String, int>::const_iterator it = data_layouts.find(getNodeName(layerName));
274 return it != data_layouts.end() ? it->second : DATA_LAYOUT_UNKNOWN;
// Extracts stride_h/stride_w from the node's 4-element "strides" attribute,
// selecting the H/W positions according to the node's data layout. Strides
// along the batch and channel axes must equal 1, or StsError is raised.
277 void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
279 if (hasLayerAttr(layer, "strides"))
281 const tensorflow::AttrValue& val = getLayerAttr(layer, "strides");
282 int dimX, dimY, dimC;
283 int layout = getDataLayout(layer);
284 if (layout == DATA_LAYOUT_NCHW)
286 dimC = 1; dimY = 2; dimX = 3;
// NHWC (and unknown) ordering — the `else` keyword is elided in this excerpt.
290 dimY = 1; dimX = 2; dimC = 3;
292 if (val.list().i_size() != 4 ||
293 val.list().i(0) != 1 || val.list().i(dimC) != 1)
294 CV_Error(Error::StsError, "Unsupported strides");
295 layerParams.set("stride_h", static_cast<int>(val.list().i(dimY)));
296 layerParams.set("stride_w", static_cast<int>(val.list().i(dimX)));
// Interprets a 1-D DT_INT32 tensor as a list of dimension values (e.g. a
// Reshape target or dilation rates) and wraps it in a DictValue int array.
// NOTE(review): the `MatShape shape;` local declaration is elided here.
300 DictValue parseDims(const tensorflow::TensorProto &tensor) {
302 blobShapeFromTensor(tensor, shape);
303 int dims = (int)shape.size();
305 CV_Assert(tensor.dtype() == tensorflow::DT_INT32);
306 CV_Assert(dims == 1);
308 Mat values = getTensorContent(tensor);
309 CV_Assert(values.type() == CV_32SC1);
310 // TODO: add reordering shape if dims == 4
311 return DictValue::arrayInt((int*)values.data, values.total());
// Extracts kernel_h/kernel_w from the node's 4-element "ksize" attribute,
// layout-aware like setStrides; window sizes along batch and channel axes
// must be 1. When no "ksize" attribute exists a 1x1 window is assumed.
314 void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
316 if (hasLayerAttr(layer, "ksize"))
318 const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize");
319 int dimX, dimY, dimC;
320 int layout = getDataLayout(layer);
321 if (layout == DATA_LAYOUT_NCHW)
323 dimC = 1; dimY = 2; dimX = 3;
// NHWC (and unknown) ordering — the `else` keyword is elided in this excerpt.
327 dimY = 1; dimX = 2; dimC = 3;
329 if (val.list().i_size() != 4 ||
330 val.list().i(0) != 1 || val.list().i(dimC) != 1)
331 CV_Error(Error::StsError, "Unsupported ksize");
332 layerParams.set("kernel_h", static_cast<int>(val.list().i(dimY)));
333 layerParams.set("kernel_w", static_cast<int>(val.list().i(dimX)));
// Default branch (no "ksize" attribute): 1x1 kernel.
337 layerParams.set("kernel_h", 1);
338 layerParams.set("kernel_w", 1);
342 void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer)
344 if (hasLayerAttr(layer, "padding"))
345 layerParams.set("pad_mode", getLayerAttr(layer, "padding").s());
// Splits a "node:index" reference into a Pin{name, blobIndex}; a name
// without ':' keeps the Pin's default blob index.
// NOTE(review): the `Pin pin(name);` declaration and the final `return pin;`
// are elided in this excerpt.
348 Pin parsePin(const std::string &name)
352 size_t delimiter_pos = name.find_first_of(":");
353 if (delimiter_pos != std::string::npos)
355 pin.name = name.substr(0, delimiter_pos);
// Parse the numeric suffix after ':' as the output blob index.
356 std::istringstream(name.substr(delimiter_pos + 1)) >> pin.blobIndex;
// Returns (name, node index) for every node consuming layer_name's output,
// optionally filtered by op type (an empty `type` matches any op).
// NOTE(review): the `StrIntVector layers;` declaration and `return layers;`
// are elided in this excerpt.
362 StrIntVector getNextLayers(const tensorflow::GraphDef& net, const String& layer_name, const String& type = "")
366 for (int li = 0; li < net.node_size(); li++)
368 const tensorflow::NodeDef& layer = net.node(li);
369 for (int input_id = 0; input_id < layer.input_size(); input_id++) {
// Compare against the producing node's name, ignoring any ":N" suffix.
370 String input_op_name = parsePin(layer.input(input_id)).name;
371 bool type_ok = type.empty() ? true : type == layer.op();
372 if (input_op_name == layer_name && type_ok)
373 layers.push_back(std::make_pair(layer.name(), li));
// Splices node `layer_index` out of the graph: every consumer referencing it
// is rewired to the node's own input #input_blob_index. When remove_from_net
// is true the node is also deleted from the GraphDef (shifting later indices).
380 void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int input_blob_index, bool remove_from_net = true) {
381 String layer_name = net.node(layer_index).name();
382 StrIntVector layers = getNextLayers(net, layer_name);
// The input that consumers will be redirected to.
384 String removed_layer_input = net.node(layer_index).input(input_blob_index);
386 for (size_t i = 0; i < layers.size(); i++)
388 tensorflow::NodeDef* layer = net.mutable_node(layers[i].second);
389 for (int input_id = 0; input_id < layer->input_size(); input_id++) {
390 String input_op_name = layer->input(input_id);
392 if (input_op_name == layer_name) {
393 layer->set_input(input_id, removed_layer_input);
// NOTE(review): the `if (remove_from_net)` guard before this deletion is
// elided in this excerpt — confirm against the full source.
399 net.mutable_node()->DeleteSubrange(layer_index, 1);
// Loads the model from files on disk (binary graph + optional text config).
404 TFImporter(const char *model, const char *config = NULL);
// Loads the model from in-memory buffers instead of files.
405 TFImporter(const char *dataModel, size_t lenModel,
406 const char *dataConfig = NULL, size_t lenConfig = 0);
// Translates the parsed TensorFlow graph into the given OpenCV dnn::Net.
408 void populateNet(Net dstNet);
// Converts an HWIO convolution kernel tensor into OIHW order (see impl).
411 void kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob);
// Connects one producer pin to one consumer input slot.
413 void connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
414 const int input_layer_id, const int input_blob_id);
// Connects the same producer pin to every input slot of a consumer.
415 void connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
416 const int input_layer_id, const int input_blobs_count);
// Locates the Const tensor feeding `layer`; optionally reports which input
// slot held it through actual_inp_blob_idx.
417 const tensorflow::TensorProto& getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
418 int input_blob_index = -1, int* actual_inp_blob_idx = 0);
421 // Binary serialized TensorFlow graph includes weights.
422 tensorflow::GraphDef netBin;
423 // Optional text definition of TensorFlow graph. More flexible than binary format
424 // and may be used to build the network using binary format only as a weights storage.
425 // This approach is similar to Caffe's `.prototxt` and `.caffemodel`.
426 tensorflow::GraphDef netTxt;
// Names of the network's input layers, in registration order.
428 std::vector<String> netInputsNames;
// Reads the binary GraphDef from `model` and, when given, the text GraphDef
// from `config`. NULL or empty paths are silently skipped; parse failures
// abort via the ...OrDie readers.
431 TFImporter::TFImporter(const char *model, const char *config)
433 if (model && model[0])
434 ReadTFNetParamsFromBinaryFileOrDie(model, &netBin);
435 if (config && config[0])
436 ReadTFNetParamsFromTextFileOrDie(config, &netTxt);
// Buffer-based variant: parses the binary GraphDef from (dataModel, lenModel)
// and optionally the text GraphDef from (dataConfig, lenConfig). NULL/empty
// buffers are skipped; parse failures abort via the ...OrDie readers.
439 TFImporter::TFImporter(const char *dataModel, size_t lenModel,
440 const char *dataConfig, size_t lenConfig)
442 if (dataModel != NULL && lenModel > 0)
443 ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin);
444 if (dataConfig != NULL && lenConfig > 0)
445 ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt);
// Converts a 4-D convolution kernel from TensorFlow's HWIO layout to
// OpenCV's OIHW layout, storing the result as CV_32F in dstBlob.
// NOTE(review): the `MatShape shape;` local declaration is elided here.
448 void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
451 blobShapeFromTensor(tensor, shape);
452 int dims = (int)shape.size();
454 // TODO: other blob types
455 CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT ||
456 tensor.dtype() == tensorflow::DT_HALF);
457 CV_Assert(dims == 4);
// Permute the shape vector itself from HWIO to OIHW via three swaps.
459 // REORDER kernel HWIO to OIHW
460 swap(shape[0], shape[2]); // IWHO
461 swap(shape[1], shape[3]); // IOHW
462 swap(shape[0], shape[1]); // OIHW
464 dstBlob.create(shape, CV_32F);
466 Mat tensorContent = getTensorContent(tensor);
467 int size = tensorContent.total();
468 CV_Assert(size == (int)dstBlob.total());
470 float *dstData = dstBlob.ptr<float>();
471 const float *data = reinterpret_cast<const float*>(tensorContent.data);
// Gather each OIHW destination element from its HWIO source position.
473 int out_c = shape[0], input_c = shape[1], height = shape[2], width = shape[3];
474 int total = out_c*input_c*height*width;
475 for(int i_oc = 0; i_oc < out_c; i_oc++) {
476 for(int i_ic = 0; i_ic < input_c; i_ic++) {
477 for(int i_h = 0; i_h < height; i_h++) {
478 for(int i_w = 0; i_w < width; i_w++) {
479 int dst_i = input_c*height*width*i_oc + height*width*i_ic + width*i_h + i_w;
480 int src_i = out_c*input_c*width*i_h + out_c*input_c*i_w + out_c*i_ic + i_oc;
481 CV_Assert(dst_i < total);
482 CV_Assert(src_i < total);
483 dstData[dst_i] = data[src_i];
// Connects the producer identified by outPin to input slot input_blob_id of
// layer input_layer_id. When the producer is one of the network inputs, the
// blob index is its position within netInputsNames rather than the pin's
// own blob index. Unknown producers raise StsError.
490 void TFImporter::connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
491 const int input_layer_id, const int input_blob_id)
493 std::map<String, int>::const_iterator it = layers_name_id_map.find(outPin.name);
494 if (it == layers_name_id_map.end())
495 CV_Error(Error::StsError, "Input layer not found: " + outPin.name);
497 std::vector<String>::iterator inpNameIt = std::find(netInputsNames.begin(), netInputsNames.end(), outPin.name);
// NOTE(review): the `int blobIndex;` declaration is elided in this excerpt.
499 if (inpNameIt == netInputsNames.end())
500 blobIndex = outPin.blobIndex;
// else: network input — use its position among the declared input names.
502 blobIndex = inpNameIt - netInputsNames.begin();
503 network.connect(it->second, blobIndex, input_layer_id, input_blob_id);
506 void TFImporter::connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
507 const int input_layer_id, const int input_blobs_count)
509 for (int input_blob_id = 0; input_blob_id < input_blobs_count; input_blob_id++)
510 connect(layer_id, network, outPin, input_layer_id, input_blob_id);
// Returns the TensorProto of the Const node feeding `layer`. When
// input_blob_index is -1 the single Const input is auto-detected (more than
// one Const input is an error). The chosen input slot is reported through
// actual_inp_blob_idx when non-NULL. The tensor is looked up in netBin
// first, then in netTxt.
513 const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
514 int input_blob_index, int* actual_inp_blob_idx) {
515 if (input_blob_index == -1) {
516 for(int i = 0; i < layer.input_size(); i++) {
517 Pin input = parsePin(layer.input(i));
518 if (const_layers.find(input.name) != const_layers.end()) {
519 if (input_blob_index != -1)
520 CV_Error(Error::StsError, "More than one input is Const op");
522 input_blob_index = i;
527 if (input_blob_index == -1)
528 CV_Error(Error::StsError, "Const input blob for weights not found");
530 Pin kernel_inp = parsePin(layer.input(input_blob_index));
531 if (const_layers.find(kernel_inp.name) == const_layers.end())
532 CV_Error(Error::StsError, "Const kernel input not found");
// Only output blob 0 of a Const node can carry the tensor.
533 if (kernel_inp.blobIndex != 0)
534 CV_Error(Error::StsError, "Unsupported kernel input");
536 if(actual_inp_blob_idx) {
537 *actual_inp_blob_idx = input_blob_index;
// Prefer the binary graph (it holds the weights); fall back to the text one.
540 int nodeIdx = const_layers.at(kernel_inp.name);
541 if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name)
543 return netBin.node(nodeIdx).attr().at("value").tensor();
547 CV_Assert(nodeIdx < netTxt.node_size(),
548 netTxt.node(nodeIdx).name() == kernel_inp.name);
549 return netTxt.node(nodeIdx).attr().at("value").tensor();
// Registers every Const node of `net` into const_layers (name -> node index)
// and marks it to be skipped during layer creation. "Dequantize" nodes whose
// three inputs (quantized tensor, min, max) are all Const are folded in
// place: the DT_QUINT8 payload is converted to DT_FLOAT with the MIN_FIRST
// scheme and re-registered under the Dequantize node's own name.
553 static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& const_layers,
554 std::set<String>& layers_to_ignore)
556 for (int li = 0; li < net.node_size(); li++)
558 const tensorflow::NodeDef &layer = net.node(li);
559 String name = layer.name();
560 String type = layer.op();
562 if (type == "Dequantize")
564 // Example of Dequantize node:
565 // name: "conv2d_1/bias"
567 // input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8)
568 // input: "conv2d_1/bias_quantized_min"
569 // input: "conv2d_1/bias_quantized_max"
570 // attr { key: "T" value { type: DT_QUINT8 } } (quantized type)
571 // attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique)
572 CV_Assert(layer.input_size() == 3);
// All three inputs must already be registered Const nodes.
573 for (int i = 0; i < 3; ++i)
574 CV_Assert(const_layers.find(layer.input(i)) != const_layers.end());
575 CV_Assert(hasLayerAttr(layer, "mode") &&
576 getLayerAttr(layer, "mode").s() == "MIN_FIRST");
578 int tensorId = const_layers[layer.input(0)];
579 int minId = const_layers[layer.input(1)];
580 int maxId = const_layers[layer.input(2)];
582 tensorflow::TensorProto* tensor = net.mutable_node(tensorId)
583 ->mutable_attr()->at("value")
585 CV_Assert(tensor->dtype() == tensorflow::DT_QUINT8);
587 Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor());
588 Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor());
589 CV_Assert(qMin.total() == 1, qMin.type() == CV_32FC1,
590 qMax.total() == 1, qMax.type() == CV_32FC1);
592 Mat content = getTensorContent(*tensor);
// MIN_FIRST dequantization: out = rangeScale * q + rounded minimum offset.
594 float minVal = qMin.at<float>(0);
595 float rangeScale = (qMax.at<float>(0) - minVal) / 255;
596 CV_Assert(rangeScale >= 0);
597 content.convertTo(content, CV_32FC1, rangeScale,
598 rangeScale * cvRound(minVal / rangeScale));
// Replace the quantized payload with the dequantized floats in place.
600 tensor->set_dtype(tensorflow::DT_FLOAT);
601 tensor->set_tensor_content(content.data, content.total() * content.elemSize1());
// Expose the folded tensor under the Dequantize node's own name.
603 net.mutable_node(tensorId)->set_name(name);
604 CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);
605 layers_to_ignore.insert(name);
608 else if (type != "Const")
609 continue; // only Const parameters are supported
// Duplicate Const names would silently shadow each other — assert uniqueness.
611 if (layer.attr().find("value") != layer.attr().end())
613 CV_Assert(const_layers.insert(std::make_pair(name, li)).second);
615 layers_to_ignore.insert(name);
619 // If all inputs of specific layer have the same data layout we can say that
620 // this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
// Priority: explicit "data_format" attribute > common layout of the inputs >
// layout recorded for the node itself (filled by consumers in an earlier
// reverse-order pass over the graph).
621 static int predictOutputDataLayout(const tensorflow::GraphDef& net,
622 const tensorflow::NodeDef& layer,
623 const std::map<String, int>& data_layouts)
625 int layout = getDataLayout(layer);
626 if (layout != DATA_LAYOUT_UNKNOWN)
// NOTE(review): the `return layout;` statements after this check and after
// the input scan below are elided in this excerpt.
629 // Determine layout by layer's inputs
630 std::map<String, int>::const_iterator it;
631 for (int i = 0, n = layer.input_size(); i < n; ++i)
633 it = data_layouts.find(getNodeName(layer.input(i)));
634 if (it != data_layouts.end())
636 if (layout != DATA_LAYOUT_UNKNOWN)
// Two inputs with different known layouts: give up.
638 if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN)
639 return DATA_LAYOUT_UNKNOWN;
646 if (layout != DATA_LAYOUT_UNKNOWN)
649 // Determine layout by layer's consumers recursively.
650 it = data_layouts.find(layer.name());
651 CV_Assert(it != data_layouts.end());
655 void TFImporter::populateNet(Net dstNet)
657 RemoveIdentityOps(netBin);
658 RemoveIdentityOps(netTxt);
660 if (!netTxt.ByteSize())
661 simplifySubgraphs(netBin);
663 std::set<String> layers_to_ignore;
665 tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin;
667 int layersSize = net.node_size();
669 std::map<String, int> data_layouts;
670 // Pre-fill data layouts where they are set explicitly.
671 // Assuming that nodes are in topological order
672 for (int i = net.node_size() - 1; i >= 0; --i)
674 const tensorflow::NodeDef& layer = net.node(i);
675 std::string name = layer.name();
677 int layout = getDataLayout(layer);
678 std::map<String, int>::iterator it = data_layouts.find(name);
679 if (it != data_layouts.end())
681 if (layout != DATA_LAYOUT_UNKNOWN)
683 if (it->second == DATA_LAYOUT_UNKNOWN)
685 else if (it->second != layout)
687 it->second = DATA_LAYOUT_UNKNOWN;
688 layout = DATA_LAYOUT_UNKNOWN;
695 data_layouts[name] = layout;
697 // Specify input layers to have the same data layout.
698 for (int j = 0; j < layer.input_size(); ++j)
700 name = getNodeName(layer.input(j));
701 it = data_layouts.find(name);
702 if (it != data_layouts.end())
704 if (layout != DATA_LAYOUT_UNKNOWN)
706 if (it->second == DATA_LAYOUT_UNKNOWN)
708 else if (it->second != layout)
709 it->second = DATA_LAYOUT_UNKNOWN;
713 data_layouts[name] = layout;
717 // find all Const layers for params
718 std::map<String, int> value_id;
719 addConstNodes(netBin, value_id, layers_to_ignore);
720 addConstNodes(netTxt, value_id, layers_to_ignore);
722 std::map<String, int> layer_id;
724 for (int li = 0; li < layersSize; li++)
726 tensorflow::NodeDef layer = net.node(li);
727 String name = layer.name();
728 String type = layer.op();
729 LayerParams layerParams;
731 if(layers_to_ignore.find(name) != layers_to_ignore.end())
734 int predictedLayout = predictOutputDataLayout(net, layer, data_layouts);
735 data_layouts[name] = predictedLayout;
737 if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative")
739 // The first node of dilated convolution subgraph.
740 // Extract input node, dilation rate and paddings.
741 std::string input = layer.input(0);
742 if (type == "SpaceToBatchND")
744 // op: "SpaceToBatchND"
746 // input: "SpaceToBatchND/block_shape"
747 // input: "SpaceToBatchND/paddings"
748 CV_Assert(layer.input_size() == 3);
750 DictValue dilation = parseDims(getConstBlob(layer, value_id, 1));
751 CV_Assert(dilation.size() == 2);
752 layerParams.set("dilation_h", dilation.get<int>(0));
753 layerParams.set("dilation_w", dilation.get<int>(1));
756 parseTensor<int>(getConstBlob(layer, value_id, 2), paddings);
758 // paddings is a 2x2 matrix: [[top, bot], [left, right]]
759 layerParams.set("pad_h", paddings.at<float>(0));
760 layerParams.set("pad_w", paddings.at<float>(2));
762 StrIntVector next_layers = getNextLayers(net, name, "Conv2D");
763 if (next_layers.empty())
765 next_layers = getNextLayers(net, name, "DepthwiseConv2dNative");
767 CV_Assert(next_layers.size() == 1);
768 layer = net.node(next_layers[0].second);
769 layers_to_ignore.insert(next_layers[0].first);
774 layerParams.set("bias_term", false);
775 layerParams.blobs.resize(1);
777 StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
778 if (next_layers.size() == 1) {
779 layerParams.set("bias_term", true);
780 layerParams.blobs.resize(2);
782 int weights_layer_index = next_layers[0].second;
784 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
785 ExcludeLayer(net, weights_layer_index, 0, false);
786 layers_to_ignore.insert(next_layers[0].first);
789 const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id);
790 kernelFromTensor(kernelTensor, layerParams.blobs[0]);
791 releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
792 int* kshape = layerParams.blobs[0].size.p;
793 if (type == "DepthwiseConv2dNative")
795 const int chMultiplier = kshape[0];
796 const int inCh = kshape[1];
797 const int height = kshape[2];
798 const int width = kshape[3];
800 Mat copy = layerParams.blobs[0].clone();
801 float* src = (float*)copy.data;
802 float* dst = (float*)layerParams.blobs[0].data;
803 for (int i = 0; i < chMultiplier; ++i)
804 for (int j = 0; j < inCh; ++j)
805 for (int s = 0; s < height * width; ++s)
807 int src_i = (i * inCh + j) * height * width + s;
808 int dst_i = (j * chMultiplier + i) * height* width + s;
809 dst[dst_i] = src[src_i];
811 // TODO Use reshape instead
812 kshape[0] = inCh * chMultiplier;
814 size_t* kstep = layerParams.blobs[0].step.p;
815 kstep[0] = kstep[1]; // fix steps too
817 layerParams.set("kernel_h", kshape[2]);
818 layerParams.set("kernel_w", kshape[3]);
819 layerParams.set("num_output", kshape[0]);
821 setStrides(layerParams, layer);
822 setPadding(layerParams, layer);
824 // The final node of dilated convolution subgraph.
825 next_layers = getNextLayers(net, name, "BatchToSpaceND");
826 if (!next_layers.empty())
828 layerParams.set("pad_mode", ""); // We use padding values.
829 CV_Assert(next_layers.size() == 1);
830 ExcludeLayer(net, next_layers[0].second, 0, false);
831 layers_to_ignore.insert(next_layers[0].first);
834 int id = dstNet.addLayer(name, "Convolution", layerParams);
838 connect(layer_id, dstNet, parsePin(input), id, 0);
841 if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN)
842 data_layouts[name] = DATA_LAYOUT_NHWC;
844 else if (type == "BiasAdd" || type == "Add")
846 bool haveConst = false;
847 for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
849 Pin input = parsePin(layer.input(ii));
850 haveConst = value_id.find(input.name) != value_id.end();
852 CV_Assert(!haveConst || layer.input_size() == 2);
856 Mat values = getTensorContent(getConstBlob(layer, value_id));
857 CV_Assert(values.type() == CV_32FC1);
860 if (values.total() == 1) // is a scalar.
862 layerParams.set("shift", values.at<float>(0));
863 id = dstNet.addLayer(name, "Power", layerParams);
867 layerParams.blobs.resize(1, values);
868 id = dstNet.addLayer(name, "Shift", layerParams);
873 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
877 layerParams.set("operation", "sum");
878 int id = dstNet.addLayer(name, "Eltwise", layerParams);
881 for (int ii = 0; ii < layer.input_size(); ii++)
883 Pin inp = parsePin(layer.input(ii));
884 if (layer_id.find(inp.name) == layer_id.end())
885 CV_Error(Error::StsError, "Input layer not found: " + inp.name);
886 connect(layer_id, dstNet, inp, id, ii);
890 else if (type == "Sub")
892 bool haveConst = false;
893 for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
895 Pin input = parsePin(layer.input(ii));
896 haveConst = value_id.find(input.name) != value_id.end();
898 CV_Assert(haveConst);
900 Mat values = getTensorContent(getConstBlob(layer, value_id));
901 CV_Assert(values.type() == CV_32FC1);
905 if (values.total() == 1) // is a scalar.
907 layerParams.set("shift", values.at<float>(0));
908 id = dstNet.addLayer(name, "Power", layerParams);
912 layerParams.blobs.resize(1, values);
913 id = dstNet.addLayer(name, "Shift", layerParams);
918 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
920 else if (type == "MatMul")
922 CV_Assert(layer.input_size() == 2);
924 layerParams.set("bias_term", false);
925 layerParams.blobs.resize(1);
927 StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
928 if (next_layers.empty())
930 next_layers = getNextLayers(net, name, "Add");
932 if (next_layers.size() == 1) {
933 layerParams.set("bias_term", true);
934 layerParams.blobs.resize(2);
936 int weights_layer_index = next_layers[0].second;
937 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
938 ExcludeLayer(net, weights_layer_index, 0, false);
939 layers_to_ignore.insert(next_layers[0].first);
942 int kernel_blob_index = -1;
943 const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernel_blob_index);
944 blobFromTensor(kernelTensor, layerParams.blobs[0]);
945 releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
947 if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed
948 Mat data = layerParams.blobs[0].t();
949 layerParams.blobs[0] = data.clone();
952 layerParams.set("num_output", layerParams.blobs[0].size[0]);
954 int id = dstNet.addLayer(name, "InnerProduct", layerParams);
958 int input_blob_index = kernel_blob_index == 0 ? 1 : 0;
959 connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0);
960 data_layouts[name] = DATA_LAYOUT_PLANAR;
962 else if (type == "Reshape")
964 Pin inpId = parsePin(layer.input(0));
965 Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1));
967 int inpLayout = getDataLayout(layer.input(0), data_layouts);
968 if (newShape.total() != 4 && inpLayout == DATA_LAYOUT_NHWC)
971 int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.
972 permLP.set("order", DictValue::arrayInt<int*>(order, 4));
974 std::string permName = name + "/nchw";
975 CV_Assert(layer_id.find(permName) == layer_id.end());
976 int permId = dstNet.addLayer(permName, "Permute", permLP);
977 layer_id[permName] = permId;
978 connect(layer_id, dstNet, inpId, permId, 0);
979 inpId = Pin(permName);
981 else if (newShape.total() == 4 && inpLayout == DATA_LAYOUT_NHWC)
984 std::swap(*newShape.ptr<int32_t>(0, 2), *newShape.ptr<int32_t>(0, 3));
985 std::swap(*newShape.ptr<int32_t>(0, 1), *newShape.ptr<int32_t>(0, 2));
987 layerParams.set("dim", DictValue::arrayInt<int*>(newShape.ptr<int>(), newShape.total()));
989 int id = dstNet.addLayer(name, "Reshape", layerParams);
993 connect(layer_id, dstNet, inpId, id, 0);
994 data_layouts[name] = newShape.total() == 2 ? DATA_LAYOUT_PLANAR : DATA_LAYOUT_UNKNOWN;
996 else if (type == "Flatten" || type == "Squeeze")
998 Pin inpId = parsePin(layer.input(0));
999 int inpLayout = getDataLayout(layer.input(0), data_layouts);
1000 if (type == "Squeeze")
1002 CV_Assert(hasLayerAttr(layer, "squeeze_dims"));
1003 const tensorflow::AttrValue& dims = getLayerAttr(layer, "squeeze_dims");
1004 if (inpLayout == DATA_LAYOUT_NHWC)
1006 if (dims.list().i_size() != 2 || dims.list().i(0) != 1 || dims.list().i(1) != 2)
1007 CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
1009 else if (inpLayout == DATA_LAYOUT_NCHW)
1011 if (dims.list().i_size() != 2 || dims.list().i(0) != 2 || dims.list().i(1) != 3)
1012 CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
1015 CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
1017 if (inpLayout == DATA_LAYOUT_NHWC)
1020 int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.
1021 permLP.set("order", DictValue::arrayInt<int*>(order, 4));
1023 std::string permName = name + "/nchw";
1024 CV_Assert(layer_id.find(permName) == layer_id.end());
1025 int permId = dstNet.addLayer(permName, "Permute", permLP);
1026 layer_id[permName] = permId;
1027 connect(layer_id, dstNet, inpId, permId, 0);
1028 inpId = Pin(permName);
1030 int id = dstNet.addLayer(name, "Flatten", layerParams);
1031 layer_id[name] = id;
1032 connect(layer_id, dstNet, inpId, id, 0);
1033 data_layouts[name] = DATA_LAYOUT_PLANAR;
1035 else if (type == "Transpose")
1037 Mat perm = getTensorContent(getConstBlob(layer, value_id, 1));
// ---------------------------------------------------------------------------
// Interior of the TensorFlow op -> OpenCV layer dispatch chain (presumably
// TFImporter::populateNet; the function header and closing brace lie outside
// this chunk). Each branch translates one graph node `layer` (op string in
// `type`, node name in `name`) into a dnn layer: it fills `layerParams`,
// calls dstNet.addLayer(), records the id in `layer_id`, wires inputs with
// connect()/connectToAllBlobs(), and — where deducible — records the node's
// output layout in `data_layouts` (NHWC/NCHW/UNKNOWN/PLANAR).
// NOTE(review): this chunk opens mid-branch; `perm` is the permutation blob
// of a Transpose-like op whose header is above this view.
// ---------------------------------------------------------------------------
1038 CV_Assert(perm.type() == CV_32SC1);
1039 int* permData = (int*)perm.data;
1040 if (perm.total() == 4)
// A 4-element permutation is interpreted relative to the producer's layout.
// Because blobs are already stored NCHW internally, layout-only permutations
// collapse to an Identity layer plus a layout-table update; anything else
// in the 4D case is rejected below.
1042 // Only NHWC <-> NCHW permutations are allowed. OpenCV always
1043 // keeps NCHW layout this way.
1044 int inpLayout = getDataLayout(layer.input(0), data_layouts);
1045 if (inpLayout == DATA_LAYOUT_NHWC)
1047 if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2)
1049 // in TensorFlow: NHWC->NCHW
1050 // in OpenCV: NCHW->NCHW
1051 data_layouts[name] = DATA_LAYOUT_NCHW;
1053 else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
1055 // in TensorFlow: NHWC->NHWC
1056 // in OpenCV: NCHW->NCHW
1057 data_layouts[name] = DATA_LAYOUT_NHWC;
1060 CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
1062 else if (inpLayout == DATA_LAYOUT_NCHW)
1064 if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1)
1066 // in TensorFlow: NCHW->NHWC
1067 // in OpenCV: NCHW->NCHW
1068 data_layouts[name] = DATA_LAYOUT_NHWC;
1070 else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
1072 // in TensorFlow: NCHW->NCHW
1073 // in OpenCV: NCHW->NCHW
1074 data_layouts[name] = DATA_LAYOUT_NCHW;
1077 CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
// Layout-equivalent 4D transpose: data does not move, only the layout tag.
1079 int id = dstNet.addLayer(name, "Identity", layerParams);
1080 layer_id[name] = id;
1081 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
// Non-4D permutations become a real Permute layer; the resulting layout
// cannot be tracked, so it is marked UNKNOWN.
1085 layerParams.set("order", DictValue::arrayInt<int*>(permData, perm.total()));
1087 int id = dstNet.addLayer(name, "Permute", layerParams);
1088 layer_id[name] = id;
1091 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1092 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
// Const nodes produce no layer: their tensors are fetched on demand by
// consumers through getConstBlob() (branch body intentionally empty).
1095 else if (type == "Const")
1098 else if (type == "LRN")
1100 if(hasLayerAttr(layer, "alpha")) {
1101 layerParams.set("alpha", getLayerAttr(layer, "alpha").f());
1103 if(hasLayerAttr(layer, "beta")) {
1104 layerParams.set("beta", getLayerAttr(layer, "beta").f());
// TF's depth_radius is one-sided; OpenCV's local_size is the full window.
1106 if(hasLayerAttr(layer, "depth_radius")) {
1107 int radius = (int)getLayerAttr(layer, "depth_radius").i();
1108 layerParams.set("local_size", 2*radius + 1);
1110 if(hasLayerAttr(layer, "bias")) {
1111 layerParams.set("bias", getLayerAttr(layer, "bias").f());
// alpha is used as-is, not divided by window size — presumably to match TF
// LRN semantics; TODO confirm against the LRN layer implementation.
1113 layerParams.set("norm_by_size", false);
1115 int id = dstNet.addLayer(name, "LRN", layerParams);
1116 layer_id[name] = id;
1118 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1120 else if (type == "Concat" || type == "ConcatV2")
// The concat axis is a Const input: first input for Concat, last for ConcatV2.
1122 int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);
1123 int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
1125 if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1126 axis = toNCHW(axis);
1127 layerParams.set("axis", axis);
1129 int id = dstNet.addLayer(name, "Concat", layerParams);
1130 layer_id[name] = id;
// Connect every data input, skipping the axis input on either end.
1133 int from = (type == "Concat" ? 1 : 0);
1134 int to = (type == "Concat" ? layer.input_size() : layer.input_size() - 1);
1136 // input(0) or input(n-1) is concat_dim
1137 for (int ii = from; ii < to; ii++)
1139 Pin inp = parsePin(layer.input(ii));
1140 if (layer_id.find(inp.name) == layer_id.end())
1141 CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1142 connect(layer_id, dstNet, inp, id, ii - from);
1145 else if (type == "MaxPool")
1147 layerParams.set("pool", "max");
1149 setKSize(layerParams, layer);
1150 setStrides(layerParams, layer);
1151 setPadding(layerParams, layer);
1153 int id = dstNet.addLayer(name, "Pooling", layerParams);
1154 layer_id[name] = id;
1156 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1158 else if (type == "AvgPool")
1160 layerParams.set("pool", "ave");
// Exclude padded pixels from the average — presumably matching TF's AvgPool
// behavior at borders; TODO confirm.
1161 layerParams.set("ave_pool_padded_area", false);
1163 setKSize(layerParams, layer);
1164 setStrides(layerParams, layer);
1165 setPadding(layerParams, layer);
1167 int id = dstNet.addLayer(name, "Pooling", layerParams);
1168 layer_id[name] = id;
1170 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
// Placeholders become network inputs, except DT_BOOL ones (train/test flags).
1172 else if (type == "Placeholder")
1174 if (!hasLayerAttr(layer, "dtype") ||
1175 getLayerAttr(layer, "dtype").type() != tensorflow::DT_BOOL) // If input is not a train/test flag.
1177 netInputsNames.push_back(name);
1181 else if (type == "Split") {
1182 // TODO: determining axis index remapping by input dimensions order of input blob
1183 // TODO: slicing input may be Const op
1184 // TODO: slicing kernels for convolutions - in current implementation it is impossible
1185 // TODO: add parsing num of slices parameter
1186 CV_Assert(layer.input_size() == 2);
1188 // 1st blob is dims tensor
// input(0) is the split axis (Const); input(1) is the data tensor.
1189 int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
1190 layerParams.set("axis", toNCHW(axis));
1192 int id = dstNet.addLayer(name, "Slice", layerParams);
1193 layer_id[name] = id;
1196 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1198 else if (type == "Slice")
1201 // input: "input_node"
1202 // input: "Slice/begin"
1203 // input: "Slice/size"
1204 CV_Assert(layer.input_size() == 3);
1205 Mat begins = getTensorContent(getConstBlob(layer, value_id, 1));
1206 Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2));
1207 CV_Assert(!begins.empty(), !sizes.empty(), begins.type() == CV_32SC1,
1208 sizes.type() == CV_32SC1);
1210 if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1212 // Swap NHWC parameters' order to NCHW.
1213 std::swap(*begins.ptr<int32_t>(0, 2), *begins.ptr<int32_t>(0, 3));
1214 std::swap(*begins.ptr<int32_t>(0, 1), *begins.ptr<int32_t>(0, 2));
1215 std::swap(*sizes.ptr<int32_t>(0, 2), *sizes.ptr<int32_t>(0, 3));
1216 std::swap(*sizes.ptr<int32_t>(0, 1), *sizes.ptr<int32_t>(0, 2));
1218 layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total()));
1219 layerParams.set("size", DictValue::arrayInt((int*)sizes.data, sizes.total()));
1221 int id = dstNet.addLayer(name, "Slice", layerParams);
1222 layer_id[name] = id;
1224 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
// Mul has three translations depending on its operands:
//   scalar Const  -> LeakyReLU (when fused with a following Maximum) or Power;
//   tensor Const  -> Scale (optionally fusing a following Add as bias);
//   no Const      -> element-wise Eltwise product.
1226 else if (type == "Mul")
1228 bool haveConst = false;
1229 for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
1231 Pin input = parsePin(layer.input(ii));
1232 haveConst = value_id.find(input.name) != value_id.end();
1234 CV_Assert(!haveConst || layer.input_size() == 2);
1238 // Multiplication by constant.
1239 CV_Assert(layer.input_size() == 2);
1240 Mat scaleMat = getTensorContent(getConstBlob(layer, value_id));
1241 CV_Assert(scaleMat.type() == CV_32FC1);
1244 if (scaleMat.total() == 1) // is a scalar.
1246 // Try to match with a LeakyRelu:
1248 // name: "LeakyRelu/mul"
1250 // input: "LeakyRelu/alpha"
1254 // name: "LeakyRelu/Maximum"
1256 // input: "LeakyRelu/mul"
1259 StrIntVector next_layers = getNextLayers(net, name, "Maximum");
1260 if (!next_layers.empty())
// Fuse mul + Maximum into a single ReLU with negative_slope = alpha.
1262 int maximumLayerIdx = next_layers[0].second;
1263 ExcludeLayer(net, maximumLayerIdx, 0, false);
1264 layers_to_ignore.insert(next_layers[0].first);
1266 layerParams.set("negative_slope", scaleMat.at<float>(0));
1267 id = dstNet.addLayer(name, "ReLU", layerParams);
1271 // Just a multiplication.
1272 layerParams.set("scale", scaleMat.at<float>(0));
1273 id = dstNet.addLayer(name, "Power", layerParams);
// Non-scalar constant: per-channel Scale layer with the tensor as weights.
1278 layerParams.blobs.resize(1, scaleMat);
1280 StrIntVector next_layers = getNextLayers(net, name, "Add");
1281 if (!next_layers.empty())
// Fuse a following Add as the Scale layer's bias term.
1283 layerParams.set("bias_term", true);
1284 layerParams.blobs.resize(2);
1286 int weights_layer_index = next_layers[0].second;
1287 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back());
1288 ExcludeLayer(net, weights_layer_index, 0, false);
1289 layers_to_ignore.insert(next_layers[0].first);
1292 if (hasLayerAttr(layer, "axis"))
1293 layerParams.set("axis", getLayerAttr(layer, "axis").i());
1295 id = dstNet.addLayer(name, "Scale", layerParams);
1297 layer_id[name] = id;
// Connect whichever operand is the non-constant one.
1299 Pin inp0 = parsePin(layer.input(0));
1300 if (layer_id.find(inp0.name) != layer_id.end())
1301 // First operand is a constant.
1302 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1304 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
// No constant operand: a true element-wise product of all inputs.
1308 layerParams.set("operation", "prod");
1309 int id = dstNet.addLayer(name, "Eltwise", layerParams);
1310 layer_id[name] = id;
1312 for (int ii = 0; ii < layer.input_size(); ii++)
1314 Pin inp = parsePin(layer.input(ii));
1315 if (layer_id.find(inp.name) == layer_id.end())
1316 CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1317 connect(layer_id, dstNet, inp, id, ii);
// Pad: paddings come as a 4x2 Const [[before,after] per axis]; when 8 values
// are present they are reordered from NHWC to NCHW axis order.
1321 else if (type == "Pad")
1323 Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
1324 CV_Assert(paddings.type() == CV_32SC1);
1325 if (paddings.total() == 8)
1327 // Perhaps, we have NHWC padding dimensions order.
1330 std::swap(*paddings.ptr<int32_t>(0, 2), *paddings.ptr<int32_t>(0, 6));
1331 std::swap(*paddings.ptr<int32_t>(0, 3), *paddings.ptr<int32_t>(0, 7));
1334 std::swap(*paddings.ptr<int32_t>(0, 4), *paddings.ptr<int32_t>(0, 6));
1335 std::swap(*paddings.ptr<int32_t>(0, 5), *paddings.ptr<int32_t>(0, 7));
1339 layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));
1341 int id = dstNet.addLayer(name, "Padding", layerParams);
1342 layer_id[name] = id;
1344 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
// FusedBatchNorm inputs: data, gamma, beta, moving_mean, moving_variance.
// Resulting BatchNorm blobs: [0]=mean, [1]=std, then optional gamma/beta.
1346 else if (type == "FusedBatchNorm")
1348 // op: "FusedBatchNorm"
1350 // input: "BatchNorm/gamma"
1351 // input: "BatchNorm/beta"
1352 // input: "BatchNorm/moving_mean"
1353 // input: "BatchNorm/moving_variance"
1354 if (layer.input_size() != 5)
1355 CV_Error(Error::StsNotImplemented,
1356 "Expected gamma, beta, mean and std");
1357 Pin inpId = parsePin(layer.input(0));
1359 bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b();
1361 layerParams.blobs.resize(2);
// Empty tensor_content means the parameter is absent from the graph.
1363 const tensorflow::TensorProto& gammaTensor = getConstBlob(layer, value_id, 1);
1364 if (!gammaTensor.tensor_content().empty())
1366 layerParams.blobs.resize(layerParams.blobs.size() + 1);
1367 layerParams.set("has_weight", true);
1368 blobFromTensor(gammaTensor, layerParams.blobs.back());
1371 layerParams.set("has_weight", false);
1373 const tensorflow::TensorProto& betaTensor = getConstBlob(layer, value_id, 2);
1374 if (!betaTensor.tensor_content().empty())
1376 layerParams.blobs.resize(layerParams.blobs.size() + 1);
1377 layerParams.set("has_bias", true);
1378 blobFromTensor(betaTensor, layerParams.blobs.back());
1381 layerParams.set("has_bias", false);
// Training mode: moving statistics are unused; normalize with an explicit MVN
// layer and feed BatchNorm zero mean / unit std. Requires both gamma and beta
// (blobs[3] is read below), hence the size check.
1386 if (layerParams.blobs.size() == 2)
1387 CV_Error(Error::StsNotImplemented, "Cannot determine number "
1388 "of parameters for batch normalization layer.")
1389 mean = Mat::zeros(1, layerParams.blobs[3].total(), CV_32F);
1390 std = Mat::ones(1, layerParams.blobs[3].total(), CV_32F);
1392 // Add an extra layer: Mean-Variance normalization
1393 LayerParams mvnParams;
1394 std::string mvnName = name + "/MVN";
1395 CV_Assert(layer_id.find(mvnName) == layer_id.end());
1396 int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);
1397 layer_id[mvnName] = mvnId;
1398 connect(layer_id, dstNet, inpId, mvnId, 0);
1399 inpId = Pin(mvnName);
// Inference mode: use the stored moving mean/variance.
1403 blobFromTensor(getConstBlob(layer, value_id, 3), mean);
1404 blobFromTensor(getConstBlob(layer, value_id, 4), std);
1406 layerParams.blobs[0] = mean;
1407 layerParams.blobs[1] = std;
1409 if (hasLayerAttr(layer, "epsilon"))
1410 layerParams.set("eps", getLayerAttr(layer, "epsilon").f());
1412 int id = dstNet.addLayer(name, "BatchNorm", layerParams);
1413 layer_id[name] = id;
1416 connect(layer_id, dstNet, inpId, id, 0);
// Conv2DBackpropInput == transposed convolution. Inputs: output_shape
// (Const), kernel (Const), data. A following BiasAdd is fused in.
1418 else if (type == "Conv2DBackpropInput")
1420 // op: "Conv2DBackpropInput"
1421 // input: "conv2d_transpose/output_shape"
1424 if (layer.input_size() != 3)
1425 CV_Error(Error::StsNotImplemented,
1426 "Expected output shape, weights and input nodes");
1428 layerParams.set("bias_term", false);
1429 layerParams.blobs.resize(1);
1431 StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
1432 if (next_layers.size() == 1)
1434 layerParams.set("bias_term", true);
1435 layerParams.blobs.resize(2);
1437 int weights_layer_index = next_layers[0].second;
1439 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
1440 ExcludeLayer(net, weights_layer_index, 0, false);
1441 layers_to_ignore.insert(next_layers[0].first);
1444 kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);
// Kernel blob is 4D after kernelFromTensor; dims [?, out, h, w] as indexed
// below (kshape[1] is used as num_output).
1446 const int* kshape = layerParams.blobs[0].size.p;
1447 const int kernelH = kshape[2];
1448 const int kernelW = kshape[3];
1449 layerParams.set("kernel_h", kernelH);
1450 layerParams.set("kernel_w", kernelW);
1451 layerParams.set("num_output", kshape[1]);
1453 setStrides(layerParams, layer);
1454 setPadding(layerParams, layer);
1456 // For convolution layer, output shape computes as
1457 // o = 1 + (i - k + 2*p) / s
1458 // i - input size, o - output size, k - kernel size, p - pad, s - stride
1459 // In TensorFlow, p == 0 is padMode == 'VALID' or p == (k - 1) / 2
1460 // considering that k is odd.
1461 // SAME: o = 1 + (i - 1) / s
1462 // VALID: o = 1 + i / s
1463 // Deconvolution's layer output shape computes as
1464 // SAME: o = 1 + (i - 1)*s
1465 // VALID: o = (i - 1)*s
1466 // If output_shape differs from formulas above then adjust padding is applied.
1468 const int strideY = layerParams.get<int>("stride_h");
1469 const int strideX = layerParams.get<int>("stride_w");
// output_shape is NHWC: index 1 is height, index 2 is width.
1470 Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
1471 const int outH = outShape.at<int>(1);
1472 const int outW = outShape.at<int>(2);
1473 if (layerParams.get<String>("pad_mode") == "SAME")
1475 layerParams.set("adj_w", (outW - 1) % strideX);
1476 layerParams.set("adj_h", (outH - 1) % strideY);
1478 else if (layerParams.get<String>("pad_mode") == "VALID")
1480 layerParams.set("adj_w", (outW - kernelW) % strideX);
1481 layerParams.set("adj_h", (outH - kernelH) % strideY);
1483 int id = dstNet.addLayer(name, "Deconvolution", layerParams);
1484 layer_id[name] = id;
1487 connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
1489 else if (type == "BlockLSTM")
1492 // input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps)
1494 // input: "lstm_block_wrapper/zeros" (ignore)
1495 // input: "lstm_block_wrapper/zeros" (ignore)
1496 // input: "lstm_block_wrapper/kernel"
1497 // input: "lstm_block_wrapper/w_i_diag"
1498 // input: "lstm_block_wrapper/w_f_diag"
1499 // input: "lstm_block_wrapper/w_o_diag"
1500 // input: "lstm_block_wrapper/bias"
1501 if (layer.input_size() != 9)
1502 CV_Error(Error::StsNotImplemented, "Unexpected number of input nodes")
1504 if (hasLayerAttr(layer, "forget_bias"))
1505 layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f());
// NOTE(review): copy-paste bug — this condition tests "forget_bias" again
// but the body reads the "cell_clip" attribute; it should presumably be
// hasLayerAttr(layer, "cell_clip"). Confirm against upstream history.
1507 if (hasLayerAttr(layer, "forget_bias"))
1509 float cellClip = getLayerAttr(layer, "cell_clip").f();
1510 // Cell clip disabled if it's negative.
1513 layerParams.set("use_cell_clip", true);
1514 layerParams.set("cell_clip", cellClip);
// input 4 is the combined kernel [Wx; Wh], input 8 the bias; 4 gate blocks
// of width outSize each are packed along the columns.
1519 blobFromTensor(getConstBlob(layer, value_id, 4), W);
1520 blobFromTensor(getConstBlob(layer, value_id, 8), b);
1521 const int outSize = W.cols / 4;
// Rotate gate blocks 1..3 to the gate order expected by the LSTM layer
// (two block swaps per row) — TODO confirm against OpenCV LSTM gate layout.
1524 float* weightData = (float*)W.data;
1525 for (int i = 0; i < W.rows; ++i)
1526 for (int j = 0; j < outSize; ++j)
1528 std::swap(weightData[i * W.cols + 1 * outSize + j],
1529 weightData[i * W.cols + 2 * outSize + j]);
1530 std::swap(weightData[i * W.cols + 2 * outSize + j],
1531 weightData[i * W.cols + 3 * outSize + j]);
// Split the stacked kernel: top rows multiply the input (Wx), bottom
// outSize rows multiply the recurrent state (Wh); both transposed.
1533 Wx = W.rowRange(0, W.rows - outSize).t();
1534 Wh = W.rowRange(W.rows - outSize, W.rows).t();
1536 layerParams.blobs.resize(3);
1537 layerParams.blobs[0] = Wh;
1538 layerParams.blobs[1] = Wx;
1539 layerParams.blobs[2] = b;
1541 if (hasLayerAttr(layer, "use_peephole"))
1543 bool usePeephole = getLayerAttr(layer, "use_peephole").b();
// Peephole diagonal weights (inputs 5..7) are expanded to full diagonal
// matrices and stored as blobs 3..5.
1546 layerParams.set("use_peephole", true);
1547 layerParams.blobs.resize(6);
1548 for (int i = 0; i < 3; ++i)
1551 blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);
1552 w = w.reshape(1, w.total()); // Single column.
1553 w = Mat::diag(w); // Make a diagonal matrix.
1554 layerParams.blobs[3 + i] = w;
1559 int id = dstNet.addLayer(name, "LSTM", layerParams);
1560 layer_id[name] = id;
// input 1 is the actual data sequence; inputs 0/2/3 are ignored (see above).
1563 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1564 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
// Resize ops accept either an absolute output size (2 inputs) or separate
// integer zoom factors for height and width (3 inputs).
1566 else if (type == "ResizeNearestNeighbor" || type == "ResizeBilinear")
1568 if (layer.input_size() == 2)
1570 Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1));
1571 CV_Assert(outSize.type() == CV_32SC1, outSize.total() == 2);
1572 layerParams.set("height", outSize.at<int>(0, 0));
1573 layerParams.set("width", outSize.at<int>(0, 1));
1575 else if (layer.input_size() == 3)
1577 Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1));
1578 Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2));
1579 CV_Assert(factorHeight.type() == CV_32SC1, factorHeight.total() == 1,
1580 factorWidth.type() == CV_32SC1, factorWidth.total() == 1);
1581 layerParams.set("zoom_factor_x", factorWidth.at<int>(0));
1582 layerParams.set("zoom_factor_y", factorHeight.at<int>(0));
1585 CV_Assert(layer.input_size() == 2 || layer.input_size() == 3);
1587 if (type == "ResizeNearestNeighbor")
1588 layerParams.set("interpolation", "nearest");
1590 layerParams.set("interpolation", "bilinear");
1592 if (hasLayerAttr(layer, "align_corners"))
1593 layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b());
1595 int id = dstNet.addLayer(name, "Resize", layerParams);
1596 layer_id[name] = id;
1598 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
// L2Normalize: reduction axes must form a contiguous, same-sign run after
// the optional NHWC->NCHW remap; mapped to a Normalize layer over that span.
1600 else if (type == "L2Normalize")
1602 // op: "L2Normalize"
1604 // input: "reduction_indices" (axis)
1605 CV_Assert(layer.input_size() == 2);
1606 Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));
1607 CV_Assert(reductionIndices.type() == CV_32SC1);
1609 const int numAxes = reductionIndices.total();
1610 if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1611 for (int i = 0; i < numAxes; ++i)
1612 reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));
1614 cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);
1615 for (int i = 1; i < numAxes; ++i)
1617 CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);
1618 // Axes have the same sign.
1619 CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);
1621 layerParams.set("start_axis", reductionIndices.at<int>(0));
1622 layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));
1624 int id = dstNet.addLayer(name, "Normalize", layerParams);
1625 layer_id[name] = id;
1626 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
// PriorBox / DetectionOutput: attribute-for-attribute copies into the
// corresponding SSD layers; these outputs carry no spatial layout.
1628 else if (type == "PriorBox")
1630 if (hasLayerAttr(layer, "min_size"))
1631 layerParams.set("min_size", getLayerAttr(layer, "min_size").i());
1632 if (hasLayerAttr(layer, "max_size"))
1633 layerParams.set("max_size", getLayerAttr(layer, "max_size").i());
1634 if (hasLayerAttr(layer, "flip"))
1635 layerParams.set("flip", getLayerAttr(layer, "flip").b());
1636 if (hasLayerAttr(layer, "clip"))
1637 layerParams.set("clip", getLayerAttr(layer, "clip").b());
1638 if (hasLayerAttr(layer, "offset"))
1639 layerParams.set("offset", getLayerAttr(layer, "offset").f());
1640 if (hasLayerAttr(layer, "step"))
1641 layerParams.set("step", getLayerAttr(layer, "step").f());
// NOTE: the initializer list continues on a line elided from this chunk;
// the loop below expects 5 names in total.
1643 const std::string paramNames[] = {"variance", "aspect_ratio", "scales",
1645 for (int i = 0; i < 5; ++i)
1647 if (hasLayerAttr(layer, paramNames[i]))
1649 Mat values = getTensorContent(getLayerAttr(layer, paramNames[i]).tensor());
1650 layerParams.set(paramNames[i],
1651 DictValue::arrayReal<float*>((float*)values.data, values.total()));
1654 int id = dstNet.addLayer(name, "PriorBox", layerParams);
1655 layer_id[name] = id;
1656 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1657 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
1658 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1660 else if (type == "DetectionOutput")
1662 // op: "DetectionOutput"
1663 // input_0: "locations"
1664 // input_1: "classifications"
1665 // input_2: "prior_boxes"
1666 if (hasLayerAttr(layer, "num_classes"))
1667 layerParams.set("num_classes", getLayerAttr(layer, "num_classes").i());
1668 if (hasLayerAttr(layer, "share_location"))
1669 layerParams.set("share_location", getLayerAttr(layer, "share_location").b());
1670 if (hasLayerAttr(layer, "background_label_id"))
1671 layerParams.set("background_label_id", getLayerAttr(layer, "background_label_id").i());
1672 if (hasLayerAttr(layer, "nms_threshold"))
1673 layerParams.set("nms_threshold", getLayerAttr(layer, "nms_threshold").f());
1674 if (hasLayerAttr(layer, "top_k"))
1675 layerParams.set("top_k", getLayerAttr(layer, "top_k").i());
1676 if (hasLayerAttr(layer, "code_type"))
1677 layerParams.set("code_type", getLayerAttr(layer, "code_type").s());
1678 if (hasLayerAttr(layer, "keep_top_k"))
1679 layerParams.set("keep_top_k", getLayerAttr(layer, "keep_top_k").i());
1680 if (hasLayerAttr(layer, "confidence_threshold"))
1681 layerParams.set("confidence_threshold", getLayerAttr(layer, "confidence_threshold").f());
1682 if (hasLayerAttr(layer, "loc_pred_transposed"))
1683 layerParams.set("loc_pred_transposed", getLayerAttr(layer, "loc_pred_transposed").b());
1684 if (hasLayerAttr(layer, "clip"))
1685 layerParams.set("clip", getLayerAttr(layer, "clip").b());
1686 if (hasLayerAttr(layer, "variance_encoded_in_target"))
1687 layerParams.set("variance_encoded_in_target", getLayerAttr(layer, "variance_encoded_in_target").b());
1689 int id = dstNet.addLayer(name, "DetectionOutput", layerParams);
1690 layer_id[name] = id;
1691 for (int i = 0; i < 3; ++i)
1692 connect(layer_id, dstNet, parsePin(layer.input(i)), id, i);
1693 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1695 else if (type == "Softmax")
1697 if (hasLayerAttr(layer, "axis"))
1698 layerParams.set("axis", getLayerAttr(layer, "axis").i());
1700 int id = dstNet.addLayer(name, "Softmax", layerParams);
1701 layer_id[name] = id;
1702 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
// CropAndResize: input 2 is the Const crop size [height, width]; inputs
// 0 (data) and 1 (boxes) are connected.
1704 else if (type == "CropAndResize")
1706 // op: "CropAndResize"
1710 CV_Assert(layer.input_size() == 3);
1712 Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2));
1713 CV_Assert(cropSize.type() == CV_32SC1, cropSize.total() == 2);
1715 layerParams.set("height", cropSize.at<int>(0));
1716 layerParams.set("width", cropSize.at<int>(1));
1718 int id = dstNet.addLayer(name, "CropAndResize", layerParams);
1719 layer_id[name] = id;
1721 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1722 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
// Mean: only reduce_mean over spatial axes {1,2} (NHWC H and W) is
// supported; it is mapped to global average pooling.
1724 else if (type == "Mean")
1726 Mat indices = getTensorContent(getConstBlob(layer, value_id, 1));
1727 CV_Assert(indices.type() == CV_32SC1);
1729 if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
1730 CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation.");
1732 layerParams.set("pool", "ave");
1733 layerParams.set("global_pooling", true);
1735 int id = dstNet.addLayer(name, "Pooling", layerParams);
1736 layer_id[name] = id;
1738 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1740 // There are two attributes, "keepdims" and a deprecated "keep_dims".
1741 bool keepDims = false;
1742 if (hasLayerAttr(layer, "keepdims"))
1743 keepDims = getLayerAttr(layer, "keepdims").b();
1744 else if (hasLayerAttr(layer, "keep_dims"))
1745 keepDims = getLayerAttr(layer, "keep_dims").b();
// When the reduced dims are dropped (condition line elided from this
// chunk), append a Flatten layer to collapse the 1x1 spatial output.
1749 LayerParams flattenLp;
1750 std::string flattenName = name + "/flatten";
1751 CV_Assert(layer_id.find(flattenName) == layer_id.end());
1752 int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
1753 layer_id[flattenName] = flattenId;
1754 connect(layer_id, dstNet, Pin(name), flattenId, 0);
// ClipByValue with scalar Const bounds maps onto the ReLU6 layer with
// explicit min_value/max_value parameters.
1757 else if (type == "ClipByValue")
1759 // op: "ClipByValue"
1763 CV_Assert(layer.input_size() == 3);
1765 Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1));
1766 Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2));
1767 CV_Assert(minValue.total() == 1, minValue.type() == CV_32F,
1768 maxValue.total() == 1, maxValue.type() == CV_32F);
1770 layerParams.set("min_value", minValue.at<float>(0));
1771 layerParams.set("max_value", maxValue.at<float>(0));
1773 int id = dstNet.addLayer(name, "ReLU6", layerParams);
1774 layer_id[name] = id;
1776 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
// Simple activations: one-to-one mapping of the op name to the dnn layer
// type (TF "Identity" keeps its name).
1778 else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
1779 type == "Relu" || type == "Elu" ||
1780 type == "Identity" || type == "Relu6")
1782 std::string dnnType = type;
1783 if (type == "Abs") dnnType = "AbsVal";
1784 else if (type == "Tanh") dnnType = "TanH";
1785 else if (type == "Relu") dnnType = "ReLU";
1786 else if (type == "Relu6") dnnType = "ReLU6";
1787 else if (type == "Elu") dnnType = "ELU";
1789 int id = dstNet.addLayer(name, dnnType, layerParams);
1790 layer_id[name] = id;
1791 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1795 // Importer does not know how to map this TensorFlow's operation onto OpenCV's layer.
1796 // However we create a layer with the same type and rely that user defined a custom layer.
1798 // All the attributes are added to LayerParams.
1799 google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
1800 for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
1801 ai != attr.end(); ++ai)
// Only scalar string/int/float/bool attributes are forwarded; tensors,
// shapes and lists are skipped.
1803 if (ai->second.value_case() == tensorflow::AttrValue::kS) // string
1804 layerParams.set(ai->first, ai->second.s());
1805 if (ai->second.value_case() == tensorflow::AttrValue::kI) // int64
1806 layerParams.set(ai->first, ai->second.i());
1807 if (ai->second.value_case() == tensorflow::AttrValue::kF) // float
1808 layerParams.set(ai->first, ai->second.f());
1809 if (ai->second.value_case() == tensorflow::AttrValue::kB) // bool
1810 layerParams.set(ai->first, ai->second.b());
1813 // All the Const input nodes are added to layer's blobs.
1814 std::vector<std::string> inputsNames;
1815 for (int i = 0; i < layer.input_size(); ++i)
1817 // Check if input is a Const node.
1818 if (value_id.find(layer.input(i)) != value_id.end())
1820 Mat blob = getTensorContent(getConstBlob(layer, value_id, i));
1821 layerParams.blobs.push_back(blob);
1824 inputsNames.push_back(layer.input(i));
1826 int id = dstNet.addLayer(name, type, layerParams);
1827 layer_id[name] = id;
1829 for (int i = 0; i < inputsNames.size(); ++i)
1831 connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i);
// Register all collected Placeholder names as the network's inputs.
1835 dstNet.setInputsNames(netInputsNames);
1840 #endif //HAVE_PROTOBUF
// Reads a TensorFlow network from files: `model` is the binary GraphDef
// (.pb); `config` is an optional text graph used to refine the import.
// Returns the constructed dnn::Net (the local declaration and return
// statement fall on lines elided from this chunk).
1842 Net readNetFromTensorflow(const String &model, const String &config)
1844 TFImporter importer(model.c_str(), config.c_str());
1846 importer.populateNet(net);
// Same as the file-based overload but reads the serialized GraphDef (and
// optional text config) from in-memory buffers of the given lengths.
1850 Net readNetFromTensorflow(const char* bufferModel, size_t lenModel,
1851 const char* bufferConfig, size_t lenConfig)
1853 TFImporter importer(bufferModel, lenModel, bufferConfig, lenConfig);
1855 importer.populateNet(net);
// Convenience overload over std::vector buffers; delegates to the raw
// pointer/length overload above.
1859 Net readNetFromTensorflow(const std::vector<uchar>& bufferModel, const std::vector<uchar>& bufferConfig)
// NOTE(review): `&bufferModel[0]` is undefined behavior when bufferModel is
// empty — unlike bufferConfig, which is guarded below. Consider the same
// empty() check (or .data()) for bufferModel.
1861 const char* bufferModelPtr = reinterpret_cast<const char*>(&bufferModel[0]);
1862 const char* bufferConfigPtr = bufferConfig.empty() ? NULL :
1863 reinterpret_cast<const char*>(&bufferConfig[0]);
1864 return readNetFromTensorflow(bufferModelPtr, bufferModel.size(),
1865 bufferConfigPtr, bufferConfig.size());
1868 CV__DNN_EXPERIMENTAL_NS_END