1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 // Copyright (C) 2016, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
9 Implementation of Tensorflow models parser
12 #include "../precomp.hpp"
21 #include "tf_graph_simplifier.hpp"
26 CV__DNN_EXPERIMENTAL_NS_BEGIN
30 using ::google::protobuf::RepeatedField;
31 using ::google::protobuf::RepeatedPtrField;
32 using ::google::protobuf::Message;
33 using ::google::protobuf::Descriptor;
34 using ::google::protobuf::FieldDescriptor;
35 using ::google::protobuf::Reflection;
40 static int toNCHW(int idx)
42 CV_Assert(-4 <= idx && idx < 4);
43 if (idx == 0) return 0;
44 else if (idx > 0) return idx % 3 + 1;
45 else return (4 + idx) % 3 + 1;
// These values are used to indicate a layer output's data layout where it's possible.
// (layer name, node index in the GraphDef) pairs, as returned by getNextLayers().
typedef std::vector<std::pair<String, int> > StrIntVector;
// Pin identifies one output blob of a graph node, e.g. the TensorFlow input
// string "conv1:1" refers to blob #1 of node "conv1". A default-constructed
// Pin is invalid (empty name, blobIndex == -1).
struct Pin
{
    Pin(const std::string &_name, int _blobIndex = 0) :
        name(_name), blobIndex(_blobIndex) {}

    Pin() :
        name(""), blobIndex(-1) {}

    std::string name;   // producer node name
    int blobIndex;      // output blob index of the producer node
};
// Converts a TensorProto's declared shape into an OpenCV MatShape.
// NOTE(review): this view of the file is missing interior lines (braces and
// at least one resize/else branch); code is kept byte-identical.
void blobShapeFromTensor(const tensorflow::TensorProto &tensor, MatShape& shape)
if (tensor.has_tensor_shape())
const tensorflow::TensorShapeProto &_shape = tensor.tensor_shape();
int i, n = _shape.dim_size();
// Copy every declared dimension size into `shape`.
for (i = 0; i < n; i++)
shape[i] = (int)_shape.dim(i).size();
// Rank-0 tensors are represented as a single-element shape.
shape.resize(1, 1); // Scalar.
// Reached when the proto carries no tensor_shape at all.
CV_Error(Error::StsError, "Unknown shape of input tensor");
// Copies tensor elements of type T into dstBlob as CV_32F. For 4-D tensors
// the data is physically reordered from TensorFlow's NHWC layout to OpenCV's
// NCHW; other ranks are copied element-wise.
// NOTE(review): the `template<typename T>` header and several braces are not
// visible in this chunk (T is used below); code kept byte-identical.
void parseTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
blobShapeFromTensor(tensor, shape);
int dims = (int)shape.size();

// REORDER blob NHWC to NCHW
swap(shape[2], shape[3]); // NHCW
swap(shape[1], shape[2]); // NCHW

dstBlob.create(shape, CV_32F);

Mat tensorContent = getTensorContent(tensor);
int size = tensorContent.total();
CV_Assert(size == (int)dstBlob.total());

float *dstData = dstBlob.ptr<float>();
const T *data = reinterpret_cast<const T*>(tensorContent.data);

// 4-D case: permute source NHWC indices into destination NCHW indices.
// Note: shape[] has already been swapped to NCHW above, so src_i below is
// computed against the original NHWC strides via channels/width factors.
int num = shape[0], channels = shape[1], height = shape[2], width = shape[3];
int total = num*channels*height*width;
for(int i_n = 0; i_n < shape[0]; i_n++) {
for(int i_c = 0; i_c < shape[1]; i_c++) {
for(int i_h = 0; i_h < shape[2]; i_h++) {
for(int i_w = 0; i_w < shape[3]; i_w++) {
int dst_i = channels*height*width*i_n + height*width*i_c + width*i_h + i_w;
int src_i = channels*height*width*i_n + i_c + channels*width*i_h + channels*i_w;

// Guard against index arithmetic mistakes before writing.
CV_Assert(dst_i < total);
CV_Assert(src_i < total);

dstData[dst_i] = data[src_i];

// Non-4-D case: plain element-wise copy, no layout change.
for (int i = 0; i < size; i++)
dstData[i] = data[i];
// Dispatches tensor parsing on the proto's dtype. DT_HALF shares the float
// branch — presumably getTensorContent() expands half data to float32
// beforehand (TODO confirm; the helper's body is outside this chunk).
// NOTE(review): `break;` lines and braces are not visible in this chunk.
void blobFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
switch (tensor.dtype()) {
case tensorflow::DT_FLOAT:
case tensorflow::DT_HALF:
parseTensor<float>(tensor, dstBlob);
case tensorflow::DT_DOUBLE:
parseTensor<double>(tensor, dstBlob);
// Any other dtype is rejected.
CV_Error(Error::StsError, "Tensor's data type is not supported");
// Debug helper: prints the integer items of an AttrValue list to stdout.
void printList(const tensorflow::AttrValue::ListValue &val)
for (int i = 0; i < val.i_size(); i++)
std::cout << " " << val.i(i);
// Debug helper: prints each dimension of a tensor shape as "name:size".
void printTensorShape(const tensorflow::TensorShapeProto &shape)
for (int d = 0; d < shape.dim_size(); d++)
std::cout << shape.dim(d).name() <<
":" << shape.dim(d).size() << " ";
// Debug helper: prints a tensor's shape and up to the first 10 elements of
// its raw content (float or int32 only).
// NOTE(review): braces, `break;`s and the early-return for empty content are
// not visible in this chunk; code kept byte-identical.
void printTensor(const tensorflow::TensorProto &tensor)
printTensorShape(tensor.tensor_shape());

if (tensor.tensor_content().empty())

switch (tensor.dtype())
case tensorflow::DT_FLOAT:
const float *data = reinterpret_cast<const float*>(tensor.tensor_content().c_str());
int size = tensor.tensor_content().size() / sizeof(float);
for (int i = 0; i < std::min(10, size); i++)
std::cout << " " << data[i];
// Indicate how many elements were omitted.
std::cout << " ... " << size - 10 << " more";
case tensorflow::DT_INT32:
const int *data = reinterpret_cast<const int*>(tensor.tensor_content().c_str());
int size = tensor.tensor_content().size() / sizeof(int);
for (int i = 0; i < std::min(10, size); i++)
std::cout << " " << data[i];
std::cout << " ... " << size - 10 << " more";
// Other dtypes are not printable by this helper.
CV_Error(Error::StsError, "Tensor type is not supported");
// Debug helper: dumps a node's name, op, inputs and a selection of its
// attributes (dtype, padding, transpose flags, strides/ksize lists, tensors).
void printLayerAttr(const tensorflow::NodeDef &layer)
std::cout << std::endl << layer.name() << ":" << layer.op();
for (int ii = 0; ii < layer.input_size(); ii++)
std::cout << "(" << layer.input(ii) << ")";
std::cout << std::endl;
// NOTE(review): this copies the whole attribute map by value; a const
// reference would avoid the deep copy (debug-only code, left as-is).
google::protobuf::Map<std::string, tensorflow::AttrValue> attr
for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
ai != attr.end(); ++ai)
std::cout << ai->first << ":";
if (ai->first == "dtype" || ai->first == "T")
std::cout << ai->second.i();
else if (ai->first == "padding")
std::cout << ai->second.s();
else if (ai->first == "transpose_a" || ai->first == "transpose_b")
std::cout << ai->second.b();
// else if (ai->first == "shape")
//   printTensorShape(ai->second.shape());
else if (ai->first == "strides" || ai->first == "ksize")
printList(ai->second.list());
printTensor(ai->second.tensor());
std::cout << std::endl;
237 bool hasLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
239 google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
240 return attr.find(name) != attr.end();
243 const tensorflow::AttrValue& getLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
245 return layer.attr().at(name);
248 void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
250 if (hasLayerAttr(layer, "strides"))
252 const tensorflow::AttrValue& val = getLayerAttr(layer, "strides");
253 if (val.list().i_size() != 4 ||
254 val.list().i(0) != 1 || val.list().i(3) != 1)
255 CV_Error(Error::StsError, "Unsupported strides");
256 layerParams.set("stride_h", static_cast<int>(val.list().i(1)));
257 layerParams.set("stride_w", static_cast<int>(val.list().i(2)));
261 DictValue parseDims(const tensorflow::TensorProto &tensor) {
263 blobShapeFromTensor(tensor, shape);
264 int dims = (int)shape.size();
266 CV_Assert(tensor.dtype() == tensorflow::DT_INT32);
267 CV_Assert(dims == 1);
269 Mat values = getTensorContent(tensor);
270 CV_Assert(values.type() == CV_32SC1);
271 // TODO: add reordering shape if dims == 4
272 return DictValue::arrayInt((int*)values.data, values.total());
275 void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
277 if (hasLayerAttr(layer, "ksize"))
279 const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize");
280 if (val.list().i_size() != 4 ||
281 val.list().i(0) != 1 || val.list().i(3) != 1)
282 CV_Error(Error::StsError, "Unsupported ksize");
283 layerParams.set("kernel_h", static_cast<int>(val.list().i(1)));
284 layerParams.set("kernel_w", static_cast<int>(val.list().i(2)));
288 layerParams.set("kernel_h", 1);
289 layerParams.set("kernel_w", 1);
293 void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer)
295 if (hasLayerAttr(layer, "padding"))
296 layerParams.set("pad_mode", getLayerAttr(layer, "padding").s());
299 Pin parsePin(const std::string &name)
303 size_t delimiter_pos = name.find_first_of(":");
304 if (delimiter_pos != std::string::npos)
306 pin.name = name.substr(0, delimiter_pos);
307 std::istringstream(name.substr(delimiter_pos + 1)) >> pin.blobIndex;
313 StrIntVector getNextLayers(const tensorflow::GraphDef& net, const String& layer_name, const String& type = "")
317 for (int li = 0; li < net.node_size(); li++)
319 const tensorflow::NodeDef& layer = net.node(li);
320 for (int input_id = 0; input_id < layer.input_size(); input_id++) {
321 String input_op_name = parsePin(layer.input(input_id)).name;
322 bool type_ok = type.empty() ? true : type == layer.op();
323 if (input_op_name == layer_name && type_ok)
324 layers.push_back(std::make_pair(layer.name(), li));
331 void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int input_blob_index, bool remove_from_net = true) {
332 String layer_name = net.node(layer_index).name();
333 StrIntVector layers = getNextLayers(net, layer_name);
335 String removed_layer_input = net.node(layer_index).input(input_blob_index);
337 for (size_t i = 0; i < layers.size(); i++)
339 tensorflow::NodeDef* layer = net.mutable_node(layers[i].second);
340 for (int input_id = 0; input_id < layer->input_size(); input_id++) {
341 String input_op_name = layer->input(input_id);
343 if (input_op_name == layer_name) {
344 layer->set_input(input_id, removed_layer_input);
350 net.mutable_node()->DeleteSubrange(layer_index, 1);
// Construct from file paths (binary .pb model, optional .pbtxt config).
TFImporter(const char *model, const char *config = NULL);
// Construct from in-memory serialized buffers.
TFImporter(const char *dataModel, size_t lenModel,
const char *dataConfig = NULL, size_t lenConfig = 0);

// Translates the parsed GraphDef(s) into dstNet layers and connections.
void populateNet(Net dstNet);

// Converts a convolution kernel tensor (HWIO) into an OIHW CV_32F blob.
void kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob);

// Connects producer pin outPin to blob input_blob_id of layer input_layer_id.
void connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
const int input_layer_id, const int input_blob_id);
// Connects outPin to every input blob of the given layer.
void connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
const int input_layer_id, const int input_blobs_count);
// Finds the Const tensor feeding `layer` (optionally a specific input);
// reports which input index it was via actual_inp_blob_idx when non-null.
const tensorflow::TensorProto& getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
int input_blob_index = -1, int* actual_inp_blob_idx = 0);

// Binary serialized TensorFlow graph includes weights.
tensorflow::GraphDef netBin;
// Optional text definition of TensorFlow graph. More flexible than binary format
// and may be used to build the network using binary format only as a weights storage.
// This approach is similar to Caffe's `.prototxt` and `.caffemodel`.
tensorflow::GraphDef netTxt;
380 TFImporter::TFImporter(const char *model, const char *config)
382 if (model && model[0])
383 ReadTFNetParamsFromBinaryFileOrDie(model, &netBin);
384 if (config && config[0])
385 ReadTFNetParamsFromTextFileOrDie(config, &netTxt);
388 TFImporter::TFImporter(const char *dataModel, size_t lenModel,
389 const char *dataConfig, size_t lenConfig)
391 if (dataModel != NULL && lenModel > 0)
392 ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin);
393 if (dataConfig != NULL && lenConfig > 0)
394 ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt);
// Converts a 4-D convolution weight tensor from TensorFlow's HWIO layout
// (height, width, in_channels, out_channels) into OpenCV's OIHW blob,
// physically permuting the data. Only float/half kernels are accepted.
// NOTE(review): some interior lines (braces, a local shape declaration) are
// not visible in this chunk; code kept byte-identical.
void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
blobShapeFromTensor(tensor, shape);
int dims = (int)shape.size();

// TODO: other blob types
CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT ||
tensor.dtype() == tensorflow::DT_HALF);
CV_Assert(dims == 4);

// REORDER kernel HWIO to OIHW
swap(shape[0], shape[2]); // IWHO
swap(shape[1], shape[3]); // IOHW
swap(shape[0], shape[1]); // OIHW

dstBlob.create(shape, CV_32F);

Mat tensorContent = getTensorContent(tensor);
int size = tensorContent.total();
CV_Assert(size == (int)dstBlob.total());

float *dstData = dstBlob.ptr<float>();
const float *data = reinterpret_cast<const float*>(tensorContent.data);

// Scatter each HWIO source element to its OIHW destination. src_i is
// computed against the original HWIO strides (O fastest, then I, W, H).
int out_c = shape[0], input_c = shape[1], height = shape[2], width = shape[3];
int total = out_c*input_c*height*width;
for(int i_oc = 0; i_oc < out_c; i_oc++) {
for(int i_ic = 0; i_ic < input_c; i_ic++) {
for(int i_h = 0; i_h < height; i_h++) {
for(int i_w = 0; i_w < width; i_w++) {
int dst_i = input_c*height*width*i_oc + height*width*i_ic + width*i_h + i_w;
int src_i = out_c*input_c*width*i_h + out_c*input_c*i_w + out_c*i_ic + i_oc;
// Guard against index arithmetic mistakes before writing.
CV_Assert(dst_i < total);
CV_Assert(src_i < total);
dstData[dst_i] = data[src_i];
439 void TFImporter::connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
440 const int input_layer_id, const int input_blob_id)
442 std::map<String, int>::const_iterator it = layers_name_id_map.find(outPin.name);
443 if (it == layers_name_id_map.end())
444 CV_Error(Error::StsError, "Input layer not found: " + outPin.name);
445 network.connect(it->second, outPin.blobIndex, input_layer_id, input_blob_id);
448 void TFImporter::connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
449 const int input_layer_id, const int input_blobs_count)
451 for (int input_blob_id = 0; input_blob_id < input_blobs_count; input_blob_id++)
452 connect(layer_id, network, outPin, input_layer_id, input_blob_id);
// Returns the TensorProto of the Const node feeding `layer`. With
// input_blob_index == -1 it searches the inputs for exactly one Const
// producer; otherwise it uses the given input. The chosen input index is
// reported through actual_inp_blob_idx when non-null. The tensor is looked
// up in netBin first, then netTxt.
// NOTE(review): `const_layers` is passed by value (a full map copy per
// call); fixing it requires touching the in-class declaration too, so it is
// left as-is here. Some braces are missing from this chunked view.
const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
                                              int input_blob_index, int* actual_inp_blob_idx) {
if (input_blob_index == -1) {
// Auto-detect: exactly one of the inputs must come from a Const node.
for(int i = 0; i < layer.input_size(); i++) {
Pin input = parsePin(layer.input(i));
if (const_layers.find(input.name) != const_layers.end()) {
if (input_blob_index != -1)
CV_Error(Error::StsError, "More than one input is Const op");

input_blob_index = i;

if (input_blob_index == -1)
CV_Error(Error::StsError, "Const input blob for weights not found");

Pin kernel_inp = parsePin(layer.input(input_blob_index));
if (const_layers.find(kernel_inp.name) == const_layers.end())
CV_Error(Error::StsError, "Const kernel input not found");
// Const nodes have a single output; any other blob index is invalid.
if (kernel_inp.blobIndex != 0)
CV_Error(Error::StsError, "Unsupported kernel input");

if(actual_inp_blob_idx) {
*actual_inp_blob_idx = input_blob_index;

// The recorded index may refer to either graph; verify by name and fall
// back to the textual graph when it does not match the binary one.
int nodeIdx = const_layers.at(kernel_inp.name);
if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name)
return netBin.node(nodeIdx).attr().at("value").tensor();
CV_Assert(nodeIdx < netTxt.node_size(),
          netTxt.node(nodeIdx).name() == kernel_inp.name);
return netTxt.node(nodeIdx).attr().at("value").tensor();
// Registers every Const node of `net` into const_layers (name -> node index)
// and marks it to be skipped during layer creation. Dequantize nodes over
// quantized Const inputs are folded in place: the quantized tensor is
// dequantized to DT_FLOAT and re-registered under the Dequantize node's name.
// NOTE(review): some braces/continue lines are missing from this chunked
// view; code kept byte-identical.
static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& const_layers,
                          std::set<String>& layers_to_ignore)
for (int li = 0; li < net.node_size(); li++)
const tensorflow::NodeDef &layer = net.node(li);
String name = layer.name();
String type = layer.op();

if (type == "Dequantize")
// Example of Dequantize node:
//   name: "conv2d_1/bias"
//   input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8)
//   input: "conv2d_1/bias_quantized_min"
//   input: "conv2d_1/bias_quantized_max"
//   attr { key: "T" value { type: DT_QUINT8 } }      (quantized type)
//   attr { key: "mode" value { s: "MIN_FIRST" } }    (quantization technique)
CV_Assert(layer.input_size() == 3);
// All three inputs must already be registered Const nodes.
for (int i = 0; i < 3; ++i)
CV_Assert(const_layers.find(layer.input(i)) != const_layers.end());
CV_Assert(hasLayerAttr(layer, "mode") &&
getLayerAttr(layer, "mode").s() == "MIN_FIRST");

int tensorId = const_layers[layer.input(0)];
int minId = const_layers[layer.input(1)];
int maxId = const_layers[layer.input(2)];

tensorflow::TensorProto* tensor = net.mutable_node(tensorId)
->mutable_attr()->at("value")

CV_Assert(tensor->dtype() == tensorflow::DT_QUINT8);

Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor());
Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor());
CV_Assert(qMin.total() == 1, qMin.type() == CV_32FC1,
          qMax.total() == 1, qMax.type() == CV_32FC1);

Mat content = getTensorContent(*tensor);

// MIN_FIRST dequantization: value = scale * q + minVal, with minVal
// snapped to a multiple of the scale (rounded zero point).
float minVal = qMin.at<float>(0);
float rangeScale = (qMax.at<float>(0) - minVal) / 255;
CV_Assert(rangeScale >= 0);
content.convertTo(content, CV_32FC1, rangeScale,
                  rangeScale * cvRound(minVal / rangeScale));

// Rewrite the quantized Const node in place with the float tensor and
// give it the Dequantize node's name so consumers resolve to it.
tensor->set_dtype(tensorflow::DT_FLOAT);
tensor->set_tensor_content(content.data, content.total() * content.elemSize1());

net.mutable_node(tensorId)->set_name(name);
CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);
layers_to_ignore.insert(name);
else if (type != "Const")
continue;  // only Const parameters are supported

if (layer.attr().find("value") != layer.attr().end())
CV_Assert(const_layers.insert(std::make_pair(name, li)).second);

layers_to_ignore.insert(name);
// If all inputs of a specific layer have the same data layout we can say that
// this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
// An explicit "data_format" attribute always wins over input-derived layout.
// NOTE(review): the final return statements and some braces are missing from
// this chunked view; code kept byte-identical.
static int predictOutputDataLayout(const tensorflow::NodeDef& layer, const std::map<String, int>& data_layouts)
if (hasLayerAttr(layer, "data_format"))
std::string format = getLayerAttr(layer, "data_format").s();
if (format == "NHWC" || format == "channels_last")
return DATA_LAYOUT_NHWC;
else if (format == "NCHW" || format == "channels_first")
return DATA_LAYOUT_NCHW;
// Any other data_format string is a parse error.
CV_Error(Error::StsParseError, "Unknown data_format value: " + format);

// Determine layout by layer's inputs
int layout = DATA_LAYOUT_UNKNOWN;
std::map<String, int>::const_iterator it;
for (int i = 0, n = layer.input_size(); i < n; ++i)
// Strip an optional ":blobIndex" suffix before the lookup.
it = data_layouts.find(layer.input(i).substr(0, layer.input(i).rfind(':')));
if (it != data_layouts.end())
if (it->second == DATA_LAYOUT_UNKNOWN)
return DATA_LAYOUT_UNKNOWN;
else if (it->second != layout)
// First known input sets the layout; a later mismatch makes it unknown.
if (layout == DATA_LAYOUT_UNKNOWN)
return DATA_LAYOUT_UNKNOWN;
598 void TFImporter::populateNet(Net dstNet)
600 RemoveIdentityOps(netBin);
601 RemoveIdentityOps(netTxt);
603 if (!netTxt.ByteSize())
604 simplifySubgraphs(netBin);
606 std::set<String> layers_to_ignore;
608 tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin;
610 int layersSize = net.node_size();
612 std::map<String, int> data_layouts;
614 // find all Const layers for params
615 std::map<String, int> value_id;
616 addConstNodes(netBin, value_id, layers_to_ignore);
617 addConstNodes(netTxt, value_id, layers_to_ignore);
619 std::map<String, int> layer_id;
621 for (int li = 0; li < layersSize; li++)
623 tensorflow::NodeDef layer = net.node(li);
624 String name = layer.name();
625 String type = layer.op();
626 LayerParams layerParams;
628 if(layers_to_ignore.find(name) != layers_to_ignore.end())
631 data_layouts[name] = predictOutputDataLayout(layer, data_layouts);
633 if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative")
635 // The first node of dilated convolution subgraph.
636 // Extract input node, dilation rate and paddings.
637 std::string input = layer.input(0);
638 if (type == "SpaceToBatchND")
640 // op: "SpaceToBatchND"
642 // input: "SpaceToBatchND/block_shape"
643 // input: "SpaceToBatchND/paddings"
644 CV_Assert(layer.input_size() == 3);
646 DictValue dilation = parseDims(getConstBlob(layer, value_id, 1));
647 CV_Assert(dilation.size() == 2);
648 layerParams.set("dilation_h", dilation.get<int>(0));
649 layerParams.set("dilation_w", dilation.get<int>(1));
652 parseTensor<int>(getConstBlob(layer, value_id, 2), paddings);
654 // paddings is a 2x2 matrix: [[top, bot], [left, right]]
655 layerParams.set("pad_h", paddings.at<float>(0));
656 layerParams.set("pad_w", paddings.at<float>(2));
658 StrIntVector next_layers = getNextLayers(net, name, "Conv2D");
659 if (next_layers.empty())
661 next_layers = getNextLayers(net, name, "DepthwiseConv2dNative");
663 CV_Assert(next_layers.size() == 1);
664 layer = net.node(next_layers[0].second);
665 layers_to_ignore.insert(next_layers[0].first);
670 layerParams.set("bias_term", false);
671 layerParams.blobs.resize(1);
673 StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
674 if (next_layers.size() == 1) {
675 layerParams.set("bias_term", true);
676 layerParams.blobs.resize(2);
678 int weights_layer_index = next_layers[0].second;
680 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
681 ExcludeLayer(net, weights_layer_index, 0, false);
682 layers_to_ignore.insert(next_layers[0].first);
685 const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id);
686 kernelFromTensor(kernelTensor, layerParams.blobs[0]);
687 releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
688 int* kshape = layerParams.blobs[0].size.p;
689 if (type == "DepthwiseConv2dNative")
691 const int chMultiplier = kshape[0];
692 const int inCh = kshape[1];
693 const int height = kshape[2];
694 const int width = kshape[3];
696 Mat copy = layerParams.blobs[0].clone();
697 float* src = (float*)copy.data;
698 float* dst = (float*)layerParams.blobs[0].data;
699 for (int i = 0; i < chMultiplier; ++i)
700 for (int j = 0; j < inCh; ++j)
701 for (int s = 0; s < height * width; ++s)
703 int src_i = (i * inCh + j) * height * width + s;
704 int dst_i = (j * chMultiplier + i) * height* width + s;
705 dst[dst_i] = src[src_i];
707 // TODO Use reshape instead
708 kshape[0] = inCh * chMultiplier;
710 size_t* kstep = layerParams.blobs[0].step.p;
711 kstep[0] = kstep[1]; // fix steps too
713 layerParams.set("kernel_h", kshape[2]);
714 layerParams.set("kernel_w", kshape[3]);
715 layerParams.set("num_output", kshape[0]);
717 setStrides(layerParams, layer);
718 setPadding(layerParams, layer);
720 // The final node of dilated convolution subgraph.
721 next_layers = getNextLayers(net, name, "BatchToSpaceND");
722 if (!next_layers.empty())
724 layerParams.set("pad_mode", ""); // We use padding values.
725 CV_Assert(next_layers.size() == 1);
726 ExcludeLayer(net, next_layers[0].second, 0, false);
727 layers_to_ignore.insert(next_layers[0].first);
730 int id = dstNet.addLayer(name, "Convolution", layerParams);
734 connect(layer_id, dstNet, parsePin(input), id, 0);
736 if (data_layouts[name] == DATA_LAYOUT_UNKNOWN)
737 data_layouts[name] = DATA_LAYOUT_NHWC;
739 else if (type == "BiasAdd" || type == "Add")
741 bool haveConst = false;
742 for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
744 Pin input = parsePin(layer.input(ii));
745 haveConst = value_id.find(input.name) != value_id.end();
747 CV_Assert(!haveConst || layer.input_size() == 2);
751 Mat values = getTensorContent(getConstBlob(layer, value_id));
752 CV_Assert(values.type() == CV_32FC1);
755 if (values.total() == 1) // is a scalar.
757 layerParams.set("shift", values.at<float>(0));
758 id = dstNet.addLayer(name, "Power", layerParams);
762 layerParams.blobs.resize(1, values);
763 id = dstNet.addLayer(name, "Shift", layerParams);
768 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
772 layerParams.set("operation", "sum");
773 int id = dstNet.addLayer(name, "Eltwise", layerParams);
776 for (int ii = 0; ii < layer.input_size(); ii++)
778 Pin inp = parsePin(layer.input(ii));
779 if (layer_id.find(inp.name) == layer_id.end())
780 CV_Error(Error::StsError, "Input layer not found: " + inp.name);
781 dstNet.connect(layer_id.at(inp.name), inp.blobIndex, id, ii);
785 else if (type == "Sub")
787 bool haveConst = false;
788 for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
790 Pin input = parsePin(layer.input(ii));
791 haveConst = value_id.find(input.name) != value_id.end();
793 CV_Assert(haveConst);
795 Mat values = getTensorContent(getConstBlob(layer, value_id));
796 CV_Assert(values.type() == CV_32FC1);
800 if (values.total() == 1) // is a scalar.
802 layerParams.set("shift", values.at<float>(0));
803 id = dstNet.addLayer(name, "Power", layerParams);
807 layerParams.blobs.resize(1, values);
808 id = dstNet.addLayer(name, "Shift", layerParams);
813 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
815 else if (type == "MatMul")
817 CV_Assert(layer.input_size() == 2);
819 layerParams.set("bias_term", false);
820 layerParams.blobs.resize(1);
822 StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
823 if (next_layers.empty())
825 next_layers = getNextLayers(net, name, "Add");
827 if (next_layers.size() == 1) {
828 layerParams.set("bias_term", true);
829 layerParams.blobs.resize(2);
831 int weights_layer_index = next_layers[0].second;
832 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
833 ExcludeLayer(net, weights_layer_index, 0, false);
834 layers_to_ignore.insert(next_layers[0].first);
837 int kernel_blob_index = -1;
838 const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernel_blob_index);
839 blobFromTensor(kernelTensor, layerParams.blobs[0]);
840 releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
842 if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed
843 Mat data = layerParams.blobs[0].t();
844 layerParams.blobs[0] = data.clone();
847 layerParams.set("num_output", layerParams.blobs[0].size[0]);
849 int id = dstNet.addLayer(name, "InnerProduct", layerParams);
853 int input_blob_index = kernel_blob_index == 0 ? 1 : 0;
854 connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0);
855 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
857 else if (type == "Reshape")
859 Pin inpId = parsePin(layer.input(0));
860 Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1));
862 if (newShape.total() != 4 && data_layouts[layer.input(0)] == DATA_LAYOUT_NHWC)
865 int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.
866 permLP.set("order", DictValue::arrayInt<int*>(order, 4));
868 std::string permName = name + "/nchw";
869 CV_Assert(layer_id.find(permName) == layer_id.end());
870 int permId = dstNet.addLayer(permName, "Permute", permLP);
871 layer_id[permName] = permId;
872 connect(layer_id, dstNet, inpId, permId, 0);
873 inpId = Pin(permName);
875 else if (newShape.total() == 4 && data_layouts[layer.input(0)] == DATA_LAYOUT_NHWC)
878 std::swap(*newShape.ptr<int32_t>(0, 2), *newShape.ptr<int32_t>(0, 3));
879 std::swap(*newShape.ptr<int32_t>(0, 1), *newShape.ptr<int32_t>(0, 2));
881 layerParams.set("dim", DictValue::arrayInt<int*>(newShape.ptr<int>(), newShape.total()));
883 int id = dstNet.addLayer(name, "Reshape", layerParams);
887 connect(layer_id, dstNet, inpId, id, 0);
889 else if (type == "Flatten" || type == "Squeeze")
891 Pin inpId = parsePin(layer.input(0));
892 int inpLayout = data_layouts[layer.input(0)];
893 if (type == "Squeeze")
895 CV_Assert(hasLayerAttr(layer, "squeeze_dims"));
896 const tensorflow::AttrValue& dims = getLayerAttr(layer, "squeeze_dims");
897 if (inpLayout == DATA_LAYOUT_NHWC)
899 if (dims.list().i_size() != 2 || dims.list().i(0) != 1 || dims.list().i(1) != 2)
900 CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
902 else if (inpLayout == DATA_LAYOUT_NCHW)
904 if (dims.list().i_size() != 2 || dims.list().i(0) != 2 || dims.list().i(1) != 3)
905 CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
908 CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
910 if (inpLayout == DATA_LAYOUT_NHWC)
913 int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.
914 permLP.set("order", DictValue::arrayInt<int*>(order, 4));
916 std::string permName = name + "/nchw";
917 CV_Assert(layer_id.find(permName) == layer_id.end());
918 int permId = dstNet.addLayer(permName, "Permute", permLP);
919 layer_id[permName] = permId;
920 connect(layer_id, dstNet, inpId, permId, 0);
921 inpId = Pin(permName);
923 int id = dstNet.addLayer(name, "Flatten", layerParams);
925 connect(layer_id, dstNet, inpId, id, 0);
926 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
928 else if (type == "Transpose")
930 Mat perm = getTensorContent(getConstBlob(layer, value_id, 1));
931 CV_Assert(perm.type() == CV_32SC1);
932 int* permData = (int*)perm.data;
933 if (perm.total() == 4)
935 // Only NHWC <-> NCHW permutations are allowed. OpenCV is always
936 // keep NCHW layout this way.
937 if (data_layouts[layer.input(0)] == DATA_LAYOUT_NHWC)
939 if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2)
941 // in TensorFlow: NHWC->NCHW
942 // in OpenCV: NCHW->NCHW
943 data_layouts[name] = DATA_LAYOUT_NCHW;
945 else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
947 // in TensorFlow: NHWC->NHWC
948 // in OpenCV: NCHW->NCHW
949 data_layouts[name] = DATA_LAYOUT_NHWC;
952 CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
954 else if (data_layouts[layer.input(0)] == DATA_LAYOUT_NCHW)
956 if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1)
958 // in TensorFlow: NCHW->NHWC
959 // in OpenCV: NCHW->NCHW
960 data_layouts[name] = DATA_LAYOUT_NHWC;
962 else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
964 // in TensorFlow: NCHW->NCHW
965 // in OpenCV: NCHW->NCHW
966 data_layouts[name] = DATA_LAYOUT_NCHW;
969 CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
971 int id = dstNet.addLayer(name, "Identity", layerParams);
973 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
977 layerParams.set("order", DictValue::arrayInt<int*>(permData, perm.total()));
979 int id = dstNet.addLayer(name, "Permute", layerParams);
983 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
984 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
987 else if (type == "Const")
990 else if (type == "LRN")
992 if(hasLayerAttr(layer, "alpha")) {
993 layerParams.set("alpha", getLayerAttr(layer, "alpha").f());
995 if(hasLayerAttr(layer, "beta")) {
996 layerParams.set("beta", getLayerAttr(layer, "beta").f());
998 if(hasLayerAttr(layer, "depth_radius")) {
999 int radius = (int)getLayerAttr(layer, "depth_radius").i();
1000 layerParams.set("local_size", 2*radius + 1);
1002 if(hasLayerAttr(layer, "bias")) {
1003 layerParams.set("bias", getLayerAttr(layer, "bias").f());
1005 layerParams.set("norm_by_size", false);
1007 int id = dstNet.addLayer(name, "LRN", layerParams);
1008 layer_id[name] = id;
1010 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1012 else if (type == "Concat" || type == "ConcatV2")
1014 int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);
1015 int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
1016 layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW(axis) : axis);
1018 int id = dstNet.addLayer(name, "Concat", layerParams);
1019 layer_id[name] = id;
1022 int from = (type == "Concat" ? 1 : 0);
1023 int to = (type == "Concat" ? layer.input_size() : layer.input_size() - 1);
1025 // input(0) or input(n-1) is concat_dim
1026 for (int ii = from; ii < to; ii++)
1028 Pin inp = parsePin(layer.input(ii));
1029 if (layer_id.find(inp.name) == layer_id.end())
1030 CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1031 dstNet.connect(layer_id.at(inp.name), inp.blobIndex, id, ii - from);
1034 else if (type == "MaxPool")
1036 layerParams.set("pool", "max");
1038 setKSize(layerParams, layer);
1039 setStrides(layerParams, layer);
1040 setPadding(layerParams, layer);
1042 int id = dstNet.addLayer(name, "Pooling", layerParams);
1043 layer_id[name] = id;
1045 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1047 else if (type == "AvgPool")
1049 layerParams.set("pool", "ave");
1050 layerParams.set("ave_pool_padded_area", false);
1052 setKSize(layerParams, layer);
1053 setStrides(layerParams, layer);
1054 setPadding(layerParams, layer);
1056 int id = dstNet.addLayer(name, "Pooling", layerParams);
1057 layer_id[name] = id;
1059 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1061 else if (type == "Placeholder")
1063 std::vector<String> netInputs(1);
1064 netInputs[0] = name;
1066 dstNet.setInputsNames(netInputs);
1068 else if (type == "Split") {
1069 // TODO: determining axis index remapping by input dimensions order of input blob
1070 // TODO: slicing input may be Const op
1071 // TODO: slicing kernels for convolutions - in current implementation it is impossible
1072 // TODO: add parsing num of slices parameter
1073 CV_Assert(layer.input_size() == 2);
1075 // 1st blob is dims tensor
1076 int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
1077 layerParams.set("axis", toNCHW(axis));
1079 int id = dstNet.addLayer(name, "Slice", layerParams);
1080 layer_id[name] = id;
1083 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1085 else if (type == "Slice")
1088 // input: "input_node"
1089 // input: "Slice/begin"
1090 // input: "Slice/size"
1091 CV_Assert(layer.input_size() == 3);
1092 Mat begins = getTensorContent(getConstBlob(layer, value_id, 1));
1093 Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2));
1094 CV_Assert(!begins.empty(), !sizes.empty(), begins.type() == CV_32SC1,
1095 sizes.type() == CV_32SC1);
1097 if (begins.total() == 4 && data_layouts[name] == DATA_LAYOUT_NHWC)
1099 // Swap NHWC parameters' order to NCHW.
1100 std::swap(*begins.ptr<int32_t>(0, 2), *begins.ptr<int32_t>(0, 3));
1101 std::swap(*begins.ptr<int32_t>(0, 1), *begins.ptr<int32_t>(0, 2));
1102 std::swap(*sizes.ptr<int32_t>(0, 2), *sizes.ptr<int32_t>(0, 3));
1103 std::swap(*sizes.ptr<int32_t>(0, 1), *sizes.ptr<int32_t>(0, 2));
1105 layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total()));
1106 layerParams.set("size", DictValue::arrayInt((int*)sizes.data, sizes.total()));
1108 int id = dstNet.addLayer(name, "Slice", layerParams);
1109 layer_id[name] = id;
1111 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1113 else if (type == "Mul")
1115 bool haveConst = false;
1116 for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
1118 Pin input = parsePin(layer.input(ii));
1119 haveConst = value_id.find(input.name) != value_id.end();
1121 CV_Assert(!haveConst || layer.input_size() == 2);
1125 // Multiplication by constant.
1126 CV_Assert(layer.input_size() == 2);
1127 Mat scaleMat = getTensorContent(getConstBlob(layer, value_id));
1128 CV_Assert(scaleMat.type() == CV_32FC1);
1131 if (scaleMat.total() == 1) // is a scalar.
1133 // Try to match with a LeakyRelu:
1135 // name: "LeakyRelu/mul"
1137 // input: "LeakyRelu/alpha"
1141 // name: "LeakyRelu/Maximum"
1143 // input: "LeakyRelu/mul"
1146 StrIntVector next_layers = getNextLayers(net, name, "Maximum");
1147 if (!next_layers.empty())
1149 int maximumLayerIdx = next_layers[0].second;
1150 ExcludeLayer(net, maximumLayerIdx, 0, false);
1151 layers_to_ignore.insert(next_layers[0].first);
1153 layerParams.set("negative_slope", scaleMat.at<float>(0));
1154 id = dstNet.addLayer(name, "ReLU", layerParams);
1158 // Just a multiplication.
1159 layerParams.set("scale", scaleMat.at<float>(0));
1160 id = dstNet.addLayer(name, "Power", layerParams);
1165 layerParams.blobs.resize(1, scaleMat);
1167 StrIntVector next_layers = getNextLayers(net, name, "Add");
1168 if (!next_layers.empty())
1170 layerParams.set("bias_term", true);
1171 layerParams.blobs.resize(2);
1173 int weights_layer_index = next_layers[0].second;
1174 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back());
1175 ExcludeLayer(net, weights_layer_index, 0, false);
1176 layers_to_ignore.insert(next_layers[0].first);
1179 if (hasLayerAttr(layer, "axis"))
1180 layerParams.set("axis", getLayerAttr(layer, "axis").i());
1182 id = dstNet.addLayer(name, "Scale", layerParams);
1184 layer_id[name] = id;
1186 Pin inp0 = parsePin(layer.input(0));
1187 if (layer_id.find(inp0.name) != layer_id.end())
1188 // First operand is a constant.
1189 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1191 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1195 layerParams.set("operation", "prod");
1196 int id = dstNet.addLayer(name, "Eltwise", layerParams);
1197 layer_id[name] = id;
1199 for (int ii = 0; ii < layer.input_size(); ii++)
1201 Pin inp = parsePin(layer.input(ii));
1202 if (layer_id.find(inp.name) == layer_id.end())
1203 CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1204 dstNet.connect(layer_id.at(inp.name), inp.blobIndex, id, ii);
1208 else if (type == "Pad")
1210 Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
1211 CV_Assert(paddings.type() == CV_32SC1);
1212 if (paddings.total() == 8)
1214 // Perhaps we have NHWC padding dimensions order.
1217 std::swap(*paddings.ptr<int32_t>(0, 2), *paddings.ptr<int32_t>(0, 6));
1218 std::swap(*paddings.ptr<int32_t>(0, 3), *paddings.ptr<int32_t>(0, 7));
1221 std::swap(*paddings.ptr<int32_t>(0, 4), *paddings.ptr<int32_t>(0, 6));
1222 std::swap(*paddings.ptr<int32_t>(0, 5), *paddings.ptr<int32_t>(0, 7));
1226 layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));
1228 int id = dstNet.addLayer(name, "Padding", layerParams);
1229 layer_id[name] = id;
1231 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1233 else if (type == "FusedBatchNorm")
1235 // op: "FusedBatchNorm"
1237 // input: "BatchNorm/gamma"
1238 // input: "BatchNorm/beta"
1239 // input: "BatchNorm/moving_mean"
1240 // input: "BatchNorm/moving_variance"
1241 if (layer.input_size() != 5)
1242 CV_Error(Error::StsNotImplemented,
1243 "Expected gamma, beta, mean and std");
1244 Pin inpId = parsePin(layer.input(0));
1246 bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b();
1248 layerParams.blobs.resize(2);
1250 const tensorflow::TensorProto& gammaTensor = getConstBlob(layer, value_id, 1);
1251 if (!gammaTensor.tensor_content().empty())
1253 layerParams.blobs.resize(layerParams.blobs.size() + 1);
1254 layerParams.set("has_weight", true);
1255 blobFromTensor(gammaTensor, layerParams.blobs.back());
1258 layerParams.set("has_weight", false);
1260 const tensorflow::TensorProto& betaTensor = getConstBlob(layer, value_id, 2);
1261 if (!betaTensor.tensor_content().empty())
1263 layerParams.blobs.resize(layerParams.blobs.size() + 1);
1264 layerParams.set("has_bias", true);
1265 blobFromTensor(betaTensor, layerParams.blobs.back());
1268 layerParams.set("has_bias", false);
1273 if (layerParams.blobs.size() == 2)
1274 CV_Error(Error::StsNotImplemented, "Cannot determine number "
1275 "of parameters for batch normalization layer.");
1276 mean = Mat::zeros(1, layerParams.blobs[3].total(), CV_32F);
1277 std = Mat::ones(1, layerParams.blobs[3].total(), CV_32F);
1279 // Add an extra layer: Mean-Variance normalization
1280 LayerParams mvnParams;
1281 std::string mvnName = name + "/MVN";
1282 CV_Assert(layer_id.find(mvnName) == layer_id.end());
1283 int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);
1284 layer_id[mvnName] = mvnId;
1285 connect(layer_id, dstNet, inpId, mvnId, 0);
1286 inpId = Pin(mvnName);
1290 blobFromTensor(getConstBlob(layer, value_id, 3), mean);
1291 blobFromTensor(getConstBlob(layer, value_id, 4), std);
1293 layerParams.blobs[0] = mean;
1294 layerParams.blobs[1] = std;
1296 if (hasLayerAttr(layer, "epsilon"))
1297 layerParams.set("eps", getLayerAttr(layer, "epsilon").f());
1299 int id = dstNet.addLayer(name, "BatchNorm", layerParams);
1300 layer_id[name] = id;
1303 connect(layer_id, dstNet, inpId, id, 0);
1305 else if (type == "Conv2DBackpropInput")
1307 // op: "Conv2DBackpropInput"
1308 // input: "conv2d_transpose/output_shape"
1311 if (layer.input_size() != 3)
1312 CV_Error(Error::StsNotImplemented,
1313 "Expected output shape, weights and input nodes");
1315 layerParams.set("bias_term", false);
1316 layerParams.blobs.resize(1);
1318 StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
1319 if (next_layers.size() == 1)
1321 layerParams.set("bias_term", true);
1322 layerParams.blobs.resize(2);
1324 int weights_layer_index = next_layers[0].second;
1326 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
1327 ExcludeLayer(net, weights_layer_index, 0, false);
1328 layers_to_ignore.insert(next_layers[0].first);
1331 kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);
1333 const int* kshape = layerParams.blobs[0].size.p;
1334 const int kernelH = kshape[2];
1335 const int kernelW = kshape[3];
1336 layerParams.set("kernel_h", kernelH);
1337 layerParams.set("kernel_w", kernelW);
1338 layerParams.set("num_output", kshape[1]);
1340 setStrides(layerParams, layer);
1341 setPadding(layerParams, layer);
1343 // For convolution layer, output shape computes as
1344 // o = 1 + (i - k + 2*p) / s
1345 // i - input size, o - output size, k - kernel size, p - pad, s - stride
1346 // In TensorFlow, p == 0 is padMode == 'VALID' or p == (k - 1) / 2
1347 // considering that k is odd.
1348 // SAME: o = 1 + (i - 1) / s
1349 // VALID: o = 1 + i / s
1350 // Deconvolution's layer output shape computes as
1351 // SAME: o = 1 + (i - 1)*s
1352 // VALID: o = (i - 1)*s
1353 // If output_shape differs from formulas above then adjust padding is applied.
1355 const int strideY = layerParams.get<int>("stride_h");
1356 const int strideX = layerParams.get<int>("stride_w");
1357 Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
1358 const int outH = outShape.at<int>(1);
1359 const int outW = outShape.at<int>(2);
1360 if (layerParams.get<String>("pad_mode") == "SAME")
1362 layerParams.set("adj_w", (outW - 1) % strideX);
1363 layerParams.set("adj_h", (outH - 1) % strideY);
1365 else if (layerParams.get<String>("pad_mode") == "VALID")
1367 layerParams.set("adj_w", (outW - kernelW) % strideX);
1368 layerParams.set("adj_h", (outH - kernelH) % strideY);
1370 int id = dstNet.addLayer(name, "Deconvolution", layerParams);
1371 layer_id[name] = id;
1374 connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
1376 else if (type == "BlockLSTM")
1379 // input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps)
1381 // input: "lstm_block_wrapper/zeros" (ignore)
1382 // input: "lstm_block_wrapper/zeros" (ignore)
1383 // input: "lstm_block_wrapper/kernel"
1384 // input: "lstm_block_wrapper/w_i_diag"
1385 // input: "lstm_block_wrapper/w_f_diag"
1386 // input: "lstm_block_wrapper/w_o_diag"
1387 // input: "lstm_block_wrapper/bias"
1388 if (layer.input_size() != 9)
1389 CV_Error(Error::StsNotImplemented, "Unexpected number of input nodes");
1391 if (hasLayerAttr(layer, "forget_bias"))
1392 layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f());
1394 if (hasLayerAttr(layer, "forget_bias"))
1396 float cellClip = getLayerAttr(layer, "cell_clip").f();
1397 // Cell clip disabled if it's negative.
1400 layerParams.set("use_cell_clip", true);
1401 layerParams.set("cell_clip", cellClip);
1406 blobFromTensor(getConstBlob(layer, value_id, 4), W);
1407 blobFromTensor(getConstBlob(layer, value_id, 8), b);
1408 const int outSize = W.cols / 4;
1411 float* weightData = (float*)W.data;
1412 for (int i = 0; i < W.rows; ++i)
1413 for (int j = 0; j < outSize; ++j)
1415 std::swap(weightData[i * W.cols + 1 * outSize + j],
1416 weightData[i * W.cols + 2 * outSize + j]);
1417 std::swap(weightData[i * W.cols + 2 * outSize + j],
1418 weightData[i * W.cols + 3 * outSize + j]);
1420 Wx = W.rowRange(0, W.rows - outSize).t();
1421 Wh = W.rowRange(W.rows - outSize, W.rows).t();
1423 layerParams.blobs.resize(3);
1424 layerParams.blobs[0] = Wh;
1425 layerParams.blobs[1] = Wx;
1426 layerParams.blobs[2] = b;
1428 if (hasLayerAttr(layer, "use_peephole"))
1430 bool usePeephole = getLayerAttr(layer, "use_peephole").b();
1433 layerParams.set("use_peephole", true);
1434 layerParams.blobs.resize(6);
1435 for (int i = 0; i < 3; ++i)
1438 blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);
1439 w = w.reshape(1, w.total()); // Single column.
1440 w = Mat::diag(w); // Make a diagonal matrix.
1441 layerParams.blobs[3 + i] = w;
1446 int id = dstNet.addLayer(name, "LSTM", layerParams);
1447 layer_id[name] = id;
1450 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1451 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1453 else if (type == "ResizeNearestNeighbor" || type == "ResizeBilinear")
1455 if (layer.input_size() == 2)
1457 Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1));
1458 CV_Assert(outSize.type() == CV_32SC1, outSize.total() == 2);
1459 layerParams.set("height", outSize.at<int>(0, 0));
1460 layerParams.set("width", outSize.at<int>(0, 1));
1462 else if (layer.input_size() == 3)
1464 Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1));
1465 Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2));
1466 CV_Assert(factorHeight.type() == CV_32SC1, factorHeight.total() == 1,
1467 factorWidth.type() == CV_32SC1, factorWidth.total() == 1);
1468 layerParams.set("zoom_factor_x", factorWidth.at<int>(0));
1469 layerParams.set("zoom_factor_y", factorHeight.at<int>(0));
1472 CV_Assert(layer.input_size() == 2 || layer.input_size() == 3);
1474 if (type == "ResizeNearestNeighbor")
1475 layerParams.set("interpolation", "nearest");
1477 layerParams.set("interpolation", "bilinear");
1479 if (hasLayerAttr(layer, "align_corners"))
1480 layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b());
1482 int id = dstNet.addLayer(name, "Resize", layerParams);
1483 layer_id[name] = id;
1485 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1487 else if (type == "L2Normalize")
1489 // op: "L2Normalize"
1491 // input: "reduction_indices" (axis)
1492 CV_Assert(layer.input_size() == 2);
1493 Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));
1494 CV_Assert(reductionIndices.type() == CV_32SC1);
1496 const int numAxes = reductionIndices.total();
1497 if (data_layouts[name] == DATA_LAYOUT_NHWC)
1498 for (int i = 0; i < numAxes; ++i)
1499 reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));
1501 cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);
1502 for (int i = 1; i < numAxes; ++i)
1504 CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);
1505 // Axes have the same sign.
1506 CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);
1508 layerParams.set("start_axis", reductionIndices.at<int>(0));
1509 layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));
1511 int id = dstNet.addLayer(name, "Normalize", layerParams);
1512 layer_id[name] = id;
1513 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1515 else if (type == "PriorBox")
1517 if (hasLayerAttr(layer, "min_size"))
1518 layerParams.set("min_size", getLayerAttr(layer, "min_size").i());
1519 if (hasLayerAttr(layer, "max_size"))
1520 layerParams.set("max_size", getLayerAttr(layer, "max_size").i());
1521 if (hasLayerAttr(layer, "flip"))
1522 layerParams.set("flip", getLayerAttr(layer, "flip").b());
1523 if (hasLayerAttr(layer, "clip"))
1524 layerParams.set("clip", getLayerAttr(layer, "clip").b());
1525 if (hasLayerAttr(layer, "offset"))
1526 layerParams.set("offset", getLayerAttr(layer, "offset").f());
1527 if (hasLayerAttr(layer, "step"))
1528 layerParams.set("step", getLayerAttr(layer, "step").f());
1530 const std::string paramNames[] = {"variance", "aspect_ratio", "scales",
1532 for (int i = 0; i < 5; ++i)
1534 if (hasLayerAttr(layer, paramNames[i]))
1536 Mat values = getTensorContent(getLayerAttr(layer, paramNames[i]).tensor());
1537 layerParams.set(paramNames[i],
1538 DictValue::arrayReal<float*>((float*)values.data, values.total()));
1541 int id = dstNet.addLayer(name, "PriorBox", layerParams);
1542 layer_id[name] = id;
1543 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1544 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
1545 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1547 else if (type == "DetectionOutput")
1549 // op: "DetectionOutput"
1550 // input_0: "locations"
1551 // input_1: "classifications"
1552 // input_2: "prior_boxes"
1553 if (hasLayerAttr(layer, "num_classes"))
1554 layerParams.set("num_classes", getLayerAttr(layer, "num_classes").i());
1555 if (hasLayerAttr(layer, "share_location"))
1556 layerParams.set("share_location", getLayerAttr(layer, "share_location").b());
1557 if (hasLayerAttr(layer, "background_label_id"))
1558 layerParams.set("background_label_id", getLayerAttr(layer, "background_label_id").i());
1559 if (hasLayerAttr(layer, "nms_threshold"))
1560 layerParams.set("nms_threshold", getLayerAttr(layer, "nms_threshold").f());
1561 if (hasLayerAttr(layer, "top_k"))
1562 layerParams.set("top_k", getLayerAttr(layer, "top_k").i());
1563 if (hasLayerAttr(layer, "code_type"))
1564 layerParams.set("code_type", getLayerAttr(layer, "code_type").s());
1565 if (hasLayerAttr(layer, "keep_top_k"))
1566 layerParams.set("keep_top_k", getLayerAttr(layer, "keep_top_k").i());
1567 if (hasLayerAttr(layer, "confidence_threshold"))
1568 layerParams.set("confidence_threshold", getLayerAttr(layer, "confidence_threshold").f());
1569 if (hasLayerAttr(layer, "loc_pred_transposed"))
1570 layerParams.set("loc_pred_transposed", getLayerAttr(layer, "loc_pred_transposed").b());
1571 if (hasLayerAttr(layer, "clip"))
1572 layerParams.set("clip", getLayerAttr(layer, "clip").b());
1573 if (hasLayerAttr(layer, "variance_encoded_in_target"))
1574 layerParams.set("variance_encoded_in_target", getLayerAttr(layer, "variance_encoded_in_target").b());
1576 int id = dstNet.addLayer(name, "DetectionOutput", layerParams);
1577 layer_id[name] = id;
1578 for (int i = 0; i < 3; ++i)
1579 connect(layer_id, dstNet, parsePin(layer.input(i)), id, i);
1580 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1582 else if (type == "Softmax")
1584 if (hasLayerAttr(layer, "axis"))
1585 layerParams.set("axis", getLayerAttr(layer, "axis").i());
1587 int id = dstNet.addLayer(name, "Softmax", layerParams);
1588 layer_id[name] = id;
1589 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1591 else if (type == "CropAndResize")
1593 // op: "CropAndResize"
1597 CV_Assert(layer.input_size() == 3);
1599 Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2));
1600 CV_Assert(cropSize.type() == CV_32SC1, cropSize.total() == 2);
1602 layerParams.set("height", cropSize.at<int>(0));
1603 layerParams.set("width", cropSize.at<int>(1));
1605 int id = dstNet.addLayer(name, "CropAndResize", layerParams);
1606 layer_id[name] = id;
1608 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1609 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
1611 else if (type == "Mean")
1613 Mat indices = getTensorContent(getConstBlob(layer, value_id, 1));
1614 CV_Assert(indices.type() == CV_32SC1);
1616 if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
1617 CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation.");
1619 layerParams.set("pool", "ave");
1620 layerParams.set("global_pooling", true);
1622 int id = dstNet.addLayer(name, "Pooling", layerParams);
1623 layer_id[name] = id;
1625 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1627 // There are two attributes, "keepdims" and a deprecated "keep_dims".
1628 bool keepDims = false;
1629 if (hasLayerAttr(layer, "keepdims"))
1630 keepDims = getLayerAttr(layer, "keepdims").b();
1631 else if (hasLayerAttr(layer, "keep_dims"))
1632 keepDims = getLayerAttr(layer, "keep_dims").b();
1636 LayerParams flattenLp;
1637 std::string flattenName = name + "/flatten";
1638 CV_Assert(layer_id.find(flattenName) == layer_id.end());
1639 int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
1640 layer_id[flattenName] = flattenId;
1641 connect(layer_id, dstNet, Pin(name), flattenId, 0);
1644 else if (type == "ClipByValue")
1646 // op: "ClipByValue"
1650 CV_Assert(layer.input_size() == 3);
1652 Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1));
1653 Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2));
1654 CV_Assert(minValue.total() == 1, minValue.type() == CV_32F,
1655 maxValue.total() == 1, maxValue.type() == CV_32F);
1657 layerParams.set("min_value", minValue.at<float>(0));
1658 layerParams.set("max_value", maxValue.at<float>(0));
1660 int id = dstNet.addLayer(name, "ReLU6", layerParams);
1661 layer_id[name] = id;
1663 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1665 else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
1666 type == "Relu" || type == "Elu" ||
1667 type == "Identity" || type == "Relu6")
1669 std::string dnnType = type;
1670 if (type == "Abs") dnnType = "AbsVal";
1671 else if (type == "Tanh") dnnType = "TanH";
1672 else if (type == "Relu") dnnType = "ReLU";
1673 else if (type == "Relu6") dnnType = "ReLU6";
1674 else if (type == "Elu") dnnType = "ELU";
1676 int id = dstNet.addLayer(name, dnnType, layerParams);
1677 layer_id[name] = id;
1678 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1682 // Importer does not know how to map this TensorFlow's operation onto OpenCV's layer.
1683 // However we create a layer with the same type and rely that user defined a custom layer.
1685 // All the attributes are added to LayerParams.
1686 google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
1687 for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
1688 ai != attr.end(); ++ai)
1690 if (ai->second.value_case() == tensorflow::AttrValue::kS) // string
1691 layerParams.set(ai->first, ai->second.s());
1692 if (ai->second.value_case() == tensorflow::AttrValue::kI) // int64
1693 layerParams.set(ai->first, ai->second.i());
1694 if (ai->second.value_case() == tensorflow::AttrValue::kF) // float
1695 layerParams.set(ai->first, ai->second.f());
1696 if (ai->second.value_case() == tensorflow::AttrValue::kB) // bool
1697 layerParams.set(ai->first, ai->second.b());
1700 // All the Const input nodes are added to layer's blobs.
1701 std::vector<std::string> inputsNames;
1702 for (int i = 0; i < layer.input_size(); ++i)
1704 // Check if input is a Const node.
1705 if (value_id.find(layer.input(i)) != value_id.end())
1707 Mat blob = getTensorContent(getConstBlob(layer, value_id, i));
1708 layerParams.blobs.push_back(blob);
1711 inputsNames.push_back(layer.input(i));
1713 int id = dstNet.addLayer(name, type, layerParams);
1714 layer_id[name] = id;
1716 for (int i = 0; i < inputsNames.size(); ++i)
1718 connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i);
1726 #endif //HAVE_PROTOBUF
1728 Net readNetFromTensorflow(const String &model, const String &config)
1730 TFImporter importer(model.c_str(), config.c_str());
1732 importer.populateNet(net);
1736 Net readNetFromTensorflow(const char* bufferModel, size_t lenModel,
1737 const char* bufferConfig, size_t lenConfig)
1739 TFImporter importer(bufferModel, lenModel, bufferConfig, lenConfig);
1741 importer.populateNet(net);
1745 CV__DNN_EXPERIMENTAL_NS_END