1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 // Copyright (C) 2016, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
9 Implementation of Tensorflow models parser
12 #include "../precomp.hpp"
22 #include "tf_graph_simplifier.hpp"
27 CV__DNN_EXPERIMENTAL_NS_BEGIN
31 using ::google::protobuf::RepeatedField;
32 using ::google::protobuf::RepeatedPtrField;
33 using ::google::protobuf::Message;
34 using ::google::protobuf::Descriptor;
35 using ::google::protobuf::FieldDescriptor;
36 using ::google::protobuf::Reflection;
41 static int toNCHW(int idx)
43 CV_Assert(-4 <= idx && idx < 4);
44 if (idx == 0) return 0;
45 else if (idx > 0) return idx % 3 + 1;
46 else return (4 + idx) % 3 + 1;
49 // These values are used to indicate a layer output's data layout, where it is possible to determine it.
// Pairs of (layer name, node index in the GraphDef), used to track the
// consumers of a layer's outputs.
57 typedef std::vector<std::pair<String, int> > StrIntVector;
// Pin identifies one output blob of a named node, mirroring TensorFlow's
// "node:port" input notation. NOTE(review): the enclosing struct header
// and the constructors' surrounding lines are outside this view.
61 Pin(const std::string &_name, int _blobIndex = 0) :
62 name(_name), blobIndex(_blobIndex) {}
// Default pin: empty name and an invalid (-1) blob index.
65 name(""), blobIndex(-1) {}
// Extracts the dimensions of |tensor| into |shape|.
// A zero-rank tensor is reported as the scalar shape {1}; a tensor with no
// tensor_shape field at all is a fatal parse error.
71 void blobShapeFromTensor(const tensorflow::TensorProto &tensor, MatShape& shape)
74 if (tensor.has_tensor_shape())
76 const tensorflow::TensorShapeProto &_shape = tensor.tensor_shape();
77 int i, n = _shape.dim_size();
// Copy each dimension; proto sizes are 64-bit and are truncated to int here.
82 for (i = 0; i < n; i++)
83 shape[i] = (int)_shape.dim(i).size();
// n == 0: represent a scalar as a one-element shape of size 1.
86 shape.resize(1, 1); // Scalar.
90 CV_Error(Error::StsError, "Unknown shape of input tensor");
// Copies tensor data into |dstBlob| as CV_32F.
// 4-D blobs are reordered from TensorFlow's NHWC layout to OpenCV's NCHW;
// other ranks are copied element-wise. T is the source element type
// (the template header and the dims == 4 branch condition are outside
// this view).
95 void parseTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
98 blobShapeFromTensor(tensor, shape);
99 int dims = (int)shape.size();
// Convert the destination shape NHWC -> NCHW before allocating the blob.
103 // REORDER blob NHWC to NCHW
104 swap(shape[2], shape[3]); // NHCW
105 swap(shape[1], shape[2]); // NCHW
108 dstBlob.create(shape, CV_32F);
// Raw tensor bytes; element count must match the allocated blob.
110 Mat tensorContent = getTensorContent(tensor);
111 int size = tensorContent.total();
112 CV_Assert(size == (int)dstBlob.total());
114 float *dstData = dstBlob.ptr<float>();
115 const T *data = reinterpret_cast<const T*>(tensorContent.data);
// Element-wise NHWC -> NCHW permutation: src_i indexes the channel-last
// source layout, dst_i the channel-first destination.
119 int num = shape[0], channels = shape[1], height = shape[2], width = shape[3];
120 int total = num*channels*height*width;
121 for(int i_n = 0; i_n < shape[0]; i_n++) {
122 for(int i_c = 0; i_c < shape[1]; i_c++) {
123 for(int i_h = 0; i_h < shape[2]; i_h++) {
124 for(int i_w = 0; i_w < shape[3]; i_w++) {
125 int dst_i = channels*height*width*i_n + height*width*i_c + width*i_h + i_w;
126 int src_i = channels*height*width*i_n + i_c + channels*width*i_h + channels*i_w;
128 CV_Assert(dst_i < total);
129 CV_Assert(src_i < total);
131 dstData[dst_i] = data[src_i];
// Non-4D tensors: plain copy with T -> float conversion.
137 for (int i = 0; i < size; i++)
138 dstData[i] = data[i];
// Dispatches tensor parsing on the proto dtype. DT_HALF shares the float
// path (presumably getTensorContent() expands half-precision data to
// float — confirm against its definition). Unsupported types are fatal.
// NOTE(review): the per-case break statements are outside this view.
142 void blobFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
144 switch (tensor.dtype()) {
145 case tensorflow::DT_FLOAT:
146 case tensorflow::DT_HALF:
147 parseTensor<float>(tensor, dstBlob);
149 case tensorflow::DT_DOUBLE:
150 parseTensor<double>(tensor, dstBlob);
153 CV_Error(Error::StsError, "Tensor's data type is not supported");
// Debug helper: prints the integer items of an AttrValue list to stdout.
158 void printList(const tensorflow::AttrValue::ListValue &val)
161 for (int i = 0; i < val.i_size(); i++)
162 std::cout << " " << val.i(i);
// Debug helper: prints every dimension of a tensor shape as "name:size".
166 void printTensorShape(const tensorflow::TensorShapeProto &shape)
169 for (int d = 0; d < shape.dim_size(); d++)
170 std::cout << shape.dim(d).name() <<
171 ":" << shape.dim(d).size() << " ";
// Debug helper: prints a tensor's shape and, for float/int32 raw content,
// up to its first 10 values followed by a count of the remainder.
175 void printTensor(const tensorflow::TensorProto &tensor)
177 printTensorShape(tensor.tensor_shape());
// Nothing more to show when the raw byte content is absent.
179 if (tensor.tensor_content().empty())
182 switch (tensor.dtype())
184 case tensorflow::DT_FLOAT:
186 const float *data = reinterpret_cast<const float*>(tensor.tensor_content().c_str());
187 int size = tensor.tensor_content().size() / sizeof(float);
188 for (int i = 0; i < std::min(10, size); i++)
189 std::cout << " " << data[i];
191 std::cout << " ... " << size - 10 << " more";
194 case tensorflow::DT_INT32:
196 const int *data = reinterpret_cast<const int*>(tensor.tensor_content().c_str());
197 int size = tensor.tensor_content().size() / sizeof(int);
198 for (int i = 0; i < std::min(10, size); i++)
199 std::cout << " " << data[i];
201 std::cout << " ... " << size - 10 << " more";
205 CV_Error(Error::StsError, "Tensor type is not supported");
// Debug helper: dumps a node's name, op, inputs and a selection of its
// attributes to stdout, formatting each known attribute key with its
// proper protobuf value type.
210 void printLayerAttr(const tensorflow::NodeDef &layer)
212 std::cout << std::endl << layer.name() << ":" << layer.op();
213 for (int ii = 0; ii < layer.input_size(); ii++)
214 std::cout << "(" << layer.input(ii) << ")";
215 std::cout << std::endl;
// NOTE(review): this copies the whole attribute map by value; acceptable
// for a debug-only routine.
216 google::protobuf::Map<std::string, tensorflow::AttrValue> attr
218 for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
219 ai != attr.end(); ++ai)
221 std::cout << ai->first << ":";
222 if (ai->first == "dtype" || ai->first == "T")
223 std::cout << ai->second.i();
224 else if (ai->first == "padding")
225 std::cout << ai->second.s();
226 else if (ai->first == "transpose_a" || ai->first == "transpose_b")
227 std::cout << ai->second.b();
228 // else if (ai->first == "shape")
229 // printTensorShape(ai->second.shape());
230 else if (ai->first == "strides" || ai->first == "ksize")
231 printList(ai->second.list());
233 printTensor(ai->second.tensor());
234 std::cout << std::endl;
238 bool hasLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
240 google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
241 return attr.find(name) != attr.end();
// Returns attribute |name| of |layer|. Callers are expected to check
// hasLayerAttr() first; a missing key is not handled here.
244 const tensorflow::AttrValue& getLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
246 return layer.attr().at(name);
// Translates a TensorFlow "strides" attribute ([1, sh, sw, 1] in NHWC
// order) into the layer's stride_h/stride_w parameters. Striding over the
// batch or channel axis is rejected.
249 void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
251 if (hasLayerAttr(layer, "strides"))
253 const tensorflow::AttrValue& val = getLayerAttr(layer, "strides");
254 if (val.list().i_size() != 4 ||
255 val.list().i(0) != 1 || val.list().i(3) != 1)
256 CV_Error(Error::StsError, "Unsupported strides");
257 layerParams.set("stride_h", static_cast<int>(val.list().i(1)));
258 layerParams.set("stride_w", static_cast<int>(val.list().i(2)));
// Interprets a 1-D DT_INT32 tensor as an array of dimension values
// (e.g. a block_shape or axes argument) wrapped in a DictValue.
262 DictValue parseDims(const tensorflow::TensorProto &tensor) {
264 blobShapeFromTensor(tensor, shape);
265 int dims = (int)shape.size();
267 CV_Assert(tensor.dtype() == tensorflow::DT_INT32);
268 CV_Assert(dims == 1);
270 Mat values = getTensorContent(tensor);
271 CV_Assert(values.type() == CV_32SC1);
272 // TODO: add reordering shape if dims == 4
273 return DictValue::arrayInt((int*)values.data, values.total());
// Translates a TensorFlow "ksize" attribute ([1, kh, kw, 1] in NHWC order)
// into kernel_h/kernel_w; a kernel spanning the batch or channel axis is
// rejected. Falls back to a 1x1 kernel when the attribute is absent
// (the else header is outside this view).
276 void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
278 if (hasLayerAttr(layer, "ksize"))
280 const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize");
281 if (val.list().i_size() != 4 ||
282 val.list().i(0) != 1 || val.list().i(3) != 1)
283 CV_Error(Error::StsError, "Unsupported ksize");
284 layerParams.set("kernel_h", static_cast<int>(val.list().i(1)));
285 layerParams.set("kernel_w", static_cast<int>(val.list().i(2)));
289 layerParams.set("kernel_h", 1);
290 layerParams.set("kernel_w", 1);
// Copies the node's "padding" string attribute (e.g. "SAME"/"VALID") into
// the layer's pad_mode parameter, when present.
294 void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer)
296 if (hasLayerAttr(layer, "padding"))
297 layerParams.set("pad_mode", getLayerAttr(layer, "padding").s());
// Splits a TensorFlow input reference "name:index" into a Pin. Without a
// ':' the whole string is the node name and the default blob index is kept
// (the Pin construction and return lines are outside this view).
300 Pin parsePin(const std::string &name)
304 size_t delimiter_pos = name.find_first_of(":");
305 if (delimiter_pos != std::string::npos)
307 pin.name = name.substr(0, delimiter_pos);
// Parse the numeric port after the colon into the pin's blob index.
308 std::istringstream(name.substr(delimiter_pos + 1)) >> pin.blobIndex;
// Returns (name, node index) for every node that consumes an output of
// |layer_name|, optionally restricted to consumers whose op equals |type|.
314 StrIntVector getNextLayers(const tensorflow::GraphDef& net, const String& layer_name, const String& type = "")
318 for (int li = 0; li < net.node_size(); li++)
320 const tensorflow::NodeDef& layer = net.node(li);
321 for (int input_id = 0; input_id < layer.input_size(); input_id++) {
// Compare against the producing node's name, dropping any ":port" suffix.
322 String input_op_name = parsePin(layer.input(input_id)).name;
323 bool type_ok = type.empty() ? true : type == layer.op();
324 if (input_op_name == layer_name && type_ok)
325 layers.push_back(std::make_pair(layer.name(), li));
// Bypasses node |layer_index|: every consumer that referenced it is rewired
// to the node's own input |input_blob_index|. When |remove_from_net| is
// true the node record is also deleted from the graph.
332 void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int input_blob_index, bool remove_from_net = true) {
333 String layer_name = net.node(layer_index).name();
334 StrIntVector layers = getNextLayers(net, layer_name);
336 String removed_layer_input = net.node(layer_index).input(input_blob_index);
338 for (size_t i = 0; i < layers.size(); i++)
340 tensorflow::NodeDef* layer = net.mutable_node(layers[i].second);
341 for (int input_id = 0; input_id < layer->input_size(); input_id++) {
342 String input_op_name = layer->input(input_id);
344 if (input_op_name == layer_name) {
345 layer->set_input(input_id, removed_layer_input);
// Physical deletion of the node (the remove_from_net guard is outside
// this view).
351 net.mutable_node()->DeleteSubrange(layer_index, 1);
// TFImporter builds a cv::dnn::Net from TensorFlow GraphDef protobufs.
// NOTE(review): the class header is outside this view; these are its
// member declarations.
// Load the graph from file path(s); config is an optional text graph.
356 TFImporter(const char *model, const char *config = NULL);
// Load the graph from in-memory buffers instead of files.
357 TFImporter(const char *dataModel, size_t lenModel,
358 const char *dataConfig = NULL, size_t lenConfig = 0);
// Translates the parsed GraphDef(s) into dstNet's layers and connections.
360 void populateNet(Net dstNet);
// Converts an HWIO convolution kernel tensor into an OIHW OpenCV blob.
363 void kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob);
// Wires the producer identified by outPin to one input blob of a layer.
365 void connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
366 const int input_layer_id, const int input_blob_id);
// Wires the same producer pin to every input blob of a layer.
367 void connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
368 const int input_layer_id, const int input_blobs_count);
// Locates the Const node feeding |layer| and returns its value tensor.
369 const tensorflow::TensorProto& getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
370 int input_blob_index = -1, int* actual_inp_blob_idx = 0);
373 // Binary serialized TensorFlow graph includes weights.
374 tensorflow::GraphDef netBin;
375 // Optional text definition of TensorFlow graph. More flexible than binary format
376 // and may be used to build the network using binary format only as a weights storage.
377 // This approach is similar to Caffe's `.prototxt` and `.caffemodel`.
378 tensorflow::GraphDef netTxt;
// Names of the network's input layers (Placeholder nodes).
380 std::vector<String> netInputsNames;
// File-based constructor: |model| is the binary .pb graph (with weights);
// |config| is an optional text graph definition. Empty/null paths are
// silently skipped.
383 TFImporter::TFImporter(const char *model, const char *config)
385 if (model && model[0])
386 ReadTFNetParamsFromBinaryFileOrDie(model, &netBin);
387 if (config && config[0])
388 ReadTFNetParamsFromTextFileOrDie(config, &netTxt);
// Buffer-based constructor: same as the file-based one, but the binary and
// text graph definitions are read from in-memory buffers.
391 TFImporter::TFImporter(const char *dataModel, size_t lenModel,
392 const char *dataConfig, size_t lenConfig)
394 if (dataModel != NULL && lenModel > 0)
395 ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin);
396 if (dataConfig != NULL && lenConfig > 0)
397 ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt);
// Converts a convolution kernel tensor from TensorFlow's HWIO layout into
// OpenCV's OIHW CV_32F blob. Only 4-D float/half kernels are supported.
400 void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
403 blobShapeFromTensor(tensor, shape);
404 int dims = (int)shape.size();
406 // TODO: other blob types
407 CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT ||
408 tensor.dtype() == tensorflow::DT_HALF);
409 CV_Assert(dims == 4);
// Rearrange the destination shape from HWIO to OIHW before allocation.
411 // REORDER kernel HWIO to OIHW
412 swap(shape[0], shape[2]); // IWHO
413 swap(shape[1], shape[3]); // IOHW
414 swap(shape[0], shape[1]); // OIHW
416 dstBlob.create(shape, CV_32F);
418 Mat tensorContent = getTensorContent(tensor);
419 int size = tensorContent.total();
420 CV_Assert(size == (int)dstBlob.total());
422 float *dstData = dstBlob.ptr<float>();
423 const float *data = reinterpret_cast<const float*>(tensorContent.data);
// Element permutation: src_i follows the HWIO source layout, dst_i the
// OIHW destination layout.
425 int out_c = shape[0], input_c = shape[1], height = shape[2], width = shape[3];
426 int total = out_c*input_c*height*width;
427 for(int i_oc = 0; i_oc < out_c; i_oc++) {
428 for(int i_ic = 0; i_ic < input_c; i_ic++) {
429 for(int i_h = 0; i_h < height; i_h++) {
430 for(int i_w = 0; i_w < width; i_w++) {
431 int dst_i = input_c*height*width*i_oc + height*width*i_ic + width*i_h + i_w;
432 int src_i = out_c*input_c*width*i_h + out_c*input_c*i_w + out_c*i_ic + i_oc;
433 CV_Assert(dst_i < total);
434 CV_Assert(src_i < total);
435 dstData[dst_i] = data[src_i];
// Connects output |outPin| of an already-registered layer to blob
// |input_blob_id| of layer |input_layer_id|. If the producer is one of the
// network inputs, the blob index used is the input's position in
// netInputsNames rather than the pin's own index. NOTE(review): the
// declaration of blobIndex is outside this view.
442 void TFImporter::connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
443 const int input_layer_id, const int input_blob_id)
445 std::map<String, int>::const_iterator it = layers_name_id_map.find(outPin.name);
446 if (it == layers_name_id_map.end())
447 CV_Error(Error::StsError, "Input layer not found: " + outPin.name);
449 std::vector<String>::iterator inpNameIt = std::find(netInputsNames.begin(), netInputsNames.end(), outPin.name);
451 if (inpNameIt == netInputsNames.end())
452 blobIndex = outPin.blobIndex;
454 blobIndex = inpNameIt - netInputsNames.begin();
455 network.connect(it->second, blobIndex, input_layer_id, input_blob_id);
// Connects the same producer pin to every input blob
// [0, input_blobs_count) of layer |input_layer_id|.
458 void TFImporter::connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
459 const int input_layer_id, const int input_blobs_count)
461 for (int input_blob_id = 0; input_blob_id < input_blobs_count; input_blob_id++)
462 connect(layer_id, network, outPin, input_layer_id, input_blob_id);
// Returns the value tensor of the Const node feeding |layer|.
// With input_blob_index == -1 the unique Const input is located
// automatically (more than one Const input is an error). The chosen input
// position is reported through |actual_inp_blob_idx| when non-null. The
// Const node is resolved against netBin first, then netTxt.
// NOTE(review): |const_layers| is passed by value — a copy per call.
465 const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
466 int input_blob_index, int* actual_inp_blob_idx) {
467 if (input_blob_index == -1) {
// Scan the inputs for exactly one Const producer.
468 for(int i = 0; i < layer.input_size(); i++) {
469 Pin input = parsePin(layer.input(i));
470 if (const_layers.find(input.name) != const_layers.end()) {
471 if (input_blob_index != -1)
472 CV_Error(Error::StsError, "More than one input is Const op");
474 input_blob_index = i;
479 if (input_blob_index == -1)
480 CV_Error(Error::StsError, "Const input blob for weights not found");
482 Pin kernel_inp = parsePin(layer.input(input_blob_index));
483 if (const_layers.find(kernel_inp.name) == const_layers.end())
484 CV_Error(Error::StsError, "Const kernel input not found");
// Only port 0 of a Const node can carry the value.
485 if (kernel_inp.blobIndex != 0)
486 CV_Error(Error::StsError, "Unsupported kernel input");
488 if(actual_inp_blob_idx) {
489 *actual_inp_blob_idx = input_blob_index;
// Look up the node: prefer the binary graph, fall back to the text graph.
492 int nodeIdx = const_layers.at(kernel_inp.name);
493 if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name)
495 return netBin.node(nodeIdx).attr().at("value").tensor();
499 CV_Assert(nodeIdx < netTxt.node_size(),
500 netTxt.node(nodeIdx).name() == kernel_inp.name);
501 return netTxt.node(nodeIdx).attr().at("value").tensor();
// Registers every Const node of |net| into |const_layers| (name -> node
// index) and marks it in |layers_to_ignore| so it is skipped during layer
// creation. "Dequantize" nodes over quantized Const data are folded: the
// referenced DT_QUINT8 tensor is dequantized in place into a DT_FLOAT
// Const that takes over the Dequantize node's name.
505 static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& const_layers,
506 std::set<String>& layers_to_ignore)
508 for (int li = 0; li < net.node_size(); li++)
510 const tensorflow::NodeDef &layer = net.node(li);
511 String name = layer.name();
512 String type = layer.op();
514 if (type == "Dequantize")
516 // Example of Dequantize node:
517 // name: "conv2d_1/bias"
519 // input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8)
520 // input: "conv2d_1/bias_quantized_min"
521 // input: "conv2d_1/bias_quantized_max"
522 // attr { key: "T" value { type: DT_QUINT8 } } (quantized type)
523 // attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique)
// All three inputs (data, min, max) must be previously seen Const nodes,
// and only the MIN_FIRST quantization mode is supported.
524 CV_Assert(layer.input_size() == 3);
525 for (int i = 0; i < 3; ++i)
526 CV_Assert(const_layers.find(layer.input(i)) != const_layers.end());
527 CV_Assert(hasLayerAttr(layer, "mode") &&
528 getLayerAttr(layer, "mode").s() == "MIN_FIRST");
530 int tensorId = const_layers[layer.input(0)];
531 int minId = const_layers[layer.input(1)];
532 int maxId = const_layers[layer.input(2)];
534 tensorflow::TensorProto* tensor = net.mutable_node(tensorId)
535 ->mutable_attr()->at("value")
537 CV_Assert(tensor->dtype() == tensorflow::DT_QUINT8);
539 Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor());
540 Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor());
541 CV_Assert(qMin.total() == 1, qMin.type() == CV_32FC1,
542 qMax.total() == 1, qMax.type() == CV_32FC1);
544 Mat content = getTensorContent(*tensor);
// Dequantize: out = rangeScale * q + rangeScale * round(minVal / rangeScale).
546 float minVal = qMin.at<float>(0);
547 float rangeScale = (qMax.at<float>(0) - minVal) / 255;
548 CV_Assert(rangeScale >= 0);
549 content.convertTo(content, CV_32FC1, rangeScale,
550 rangeScale * cvRound(minVal / rangeScale));
// Replace the quantized payload with float data in place.
552 tensor->set_dtype(tensorflow::DT_FLOAT);
553 tensor->set_tensor_content(content.data, content.total() * content.elemSize1());
// The converted Const inherits the Dequantize node's name, so consumers
// of the Dequantize output now resolve to the float tensor.
555 net.mutable_node(tensorId)->set_name(name);
556 CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);
557 layers_to_ignore.insert(name);
560 else if (type != "Const")
561 continue; // only Const parameters are supported
// Register the Const node itself; duplicate names are an error.
563 if (layer.attr().find("value") != layer.attr().end())
565 CV_Assert(const_layers.insert(std::make_pair(name, li)).second);
567 layers_to_ignore.insert(name);
// Reads the node's "data_format" attribute and returns DATA_LAYOUT_NHWC or
// DATA_LAYOUT_NCHW; an unrecognized value is a parse error, and an absent
// attribute yields DATA_LAYOUT_UNKNOWN.
571 static int getDataLayout(const tensorflow::NodeDef& layer)
573 if (hasLayerAttr(layer, "data_format"))
575 std::string format = getLayerAttr(layer, "data_format").s();
576 if (format == "NHWC" || format == "channels_last")
577 return DATA_LAYOUT_NHWC;
578 else if (format == "NCHW" || format == "channels_first")
579 return DATA_LAYOUT_NCHW;
581 CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
583 return DATA_LAYOUT_UNKNOWN;
// Strips an optional ":<port>" suffix from a TensorFlow tensor reference,
// yielding the producing node's name (e.g. "conv1:0" -> "conv1"). A name
// without a colon is returned unchanged.
static inline std::string getNodeName(const std::string& tensorName)
{
    const size_t colon = tensorName.rfind(':');
    if (colon == std::string::npos)
        return tensorName;
    return tensorName.substr(0, colon);
}
591 // If all inputs of specific layer have the same data layout we can say that
592 // this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
// Resolution order: (1) the node's own explicit "data_format" attribute;
// (2) the unanimous layout of its inputs (a disagreement yields UNKNOWN);
// (3) the layout pre-computed from the node's consumers, stored under the
// node's own name in |data_layouts|. NOTE(review): several control-flow
// lines of this function are outside this view.
593 static int predictOutputDataLayout(const tensorflow::GraphDef& net,
594 const tensorflow::NodeDef& layer,
595 const std::map<String, int>& data_layouts)
597 int layout = getDataLayout(layer);
598 if (layout != DATA_LAYOUT_UNKNOWN)
601 // Determine layout by layer's inputs
602 std::map<String, int>::const_iterator it;
603 for (int i = 0, n = layer.input_size(); i < n; ++i)
605 it = data_layouts.find(getNodeName(layer.input(i)));
606 if (it != data_layouts.end())
// A second input whose known layout disagrees invalidates the guess.
608 if (layout != DATA_LAYOUT_UNKNOWN)
610 if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN)
611 return DATA_LAYOUT_UNKNOWN;
617 if (layout != DATA_LAYOUT_UNKNOWN)
621 // Determine layout by layer's consumers recursively.
622 it = data_layouts.find(layer.name());
623 CV_Assert(it != data_layouts.end());
627 void TFImporter::populateNet(Net dstNet)
629 RemoveIdentityOps(netBin);
630 RemoveIdentityOps(netTxt);
632 if (!netTxt.ByteSize())
633 simplifySubgraphs(netBin);
635 std::set<String> layers_to_ignore;
637 tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin;
639 int layersSize = net.node_size();
641 std::map<String, int> data_layouts;
642 // Pre-fill data layouts where they are set explicitly.
643 // Assuming that nodes are in topological order
644 for (int i = net.node_size() - 1; i >= 0; --i)
646 const tensorflow::NodeDef& layer = net.node(i);
647 std::string name = layer.name();
649 int layout = getDataLayout(layer);
650 std::map<String, int>::iterator it = data_layouts.find(name);
651 if (it != data_layouts.end())
653 if (layout != DATA_LAYOUT_UNKNOWN)
655 if (it->second == DATA_LAYOUT_UNKNOWN)
657 else if (it->second != layout)
659 it->second = DATA_LAYOUT_UNKNOWN;
660 layout = DATA_LAYOUT_UNKNOWN;
667 data_layouts[name] = layout;
669 // Specify input layers to have the same data layout.
670 for (int j = 0; j < layer.input_size(); ++j)
672 name = getNodeName(layer.input(j));
673 it = data_layouts.find(name);
674 if (it != data_layouts.end())
676 if (layout != DATA_LAYOUT_UNKNOWN)
678 if (it->second == DATA_LAYOUT_UNKNOWN)
680 else if (it->second != layout)
681 it->second = DATA_LAYOUT_UNKNOWN;
685 data_layouts[name] = layout;
689 // find all Const layers for params
690 std::map<String, int> value_id;
691 addConstNodes(netBin, value_id, layers_to_ignore);
692 addConstNodes(netTxt, value_id, layers_to_ignore);
694 std::map<String, int> layer_id;
696 for (int li = 0; li < layersSize; li++)
698 tensorflow::NodeDef layer = net.node(li);
699 String name = layer.name();
700 String type = layer.op();
701 LayerParams layerParams;
703 if(layers_to_ignore.find(name) != layers_to_ignore.end())
706 int predictedLayout = predictOutputDataLayout(net, layer, data_layouts);
707 data_layouts[name] = predictedLayout;
709 if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative")
711 // The first node of dilated convolution subgraph.
712 // Extract input node, dilation rate and paddings.
713 std::string input = layer.input(0);
714 if (type == "SpaceToBatchND")
716 // op: "SpaceToBatchND"
718 // input: "SpaceToBatchND/block_shape"
719 // input: "SpaceToBatchND/paddings"
720 CV_Assert(layer.input_size() == 3);
722 DictValue dilation = parseDims(getConstBlob(layer, value_id, 1));
723 CV_Assert(dilation.size() == 2);
724 layerParams.set("dilation_h", dilation.get<int>(0));
725 layerParams.set("dilation_w", dilation.get<int>(1));
728 parseTensor<int>(getConstBlob(layer, value_id, 2), paddings);
730 // paddings is a 2x2 matrix: [[top, bot], [left, right]]
731 layerParams.set("pad_h", paddings.at<float>(0));
732 layerParams.set("pad_w", paddings.at<float>(2));
734 StrIntVector next_layers = getNextLayers(net, name, "Conv2D");
735 if (next_layers.empty())
737 next_layers = getNextLayers(net, name, "DepthwiseConv2dNative");
739 CV_Assert(next_layers.size() == 1);
740 layer = net.node(next_layers[0].second);
741 layers_to_ignore.insert(next_layers[0].first);
746 layerParams.set("bias_term", false);
747 layerParams.blobs.resize(1);
749 StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
750 if (next_layers.size() == 1) {
751 layerParams.set("bias_term", true);
752 layerParams.blobs.resize(2);
754 int weights_layer_index = next_layers[0].second;
756 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
757 ExcludeLayer(net, weights_layer_index, 0, false);
758 layers_to_ignore.insert(next_layers[0].first);
761 const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id);
762 kernelFromTensor(kernelTensor, layerParams.blobs[0]);
763 releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
764 int* kshape = layerParams.blobs[0].size.p;
765 if (type == "DepthwiseConv2dNative")
767 const int chMultiplier = kshape[0];
768 const int inCh = kshape[1];
769 const int height = kshape[2];
770 const int width = kshape[3];
772 Mat copy = layerParams.blobs[0].clone();
773 float* src = (float*)copy.data;
774 float* dst = (float*)layerParams.blobs[0].data;
775 for (int i = 0; i < chMultiplier; ++i)
776 for (int j = 0; j < inCh; ++j)
777 for (int s = 0; s < height * width; ++s)
779 int src_i = (i * inCh + j) * height * width + s;
780 int dst_i = (j * chMultiplier + i) * height* width + s;
781 dst[dst_i] = src[src_i];
783 // TODO Use reshape instead
784 kshape[0] = inCh * chMultiplier;
786 size_t* kstep = layerParams.blobs[0].step.p;
787 kstep[0] = kstep[1]; // fix steps too
789 layerParams.set("kernel_h", kshape[2]);
790 layerParams.set("kernel_w", kshape[3]);
791 layerParams.set("num_output", kshape[0]);
793 setStrides(layerParams, layer);
794 setPadding(layerParams, layer);
796 // The final node of dilated convolution subgraph.
797 next_layers = getNextLayers(net, name, "BatchToSpaceND");
798 if (!next_layers.empty())
800 layerParams.set("pad_mode", ""); // We use padding values.
801 CV_Assert(next_layers.size() == 1);
802 ExcludeLayer(net, next_layers[0].second, 0, false);
803 layers_to_ignore.insert(next_layers[0].first);
806 int id = dstNet.addLayer(name, "Convolution", layerParams);
810 connect(layer_id, dstNet, parsePin(input), id, 0);
812 if (data_layouts[name] == DATA_LAYOUT_UNKNOWN)
813 data_layouts[name] = DATA_LAYOUT_NHWC;
815 else if (type == "BiasAdd" || type == "Add")
817 bool haveConst = false;
818 for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
820 Pin input = parsePin(layer.input(ii));
821 haveConst = value_id.find(input.name) != value_id.end();
823 CV_Assert(!haveConst || layer.input_size() == 2);
827 Mat values = getTensorContent(getConstBlob(layer, value_id));
828 CV_Assert(values.type() == CV_32FC1);
831 if (values.total() == 1) // is a scalar.
833 layerParams.set("shift", values.at<float>(0));
834 id = dstNet.addLayer(name, "Power", layerParams);
838 layerParams.blobs.resize(1, values);
839 id = dstNet.addLayer(name, "Shift", layerParams);
844 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
848 layerParams.set("operation", "sum");
849 int id = dstNet.addLayer(name, "Eltwise", layerParams);
852 for (int ii = 0; ii < layer.input_size(); ii++)
854 Pin inp = parsePin(layer.input(ii));
855 if (layer_id.find(inp.name) == layer_id.end())
856 CV_Error(Error::StsError, "Input layer not found: " + inp.name);
857 connect(layer_id, dstNet, inp, id, ii);
861 else if (type == "Sub")
863 bool haveConst = false;
864 for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
866 Pin input = parsePin(layer.input(ii));
867 haveConst = value_id.find(input.name) != value_id.end();
869 CV_Assert(haveConst);
871 Mat values = getTensorContent(getConstBlob(layer, value_id));
872 CV_Assert(values.type() == CV_32FC1);
876 if (values.total() == 1) // is a scalar.
878 layerParams.set("shift", values.at<float>(0));
879 id = dstNet.addLayer(name, "Power", layerParams);
883 layerParams.blobs.resize(1, values);
884 id = dstNet.addLayer(name, "Shift", layerParams);
889 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
891 else if (type == "MatMul")
893 CV_Assert(layer.input_size() == 2);
895 layerParams.set("bias_term", false);
896 layerParams.blobs.resize(1);
898 StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
899 if (next_layers.empty())
901 next_layers = getNextLayers(net, name, "Add");
903 if (next_layers.size() == 1) {
904 layerParams.set("bias_term", true);
905 layerParams.blobs.resize(2);
907 int weights_layer_index = next_layers[0].second;
908 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
909 ExcludeLayer(net, weights_layer_index, 0, false);
910 layers_to_ignore.insert(next_layers[0].first);
913 int kernel_blob_index = -1;
914 const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernel_blob_index);
915 blobFromTensor(kernelTensor, layerParams.blobs[0]);
916 releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
918 if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed
919 Mat data = layerParams.blobs[0].t();
920 layerParams.blobs[0] = data.clone();
923 layerParams.set("num_output", layerParams.blobs[0].size[0]);
925 int id = dstNet.addLayer(name, "InnerProduct", layerParams);
929 int input_blob_index = kernel_blob_index == 0 ? 1 : 0;
930 connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0);
931 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
933 else if (type == "Reshape")
935 Pin inpId = parsePin(layer.input(0));
936 Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1));
938 if (newShape.total() != 4 && data_layouts[layer.input(0)] == DATA_LAYOUT_NHWC)
941 int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.
942 permLP.set("order", DictValue::arrayInt<int*>(order, 4));
944 std::string permName = name + "/nchw";
945 CV_Assert(layer_id.find(permName) == layer_id.end());
946 int permId = dstNet.addLayer(permName, "Permute", permLP);
947 layer_id[permName] = permId;
948 connect(layer_id, dstNet, inpId, permId, 0);
949 inpId = Pin(permName);
951 else if (newShape.total() == 4 && data_layouts[layer.input(0)] == DATA_LAYOUT_NHWC)
954 std::swap(*newShape.ptr<int32_t>(0, 2), *newShape.ptr<int32_t>(0, 3));
955 std::swap(*newShape.ptr<int32_t>(0, 1), *newShape.ptr<int32_t>(0, 2));
957 layerParams.set("dim", DictValue::arrayInt<int*>(newShape.ptr<int>(), newShape.total()));
959 int id = dstNet.addLayer(name, "Reshape", layerParams);
963 connect(layer_id, dstNet, inpId, id, 0);
964 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
966 else if (type == "Flatten" || type == "Squeeze")
968 Pin inpId = parsePin(layer.input(0));
969 int inpLayout = data_layouts[layer.input(0)];
970 if (type == "Squeeze")
972 CV_Assert(hasLayerAttr(layer, "squeeze_dims"));
973 const tensorflow::AttrValue& dims = getLayerAttr(layer, "squeeze_dims");
974 if (inpLayout == DATA_LAYOUT_NHWC)
976 if (dims.list().i_size() != 2 || dims.list().i(0) != 1 || dims.list().i(1) != 2)
977 CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
979 else if (inpLayout == DATA_LAYOUT_NCHW)
981 if (dims.list().i_size() != 2 || dims.list().i(0) != 2 || dims.list().i(1) != 3)
982 CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
985 CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
987 if (inpLayout == DATA_LAYOUT_NHWC)
990 int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.
991 permLP.set("order", DictValue::arrayInt<int*>(order, 4));
993 std::string permName = name + "/nchw";
994 CV_Assert(layer_id.find(permName) == layer_id.end());
995 int permId = dstNet.addLayer(permName, "Permute", permLP);
996 layer_id[permName] = permId;
997 connect(layer_id, dstNet, inpId, permId, 0);
998 inpId = Pin(permName);
1000 int id = dstNet.addLayer(name, "Flatten", layerParams);
1001 layer_id[name] = id;
1002 connect(layer_id, dstNet, inpId, id, 0);
1003 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1005 else if (type == "Transpose")
1007 Mat perm = getTensorContent(getConstBlob(layer, value_id, 1));
1008 CV_Assert(perm.type() == CV_32SC1);
1009 int* permData = (int*)perm.data;
1010 if (perm.total() == 4)
1012 // Only NHWC <-> NCHW permutations are allowed. OpenCV is always
1013 // keep NCHW layout this way.
1014 if (data_layouts[layer.input(0)] == DATA_LAYOUT_NHWC)
1016 if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2)
1018 // in TensorFlow: NHWC->NCHW
1019 // in OpenCV: NCHW->NCHW
1020 data_layouts[name] = DATA_LAYOUT_NCHW;
1022 else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
1024 // in TensorFlow: NHWC->NHWC
1025 // in OpenCV: NCHW->NCHW
1026 data_layouts[name] = DATA_LAYOUT_NHWC;
1029 CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
1031 else if (data_layouts[layer.input(0)] == DATA_LAYOUT_NCHW)
1033 if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1)
1035 // in TensorFlow: NCHW->NHWC
1036 // in OpenCV: NCHW->NCHW
1037 data_layouts[name] = DATA_LAYOUT_NHWC;
1039 else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
1041 // in TensorFlow: NCHW->NCHW
1042 // in OpenCV: NCHW->NCHW
1043 data_layouts[name] = DATA_LAYOUT_NCHW;
1046 CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
1048 int id = dstNet.addLayer(name, "Identity", layerParams);
1049 layer_id[name] = id;
1050 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1054 layerParams.set("order", DictValue::arrayInt<int*>(permData, perm.total()));
1056 int id = dstNet.addLayer(name, "Permute", layerParams);
1057 layer_id[name] = id;
1060 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1061 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1064 else if (type == "Const")
1067 else if (type == "LRN")
1069 if(hasLayerAttr(layer, "alpha")) {
1070 layerParams.set("alpha", getLayerAttr(layer, "alpha").f());
1072 if(hasLayerAttr(layer, "beta")) {
1073 layerParams.set("beta", getLayerAttr(layer, "beta").f());
1075 if(hasLayerAttr(layer, "depth_radius")) {
1076 int radius = (int)getLayerAttr(layer, "depth_radius").i();
1077 layerParams.set("local_size", 2*radius + 1);
1079 if(hasLayerAttr(layer, "bias")) {
1080 layerParams.set("bias", getLayerAttr(layer, "bias").f());
1082 layerParams.set("norm_by_size", false);
1084 int id = dstNet.addLayer(name, "LRN", layerParams);
1085 layer_id[name] = id;
1087 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
// Concat / ConcatV2: "Concat" carries the axis as input(0), "ConcatV2" as the
// last input; the remaining inputs are the tensors to concatenate.
1089 else if (type == "Concat" || type == "ConcatV2")
1091 int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);
1092 int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
// Remap the NHWC axis index to OpenCV's internal NCHW ordering.
1094 if (data_layouts[name] == DATA_LAYOUT_NHWC)
1095 axis = toNCHW(axis);
1096 layerParams.set("axis", axis);
1098 int id = dstNet.addLayer(name, "Concat", layerParams);
1099 layer_id[name] = id;
// Skip the axis input when wiring the data inputs.
1102 int from = (type == "Concat" ? 1 : 0);
1103 int to = (type == "Concat" ? layer.input_size() : layer.input_size() - 1);
1105 // input(0) or input(n-1) is concat_dim
1106 for (int ii = from; ii < to; ii++)
1108 Pin inp = parsePin(layer.input(ii));
1109 if (layer_id.find(inp.name) == layer_id.end())
1110 CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1111 connect(layer_id, dstNet, inp, id, ii - from);
// MaxPool -> Pooling(max). Kernel/stride/padding attributes are parsed by the
// shared helpers (defined elsewhere in this file).
1114 else if (type == "MaxPool")
1116 layerParams.set("pool", "max");
1118 setKSize(layerParams, layer);
1119 setStrides(layerParams, layer);
1120 setPadding(layerParams, layer);
1122 int id = dstNet.addLayer(name, "Pooling", layerParams);
1123 layer_id[name] = id;
1125 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
// AvgPool -> Pooling(ave). TensorFlow excludes padded pixels from the
// average, so padded-area averaging is disabled.
1127 else if (type == "AvgPool")
1129 layerParams.set("pool", "ave");
1130 layerParams.set("ave_pool_padded_area", false);
1132 setKSize(layerParams, layer);
1133 setStrides(layerParams, layer);
1134 setPadding(layerParams, layer);
1136 int id = dstNet.addLayer(name, "Pooling", layerParams);
1137 layer_id[name] = id;
1139 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
// Placeholder: register as a network input unless it is a boolean
// (a train/test phase flag rather than data).
1141 else if (type == "Placeholder")
1143 if (!hasLayerAttr(layer, "dtype") ||
1144 getLayerAttr(layer, "dtype").type() != tensorflow::DT_BOOL) // If input is not a train/test flag.
1146 netInputsNames.push_back(name);
// Split: maps to OpenCV's Slice layer with an equal-parts split along one
// axis. input(0) is the split axis, input(1) is the data tensor.
1150 else if (type == "Split") {
1151 // TODO: determining axis index remapping by input dimensions order of input blob
1152 // TODO: slicing input may be Const op
1153 // TODO: slicing kernels for convolutions - in current implementation it is impossible
1154 // TODO: add parsing num of slices parameter
1155 CV_Assert(layer.input_size() == 2);
1157 // 1st blob is dims tensor
1158 int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
1159 layerParams.set("axis", toNCHW(axis));
1161 int id = dstNet.addLayer(name, "Slice", layerParams);
1162 layer_id[name] = id;
// The data tensor is input(1); input(0) was the axis constant.
1165 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
// Slice: crop a sub-tensor given per-axis begin offsets and sizes.
1167 else if (type == "Slice")
1170 // input: "input_node"
1171 // input: "Slice/begin"
1172 // input: "Slice/size"
1173 CV_Assert(layer.input_size() == 3);
1174 Mat begins = getTensorContent(getConstBlob(layer, value_id, 1));
1175 Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2));
1176 CV_Assert(!begins.empty(), !sizes.empty(), begins.type() == CV_32SC1,
1177 sizes.type() == CV_32SC1);
// 4D NHWC parameters must be permuted into NCHW order (N,H,W,C -> N,C,H,W),
// done here as two adjacent swaps per array.
1179 if (begins.total() == 4 && data_layouts[name] == DATA_LAYOUT_NHWC)
1181 // Swap NHWC parameters' order to NCHW.
1182 std::swap(*begins.ptr<int32_t>(0, 2), *begins.ptr<int32_t>(0, 3));
1183 std::swap(*begins.ptr<int32_t>(0, 1), *begins.ptr<int32_t>(0, 2));
1184 std::swap(*sizes.ptr<int32_t>(0, 2), *sizes.ptr<int32_t>(0, 3));
1185 std::swap(*sizes.ptr<int32_t>(0, 1), *sizes.ptr<int32_t>(0, 2));
1187 layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total()));
1188 layerParams.set("size", DictValue::arrayInt((int*)sizes.data, sizes.total()));
1190 int id = dstNet.addLayer(name, "Slice", layerParams);
1191 layer_id[name] = id;
1193 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
// Mul: three lowering strategies depending on the constant operand:
//  * scalar constant followed by a Maximum node  -> fused LeakyReLU,
//  * scalar constant alone                       -> Power layer (scale only),
//  * vector constant                             -> Scale layer (optionally
//    fused with a following Add as its bias),
//  * no constant operand                         -> element-wise Eltwise prod.
1195 else if (type == "Mul")
1197 bool haveConst = false;
1198 for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
1200 Pin input = parsePin(layer.input(ii));
1201 haveConst = value_id.find(input.name) != value_id.end();
1203 CV_Assert(!haveConst || layer.input_size() == 2);
1207 // Multiplication by constant.
1208 CV_Assert(layer.input_size() == 2);
1209 Mat scaleMat = getTensorContent(getConstBlob(layer, value_id));
1210 CV_Assert(scaleMat.type() == CV_32FC1);
1213 if (scaleMat.total() == 1) // is a scalar.
1215 // Try to match with a LeakyRelu:
1217 // name: "LeakyRelu/mul"
1219 // input: "LeakyRelu/alpha"
1223 // name: "LeakyRelu/Maximum"
1225 // input: "LeakyRelu/mul"
1228 StrIntVector next_layers = getNextLayers(net, name, "Maximum");
1229 if (!next_layers.empty())
// Fuse: drop the Maximum node from the graph and emit a single ReLU with the
// scalar as its negative slope.
1231 int maximumLayerIdx = next_layers[0].second;
1232 ExcludeLayer(net, maximumLayerIdx, 0, false);
1233 layers_to_ignore.insert(next_layers[0].first);
1235 layerParams.set("negative_slope", scaleMat.at<float>(0));
1236 id = dstNet.addLayer(name, "ReLU", layerParams);
1240 // Just a multiplication.
1241 layerParams.set("scale", scaleMat.at<float>(0));
1242 id = dstNet.addLayer(name, "Power", layerParams);
// Vector constant -> Scale layer; the constant becomes blobs[0].
1247 layerParams.blobs.resize(1, scaleMat);
// If the very next node is an Add with a constant, fold it in as the bias.
1249 StrIntVector next_layers = getNextLayers(net, name, "Add");
1250 if (!next_layers.empty())
1252 layerParams.set("bias_term", true);
1253 layerParams.blobs.resize(2);
1255 int weights_layer_index = next_layers[0].second;
1256 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back());
1257 ExcludeLayer(net, weights_layer_index, 0, false);
1258 layers_to_ignore.insert(next_layers[0].first);
1261 if (hasLayerAttr(layer, "axis"))
1262 layerParams.set("axis", getLayerAttr(layer, "axis").i());
1264 id = dstNet.addLayer(name, "Scale", layerParams);
1266 layer_id[name] = id;
// Connect whichever operand is a real layer (the other one is the constant);
// the else-branch line is elided here. TODO confirm against the full file.
1268 Pin inp0 = parsePin(layer.input(0));
1269 if (layer_id.find(inp0.name) != layer_id.end())
1270 // First operand is a constant.
1271 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1273 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
// No constant operand: plain element-wise product of all inputs.
1277 layerParams.set("operation", "prod");
1278 int id = dstNet.addLayer(name, "Eltwise", layerParams);
1279 layer_id[name] = id;
1281 for (int ii = 0; ii < layer.input_size(); ii++)
1283 Pin inp = parsePin(layer.input(ii));
1284 if (layer_id.find(inp.name) == layer_id.end())
1285 CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1286 connect(layer_id, dstNet, inp, id, ii);
// Pad: input(1) is a [dims x 2] int32 tensor of (before, after) pads per axis.
1290 else if (type == "Pad")
1292 Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
1293 CV_Assert(paddings.type() == CV_32SC1);
// 8 values == 4 axes: assume NHWC order and permute the pad pairs to NCHW.
1294 if (paddings.total() == 8)
1296 // Perhaps, we have NHWC padding dimensions order.
1299 std::swap(*paddings.ptr<int32_t>(0, 2), *paddings.ptr<int32_t>(0, 6));
1300 std::swap(*paddings.ptr<int32_t>(0, 3), *paddings.ptr<int32_t>(0, 7));
1303 std::swap(*paddings.ptr<int32_t>(0, 4), *paddings.ptr<int32_t>(0, 6));
1304 std::swap(*paddings.ptr<int32_t>(0, 5), *paddings.ptr<int32_t>(0, 7));
1308 layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));
1310 int id = dstNet.addLayer(name, "Padding", layerParams);
1311 layer_id[name] = id;
1313 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
// FusedBatchNorm -> BatchNorm layer. Inputs: data, gamma, beta, moving_mean,
// moving_variance. Empty gamma/beta tensors mean the corresponding affine
// term is absent.
1315 else if (type == "FusedBatchNorm")
1317 // op: "FusedBatchNorm"
1319 // input: "BatchNorm/gamma"
1320 // input: "BatchNorm/beta"
1321 // input: "BatchNorm/moving_mean"
1322 // input: "BatchNorm/moving_variance"
1323 if (layer.input_size() != 5)
1324 CV_Error(Error::StsNotImplemented,
1325 "Expected gamma, beta, mean and std");
1326 Pin inpId = parsePin(layer.input(0));
1328 bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b();
// blobs[0]/blobs[1] are reserved for mean/std; gamma and beta (if present)
// are appended after them.
1330 layerParams.blobs.resize(2);
1332 const tensorflow::TensorProto& gammaTensor = getConstBlob(layer, value_id, 1);
1333 if (!gammaTensor.tensor_content().empty())
1335 layerParams.blobs.resize(layerParams.blobs.size() + 1);
1336 layerParams.set("has_weight", true);
1337 blobFromTensor(gammaTensor, layerParams.blobs.back());
1340 layerParams.set("has_weight", false);
1342 const tensorflow::TensorProto& betaTensor = getConstBlob(layer, value_id, 2);
1343 if (!betaTensor.tensor_content().empty())
1345 layerParams.blobs.resize(layerParams.blobs.size() + 1);
1346 layerParams.set("has_bias", true);
1347 blobFromTensor(betaTensor, layerParams.blobs.back());
1350 layerParams.set("has_bias", false);
// Training mode (guard is on an elided line — presumably `if (isTraining)`,
// TODO confirm): moving statistics are unavailable, so substitute zero mean /
// unit std and prepend an MVN layer to normalize at run time.
1355 if (layerParams.blobs.size() == 2)
1356 CV_Error(Error::StsNotImplemented, "Cannot determine number "
1357 "of parameters for batch normalization layer.");
1358 mean = Mat::zeros(1, layerParams.blobs[3].total(), CV_32F);
1359 std = Mat::ones(1, layerParams.blobs[3].total(), CV_32F);
1361 // Add an extra layer: Mean-Variance normalization
1362 LayerParams mvnParams;
1363 std::string mvnName = name + "/MVN";
1364 CV_Assert(layer_id.find(mvnName) == layer_id.end());
1365 int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);
1366 layer_id[mvnName] = mvnId;
1367 connect(layer_id, dstNet, inpId, mvnId, 0);
1368 inpId = Pin(mvnName);
// Inference mode: read the stored moving mean/variance constants.
1372 blobFromTensor(getConstBlob(layer, value_id, 3), mean);
1373 blobFromTensor(getConstBlob(layer, value_id, 4), std);
1375 layerParams.blobs[0] = mean;
1376 layerParams.blobs[1] = std;
1378 if (hasLayerAttr(layer, "epsilon"))
1379 layerParams.set("eps", getLayerAttr(layer, "epsilon").f());
1381 int id = dstNet.addLayer(name, "BatchNorm", layerParams);
1382 layer_id[name] = id;
1385 connect(layer_id, dstNet, inpId, id, 0);
// Conv2DBackpropInput (conv2d_transpose) -> Deconvolution layer.
// Inputs: output_shape constant, kernel weights, data.
1387 else if (type == "Conv2DBackpropInput")
1389 // op: "Conv2DBackpropInput"
1390 // input: "conv2d_transpose/output_shape"
1393 if (layer.input_size() != 3)
1394 CV_Error(Error::StsNotImplemented,
1395 "Expected output shape, weights and input nodes");
1397 layerParams.set("bias_term", false);
1398 layerParams.blobs.resize(1);
// Fuse a directly following BiasAdd node, if any, as the layer's bias blob.
1400 StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
1401 if (next_layers.size() == 1)
1403 layerParams.set("bias_term", true);
1404 layerParams.blobs.resize(2);
1406 int weights_layer_index = next_layers[0].second;
1408 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
1409 ExcludeLayer(net, weights_layer_index, 0, false);
1410 layers_to_ignore.insert(next_layers[0].first);
1413 kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);
// kernelFromTensor stores the blob so that size.p is [out, in, H, W];
// for deconvolution num_output is taken from index 1.
1415 const int* kshape = layerParams.blobs[0].size.p;
1416 const int kernelH = kshape[2];
1417 const int kernelW = kshape[3];
1418 layerParams.set("kernel_h", kernelH);
1419 layerParams.set("kernel_w", kernelW);
1420 layerParams.set("num_output", kshape[1]);
1422 setStrides(layerParams, layer);
1423 setPadding(layerParams, layer);
1425 // For convolution layer, output shape computes as
1426 // o = 1 + (i - k + 2*p) / s
1427 // i - input size, o - output size, k - kernel size, p - pad, s - stride
1428 // In TensorFlow, p == 0 is padMode == 'VALID' or p == (k - 1) / 2
1429 // considering that k is odd.
1430 // SAME: o = 1 + (i - 1) / s
1431 // VALID: o = 1 + i / s
1432 // Deconvolution's layer output shape computes as
1433 // SAME: o = 1 + (i - 1)*s
1434 // VALID: o = (i - 1)*s
1435 // If output_shape differs from formulas above then adjust padding is applied.
// outShape is the NHWC output_shape constant: index 1 = height, 2 = width.
1437 const int strideY = layerParams.get<int>("stride_h");
1438 const int strideX = layerParams.get<int>("stride_w");
1439 Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
1440 const int outH = outShape.at<int>(1);
1441 const int outW = outShape.at<int>(2);
1442 if (layerParams.get<String>("pad_mode") == "SAME")
1444 layerParams.set("adj_w", (outW - 1) % strideX);
1445 layerParams.set("adj_h", (outH - 1) % strideY);
1447 else if (layerParams.get<String>("pad_mode") == "VALID")
1449 layerParams.set("adj_w", (outW - kernelW) % strideX);
1450 layerParams.set("adj_h", (outH - kernelH) % strideY);
1452 int id = dstNet.addLayer(name, "Deconvolution", layerParams);
1453 layer_id[name] = id;
// Only input(2) (the data tensor) is connected; inputs 0/1 are constants.
1456 connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
1458 else if (type == "BlockLSTM")
1461 // input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps)
1463 // input: "lstm_block_wrapper/zeros" (ignore)
1464 // input: "lstm_block_wrapper/zeros" (ignore)
1465 // input: "lstm_block_wrapper/kernel"
1466 // input: "lstm_block_wrapper/w_i_diag"
1467 // input: "lstm_block_wrapper/w_f_diag"
1468 // input: "lstm_block_wrapper/w_o_diag"
1469 // input: "lstm_block_wrapper/bias"
1470 if (layer.input_size() != 9)
1471 CV_Error(Error::StsNotImplemented, "Unexpected number of input nodes");
1473 if (hasLayerAttr(layer, "forget_bias"))
1474 layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f());
1476 if (hasLayerAttr(layer, "forget_bias"))
1478 float cellClip = getLayerAttr(layer, "cell_clip").f();
1479 // Cell clip disabled if it's negative.
1482 layerParams.set("use_cell_clip", true);
1483 layerParams.set("cell_clip", cellClip);
1488 blobFromTensor(getConstBlob(layer, value_id, 4), W);
1489 blobFromTensor(getConstBlob(layer, value_id, 8), b);
1490 const int outSize = W.cols / 4;
1493 float* weightData = (float*)W.data;
1494 for (int i = 0; i < W.rows; ++i)
1495 for (int j = 0; j < outSize; ++j)
1497 std::swap(weightData[i * W.cols + 1 * outSize + j],
1498 weightData[i * W.cols + 2 * outSize + j]);
1499 std::swap(weightData[i * W.cols + 2 * outSize + j],
1500 weightData[i * W.cols + 3 * outSize + j]);
1502 Wx = W.rowRange(0, W.rows - outSize).t();
1503 Wh = W.rowRange(W.rows - outSize, W.rows).t();
1505 layerParams.blobs.resize(3);
1506 layerParams.blobs[0] = Wh;
1507 layerParams.blobs[1] = Wx;
1508 layerParams.blobs[2] = b;
1510 if (hasLayerAttr(layer, "use_peephole"))
1512 bool usePeephole = getLayerAttr(layer, "use_peephole").b();
1515 layerParams.set("use_peephole", true);
1516 layerParams.blobs.resize(6);
1517 for (int i = 0; i < 3; ++i)
1520 blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);
1521 w = w.reshape(1, w.total()); // Single column.
1522 w = Mat::diag(w); // Make a diagonal matrix.
1523 layerParams.blobs[3 + i] = w;
1528 int id = dstNet.addLayer(name, "LSTM", layerParams);
1529 layer_id[name] = id;
1532 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1533 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
// ResizeNearestNeighbor / ResizeBilinear -> Resize layer. With 2 inputs the
// constant is an absolute (height, width); with 3 inputs it is a pair of
// integer zoom factors.
1535 else if (type == "ResizeNearestNeighbor" || type == "ResizeBilinear")
1537 if (layer.input_size() == 2)
1539 Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1));
1540 CV_Assert(outSize.type() == CV_32SC1, outSize.total() == 2);
1541 layerParams.set("height", outSize.at<int>(0, 0));
1542 layerParams.set("width", outSize.at<int>(0, 1));
1544 else if (layer.input_size() == 3)
1546 Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1));
1547 Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2));
1548 CV_Assert(factorHeight.type() == CV_32SC1, factorHeight.total() == 1,
1549 factorWidth.type() == CV_32SC1, factorWidth.total() == 1);
1550 layerParams.set("zoom_factor_x", factorWidth.at<int>(0));
1551 layerParams.set("zoom_factor_y", factorHeight.at<int>(0));
// Any other input count is rejected (the else for this assert is elided).
1554 CV_Assert(layer.input_size() == 2 || layer.input_size() == 3);
1556 if (type == "ResizeNearestNeighbor")
1557 layerParams.set("interpolation", "nearest");
1559 layerParams.set("interpolation", "bilinear");
1561 if (hasLayerAttr(layer, "align_corners"))
1562 layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b());
1564 int id = dstNet.addLayer(name, "Resize", layerParams);
1565 layer_id[name] = id;
1567 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
// L2Normalize -> Normalize layer over a contiguous axis range.
1569 else if (type == "L2Normalize")
1571 // op: "L2Normalize"
1573 // input: "reduction_indices" (axis)
1574 CV_Assert(layer.input_size() == 2);
1575 Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));
1576 CV_Assert(reductionIndices.type() == CV_32SC1);
1578 const int numAxes = reductionIndices.total();
// Remap NHWC axis indices to NCHW before validating contiguity.
1579 if (data_layouts[name] == DATA_LAYOUT_NHWC)
1580 for (int i = 0; i < numAxes; ++i)
1581 reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));
// Only a consecutive run of same-sign axes can be expressed as a
// [start_axis, end_axis] range.
1583 cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);
1584 for (int i = 1; i < numAxes; ++i)
1586 CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);
1587 // Axes have the same sign.
1588 CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);
1590 layerParams.set("start_axis", reductionIndices.at<int>(0));
1591 layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));
1593 int id = dstNet.addLayer(name, "Normalize", layerParams);
1594 layer_id[name] = id;
1595 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
// PriorBox: forward scalar attributes and tensor-valued attributes to the
// PriorBox layer; inputs are the feature map and the image tensor.
1597 else if (type == "PriorBox")
1599 if (hasLayerAttr(layer, "min_size"))
1600 layerParams.set("min_size", getLayerAttr(layer, "min_size").i());
1601 if (hasLayerAttr(layer, "max_size"))
1602 layerParams.set("max_size", getLayerAttr(layer, "max_size").i());
1603 if (hasLayerAttr(layer, "flip"))
1604 layerParams.set("flip", getLayerAttr(layer, "flip").b());
1605 if (hasLayerAttr(layer, "clip"))
1606 layerParams.set("clip", getLayerAttr(layer, "clip").b());
1607 if (hasLayerAttr(layer, "offset"))
1608 layerParams.set("offset", getLayerAttr(layer, "offset").f());
1609 if (hasLayerAttr(layer, "step"))
1610 layerParams.set("step", getLayerAttr(layer, "step").f());
// NOTE(review): the initializer continues on an elided line — the loop bound
// of 5 implies two more names follow "scales" (presumably "width"/"height";
// TODO confirm against the full file).
1612 const std::string paramNames[] = {"variance", "aspect_ratio", "scales",
1614 for (int i = 0; i < 5; ++i)
1616 if (hasLayerAttr(layer, paramNames[i]))
1618 Mat values = getTensorContent(getLayerAttr(layer, paramNames[i]).tensor());
1619 layerParams.set(paramNames[i],
1620 DictValue::arrayReal<float*>((float*)values.data, values.total()));
1623 int id = dstNet.addLayer(name, "PriorBox", layerParams);
1624 layer_id[name] = id;
1625 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1626 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
// PriorBox output is a coordinates list, not an image-like tensor.
1627 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
// DetectionOutput: pure attribute forwarding to OpenCV's DetectionOutput
// layer; the three inputs are wired positionally.
1629 else if (type == "DetectionOutput")
1631 // op: "DetectionOutput"
1632 // input_0: "locations"
1633 // input_1: "classifications"
1634 // input_2: "prior_boxes"
1635 if (hasLayerAttr(layer, "num_classes"))
1636 layerParams.set("num_classes", getLayerAttr(layer, "num_classes").i());
1637 if (hasLayerAttr(layer, "share_location"))
1638 layerParams.set("share_location", getLayerAttr(layer, "share_location").b());
1639 if (hasLayerAttr(layer, "background_label_id"))
1640 layerParams.set("background_label_id", getLayerAttr(layer, "background_label_id").i());
1641 if (hasLayerAttr(layer, "nms_threshold"))
1642 layerParams.set("nms_threshold", getLayerAttr(layer, "nms_threshold").f());
1643 if (hasLayerAttr(layer, "top_k"))
1644 layerParams.set("top_k", getLayerAttr(layer, "top_k").i());
1645 if (hasLayerAttr(layer, "code_type"))
1646 layerParams.set("code_type", getLayerAttr(layer, "code_type").s());
1647 if (hasLayerAttr(layer, "keep_top_k"))
1648 layerParams.set("keep_top_k", getLayerAttr(layer, "keep_top_k").i());
1649 if (hasLayerAttr(layer, "confidence_threshold"))
1650 layerParams.set("confidence_threshold", getLayerAttr(layer, "confidence_threshold").f());
1651 if (hasLayerAttr(layer, "loc_pred_transposed"))
1652 layerParams.set("loc_pred_transposed", getLayerAttr(layer, "loc_pred_transposed").b());
1653 if (hasLayerAttr(layer, "clip"))
1654 layerParams.set("clip", getLayerAttr(layer, "clip").b());
1655 if (hasLayerAttr(layer, "variance_encoded_in_target"))
1656 layerParams.set("variance_encoded_in_target", getLayerAttr(layer, "variance_encoded_in_target").b());
1658 int id = dstNet.addLayer(name, "DetectionOutput", layerParams);
1659 layer_id[name] = id;
1660 for (int i = 0; i < 3; ++i)
1661 connect(layer_id, dstNet, parsePin(layer.input(i)), id, i);
1662 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
// Softmax: optional axis attribute, otherwise layer defaults apply.
1664 else if (type == "Softmax")
1666 if (hasLayerAttr(layer, "axis"))
1667 layerParams.set("axis", getLayerAttr(layer, "axis").i());
1669 int id = dstNet.addLayer(name, "Softmax", layerParams);
1670 layer_id[name] = id;
1671 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
// CropAndResize: input(2) is a constant (crop_height, crop_width) pair;
// inputs 0/1 (image and boxes) are wired to the layer.
1673 else if (type == "CropAndResize")
1675 // op: "CropAndResize"
1679 CV_Assert(layer.input_size() == 3);
1681 Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2));
1682 CV_Assert(cropSize.type() == CV_32SC1, cropSize.total() == 2);
1684 layerParams.set("height", cropSize.at<int>(0));
1685 layerParams.set("width", cropSize.at<int>(1));
1687 int id = dstNet.addLayer(name, "CropAndResize", layerParams);
1688 layer_id[name] = id;
1690 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1691 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
// Mean (reduce_mean): only reduction over axes {1, 2} (H and W of an NHWC
// tensor) is supported — that case is exactly global average pooling.
1693 else if (type == "Mean")
1695 Mat indices = getTensorContent(getConstBlob(layer, value_id, 1));
1696 CV_Assert(indices.type() == CV_32SC1);
1698 if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
1699 CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation.");
1701 layerParams.set("pool", "ave");
1702 layerParams.set("global_pooling", true);
1704 int id = dstNet.addLayer(name, "Pooling", layerParams);
1705 layer_id[name] = id;
1707 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1709 // There are two attributes, "keepdims" and a deprecated "keep_dims".
1710 bool keepDims = false;
1711 if (hasLayerAttr(layer, "keepdims"))
1712 keepDims = getLayerAttr(layer, "keepdims").b();
1713 else if (hasLayerAttr(layer, "keep_dims"))
1714 keepDims = getLayerAttr(layer, "keep_dims").b();
// When dims are not kept (guard line elided — presumably `if (!keepDims)`,
// TODO confirm): append a Flatten layer to drop the singleton H/W axes.
1718 LayerParams flattenLp;
1719 std::string flattenName = name + "/flatten";
1720 CV_Assert(layer_id.find(flattenName) == layer_id.end());
1721 int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
1722 layer_id[flattenName] = flattenId;
1723 connect(layer_id, dstNet, Pin(name), flattenId, 0);
// ClipByValue: min/max are scalar constants; lowered to OpenCV's ReLU6 layer,
// which implements a generic [min_value, max_value] clamp.
1726 else if (type == "ClipByValue")
1728 // op: "ClipByValue"
1732 CV_Assert(layer.input_size() == 3);
1734 Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1));
1735 Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2));
1736 CV_Assert(minValue.total() == 1, minValue.type() == CV_32F,
1737 maxValue.total() == 1, maxValue.type() == CV_32F);
1739 layerParams.set("min_value", minValue.at<float>(0));
1740 layerParams.set("max_value", maxValue.at<float>(0));
1742 int id = dstNet.addLayer(name, "ReLU6", layerParams);
1743 layer_id[name] = id;
1745 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
// Simple element-wise activations: direct 1:1 rename of the TF op to the
// corresponding OpenCV layer type.
1747 else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
1748 type == "Relu" || type == "Elu" ||
1749 type == "Identity" || type == "Relu6")
1751 std::string dnnType = type;
1752 if (type == "Abs") dnnType = "AbsVal";
1753 else if (type == "Tanh") dnnType = "TanH";
1754 else if (type == "Relu") dnnType = "ReLU";
1755 else if (type == "Relu6") dnnType = "ReLU6";
1756 else if (type == "Elu") dnnType = "ELU";
1758 int id = dstNet.addLayer(name, dnnType, layerParams);
1759 layer_id[name] = id;
1760 connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1764 // Importer does not know how to map this TensorFlow's operation onto OpenCV's layer.
1765 // However we create a layer with the same type and rely that user defined a custom layer.
1767 // All the attributes are added to LayerParams.
// Only scalar-valued attributes (string/int64/float/bool) are forwarded;
// tensor/list attributes are silently skipped.
1768 google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
1769 for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
1770 ai != attr.end(); ++ai)
1772 if (ai->second.value_case() == tensorflow::AttrValue::kS) // string
1773 layerParams.set(ai->first, ai->second.s());
1774 if (ai->second.value_case() == tensorflow::AttrValue::kI) // int64
1775 layerParams.set(ai->first, ai->second.i());
1776 if (ai->second.value_case() == tensorflow::AttrValue::kF) // float
1777 layerParams.set(ai->first, ai->second.f());
1778 if (ai->second.value_case() == tensorflow::AttrValue::kB) // bool
1779 layerParams.set(ai->first, ai->second.b());
1782 // All the Const input nodes are added to layer's blobs.
// Constant inputs become blobs; the remaining inputs are connected as layers.
1783 std::vector<std::string> inputsNames;
1784 for (int i = 0; i < layer.input_size(); ++i)
1786 // Check if input is a Const node.
1787 if (value_id.find(layer.input(i)) != value_id.end())
1789 Mat blob = getTensorContent(getConstBlob(layer, value_id, i));
1790 layerParams.blobs.push_back(blob);
1793 inputsNames.push_back(layer.input(i));
1795 int id = dstNet.addLayer(name, type, layerParams);
1796 layer_id[name] = id;
// NOTE(review): signed/unsigned comparison (int vs size_t) — benign here but
// worth normalizing when this code is next touched.
1798 for (int i = 0; i < inputsNames.size(); ++i)
1800 connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i);
// Register all collected Placeholder names as the network's inputs.
1804 dstNet.setInputsNames(netInputsNames);
1809 #endif //HAVE_PROTOBUF
1811 Net readNetFromTensorflow(const String &model, const String &config)
1813 TFImporter importer(model.c_str(), config.c_str());
1815 importer.populateNet(net);
1819 Net readNetFromTensorflow(const char* bufferModel, size_t lenModel,
1820 const char* bufferConfig, size_t lenConfig)
1822 TFImporter importer(bufferModel, lenModel, bufferConfig, lenConfig);
1824 importer.populateNet(net);
1828 CV__DNN_EXPERIMENTAL_NS_END