Fuse tf.nn.l2_normalize layer
modules/dnn/src/tensorflow/tf_importer.cpp
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

/*
Implementation of the TensorFlow models parser
*/

#include "../precomp.hpp"

#ifdef HAVE_PROTOBUF
#include "tf_io.hpp"

#include <iostream>
#include <fstream>
#include <algorithm>
#include <string>
#include "tf_graph_simplifier.hpp"
#endif

namespace cv {
namespace dnn {
CV__DNN_EXPERIMENTAL_NS_BEGIN

#ifdef HAVE_PROTOBUF

using ::google::protobuf::RepeatedField;
using ::google::protobuf::RepeatedPtrField;
using ::google::protobuf::Message;
using ::google::protobuf::Descriptor;
using ::google::protobuf::FieldDescriptor;
using ::google::protobuf::Reflection;

namespace
{

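// Maps an axis index of an NHWC tensor (negative Python-style indices allowed)
// to the corresponding axis of an NCHW tensor:
//   0 (N) -> 0,  1/-3 (H) -> 2,  2/-2 (W) -> 3,  3/-1 (C) -> 1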
static int toNCHW(int idx)
{
    CV_Assert(-4 <= idx && idx < 4);
    if (idx == 0) return 0;
    else if (idx > 0) return idx % 3 + 1;
    else return (4 + idx) % 3 + 1;
}

// These values are used to indicate a layer output's data layout where possible.
enum DataLayout
{
    DATA_LAYOUT_NHWC,
    DATA_LAYOUT_NCHW,
    DATA_LAYOUT_UNKNOWN
};

typedef std::vector<std::pair<String, int> > StrIntVector;

struct Pin
{
    Pin(const std::string &_name, int _blobIndex = 0) :
        name(_name), blobIndex(_blobIndex) {}

    Pin() :
        name(""), blobIndex(-1) {}

    std::string name;
    int blobIndex;
};

void blobShapeFromTensor(const tensorflow::TensorProto &tensor, MatShape& shape)
{
    shape.clear();
    if (tensor.has_tensor_shape())
    {
        const tensorflow::TensorShapeProto &_shape = tensor.tensor_shape();
        int i, n = _shape.dim_size();
        if (n)
        {
            shape.resize(n);

            for (i = 0; i < n; i++)
                shape[i] = (int)_shape.dim(i).size();
        }
        else
            shape.resize(1, 1);  // Scalar.
    }
    else
    {
        CV_Error(Error::StsError, "Unknown shape of input tensor");
    }
}

template <typename T>
void parseTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
{
    MatShape shape;
    blobShapeFromTensor(tensor, shape);
    int dims = (int)shape.size();

    if (dims == 4)
    {
        // REORDER blob NHWC to NCHW
        swap(shape[2], shape[3]); // NHCW
        swap(shape[1], shape[2]); // NCHW
    }

    dstBlob.create(shape, CV_32F);

    Mat tensorContent = getTensorContent(tensor);
    int size = tensorContent.total();
    CV_Assert(size == (int)dstBlob.total());

    float *dstData = dstBlob.ptr<float>();
    const T *data = reinterpret_cast<const T*>(tensorContent.data);

    if (dims == 4)
    {
        int num = shape[0], channels = shape[1], height = shape[2], width = shape[3];
        int total = num*channels*height*width;
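        // Linear offsets for the two layouts:
        //   NCHW (dst): ((n*C + c)*H + h)*W + w
        //   NHWC (src): ((n*H + h)*W + w)*C + c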
        for(int i_n = 0; i_n < shape[0]; i_n++) {
            for(int i_c = 0; i_c < shape[1]; i_c++) {
                for(int i_h = 0; i_h < shape[2]; i_h++) {
                    for(int i_w = 0; i_w < shape[3]; i_w++) {
                        int dst_i = channels*height*width*i_n + height*width*i_c + width*i_h + i_w;
                        int src_i = channels*height*width*i_n + i_c + channels*width*i_h + channels*i_w;

                        CV_Assert(dst_i < total);
                        CV_Assert(src_i < total);

                        dstData[dst_i] = data[src_i];
                    }
                }
            }
        }
    } else {
        for (int i = 0; i < size; i++)
            dstData[i] = data[i];
    }
}

void blobFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
{
    switch (tensor.dtype()) {
        case tensorflow::DT_FLOAT:
        case tensorflow::DT_HALF:
            parseTensor<float>(tensor, dstBlob);
            break;
        case tensorflow::DT_DOUBLE:
            parseTensor<double>(tensor, dstBlob);
            break;
        default:
            CV_Error(Error::StsError, "Tensor's data type is not supported");
            break;
    }
}

void printList(const tensorflow::AttrValue::ListValue &val)
{
    std::cout << "(";
    for (int i = 0; i < val.i_size(); i++)
        std::cout << " " << val.i(i);
    std::cout << " )";
}

void printTensorShape(const tensorflow::TensorShapeProto &shape)
{
    std::cout << "[ ";
    for (int d = 0; d < shape.dim_size(); d++)
        std::cout << shape.dim(d).name() <<
                     ":" << shape.dim(d).size() << " ";
    std::cout << "]";
}

void printTensor(const tensorflow::TensorProto &tensor)
{
    printTensorShape(tensor.tensor_shape());

    if (tensor.tensor_content().empty())
        return;

    switch (tensor.dtype())
    {
    case tensorflow::DT_FLOAT:
        {
            const float *data = reinterpret_cast<const float*>(tensor.tensor_content().c_str());
            int size = tensor.tensor_content().size() / sizeof(float);
            for (int i = 0; i < std::min(10, size); i++)
                std::cout << " " << data[i];
            if (size > 10)
                std::cout << " ... " << size - 10 << " more";
            break;
        }
    case tensorflow::DT_INT32:
        {
            const int *data = reinterpret_cast<const int*>(tensor.tensor_content().c_str());
            int size = tensor.tensor_content().size() / sizeof(int);
            for (int i = 0; i < std::min(10, size); i++)
                std::cout << " " << data[i];
            if (size > 10)
                std::cout << " ... " << size - 10 << " more";
            break;
        }
    default:
        CV_Error(Error::StsError, "Tensor type is not supported");
        break;
    }
}

void printLayerAttr(const tensorflow::NodeDef &layer)
{
    std::cout << std::endl << layer.name() << ":" << layer.op();
    for (int ii = 0; ii < layer.input_size(); ii++)
        std::cout << "(" << layer.input(ii) << ")";
    std::cout << std::endl;
    google::protobuf::Map<std::string, tensorflow::AttrValue> attr
            = layer.attr();
    for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
         ai != attr.end(); ++ai)
    {
        std::cout << ai->first << ":";
        if (ai->first == "dtype" || ai->first == "T")
            std::cout << ai->second.i();
        else if (ai->first == "padding")
            std::cout << ai->second.s();
        else if (ai->first == "transpose_a" || ai->first == "transpose_b")
            std::cout << ai->second.b();
        //            else if (ai->first == "shape")
        //              printTensorShape(ai->second.shape());
        else if (ai->first == "strides" || ai->first == "ksize")
            printList(ai->second.list());
        else
            printTensor(ai->second.tensor());
        std::cout << std::endl;
    }
}

bool hasLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
{
    google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
    return attr.find(name) != attr.end();
}

const tensorflow::AttrValue& getLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
{
    return layer.attr().at(name);
}

void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
{
    if (hasLayerAttr(layer, "strides"))
    {
        const tensorflow::AttrValue& val = getLayerAttr(layer, "strides");
        if (val.list().i_size() != 4 ||
            val.list().i(0) != 1 || val.list().i(3) != 1)
            CV_Error(Error::StsError, "Unsupported strides");
        layerParams.set("stride_h", static_cast<int>(val.list().i(1)));
        layerParams.set("stride_w", static_cast<int>(val.list().i(2)));
    }
}

DictValue parseDims(const tensorflow::TensorProto &tensor) {
    MatShape shape;
    blobShapeFromTensor(tensor, shape);
    int dims = (int)shape.size();

    CV_Assert(tensor.dtype() == tensorflow::DT_INT32);
    CV_Assert(dims == 1);

    Mat values = getTensorContent(tensor);
    CV_Assert(values.type() == CV_32SC1);
    // TODO: add reordering shape if dims == 4
    return DictValue::arrayInt((int*)values.data, values.total());
}

void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
{
    if (hasLayerAttr(layer, "ksize"))
    {
        const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize");
        if (val.list().i_size() != 4 ||
            val.list().i(0) != 1 || val.list().i(3) != 1)
            CV_Error(Error::StsError, "Unsupported ksize");
        layerParams.set("kernel_h", static_cast<int>(val.list().i(1)));
        layerParams.set("kernel_w", static_cast<int>(val.list().i(2)));
    }
    else
    {
        layerParams.set("kernel_h", 1);
        layerParams.set("kernel_w", 1);
    }
}

void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer)
{
    if (hasLayerAttr(layer, "padding"))
        layerParams.set("pad_mode", getLayerAttr(layer, "padding").s());
}

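// Splits a TensorFlow input reference of the form "name:port" into a Pin,
// e.g. "conv1/Relu:1" -> {"conv1/Relu", 1}; a bare "conv1" keeps blobIndex 0.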
Pin parsePin(const std::string &name)
{
    Pin pin(name);

    size_t delimiter_pos = name.find_first_of(":");
    if (delimiter_pos != std::string::npos)
    {
        pin.name = name.substr(0, delimiter_pos);
        std::istringstream(name.substr(delimiter_pos + 1)) >> pin.blobIndex;
    }

    return pin;
}

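// Collects (name, node index) pairs for every node that consumes an output of
// layer_name, optionally restricted to nodes of the given op type.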
StrIntVector getNextLayers(const tensorflow::GraphDef& net, const String& layer_name, const String& type = "")
{
   StrIntVector layers;

   for (int li = 0; li < net.node_size(); li++)
   {
       const tensorflow::NodeDef& layer = net.node(li);
       for (int input_id = 0; input_id < layer.input_size(); input_id++) {
           String input_op_name = parsePin(layer.input(input_id)).name;
           bool type_ok = type.empty() ? true : type == layer.op();
           if (input_op_name == layer_name && type_ok)
               layers.push_back(std::make_pair(layer.name(), li));
       }
   }

   return layers;
}

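// Bypasses the node at layer_index: every consumer of its outputs is rewired to
// the node's input_blob_index'th input, then the node is (optionally) removed.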
void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int input_blob_index, bool remove_from_net = true) {
    String layer_name = net.node(layer_index).name();
    StrIntVector layers = getNextLayers(net, layer_name);

    String removed_layer_input = net.node(layer_index).input(input_blob_index);

    for (size_t i = 0; i < layers.size(); i++)
    {
        tensorflow::NodeDef* layer = net.mutable_node(layers[i].second);
        for (int input_id = 0; input_id < layer->input_size(); input_id++) {
            String input_op_name = layer->input(input_id);

            if (input_op_name == layer_name) {
                layer->set_input(input_id, removed_layer_input);
            }
        }
    }

    if (remove_from_net)
        net.mutable_node()->DeleteSubrange(layer_index, 1);
}

class TFImporter {
public:
    TFImporter(const char *model, const char *config = NULL);
    TFImporter(const char *dataModel, size_t lenModel,
               const char *dataConfig = NULL, size_t lenConfig = 0);

    void populateNet(Net dstNet);

private:
    void kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob);

    void connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
                 const int input_layer_id, const int input_blob_id);
    void connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
                           const int input_layer_id, const int input_blobs_count);
    const tensorflow::TensorProto& getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
                                                int input_blob_index = -1, int* actual_inp_blob_idx = 0);


    // Binary serialized TensorFlow graph includes weights.
    tensorflow::GraphDef netBin;
    // Optional text definition of TensorFlow graph. More flexible than binary format
    // and may be used to build the network using binary format only as a weights storage.
    // This approach is similar to Caffe's `.prototxt` and `.caffemodel`.
    tensorflow::GraphDef netTxt;
};

TFImporter::TFImporter(const char *model, const char *config)
{
    if (model && model[0])
        ReadTFNetParamsFromBinaryFileOrDie(model, &netBin);
    if (config && config[0])
        ReadTFNetParamsFromTextFileOrDie(config, &netTxt);
}

TFImporter::TFImporter(const char *dataModel, size_t lenModel,
                       const char *dataConfig, size_t lenConfig)
{
    if (dataModel != NULL && lenModel > 0)
        ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin);
    if (dataConfig != NULL && lenConfig > 0)
        ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt);
}

void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
{
    MatShape shape;
    blobShapeFromTensor(tensor, shape);
    int dims = (int)shape.size();

    // TODO: other blob types
    CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT ||
              tensor.dtype() == tensorflow::DT_HALF);
    CV_Assert(dims == 4);

    // REORDER kernel HWIO to OIHW
    swap(shape[0], shape[2]); // IWHO
    swap(shape[1], shape[3]); // IOHW
    swap(shape[0], shape[1]); // OIHW

    dstBlob.create(shape, CV_32F);

    Mat tensorContent = getTensorContent(tensor);
    int size = tensorContent.total();
    CV_Assert(size == (int)dstBlob.total());

    float *dstData = dstBlob.ptr<float>();
    const float *data = reinterpret_cast<const float*>(tensorContent.data);

    int out_c = shape[0], input_c = shape[1], height = shape[2], width = shape[3];
    int total = out_c*input_c*height*width;
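    // Linear offsets for the two layouts:
    //   OIHW (dst): ((o*I + i)*H + h)*W + w
    //   HWIO (src): ((h*W + w)*I + i)*O + o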
    for(int i_oc = 0; i_oc < out_c; i_oc++) {
        for(int i_ic = 0; i_ic < input_c; i_ic++) {
            for(int i_h = 0; i_h < height; i_h++) {
                for(int i_w = 0; i_w < width; i_w++) {
                    int dst_i = input_c*height*width*i_oc + height*width*i_ic + width*i_h + i_w;
                    int src_i = out_c*input_c*width*i_h + out_c*input_c*i_w + out_c*i_ic + i_oc;
                    CV_Assert(dst_i < total);
                    CV_Assert(src_i < total);
                    dstData[dst_i] = data[src_i];
                }
            }
        }
    }
}

void TFImporter::connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
             const int input_layer_id, const int input_blob_id)
{
    std::map<String, int>::const_iterator it = layers_name_id_map.find(outPin.name);
    if (it == layers_name_id_map.end())
        CV_Error(Error::StsError, "Input layer not found: " + outPin.name);
    network.connect(it->second, outPin.blobIndex, input_layer_id, input_blob_id);
}

void TFImporter::connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
                     const int input_layer_id, const int input_blobs_count)
{
    for (int input_blob_id = 0; input_blob_id < input_blobs_count; input_blob_id++)
        connect(layer_id, network, outPin, input_layer_id, input_blob_id);
}

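// Returns the TensorProto of the Const node feeding `layer`. With
// input_blob_index == -1 the single Const input is located automatically;
// its position is reported through actual_inp_blob_idx when provided.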
const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
                                              int input_blob_index, int* actual_inp_blob_idx) {
    if (input_blob_index == -1) {
        for(int i = 0; i < layer.input_size(); i++) {
            Pin input = parsePin(layer.input(i));
            if (const_layers.find(input.name) != const_layers.end()) {
                if (input_blob_index != -1)
                    CV_Error(Error::StsError, "More than one input is Const op");

                input_blob_index = i;
            }
        }
    }

    if (input_blob_index == -1)
        CV_Error(Error::StsError, "Const input blob for weights not found");

    Pin kernel_inp = parsePin(layer.input(input_blob_index));
    if (const_layers.find(kernel_inp.name) == const_layers.end())
        CV_Error(Error::StsError, "Const kernel input not found");
    if (kernel_inp.blobIndex != 0)
        CV_Error(Error::StsError, "Unsupported kernel input");

    if(actual_inp_blob_idx) {
        *actual_inp_blob_idx = input_blob_index;
    }

    int nodeIdx = const_layers.at(kernel_inp.name);
    if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name)
    {
        return netBin.node(nodeIdx).attr().at("value").tensor();
    }
    else
    {
        CV_Assert(nodeIdx < netTxt.node_size(),
                  netTxt.node(nodeIdx).name() == kernel_inp.name);
        return netTxt.node(nodeIdx).attr().at("value").tensor();
    }
}

static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& const_layers,
                          std::set<String>& layers_to_ignore)
{
    for (int li = 0; li < net.node_size(); li++)
    {
        const tensorflow::NodeDef &layer = net.node(li);
        String name = layer.name();
        String type = layer.op();

        if (type == "Dequantize")
        {
            // Example of Dequantize node:
            //   name: "conv2d_1/bias"
            //   op: "Dequantize"
            //   input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8)
            //   input: "conv2d_1/bias_quantized_min"
            //   input: "conv2d_1/bias_quantized_max"
            //   attr { key: "T" value { type: DT_QUINT8 } }   (quantized type)
            //   attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique)
            CV_Assert(layer.input_size() == 3);
            for (int i = 0; i < 3; ++i)
                CV_Assert(const_layers.find(layer.input(i)) != const_layers.end());
            CV_Assert(hasLayerAttr(layer, "mode") &&
                      getLayerAttr(layer, "mode").s() == "MIN_FIRST");

            int tensorId = const_layers[layer.input(0)];
            int minId = const_layers[layer.input(1)];
            int maxId = const_layers[layer.input(2)];

            tensorflow::TensorProto* tensor = net.mutable_node(tensorId)
                                                ->mutable_attr()->at("value")
                                                 .mutable_tensor();
            CV_Assert(tensor->dtype() == tensorflow::DT_QUINT8);

            Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor());
            Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor());
            CV_Assert(qMin.total() == 1, qMin.type() == CV_32FC1,
                      qMax.total() == 1, qMax.type() == CV_32FC1);

            Mat content = getTensorContent(*tensor);

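            // MIN_FIRST dequantization: real = q*rangeScale + rangeScale*round(minVal/rangeScale),
            // i.e. the minimum is snapped to a multiple of the scale before shifting.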
            float minVal = qMin.at<float>(0);
            float rangeScale = (qMax.at<float>(0) - minVal) / 255;
            CV_Assert(rangeScale >= 0);
            content.convertTo(content, CV_32FC1, rangeScale,
                              rangeScale * cvRound(minVal / rangeScale));

            tensor->set_dtype(tensorflow::DT_FLOAT);
            tensor->set_tensor_content(content.data, content.total() * content.elemSize1());

            net.mutable_node(tensorId)->set_name(name);
            CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);
            layers_to_ignore.insert(name);
            continue;
        }
        else if (type != "Const")
            continue;  // only Const parameters are supported

        if (layer.attr().find("value") != layer.attr().end())
        {
            CV_Assert(const_layers.insert(std::make_pair(name, li)).second);
        }
        layers_to_ignore.insert(name);
    }
}

// If all inputs of a specific layer have the same data layout, we can say that this
// layer's output has that data layout too. Otherwise returns DATA_LAYOUT_UNKNOWN.
static int predictOutputDataLayout(const tensorflow::NodeDef& layer, const std::map<String, int>& data_layouts)
{
    if (hasLayerAttr(layer, "data_format"))
    {
        std::string format = getLayerAttr(layer, "data_format").s();
        if (format == "NHWC" || format == "channels_last")
            return DATA_LAYOUT_NHWC;
        else if (format == "NCHW" || format == "channels_first")
            return DATA_LAYOUT_NCHW;
        else
            CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
    }

    // Determine layout by layer's inputs
    int layout = DATA_LAYOUT_UNKNOWN;
    std::map<String, int>::const_iterator it;
    for (int i = 0, n = layer.input_size(); i < n; ++i)
    {
        it = data_layouts.find(layer.input(i).substr(0, layer.input(i).rfind(':')));
        if (it != data_layouts.end())
        {
            if (it->second == DATA_LAYOUT_UNKNOWN)
                return DATA_LAYOUT_UNKNOWN;
            else if (it->second != layout)
            {
                if (layout == DATA_LAYOUT_UNKNOWN)
                    layout = it->second;
                else
                    return DATA_LAYOUT_UNKNOWN;
            }
        }
    }
    return layout;
}

void TFImporter::populateNet(Net dstNet)
{
    RemoveIdentityOps(netBin);
    RemoveIdentityOps(netTxt);

    if (!netTxt.ByteSize())
        simplifySubgraphs(netBin);

    std::set<String> layers_to_ignore;

    tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin;

    int layersSize = net.node_size();

    std::map<String, int> data_layouts;

    // find all Const layers for params
    std::map<String, int> value_id;
    addConstNodes(netBin, value_id, layers_to_ignore);
    addConstNodes(netTxt, value_id, layers_to_ignore);

    std::map<String, int> layer_id;

    for (int li = 0; li < layersSize; li++)
    {
        tensorflow::NodeDef layer = net.node(li);
        String name = layer.name();
        String type = layer.op();
        LayerParams layerParams;

        if(layers_to_ignore.find(name) != layers_to_ignore.end())
            continue;

        data_layouts[name] = predictOutputDataLayout(layer, data_layouts);

        if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative")
        {
            // The first node of a dilated convolution subgraph.
            // Extract the input node, dilation rate and paddings.
            std::string input = layer.input(0);
            if (type == "SpaceToBatchND")
            {
                // op: "SpaceToBatchND"
                // input: "input"
                // input: "SpaceToBatchND/block_shape"
                // input: "SpaceToBatchND/paddings"
                CV_Assert(layer.input_size() == 3);

                DictValue dilation = parseDims(getConstBlob(layer, value_id, 1));
                CV_Assert(dilation.size() == 2 && dilation.get<int>(0) == dilation.get<int>(1));
                layerParams.set("dilation", dilation.get<int>(0));

                Mat paddings;
                parseTensor<int>(getConstBlob(layer, value_id, 2), paddings);

                // paddings is a 2x2 matrix: [[top, bot], [left, right]]
                layerParams.set("pad_h", paddings.at<float>(0));
                layerParams.set("pad_w", paddings.at<float>(2));

                StrIntVector next_layers = getNextLayers(net, name, "Conv2D");
                CV_Assert(next_layers.size() == 1);
                layer = net.node(next_layers[0].second);
                layers_to_ignore.insert(next_layers[0].first);
                name = layer.name();
                type = layer.op();
            }

            layerParams.set("bias_term", false);
            layerParams.blobs.resize(1);

            StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
            if (next_layers.size() == 1) {
                layerParams.set("bias_term", true);
                layerParams.blobs.resize(2);

                int weights_layer_index = next_layers[0].second;

                blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
                ExcludeLayer(net, weights_layer_index, 0, false);
                layers_to_ignore.insert(next_layers[0].first);
            }

            kernelFromTensor(getConstBlob(layer, value_id), layerParams.blobs[0]);
            int* kshape = layerParams.blobs[0].size.p;
            if (type == "DepthwiseConv2dNative")
            {
                const int chMultiplier = kshape[0];
                const int inCh = kshape[1];
                const int height = kshape[2];
                const int width = kshape[3];

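                // kernelFromTensor produced a (chMultiplier, inCh, H, W) blob; interleave
                // it to (inCh*chMultiplier, 1, H, W) so the filters of one input channel
                // are stored contiguously.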
                Mat copy = layerParams.blobs[0].clone();
                float* src = (float*)copy.data;
                float* dst = (float*)layerParams.blobs[0].data;
                for (int i = 0; i < chMultiplier; ++i)
                    for (int j = 0; j < inCh; ++j)
                        for (int s = 0; s < height * width; ++s)
                            {
                                int src_i = (i * inCh + j) * height * width + s;
                                int dst_i = (j * chMultiplier + i) * height * width + s;
                                dst[dst_i] = src[src_i];
                            }
                // TODO Use reshape instead
                kshape[0] = inCh * chMultiplier;
                kshape[1] = 1;
                size_t* kstep = layerParams.blobs[0].step.p;
                kstep[0] = kstep[1]; // fix steps too
            }
            layerParams.set("kernel_h", kshape[2]);
            layerParams.set("kernel_w", kshape[3]);
            layerParams.set("num_output", kshape[0]);

            setStrides(layerParams, layer);
            setPadding(layerParams, layer);

            // The final node of a dilated convolution subgraph.
            next_layers = getNextLayers(net, name, "BatchToSpaceND");
            if (!next_layers.empty())
            {
                layerParams.set("pad_mode", "");  // We use padding values.
                CV_Assert(next_layers.size() == 1);
                ExcludeLayer(net, next_layers[0].second, 0, false);
                layers_to_ignore.insert(next_layers[0].first);
            }

            int id = dstNet.addLayer(name, "Convolution", layerParams);
            layer_id[name] = id;

            // one input only
            connect(layer_id, dstNet, parsePin(input), id, 0);

            if (data_layouts[name] == DATA_LAYOUT_UNKNOWN)
                data_layouts[name] = DATA_LAYOUT_NHWC;
        }
        else if (type == "BiasAdd" || type == "Add")
        {
            bool haveConst = false;
            for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
            {
                Pin input = parsePin(layer.input(ii));
                haveConst = value_id.find(input.name) != value_id.end();
            }
            CV_Assert(!haveConst || layer.input_size() == 2);

            if (haveConst)
            {
                layerParams.blobs.resize(1);
                blobFromTensor(getConstBlob(layer, value_id), layerParams.blobs[0]);

                int id = dstNet.addLayer(name, "Shift", layerParams);
                layer_id[name] = id;

                // one input only
                connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
            }
            else
            {
                layerParams.set("operation", "sum");
                int id = dstNet.addLayer(name, "Eltwise", layerParams);
                layer_id[name] = id;

                for (int ii = 0; ii < layer.input_size(); ii++)
                {
                    Pin inp = parsePin(layer.input(ii));
                    if (layer_id.find(inp.name) == layer_id.end())
                        CV_Error(Error::StsError, "Input layer not found: " + inp.name);
                    dstNet.connect(layer_id.at(inp.name), inp.blobIndex, id, ii);
                }
            }
        }
        else if (type == "MatMul")
        {
            CV_Assert(layer.input_size() == 2);

            layerParams.set("bias_term", false);
            layerParams.blobs.resize(1);

            StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
            if (next_layers.empty())
            {
                next_layers = getNextLayers(net, name, "Add");
            }
            if (next_layers.size() == 1) {
                layerParams.set("bias_term", true);
                layerParams.blobs.resize(2);

                int weights_layer_index = next_layers[0].second;
                blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
                ExcludeLayer(net, weights_layer_index, 0, false);
                layers_to_ignore.insert(next_layers[0].first);
            }

            int kernel_blob_index = -1;
            blobFromTensor(getConstBlob(layer, value_id, -1, &kernel_blob_index), layerParams.blobs[0]);

            if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed
                Mat data = layerParams.blobs[0].t();
                layerParams.blobs[0] = data.clone();
            }

            layerParams.set("num_output", layerParams.blobs[0].size[0]);

            int id = dstNet.addLayer(name, "InnerProduct", layerParams);
            layer_id[name] = id;

            // one input only
            int input_blob_index = kernel_blob_index == 0 ? 1 : 0;
            connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0);
            data_layouts[name] = DATA_LAYOUT_UNKNOWN;
        }
        else if (type == "Reshape")
        {
            Pin inpId = parsePin(layer.input(0));
            Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1));

            if (newShape.total() != 4 && data_layouts[layer.input(0)] == DATA_LAYOUT_NHWC)
            {
                LayerParams permLP;
                int order[] = {0, 2, 3, 1};  // From OpenCV's NCHW to NHWC.
                permLP.set("order", DictValue::arrayInt<int*>(order, 4));

                std::string permName = name + "/nchw";
                CV_Assert(layer_id.find(permName) == layer_id.end());
                int permId = dstNet.addLayer(permName, "Permute", permLP);
                layer_id[permName] = permId;
                connect(layer_id, dstNet, inpId, permId, 0);
                inpId = Pin(permName);
            }
            else if (newShape.total() == 4 && data_layouts[layer.input(0)] == DATA_LAYOUT_NHWC)
            {
                // NHWC->NCHW
                std::swap(*newShape.ptr<int32_t>(0, 2), *newShape.ptr<int32_t>(0, 3));
                std::swap(*newShape.ptr<int32_t>(0, 1), *newShape.ptr<int32_t>(0, 2));
            }
            layerParams.set("dim", DictValue::arrayInt<int*>(newShape.ptr<int>(), newShape.total()));

            int id = dstNet.addLayer(name, "Reshape", layerParams);
            layer_id[name] = id;

            // one input only
            connect(layer_id, dstNet, inpId, id, 0);
        }
        else if (type == "Flatten" || type == "Squeeze")
        {
            Pin inpId = parsePin(layer.input(0));
            int inpLayout = data_layouts[layer.input(0)];
            if (type == "Squeeze")
            {
                CV_Assert(hasLayerAttr(layer, "squeeze_dims"));
                const tensorflow::AttrValue& dims = getLayerAttr(layer, "squeeze_dims");
                if (inpLayout == DATA_LAYOUT_NHWC)
                {
                    if (dims.list().i_size() != 2 || dims.list().i(0) != 1 || dims.list().i(1) != 2)
                        CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
                }
                else if (inpLayout == DATA_LAYOUT_NCHW)
                {
                    if (dims.list().i_size() != 2 || dims.list().i(0) != 2 || dims.list().i(1) != 3)
                        CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
                }
                else
                    CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
            }
            if (inpLayout == DATA_LAYOUT_NHWC)
            {
                LayerParams permLP;
                int order[] = {0, 2, 3, 1};  // From OpenCV's NCHW to NHWC.
                permLP.set("order", DictValue::arrayInt<int*>(order, 4));

                std::string permName = name + "/nchw";
                CV_Assert(layer_id.find(permName) == layer_id.end());
                int permId = dstNet.addLayer(permName, "Permute", permLP);
                layer_id[permName] = permId;
                connect(layer_id, dstNet, inpId, permId, 0);
                inpId = Pin(permName);
            }
            int id = dstNet.addLayer(name, "Flatten", layerParams);
            layer_id[name] = id;
            connect(layer_id, dstNet, inpId, id, 0);
            data_layouts[name] = DATA_LAYOUT_UNKNOWN;
        }
        else if (type == "Transpose")
        {
            Mat perm = getTensorContent(getConstBlob(layer, value_id, 1));
            CV_Assert(perm.type() == CV_32SC1);
            int* permData = (int*)perm.data;
            if (perm.total() == 4)
            {
                // Only NHWC <-> NCHW permutations are allowed. This is how OpenCV
                // always keeps the NCHW layout.
                if (data_layouts[layer.input(0)] == DATA_LAYOUT_NHWC)
                {
                    if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2)
                    {
                        // in TensorFlow: NHWC->NCHW
                        // in OpenCV: NCHW->NCHW
                        data_layouts[name] = DATA_LAYOUT_NCHW;
                    }
                    else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
                    {
                        // in TensorFlow: NHWC->NHWC
                        // in OpenCV: NCHW->NCHW
                        data_layouts[name] = DATA_LAYOUT_NHWC;
                    }
                    else
                        CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
                }
                else if (data_layouts[layer.input(0)] == DATA_LAYOUT_NCHW)
                {
                    if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1)
                    {
                        // in TensorFlow: NCHW->NHWC
                        // in OpenCV: NCHW->NCHW
                        data_layouts[name] = DATA_LAYOUT_NHWC;
                    }
                    else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
                    {
                        // in TensorFlow: NCHW->NCHW
                        // in OpenCV: NCHW->NCHW
                        data_layouts[name] = DATA_LAYOUT_NCHW;
                    }
                    else
                        CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
                }
                int id = dstNet.addLayer(name, "Identity", layerParams);
                layer_id[name] = id;
                connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
            }
            else
            {
                layerParams.set("order", DictValue::arrayInt<int*>(permData, perm.total()));

                int id = dstNet.addLayer(name, "Permute", layerParams);
                layer_id[name] = id;

                // one input only
                connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
                data_layouts[name] = DATA_LAYOUT_UNKNOWN;
            }
        }
        else if (type == "Const")
        {
        }
        else if (type == "LRN")
        {
            if(hasLayerAttr(layer, "alpha")) {
                layerParams.set("alpha", getLayerAttr(layer, "alpha").f());
            }
            if(hasLayerAttr(layer, "beta")) {
                layerParams.set("beta", getLayerAttr(layer, "beta").f());
            }
            if(hasLayerAttr(layer, "depth_radius")) {
                int radius = (int)getLayerAttr(layer, "depth_radius").i();
                layerParams.set("local_size", 2*radius + 1);
            }
            if(hasLayerAttr(layer, "bias")) {
                layerParams.set("bias", getLayerAttr(layer, "bias").f());
            }
            layerParams.set("norm_by_size", false);

            int id = dstNet.addLayer(name, "LRN", layerParams);
            layer_id[name] = id;

            connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
        }
        else if (type == "Concat" || type == "ConcatV2")
        {
            int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);
            int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
            layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW(axis) : axis);

            int id = dstNet.addLayer(name, "Concat", layerParams);
            layer_id[name] = id;

            int from = (type == "Concat" ? 1 : 0);
            int to = (type == "Concat" ? layer.input_size() : layer.input_size() - 1);

            // input(0) or input(n-1) is concat_dim
            for (int ii = from; ii < to; ii++)
            {
                Pin inp = parsePin(layer.input(ii));
                if (layer_id.find(inp.name) == layer_id.end())
                    CV_Error(Error::StsError, "Input layer not found: " + inp.name);
                dstNet.connect(layer_id.at(inp.name), inp.blobIndex, id, ii - from);
            }
        }
        else if (type == "MaxPool")
        {
            layerParams.set("pool", "max");

            setKSize(layerParams, layer);
            setStrides(layerParams, layer);
            setPadding(layerParams, layer);

            int id = dstNet.addLayer(name, "Pooling", layerParams);
            layer_id[name] = id;

            connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
        }
        else if (type == "AvgPool")
        {
            layerParams.set("pool", "ave");
            layerParams.set("ave_pool_padded_area", false);

            setKSize(layerParams, layer);
            setStrides(layerParams, layer);
            setPadding(layerParams, layer);

            int id = dstNet.addLayer(name, "Pooling", layerParams);
            layer_id[name] = id;

            connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
        }
        else if (type == "Placeholder")
        {
            std::vector<String> netInputs(1);
            netInputs[0] = name;
            layer_id[name] = 0;
            dstNet.setInputsNames(netInputs);
        }
        else if (type == "Split") {
            // TODO: determining axis index remapping by input dimensions order of input blob
            // TODO: slicing input may be Const op
            // TODO: slicing kernels for convolutions - in current implementation it is impossible
            // TODO: add parsing num of slices parameter
            CV_Assert(layer.input_size() == 2);
            // num_split
            // 1st blob is dims tensor
            int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
            layerParams.set("axis", toNCHW(axis));

            int id = dstNet.addLayer(name, "Slice", layerParams);
            layer_id[name] = id;

            // one input only
            connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
        }
        else if (type == "Slice")
        {
            // op: "Slice"
            // input: "input_node"
            // input: "Slice/begin"
            // input: "Slice/size"
            CV_Assert(layer.input_size() == 3);
            Mat begins = getTensorContent(getConstBlob(layer, value_id, 1));
            Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2));
            CV_Assert(!begins.empty(), !sizes.empty(), begins.type() == CV_32SC1,
                      sizes.type() == CV_32SC1);

            if (begins.total() == 4)
            {
                // Perhaps we have an NHWC order. Swap it to NCHW.
                std::swap(*begins.ptr<int32_t>(0, 2), *begins.ptr<int32_t>(0, 3));
                std::swap(*begins.ptr<int32_t>(0, 1), *begins.ptr<int32_t>(0, 2));
                std::swap(*sizes.ptr<int32_t>(0, 2), *sizes.ptr<int32_t>(0, 3));
                std::swap(*sizes.ptr<int32_t>(0, 1), *sizes.ptr<int32_t>(0, 2));
            }
            layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total()));
            layerParams.set("size", DictValue::arrayInt((int*)sizes.data, sizes.total()));

            int id = dstNet.addLayer(name, "Slice", layerParams);
            layer_id[name] = id;

            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
        }
        else if (type == "Mul")
        {
            bool haveConst = false;
            for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
            {
                Pin input = parsePin(layer.input(ii));
                haveConst = value_id.find(input.name) != value_id.end();
            }
            CV_Assert(!haveConst || layer.input_size() == 2);

            if (haveConst)
            {
                // Multiplication by constant.
                CV_Assert(layer.input_size() == 2);
                Mat scaleMat = getTensorContent(getConstBlob(layer, value_id));
                CV_Assert(scaleMat.type() == CV_32FC1);

                int id;
                if (scaleMat.total() == 1)  // is a scalar.
                {
                    // Try to match with a LeakyRelu:
                    // node {
                    //   name: "LeakyRelu/mul"
                    //   op: "Mul"
                    //   input: "LeakyRelu/alpha"
                    //   input: "input"
                    // }
                    // node {
                    //   name: "LeakyRelu/Maximum"
                    //   op: "Maximum"
                    //   input: "LeakyRelu/mul"
                    //   input: "input"
                    // }
                    StrIntVector next_layers = getNextLayers(net, name, "Maximum");
                    if (!next_layers.empty())
                    {
                        int maximumLayerIdx = next_layers[0].second;
                        ExcludeLayer(net, maximumLayerIdx, 0, false);
                        layers_to_ignore.insert(next_layers[0].first);

                        layerParams.set("negative_slope", scaleMat.at<float>(0));
                        id = dstNet.addLayer(name, "ReLU", layerParams);
                    }
                    else
                    {
                        // Just a multiplication.
                        layerParams.set("scale", scaleMat.at<float>(0));
                        id = dstNet.addLayer(name, "Power", layerParams);
                    }
                }
                else  // is a vector
                {
                    layerParams.blobs.resize(1, scaleMat);

                    StrIntVector next_layers = getNextLayers(net, name, "Add");
                    if (!next_layers.empty())
                    {
                        layerParams.set("bias_term", true);
                        layerParams.blobs.resize(2);

                        int weights_layer_index = next_layers[0].second;
                        blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back());
                        ExcludeLayer(net, weights_layer_index, 0, false);
                        layers_to_ignore.insert(next_layers[0].first);
                    }

                    id = dstNet.addLayer(name, "Scale", layerParams);
                }
                layer_id[name] = id;

                Pin inp0 = parsePin(layer.input(0));
                if (layer_id.find(inp0.name) != layer_id.end())
                    // First operand is produced by another layer; connect it.
                    connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
                else
                    connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
            }
            else
            {
                layerParams.set("operation", "prod");
                int id = dstNet.addLayer(name, "Eltwise", layerParams);
                layer_id[name] = id;

                for (int ii = 0; ii < layer.input_size(); ii++)
                {
                    Pin inp = parsePin(layer.input(ii));
                    if (layer_id.find(inp.name) == layer_id.end())
                        CV_Error(Error::StsError, "Input layer not found: " + inp.name);
                    dstNet.connect(layer_id.at(inp.name), inp.blobIndex, id, ii);
                }
            }
        }
        else if (type == "Pad")
        {
            Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
            CV_Assert(paddings.type() == CV_32SC1);
            if (paddings.total() == 8)
            {
                // Perhaps we have the NHWC padding dimensions order.
                //  N    H    W    C
                // 0 1  2 3  4 5  6 7
                std::swap(*paddings.ptr<int32_t>(0, 2), *paddings.ptr<int32_t>(0, 6));
                std::swap(*paddings.ptr<int32_t>(0, 3), *paddings.ptr<int32_t>(0, 7));
                //  N    C    W    H
                // 0 1  2 3  4 5  6 7
                std::swap(*paddings.ptr<int32_t>(0, 4), *paddings.ptr<int32_t>(0, 6));
                std::swap(*paddings.ptr<int32_t>(0, 5), *paddings.ptr<int32_t>(0, 7));
                //  N    C    H    W
                // 0 1  2 3  4 5  6 7
            }
            layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));

            int id = dstNet.addLayer(name, "Padding", layerParams);
            layer_id[name] = id;

            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
        }
        else if (type == "FusedBatchNorm")
        {
            // op: "FusedBatchNorm"
            // input: "input"
            // input: "BatchNorm/gamma"
            // input: "BatchNorm/beta"
            // input: "BatchNorm/moving_mean"
            // input: "BatchNorm/moving_variance"
            if (layer.input_size() != 5)
                CV_Error(Error::StsNotImplemented,
                         "Expected gamma, beta, mean and std");
            Pin inpId = parsePin(layer.input(0));

            bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b();

            layerParams.blobs.resize(2);

            const tensorflow::TensorProto& gammaTensor = getConstBlob(layer, value_id, 1);
            if (!gammaTensor.tensor_content().empty())
            {
                layerParams.blobs.resize(layerParams.blobs.size() + 1);
                layerParams.set("has_weight", true);
                blobFromTensor(gammaTensor, layerParams.blobs.back());
            }
            else
                layerParams.set("has_weight", false);

            const tensorflow::TensorProto& betaTensor = getConstBlob(layer, value_id, 2);
            if (!betaTensor.tensor_content().empty())
            {
                layerParams.blobs.resize(layerParams.blobs.size() + 1);
                layerParams.set("has_bias", true);
                blobFromTensor(betaTensor, layerParams.blobs.back());
            }
            else
                layerParams.set("has_bias", false);

            Mat mean, std;
            if (isTraining)
            {
                if (layerParams.blobs.size() == 2)
                    CV_Error(Error::StsNotImplemented, "Cannot determine number "
                             "of parameters for batch normalization layer.");
                mean = Mat::zeros(1, layerParams.blobs[3].total(), CV_32F);
                std = Mat::ones(1, layerParams.blobs[3].total(), CV_32F);

                // Add an extra layer: Mean-Variance normalization
                LayerParams mvnParams;
                std::string mvnName = name + "/MVN";
                CV_Assert(layer_id.find(mvnName) == layer_id.end());
                int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);
                layer_id[mvnName] = mvnId;
                connect(layer_id, dstNet, inpId, mvnId, 0);
                inpId = Pin(mvnName);
            }
            else
            {
                blobFromTensor(getConstBlob(layer, value_id, 3), mean);
                blobFromTensor(getConstBlob(layer, value_id, 4), std);
            }
            layerParams.blobs[0] = mean;
            layerParams.blobs[1] = std;

            if (hasLayerAttr(layer, "epsilon"))
                layerParams.set("eps", getLayerAttr(layer, "epsilon").f());

            int id = dstNet.addLayer(name, "BatchNorm", layerParams);
            layer_id[name] = id;

            // one input only
            connect(layer_id, dstNet, inpId, id, 0);
        }
        else if (type == "Conv2DBackpropInput")
        {
            // op: "Conv2DBackpropInput"
            // input: "conv2d_transpose/output_shape"
            // input: "weights"
            // input: "input"
            if (layer.input_size() != 3)
                CV_Error(Error::StsNotImplemented,
                         "Expected output shape, weights and input nodes");

            layerParams.set("bias_term", false);
            layerParams.blobs.resize(1);

            StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
            if (next_layers.size() == 1)
            {
                layerParams.set("bias_term", true);
                layerParams.blobs.resize(2);

                int weights_layer_index = next_layers[0].second;

                blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
                ExcludeLayer(net, weights_layer_index, 0, false);
                layers_to_ignore.insert(next_layers[0].first);
            }

            kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);

            const int* kshape = layerParams.blobs[0].size.p;
            const int kernelH = kshape[2];
            const int kernelW = kshape[3];
            layerParams.set("kernel_h", kernelH);
            layerParams.set("kernel_w", kernelW);
            layerParams.set("num_output", kshape[1]);

            setStrides(layerParams, layer);
            setPadding(layerParams, layer);

            // For a convolution layer, the output shape is computed as
            // o = 1 + (i - k + 2*p) / s
            // where i - input size, o - output size, k - kernel size, p - pad, s - stride.
            // In TensorFlow, p == 0 when padMode == 'VALID' and p == (k - 1) / 2
            // considering that k is odd.
            // SAME:  o = 1 + (i - 1) / s
            // VALID: o = 1 + (i - k) / s
            // A deconvolution layer's output shape is computed as
            // SAME:  o = 1 + (i - 1)*s
            // VALID: o = (i - 1)*s + k
            // If output_shape differs from the formulas above, adjust padding is applied.

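            // Example with i = 4, k = 3, s = 2:
            //   SAME:  o = 1 + (4 - 1)*2 = 7;  VALID: o = (4 - 1)*2 + 3 = 9.
            // The adj_h/adj_w values below absorb output_shape sizes that fall
            // short of these maxima.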
            const int strideY = layerParams.get<int>("stride_h");
            const int strideX = layerParams.get<int>("stride_w");
            Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
            const int outH = outShape.at<int>(1);
            const int outW = outShape.at<int>(2);
1308             if (layerParams.get<String>("pad_mode") == "SAME")
1309             {
1310                 layerParams.set("adj_w", (outW - 1) % strideX);
1311                 layerParams.set("adj_h", (outH - 1) % strideY);
1312             }
1313             else if (layerParams.get<String>("pad_mode") == "VALID")
1314             {
1315                 layerParams.set("adj_w", (outW - kernelW) % strideX);
1316                 layerParams.set("adj_h", (outH - kernelH) % strideY);
1317             }
1318             int id = dstNet.addLayer(name, "Deconvolution", layerParams);
1319             layer_id[name] = id;
1320
1321             // Only input(2) carries data; inputs 0 (output_shape) and 1 (weights) are constants.
1322             connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
1323         }
1324         else if (type == "BlockLSTM")
1325         {
1326             // op: "BlockLSTM"
1327             // input: "lstm_block_wrapper/ToInt64/x"  (ignore, number of time steps)
1328             // input: "input"
1329             // input: "lstm_block_wrapper/zeros"      (ignore)
1330             // input: "lstm_block_wrapper/zeros"      (ignore)
1331             // input: "lstm_block_wrapper/kernel"
1332             // input: "lstm_block_wrapper/w_i_diag"
1333             // input: "lstm_block_wrapper/w_f_diag"
1334             // input: "lstm_block_wrapper/w_o_diag"
1335             // input: "lstm_block_wrapper/bias"
1336             if (layer.input_size() != 9)
1337                 CV_Error(Error::StsNotImplemented, "Unexpected number of input nodes");
1338
1339             if (hasLayerAttr(layer, "forget_bias"))
1340                 layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f());
1341
1342             if (hasLayerAttr(layer, "cell_clip"))
1343             {
1344                 float cellClip = getLayerAttr(layer, "cell_clip").f();
1345                 // Cell clip is disabled if it's negative.
1346                 if (cellClip >= 0)
1347                 {
1348                     layerParams.set("use_cell_clip", true);
1349                     layerParams.set("cell_clip", cellClip);
1350                 }
1351             }
1352
1353             Mat W, Wh, Wx, b;
1354             blobFromTensor(getConstBlob(layer, value_id, 4), W);
1355             blobFromTensor(getConstBlob(layer, value_id, 8), b);
1356             const int outSize = W.cols / 4;
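                 // W is TF's fused kernel of shape [inputSize + outSize, 4*outSize]:
                 // the first (W.rows - outSize) rows multiply the input x_t, the last
                 // outSize rows multiply the previous hidden state h_{t-1}, and the
                 // columns hold the four gate blocks of outSize each.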
1357
1358             // Reorder fused gate blocks: IGFO -> IFOG (swap 1<->2, then 2<->3).
1359             float* weightData = (float*)W.data;
1360             for (int i = 0; i < W.rows; ++i)
1361                 for (int j = 0; j < outSize; ++j)
1362                 {
1363                     std::swap(weightData[i * W.cols + 1 * outSize + j],
1364                               weightData[i * W.cols + 2 * outSize + j]);
1365                     std::swap(weightData[i * W.cols + 2 * outSize + j],
1366                               weightData[i * W.cols + 3 * outSize + j]);
1367                 }
1368             Wx = W.rowRange(0, W.rows - outSize).t();
1369             Wh = W.rowRange(W.rows - outSize, W.rows).t();
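                 // A note on the split above: OpenCV's LSTM layer expects the recurrent
                 // weights Wh and the input weights Wx as gate-major [4*outSize x N]
                 // matrices, hence the row split and the transposition.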
1370
1371             layerParams.blobs.resize(3);
1372             layerParams.blobs[0] = Wh;
1373             layerParams.blobs[1] = Wx;
1374             layerParams.blobs[2] = b;
1375
1376             if (hasLayerAttr(layer, "use_peephole"))
1377             {
1378                 bool usePeephole = getLayerAttr(layer, "use_peephole").b();
1379                 if (usePeephole)
1380                 {
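                     // Peephole weights scale the cell state element-wise; they are
                     // expanded into diagonal matrices below so the LSTM layer can
                     // apply them as ordinary matrix products.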
1381                     layerParams.set("use_peephole", true);
1382                     layerParams.blobs.resize(6);
1383                     for (int i = 0; i < 3; ++i)
1384                     {
1385                         Mat w;
1386                         blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);
1387                         w = w.reshape(1, w.total());  // Single column.
1388                         w = Mat::diag(w);  // Make a diagonal matrix.
1389                         layerParams.blobs[3 + i] = w;
1390                     }
1391                 }
1392             }
1393
1394             int id = dstNet.addLayer(name, "LSTM", layerParams);
1395             layer_id[name] = id;
1396
1397             // Only input(1), the data tensor, becomes a real input; the rest are constants.
1398             connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1399             data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1400         }
1401         else if (type == "ResizeNearestNeighbor")
1402         {
1403             Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1));
1404             CV_Assert(outSize.type() == CV_32SC1, outSize.total() == 2);
1405
1406             layerParams.set("height", outSize.at<int>(0, 0));
1407             layerParams.set("width", outSize.at<int>(0, 1));
1408
1409             if (hasLayerAttr(layer, "align_corners"))
1410                 layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b());
1411
1412             int id = dstNet.addLayer(name, "ResizeNearestNeighbor", layerParams);
1413             layer_id[name] = id;
1414
1415             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1416         }
1417         else if (type == "L2Normalize")
1418         {
1419             // op: "L2Normalize"
1420             // input: "input"
1421             // input: "reduction_indices" (axis)
1422             CV_Assert(layer.input_size() == 2);
1423             Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));
1424             CV_Assert(reductionIndices.type() == CV_32SC1);
1425
1426             const int numAxes = reductionIndices.total();
1427             if (data_layouts[name] == DATA_LAYOUT_NHWC)
1428                 for (int i = 0; i < numAxes; ++i)
1429                     reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));
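                 // For example, TF reduction_indices = [3] (channels in NHWC) becomes
                 // axis 1 after the remap, i.e. L2 normalization across channels at
                 // every spatial position.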
1430
1431             cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);
1432             for (int i = 1; i < numAxes; ++i)
1433             {
1434                 CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);
1435                 // Axes have the same sign.
1436                 CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);
1437             }
1438             layerParams.set("start_axis", reductionIndices.at<int>(0));
1439             layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));
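                 // The contiguous run of axes collapses into the single
                 // [start_axis, end_axis] range understood by the Normalize layer.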
1440
1441             int id = dstNet.addLayer(name, "Normalize", layerParams);
1442             layer_id[name] = id;
1443             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1444         }
1445         else if (type == "PriorBox")
1446         {
1447             if (hasLayerAttr(layer, "min_size"))
1448                 layerParams.set("min_size", getLayerAttr(layer, "min_size").i());
1449             if (hasLayerAttr(layer, "max_size"))
1450                 layerParams.set("max_size", getLayerAttr(layer, "max_size").i());
1451             if (hasLayerAttr(layer, "flip"))
1452                 layerParams.set("flip", getLayerAttr(layer, "flip").b());
1453             if (hasLayerAttr(layer, "clip"))
1454                 layerParams.set("clip", getLayerAttr(layer, "clip").b());
1455             if (hasLayerAttr(layer, "offset"))
1456                 layerParams.set("offset", getLayerAttr(layer, "offset").f());
1457             if (hasLayerAttr(layer, "step"))
1458                 layerParams.set("step", getLayerAttr(layer, "step").f());
1459
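                 // The remaining attributes carry float arrays serialized as tensor
                 // protos; getTensorContent unpacks them below.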
1460             const std::string paramNames[] = {"variance", "aspect_ratio", "scales",
1461                                               "width", "height"};
1462             for (int i = 0; i < 5; ++i)
1463             {
1464                 if (hasLayerAttr(layer, paramNames[i]))
1465                 {
1466                     Mat values = getTensorContent(getLayerAttr(layer, paramNames[i]).tensor());
1467                     layerParams.set(paramNames[i],
1468                                     DictValue::arrayReal<float*>((float*)values.data, values.total()));
1469                 }
1470             }
1471             int id = dstNet.addLayer(name, "PriorBox", layerParams);
1472             layer_id[name] = id;
1473             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1474             connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
1475             data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1476         }
1477         else if (type == "DetectionOutput")
1478         {
1479             // op: "DetectionOutput"
1480             // input_0: "locations"
1481             // input_1: "classifications"
1482             // input_2: "prior_boxes"
1483             if (hasLayerAttr(layer, "num_classes"))
1484                 layerParams.set("num_classes", getLayerAttr(layer, "num_classes").i());
1485             if (hasLayerAttr(layer, "share_location"))
1486                 layerParams.set("share_location", getLayerAttr(layer, "share_location").b());
1487             if (hasLayerAttr(layer, "background_label_id"))
1488                 layerParams.set("background_label_id", getLayerAttr(layer, "background_label_id").i());
1489             if (hasLayerAttr(layer, "nms_threshold"))
1490                 layerParams.set("nms_threshold", getLayerAttr(layer, "nms_threshold").f());
1491             if (hasLayerAttr(layer, "top_k"))
1492                 layerParams.set("top_k", getLayerAttr(layer, "top_k").i());
1493             if (hasLayerAttr(layer, "code_type"))
1494                 layerParams.set("code_type", getLayerAttr(layer, "code_type").s());
1495             if (hasLayerAttr(layer, "keep_top_k"))
1496                 layerParams.set("keep_top_k", getLayerAttr(layer, "keep_top_k").i());
1497             if (hasLayerAttr(layer, "confidence_threshold"))
1498                 layerParams.set("confidence_threshold", getLayerAttr(layer, "confidence_threshold").f());
1499             if (hasLayerAttr(layer, "loc_pred_transposed"))
1500                 layerParams.set("loc_pred_transposed", getLayerAttr(layer, "loc_pred_transposed").b());
1501
1502             int id = dstNet.addLayer(name, "DetectionOutput", layerParams);
1503             layer_id[name] = id;
1504             for (int i = 0; i < 3; ++i)
1505                 connect(layer_id, dstNet, parsePin(layer.input(i)), id, i);
1506             data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1507         }
1508         else if (type == "Softmax")
1509         {
1510             if (hasLayerAttr(layer, "axis"))
1511                 layerParams.set("axis", getLayerAttr(layer, "axis").i());
1512
1513             int id = dstNet.addLayer(name, "Softmax", layerParams);
1514             layer_id[name] = id;
1515             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1516         }
1517         else if (type == "Mean")
1518         {
1519             Mat indices = getTensorContent(getConstBlob(layer, value_id, 1));
1520             CV_Assert(indices.type() == CV_32SC1);
1521
1522             if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
1523                 CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation.");
1524
1525             layerParams.set("pool", "ave");
1526             layerParams.set("global_pooling", true);
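                 // reduce_mean over NHWC axes 1 and 2 (height and width) is exactly a
                 // global average pooling over the spatial dims of the NCHW blob.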
1527
1528             int id = dstNet.addLayer(name, "Pooling", layerParams);
1529             layer_id[name] = id;
1530
1531             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1532
1533             // There are two attributes, "keepdims" and a deprecated "keep_dims".
1534             bool keepDims = false;
1535             if (hasLayerAttr(layer, "keepdims"))
1536                 keepDims = getLayerAttr(layer, "keepdims").b();
1537             else if (hasLayerAttr(layer, "keep_dims"))
1538                 keepDims = getLayerAttr(layer, "keep_dims").b();
1539
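                 // Global pooling yields an N x C x 1 x 1 blob; when TF drops the
                 // reduced dims (keepdims == false), a Flatten layer is appended to
                 // produce the expected N x C shape.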
1540             if (!keepDims)
1541             {
1542                 LayerParams flattenLp;
1543                 std::string flattenName = name + "/flatten";
1544                 CV_Assert(layer_id.find(flattenName) == layer_id.end());
1545                 int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
1546                 layer_id[flattenName] = flattenId;
1547                 connect(layer_id, dstNet, Pin(name), flattenId, 0);
1548             }
1549         }
1550         else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
1551                  type == "Relu" || type == "Elu" ||
1552                  type == "Identity" || type == "Relu6")
1553         {
1554             std::string dnnType = type;
1555             if (type == "Abs") dnnType = "AbsVal";
1556             else if (type == "Tanh") dnnType = "TanH";
1557             else if (type == "Relu") dnnType = "ReLU";
1558             else if (type == "Relu6") dnnType = "ReLU6";
1559             else if (type == "Elu") dnnType = "ELU";
1560
1561             int id = dstNet.addLayer(name, dnnType, layerParams);
1562             layer_id[name] = id;
1563             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1564         }
1565         else
1566         {
1567             printLayerAttr(layer);
1568             CV_Error_(Error::StsError, ("Unknown layer type %s in op %s", type.c_str(), name.c_str()));
1569         }
1570     }
1571 }
1572
1573 } // namespace
1574
1575 #endif //HAVE_PROTOBUF
1576
1577 Net readNetFromTensorflow(const String &model, const String &config)
1578 {
1579     TFImporter importer(model.c_str(), config.c_str());
1580     Net net;
1581     importer.populateNet(net);
1582     return net;
1583 }
1584
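// A minimal usage sketch (file name and input geometry are assumptions):
//     Net net = readNetFromTensorflow("frozen_graph.pb");
//     net.setInput(blobFromImage(image, 1.0, Size(224, 224)));
//     Mat prob = net.forward();
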
1585 Net readNetFromTensorflow(const char* bufferModel, size_t lenModel,
1586                           const char* bufferConfig, size_t lenConfig)
1587 {
1588     TFImporter importer(bufferModel, lenModel, bufferConfig, lenConfig);
1589     Net net;
1590     importer.populateNet(net);
1591     return net;
1592 }
1593
1594 CV__DNN_EXPERIMENTAL_NS_END
1595 }} // namespace