modules/dnn/src/tensorflow/tf_importer.cpp

   1 // This file is part of OpenCV project.
   2 // It is subject to the license terms in the LICENSE file found in the top-level directory
   3 // of this distribution and at http://opencv.org/license.html.
   4
   5 // Copyright (C) 2016, Intel Corporation, all rights reserved.
   6 // Third party copyrights are property of their respective owners.
   7
   8 /*
   9 Implementation of Tensorflow models parser
  10 */
  11
  12 #include "../precomp.hpp"
  13
  14 #include <opencv2/core/utils/logger.defines.hpp>
  15 #undef CV_LOG_STRIP_LEVEL
  16 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
  17 #include <opencv2/core/utils/logger.hpp>
  18
  19 #ifdef HAVE_PROTOBUF
  20 #include "tf_io.hpp"
  21
  22 #include <iostream>
  23 #include <fstream>
  24 #include <algorithm>
  25 #include <string>
  26 #include <queue>
  27 #include "tf_graph_simplifier.hpp"
  28 #endif
  29
  30 namespace cv {
  31 namespace dnn {
  32 CV__DNN_INLINE_NS_BEGIN
  33
  34 #if HAVE_PROTOBUF
  35
  36 using ::google::protobuf::RepeatedField;
  37 using ::google::protobuf::RepeatedPtrField;
  38 using ::google::protobuf::Message;
  39 using ::google::protobuf::Descriptor;
  40 using ::google::protobuf::FieldDescriptor;
  41 using ::google::protobuf::Reflection;
  42
  43 namespace
  44 {
  45
  46 static int toNCHW(int idx)
  47 {
  48     CV_Assert(-4 <= idx && idx < 4);
  49     if (idx == 0) return 0;
  50     else if (idx > 0) return idx % 3 + 1;
  51     else return (4 + idx) % 3 + 1;
  52 }
  53
  54 static int toNCDHW(int idx)
  55 {
  56     CV_Assert(-5 <= idx && idx < 5);
  57     if (idx == 0) return 0;
  58     else if (idx > 0) return idx % 4 + 1;
  59     else return (5 + idx) % 4 + 1;
  60 }
  61
// These values are used to indicate a layer output's data layout where it's possible.
enum DataLayout
{
    DATA_LAYOUT_NHWC,     // reported for data_format "NHWC" / "channels_last"
    DATA_LAYOUT_NCHW,     // reported for data_format "NCHW" / "channels_first"
    DATA_LAYOUT_NDHWC,    // reported for data_format "NDHWC"
    DATA_LAYOUT_UNKNOWN,  // no data_format attribute, or inputs with conflicting layouts
    DATA_LAYOUT_PLANAR  // 2-dimensional outputs (matmul, flatten, reshape to 2d)
};
  71
// List of (node name, node index in the graph) pairs, as returned by getNextLayers().
typedef std::vector<std::pair<String, int> > StrIntVector;
  73
  74 struct Pin
  75 {
  76     Pin(const std::string &_name, int _blobIndex = 0) :
  77         name(_name), blobIndex(_blobIndex) {}
  78
  79     Pin() :
  80         name(""), blobIndex(-1) {}
  81
  82     std::string name;
  83     int blobIndex;
  84 };
  85
  86 void blobShapeFromTensor(const tensorflow::TensorProto &tensor, MatShape& shape)
  87 {
  88     shape.clear();
  89     if (tensor.has_tensor_shape())
  90     {
  91         const tensorflow::TensorShapeProto &_shape = tensor.tensor_shape();
  92         int i, n = _shape.dim_size();
  93         if (n)
  94         {
  95             shape.resize(n);
  96
  97             for (i = 0; i < n; i++)
  98                 shape[i] = (int)_shape.dim(i).size();
  99         }
 100         else
 101             shape.resize(1, 1);  // Scalar. // FIXIT: should be empty
 102     }
 103     else
 104     {
 105         CV_Error(Error::StsError, "Unknown shape of input tensor");
 106     }
 107 }
 108
 109 template <typename T>
 110 void parseTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
 111 {
 112     MatShape shape;
 113     blobShapeFromTensor(tensor, shape);
 114     int dims = (int)shape.size();
 115
 116     if (dims == 4)
 117     {
 118         // REORDER blob NHWC to NCHW
 119         swap(shape[2], shape[3]); // NHCW
 120         swap(shape[1], shape[2]); // NCHW
 121     }
 122
 123     dstBlob.create(shape, CV_32F);
 124
 125     Mat tensorContent = getTensorContent(tensor, /*no copy*/false);
 126     int size = tensorContent.total();
 127     CV_Assert(size == (int)dstBlob.total());
 128
 129     float *dstData = dstBlob.ptr<float>();
 130     const T *data = reinterpret_cast<const T*>(tensorContent.data);
 131
 132     if (dims == 4)
 133     {
 134         int num = shape[0], channels = shape[1], height = shape[2], width = shape[3];
 135         int total = num*channels*height*width;
 136         for(int i_n = 0; i_n < shape[0]; i_n++) {
 137             for(int i_c = 0; i_c < shape[1]; i_c++) {
 138                 for(int i_h = 0; i_h < shape[2]; i_h++) {
 139                     for(int i_w = 0; i_w < shape[3]; i_w++) {
 140                        int dst_i = channels*height*width*i_n + height*width*i_c + width*i_h + i_w;
 141                        int src_i = channels*height*width*i_n + i_c + channels*width*i_h + channels*i_w;
 142
 143                        CV_Assert(dst_i < total);
 144                        CV_Assert(src_i < total);
 145
 146                        dstData[dst_i] = data[src_i];
 147                     }
 148                 }
 149             }
 150         }
 151     } else {
 152         for (int i = 0; i < size; i++)
 153             dstData[i] = data[i];
 154     }
 155 }
 156
 157 void blobFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
 158 {
 159     switch (tensor.dtype()) {
 160         case tensorflow::DT_FLOAT:
 161         case tensorflow::DT_HALF:
 162             parseTensor<float>(tensor, dstBlob);
 163             break;
 164         case tensorflow::DT_DOUBLE:
 165             parseTensor<double>(tensor, dstBlob);
 166             break;
 167         default:
 168             CV_Error(Error::StsError, "Tensor's data type is not supported");
 169             break;
 170     }
 171 }
 172
 173 #if 0
// Debug helper (compiled out by the surrounding `#if 0`):
// prints the integer entries of an attribute list as "( a b c )".
void printList(const tensorflow::AttrValue::ListValue &val)
{
    std::cout << "(";
    for (int i = 0; i < val.i_size(); i++)
        std::cout << " " << val.i(i);
    std::cout << " )";
}
 181
// Debug helper (compiled out by the surrounding `#if 0`):
// prints every dimension of a shape as "name:size" inside square brackets.
void printTensorShape(const tensorflow::TensorShapeProto &shape)
{
    std::cout << "[ ";
    for (int d = 0; d < shape.dim_size(); d++)
        std::cout << shape.dim(d).name() <<
                     ":" << shape.dim(d).size() << " ";
    std::cout << "]";
}
 190
// Debug helper (compiled out by the surrounding `#if 0`):
// prints the tensor shape followed by at most the first 10 values of its
// raw content. Only DT_FLOAT and DT_INT32 are handled; any other dtype
// raises StsError. Nothing besides the shape is printed when the content
// is empty.
void printTensor(const tensorflow::TensorProto &tensor)
{
    printTensorShape(tensor.tensor_shape());

    if (tensor.tensor_content().empty())
        return;

    switch (tensor.dtype())
    {
    case tensorflow::DT_FLOAT:
        {
            const float *data = reinterpret_cast<const float*>(tensor.tensor_content().c_str());
            int size = tensor.tensor_content().size() / sizeof(float);
            for (int i = 0; i < std::min(10, size); i++)
                std::cout << " " << data[i];
            if (size > 10)
                std::cout << " ... " << size - 10 << " more";
            break;
        }
    case tensorflow::DT_INT32:
        {
            const int *data = reinterpret_cast<const int*>(tensor.tensor_content().c_str());
            int size = tensor.tensor_content().size() / sizeof(int);
            for (int i = 0; i < std::min(10, size); i++)
                std::cout << " " << data[i];
            if (size > 10)
                std::cout << " ... " << size - 10 << " more";
            break;
        }
    default:
        CV_Error(Error::StsError, "Tensor type is not supported");
        break;
    }
}
 225
// Debug helper (compiled out by the surrounding `#if 0`):
// prints "<name>:<op>" with the node's inputs in parentheses, then one line
// per attribute. The attribute key selects how the value is printed; any key
// not listed below is printed as a tensor.
void printLayerAttr(const tensorflow::NodeDef &layer)
{
    std::cout << std::endl << layer.name() << ":" << layer.op();
    for (int ii = 0; ii < layer.input_size(); ii++)
        std::cout << "(" << layer.input(ii) << ")";
    std::cout << std::endl;
    google::protobuf::Map<std::string, tensorflow::AttrValue> attr
            = layer.attr();
    for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
         ai != attr.end(); ++ai)
    {
        std::cout << ai->first << ":";
        if (ai->first == "dtype" || ai->first == "T")
            std::cout << ai->second.i();
        else if (ai->first == "padding")
            std::cout << ai->second.s();
        else if (ai->first == "transpose_a" || ai->first == "transpose_b")
            std::cout << ai->second.b();
        //            else if (ai->first == "shape")
        //              printTensorShape(ai->second.shape());
        else if (ai->first == "strides" || ai->first == "ksize")
            printList(ai->second.list());
        else
            printTensor(ai->second.tensor());
        std::cout << std::endl;
    }
}
 253 #endif
 254
 255 bool hasLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
 256 {
 257     google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
 258     return attr.find(name) != attr.end();
 259 }
 260
 261 const tensorflow::AttrValue& getLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
 262 {
 263     return layer.attr().at(name);
 264 }
 265
 266 static DataLayout getDataLayout(const tensorflow::NodeDef& layer)
 267 {
 268     if (hasLayerAttr(layer, "data_format"))
 269     {
 270         std::string format = getLayerAttr(layer, "data_format").s();
 271         if (format == "NHWC" || format == "channels_last")
 272             return DATA_LAYOUT_NHWC;
 273         else if (format == "NCHW" || format == "channels_first")
 274             return DATA_LAYOUT_NCHW;
 275         else if (format == "NDHWC")
 276             return DATA_LAYOUT_NDHWC;
 277         else
 278             CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
 279     }
 280     return DATA_LAYOUT_UNKNOWN;
 281 }
 282
 283 static inline std::string getNodeName(const std::string& tensorName)
 284 {
 285     return tensorName.substr(0, tensorName.rfind(':'));
 286 }
 287
 288 static inline
 289 DataLayout getDataLayout(
 290         const std::string& layerName,
 291         const std::map<String, DataLayout>& data_layouts
 292 )
 293 {
 294     std::map<String, DataLayout>::const_iterator it = data_layouts.find(getNodeName(layerName));
 295     return it != data_layouts.end() ? it->second : DATA_LAYOUT_UNKNOWN;
 296 }
 297
 298 void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
 299 {
 300     if (hasLayerAttr(layer, "strides"))
 301     {
 302         const tensorflow::AttrValue& val = getLayerAttr(layer, "strides");
 303         int dimX, dimY, dimC, dimD;
 304         int layout = getDataLayout(layer);
 305         if (layout == DATA_LAYOUT_NCHW)
 306         {
 307             dimC = 1; dimY = 2; dimX = 3;
 308         }
 309         else if (layout == DATA_LAYOUT_NDHWC)
 310         {
 311             dimD = 1; dimY = 2; dimX = 3; dimC = 4;
 312         }
 313         else
 314         {
 315             dimY = 1; dimX = 2; dimC = 3;
 316         }
 317         if (!(val.list().i_size() == 4 || val.list().i_size() == 5) ||
 318             val.list().i(0) != 1 || val.list().i(dimC) != 1)
 319             CV_Error(Error::StsError, "Unsupported strides");
 320         if (layout == DATA_LAYOUT_NDHWC) {
 321             int strides[] = {static_cast<int>(val.list().i(dimD)),
 322                              static_cast<int>(val.list().i(dimY)),
 323                              static_cast<int>(val.list().i(dimX))};
 324             layerParams.set("stride",  DictValue::arrayInt(strides, 3));
 325         }
 326         else
 327         {
 328             layerParams.set("stride_h", static_cast<int>(val.list().i(dimY)));
 329             layerParams.set("stride_w", static_cast<int>(val.list().i(dimX)));
 330         }
 331     }
 332 }
 333
 334 DictValue parseDims(const tensorflow::TensorProto &tensor) {
 335     MatShape shape;
 336     blobShapeFromTensor(tensor, shape);
 337     int dims = (int)shape.size();
 338
 339     CV_Assert(tensor.dtype() == tensorflow::DT_INT32);
 340     CV_Assert(dims == 1);
 341
 342     Mat values = getTensorContent(tensor);
 343     CV_Assert(values.type() == CV_32SC1);
 344     // TODO: add reordering shape if dims == 4
 345     return DictValue::arrayInt((int*)values.data, values.total());
 346 }
 347
 348 void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
 349 {
 350     if (hasLayerAttr(layer, "ksize"))
 351     {
 352         const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize");
 353         int dimX, dimY, dimC, dimD;
 354         int layout = getDataLayout(layer);
 355         if (layout == DATA_LAYOUT_NCHW)
 356         {
 357             dimC = 1; dimY = 2; dimX = 3;
 358         }
 359         else if (layout == DATA_LAYOUT_NDHWC)
 360         {
 361             dimD = 1; dimY = 2; dimX = 3; dimC = 4;
 362         }
 363         else
 364         {
 365             dimY = 1; dimX = 2; dimC = 3;
 366         }
 367         if (!(val.list().i_size() == 4 || val.list().i_size() == 5) ||
 368             val.list().i(0) != 1 || val.list().i(dimC) != 1)
 369             CV_Error(Error::StsError, "Unsupported ksize");
 370
 371         if (layout == DATA_LAYOUT_NDHWC) {
 372             int kernel[] = {static_cast<int>(val.list().i(dimD)),
 373                             static_cast<int>(val.list().i(dimY)),
 374                             static_cast<int>(val.list().i(dimX))};
 375             layerParams.set("kernel_size",  DictValue::arrayInt(kernel, 3));
 376         }
 377         else
 378         {
 379             layerParams.set("kernel_h", static_cast<int>(val.list().i(dimY)));
 380             layerParams.set("kernel_w", static_cast<int>(val.list().i(dimX)));
 381         }
 382     }
 383     else
 384     {
 385         layerParams.set("kernel_h", 1);
 386         layerParams.set("kernel_w", 1);
 387     }
 388 }
 389
 390 void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer)
 391 {
 392     if (hasLayerAttr(layer, "padding"))
 393         layerParams.set("pad_mode", getLayerAttr(layer, "padding").s());
 394 }
 395
 396 Pin parsePin(const std::string &name)
 397 {
 398     Pin pin(name);
 399
 400     size_t delimiter_pos = name.find_first_of(':');
 401     if (delimiter_pos != std::string::npos)
 402     {
 403         pin.name = name.substr(0, delimiter_pos);
 404         std::istringstream(name.substr(delimiter_pos + 1)) >> pin.blobIndex;
 405     }
 406
 407     return pin;
 408 }
 409
 410 StrIntVector getNextLayers(const tensorflow::GraphDef& net, const String& layer_name, const String& type = "")
 411 {
 412    StrIntVector layers;
 413
 414    for (int li = 0; li < net.node_size(); li++)
 415    {
 416        const tensorflow::NodeDef& layer = net.node(li);
 417        for (int input_id = 0; input_id < layer.input_size(); input_id++) {
 418            String input_op_name = parsePin(layer.input(input_id)).name;
 419            bool type_ok = type.empty() ? true : type == layer.op();
 420            if (input_op_name == layer_name && type_ok)
 421                layers.push_back(std::make_pair(layer.name(), li));
 422        }
 423    }
 424
 425    return layers;
 426 }
 427
 428 void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int input_blob_index, bool remove_from_net = true) {
 429     String layer_name = net.node(layer_index).name();
 430     StrIntVector layers = getNextLayers(net, layer_name);
 431
 432     String removed_layer_input = net.node(layer_index).input(input_blob_index);
 433
 434     for (size_t i = 0; i < layers.size(); i++)
 435     {
 436         tensorflow::NodeDef* layer = net.mutable_node(layers[i].second);
 437         for (int input_id = 0; input_id < layer->input_size(); input_id++) {
 438                 String input_op_name = layer->input(input_id);
 439
 440                 if (input_op_name == layer_name) {
 441                     layer->set_input(input_id, removed_layer_input);
 442                 }
 443         }
 444     }
 445
 446     if (remove_from_net)
 447         net.mutable_node()->DeleteSubrange(layer_index, 1);
 448 }
 449
// Builds a cv::dnn::Net from a TensorFlow graph. Both constructors load the
// graph definition(s) and then call populateNet(), so the network passed in
// is filled as part of construction.
class TFImporter
{
public:
    // Loads the binary graph from the file `model` and, optionally, a text
    // graph from the file `config`. NULL or empty paths are skipped.
    TFImporter(Net& net, const char *model, const char *config = NULL);
    // Same as above, but reads the binary graph and the optional text graph
    // from memory buffers. NULL or zero-length buffers are skipped.
    TFImporter(Net& net, const char *dataModel, size_t lenModel,
               const char *dataConfig = NULL, size_t lenConfig = 0);
protected:
    // Destination network; refers to the Net given to the constructor.
    Net& dstNet;
    // Simplifies the loaded graph(s) and processes their nodes.
    void populateNet();

    void parseNode(const tensorflow::NodeDef& layer);

    // Derives the layout of a node's output from its attributes, its inputs
    // or the layout already recorded for the node in `data_layouts`.
    DataLayout predictOutputDataLayout(const tensorflow::NodeDef& layer);

    // Converts a 4D (HWIO) or 5D (DHWIO) kernel tensor to an OIHW / OIDHW float blob.
    void kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob);

    // Connects output `outPin` to input `input_blob_id` of layer `input_layer_id`.
    void connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
                 const int input_layer_id, const int input_blob_id);
    // Connects output `outPin` to inputs 0..input_blobs_count-1 of layer `input_layer_id`.
    void connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
                           const int input_layer_id, const int input_blobs_count);
    // Returns the tensor of the Const node feeding `layer`. With
    // input_blob_index == -1 the single Const input is detected automatically;
    // the index actually used is written to `actual_inp_blob_idx` when non-NULL.
    // NOTE(review): `const_layers` is taken by value, so the map is copied on
    // every call — consider a const reference (the definition must change too).
    const tensorflow::TensorProto& getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
                                                int input_blob_index = -1, int* actual_inp_blob_idx = 0);


    // Binary serialized TensorFlow graph includes weights.
    tensorflow::GraphDef netBin;
    // Optional text definition of TensorFlow graph. More flexible than binary format
    // and may be used to build the network using binary format only as a weights storage.
    // This approach is similar to Caffe's `.prototxt` and `.caffemodel`.
    tensorflow::GraphDef netTxt;

    // Names of the network inputs; connect() uses a name's position in this
    // list as the output blob index when the source pin is a network input.
    std::vector<String> netInputsNames;
    std::vector<MatShape> netInputShapes;

    std::set<String> layers_to_ignore;
    // Data layout recorded per node name.
    std::map<String, DataLayout> data_layouts;

    // find all Const layers for params
    std::map<String, int> value_id;
    // A map with constant blobs which are shared between multiple layers.
    std::map<String, Mat> sharedWeights;

    std::map<String, int> layer_id;
};
 494
 495 TFImporter::TFImporter(Net& net, const char *model, const char *config)
 496     : dstNet(net)
 497 {
 498     if (model && model[0])
 499     {
 500         CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from file: " << model);
 501         ReadTFNetParamsFromBinaryFileOrDie(model, &netBin);
 502     }
 503     if (config && config[0])
 504     {
 505         CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from file: " << config);
 506         ReadTFNetParamsFromTextFileOrDie(config, &netTxt);
 507     }
 508
 509     populateNet();
 510 }
 511
 512 TFImporter::TFImporter(
 513         Net& net,
 514         const char *dataModel, size_t lenModel,
 515         const char *dataConfig, size_t lenConfig
 516 )
 517     : dstNet(net)
 518 {
 519     if (dataModel != NULL && lenModel > 0)
 520     {
 521         CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from memory (" << lenModel << " bytes)");
 522         ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin);
 523     }
 524     if (dataConfig != NULL && lenConfig > 0)
 525     {
 526         CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from memory (" << lenConfig << " bytes)");
 527         ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt);
 528     }
 529     populateNet();
 530 }
 531
 532 void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
 533 {
 534     MatShape shape;
 535     blobShapeFromTensor(tensor, shape);
 536     int dims = (int)shape.size();
 537
 538     // TODO: other blob types
 539     CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT ||
 540               tensor.dtype() == tensorflow::DT_HALF);
 541     CV_Assert(dims == 4 || dims == 5);
 542
 543     int out_c, input_c, depth, height, width;
 544     if (dims == 4)
 545     {
 546         // REORDER kernel HWIO to OIHW
 547         swap(shape[0], shape[2]); // IWHO
 548         swap(shape[1], shape[3]); // IOHW
 549         swap(shape[0], shape[1]); // OIHW
 550         depth = 1; height = shape[2]; width = shape[3];
 551     }
 552     else
 553     {
 554         // REORDER kernel DHWIO to OIDHW
 555         swap(shape[0], shape[4]); // OHWID
 556         swap(shape[1], shape[3]); // OIWHD
 557         swap(shape[2], shape[4]); // OIDHW
 558         depth = shape[2]; height = shape[3]; width = shape[4];
 559     }
 560     out_c = shape[0]; input_c = shape[1];
 561
 562     dstBlob.create(shape, CV_32F);
 563
 564     Mat tensorContent = getTensorContent(tensor, /*no copy*/false);
 565     int size = tensorContent.total();
 566     CV_Assert(size == (int)dstBlob.total());
 567
 568     float *dstData = dstBlob.ptr<float>();
 569     const float *data = reinterpret_cast<const float*>(tensorContent.data);
 570
 571     int total = out_c * input_c * depth * height * width;
 572     for (int i_oc = 0; i_oc < out_c; i_oc++) {
 573         for (int i_ic = 0; i_ic < input_c; i_ic++) {
 574             for (int i_d = 0; i_d < depth; i_d++) {
 575                 for (int i_h = 0; i_h < height; i_h++) {
 576                     for (int i_w = 0; i_w < width; i_w++) {
 577                         int dst_i = input_c * depth * height * width * i_oc +
 578                                     depth * height * width * i_ic + height * width * i_d + width * i_h + i_w;
 579                         int src_i = out_c * input_c * width * height * i_d +
 580                                     out_c * input_c * width * i_h + out_c * input_c * i_w + out_c * i_ic + i_oc;
 581                         CV_Assert(dst_i < total);
 582                         CV_Assert(src_i < total);
 583                        dstData[dst_i] = data[src_i];
 584                    }
 585                 }
 586             }
 587         }
 588     }
 589 }
 590
 591 void TFImporter::connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
 592              const int input_layer_id, const int input_blob_id)
 593 {
 594     std::map<String, int>::const_iterator it = layers_name_id_map.find(outPin.name);
 595     if (it == layers_name_id_map.end())
 596         CV_Error(Error::StsError, "Input layer not found: " + outPin.name);
 597
 598     std::vector<String>::iterator inpNameIt = std::find(netInputsNames.begin(), netInputsNames.end(), outPin.name);
 599     int blobIndex;
 600     if (inpNameIt == netInputsNames.end())
 601         blobIndex = outPin.blobIndex;
 602     else
 603         blobIndex = inpNameIt - netInputsNames.begin();
 604     network.connect(it->second, blobIndex, input_layer_id, input_blob_id);
 605 }
 606
 607 void TFImporter::connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
 608                      const int input_layer_id, const int input_blobs_count)
 609 {
 610     for (int input_blob_id = 0; input_blob_id < input_blobs_count; input_blob_id++)
 611         connect(layer_id, network, outPin, input_layer_id, input_blob_id);
 612 }
 613
 614 const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
 615                                               int input_blob_index, int* actual_inp_blob_idx) {
 616     if (input_blob_index == -1) {
 617         for(int i = 0; i < layer.input_size(); i++) {
 618             Pin input = parsePin(layer.input(i));
 619             if (const_layers.find(input.name) != const_layers.end()) {
 620                 if (input_blob_index != -1)
 621                     CV_Error(Error::StsError, "More than one input is Const op");
 622
 623                 input_blob_index = i;
 624             }
 625         }
 626     }
 627
 628     if (input_blob_index == -1)
 629         CV_Error(Error::StsError, "Const input blob for weights not found");
 630
 631     Pin kernel_inp = parsePin(layer.input(input_blob_index));
 632     if (const_layers.find(kernel_inp.name) == const_layers.end())
 633         CV_Error(Error::StsError, "Input [" + layer.input(input_blob_index) +
 634                                   "] for node [" + layer.name() + "] not found");
 635     if (kernel_inp.blobIndex != 0)
 636         CV_Error(Error::StsError, "Unsupported kernel input");
 637
 638     if(actual_inp_blob_idx) {
 639         *actual_inp_blob_idx = input_blob_index;
 640     }
 641
 642     int nodeIdx = const_layers.at(kernel_inp.name);
 643     if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name)
 644     {
 645         return netBin.node(nodeIdx).attr().at("value").tensor();
 646     }
 647     else
 648     {
 649         CV_Assert_N(nodeIdx < netTxt.node_size(),
 650                     netTxt.node(nodeIdx).name() == kernel_inp.name);
 651         return netTxt.node(nodeIdx).attr().at("value").tensor();
 652     }
 653 }
 654
// Scans all nodes of `net` and registers constant tensors.
//  - "Const" nodes: when the node has a "value" attribute, its index is
//    recorded in `const_layers` under the node name (a duplicate name trips
//    an assertion); the name is always added to `layers_to_ignore`.
//  - "Dequantize" nodes: the referenced quantized Const tensor is converted
//    to float in place, the Const node is renamed to the Dequantize node's
//    name and registered under that name.
//  - Any other op is skipped.
// Exceptions raised while handling a node are logged with the node name and
// rethrown.
static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& const_layers,
                          std::set<String>& layers_to_ignore)
{
    CV_LOG_DEBUG(NULL, "DNN/TF: addConstNodes(): handling " << net.node_size() << " nodes...");
    for (int li = 0; li < net.node_size(); li++)
    {
        const tensorflow::NodeDef &layer = net.node(li);
        // Copies, not references: a node of the graph is renamed below.
        String name = layer.name();
        String type = layer.op();

        //CV_LOG_DEBUG(NULL, "DNN/TF: layer_id=" << li << " - '" << name << "' @ " << type);

        try
        {
            if (type == "Dequantize")
            {
                // Example of Dequantize node:
                //   name: "conv2d_1/bias"
                //   op: "Dequantize"
                //   input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8)
                //   input: "conv2d_1/bias_quantized_min"
                //   input: "conv2d_1/bias_quantized_max"
                //   attr { key: "T" value { type: DT_QUINT8 } }   (quantized type)
                //   attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique)
                CV_CheckEQ(layer.input_size(), 3, "Dequantize: 3 inputs is supported only");
                // All three inputs must already be registered constants, i.e.
                // they must have been visited earlier in this loop.
                for (int i = 0; i < 3; ++i)
                    CV_Assert(const_layers.find(layer.input(i)) != const_layers.end());
                CV_Assert(hasLayerAttr(layer, "mode") &&
                          getLayerAttr(layer, "mode").s() == "MIN_FIRST");

                int tensorId = const_layers[layer.input(0)];
                int minId = const_layers[layer.input(1)];
                int maxId = const_layers[layer.input(2)];

                tensorflow::TensorProto* tensor = net.mutable_node(tensorId)
                                                    ->mutable_attr()->at("value")
                                                     .mutable_tensor();
                CV_CheckEQ((int)tensor->dtype(), (int)tensorflow::DT_QUINT8, "");

                // The range limits must be single float values.
                Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor());
                Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor());
                CV_CheckEQ(qMin.total(), (size_t)1, "");
                CV_CheckTypeEQ(qMin.type(), CV_32FC1, "");
                CV_CheckEQ(qMax.total(), (size_t)1, "");
                CV_CheckTypeEQ(qMax.type(), CV_32FC1, "");

                Mat content = getTensorContent(*tensor);

                // Dequantize: value = q * rangeScale + rangeScale * round(min / rangeScale),
                // where rangeScale spreads [min, max] over the 255 steps of an 8-bit value.
                float minVal = qMin.at<float>(0);
                float rangeScale = (qMax.at<float>(0) - minVal) / 255;
                CV_Assert(rangeScale >= 0);
                content.convertTo(content, CV_32FC1, rangeScale,
                                  rangeScale * cvRound(minVal / rangeScale));

                // Store the float data back into the graph.
                tensor->set_dtype(tensorflow::DT_FLOAT);
                tensor->set_tensor_content(content.data, content.total() * content.elemSize1());

                // The converted Const node takes over the Dequantize node's
                // name, so consumers of the Dequantize output resolve to it.
                net.mutable_node(tensorId)->set_name(name);
                CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);
                layers_to_ignore.insert(name);
                continue;
            }
            else if (type != "Const")
                continue;  // only Const parameters are supported

            if (layer.attr().find("value") != layer.attr().end())
            {
                CV_Assert(const_layers.insert(std::make_pair(name, li)).second);
            }
            layers_to_ignore.insert(name);
        }
        catch (const std::exception& e)
        {
            // Add the node name to the log for context, then propagate.
            CV_LOG_ERROR(NULL, "DNN/TF: Can't handle node='" << name << "'. Exception: " << e.what());
            throw;
        }
    }
    CV_LOG_DEBUG(NULL, "DNN/TF: layers_to_ignore.size() = " << layers_to_ignore.size());
}
 734
 735 // If all inputs of specific layer have the same data layout we can say that
 736 // this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
 737 DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer)
 738 {
 739     DataLayout layout = getDataLayout(layer);
 740     if (layout != DATA_LAYOUT_UNKNOWN)
 741     {
 742         CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from attrs)");
 743         return layout;
 744     }
 745
 746     // Determine layout by layer's inputs
 747     for (int i = 0, n = layer.input_size(); i < n; ++i)
 748     {
 749         std::map<String, DataLayout>::const_iterator it = data_layouts.find(getNodeName(layer.input(i)));
 750         if (it != data_layouts.end())
 751         {
 752             if (layout != DATA_LAYOUT_UNKNOWN)
 753             {
 754                 if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN)
 755                     return DATA_LAYOUT_UNKNOWN;
 756             }
 757             else
 758                 layout = it->second;
 759         }
 760     }
 761
 762     if (layout != DATA_LAYOUT_UNKNOWN)
 763     {
 764         CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from inputs)");
 765         return layout;
 766     }
 767
 768     // Determine layout by layer's consumers recursively.
 769     std::map<String, DataLayout>::const_iterator it = data_layouts.find(layer.name());
 770     CV_Assert(it != data_layouts.end());
 771     return it->second;
 772 }
 773
 774 void TFImporter::populateNet()
 775 {
 776     CV_Assert(netBin.ByteSize() || netTxt.ByteSize());
 777
 778     CV_LOG_INFO(NULL, "DNN/TF: parsing model"
 779         << (netBin.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netBin.versions().producer(), (int)netBin.versions().min_consumer()) : cv::String(" (N/A version info)"))
 780         << ". Number of nodes = " << netBin.node_size()
 781     );
 782
 783     if (netTxt.ByteSize())
 784     {
 785         CV_LOG_INFO(NULL, "DNN/TF: parsing config"
 786             << (netTxt.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netTxt.versions().producer(), (int)netTxt.versions().min_consumer()) : cv::String(" (N/A version info)"))
 787             << ". Number of nodes = " << netTxt.node_size()
 788         );
 789
 790         RemoveIdentityOps(netBin);
 791         CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes");
 792         RemoveIdentityOps(netTxt);
 793         CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(config) => " << netTxt.node_size() << " nodes");
 794
 795         sortByExecutionOrder(netTxt);
 796         CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(config) => " << netTxt.node_size() << " nodes");
 797     }
 798     else
 799     {
 800         removePhaseSwitches(netBin);
 801         CV_LOG_DEBUG(NULL, "DNN/TF: removePhaseSwitches(model) => " << netBin.node_size() << " nodes");
 802
 803         RemoveIdentityOps(netBin);
 804         CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes");
 805
 806         simplifySubgraphs(netBin);
 807         CV_LOG_DEBUG(NULL, "DNN/TF: simplifySubgraphs(model) => " << netBin.node_size() << " nodes");
 808         sortByExecutionOrder(netBin);
 809         CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(model) => " << netBin.node_size() << " nodes");
 810     }
 811
 812     tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin;
 813
 814     int layersSize = net.node_size();
 815
 816     // Pre-fill data layouts where they are set explicitly.
 817     // Assuming that nodes are in topological order
 818     for (int i = layersSize - 1; i >= 0; --i)
 819     {
 820         const tensorflow::NodeDef& layer = net.node(i);
 821         std::string name = layer.name();
 822
 823         CV_LOG_DEBUG(NULL, "DNN/TF: node(" << i << " - '" << name << "') propagating layout...");
 824
 825         try
 826         {
 827             DataLayout layout = getDataLayout(layer);
 828             std::map<String, DataLayout>::iterator it = data_layouts.find(name);
 829             if (it != data_layouts.end())
 830             {
 831                 if (layout != DATA_LAYOUT_UNKNOWN)
 832                 {
 833                     if (it->second == DATA_LAYOUT_UNKNOWN)
 834                         it->second = layout;
 835                     else if (it->second != layout)
 836                     {
 837                         it->second = DATA_LAYOUT_UNKNOWN;
 838                         layout = DATA_LAYOUT_UNKNOWN;
 839                     }
 840                 }
 841                 else
 842                     layout = it->second;
 843             }
 844             else
 845                 data_layouts[name] = layout;
 846
 847             // Specify input layers to have the same data layout.
 848             for (int j = 0; j < layer.input_size(); ++j)
 849             {
 850                 name = getNodeName(layer.input(j));
 851                 it = data_layouts.find(name);
 852                 if (it != data_layouts.end())
 853                 {
 854                     if (layout != DATA_LAYOUT_UNKNOWN)
 855                     {
 856                         if (it->second == DATA_LAYOUT_UNKNOWN)
 857                             it->second = layout;
 858                         else if (it->second != layout)
 859                             it->second = DATA_LAYOUT_UNKNOWN;
 860                     }
 861                 }
 862                 else
 863                     data_layouts[name] = layout;
 864             }
 865         }
 866         catch (const std::exception& e)
 867         {
 868             CV_LOG_ERROR(NULL, "DNN/TF: Can't propagate layout for node='" << name << "'. Exception: " << e.what());
 869             throw;
 870         }
 871     }
 872
 873     addConstNodes(netBin, value_id, layers_to_ignore);
 874     addConstNodes(netTxt, value_id, layers_to_ignore);
 875
 876
 877     for (int li = 0; li < layersSize; li++)
 878     {
 879         const tensorflow::NodeDef& layer = net.node(li);
 880
 881         const std::string name = layer.name();
 882         const std::string type = layer.op();
 883         const int ninputs = layer.input_size();
 884         CV_LOG_DEBUG(NULL, "DNN/TF: (" << li << "/" << layersSize << ") Parse layer " << name << " @ " << type << " with " << ninputs << " inputs");
 885
 886         parseNode(layer);
 887     }
 888
 889     for (size_t i = 0; i < netInputsNames.size(); i++)
 890     {
 891         CV_LOG_DEBUG(NULL, "DNN/TF: Model input: " << i << " - '" << netInputsNames[i] << "'");
 892         CV_Assert(!netInputsNames[i].empty());
 893     }
 894     dstNet.setInputsNames(netInputsNames);
 895     CV_LOG_DEBUG(NULL, "DNN/TF: ===================== Import completed =====================");
 896 }
 897
 898 void TFImporter::parseNode(const tensorflow::NodeDef& layer_)
 899 {
 900     tensorflow::NodeDef layer = layer_;
 901
 902     tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin;
 903
 904     /*const*/ std::string name = layer.name();
 905     /*const*/ std::string type = layer.op();
 906     /*const*/ int num_inputs = layer.input_size();
 907
 908     try
 909     {
 910         LayerParams layerParams;
 911
 912         if (layers_to_ignore.find(name) != layers_to_ignore.end())
 913         {
 914             CV_LOG_DEBUG(NULL, "DNN/TF:     ignored");
 915             return;
 916         }
 917
 918         DataLayout predictedLayout = predictOutputDataLayout(layer);
 919         data_layouts[name] = predictedLayout;
 920
 921         if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad" || type == "MirrorPad" || type == "Conv3D")
 922         {
 923             CV_CheckGT(num_inputs, 0, "");
 924             // The first node of dilated convolution subgraph.
 925             // Extract input node, dilation rate and paddings.
 926             std::string input = layer.input(0);
 927             StrIntVector next_layers;
 928             if (type == "SpaceToBatchND" || type == "Pad")
 929             {
 930                 next_layers = getNextLayers(net, name, "Conv2D");
 931                 if (next_layers.empty())
 932                     next_layers = getNextLayers(net, name, "DepthwiseConv2dNative");
 933             }
 934
 935             if (type == "SpaceToBatchND")
 936             {
 937                 // op: "SpaceToBatchND"
 938                 // input: "input"
 939                 // input: "SpaceToBatchND/block_shape"
 940                 // input: "SpaceToBatchND/paddings"
 941                 CV_CheckEQ(num_inputs, 3, "");
 942
 943                 DictValue dilation = parseDims(getConstBlob(layer, value_id, 1));
 944                 CV_Assert(dilation.size() == 2);
 945                 layerParams.set("dilation_h", dilation.get<int>(0));
 946                 layerParams.set("dilation_w", dilation.get<int>(1));
 947
 948                 Mat paddings;
 949                 parseTensor<int>(getConstBlob(layer, value_id, 2), paddings);
 950
 951                 // paddings is a 2x2 matrix: [[top, bot], [left, right]]
 952                 layerParams.set("pad_h", paddings.at<float>(0));
 953                 layerParams.set("pad_w", paddings.at<float>(2));
 954
 955                 CV_Assert(next_layers.size() == 1);
 956                 layers_to_ignore.insert(next_layers[0].first);
 957
 958                 // FIXIT don't override, rewrite this code
 959                 layer = net.node(next_layers[0].second);
 960                 name = layer.name();
 961                 type = layer.op();
 962                 num_inputs = layer.input_size();
 963                 CV_LOG_DEBUG(NULL, "DNN/TF:     switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs");
 964             }
 965             else if (type == "Pad" || type == "MirrorPad")
 966             {
 967                 Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
 968                 CV_Assert(paddings.type() == CV_32SC1);
 969                 if (paddings.total() == 8)
 970                 {
 971                     // Perhaps, we have NHWC padding dimensions order.
 972                     //  N    H    W    C
 973                     // 0 1  2 3  4 5  6 7
 974                     std::swap(paddings.at<int32_t>(2), paddings.at<int32_t>(6));
 975                     std::swap(paddings.at<int32_t>(3), paddings.at<int32_t>(7));
 976                     //  N    C    W    H
 977                     // 0 1  2 3  4 5  6 7
 978                     std::swap(paddings.at<int32_t>(4), paddings.at<int32_t>(6));
 979                     std::swap(paddings.at<int32_t>(5), paddings.at<int32_t>(7));
 980                     //  N    C    H    W
 981                     // 0 1  2 3  4 5  6 7
 982                 }
 983
 984                 if (next_layers.empty() || paddings.total() != 8 ||
 985                     paddings.at<int32_t>(4) != paddings.at<int32_t>(5) ||
 986                     paddings.at<int32_t>(6) != paddings.at<int32_t>(7) || type == "MirrorPad")
 987                 {
 988                     // Just a single padding layer.
 989                     layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));
 990                     if (type == "MirrorPad")
 991                         layerParams.set("type", "reflect");
 992
 993                     int id = dstNet.addLayer(name, "Padding", layerParams);
 994                     layer_id[name] = id;
 995
 996                     connect(layer_id, dstNet, parsePin(input), id, 0);
 997                     return;
 998                 }
 999                 else
1000                 {
1001                     // Merge with subsequent convolutional layer.
1002                     CV_Assert(next_layers.size() == 1);
1003
1004                     layerParams.set("pad_h", paddings.at<int32_t>(4));
1005                     layerParams.set("pad_w", paddings.at<int32_t>(6));
1006
1007                     layers_to_ignore.insert(next_layers[0].first);
1008
1009                     // FIXIT don't override, rewrite this code
1010                     layer = net.node(next_layers[0].second);
1011                     name = layer.name();
1012                     type = layer.op();
1013                     num_inputs = layer.input_size();
1014                     CV_LOG_DEBUG(NULL, "DNN/TF:     switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs");
1015                 }
1016             }
1017
1018             // For the object detection networks, TensorFlow Object Detection API
1019             // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax)
1020             // order. We can manage it at DetectionOutput layer parsing predictions
1021             // or shuffle last convolution's weights.
1022             bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") &&
1023                                      getLayerAttr(layer, "loc_pred_transposed").b();
1024
1025             layerParams.set("bias_term", false);
1026             layerParams.blobs.resize(1);
1027
1028             next_layers = getNextLayers(net, name, "BiasAdd");
1029             if (next_layers.size() == 1) {
1030                 layerParams.set("bias_term", true);
1031                 layerParams.blobs.resize(2);
1032
1033                 int weights_layer_index = next_layers[0].second;
1034
1035                 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
1036                 ExcludeLayer(net, weights_layer_index, 0, false);
1037                 layers_to_ignore.insert(next_layers[0].first);
1038
1039                 // Shuffle bias from yxYX to xyXY.
1040                 if (locPredTransposed)
1041                 {
1042                     const int numWeights = layerParams.blobs[1].total();
1043                     float* biasData = reinterpret_cast<float*>(layerParams.blobs[1].data);
1044                     CV_Assert(numWeights % 4 == 0);
1045                     for (int i = 0; i < numWeights; i += 2)
1046                     {
1047                         std::swap(biasData[i], biasData[i + 1]);
1048                     }
1049                 }
1050             }
1051
1052             int kernelTensorInpId = -1;
1053             const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernelTensorInpId);
1054             const String kernelTensorName = layer.input(kernelTensorInpId);
1055             std::map<String, Mat>::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName);
1056             if (sharedWeightsIt == sharedWeights.end())
1057             {
1058                 kernelFromTensor(kernelTensor, layerParams.blobs[0]);
1059                 releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
1060
1061                 int* kshape = layerParams.blobs[0].size.p;
1062                 const int outCh = kshape[0];
1063                 const int inCh = kshape[1];
1064                 const int height = kshape[2];
1065                 const int width = kshape[3];
1066                 if (type == "DepthwiseConv2dNative")
1067                 {
1068                     CV_Assert(!locPredTransposed);
1069                     const int chMultiplier = kshape[0];
1070
1071                     Mat copy = layerParams.blobs[0].clone();
1072                     float* src = (float*)copy.data;
1073                     float* dst = (float*)layerParams.blobs[0].data;
1074                     for (int i = 0; i < chMultiplier; ++i)
1075                         for (int j = 0; j < inCh; ++j)
1076                             for (int s = 0; s < height * width; ++s)
1077                                 {
1078                                     int src_i = (i * inCh + j) * height * width + s;
1079                                     int dst_i = (j * chMultiplier + i) * height* width + s;
1080                                     dst[dst_i] = src[src_i];
1081                                 }
1082                     // TODO Use reshape instead
1083                     kshape[0] = inCh * chMultiplier;
1084                     kshape[1] = 1;
1085                     size_t* kstep = layerParams.blobs[0].step.p;
1086                     kstep[0] = kstep[1]; // fix steps too
1087                 }
1088
1089                 // Shuffle output channels from yxYX to xyXY.
1090                 if (locPredTransposed)
1091                 {
1092                     const int slice = height * width * inCh;
1093                     for (int i = 0; i < outCh; i += 2)
1094                     {
1095                         cv::Mat src(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i));
1096                         cv::Mat dst(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i + 1));
1097                         std::swap_ranges(src.begin<float>(), src.end<float>(), dst.begin<float>());
1098                     }
1099                 }
1100                 sharedWeights[kernelTensorName] = layerParams.blobs[0];
1101             }
1102             else
1103             {
1104                 layerParams.blobs[0] = sharedWeightsIt->second;
1105             }
1106             Mat weights = layerParams.blobs[0];
1107             layerParams.set("kernel_size",  DictValue::arrayInt(&weights.size[2], weights.dims - 2));
1108
1109             layerParams.set("num_output", layerParams.blobs[0].size[0]);
1110
1111             setStrides(layerParams, layer);
1112             if (!layerParams.has("pad_w") && !layerParams.has("pad_h"))
1113                 setPadding(layerParams, layer);
1114
1115             // The final node of dilated convolution subgraph.
1116             next_layers = getNextLayers(net, name, "BatchToSpaceND");
1117             if (!next_layers.empty())
1118             {
1119                 CV_Assert(next_layers.size() == 1);
1120                 ExcludeLayer(net, next_layers[0].second, 0, false);
1121                 layers_to_ignore.insert(next_layers[0].first);
1122             }
1123
1124             int id = dstNet.addLayer(name, "Convolution", layerParams);
1125             layer_id[name] = id;
1126
1127             // one input only
1128             connect(layer_id, dstNet, parsePin(input), id, 0);
1129
1130
1131             if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN)
1132                 data_layouts[name] = DATA_LAYOUT_NHWC;
1133         }
1134         else if (type == "BiasAdd" || type == "Add" || type == "AddV2" || type == "Sub" || type=="AddN")
1135         {
1136             CV_CheckGT(num_inputs, 0, "");
1137             bool haveConst = false;
1138             for(int ii = 0; !haveConst && ii < num_inputs; ++ii)
1139             {
1140                 Pin input = parsePin(layer.input(ii));
1141                 haveConst = value_id.find(input.name) != value_id.end();
1142             }
1143             CV_Assert(!haveConst || num_inputs == 2);
1144
1145             if (haveConst)
1146             {
1147                 Mat values = getTensorContent(getConstBlob(layer, value_id));
1148                 CV_Assert(values.type() == CV_32FC1);
1149                 if (type == "Sub")
1150                     values *= -1.0f;
1151
1152                 int id;
1153                 if (values.total() == 1)  // is a scalar.
1154                 {
1155                     layerParams.set("shift", values.at<float>(0));
1156                     id = dstNet.addLayer(name, "Power", layerParams);
1157                 }
1158                 else  // is a vector
1159                 {
1160                     layerParams.blobs.resize(1, values);
1161                     id = dstNet.addLayer(name, "Shift", layerParams);
1162                 }
1163                 layer_id[name] = id;
1164
1165                 // one input only
1166                 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1167             }
1168             else
1169             {
1170                 layerParams.set("operation", "sum");
1171                 if (type == "Sub")
1172                 {
1173                     static float subCoeffs[] = {1.f, -1.f};
1174                     layerParams.set("coeff", DictValue::arrayReal<float*>(subCoeffs, 2));
1175                 }
1176
1177                 int id = dstNet.addLayer(name, "Eltwise", layerParams);
1178                 layer_id[name] = id;
1179
1180                 for (int ii = 0; ii < num_inputs; ii++)
1181                 {
1182                     Pin inp = parsePin(layer.input(ii));
1183                     if (layer_id.find(inp.name) == layer_id.end())
1184                         CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1185                     connect(layer_id, dstNet, inp, id, ii);
1186                 }
1187             }
1188         }
1189         else if (type == "MatMul")
1190         {
1191             CV_CheckEQ(num_inputs, 2, "");
1192
1193             // For the object detection networks, TensorFlow Object Detection API
1194             // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax)
1195             // order. We can manage it at DetectionOutput layer parsing predictions
1196             // or shuffle last Faster-RCNN's matmul weights.
1197             bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") &&
1198                                      getLayerAttr(layer, "loc_pred_transposed").b();
1199
1200             layerParams.set("bias_term", false);
1201             layerParams.blobs.resize(1);
1202
1203             StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");  // FIXIT Use layers fusion instead
1204             if (next_layers.empty())
1205             {
1206                 next_layers = getNextLayers(net, name, "Add");
1207             }
1208             if (next_layers.size() == 1) {
1209                 layerParams.set("bias_term", true);
1210                 layerParams.blobs.resize(2);
1211
1212                 int weights_layer_index = next_layers[0].second;
1213                 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
1214                 ExcludeLayer(net, weights_layer_index, 0, false);
1215                 layers_to_ignore.insert(next_layers[0].first);
1216
1217                 if (locPredTransposed)
1218                 {
1219                     const int numWeights = layerParams.blobs[1].total();
1220                     float* biasData = reinterpret_cast<float*>(layerParams.blobs[1].data);
1221                     CV_Assert(numWeights % 4 == 0);
1222                     for (int i = 0; i < numWeights; i += 2)
1223                     {
1224                         std::swap(biasData[i], biasData[i + 1]);
1225                     }
1226                 }
1227             }
1228
1229             int kernel_blob_index = -1;
1230             const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernel_blob_index);
1231             const String kernelTensorName = layer.input(kernel_blob_index);
1232             std::map<String, Mat>::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName);
1233             if (sharedWeightsIt == sharedWeights.end())
1234             {
1235                 blobFromTensor(kernelTensor, layerParams.blobs[0]);
1236                 releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
1237                 sharedWeights[kernelTensorName] = layerParams.blobs[0];
1238             }
1239             else
1240             {
1241                 layerParams.blobs[0] = sharedWeightsIt->second;
1242             }
1243
1244             if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed
1245                 Mat data = layerParams.blobs[0].t();
1246                 layerParams.blobs[0] = data.clone();
1247             }
1248
1249             layerParams.set("num_output", layerParams.blobs[0].size[0]);
1250             if (locPredTransposed)
1251             {
1252                 CV_Assert(layerParams.blobs[0].dims == 2);
1253                 for (int i = 0; i < layerParams.blobs[0].size[0]; i += 2)
1254                 {
1255                     cv::Mat src = layerParams.blobs[0].row(i);
1256                     cv::Mat dst = layerParams.blobs[0].row(i + 1);
1257                     std::swap_ranges(src.begin<float>(), src.end<float>(), dst.begin<float>());
1258                 }
1259             }
1260
1261             int id = dstNet.addLayer(name, "InnerProduct", layerParams);
1262             layer_id[name] = id;
1263
1264             // one input only
1265             int input_blob_index = kernel_blob_index == 0 ? 1 : 0;
1266             connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0);
1267             data_layouts[name] = DATA_LAYOUT_PLANAR;
1268         }
1269         else if (type == "Reshape")
1270         {
1271             CV_CheckGT(num_inputs, 0, "");
1272             Pin inpId = parsePin(layer.input(0));
1273             DataLayout inpLayout = getDataLayout(layer.input(0), data_layouts);
1274             // There are two possible implementations: reshape an input using
1275             // predefined sizes or use a second input blob as a source of new shape.
1276             if (value_id.find(layer.input(1)) != value_id.end())
1277             {
1278                 Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1));
1279                 if (newShape.total() == 4)
1280                 {
1281                     // NHWC->NCHW
1282                     std::swap(*newShape.ptr<int32_t>(0, 2), *newShape.ptr<int32_t>(0, 3));
1283                     std::swap(*newShape.ptr<int32_t>(0, 1), *newShape.ptr<int32_t>(0, 2));
1284                 }
1285                 if (inpLayout == DATA_LAYOUT_NHWC)
1286                 {
1287                     if (newShape.total() != 4 || newShape.at<int>(1) == 1)
1288                     {
1289                         LayerParams permLP;
1290                         int order[] = {0, 2, 3, 1};  // From OpenCV's NCHW to NHWC.
1291                         permLP.set("order", DictValue::arrayInt<int*>(order, 4));
1292
1293                         std::string permName = name + "/nchw";
1294                         CV_Assert(layer_id.find(permName) == layer_id.end());
1295                         int permId = dstNet.addLayer(permName, "Permute", permLP);
1296                         layer_id[permName] = permId;
1297                         connect(layer_id, dstNet, inpId, permId, 0);
1298                         inpId = Pin(permName);
1299                         inpLayout = DATA_LAYOUT_NCHW;
1300                     }
1301                 }
1302                 layerParams.set("dim", DictValue::arrayInt<int*>(newShape.ptr<int>(), newShape.total()));
1303
1304                 int id = dstNet.addLayer(name, "Reshape", layerParams);
1305                 layer_id[name] = id;
1306
1307                 // one input only
1308                 connect(layer_id, dstNet, inpId, id, 0);
1309                 data_layouts[name] = newShape.total() == 2 ? DATA_LAYOUT_PLANAR : inpLayout;
1310             }
1311             else
1312             {
1313                 int id = dstNet.addLayer(name, "Reshape", layerParams);
1314                 layer_id[name] = id;
1315                 connect(layer_id, dstNet, inpId, id, 0);
1316                 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
1317                 data_layouts[name] = inpLayout;
1318             }
1319         }
1320         else if (type == "Flatten" || type == "Squeeze")
1321         {
1322             CV_CheckGT(num_inputs, 0, "");
1323             Pin inpId = parsePin(layer.input(0));
1324             int inpLayout = getDataLayout(layer.input(0), data_layouts);
1325             if (type == "Squeeze")
1326             {
1327                 CV_Assert(hasLayerAttr(layer, "squeeze_dims"));
1328                 const tensorflow::AttrValue& dims = getLayerAttr(layer, "squeeze_dims");
1329                 std::vector<int> dimsVector(dims.list().i_size());
1330                 for (int i = 0; i < dimsVector.size(); ++i)
1331                     dimsVector[i] = dims.list().i(i);
1332
1333                 // Flatten layer can squeeze dimensions range into one.
1334                 std::sort(dimsVector.begin(), dimsVector.end());
1335                 for (int i = 1; i < dimsVector.size(); ++i)
1336                 {
1337                     if (dimsVector[i] != dimsVector[i - 1] + 1)
1338                         CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
1339                 }
1340                 int start = dimsVector.front() - 1, end = dimsVector.back();
1341                 if (start == -1 && end == 0)  // squeeze 0th dimension
1342                 {
1343                     start = 0;
1344                     end = 1;
1345                 }
1346                 layerParams.set("axis", start);
1347                 layerParams.set("end_axis", end);
1348             }
1349             if (inpLayout == DATA_LAYOUT_NHWC)
1350             {
1351                 LayerParams permLP;
1352                 int order[] = {0, 2, 3, 1};  // From OpenCV's NCHW to NHWC.
1353                 permLP.set("order", DictValue::arrayInt<int*>(order, 4));
1354
1355                 std::string permName = name + "/nchw";
1356                 CV_Assert(layer_id.find(permName) == layer_id.end());
1357                 int permId = dstNet.addLayer(permName, "Permute", permLP);
1358                 layer_id[permName] = permId;
1359                 connect(layer_id, dstNet, inpId, permId, 0);
1360                 inpId = Pin(permName);
1361             }
1362             int id = dstNet.addLayer(name, "Flatten", layerParams);
1363             layer_id[name] = id;
1364             connect(layer_id, dstNet, inpId, id, 0);
1365             data_layouts[name] = DATA_LAYOUT_PLANAR;
1366         }
1367         else if (type == "Transpose")
1368         {
1369             CV_CheckGT(num_inputs, 0, "");
1370             Mat perm = getTensorContent(getConstBlob(layer, value_id, 1));
1371             CV_Assert(perm.type() == CV_32SC1);
1372             int* permData = (int*)perm.data;
1373             if (perm.total() == 4)
1374             {
1375                 // Only NHWC <-> NCHW permutations are allowed. OpenCV is always
1376                 // keep NCHW layout this way.
1377                 int inpLayout = getDataLayout(layer.input(0), data_layouts);
1378                 std::string type = "Identity";
1379                 if (inpLayout == DATA_LAYOUT_NHWC)
1380                 {
1381                     if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2)
1382                     {
1383                         // in TensorFlow: NHWC->NCHW
1384                         // in OpenCV: NCHW->NCHW
1385                         data_layouts[name] = DATA_LAYOUT_NCHW;
1386                     }
1387                     else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
1388                     {
1389                         // in TensorFlow: NHWC->NHWC
1390                         // in OpenCV: NCHW->NCHW
1391                         data_layouts[name] = DATA_LAYOUT_NHWC;
1392                     }
1393                     else if (permData[0] == 0 && permData[1] == 3 && permData[2] == 2 && permData[3] == 1)
1394                     {
1395                         // in TensorFlow: NHWC->NCWH
1396                         // in OpenCV: NCHW->NCWH
1397                         int permData[] = {0, 1, 3, 2};
1398                         layerParams.set("order", DictValue::arrayInt<int*>(permData, perm.total()));
1399                         data_layouts[name] = DATA_LAYOUT_NCHW;  // we keep track NCHW because channels position only matters
1400                         type = "Permute";
1401                     }
1402                     else
1403                         CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
1404                 }
1405                 else if (inpLayout == DATA_LAYOUT_NCHW)
1406                 {
1407                     if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1)
1408                     {
1409                         // in TensorFlow: NCHW->NHWC
1410                         // in OpenCV: NCHW->NCHW
1411                         data_layouts[name] = DATA_LAYOUT_NHWC;
1412                     }
1413                     else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
1414                     {
1415                         // in TensorFlow: NCHW->NCHW
1416                         // in OpenCV: NCHW->NCHW
1417                         data_layouts[name] = DATA_LAYOUT_NCHW;
1418                     }
1419                     else
1420                         CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
1421                 }
1422                 int id = dstNet.addLayer(name, type, layerParams);
1423                 layer_id[name] = id;
1424                 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1425             }
1426             else
1427             {
1428                 layerParams.set("order", DictValue::arrayInt<int*>(permData, perm.total()));
1429
1430                 int id = dstNet.addLayer(name, "Permute", layerParams);
1431                 layer_id[name] = id;
1432
1433                 // one input only
1434                 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1435                 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1436             }
1437         }
1438         else if (type == "Const")
1439         {
1440         }
1441         else if (type == "LRN")
1442         {
1443             CV_CheckGT(num_inputs, 0, "");
1444             if(hasLayerAttr(layer, "alpha")) {
1445                 layerParams.set("alpha", getLayerAttr(layer, "alpha").f());
1446             }
1447             if(hasLayerAttr(layer, "beta")) {
1448                 layerParams.set("beta", getLayerAttr(layer, "beta").f());
1449             }
1450             if(hasLayerAttr(layer, "depth_radius")) {
1451                 int radius = (int)getLayerAttr(layer, "depth_radius").i();
1452                 layerParams.set("local_size", 2*radius + 1);
1453             }
1454             if(hasLayerAttr(layer, "bias")) {
1455                 layerParams.set("bias", getLayerAttr(layer, "bias").f());
1456             }
1457             layerParams.set("norm_by_size", false);
1458
1459             int id = dstNet.addLayer(name, "LRN", layerParams);
1460             layer_id[name] = id;
1461
1462             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
1463         }
1464         else if (type == "Concat" || type == "ConcatV2")
1465         {
1466             CV_CheckGT(num_inputs, 0, "");
1467             int axisId = (type == "Concat" ? 0 : num_inputs - 1);
1468             int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
1469
1470             if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1471                 axis = toNCHW(axis);
1472             else if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NDHWC)
1473                 axis = toNCDHW(axis);
1474             layerParams.set("axis", axis);
1475
1476             // input(0) or input(n-1) is concat_dim
1477             int from = (type == "Concat" ? 1 : 0);
1478             int to = (type == "Concat" ? num_inputs : num_inputs - 1);
1479
1480             for (int ii = from; ii < to; ii++)
1481             {
1482                 Pin inp = parsePin(layer.input(ii));
1483                 if (layer_id.find(inp.name) == layer_id.end())
1484                 {
1485                     // There are constant inputs.
1486                     LayerParams lp;
1487                     lp.name = inp.name;
1488                     lp.type = "Const";
1489                     lp.blobs.resize(1);
1490                     blobFromTensor(getConstBlob(layer, value_id, ii), lp.blobs.back());
1491                     CV_Assert_N(!lp.blobs[0].empty(), lp.blobs[0].type() == CV_32F);
1492
1493                     int constInpId = dstNet.addLayer(lp.name, lp.type, lp);
1494                     layer_id[lp.name] = constInpId;
1495                 }
1496             }
1497
1498             int id = dstNet.addLayer(name, "Concat", layerParams);
1499             layer_id[name] = id;
1500
1501             for (int ii = from; ii < to; ii++)
1502             {
1503                 Pin inp = parsePin(layer.input(ii));
1504                 if (layer_id.find(inp.name) == layer_id.end())
1505                     CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1506                 connect(layer_id, dstNet, inp, id, ii - from);
1507             }
1508         }
1509         else if (type == "MaxPool" || type == "MaxPool3D")
1510         {
1511             CV_CheckGT(num_inputs, 0, "");
1512             layerParams.set("pool", "max");
1513
1514             setKSize(layerParams, layer);
1515             setStrides(layerParams, layer);
1516             setPadding(layerParams, layer);
1517             // Test_TensorFlow_nets.EAST_text_detection/1, NGRAPH/CPU
1518             layerParams.set("ceil_mode", false);
1519
1520             int id = dstNet.addLayer(name, "Pooling", layerParams);
1521             layer_id[name] = id;
1522
1523             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
1524         }
1525         else if (type == "AvgPool" || type == "AvgPool3D")
1526         {
1527             CV_CheckGT(num_inputs, 0, "");
1528             layerParams.set("pool", "ave");
1529             layerParams.set("ave_pool_padded_area", false);
1530             setKSize(layerParams, layer);
1531             setStrides(layerParams, layer);
1532             setPadding(layerParams, layer);
1533
1534             int id = dstNet.addLayer(name, "Pooling", layerParams);
1535             layer_id[name] = id;
1536
1537             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
1538         }
1539         else if (type == "MaxPoolGrad")
1540         {
1541             CV_CheckEQ(num_inputs, 3, "");
1542
1543             layerParams.set("pool_k_h", 0);
1544             layerParams.set("pool_k_w", 0);
1545             layerParams.set("pool_stride_h", 0);
1546             layerParams.set("pool_stride_w", 0);
1547             layerParams.set("pool_pad_h", 0);
1548             layerParams.set("pool_pad_w", 0);
1549
1550             int id = dstNet.addLayer(name, "MaxUnpool", layerParams);
1551             layer_id[name] = id;
1552
1553             connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
1554             connect(layer_id, dstNet, parsePin(layer.input(1) + ":1"), id, 1);
1555             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 2);
1556         }
1557         else if (type == "Placeholder")
1558         {
1559             if (!hasLayerAttr(layer, "dtype") ||
1560                 getLayerAttr(layer, "dtype").type() != tensorflow::DT_BOOL)  // If input is not a train/test flag.
1561             {
1562                 netInputsNames.push_back(name);
1563                 layer_id[name] = 0;
1564             }
1565             tensorflow::TensorShapeProto shape;
1566             if (hasLayerAttr(layer, "shape"))
1567                 shape = getLayerAttr(layer, "shape").shape();
1568             else if (hasLayerAttr(layer, "_output_shapes"))
1569             {
1570                 tensorflow::AttrValue_ListValue list = getLayerAttr(layer, "_output_shapes").list();
1571                 if (list.shape_size())
1572                     shape = list.shape()[0];
1573             }
1574             if (shape.dim_size())
1575             {
1576                 MatShape dims(shape.dim_size());
1577                 for (int i = 0; i < dims.size(); ++i)
1578                     dims[i] = shape.dim(i).size();
1579                 if (dims.size() == 4 && predictedLayout == DATA_LAYOUT_NHWC)
1580                 {
1581                     std::swap(dims[1], dims[3]);  // NHWC->NCWH
1582                     std::swap(dims[2], dims[3]);  // NCWH->NCHW
1583                     if (dims[0] == -1)  // It's OK to have undetermined batch size
1584                         dims[0] = 1;
1585                 }
1586                 bool hasNeg = false;
1587                 for (int i = 0; i < dims.size() && !hasNeg; ++i)
1588                 {
1589                     hasNeg = dims[i] < 0;
1590                 }
1591                 if (!hasNeg)
1592                     netInputShapes.push_back(dims);
1593             }
1594         }
1595         else if (type == "Split") {
1596             // TODO: determining axis index remapping by input dimensions order of input blob
1597             // TODO: slicing input may be Const op
1598             // TODO: slicing kernels for convolutions - in current implementation it is impossible
1599             // TODO: add parsing num of slices parameter
1600             CV_CheckEQ(num_inputs, 2, "");
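1601             // Input 0 is the constant axis tensor; input 1 is the tensor to split.
1602             // The optional "num_split" attribute is forwarded to the Slice layer.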
1603             int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
1604             if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1605                 axis = toNCHW(axis);
1606             layerParams.set("axis", axis);
1607
1608             if (hasLayerAttr(layer, "num_split"))
1609                 layerParams.set("num_split", getLayerAttr(layer, "num_split").i());
1610
1611             int id = dstNet.addLayer(name, "Slice", layerParams);
1612             layer_id[name] = id;
1613
1614             // one input only
1615             connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1616         }
1617         else if (type == "Slice")
1618         {
1619             // op: "Slice"
1620             // input: "input_node"
1621             // input: "Slice/begin"
1622             // input: "Slice/size"
1623             CV_CheckEQ(num_inputs, 3, "");
1624             Mat begins = getTensorContent(getConstBlob(layer, value_id, 1));
1625             Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2));
1626             CV_Assert_N(!begins.empty(), !sizes.empty());
1627             CV_CheckTypeEQ(begins.type(), CV_32SC1, "");
1628             CV_CheckTypeEQ(sizes.type(), CV_32SC1, "");
1629
1630             if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1631             {
1632                 // Swap NHWC parameters' order to NCHW.
1633                 std::swap(*begins.ptr<int32_t>(0, 2), *begins.ptr<int32_t>(0, 3));
1634                 std::swap(*begins.ptr<int32_t>(0, 1), *begins.ptr<int32_t>(0, 2));
1635                 std::swap(*sizes.ptr<int32_t>(0, 2), *sizes.ptr<int32_t>(0, 3));
1636                 std::swap(*sizes.ptr<int32_t>(0, 1), *sizes.ptr<int32_t>(0, 2));
1637             }
1638             layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total()));
1639             layerParams.set("size", DictValue::arrayInt((int*)sizes.data, sizes.total()));
1640
1641             int id = dstNet.addLayer(name, "Slice", layerParams);
1642             layer_id[name] = id;
1643
1644             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1645         }
1646         else if (type == "StridedSlice")
1647         {
1648             CV_CheckEQ(num_inputs, 4, "");
1649             Mat begins = getTensorContent(getConstBlob(layer, value_id, 1));
1650             Mat ends = getTensorContent(getConstBlob(layer, value_id, 2));
1651             Mat strides = getTensorContent(getConstBlob(layer, value_id, 3));
1652             CV_CheckTypeEQ(begins.type(), CV_32SC1, "");
1653             CV_CheckTypeEQ(ends.type(), CV_32SC1, "");
1654             CV_CheckTypeEQ(strides.type(), CV_32SC1, "");
1655             const int num = begins.total();
1656             CV_Assert_N(num == ends.total(), num == strides.total());
1657
1658             int end_mask = getLayerAttr(layer, "end_mask").i();
1659             for (int i = 0; i < num; ++i)
1660             {
1661                 if (ends.at<int>(i) < 0)
1662                     ends.at<int>(i) -= 1;
1663                 if (end_mask & (1 << i))
1664                     ends.at<int>(i) = -1;
1665                 if (strides.at<int>(i) != 1)
1666                     CV_Error(Error::StsNotImplemented,
1667                              format("StridedSlice with stride %d", strides.at<int>(i)));
1668             }
1669             if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1670             {
1671                 // Swap NHWC parameters' order to NCHW.
1672                 std::swap(begins.at<int>(2), begins.at<int>(3));
1673                 std::swap(begins.at<int>(1), begins.at<int>(2));
1674                 std::swap(ends.at<int>(2), ends.at<int>(3));
1675                 std::swap(ends.at<int>(1), ends.at<int>(2));
1676             }
1677             layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total()));
1678             layerParams.set("end", DictValue::arrayInt((int*)ends.data, ends.total()));
1679
1680             int id = dstNet.addLayer(name, "Slice", layerParams);
1681             layer_id[name] = id;
1682
1683             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1684         }
1685         else if (type == "Mul" || type == "RealDiv")
1686         {
1687             CV_CheckGT(num_inputs, 0, "");
1688             int constId = -1;
1689             for(int ii = 0; ii < num_inputs; ++ii)
1690             {
1691                 Pin input = parsePin(layer.input(ii));
1692                 if (value_id.find(input.name) != value_id.end())
1693                 {
1694                     constId = ii;
1695                     break;
1696                 }
1697             }
1698             CV_Assert((constId != -1) || (num_inputs == 2));
1699
1700             if (constId != -1)
1701             {
1702                 // Multiplication by constant.
1703                 CV_CheckEQ(num_inputs, 2, "");
1704                 Mat scaleMat = getTensorContent(getConstBlob(layer, value_id));
1705                 CV_Assert(scaleMat.type() == CV_32FC1);
1706                 if (type == "RealDiv")
1707                 {
1708                     if (constId == 0)
1709                         CV_Error(Error::StsNotImplemented, "Division of constant over variable");
1710                     scaleMat = 1.0f / scaleMat;
1711                 }
1712
1713                 int id;
1714                 if (scaleMat.total() == 1)  // is a scalar.
1715                 {
1716                     // Try to match with a LeakyRelu:
1717                     // node {
1718                     //   name: "LeakyRelu/mul"
1719                     //   op: "Mul"
1720                     //   input: "LeakyRelu/alpha"
1721                     //   input: "input"
1722                     // }
1723                     // node {
1724                     //   name: "LeakyRelu/Maximum"
1725                     //   op: "Maximum"
1726                     //   input: "LeakyRelu/mul"
1727                     //   input: "input"
1728                     // }
1729                     StrIntVector next_layers = getNextLayers(net, name, "Maximum");
1730                     if (!next_layers.empty())
1731                     {
1732                         int maximumLayerIdx = next_layers[0].second;
1733
1734                         CV_Assert(net.node(maximumLayerIdx).input_size() == 2);
1735
1736                         // The input from the Mul layer can also be at index 1.
1737                         int mulInputIdx = (net.node(maximumLayerIdx).input(0) == name) ? 0 : 1;
1738
1739                         ExcludeLayer(net, maximumLayerIdx, mulInputIdx, false);
1740                         layers_to_ignore.insert(next_layers[0].first);
1741
1742                         layerParams.set("negative_slope", scaleMat.at<float>(0));
1743                         id = dstNet.addLayer(name, "ReLU", layerParams);
1744                     }
1745                     else
1746                     {
1747                         // Just a multiplication.
1748                         layerParams.set("scale", scaleMat.at<float>(0));
1749                         id = dstNet.addLayer(name, "Power", layerParams);
1750                     }
1751                 }
1752                 else  // is a vector
1753                 {
1754                     layerParams.blobs.resize(1, scaleMat);
1755
1756                    StrIntVector next_layers = getNextLayers(net, name, "Add");
1757                    if (!next_layers.empty())
1758                    {
1759                        layerParams.set("bias_term", true);
1760                        layerParams.blobs.resize(2);
1761
1762                        int weights_layer_index = next_layers[0].second;
1763                        blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back());
1764                        ExcludeLayer(net, weights_layer_index, 0, false);
1765                        layers_to_ignore.insert(next_layers[0].first);
1766                    }
1767
1768                     if (hasLayerAttr(layer, "axis"))
1769                         layerParams.set("axis", getLayerAttr(layer, "axis").i());
1770
1771                     id = dstNet.addLayer(name, "Scale", layerParams);
1772                 }
1773                 layer_id[name] = id;
1774
1775                 Pin inp0 = parsePin(layer.input(0));
1776                 if (layer_id.find(inp0.name) != layer_id.end())
1777                     // First operand is an already-imported layer, so the constant is the second one.
1778                     connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1779                 else
1780                     connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1781             }
1782             else
1783             {
1784                 // Check if all the inputs have the same shape.
1785                 bool equalInpShapes = true;
1786                 MatShape outShape0;
1787                 for (int ii = 0; ii < num_inputs && !netInputShapes.empty(); ii++)
1788                 {
1789                     Pin pin = parsePin(layer.input(ii));
1790                     int inpId = layer_id.find(pin.name)->second;
1791
1792                     // Get input shape
1793                     MatShape outShape;
1794                     std::vector<MatShape> inpShapes, outShapes;
1795                     dstNet.getLayerShapes(netInputShapes, inpId, inpShapes, outShapes);
1796                     CV_CheckGT(static_cast<int>(outShapes.size()), pin.blobIndex, "");
1797                     outShape = outShapes[pin.blobIndex];
1798
1799                     if (ii == 0)
1800                     {
1801                         outShape0 = outShape;
1802                     }
1803                     else if (outShape != outShape0)
1804                     {
1805                         equalInpShapes = false;
1806                         break;
1807                     }
1808                 }
1809
1810                 int id;
1811                 if (equalInpShapes || netInputShapes.empty())
1812                 {
1813                     layerParams.set("operation", type == "RealDiv" ? "div" : "prod");
1814                     id = dstNet.addLayer(name, "Eltwise", layerParams);
1815                 }
1816                 else
1817                 {
1818                     if (type == "RealDiv")
1819                         CV_Error(Error::StsNotImplemented, "Division of non equal tensors");
1820                     id = dstNet.addLayer(name, "Scale", layerParams);
1821                 }
1822
1823                 layer_id[name] = id;
1824
1825                 for (int ii = 0; ii < num_inputs; ii++)
1826                 {
1827                     Pin inp = parsePin(layer.input(ii));
1828                     if (layer_id.find(inp.name) == layer_id.end())
1829                         CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1830                     connect(layer_id, dstNet, inp, id, ii);
1831                 }
1832             }
1833         }
1834         else if (type == "FusedBatchNorm" || type == "FusedBatchNormV3")
1835         {
1836             // op: "FusedBatchNorm"
1837             // input: "input"
1838             // input: "BatchNorm/gamma"
1839             // input: "BatchNorm/beta"
1840             // input: "BatchNorm/moving_mean"
1841             // input: "BatchNorm/moving_variance"
1842             CV_CheckEQ(num_inputs, 5, "Expected gamma, beta, mean and std");
1843             Pin inpId = parsePin(layer.input(0));
1844
1845             bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b();
1846
1847             layerParams.blobs.resize(2);
1848
1849             const tensorflow::TensorProto& gammaTensor = getConstBlob(layer, value_id, 1);
1850             if (!gammaTensor.tensor_content().empty())
1851             {
1852                 layerParams.blobs.resize(layerParams.blobs.size() + 1);
1853                 layerParams.set("has_weight", true);
1854                 blobFromTensor(gammaTensor, layerParams.blobs.back());
1855             }
1856             else
1857                 layerParams.set("has_weight", false);
1858
1859             const tensorflow::TensorProto& betaTensor = getConstBlob(layer, value_id, 2);
1860             if (!betaTensor.tensor_content().empty())
1861             {
1862                 layerParams.blobs.resize(layerParams.blobs.size() + 1);
1863                 layerParams.set("has_bias", true);
1864                 blobFromTensor(betaTensor, layerParams.blobs.back());
1865             }
1866             else
1867                 layerParams.set("has_bias", false);
1868
1869             Mat mean, std;
1870             if (isTraining)
1871             {
1872                 if (layerParams.blobs.size() == 2)
1873                     CV_Error(Error::StsNotImplemented, "Cannot determine number "
1874                              "of parameters for batch normalization layer.");
1875                 mean = Mat::zeros(1, layerParams.blobs[2].total(), CV_32F);
1876                 std = Mat::ones(1, layerParams.blobs[2].total(), CV_32F);
1877
1878                 // Add an extra layer: Mean-Variance normalization
1879                 LayerParams mvnParams;
1880                 std::string mvnName = name + "/MVN";
1881                 CV_Assert(layer_id.find(mvnName) == layer_id.end());
1882                 int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);
1883                 layer_id[mvnName] = mvnId;
1884                 connect(layer_id, dstNet, inpId, mvnId, 0);
1885                 inpId = Pin(mvnName);
1886             }
1887             else
1888             {
1889                 blobFromTensor(getConstBlob(layer, value_id, 3), mean);
1890                 blobFromTensor(getConstBlob(layer, value_id, 4), std);
1891             }
1892             layerParams.blobs[0] = mean;
1893             layerParams.blobs[1] = std;
1894
1895             if (hasLayerAttr(layer, "epsilon"))
1896                 layerParams.set("eps", getLayerAttr(layer, "epsilon").f());
1897
1898             int id = dstNet.addLayer(name, "BatchNorm", layerParams);
1899             layer_id[name] = id;
1900
1901             // one input only
1902             connect(layer_id, dstNet, inpId, id, 0);
1903         }
1904         else if (type == "Conv2DBackpropInput")
1905         {
1906             // op: "Conv2DBackpropInput"
1907             // input: "conv2d_transpose/output_shape"
1908             // input: "weights"
1909             // input: "input"
1910             CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes");
1911
1912             layerParams.set("bias_term", false);
1913             layerParams.blobs.resize(1);
1914
1915             StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
1916             if (next_layers.size() == 1)
1917             {
1918                 layerParams.set("bias_term", true);
1919                 layerParams.blobs.resize(2);
1920
1921                 int weights_layer_index = next_layers[0].second;
1922
1923                 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
1924                 ExcludeLayer(net, weights_layer_index, 0, false);
1925                 layers_to_ignore.insert(next_layers[0].first);
1926             }
1927
1928             kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);
1929
1930             const int* kshape = layerParams.blobs[0].size.p;
1931             const int kernelH = kshape[2];
1932             const int kernelW = kshape[3];
1933             layerParams.set("kernel_h", kernelH);
1934             layerParams.set("kernel_w", kernelW);
1935             layerParams.set("num_output", kshape[1]);
1936
1937             setStrides(layerParams, layer);
1938             setPadding(layerParams, layer);
1939
1940             // For a convolution layer, the output shape is computed as
1941             // o = 1 + (i - k + 2*p) / s
1942             // i - input size, o - output size, k - kernel size, p - pad, s - stride
1943             // In TensorFlow, either p == 0 (padMode == 'VALID') or p == (k - 1) / 2
1944             // (padMode == 'SAME'), considering that k is odd.
1945             // SAME:  o = 1 + (i - 1) / s
1946             // VALID: o = 1 + (i - k) / s
1947             // A deconvolution layer's output shape is computed as
1948             // SAME:  o = 1 + (i - 1)*s
1949             // VALID: o = k + (i - 1)*s
1950             // If output_shape differs from the formulas above, an adjustment (adj_w/adj_h) is applied.
1951
1952             const int strideY = layerParams.get<int>("stride_h");
1953             const int strideX = layerParams.get<int>("stride_w");
1954             Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
1955             const int outH = outShape.at<int>(1);
1956             const int outW = outShape.at<int>(2);
1957             if (layerParams.get<String>("pad_mode") == "SAME")
1958             {
1959                 layerParams.set("adj_w", (outW - 1) % strideX);
1960                 layerParams.set("adj_h", (outH - 1) % strideY);
1961             }
1962             else if (layerParams.get<String>("pad_mode") == "VALID")
1963             {
1964                 layerParams.set("adj_w", (outW - kernelW) % strideX);
1965                 layerParams.set("adj_h", (outH - kernelH) % strideY);
1966             }
1967             int id = dstNet.addLayer(name, "Deconvolution", layerParams);
1968             layer_id[name] = id;
1969
1970             // one input only
1971             connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
1972         }
1973         else if (type == "BlockLSTM")
1974         {
1975             // op: "BlockLSTM"
1976             // input: "lstm_block_wrapper/ToInt64/x"  (ignore, number of time steps)
1977             // input: "input"
1978             // input: "lstm_block_wrapper/zeros"      (ignore)
1979             // input: "lstm_block_wrapper/zeros"      (ignore)
1980             // input: "lstm_block_wrapper/kernel"
1981             // input: "lstm_block_wrapper/w_i_diag"
1982             // input: "lstm_block_wrapper/w_f_diag"
1983             // input: "lstm_block_wrapper/w_o_diag"
1984             // input: "lstm_block_wrapper/bias"
1985             CV_CheckEQ(num_inputs, 9, "Unexpected number of input nodes");
1986
1987             if (hasLayerAttr(layer, "forget_bias"))
1988                 layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f());
1989
1990             if (hasLayerAttr(layer, "forget_bias"))
1991             {
1992                 float cellClip = getLayerAttr(layer, "cell_clip").f();
1993                 // Cell clip is disabled if it's negative. NOTE(review): the guard above tests "forget_bias", not "cell_clip" - confirm.
1994                 if (cellClip >= 0)
1995                 {
1996                     layerParams.set("use_cell_clip", true);
1997                     layerParams.set("cell_clip", cellClip);
1998                 }
1999             }
2000
2001             Mat W, Wh, Wx, b;
2002             blobFromTensor(getConstBlob(layer, value_id, 4), W);
2003             blobFromTensor(getConstBlob(layer, value_id, 8), b);
2004             const int outSize = W.cols / 4;
2005
2006             // IGFO->IFOG
2007             float* weightData = (float*)W.data;
2008             for (int i = 0; i < W.rows; ++i)
2009                 for (int j = 0; j < outSize; ++j)
2010                 {
2011                     std::swap(weightData[i * W.cols + 1 * outSize + j],
2012                               weightData[i * W.cols + 2 * outSize + j]);
2013                     std::swap(weightData[i * W.cols + 2 * outSize + j],
2014                               weightData[i * W.cols + 3 * outSize + j]);
2015                 }
2016             Wx = W.rowRange(0, W.rows - outSize).t();
2017             Wh = W.rowRange(W.rows - outSize, W.rows).t();
2018
2019             layerParams.blobs.resize(3);
2020             layerParams.blobs[0] = Wh;
2021             layerParams.blobs[1] = Wx;
2022             layerParams.blobs[2] = b;
2023
2024             if (hasLayerAttr(layer, "use_peephole"))
2025             {
2026                 bool usePeephole = getLayerAttr(layer, "use_peephole").b();
2027                 if (usePeephole)
2028                 {
2029                     layerParams.set("use_peephole", true);
2030                     layerParams.blobs.resize(6);
2031                     for (int i = 0; i < 3; ++i)
2032                     {
2033                         Mat w;
2034                         blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);
2035                         w = w.reshape(1, w.total());  // Single column.
2036                         w = Mat::diag(w);  // Make a diagonal matrix.
2037                         layerParams.blobs[3 + i] = w;
2038                     }
2039                 }
2040             }
2041
2042             int id = dstNet.addLayer(name, "LSTM", layerParams);
2043             layer_id[name] = id;
2044
2045             // one input only
2046             connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
2047             data_layouts[name] = DATA_LAYOUT_UNKNOWN;
2048         }
2049         else if (type == "ResizeNearestNeighbor" || type == "ResizeBilinear" || type == "FusedResizeAndPadConv2D")
2050         {
2051             CV_CheckGT(num_inputs, 0, "");
2052             std::string convWeights = "";
2053             if (type == "FusedResizeAndPadConv2D")
2054             {
2055                 // input: "mul_1"
2056                 // input: "decoder/ResizeBilinear/size"
2057                 // input: "decoder/decoder_conv0/Conv2D_dummy_paddings"
2058                 // input: "decoder/decoder_conv0/weights"
2059                 CV_CheckEQ(num_inputs, 4, "Number of input for FusedResizeAndPadConv2D");
2060
2061                 Mat paddings = getTensorContent(getConstBlob(layer, value_id, 2));
2062                 CV_CheckEQ(countNonZero(paddings), 0, "Unsupported mode");
2063
2064                 convWeights = layer.input(3);
2065                 layer.mutable_input()->DeleteSubrange(2, 2);  // FIXIT do NOT modify input model
2066                 num_inputs = layer.input_size();
2067                 name = name + "/resize";
2068
2069                 if (hasLayerAttr(layer, "resize_align_corners"))
2070                 {
2071                     // FIXIT do NOT modify input model
2072                     layer.mutable_attr()->insert(
2073                         ::google::protobuf::MapPair<std::string, tensorflow::AttrValue>("align_corners",
2074                                                                                         getLayerAttr(layer, "resize_align_corners")));
2075                 }
2076             }
2077             if (num_inputs == 2)
2078             {
2079                 Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1));
2080                 CV_CheckTypeEQ(outSize.type(), CV_32SC1, ""); CV_CheckEQ(outSize.total(), (size_t)2, "");
2081                 layerParams.set("height", outSize.at<int>(0, 0));
2082                 layerParams.set("width", outSize.at<int>(0, 1));
2083             }
2084             else if (num_inputs == 3)
2085             {
2086                 Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1));
2087                 Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2));
2088                 factorHeight.convertTo(factorHeight, CV_32F);
2089                 factorWidth.convertTo(factorWidth, CV_32F);
2090                 layerParams.set("zoom_factor_x", factorWidth.at<float>(0));
2091                 layerParams.set("zoom_factor_y", factorHeight.at<float>(0));
2092             }
2093             else
2094                 CV_Check(num_inputs, num_inputs == 2 || num_inputs == 3, "");
2095
2096             if (type == "ResizeNearestNeighbor")
2097                 layerParams.set("interpolation", "nearest");
2098             else
2099                 layerParams.set("interpolation", "bilinear");
2100
2101             if (hasLayerAttr(layer, "align_corners"))
2102                 layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b());
2103
2104             if (hasLayerAttr(layer, "half_pixel_centers"))
2105                 layerParams.set("half_pixel_centers", getLayerAttr(layer, "half_pixel_centers").b());
2106
2107             int id = dstNet.addLayer(name, "Resize", layerParams);
2108             layer_id[name] = id;
2109
2110             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2111
2112             // Step back to add convolution
2113             if (type == "FusedResizeAndPadConv2D")
2114             {
2115                 tensorflow::NodeDef conv = layer_;
2116                 conv.clear_input();
2117                 conv.add_input(name);
2118                 conv.add_input(convWeights);
2119                 conv.set_op("Conv2D");
2120                 parseNode(conv);
2121             }
2122         }
2123         else if (type == "L2Normalize")
2124         {
2125             // op: "L2Normalize"
2126             // input: "input"
2127             // input: "reduction_indices" (axis)
2128             CV_CheckEQ(num_inputs, 2, "");
2129             Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));
2130             CV_Assert(reductionIndices.type() == CV_32SC1);
2131
2132             const int numAxes = reductionIndices.total();
2133             if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
2134                 for (int i = 0; i < numAxes; ++i)
2135                     reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));
2136
2137             cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);
2138             for (int i = 1; i < numAxes; ++i)
2139             {
2140                 CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);
2141                 // Axes have the same sign.
2142                 CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);
2143             }
2144             layerParams.set("start_axis", reductionIndices.at<int>(0));
2145             layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));
2146
2147             int id = dstNet.addLayer(name, "Normalize", layerParams);
2148             layer_id[name] = id;
2149             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2150         }
2151         else if (type == "PriorBox")
2152         {
2153             CV_CheckEQ(num_inputs, 2, "");
2154             if (hasLayerAttr(layer, "min_size"))
2155                 layerParams.set("min_size", getLayerAttr(layer, "min_size").i());
2156             if (hasLayerAttr(layer, "max_size"))
2157                 layerParams.set("max_size", getLayerAttr(layer, "max_size").i());
2158             if (hasLayerAttr(layer, "flip"))
2159                 layerParams.set("flip", getLayerAttr(layer, "flip").b());
2160             if (hasLayerAttr(layer, "clip"))
2161                 layerParams.set("clip", getLayerAttr(layer, "clip").b());
2162             if (hasLayerAttr(layer, "offset"))
2163                 layerParams.set("offset", getLayerAttr(layer, "offset").f());
2164             if (hasLayerAttr(layer, "step"))
2165                 layerParams.set("step", getLayerAttr(layer, "step").f());
2166
2167             const std::string paramNames[] = {"variance", "aspect_ratio", "scales",
2168                                               "width", "height"};
2169             for (int i = 0; i < 5; ++i)
2170             {
2171                 if (hasLayerAttr(layer, paramNames[i]))
2172                 {
2173                     Mat values = getTensorContent(getLayerAttr(layer, paramNames[i]).tensor());
2174                     layerParams.set(paramNames[i],
2175                                     DictValue::arrayReal<float*>((float*)values.data, values.total()));
2176                 }
2177             }
2178             int id = dstNet.addLayer(name, "PriorBox", layerParams);
2179             layer_id[name] = id;
2180             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2181             connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
2182             data_layouts[name] = DATA_LAYOUT_UNKNOWN;
2183         }
2184         else if (type == "Softmax")
2185         {
2186             CV_CheckGT(num_inputs, 0, "");
2187             if (hasLayerAttr(layer, "axis"))
2188                 layerParams.set("axis", getLayerAttr(layer, "axis").i());
2189
2190             int id = dstNet.addLayer(name, "Softmax", layerParams);
2191             layer_id[name] = id;
2192             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
2193         }
2194         else if (type == "CropAndResize")
2195         {
2196             // op: "CropAndResize"
2197             // input: "input"
2198             // input: "boxes"
2199             // input: "sizes"
2200             CV_CheckEQ(num_inputs, 3, "");
2201
2202             Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2));
2203             CV_CheckTypeEQ(cropSize.type(), CV_32SC1, ""); CV_CheckEQ(cropSize.total(), (size_t)2, "");
2204
2205             layerParams.set("height", cropSize.at<int>(0));
2206             layerParams.set("width", cropSize.at<int>(1));
2207
2208             int id = dstNet.addLayer(name, "CropAndResize", layerParams);
2209             layer_id[name] = id;
2210
2211             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2212             connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
2213         }
2214         else if (type == "Mean" || type == "Sum")
2215         {
2216             // Computes the mean of elements across dimensions of a tensor.
2217             // If keepdims is false (default) reduces input_tensor along the dimensions given in axis,
2218             // else the reduced dimensions are retained with length 1.
2219             // if indices = [1, 2] in NHWC layout we use global pooling: NxCxHxW --Pooling--> NxCx1x1
2220             // if keepdims is false we use Flatten after Pooling: out_shape = NxC
2221             // if indices = [0] we use a global pooling by indices.
2222             // To return correct shape, we use Reshape after Pooling. To determine input shape use Slice for input,
2223             // if keepdims is false we use Flatten after Slice.
2224             // Example: input_shape = NxCxHxW
2225             // determine out shape: NxCxHxW --Slice--> 1xCxHxW
2226             //                      out_shape = 1xCxHxW if keepDims else (1xCxHxW --Flatten--> CxHxW)
2227             // global pool: NxCxHxW --Flatten--> Nx(C*H*W) --Reshape--> 1x1xNx(C*H*W) --Pooling--> 1x1x1x(C*H*W) --Reshape--> out_shape
2228             CV_CheckGT(num_inputs, 0, "");
2229
2230             Mat indices = getTensorContent(getConstBlob(layer, value_id, 1));
2231             CV_Assert(indices.type() == CV_32SC1);
2232
2233             // There are two attributes, "keepdims" and a deprecated "keep_dims".
2234             bool keepDims = false;
2235             if (hasLayerAttr(layer, "keepdims"))
2236                 keepDims = getLayerAttr(layer, "keepdims").b();
2237             else if (hasLayerAttr(layer, "keep_dims"))
2238                 keepDims = getLayerAttr(layer, "keep_dims").b();
2239
2240             if (indices.total() == 1 && indices.at<int>(0) == 0)
2241             {
2242                 LayerParams flattenLp;
2243                 std::string flattenName = name + "/flatten";
2244                 CV_Assert(layer_id.find(flattenName) == layer_id.end());
2245                 int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
2246                 layer_id[flattenName] = flattenId;
2247                 connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0);
2248
2249                 LayerParams reshapeLp;
2250                 std::string reshapeName = name + "/reshape";
2251                 CV_Assert(layer_id.find(reshapeName) == layer_id.end());
2252                 reshapeLp.set("axis", 0);
2253                 reshapeLp.set("num_axes", 1);
2254                 int newShape[] = {1, 1, -1};
2255                 reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3));
2256
2257                 int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp);
2258                 layer_id[reshapeName] = reshapeId;
2259                 connect(layer_id, dstNet, Pin(flattenName), reshapeId, 0);
2260
2261                 LayerParams avgLp;
2262                 std::string avgName = name + "/avg";
2263                 CV_Assert(layer_id.find(avgName) == layer_id.end());
2264                 avgLp.set("pool", type == "Mean" ? "ave" : "sum");
2265                 // pooling kernel H x 1
2266                 avgLp.set("global_pooling_h", true);
2267                 avgLp.set("kernel_w", 1);
2268                 int avgId = dstNet.addLayer(avgName, "Pooling", avgLp);
2269                 layer_id[avgName] = avgId;
2270                 connect(layer_id, dstNet, Pin(reshapeName), avgId, 0);
2271
2272                 LayerParams sliceLp;
2273                 std::string layerShapeName = name + "/slice";
2274                 CV_Assert(layer_id.find(layerShapeName) == layer_id.end());
2275                 sliceLp.set("axis", 0);
2276                 int begin[] = {0};
2277                 int size[] = {1};
2278                 sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1));
2279                 sliceLp.set("size", DictValue::arrayInt(&size[0], 1));
2280                 int sliceId = dstNet.addLayer(layerShapeName, "Slice", sliceLp);
2281                 layer_id[layerShapeName] = sliceId;
2282                 connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0);
2283
2284                 if (!keepDims)
2285                 {
2286                     LayerParams squeezeLp;
2287                     std::string squeezeName = name + "/squeeze";
2288                     CV_Assert(layer_id.find(squeezeName) == layer_id.end());
2289                     squeezeLp.set("axis", 0);
2290                     squeezeLp.set("end_axis", 1);
2291                     int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp);
2292                     layer_id[squeezeName] = squeezeId;
2293                     connect(layer_id, dstNet, Pin(layerShapeName), squeezeId, 0);
2294                     layerShapeName = squeezeName;
2295                 }
2296
2297                 int id = dstNet.addLayer(name, "Reshape", layerParams);
2298                 layer_id[name] = id;
2299                 connect(layer_id, dstNet, Pin(avgName), id, 0);
2300                 connect(layer_id, dstNet, Pin(layerShapeName), id, 1);
2301             } else if (indices.total() == 1) {
2302                 int axis = toNCHW(indices.at<int>(0));
2303                 if (axis == 2 || axis == 3)
2304                 {
2305                     layerParams.set("pool", type == "Mean" ? "ave" : "sum");
2306                     layerParams.set(axis == 2 ? "kernel_w" : "kernel_h", 1);
2307                     layerParams.set(axis == 2 ? "global_pooling_h" : "global_pooling_w", true);
2308                     int id = dstNet.addLayer(name, "Pooling", layerParams);
2309                     layer_id[name] = id;
2310                     connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2311
2312                     if (!keepDims)
2313                     {
2314                         // To keep correct order after squeeze dims we first need to change layout from NCHW to NHWC
2315                         LayerParams permLP;
2316                         int order[] = {0, 2, 3, 1};  // From OpenCV's NCHW to NHWC.
2317                         permLP.set("order", DictValue::arrayInt<int*>(order, 4));
2318                         std::string permName = name + "/nchw";
2319                         CV_Assert(layer_id.find(permName) == layer_id.end());
2320                         int permId = dstNet.addLayer(permName, "Permute", permLP);
2321                         layer_id[permName] = permId;
2322                         connect(layer_id, dstNet, Pin(name), permId, 0);
2323
2324                         LayerParams squeezeLp;
2325                         std::string squeezeName = name + "/squeeze";
2326                         CV_Assert(layer_id.find(squeezeName) == layer_id.end());
2327                         squeezeLp.set("axis", indices.at<int>(0));
2328                         squeezeLp.set("end_axis", indices.at<int>(0) + 1);
2329                         int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp);
2330                         layer_id[squeezeName] = squeezeId;
2331                         connect(layer_id, dstNet, Pin(permName), squeezeId, 0);
2332                     }
2333                 }
2334             } else {
2335                 if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
2336                     CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean or reduce_sum operation.");
2337
2338                 layerParams.set("pool", type == "Mean" ? "ave" : "sum");
2339                 layerParams.set("global_pooling", true);
2340                 int id = dstNet.addLayer(name, "Pooling", layerParams);
2341                 layer_id[name] = id;
2342                 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2343
2344                 if (!keepDims)
2345                 {
2346                     LayerParams flattenLp;
2347                     std::string flattenName = name + "/flatten";
2348                     CV_Assert(layer_id.find(flattenName) == layer_id.end());
2349                     int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
2350                     layer_id[flattenName] = flattenId;
2351                     connect(layer_id, dstNet, Pin(name), flattenId, 0);
2352                 }
2353             }
2354         }
2355         else if (type == "Pack")
2356         {
2357             // op: tf.stack(list of tensors, axis=0)
2358             // Join a list of inputs along a new axis.
2359             // The "axis" specifies the index of the new axis in the dimensions of the output.
2360             // Example: given a list with "N" tensors of shape (C, H, W):
2361             // if axis == 0 then the output tensor will have the shape (N, C, H, W),
2362             // if axis == 1 then the output tensor will have the shape (C, N, H, W).
2363             CV_CheckGT(num_inputs, 0, "");
2364             CV_Assert(hasLayerAttr(layer, "axis"));
2365             int dim = (int)getLayerAttr(layer, "axis").i();
2366             if (dim != 0)
2367                 CV_Error(Error::StsNotImplemented, "Unsupported mode of pack operation.");
2368
2369             CV_Assert(hasLayerAttr(layer, "N"));
2370             int num = (int)getLayerAttr(layer, "N").i();
2371             CV_CheckEQ(num_inputs, num, "");
2372             std::string base_name = name + "/reshape_";
2373             std::vector<int> reshape_ids;
2374             for (int i = 0; i < num; i++) {
2375                 std::ostringstream ss;
2376                 ss << i;
2377                 std::string reshape_name = base_name + ss.str();
2378                 LayerParams reshapeLP;
2379                 reshapeLP.set("axis", dim);
2380                 reshapeLP.set("num_axes", 1);
2381                 int outShape[] = {1, -1};
2382                 reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2));
2383                 int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP);
2384                 layer_id[reshape_name] = id;
2385                 reshape_ids.push_back(id);
2386                 connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0);
2387             }
2388
2389             layerParams.set("axis", dim);
2390             int id = dstNet.addLayer(name, "Concat", layerParams);
2391             layer_id[name] = id;
2392
2393             for (int li = 0; li < num; li++)
2394                 dstNet.connect(reshape_ids[li], 0, id, li);
2395         }
2396         else if (type == "ClipByValue")
2397         {
2398             // op: "ClipByValue"
2399             // input: "input"
2400             // input: "min"
2401             // input: "max"
2402             CV_CheckEQ(num_inputs, 3, "");
2403
2404             Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1));
2405             Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2));
2406             CV_CheckEQ(minValue.total(), (size_t)1, ""); CV_CheckTypeEQ(minValue.type(), CV_32FC1, "");
2407             CV_CheckEQ(maxValue.total(), (size_t)1, ""); CV_CheckTypeEQ(maxValue.type(), CV_32FC1, "");
2408
2409             layerParams.set("min_value", minValue.at<float>(0));
2410             layerParams.set("max_value", maxValue.at<float>(0));
2411
2412             int id = dstNet.addLayer(name, "ReLU6", layerParams);
2413             layer_id[name] = id;
2414
2415             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2416         }
2417         else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
2418                  type == "Relu" || type == "Elu" ||
2419                  type == "Identity" || type == "Relu6")
2420         {
2421             CV_CheckGT(num_inputs, 0, "");
2422             std::string dnnType = type;
2423             if (type == "Abs") dnnType = "AbsVal";
2424             else if (type == "Tanh") dnnType = "TanH";
2425             else if (type == "Relu") dnnType = "ReLU";
2426             else if (type == "Relu6") dnnType = "ReLU6";
2427             else if (type == "Elu") dnnType = "ELU";
2428
2429             int id = dstNet.addLayer(name, dnnType, layerParams);
2430             layer_id[name] = id;
2431             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
2432         }
2433         else
2434         {
2435             // The importer does not know how to map this TensorFlow operation onto an OpenCV layer.
2436             // However, we create a layer with the same type and rely on the user having defined a custom layer.
2437
2438             // All the attributes are added to LayerParams.
2439             google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
2440             for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
2441                  ai != attr.end(); ++ai)
2442             {
2443                 if (ai->second.value_case() == tensorflow::AttrValue::kS)  // string
2444                     layerParams.set(ai->first, ai->second.s());
2445                 if (ai->second.value_case() == tensorflow::AttrValue::kI)  // int64
2446                     layerParams.set(ai->first, ai->second.i());
2447                 if (ai->second.value_case() == tensorflow::AttrValue::kF)  // float
2448                     layerParams.set(ai->first, ai->second.f());
2449                 if (ai->second.value_case() == tensorflow::AttrValue::kB)  // bool
2450                     layerParams.set(ai->first, ai->second.b());
2451             }
2452
2453             // All the Const input nodes are added to layer's blobs.
2454             std::vector<std::string> inputsNames;
2455             for (int i = 0; i < num_inputs; ++i)
2456             {
2457                 // Check if input is a Const node.
2458                 if (value_id.find(layer.input(i)) != value_id.end())
2459                 {
2460                     Mat blob = getTensorContent(getConstBlob(layer, value_id, i));
2461                     layerParams.blobs.push_back(blob);
2462                 }
2463                 else
2464                     inputsNames.push_back(layer.input(i));
2465             }
2466             int id = dstNet.addLayer(name, type, layerParams);
2467             layer_id[name] = id;
2468
2469             for (int i = 0; i < inputsNames.size(); ++i)
2470             {
2471                 connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i);
2472             }
2473         }
2474     }
2475     catch (const std::exception& e)
2476     {
2477         CV_LOG_ERROR(NULL, "DNN/TF: Can't parse layer for node='" << name << "'. Exception: " << e.what());
2478         throw;
2479     }
2480 }
2481
2482 } // namespace
2483
2484 #endif //HAVE_PROTOBUF
2485
2486 Net readNetFromTensorflow(const String &model, const String &config)
2487 {
2488     Net net;
2489     TFImporter importer(net, model.c_str(), config.c_str());
2490     return net;
2491 }
2492
2493 Net readNetFromTensorflow(const char* bufferModel, size_t lenModel,
2494                           const char* bufferConfig, size_t lenConfig)
2495 {
2496     Net net;
2497     TFImporter importer(net, bufferModel, lenModel, bufferConfig, lenConfig);
2498     return net;
2499 }
2500
2501 Net readNetFromTensorflow(const std::vector<uchar>& bufferModel, const std::vector<uchar>& bufferConfig)
2502 {
2503     const char* bufferModelPtr = reinterpret_cast<const char*>(&bufferModel[0]);
2504     const char* bufferConfigPtr = bufferConfig.empty() ? NULL :
2505                                   reinterpret_cast<const char*>(&bufferConfig[0]);
2506     return readNetFromTensorflow(bufferModelPtr, bufferModel.size(),
2507                                  bufferConfigPtr, bufferConfig.size());
2508 }
2509
2510 void writeTextGraph(const String& _model, const String& output)
2511 {
2512     String model = _model;
2513     const std::string modelExt = model.substr(model.rfind('.') + 1);
2514     if (modelExt != "pb")
2515         CV_Error(Error::StsNotImplemented, "Only TensorFlow models support export to text file");
2516
2517     tensorflow::GraphDef net;
2518     ReadTFNetParamsFromBinaryFileOrDie(model.c_str(), &net);
2519
2520     sortByExecutionOrder(net);
2521
2522     RepeatedPtrField<tensorflow::NodeDef>::iterator it;
2523     for (it = net.mutable_node()->begin(); it != net.mutable_node()->end(); ++it)
2524     {
2525         if (it->op() == "Const")
2526         {
2527             it->mutable_attr()->at("value").mutable_tensor()->clear_tensor_content();
2528         }
2529     }
2530
2531     std::string content;
2532     google::protobuf::TextFormat::PrintToString(net, &content);
2533
2534     std::ofstream ofs(output.c_str());
2535     ofs << content;
2536     ofs.close();
2537 }
2538
2539 CV__DNN_INLINE_NS_END
2540 }} // namespace