1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 // Copyright (C) 2016, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
7
8 /*
9 Implementation of the TensorFlow model parser
10 */
11
12 #include "../precomp.hpp"
13
14 #include <opencv2/core/utils/logger.defines.hpp>
15 #include <opencv2/dnn/shape_utils.hpp>
16 #undef CV_LOG_STRIP_LEVEL
17 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
18 #include <opencv2/core/utils/logger.hpp>
19
20 #ifdef HAVE_PROTOBUF
21 #include "tf_io.hpp"
22
23 #include <iostream>
24 #include <fstream>
25 #include <algorithm>
26 #include <string>
27 #include <queue>
28 #include "tf_graph_simplifier.hpp"
29 #endif
30
31 namespace cv {
32 namespace dnn {
33 CV__DNN_EXPERIMENTAL_NS_BEGIN
34
35 #if HAVE_PROTOBUF
36
37 using ::google::protobuf::RepeatedField;
38 using ::google::protobuf::RepeatedPtrField;
39 using ::google::protobuf::Message;
40 using ::google::protobuf::Descriptor;
41 using ::google::protobuf::FieldDescriptor;
42 using ::google::protobuf::Reflection;
43
44 namespace
45 {
46
47 static int toNCHW(int idx)
48 {
49     CV_Assert(-4 <= idx && idx < 4);
50     if (idx == 0) return 0;
51     else if (idx > 0) return idx % 3 + 1;
52     else return (4 + idx) % 3 + 1;
53 }
54
55 static int toNCDHW(int idx)
56 {
57     CV_Assert(-5 <= idx && idx < 5);
58     if (idx == 0) return 0;
59     else if (idx > 0) return idx % 4 + 1;
60     else return (5 + idx) % 4 + 1;
61 }
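// Worked examples (illustration only, derived from the mappings above):
//   toNCHW:  NHWC axis 0 (N) -> 0, 1 (H) -> 2, 2 (W) -> 3, 3 (C) -> 1; negative axes wrap, e.g. toNCHW(-1) == 1.
//   toNCDHW: NDHWC axis 1 (D) -> 2, 2 (H) -> 3, 3 (W) -> 4, 4 (C) -> 1.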
62
63 // These values are used to indicate a layer output's data layout where possible.
64 enum DataLayout
65 {
66     DATA_LAYOUT_NHWC,
67     DATA_LAYOUT_NCHW,
68     DATA_LAYOUT_NDHWC,
69     DATA_LAYOUT_UNKNOWN,
70     DATA_LAYOUT_PLANAR  // 2-dimensional outputs (matmul, flatten, reshape to 2d)
71 };
72
73 typedef std::vector<std::pair<String, int> > StrIntVector;
74
75 struct Pin
76 {
77     Pin(const std::string &_name, int _blobIndex = 0) :
78         name(_name), blobIndex(_blobIndex) {}
79
80     Pin() :
81         name(""), blobIndex(-1) {}
82
83     std::string name;
84     int blobIndex;
85 };
86
87 void blobShapeFromTensor(const tensorflow::TensorProto &tensor, MatShape& shape)
88 {
89     shape.clear();
90     if (tensor.has_tensor_shape())
91     {
92         const tensorflow::TensorShapeProto &_shape = tensor.tensor_shape();
93         int i, n = _shape.dim_size();
94         if (n)
95         {
96             shape.resize(n);
97
98             for (i = 0; i < n; i++)
99                 shape[i] = (int)_shape.dim(i).size();
100         }
101         else
102             shape.resize(1, 1);  // Scalar. // FIXIT: should be empty
103     }
104     else
105     {
106         CV_Error(Error::StsError, "Unknown shape of input tensor");
107     }
108 }
109
110 template <typename T>
111 void parseTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
112 {
113     MatShape shape;
114     blobShapeFromTensor(tensor, shape);
115     int dims = (int)shape.size();
116
117     if (dims == 4)
118     {
119         // REORDER blob NHWC to NCHW
120         swap(shape[2], shape[3]); // NHCW
121         swap(shape[1], shape[2]); // NCHW
122     }
123
124     dstBlob.create(shape, CV_32F);
125
126     Mat tensorContent = getTensorContent(tensor, /*no copy*/false);
127     int size = tensorContent.total();
128     CV_Assert(size == (int)dstBlob.total());
129
130     float *dstData = dstBlob.ptr<float>();
131     const T *data = reinterpret_cast<const T*>(tensorContent.data);
132
133     if (dims == 4)
134     {
135         int num = shape[0], channels = shape[1], height = shape[2], width = shape[3];
136         int total = num*channels*height*width;
137         for(int i_n = 0; i_n < shape[0]; i_n++) {
138             for(int i_c = 0; i_c < shape[1]; i_c++) {
139                 for(int i_h = 0; i_h < shape[2]; i_h++) {
140                     for(int i_w = 0; i_w < shape[3]; i_w++) {
141                        int dst_i = channels*height*width*i_n + height*width*i_c + width*i_h + i_w;
142                        int src_i = channels*height*width*i_n + i_c + channels*width*i_h + channels*i_w;
143
144                        CV_Assert(dst_i < total);
145                        CV_Assert(src_i < total);
146
147                        dstData[dst_i] = data[src_i];
148                     }
149                 }
150             }
151         }
152     } else {
153         for (int i = 0; i < size; i++)
154             dstData[i] = data[i];
155     }
156 }
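// Index math used above (for reference): for a 4D tensor with source layout NHWC,
// the element at (n, h, w, c) is read from src index ((n*H + h)*W + w)*C + c and
// written to dst index ((n*C + c)*H + h)*W + w, i.e. a plain NHWC -> NCHW transposition.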
157
158 void blobFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
159 {
160     switch (tensor.dtype()) {
161         case tensorflow::DT_FLOAT:
162         case tensorflow::DT_HALF:
163             parseTensor<float>(tensor, dstBlob);
164             break;
165         case tensorflow::DT_DOUBLE:
166             parseTensor<double>(tensor, dstBlob);
167             break;
168         default:
169             CV_Error(Error::StsError, "Tensor's data type is not supported");
170             break;
171     }
172 }
173
174 #if 0
175 void printList(const tensorflow::AttrValue::ListValue &val)
176 {
177     std::cout << "(";
178     for (int i = 0; i < val.i_size(); i++)
179         std::cout << " " << val.i(i);
180     std::cout << " )";
181 }
182
183 void printTensorShape(const tensorflow::TensorShapeProto &shape)
184 {
185     std::cout << "[ ";
186     for (int d = 0; d < shape.dim_size(); d++)
187         std::cout << shape.dim(d).name() <<
188                      ":" << shape.dim(d).size() << " ";
189     std::cout << "]";
190 }
191
192 void printTensor(const tensorflow::TensorProto &tensor)
193 {
194     printTensorShape(tensor.tensor_shape());
195
196     if (tensor.tensor_content().empty())
197         return;
198
199     switch (tensor.dtype())
200     {
201     case tensorflow::DT_FLOAT:
202         {
203             const float *data = reinterpret_cast<const float*>(tensor.tensor_content().c_str());
204             int size = tensor.tensor_content().size() / sizeof(float);
205             for (int i = 0; i < std::min(10, size); i++)
206                 std::cout << " " << data[i];
207             if (size > 10)
208                 std::cout << " ... " << size - 10 << " more";
209             break;
210         }
211     case tensorflow::DT_INT32:
212         {
213             const int *data = reinterpret_cast<const int*>(tensor.tensor_content().c_str());
214             int size = tensor.tensor_content().size() / sizeof(int);
215             for (int i = 0; i < std::min(10, size); i++)
216                 std::cout << " " << data[i];
217             if (size > 10)
218                 std::cout << " ... " << size - 10 << " more";
219             break;
220         }
221     default:
222         CV_Error(Error::StsError, "Tensor type is not supported");
223         break;
224     }
225 }
226
227 void printLayerAttr(const tensorflow::NodeDef &layer)
228 {
229     std::cout << std::endl << layer.name() << ":" << layer.op();
230     for (int ii = 0; ii < layer.input_size(); ii++)
231         std::cout << "(" << layer.input(ii) << ")";
232     std::cout << std::endl;
233     google::protobuf::Map<std::string, tensorflow::AttrValue> attr
234             = layer.attr();
235     for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
236          ai != attr.end(); ++ai)
237     {
238         std::cout << ai->first << ":";
239         if (ai->first == "dtype" || ai->first == "T")
240             std::cout << ai->second.i();
241         else if (ai->first == "padding")
242             std::cout << ai->second.s();
243         else if (ai->first == "transpose_a" || ai->first == "transpose_b")
244             std::cout << ai->second.b();
245         //            else if (ai->first == "shape")
246         //              printTensorShape(ai->second.shape());
247         else if (ai->first == "strides" || ai->first == "ksize")
248             printList(ai->second.list());
249         else
250             printTensor(ai->second.tensor());
251         std::cout << std::endl;
252     }
253 }
254 #endif
255
256 bool hasLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
257 {
258     google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
259     return attr.find(name) != attr.end();
260 }
261
262 const tensorflow::AttrValue& getLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
263 {
264     return layer.attr().at(name);
265 }
266
267 static DataLayout getDataLayout(const tensorflow::NodeDef& layer)
268 {
269     if (hasLayerAttr(layer, "data_format"))
270     {
271         std::string format = getLayerAttr(layer, "data_format").s();
272         if (format == "NHWC" || format == "channels_last")
273             return DATA_LAYOUT_NHWC;
274         else if (format == "NCHW" || format == "channels_first")
275             return DATA_LAYOUT_NCHW;
276         else if (format == "NDHWC")
277             return DATA_LAYOUT_NDHWC;
278         else
279             CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
280     }
281     return DATA_LAYOUT_UNKNOWN;
282 }
283
284 static inline std::string getNodeName(const std::string& tensorName)
285 {
286     return tensorName.substr(0, tensorName.rfind(':'));
287 }
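// For illustration: getNodeName("conv1/kernel:0") returns "conv1/kernel";
// a name without ':' is returned unchanged.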
288
289 static inline
290 DataLayout getDataLayout(
291         const std::string& layerName,
292         const std::map<String, DataLayout>& data_layouts
293 )
294 {
295     std::map<String, DataLayout>::const_iterator it = data_layouts.find(getNodeName(layerName));
296     return it != data_layouts.end() ? it->second : DATA_LAYOUT_UNKNOWN;
297 }
298
299 static
300 bool hasAllOnes(const Mat &inputs, int startPos, int endPos)
301 {
302     CV_CheckLE(inputs.dims, 2, "");
303     CV_CheckGE(startPos, 0, "");
304     CV_CheckLE(startPos, endPos, "");
305     CV_CheckLT((size_t)endPos, inputs.total(), "");
306
307     for (int i = startPos; i < endPos; i++)
308     {
309         if (inputs.at<int>(i) != 1 && inputs.at<int>(i) != -1)
310             return false;
311     }
312     return true;
313 }
314
315 void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
316 {
317     if (hasLayerAttr(layer, "strides"))
318     {
319         const tensorflow::AttrValue& val = getLayerAttr(layer, "strides");
320         int dimX, dimY, dimC, dimD;
321         int layout = getDataLayout(layer);
322         if (layout == DATA_LAYOUT_NCHW)
323         {
324             dimC = 1; dimY = 2; dimX = 3;
325         }
326         else if (layout == DATA_LAYOUT_NDHWC)
327         {
328             dimD = 1; dimY = 2; dimX = 3; dimC = 4;
329         }
330         else
331         {
332             dimY = 1; dimX = 2; dimC = 3;
333         }
334         if (!(val.list().i_size() == 4 || val.list().i_size() == 5) ||
335             val.list().i(0) != 1 || val.list().i(dimC) != 1)
336             CV_Error(Error::StsError, "Unsupported strides");
337         if (layout == DATA_LAYOUT_NDHWC) {
338             int strides[] = {static_cast<int>(val.list().i(dimD)),
339                              static_cast<int>(val.list().i(dimY)),
340                              static_cast<int>(val.list().i(dimX))};
341             layerParams.set("stride",  DictValue::arrayInt(strides, 3));
342         }
343         else
344         {
345             layerParams.set("stride_h", static_cast<int>(val.list().i(dimY)));
346             layerParams.set("stride_w", static_cast<int>(val.list().i(dimX)));
347         }
348     }
349 }
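// For illustration: a typical NHWC attribute strides = [1, 2, 2, 1] yields stride_h = 2 and
// stride_w = 2; the batch and channel strides must be 1, otherwise "Unsupported strides" is raised.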
350
351 DictValue parseDims(const tensorflow::TensorProto &tensor) {
352     MatShape shape;
353     blobShapeFromTensor(tensor, shape);
354     int dims = (int)shape.size();
355
356     CV_Assert(tensor.dtype() == tensorflow::DT_INT32);
357     CV_Assert(dims == 1);
358
359     Mat values = getTensorContent(tensor);
360     CV_Assert(values.type() == CV_32SC1);
361     // TODO: add reordering shape if dims == 4
362     return DictValue::arrayInt((int*)values.data, values.total());
363 }
364
365 void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
366 {
367     if (hasLayerAttr(layer, "ksize"))
368     {
369         const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize");
370         int dimX, dimY, dimC, dimD;
371         int layout = getDataLayout(layer);
372         if (layout == DATA_LAYOUT_NCHW)
373         {
374             dimC = 1; dimY = 2; dimX = 3;
375         }
376         else if (layout == DATA_LAYOUT_NDHWC)
377         {
378             dimD = 1; dimY = 2; dimX = 3; dimC = 4;
379         }
380         else
381         {
382             dimY = 1; dimX = 2; dimC = 3;
383         }
384         if (!(val.list().i_size() == 4 || val.list().i_size() == 5) ||
385             val.list().i(0) != 1 || val.list().i(dimC) != 1)
386             CV_Error(Error::StsError, "Unsupported ksize");
387
388         if (layout == DATA_LAYOUT_NDHWC) {
389             int kernel[] = {static_cast<int>(val.list().i(dimD)),
390                             static_cast<int>(val.list().i(dimY)),
391                             static_cast<int>(val.list().i(dimX))};
392             layerParams.set("kernel_size",  DictValue::arrayInt(kernel, 3));
393         }
394         else
395         {
396             layerParams.set("kernel_h", static_cast<int>(val.list().i(dimY)));
397             layerParams.set("kernel_w", static_cast<int>(val.list().i(dimX)));
398         }
399     }
400     else
401     {
402         layerParams.set("kernel_h", 1);
403         layerParams.set("kernel_w", 1);
404     }
405 }
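// For illustration: ksize = [1, 3, 3, 1] in NHWC yields kernel_h = 3 and kernel_w = 3;
// when the "ksize" attribute is absent, a 1x1 kernel is assumed.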
406
407 void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer)
408 {
409     if (hasLayerAttr(layer, "padding"))
410         layerParams.set("pad_mode", getLayerAttr(layer, "padding").s());
411 }
412
413 Pin parsePin(const std::string &name)
414 {
415     Pin pin(name);
416
417     size_t delimiter_pos = name.find_first_of(':');
418     if (delimiter_pos != std::string::npos)
419     {
420         pin.name = name.substr(0, delimiter_pos);
421         std::istringstream(name.substr(delimiter_pos + 1)) >> pin.blobIndex;
422     }
423
424     return pin;
425 }
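// For illustration (hypothetical node names): parsePin("conv1/weights:1") gives
// {name: "conv1/weights", blobIndex: 1}, while parsePin("conv1/weights") keeps the default blobIndex of 0.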
426
427 StrIntVector getNextLayers(const tensorflow::GraphDef& net, const String& layer_name, const String& type = "")
428 {
429    StrIntVector layers;
430
431    for (int li = 0; li < net.node_size(); li++)
432    {
433        const tensorflow::NodeDef& layer = net.node(li);
434        for (int input_id = 0; input_id < layer.input_size(); input_id++) {
435            String input_op_name = parsePin(layer.input(input_id)).name;
436            bool type_ok = type.empty() ? true : type == layer.op();
437            if (input_op_name == layer_name && type_ok)
438                layers.push_back(std::make_pair(layer.name(), li));
439        }
440    }
441
442    return layers;
443 }
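// Usage sketch (hypothetical names): getNextLayers(net, "conv1", "BiasAdd") returns (name, node index)
// pairs for every BiasAdd node that has "conv1" among its inputs; an empty type matches any op.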
444
445 void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int input_blob_index, bool remove_from_net = true) {
446     String layer_name = net.node(layer_index).name();
447     StrIntVector layers = getNextLayers(net, layer_name);
448
449     String removed_layer_input = net.node(layer_index).input(input_blob_index);
450
451     for (size_t i = 0; i < layers.size(); i++)
452     {
453         tensorflow::NodeDef* layer = net.mutable_node(layers[i].second);
454         for (int input_id = 0; input_id < layer->input_size(); input_id++) {
455                 String input_op_name = layer->input(input_id);
456
457                 if (input_op_name == layer_name) {
458                     layer->set_input(input_id, removed_layer_input);
459                 }
460         }
461     }
462
463     if (remove_from_net)
464         net.mutable_node()->DeleteSubrange(layer_index, 1);
465 }
466
467 class TFImporter
468 {
469 public:
470     TFImporter(Net& net, const char *model, const char *config = NULL);
471     TFImporter(Net& net, const char *dataModel, size_t lenModel,
472                const char *dataConfig = NULL, size_t lenConfig = 0);
473 protected:
474     Net& dstNet;
475     void populateNet();
476
477     void parseNode(const tensorflow::NodeDef& layer);
478
479     DataLayout predictOutputDataLayout(const tensorflow::NodeDef& layer);
480
481     void kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob);
482
483     void connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
484                  const int input_layer_id, const int input_blob_id);
485     void connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
486                            const int input_layer_id, const int input_blobs_count);
487     const tensorflow::TensorProto& getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
488                                                 int input_blob_index = -1, int* actual_inp_blob_idx = 0);
489
490
491     // Binary serialized TensorFlow graph includes weights.
492     tensorflow::GraphDef netBin;
493     // Optional text definition of the TensorFlow graph. It is more flexible than the binary format
494     // and may be used to build the network, with the binary format serving only as a weights storage.
495     // This approach is similar to Caffe's `.prototxt` and `.caffemodel`.
496     tensorflow::GraphDef netTxt;
497
498     std::vector<String> netInputsNames;
499     std::vector<MatShape> netInputShapes;
500
501     std::set<String> layers_to_ignore;
502     std::map<String, DataLayout> data_layouts;
503
504     // find all Const layers for params
505     std::map<String, int> value_id;
506     // A map with constant blobs which are shared between multiple layers.
507     std::map<String, Mat> sharedWeights;
508
509     std::map<String, int> layer_id;
510
511 private:
512     void addPermuteLayer(const int* order, const std::string& permName, Pin& inpId);
513 };
514
515 TFImporter::TFImporter(Net& net, const char *model, const char *config)
516     : dstNet(net)
517 {
518     if (model && model[0])
519     {
520         CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from file: " << model);
521         ReadTFNetParamsFromBinaryFileOrDie(model, &netBin);
522     }
523     if (config && config[0])
524     {
525         CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from file: " << config);
526         ReadTFNetParamsFromTextFileOrDie(config, &netTxt);
527     }
528
529     populateNet();
530 }
531
532 TFImporter::TFImporter(
533         Net& net,
534         const char *dataModel, size_t lenModel,
535         const char *dataConfig, size_t lenConfig
536 )
537     : dstNet(net)
538 {
539     if (dataModel != NULL && lenModel > 0)
540     {
541         CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from memory (" << lenModel << " bytes)");
542         ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin);
543     }
544     if (dataConfig != NULL && lenConfig > 0)
545     {
546         CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from memory (" << lenConfig << " bytes)");
547         ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt);
548     }
549     populateNet();
550 }
551
552 void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
553 {
554     MatShape shape;
555     blobShapeFromTensor(tensor, shape);
556     int dims = (int)shape.size();
557
558     // TODO: other blob types
559     CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT ||
560               tensor.dtype() == tensorflow::DT_HALF);
561     CV_Assert(dims == 4 || dims == 5);
562
563     int out_c, input_c, depth, height, width;
564     if (dims == 4)
565     {
566         // REORDER kernel HWIO to OIHW
567         swap(shape[0], shape[2]); // IWHO
568         swap(shape[1], shape[3]); // IOHW
569         swap(shape[0], shape[1]); // OIHW
570         depth = 1; height = shape[2]; width = shape[3];
571     }
572     else
573     {
574         // REORDER kernel DHWIO to OIDHW
575         swap(shape[0], shape[4]); // OHWID
576         swap(shape[1], shape[3]); // OIWHD
577         swap(shape[2], shape[4]); // OIDHW
578         depth = shape[2]; height = shape[3]; width = shape[4];
579     }
580     out_c = shape[0]; input_c = shape[1];
581
582     dstBlob.create(shape, CV_32F);
583
584     Mat tensorContent = getTensorContent(tensor, /*no copy*/false);
585     int size = tensorContent.total();
586     CV_Assert(size == (int)dstBlob.total());
587
588     float *dstData = dstBlob.ptr<float>();
589     const float *data = reinterpret_cast<const float*>(tensorContent.data);
590
591     int total = out_c * input_c * depth * height * width;
592     for (int i_oc = 0; i_oc < out_c; i_oc++) {
593         for (int i_ic = 0; i_ic < input_c; i_ic++) {
594             for (int i_d = 0; i_d < depth; i_d++) {
595                 for (int i_h = 0; i_h < height; i_h++) {
596                     for (int i_w = 0; i_w < width; i_w++) {
597                         int dst_i = input_c * depth * height * width * i_oc +
598                                     depth * height * width * i_ic + height * width * i_d + width * i_h + i_w;
599                         int src_i = out_c * input_c * width * height * i_d +
600                                     out_c * input_c * width * i_h + out_c * input_c * i_w + out_c * i_ic + i_oc;
601                         CV_Assert(dst_i < total);
602                         CV_Assert(src_i < total);
603                        dstData[dst_i] = data[src_i];
604                    }
605                 }
606             }
607         }
608     }
609 }
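// Layout note (for reference): a 2D kernel stored as HWIO with H=3, W=3, I=16, O=32 becomes an
// OIHW blob of shape 32x16x3x3; with depth == 1 the element (h, w, i, o) is read from
// src index ((h*W + w)*I + i)*O + o and written to dst index ((o*I + i)*H + h)*W + w.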
610
611 void TFImporter::connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
612              const int input_layer_id, const int input_blob_id)
613 {
614     std::map<String, int>::const_iterator it = layers_name_id_map.find(outPin.name);
615     if (it == layers_name_id_map.end())
616         CV_Error(Error::StsError, "Input layer not found: " + outPin.name);
617
618     std::vector<String>::iterator inpNameIt = std::find(netInputsNames.begin(), netInputsNames.end(), outPin.name);
619     int blobIndex;
620     if (inpNameIt == netInputsNames.end())
621         blobIndex = outPin.blobIndex;
622     else
623         blobIndex = inpNameIt - netInputsNames.begin();
624     network.connect(it->second, blobIndex, input_layer_id, input_blob_id);
625 }
626
627 void TFImporter::connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
628                      const int input_layer_id, const int input_blobs_count)
629 {
630     for (int input_blob_id = 0; input_blob_id < input_blobs_count; input_blob_id++)
631         connect(layer_id, network, outPin, input_layer_id, input_blob_id);
632 }
633
634 const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
635                                               int input_blob_index, int* actual_inp_blob_idx) {
636     if (input_blob_index == -1) {
637         for(int i = 0; i < layer.input_size(); i++) {
638             Pin input = parsePin(layer.input(i));
639             if (const_layers.find(input.name) != const_layers.end()) {
640                 if (input_blob_index != -1)
641                     CV_Error(Error::StsError, "More than one input is Const op");
642
643                 input_blob_index = i;
644             }
645         }
646     }
647
648     if (input_blob_index == -1)
649         CV_Error(Error::StsError, "Const input blob for weights not found");
650
651     Pin kernel_inp = parsePin(layer.input(input_blob_index));
652     if (const_layers.find(kernel_inp.name) == const_layers.end())
653         CV_Error(Error::StsError, "Input [" + layer.input(input_blob_index) +
654                                   "] for node [" + layer.name() + "] not found");
655     if (kernel_inp.blobIndex != 0)
656         CV_Error(Error::StsError, "Unsupported kernel input");
657
658     if(actual_inp_blob_idx) {
659         *actual_inp_blob_idx = input_blob_index;
660     }
661
662     int nodeIdx = const_layers.at(kernel_inp.name);
663     if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name)
664     {
665         return netBin.node(nodeIdx).attr().at("value").tensor();
666     }
667     else
668     {
669         CV_Assert_N(nodeIdx < netTxt.node_size(),
670                     netTxt.node(nodeIdx).name() == kernel_inp.name);
671         return netTxt.node(nodeIdx).attr().at("value").tensor();
672     }
673 }
674
675 static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& const_layers,
676                           std::set<String>& layers_to_ignore)
677 {
678     CV_LOG_DEBUG(NULL, "DNN/TF: addConstNodes(): handling " << net.node_size() << " nodes...");
679     for (int li = 0; li < net.node_size(); li++)
680     {
681         const tensorflow::NodeDef &layer = net.node(li);
682         String name = layer.name();
683         String type = layer.op();
684
685         //CV_LOG_DEBUG(NULL, "DNN/TF: layer_id=" << li << " - '" << name << "' @ " << type);
686
687         try
688         {
689             if (type == "Dequantize")
690             {
691                 // Example of Dequantize node:
692                 //   name: "conv2d_1/bias"
693                 //   op: "Dequantize"
694                 //   input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8)
695                 //   input: "conv2d_1/bias_quantized_min"
696                 //   input: "conv2d_1/bias_quantized_max"
697                 //   attr { key: "T" value { type: DT_QUINT8 } }   (quantized type)
698                 //   attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique)
699                 CV_CheckEQ(layer.input_size(), 3, "Dequantize: only 3 inputs are supported");
700                 for (int i = 0; i < 3; ++i)
701                     CV_Assert(const_layers.find(layer.input(i)) != const_layers.end());
702                 CV_Assert(hasLayerAttr(layer, "mode") &&
703                           getLayerAttr(layer, "mode").s() == "MIN_FIRST");
704
705                 int tensorId = const_layers[layer.input(0)];
706                 int minId = const_layers[layer.input(1)];
707                 int maxId = const_layers[layer.input(2)];
708
709                 tensorflow::TensorProto* tensor = net.mutable_node(tensorId)
710                                                     ->mutable_attr()->at("value")
711                                                      .mutable_tensor();
712                 CV_CheckEQ((int)tensor->dtype(), (int)tensorflow::DT_QUINT8, "");
713
714                 Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor());
715                 Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor());
716                 CV_CheckEQ(qMin.total(), (size_t)1, "");
717                 CV_CheckTypeEQ(qMin.type(), CV_32FC1, "");
718                 CV_CheckEQ(qMax.total(), (size_t)1, "");
719                 CV_CheckTypeEQ(qMax.type(), CV_32FC1, "");
720
721                 Mat content = getTensorContent(*tensor);
722
723                 float minVal = qMin.at<float>(0);
724                 float rangeScale = (qMax.at<float>(0) - minVal) / 255;
725                 CV_Assert(rangeScale >= 0);
726                 content.convertTo(content, CV_32FC1, rangeScale,
727                                   rangeScale * cvRound(minVal / rangeScale));
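                // The mapping above is q -> rangeScale * q + rangeScale * cvRound(minVal / rangeScale),
                // which approximately restores minVal + rangeScale * q, i.e. MIN_FIRST dequantization
                // of the [0, 255] quantized range onto [qMin, qMax].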
728
729                 tensor->set_dtype(tensorflow::DT_FLOAT);
730                 tensor->set_tensor_content(content.data, content.total() * content.elemSize1());
731
732                 net.mutable_node(tensorId)->set_name(name);
733                 CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);
734                 layers_to_ignore.insert(name);
735                 continue;
736             }
737             else if (type != "Const")
738                 continue;  // only Const parameters are supported
739
740             if (layer.attr().find("value") != layer.attr().end())
741             {
742                 CV_Assert(const_layers.insert(std::make_pair(name, li)).second);
743             }
744             layers_to_ignore.insert(name);
745         }
746         catch (const std::exception& e)
747         {
748             CV_LOG_ERROR(NULL, "DNN/TF: Can't handle node='" << name << "'. Exception: " << e.what());
749             throw;
750         }
751     }
752     CV_LOG_DEBUG(NULL, "DNN/TF: layers_to_ignore.size() = " << layers_to_ignore.size());
753 }
754
755 // If all inputs of a specific layer have the same data layout, we can say that
756 // this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
757 DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer)
758 {
759     DataLayout layout = getDataLayout(layer);
760     if (layout != DATA_LAYOUT_UNKNOWN)
761     {
762         CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from attrs)");
763         return layout;
764     }
765
766     // Determine layout by layer's inputs
767     for (int i = 0, n = layer.input_size(); i < n; ++i)
768     {
769         std::map<String, DataLayout>::const_iterator it = data_layouts.find(getNodeName(layer.input(i)));
770         if (it != data_layouts.end())
771         {
772             if (layout != DATA_LAYOUT_UNKNOWN)
773             {
774                 if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN)
775                     return DATA_LAYOUT_UNKNOWN;
776             }
777             else
778                 layout = it->second;
779         }
780     }
781
782     if (layout != DATA_LAYOUT_UNKNOWN)
783     {
784         CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from inputs)");
785         return layout;
786     }
787
788     // Determine layout by layer's consumers recursively.
789     std::map<String, DataLayout>::const_iterator it = data_layouts.find(layer.name());
790     CV_Assert(it != data_layouts.end());
791     return it->second;
792 }
793
794 void TFImporter::populateNet()
795 {
796     CV_Assert(netBin.ByteSize() || netTxt.ByteSize());
797
798     CV_LOG_INFO(NULL, "DNN/TF: parsing model"
799         << (netBin.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netBin.versions().producer(), (int)netBin.versions().min_consumer()) : cv::String(" (N/A version info)"))
800         << ". Number of nodes = " << netBin.node_size()
801     );
802
803     if (netTxt.ByteSize())
804     {
805         CV_LOG_INFO(NULL, "DNN/TF: parsing config"
806             << (netTxt.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netTxt.versions().producer(), (int)netTxt.versions().min_consumer()) : cv::String(" (N/A version info)"))
807             << ". Number of nodes = " << netTxt.node_size()
808         );
809
810         RemoveIdentityOps(netBin);
811         CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes");
812         RemoveIdentityOps(netTxt);
813         CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(config) => " << netTxt.node_size() << " nodes");
814
815         sortByExecutionOrder(netTxt);
816         CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(config) => " << netTxt.node_size() << " nodes");
817     }
818     else
819     {
820         removePhaseSwitches(netBin);
821         CV_LOG_DEBUG(NULL, "DNN/TF: removePhaseSwitches(model) => " << netBin.node_size() << " nodes");
822
823         RemoveIdentityOps(netBin);
824         CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes");
825
826         simplifySubgraphs(netBin);
827         CV_LOG_DEBUG(NULL, "DNN/TF: simplifySubgraphs(model) => " << netBin.node_size() << " nodes");
828         sortByExecutionOrder(netBin);
829         CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(model) => " << netBin.node_size() << " nodes");
830     }
831
832     tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin;
833
834     int layersSize = net.node_size();
835
836     // Pre-fill data layouts where they are set explicitly.
837     // Assuming that nodes are in topological order
838     for (int i = layersSize - 1; i >= 0; --i)
839     {
840         const tensorflow::NodeDef& layer = net.node(i);
841         std::string name = layer.name();
842
843         CV_LOG_DEBUG(NULL, "DNN/TF: node(" << i << " - '" << name << "') propagating layout...");
844
845         try
846         {
847             DataLayout layout = getDataLayout(layer);
848             std::map<String, DataLayout>::iterator it = data_layouts.find(name);
849             if (it != data_layouts.end())
850             {
851                 if (layout != DATA_LAYOUT_UNKNOWN)
852                 {
853                     if (it->second == DATA_LAYOUT_UNKNOWN)
854                         it->second = layout;
855                     else if (it->second != layout)
856                     {
857                         it->second = DATA_LAYOUT_UNKNOWN;
858                         layout = DATA_LAYOUT_UNKNOWN;
859                     }
860                 }
861                 else
862                     layout = it->second;
863             }
864             else
865                 data_layouts[name] = layout;
866
867             // Specify input layers to have the same data layout.
868             for (int j = 0; j < layer.input_size(); ++j)
869             {
870                 name = getNodeName(layer.input(j));
871                 it = data_layouts.find(name);
872                 if (it != data_layouts.end())
873                 {
874                     if (layout != DATA_LAYOUT_UNKNOWN)
875                     {
876                         if (it->second == DATA_LAYOUT_UNKNOWN)
877                             it->second = layout;
878                         else if (it->second != layout)
879                             it->second = DATA_LAYOUT_UNKNOWN;
880                     }
881                 }
882                 else
883                     data_layouts[name] = layout;
884             }
885         }
886         catch (const std::exception& e)
887         {
888             CV_LOG_ERROR(NULL, "DNN/TF: Can't propagate layout for node='" << name << "'. Exception: " << e.what());
889             throw;
890         }
891     }
892
893     addConstNodes(netBin, value_id, layers_to_ignore);
894     addConstNodes(netTxt, value_id, layers_to_ignore);
895
896
897     for (int li = 0; li < layersSize; li++)
898     {
899         const tensorflow::NodeDef& layer = net.node(li);
900
901         const std::string name = layer.name();
902         const std::string type = layer.op();
903         const int ninputs = layer.input_size();
904         CV_LOG_DEBUG(NULL, "DNN/TF: (" << li << "/" << layersSize << ") Parse layer " << name << " @ " << type << " with " << ninputs << " inputs");
905
906         parseNode(layer);
907     }
908
909     for (size_t i = 0; i < netInputsNames.size(); i++)
910     {
911         CV_LOG_DEBUG(NULL, "DNN/TF: Model input: " << i << " - '" << netInputsNames[i] << "'");
912         CV_Assert(!netInputsNames[i].empty());
913     }
914     dstNet.setInputsNames(netInputsNames);
915     CV_LOG_DEBUG(NULL, "DNN/TF: ===================== Import completed =====================");
916 }
917
918 void TFImporter::addPermuteLayer(const int* order, const std::string& permName, Pin& inpId)
919 {
920     LayerParams permLP;
921     permLP.set("order", DictValue::arrayInt<const int*>(order, 4));
922     CV_Assert(layer_id.find(permName) == layer_id.end());
923     int permId = dstNet.addLayer(permName, "Permute", permLP);
924     layer_id[permName] = permId;
925     connect(layer_id, dstNet, inpId, permId, 0);
926     inpId = Pin(permName);
927 }
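// Usage sketch (mirrors the Reshape/Flatten handling below):
//   int order[] = {0, 2, 3, 1};  // OpenCV NCHW -> TF NHWC
//   addPermuteLayer(order, name + "/nhwc", inpId);
// This inserts a 4-axis Permute layer named permName and redirects inpId to its output.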
928
929 void TFImporter::parseNode(const tensorflow::NodeDef& layer_)
930 {
931     tensorflow::NodeDef layer = layer_;
932
933     tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin;
934
935     /*const*/ std::string name = layer.name();
936     /*const*/ std::string type = layer.op();
937     /*const*/ int num_inputs = layer.input_size();
938
939     try
940     {
941         LayerParams layerParams;
942
943         if (layers_to_ignore.find(name) != layers_to_ignore.end())
944         {
945             CV_LOG_DEBUG(NULL, "DNN/TF:     ignored");
946             return;
947         }
948
949         DataLayout predictedLayout = predictOutputDataLayout(layer);
950         data_layouts[name] = predictedLayout;
951
952         if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad" || type == "MirrorPad" || type == "Conv3D")
953         {
954             CV_CheckGT(num_inputs, 0, "");
955             // The first node of dilated convolution subgraph.
956             // Extract input node, dilation rate and paddings.
957             std::string input = layer.input(0);
958             StrIntVector next_layers;
959             if (type == "SpaceToBatchND" || type == "Pad")
960             {
961                 next_layers = getNextLayers(net, name, "Conv2D");
962                 if (next_layers.empty())
963                     next_layers = getNextLayers(net, name, "DepthwiseConv2dNative");
964             }
965
966             if (type == "SpaceToBatchND")
967             {
968                 // op: "SpaceToBatchND"
969                 // input: "input"
970                 // input: "SpaceToBatchND/block_shape"
971                 // input: "SpaceToBatchND/paddings"
972                 CV_CheckEQ(num_inputs, 3, "");
973
974                 DictValue dilation = parseDims(getConstBlob(layer, value_id, 1));
975                 CV_Assert(dilation.size() == 2);
976                 layerParams.set("dilation_h", dilation.get<int>(0));
977                 layerParams.set("dilation_w", dilation.get<int>(1));
978
979                 Mat paddings;
980                 parseTensor<int>(getConstBlob(layer, value_id, 2), paddings);
981
982                 // paddings is a 2x2 matrix: [[top, bot], [left, right]]
983                 layerParams.set("pad_h", paddings.at<float>(0));
984                 layerParams.set("pad_w", paddings.at<float>(2));
985
986                 CV_Assert(next_layers.size() == 1);
987                 layers_to_ignore.insert(next_layers[0].first);
988
989                 // FIXIT don't override, rewrite this code
990                 layer = net.node(next_layers[0].second);
991                 name = layer.name();
992                 type = layer.op();
993                 num_inputs = layer.input_size();
994                 CV_LOG_DEBUG(NULL, "DNN/TF:     switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs");
995             }
996             else if (type == "Pad" || type == "MirrorPad")
997             {
998                 Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
999                 CV_Assert(paddings.type() == CV_32SC1);
1000                 if (paddings.total() == 8)
1001                 {
1002                 // Perhaps we have the NHWC padding dimensions order.
1003                     //  N    H    W    C
1004                     // 0 1  2 3  4 5  6 7
1005                     std::swap(paddings.at<int32_t>(2), paddings.at<int32_t>(6));
1006                     std::swap(paddings.at<int32_t>(3), paddings.at<int32_t>(7));
1007                     //  N    C    W    H
1008                     // 0 1  2 3  4 5  6 7
1009                     std::swap(paddings.at<int32_t>(4), paddings.at<int32_t>(6));
1010                     std::swap(paddings.at<int32_t>(5), paddings.at<int32_t>(7));
1011                     //  N    C    H    W
1012                     // 0 1  2 3  4 5  6 7
1013                 }
1014
1015                 if (next_layers.empty() || paddings.total() != 8 ||
1016                     paddings.at<int32_t>(4) != paddings.at<int32_t>(5) ||
1017                     paddings.at<int32_t>(6) != paddings.at<int32_t>(7) || type == "MirrorPad")
1018                 {
1019                     // Just a single padding layer.
1020                     layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));
1021                     if (type == "MirrorPad")
1022                         layerParams.set("type", "reflect");
1023
1024                     int id = dstNet.addLayer(name, "Padding", layerParams);
1025                     layer_id[name] = id;
1026
1027                     connect(layer_id, dstNet, parsePin(input), id, 0);
1028                     return;
1029                 }
1030                 else
1031                 {
1032                     // Merge with subsequent convolutional layer.
1033                     CV_Assert(next_layers.size() == 1);
1034
1035                     layerParams.set("pad_h", paddings.at<int32_t>(4));
1036                     layerParams.set("pad_w", paddings.at<int32_t>(6));
1037
1038                     layers_to_ignore.insert(next_layers[0].first);
1039
1040                     // FIXIT don't override, rewrite this code
1041                     layer = net.node(next_layers[0].second);
1042                     name = layer.name();
1043                     type = layer.op();
1044                     num_inputs = layer.input_size();
1045                     CV_LOG_DEBUG(NULL, "DNN/TF:     switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs");
1046                 }
1047             }
1048
1049             // For the object detection networks, TensorFlow Object Detection API
1050             // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax)
1051             // order. We can handle this either when the DetectionOutput layer parses predictions
1052             // or by shuffling the last convolution's weights.
1053             bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") &&
1054                                      getLayerAttr(layer, "loc_pred_transposed").b();
1055
1056             layerParams.set("bias_term", false);
1057             layerParams.blobs.resize(1);
1058
1059             next_layers = getNextLayers(net, name, "BiasAdd");
1060             if (next_layers.size() == 1) {
1061                 layerParams.set("bias_term", true);
1062                 layerParams.blobs.resize(2);
1063
1064                 int weights_layer_index = next_layers[0].second;
1065
1066                 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
1067                 ExcludeLayer(net, weights_layer_index, 0, false);
1068                 layers_to_ignore.insert(next_layers[0].first);
1069
1070                 // Shuffle bias from yxYX to xyXY.
1071                 if (locPredTransposed)
1072                 {
1073                     const int numWeights = layerParams.blobs[1].total();
1074                     float* biasData = reinterpret_cast<float*>(layerParams.blobs[1].data);
1075                     CV_Assert(numWeights % 4 == 0);
1076                     for (int i = 0; i < numWeights; i += 2)
1077                     {
1078                         std::swap(biasData[i], biasData[i + 1]);
1079                     }
1080                 }
1081             }
1082
1083             int kernelTensorInpId = -1;
1084             const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernelTensorInpId);
1085             const String kernelTensorName = layer.input(kernelTensorInpId);
1086             std::map<String, Mat>::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName);
1087             if (sharedWeightsIt == sharedWeights.end())
1088             {
1089                 kernelFromTensor(kernelTensor, layerParams.blobs[0]);
1090                 releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
1091
1092                 int* kshape = layerParams.blobs[0].size.p;
1093                 const int outCh = kshape[0];
1094                 const int inCh = kshape[1];
1095                 const int height = kshape[2];
1096                 const int width = kshape[3];
1097                 if (type == "DepthwiseConv2dNative")
1098                 {
1099                     CV_Assert(!locPredTransposed);
1100                     const int chMultiplier = kshape[0];
1101
1102                     Mat copy = layerParams.blobs[0].clone();
1103                     float* src = (float*)copy.data;
1104                     float* dst = (float*)layerParams.blobs[0].data;
1105                     for (int i = 0; i < chMultiplier; ++i)
1106                         for (int j = 0; j < inCh; ++j)
1107                             for (int s = 0; s < height * width; ++s)
1108                                 {
1109                                     int src_i = (i * inCh + j) * height * width + s;
1110                                     int dst_i = (j * chMultiplier + i) * height* width + s;
1111                                     dst[dst_i] = src[src_i];
1112                                 }
1113                     // TODO Use reshape instead
1114                     kshape[0] = inCh * chMultiplier;
1115                     kshape[1] = 1;
1116                     size_t* kstep = layerParams.blobs[0].step.p;
1117                     kstep[0] = kstep[1]; // fix steps too
1118                 }
1119
1120                 // Shuffle output channels from yxYX to xyXY.
1121                 if (locPredTransposed)
1122                 {
1123                     const int slice = height * width * inCh;
1124                     for (int i = 0; i < outCh; i += 2)
1125                     {
1126                         cv::Mat src(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i));
1127                         cv::Mat dst(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i + 1));
1128                         std::swap_ranges(src.begin<float>(), src.end<float>(), dst.begin<float>());
1129                     }
1130                 }
1131                 sharedWeights[kernelTensorName] = layerParams.blobs[0];
1132             }
1133             else
1134             {
1135                 layerParams.blobs[0] = sharedWeightsIt->second;
1136             }
1137             Mat weights = layerParams.blobs[0];
1138             layerParams.set("kernel_size",  DictValue::arrayInt(&weights.size[2], weights.dims - 2));
1139
1140             layerParams.set("num_output", layerParams.blobs[0].size[0]);
1141
1142             setStrides(layerParams, layer);
1143             if (!layerParams.has("pad_w") && !layerParams.has("pad_h"))
1144                 setPadding(layerParams, layer);
1145
1146             // The final node of dilated convolution subgraph.
1147             next_layers = getNextLayers(net, name, "BatchToSpaceND");
1148             if (!next_layers.empty())
1149             {
1150                 CV_Assert(next_layers.size() == 1);
1151                 ExcludeLayer(net, next_layers[0].second, 0, false);
1152                 layers_to_ignore.insert(next_layers[0].first);
1153             }
1154
1155             int id = dstNet.addLayer(name, "Convolution", layerParams);
1156             layer_id[name] = id;
1157
1158             // one input only
1159             connect(layer_id, dstNet, parsePin(input), id, 0);
1160
1161
1162             if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN)
1163                 data_layouts[name] = DATA_LAYOUT_NHWC;
1164         }
1165         else if (type == "BiasAdd" || type == "Add" || type == "AddV2" || type == "Sub" || type=="AddN")
1166         {
1167             CV_CheckGT(num_inputs, 0, "");
1168             bool haveConst = false;
1169             for(int ii = 0; !haveConst && ii < num_inputs; ++ii)
1170             {
1171                 Pin input = parsePin(layer.input(ii));
1172                 haveConst = value_id.find(input.name) != value_id.end();
1173             }
1174             CV_Assert(!haveConst || num_inputs == 2);
1175
1176             if (haveConst)
1177             {
1178                 Mat values = getTensorContent(getConstBlob(layer, value_id));
1179                 CV_Assert(values.type() == CV_32FC1);
1180                 if (type == "Sub")
1181                     values *= -1.0f;
1182
1183                 int id;
1184                 if (values.total() == 1)  // is a scalar.
1185                 {
1186                     layerParams.set("shift", values.at<float>(0));
1187                     id = dstNet.addLayer(name, "Power", layerParams);
1188                 }
1189                 else  // is a vector
1190                 {
1191                     layerParams.blobs.resize(1, values);
1192                     id = dstNet.addLayer(name, "Shift", layerParams);
1193                 }
1194                 layer_id[name] = id;
1195
1196                 // one input only
1197                 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1198             }
1199             else
1200             {
1201                 layerParams.set("operation", "sum");
1202                 if (type == "Sub")
1203                 {
1204                     static float subCoeffs[] = {1.f, -1.f};
1205                     layerParams.set("coeff", DictValue::arrayReal<float*>(subCoeffs, 2));
1206                 }
1207
1208                 int id = dstNet.addLayer(name, "Eltwise", layerParams);
1209                 layer_id[name] = id;
1210
1211                 for (int ii = 0; ii < num_inputs; ii++)
1212                 {
1213                     Pin inp = parsePin(layer.input(ii));
1214                     if (layer_id.find(inp.name) == layer_id.end())
1215                         CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1216                     connect(layer_id, dstNet, inp, id, ii);
1217                 }
1218             }
1219         }
1220         else if (type == "MatMul")
1221         {
1222             CV_CheckEQ(num_inputs, 2, "");
1223
1224             // For the object detection networks, TensorFlow Object Detection API
1225             // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax)
1226             // order. We can handle this either when the DetectionOutput layer parses predictions
1227             // or by shuffling the weights of Faster-RCNN's last MatMul.
1228             bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") &&
1229                                      getLayerAttr(layer, "loc_pred_transposed").b();
1230
1231             layerParams.set("bias_term", false);
1232             layerParams.blobs.resize(1);
1233
1234             StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");  // FIXIT Use layers fusion instead
1235             if (next_layers.empty())
1236             {
1237                 next_layers = getNextLayers(net, name, "Add");
1238             }
1239             if (next_layers.size() == 1) {
1240                 layerParams.set("bias_term", true);
1241                 layerParams.blobs.resize(2);
1242
1243                 int weights_layer_index = next_layers[0].second;
1244                 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
1245                 ExcludeLayer(net, weights_layer_index, 0, false);
1246                 layers_to_ignore.insert(next_layers[0].first);
1247
1248                 if (locPredTransposed)
1249                 {
1250                     const int numWeights = layerParams.blobs[1].total();
1251                     float* biasData = reinterpret_cast<float*>(layerParams.blobs[1].data);
1252                     CV_Assert(numWeights % 4 == 0);
1253                     for (int i = 0; i < numWeights; i += 2)
1254                     {
1255                         std::swap(biasData[i], biasData[i + 1]);
1256                     }
1257                 }
1258             }
1259
1260             int kernel_blob_index = -1;
1261             const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernel_blob_index);
1262             const String kernelTensorName = layer.input(kernel_blob_index);
1263             std::map<String, Mat>::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName);
1264             if (sharedWeightsIt == sharedWeights.end())
1265             {
1266                 blobFromTensor(kernelTensor, layerParams.blobs[0]);
1267                 releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
1268                 sharedWeights[kernelTensorName] = layerParams.blobs[0];
1269             }
1270             else
1271             {
1272                 layerParams.blobs[0] = sharedWeightsIt->second;
1273             }
1274
1275             if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed
1276                 Mat data = layerParams.blobs[0].t();
1277                 layerParams.blobs[0] = data.clone();
1278             }
1279
1280             layerParams.set("num_output", layerParams.blobs[0].size[0]);
1281             if (locPredTransposed)
1282             {
1283                 CV_Assert(layerParams.blobs[0].dims == 2);
1284                 for (int i = 0; i < layerParams.blobs[0].size[0]; i += 2)
1285                 {
1286                     cv::Mat src = layerParams.blobs[0].row(i);
1287                     cv::Mat dst = layerParams.blobs[0].row(i + 1);
1288                     std::swap_ranges(src.begin<float>(), src.end<float>(), dst.begin<float>());
1289                 }
1290             }
1291
1292             int id = dstNet.addLayer(name, "InnerProduct", layerParams);
1293             layer_id[name] = id;
1294
1295             // one input only
1296             int input_blob_index = kernel_blob_index == 0 ? 1 : 0;
1297             connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0);
1298             data_layouts[name] = DATA_LAYOUT_PLANAR;
1299         }
1300         else if (type == "Reshape")
1301         {
1302             CV_CheckGT(num_inputs, 0, "");
1303             Pin inpId = parsePin(layer.input(0));
1304             DataLayout inpLayout = getDataLayout(layer.input(0), data_layouts);
1305             // There are two possible implementations: reshape an input using
1306             // predefined sizes or use a second input blob as a source of new shape.
1307             if (value_id.find(layer.input(1)) != value_id.end())
1308             {
1309                 Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1));
1310                 int newShapeSize = newShape.total();
1311                 bool hasSwap = false;
1312                 if (newShapeSize == 4 && hasAllOnes(newShape, 0, 2))
1313                 {
1314                     // NHWC->NCHW
1315                     std::swap(*newShape.ptr<int32_t>(0, 2), *newShape.ptr<int32_t>(0, 3));
1316                     std::swap(*newShape.ptr<int32_t>(0, 1), *newShape.ptr<int32_t>(0, 2));
1317                     hasSwap = true;
1318                 }
1319                 if (inpLayout == DATA_LAYOUT_NHWC)
1320                 {
1321                     if (newShapeSize >= 2 || newShape.at<int>(1) == 1)
1322                     {
1323                         int order[] = {0, 2, 3, 1};  // From OpenCV's NCHW to NHWC.
1324                         addPermuteLayer(order, name + "/nhwc", inpId);
1325                         if (newShapeSize < 4)
1326                         {
1327                             inpLayout = DATA_LAYOUT_NCHW;
1328                         }
1329                         else
1330                         {
1331                             inpLayout = DATA_LAYOUT_NHWC;
1332                         }
1333                     }
1334                 }
1335                 layerParams.set("dim", DictValue::arrayInt<int*>(newShape.ptr<int>(), newShapeSize));
1336
1337                 int id = dstNet.addLayer(name, "Reshape", layerParams);
1338                 layer_id[name] = id;
1339
1340                 // one input only
1341                 connect(layer_id, dstNet, inpId, id, 0);
1342                 inpId = Pin(name);
1343
1344                 if ((inpLayout == DATA_LAYOUT_NHWC || inpLayout == DATA_LAYOUT_UNKNOWN || inpLayout == DATA_LAYOUT_PLANAR) &&
1345                     newShapeSize == 4 && !hasSwap)
1346                 {
1347                     int order[] = {0, 3, 1, 2};  // Transform back to OpenCV's NCHW.
1348                     addPermuteLayer(order, name + "/nchw", inpId);
1349                     inpLayout = DATA_LAYOUT_NCHW;
1350                 }
1351
1352                 data_layouts[name] = newShapeSize == 2 ? DATA_LAYOUT_PLANAR : inpLayout;
1353             }
1354             else
1355             {
1356                 int id = dstNet.addLayer(name, "Reshape", layerParams);
1357                 layer_id[name] = id;
1358                 connect(layer_id, dstNet, inpId, id, 0);
1359                 connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
1360                 data_layouts[name] = inpLayout;
1361             }
1362         }
1363         else if (type == "Flatten" || type == "Squeeze")
1364         {
1365             CV_CheckGT(num_inputs, 0, "");
1366             Pin inpId = parsePin(layer.input(0));
1367             int inpLayout = getDataLayout(layer.input(0), data_layouts);
1368             if (type == "Squeeze")
1369             {
1370                 CV_Assert(hasLayerAttr(layer, "squeeze_dims"));
1371                 const tensorflow::AttrValue& dims = getLayerAttr(layer, "squeeze_dims");
1372                 std::vector<int> dimsVector(dims.list().i_size());
1373                 for (int i = 0; i < dimsVector.size(); ++i)
1374                     dimsVector[i] = dims.list().i(i);
1375
1376                 // The Flatten layer can only squeeze a contiguous range of dimensions into one.
1377                 std::sort(dimsVector.begin(), dimsVector.end());
1378                 for (int i = 1; i < dimsVector.size(); ++i)
1379                 {
1380                     if (dimsVector[i] != dimsVector[i - 1] + 1)
1381                         CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
1382                 }
1383                 int start = dimsVector.front() - 1, end = dimsVector.back();
1384                 if (start == -1 && end == 0)  // squeeze 0th dimension
1385                 {
1386                     start = 0;
1387                     end = 1;
1388                 }
1389                 layerParams.set("axis", start);
1390                 layerParams.set("end_axis", end);
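                     // For example: squeeze_dims = [2, 3] gives axis = 1 and end_axis = 3;
                     // the special case squeeze_dims = [0] is remapped to axis = 0, end_axis = 1.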
1391             }
1392             if (inpLayout == DATA_LAYOUT_NHWC)
1393             {
1394                 LayerParams permLP;
1395                 int order[] = {0, 2, 3, 1};  // From OpenCV's NCHW to NHWC.
1396                 permLP.set("order", DictValue::arrayInt<int*>(order, 4));
1397
1398                 std::string permName = name + "/nchw";
1399                 CV_Assert(layer_id.find(permName) == layer_id.end());
1400                 int permId = dstNet.addLayer(permName, "Permute", permLP);
1401                 layer_id[permName] = permId;
1402                 connect(layer_id, dstNet, inpId, permId, 0);
1403                 inpId = Pin(permName);
1404             }
1405             int id = dstNet.addLayer(name, "Flatten", layerParams);
1406             layer_id[name] = id;
1407             connect(layer_id, dstNet, inpId, id, 0);
1408             data_layouts[name] = DATA_LAYOUT_PLANAR;
1409         }
1410         else if (type == "Transpose")
1411         {
1412             CV_CheckGT(num_inputs, 0, "");
1413             Mat perm = getTensorContent(getConstBlob(layer, value_id, 1));
1414             CV_Assert(perm.type() == CV_32SC1);
1415             int* permData = (int*)perm.data;
1416             if (perm.total() == 4)
1417             {
1418                 // Only NHWC <-> NCHW permutations are allowed. OpenCV always
1419                 // keeps the NCHW layout this way.
1420                 int inpLayout = getDataLayout(layer.input(0), data_layouts);
1421                 std::string type = "Identity";
1422                 if (inpLayout == DATA_LAYOUT_NHWC)
1423                 {
1424                     if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2)
1425                     {
1426                         // in TensorFlow: NHWC->NCHW
1427                         // in OpenCV: NCHW->NCHW
1428                         data_layouts[name] = DATA_LAYOUT_NCHW;
1429                     }
1430                     else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
1431                     {
1432                         // in TensorFlow: NHWC->NHWC
1433                         // in OpenCV: NCHW->NCHW
1434                         data_layouts[name] = DATA_LAYOUT_NHWC;
1435                     }
1436                     else if (permData[0] == 0 && permData[1] == 3 && permData[2] == 2 && permData[3] == 1)
1437                     {
1438                         // in TensorFlow: NHWC->NCWH
1439                         // in OpenCV: NCHW->NCWH
1440                         int permData[] = {0, 1, 3, 2};
1441                         layerParams.set("order", DictValue::arrayInt<int*>(permData, perm.total()));
1442                         data_layouts[name] = DATA_LAYOUT_NCHW;  // keep tracking NCHW because only the channels position matters
1443                         type = "Permute";
1444                     }
1445                     else
1446                         CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
1447                 }
1448                 else if (inpLayout == DATA_LAYOUT_NCHW)
1449                 {
1450                     if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1)
1451                     {
1452                         // in TensorFlow: NCHW->NHWC
1453                         // in OpenCV: NCHW->NCHW
1454                         data_layouts[name] = DATA_LAYOUT_NHWC;
1455                     }
1456                     else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
1457                     {
1458                         // in TensorFlow: NCHW->NCHW
1459                         // in OpenCV: NCHW->NCHW
1460                         data_layouts[name] = DATA_LAYOUT_NCHW;
1461                     }
1462                     else
1463                         CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
1464                 }
1465                 int id = dstNet.addLayer(name, type, layerParams);
1466                 layer_id[name] = id;
1467                 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1468             }
1469             else
1470             {
1471                 layerParams.set("order", DictValue::arrayInt<int*>(permData, perm.total()));
1472
1473                 int id = dstNet.addLayer(name, "Permute", layerParams);
1474                 layer_id[name] = id;
1475
1476                 // one input only
1477                 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1478                 data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1479             }
1480         }
1481         else if (type == "Const")
1482         {
1483         }
1484         else if (type == "LRN")
1485         {
1486             CV_CheckGT(num_inputs, 0, "");
1487             if(hasLayerAttr(layer, "alpha")) {
1488                 layerParams.set("alpha", getLayerAttr(layer, "alpha").f());
1489             }
1490             if(hasLayerAttr(layer, "beta")) {
1491                 layerParams.set("beta", getLayerAttr(layer, "beta").f());
1492             }
1493             if(hasLayerAttr(layer, "depth_radius")) {
1494                 int radius = (int)getLayerAttr(layer, "depth_radius").i();
1495                 layerParams.set("local_size", 2*radius + 1);
1496             }
1497             if(hasLayerAttr(layer, "bias")) {
1498                 layerParams.set("bias", getLayerAttr(layer, "bias").f());
1499             }
1500             layerParams.set("norm_by_size", false);
1501
1502             int id = dstNet.addLayer(name, "LRN", layerParams);
1503             layer_id[name] = id;
1504
1505             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
1506         }
1507         else if (type == "Concat" || type == "ConcatV2")
1508         {
1509             CV_CheckGT(num_inputs, 0, "");
1510             int axisId = (type == "Concat" ? 0 : num_inputs - 1);
1511             int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
1512
1513             if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1514                 axis = toNCHW(axis);
1515             else if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NDHWC)
1516                 axis = toNCDHW(axis);
1517             layerParams.set("axis", axis);
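                 // For example, a ConcatV2 over the channels axis (3 in NHWC) maps to axis 1 in NCHW.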
1518
1519             // input(0) or input(n-1) is concat_dim
1520             int from = (type == "Concat" ? 1 : 0);
1521             int to = (type == "Concat" ? num_inputs : num_inputs - 1);
1522
1523             for (int ii = from; ii < to; ii++)
1524             {
1525                 Pin inp = parsePin(layer.input(ii));
1526                 if (layer_id.find(inp.name) == layer_id.end())
1527                 {
1528                     // There are constant inputs.
1529                     LayerParams lp;
1530                     lp.name = inp.name;
1531                     lp.type = "Const";
1532                     lp.blobs.resize(1);
1533                     blobFromTensor(getConstBlob(layer, value_id, ii), lp.blobs.back());
1534                     CV_Assert_N(!lp.blobs[0].empty(), lp.blobs[0].type() == CV_32F);
1535
1536                     int constInpId = dstNet.addLayer(lp.name, lp.type, lp);
1537                     layer_id[lp.name] = constInpId;
1538                 }
1539             }
1540
1541             int id = dstNet.addLayer(name, "Concat", layerParams);
1542             layer_id[name] = id;
1543
1544             for (int ii = from; ii < to; ii++)
1545             {
1546                 Pin inp = parsePin(layer.input(ii));
1547                 if (layer_id.find(inp.name) == layer_id.end())
1548                     CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1549                 connect(layer_id, dstNet, inp, id, ii - from);
1550             }
1551         }
1552         else if (type == "MaxPool" || type == "MaxPool3D")
1553         {
1554             CV_CheckGT(num_inputs, 0, "");
1555             layerParams.set("pool", "max");
1556
1557             setKSize(layerParams, layer);
1558             setStrides(layerParams, layer);
1559             setPadding(layerParams, layer);
1560             // Test_TensorFlow_nets.EAST_text_detection/1, NGRAPH/CPU
1561             layerParams.set("ceil_mode", false);
1562
1563             int id = dstNet.addLayer(name, "Pooling", layerParams);
1564             layer_id[name] = id;
1565
1566             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
1567         }
1568         else if (type == "AvgPool" || type == "AvgPool3D")
1569         {
1570             CV_CheckGT(num_inputs, 0, "");
1571             layerParams.set("pool", "ave");
1572             layerParams.set("ave_pool_padded_area", false);
1573             setKSize(layerParams, layer);
1574             setStrides(layerParams, layer);
1575             setPadding(layerParams, layer);
1576
1577             int id = dstNet.addLayer(name, "Pooling", layerParams);
1578             layer_id[name] = id;
1579
1580             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
1581         }
1582         else if (type == "MaxPoolGrad")
1583         {
1584             CV_CheckEQ(num_inputs, 3, "");
1585
1586             layerParams.set("pool_k_h", 0);
1587             layerParams.set("pool_k_w", 0);
1588             layerParams.set("pool_stride_h", 0);
1589             layerParams.set("pool_stride_w", 0);
1590             layerParams.set("pool_pad_h", 0);
1591             layerParams.set("pool_pad_w", 0);
1592
1593             int id = dstNet.addLayer(name, "MaxUnpool", layerParams);
1594             layer_id[name] = id;
1595
1596             connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
1597             connect(layer_id, dstNet, parsePin(layer.input(1) + ":1"), id, 1);
1598             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 2);
1599         }
1600         else if (type == "Placeholder")
1601         {
1602             if (!hasLayerAttr(layer, "dtype") ||
1603                 getLayerAttr(layer, "dtype").type() != tensorflow::DT_BOOL)  // If input is not a train/test flag.
1604             {
1605                 netInputsNames.push_back(name);
1606                 layer_id[name] = 0;
1607             }
1608             tensorflow::TensorShapeProto shape;
1609             if (hasLayerAttr(layer, "shape"))
1610                 shape = getLayerAttr(layer, "shape").shape();
1611             else if (hasLayerAttr(layer, "_output_shapes"))
1612             {
1613                 tensorflow::AttrValue_ListValue list = getLayerAttr(layer, "_output_shapes").list();
1614                 if (list.shape_size())
1615                     shape = list.shape()[0];
1616             }
1617             if (shape.dim_size())
1618             {
1619                 MatShape dims(shape.dim_size());
1620                 for (int i = 0; i < dims.size(); ++i)
1621                     dims[i] = shape.dim(i).size();
1622                 if (dims.size() == 4 && predictedLayout == DATA_LAYOUT_NHWC)
1623                 {
1624                     std::swap(dims[1], dims[3]);  // NHWC->NCWH
1625                     std::swap(dims[2], dims[3]);  // NCWH->NCHW
1626                     if (dims[0] == -1)  // It's OK to have undetermined batch size
1627                         dims[0] = 1;
1628                 }
1629                 bool hasNeg = false;
1630                 for (int i = 0; i < dims.size() && !hasNeg; ++i)
1631                 {
1632                     hasNeg = dims[i] < 0;
1633                 }
1634                 if (!hasNeg)
1635                     netInputShapes.push_back(dims);
1636             }
1637         }
1638         else if (type == "Split") {
1639             // TODO: determining axis index remapping by input dimensions order of input blob
1640             // TODO: slicing input may be Const op
1641             // TODO: slicing kernels for convolutions - in current implementation it is impossible
1642             // TODO: add parsing num of slices parameter
1643             CV_CheckEQ(num_inputs, 2, "");
1644             // num_split
1645             // 1st blob is dims tensor
1646             int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
1647             if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1648                 axis = toNCHW(axis);
1649             layerParams.set("axis", axis);
1650
1651             if (hasLayerAttr(layer, "num_split"))
1652                 layerParams.set("num_split", getLayerAttr(layer, "num_split").i());
1653
1654             int id = dstNet.addLayer(name, "Slice", layerParams);
1655             layer_id[name] = id;
1656
1657             // one input only
1658             connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1659         }
1660         else if (type == "Slice")
1661         {
1662             // op: "Slice"
1663             // input: "input_node"
1664             // input: "Slice/begin"
1665             // input: "Slice/size"
1666             CV_CheckEQ(num_inputs, 3, "");
1667             Mat begins = getTensorContent(getConstBlob(layer, value_id, 1));
1668             Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2));
1669             CV_Assert_N(!begins.empty(), !sizes.empty());
1670             CV_CheckTypeEQ(begins.type(), CV_32SC1, "");
1671             CV_CheckTypeEQ(sizes.type(), CV_32SC1, "");
1672
1673             if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1674             {
1675                 // Swap NHWC parameters' order to NCHW.
1676                 std::swap(*begins.ptr<int32_t>(0, 2), *begins.ptr<int32_t>(0, 3));
1677                 std::swap(*begins.ptr<int32_t>(0, 1), *begins.ptr<int32_t>(0, 2));
1678                 std::swap(*sizes.ptr<int32_t>(0, 2), *sizes.ptr<int32_t>(0, 3));
1679                 std::swap(*sizes.ptr<int32_t>(0, 1), *sizes.ptr<int32_t>(0, 2));
1680             }
1681             layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total()));
1682             layerParams.set("size", DictValue::arrayInt((int*)sizes.data, sizes.total()));
1683
1684             int id = dstNet.addLayer(name, "Slice", layerParams);
1685             layer_id[name] = id;
1686
1687             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1688         }
1689         else if (type == "StridedSlice")
1690         {
1691             CV_CheckEQ(num_inputs, 4, "");
1692             Mat begins = getTensorContent(getConstBlob(layer, value_id, 1));
1693             Mat ends = getTensorContent(getConstBlob(layer, value_id, 2));
1694             Mat strides = getTensorContent(getConstBlob(layer, value_id, 3));
1695             CV_CheckTypeEQ(begins.type(), CV_32SC1, "");
1696             CV_CheckTypeEQ(ends.type(), CV_32SC1, "");
1697             CV_CheckTypeEQ(strides.type(), CV_32SC1, "");
1698             const int num = begins.total();
1699             CV_Assert_N(num == ends.total(), num == strides.total());
1700
1701             int end_mask = getLayerAttr(layer, "end_mask").i();
1702             for (int i = 0; i < num; ++i)
1703             {
1704                 if (ends.at<int>(i) < 0)
1705                     ends.at<int>(i) -= 1;
1706                 if (end_mask & (1 << i))
1707                     ends.at<int>(i) = -1;
1708                 if (strides.at<int>(i) != 1)
1709                     CV_Error(Error::StsNotImplemented,
1710                              format("StridedSlice with stride %d", strides.at<int>(i)));
1711             }
1712             if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1713             {
1714                 // Swap NHWC parameters' order to NCHW.
1715                 std::swap(begins.at<int>(2), begins.at<int>(3));
1716                 std::swap(begins.at<int>(1), begins.at<int>(2));
1717                 std::swap(ends.at<int>(2), ends.at<int>(3));
1718                 std::swap(ends.at<int>(1), ends.at<int>(2));
1719             }
1720             layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total()));
1721             layerParams.set("end", DictValue::arrayInt((int*)ends.data, ends.total()));
1722
1723             int id = dstNet.addLayer(name, "Slice", layerParams);
1724             layer_id[name] = id;
1725
1726             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1727         }
1728         else if (type == "Mul" || type == "RealDiv")
1729         {
1730             CV_CheckGT(num_inputs, 0, "");
1731             int constId = -1;
1732             for(int ii = 0; ii < num_inputs; ++ii)
1733             {
1734                 Pin input = parsePin(layer.input(ii));
1735                 if (value_id.find(input.name) != value_id.end())
1736                 {
1737                     constId = ii;
1738                     break;
1739                 }
1740             }
1741             CV_Assert((constId != -1) || (num_inputs == 2));
1742
1743             if (constId != -1)
1744             {
1745                 // Multiplication by constant.
1746                 CV_CheckEQ(num_inputs, 2, "");
1747                 Mat scaleMat = getTensorContent(getConstBlob(layer, value_id));
1748                 CV_Assert(scaleMat.type() == CV_32FC1);
1749                 if (type == "RealDiv")
1750                 {
1751                     if (constId == 0)
1752                         CV_Error(Error::StsNotImplemented, "Division of constant over variable");
1753                     scaleMat = 1.0f / scaleMat;
1754                 }
1755
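                     // A scalar multiplier becomes a Power layer (or a ReLU with negative_slope
                     // if the LeakyRelu pattern below matches); a vector multiplier becomes a
                     // Scale layer, optionally fusing a following Add as its bias.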
1756                 int id;
1757                 if (scaleMat.total() == 1)  // is a scalar.
1758                 {
1759                     // Try to match with a LeakyRelu:
1760                     // node {
1761                     //   name: "LeakyRelu/mul"
1762                     //   op: "Mul"
1763                     //   input: "LeakyRelu/alpha"
1764                     //   input: "input"
1765                     // }
1766                     // node {
1767                     //   name: "LeakyRelu/Maximum"
1768                     //   op: "Maximum"
1769                     //   input: "LeakyRelu/mul"
1770                     //   input: "input"
1771                     // }
1772                     StrIntVector next_layers = getNextLayers(net, name, "Maximum");
1773                     if (!next_layers.empty())
1774                     {
1775                         int maximumLayerIdx = next_layers[0].second;
1776
1777                         CV_Assert(net.node(maximumLayerIdx).input_size() == 2);
1778
1779                         // The input from the Mul layer can also be at index 1.
1780                         int mulInputIdx = (net.node(maximumLayerIdx).input(0) == name) ? 0 : 1;
1781
1782                         ExcludeLayer(net, maximumLayerIdx, mulInputIdx, false);
1783                         layers_to_ignore.insert(next_layers[0].first);
1784
1785                         layerParams.set("negative_slope", scaleMat.at<float>(0));
1786                         id = dstNet.addLayer(name, "ReLU", layerParams);
1787                     }
1788                     else
1789                     {
1790                         // Just a multiplication.
1791                         layerParams.set("scale", scaleMat.at<float>(0));
1792                         id = dstNet.addLayer(name, "Power", layerParams);
1793                     }
1794                 }
1795                 else  // is a vector
1796                 {
1797                     layerParams.blobs.resize(1, scaleMat);
1798
1799                     StrIntVector next_layers = getNextLayers(net, name, "Add");
1800                     if (!next_layers.empty())
1801                     {
1802                         layerParams.set("bias_term", true);
1803                         layerParams.blobs.resize(2);
1804
1805                         int weights_layer_index = next_layers[0].second;
1806                         blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back());
1807                         ExcludeLayer(net, weights_layer_index, 0, false);
1808                         layers_to_ignore.insert(next_layers[0].first);
1809                     }
1810
1811                     if (hasLayerAttr(layer, "axis"))
1812                         layerParams.set("axis", getLayerAttr(layer, "axis").i());
1813
1814                     id = dstNet.addLayer(name, "Scale", layerParams);
1815                 }
1816                 layer_id[name] = id;
1817
1818                 Pin inp0 = parsePin(layer.input(0));
1819                 if (layer_id.find(inp0.name) != layer_id.end())
1820                     // The first operand is a real layer, not the constant.
1821                     connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1822                 else
1823                     connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1824             }
1825             else
1826             {
1827                 // Check if all the inputs have the same shape.
1828                 bool equalInpShapes = true;
1829                 bool isShapeOnes = false;
1830                 MatShape outShape0;
1831                 for (int ii = 0; ii < num_inputs && !netInputShapes.empty(); ii++)
1832                 {
1833                     Pin pin = parsePin(layer.input(ii));
1834                     int inpId = layer_id.find(pin.name)->second;
1835
1836                     // Get input shape
1837                     MatShape outShape;
1838                     std::vector<MatShape> inpShapes, outShapes;
1839                     dstNet.getLayerShapes(netInputShapes, inpId, inpShapes, outShapes);
1840                     CV_CheckGT(static_cast<int>(outShapes.size()), pin.blobIndex, "");
1841                     outShape = outShapes[pin.blobIndex];
1842
1843                     if (ii == 0)
1844                     {
1845                         outShape0 = outShape;
1846                     }
1847                     else if (outShape != outShape0)
1848                     {
1849                         equalInpShapes = false;
1850                         isShapeOnes = isAllOnes(outShape, 2, outShape.size()) ||
1851                                       isAllOnes(outShape0, 2, outShape0.size());
1852                         break;
1853                     }
1854                 }
1855
1856                 int id;
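                     // Equal (or unknown) input shapes map to an element-wise layer; shapes that
                     // differ only by trailing ones are handled as broadcasting by a Scale layer.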
1857                 if (equalInpShapes || netInputShapes.empty() || (!equalInpShapes && isShapeOnes))
1858                 {
1859                     layerParams.set("operation", type == "RealDiv" ? "div" : "prod");
1860                     id = dstNet.addLayer(name, "Eltwise", layerParams);
1861                 }
1862                 else
1863                 {
1864                     if (type == "RealDiv")
1865                         CV_Error(Error::StsNotImplemented, "Division of non equal tensors");
1866                     id = dstNet.addLayer(name, "Scale", layerParams);
1867                 }
1868
1869                 layer_id[name] = id;
1870
1871                 for (int ii = 0; ii < num_inputs; ii++)
1872                 {
1873                     Pin inp = parsePin(layer.input(ii));
1874                     if (layer_id.find(inp.name) == layer_id.end())
1875                         CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1876                     connect(layer_id, dstNet, inp, id, ii);
1877                 }
1878             }
1879         }
1880         else if (type == "FusedBatchNorm" || type == "FusedBatchNormV3")
1881         {
1882             // op: "FusedBatchNorm"
1883             // input: "input"
1884             // input: "BatchNorm/gamma"
1885             // input: "BatchNorm/beta"
1886             // input: "BatchNorm/moving_mean"
1887             // input: "BatchNorm/moving_variance"
1888             CV_CheckEQ(num_inputs, 5, "Expected gamma, beta, mean and std");
1889             Pin inpId = parsePin(layer.input(0));
1890
1891             bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b();
1892
1893             layerParams.blobs.resize(2);
1894
1895             const tensorflow::TensorProto& gammaTensor = getConstBlob(layer, value_id, 1);
1896             if (!gammaTensor.tensor_content().empty())
1897             {
1898                 layerParams.blobs.resize(layerParams.blobs.size() + 1);
1899                 layerParams.set("has_weight", true);
1900                 blobFromTensor(gammaTensor, layerParams.blobs.back());
1901             }
1902             else
1903                 layerParams.set("has_weight", false);
1904
1905             const tensorflow::TensorProto& betaTensor = getConstBlob(layer, value_id, 2);
1906             if (!betaTensor.tensor_content().empty())
1907             {
1908                 layerParams.blobs.resize(layerParams.blobs.size() + 1);
1909                 layerParams.set("has_bias", true);
1910                 blobFromTensor(betaTensor, layerParams.blobs.back());
1911             }
1912             else
1913                 layerParams.set("has_bias", false);
1914
1915             Mat mean, std;
1916             if (isTraining)
1917             {
1918                 if (layerParams.blobs.size() == 2)
1919                     CV_Error(Error::StsNotImplemented, "Cannot determine number "
1920                              "of parameters for batch normalization layer.");
1921                 mean = Mat::zeros(1, layerParams.blobs[2].total(), CV_32F);
1922                 std = Mat::ones(1, layerParams.blobs[2].total(), CV_32F);
1923
1924                 // Add an extra layer: Mean-Variance normalization
1925                 LayerParams mvnParams;
1926                 std::string mvnName = name + "/MVN";
1927                 CV_Assert(layer_id.find(mvnName) == layer_id.end());
1928                 int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);
1929                 layer_id[mvnName] = mvnId;
1930                 connect(layer_id, dstNet, inpId, mvnId, 0);
1931                 inpId = Pin(mvnName);
1932             }
1933             else
1934             {
1935                 blobFromTensor(getConstBlob(layer, value_id, 3), mean);
1936                 blobFromTensor(getConstBlob(layer, value_id, 4), std);
1937             }
1938             layerParams.blobs[0] = mean;
1939             layerParams.blobs[1] = std;
1940
1941             if (hasLayerAttr(layer, "epsilon"))
1942                 layerParams.set("eps", getLayerAttr(layer, "epsilon").f());
1943
1944             int id = dstNet.addLayer(name, "BatchNorm", layerParams);
1945             layer_id[name] = id;
1946
1947             // one input only
1948             connect(layer_id, dstNet, inpId, id, 0);
1949         }
1950         else if (type == "Conv2DBackpropInput")
1951         {
1952             // op: "Conv2DBackpropInput"
1953             // input: "conv2d_transpose/output_shape"
1954             // input: "weights"
1955             // input: "input"
1956             CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes");
1957
1958             layerParams.set("bias_term", false);
1959             layerParams.blobs.resize(1);
1960
1961             StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
1962             if (next_layers.size() == 1)
1963             {
1964                 layerParams.set("bias_term", true);
1965                 layerParams.blobs.resize(2);
1966
1967                 int weights_layer_index = next_layers[0].second;
1968
1969                 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
1970                 ExcludeLayer(net, weights_layer_index, 0, false);
1971                 layers_to_ignore.insert(next_layers[0].first);
1972             }
1973
1974             kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);
1975
1976             const int* kshape = layerParams.blobs[0].size.p;
1977             const int kernelH = kshape[2];
1978             const int kernelW = kshape[3];
1979             layerParams.set("kernel_h", kernelH);
1980             layerParams.set("kernel_w", kernelW);
1981             layerParams.set("num_output", kshape[1]);
1982
1983             setStrides(layerParams, layer);
1984             setPadding(layerParams, layer);
1985
1986             // For a convolution layer, the output shape is computed as
1987             // o = 1 + (i - k + 2*p) / s
1988             // i - input size, o - output size, k - kernel size, p - pad, s - stride
1989             // In TensorFlow, p == 0 when padMode == 'VALID', or p == (k - 1) / 2
1990             // when padMode == 'SAME' (assuming that k is odd), so
1991             // SAME:  o = 1 + (i - 1) / s
1992             // VALID: o = 1 + (i - k) / s
1993             // A deconvolution layer's output shape is computed as
1994             // SAME:  o = 1 + (i - 1)*s
1995             // VALID: o = (i - 1)*s + k
1996             // If output_shape differs from the formulas above, adjustment padding (adj_h/adj_w) is applied.
1997
1998             const int strideY = layerParams.get<int>("stride_h");
1999             const int strideX = layerParams.get<int>("stride_w");
2000             Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
2001             const int outH = outShape.at<int>(1);
2002             const int outW = outShape.at<int>(2);
2003             if (layerParams.get<String>("pad_mode") == "SAME")
2004             {
2005                 layerParams.set("adj_w", (outW - 1) % strideX);
2006                 layerParams.set("adj_h", (outH - 1) % strideY);
2007             }
2008             else if (layerParams.get<String>("pad_mode") == "VALID")
2009             {
2010                 layerParams.set("adj_w", (outW - kernelW) % strideX);
2011                 layerParams.set("adj_h", (outH - kernelH) % strideY);
2012             }
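                 // e.g. (purely illustrative numbers) with stride 3, kernel 3 and a requested
                 // output width of 10: SAME gives adj_w = (10 - 1) % 3 = 0, while VALID gives
                 // adj_w = (10 - 3) % 3 = 1.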
2013             int id = dstNet.addLayer(name, "Deconvolution", layerParams);
2014             layer_id[name] = id;
2015
2016             // one input only
2017             connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
2018         }
2019         else if (type == "BlockLSTM")
2020         {
2021             // op: "BlockLSTM"
2022             // input: "lstm_block_wrapper/ToInt64/x"  (ignore, number of time steps)
2023             // input: "input"
2024             // input: "lstm_block_wrapper/zeros"      (ignore)
2025             // input: "lstm_block_wrapper/zeros"      (ignore)
2026             // input: "lstm_block_wrapper/kernel"
2027             // input: "lstm_block_wrapper/w_i_diag"
2028             // input: "lstm_block_wrapper/w_f_diag"
2029             // input: "lstm_block_wrapper/w_o_diag"
2030             // input: "lstm_block_wrapper/bias"
2031             CV_CheckEQ(num_inputs, 9, "Unexpected number of input nodes");
2032
2033             if (hasLayerAttr(layer, "forget_bias"))
2034                 layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f());
2035
2036             if (hasLayerAttr(layer, "cell_clip"))
2037             {
2038                 float cellClip = getLayerAttr(layer, "cell_clip").f();
2039                 // Cell clip is disabled if it's negative.
2040                 if (cellClip >= 0)
2041                 {
2042                     layerParams.set("use_cell_clip", true);
2043                     layerParams.set("cell_clip", cellClip);
2044                 }
2045             }
2046
2047             Mat W, Wh, Wx, b;
2048             blobFromTensor(getConstBlob(layer, value_id, 4), W);
2049             blobFromTensor(getConstBlob(layer, value_id, 8), b);
2050             const int outSize = W.cols / 4;
2051
2052             // IGFO->IFOG
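                 // Each row of W stores four gate blocks of width outSize; the two swaps below
                 // reorder them from TensorFlow's [i, g, f, o] layout to the [i, f, o, g] layout
                 // consumed by the LSTM layer created below.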
2053             float* weightData = (float*)W.data;
2054             for (int i = 0; i < W.rows; ++i)
2055                 for (int j = 0; j < outSize; ++j)
2056                 {
2057                     std::swap(weightData[i * W.cols + 1 * outSize + j],
2058                               weightData[i * W.cols + 2 * outSize + j]);
2059                     std::swap(weightData[i * W.cols + 2 * outSize + j],
2060                               weightData[i * W.cols + 3 * outSize + j]);
2061                 }
2062             Wx = W.rowRange(0, W.rows - outSize).t();
2063             Wh = W.rowRange(W.rows - outSize, W.rows).t();
2064
2065             layerParams.blobs.resize(3);
2066             layerParams.blobs[0] = Wh;
2067             layerParams.blobs[1] = Wx;
2068             layerParams.blobs[2] = b;
2069
2070             if (hasLayerAttr(layer, "use_peephole"))
2071             {
2072                 bool usePeephole = getLayerAttr(layer, "use_peephole").b();
2073                 if (usePeephole)
2074                 {
2075                     layerParams.set("use_peephole", true);
2076                     layerParams.blobs.resize(6);
2077                     for (int i = 0; i < 3; ++i)
2078                     {
2079                         Mat w;
2080                         blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);
2081                         w = w.reshape(1, w.total());  // Single column.
2082                         w = Mat::diag(w);  // Make a diagonal matrix.
2083                         layerParams.blobs[3 + i] = w;
2084                     }
2085                 }
2086             }
2087
2088             int id = dstNet.addLayer(name, "LSTM", layerParams);
2089             layer_id[name] = id;
2090
2091             // one input only
2092             connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
2093             data_layouts[name] = DATA_LAYOUT_UNKNOWN;
2094         }
2095         else if (type == "ResizeNearestNeighbor" || type == "ResizeBilinear" || type == "FusedResizeAndPadConv2D")
2096         {
2097             CV_CheckGT(num_inputs, 0, "");
2098             std::string convWeights = "";
2099             if (type == "FusedResizeAndPadConv2D")
2100             {
2101                 // input: "mul_1"
2102                 // input: "decoder/ResizeBilinear/size"
2103                 // input: "decoder/decoder_conv0/Conv2D_dummy_paddings"
2104                 // input: "decoder/decoder_conv0/weights"
2105                 CV_CheckEQ(num_inputs, 4, "Number of input for FusedResizeAndPadConv2D");
2106
2107                 Mat paddings = getTensorContent(getConstBlob(layer, value_id, 2));
2108                 CV_CheckEQ(countNonZero(paddings), 0, "Unsupported mode");
2109
2110                 convWeights = layer.input(3);
2111                 layer.mutable_input()->DeleteSubrange(2, 2);  // FIXIT do NOT modify input model
2112                 num_inputs = layer.input_size();
2113                 name = name + "/resize";
2114
2115                 if (hasLayerAttr(layer, "resize_align_corners"))
2116                 {
2117                     // FIXIT do NOT modify input model
2118                     layer.mutable_attr()->insert(
2119                         ::google::protobuf::MapPair<std::string, tensorflow::AttrValue>("align_corners",
2120                                                                                         getLayerAttr(layer, "resize_align_corners")));
2121                 }
2122             }
2123             if (num_inputs == 2)
2124             {
2125                 Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1));
2126                 CV_CheckTypeEQ(outSize.type(), CV_32SC1, ""); CV_CheckEQ(outSize.total(), (size_t)2, "");
2127                 layerParams.set("height", outSize.at<int>(0, 0));
2128                 layerParams.set("width", outSize.at<int>(0, 1));
2129             }
2130             else if (num_inputs == 3)
2131             {
2132                 Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1));
2133                 Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2));
2134                 factorHeight.convertTo(factorHeight, CV_32F);
2135                 factorWidth.convertTo(factorWidth, CV_32F);
2136                 layerParams.set("zoom_factor_x", factorWidth.at<float>(0));
2137                 layerParams.set("zoom_factor_y", factorHeight.at<float>(0));
2138             }
2139             else
2140                 CV_Check(num_inputs, num_inputs == 2 || num_inputs == 3, "");
2141
2142             if (type == "ResizeNearestNeighbor")
2143                 layerParams.set("interpolation", "nearest");
2144             else
2145                 layerParams.set("interpolation", "bilinear");
2146
2147             if (hasLayerAttr(layer, "align_corners"))
2148                 layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b());
2149
2150             if (hasLayerAttr(layer, "half_pixel_centers"))
2151                 layerParams.set("half_pixel_centers", getLayerAttr(layer, "half_pixel_centers").b());
2152
2153             int id = dstNet.addLayer(name, "Resize", layerParams);
2154             layer_id[name] = id;
2155
2156             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2157
2158             // Step back to add convolution
2159             if (type == "FusedResizeAndPadConv2D")
2160             {
2161                 tensorflow::NodeDef conv = layer_;
2162                 conv.clear_input();
2163                 conv.add_input(name);
2164                 conv.add_input(convWeights);
2165                 conv.set_op("Conv2D");
2166                 parseNode(conv);
2167             }
2168         }
2169         else if (type == "L2Normalize")
2170         {
2171             // op: "L2Normalize"
2172             // input: "input"
2173             // input: "reduction_indices" (axis)
2174             CV_CheckEQ(num_inputs, 2, "");
2175             Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));
2176             CV_Assert(reductionIndices.type() == CV_32SC1);
2177
2178             const int numAxes = reductionIndices.total();
2179             if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
2180                 for (int i = 0; i < numAxes; ++i)
2181                     reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));
2182
2183             cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);
2184             for (int i = 1; i < numAxes; ++i)
2185             {
2186                 CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);
2187                 // Axes have the same sign.
2188                 CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);
2189             }
2190             layerParams.set("start_axis", reductionIndices.at<int>(0));
2191             layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));
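                 // For example, reduction_indices = [3] (channels in NHWC) is remapped to [1],
                 // so start_axis = end_axis = 1.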
2192
2193             int id = dstNet.addLayer(name, "Normalize", layerParams);
2194             layer_id[name] = id;
2195             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2196         }
2197         else if (type == "PriorBox")
2198         {
2199             CV_CheckEQ(num_inputs, 2, "");
2200             if (hasLayerAttr(layer, "min_size"))
2201                 layerParams.set("min_size", getLayerAttr(layer, "min_size").i());
2202             if (hasLayerAttr(layer, "max_size"))
2203                 layerParams.set("max_size", getLayerAttr(layer, "max_size").i());
2204             if (hasLayerAttr(layer, "flip"))
2205                 layerParams.set("flip", getLayerAttr(layer, "flip").b());
2206             if (hasLayerAttr(layer, "clip"))
2207                 layerParams.set("clip", getLayerAttr(layer, "clip").b());
2208             if (hasLayerAttr(layer, "offset"))
2209                 layerParams.set("offset", getLayerAttr(layer, "offset").f());
2210             if (hasLayerAttr(layer, "step"))
2211                 layerParams.set("step", getLayerAttr(layer, "step").f());
2212
2213             const std::string paramNames[] = {"variance", "aspect_ratio", "scales",
2214                                               "width", "height"};
2215             for (int i = 0; i < 5; ++i)
2216             {
2217                 if (hasLayerAttr(layer, paramNames[i]))
2218                 {
2219                     Mat values = getTensorContent(getLayerAttr(layer, paramNames[i]).tensor());
2220                     layerParams.set(paramNames[i],
2221                                     DictValue::arrayReal<float*>((float*)values.data, values.total()));
2222                 }
2223             }
2224             int id = dstNet.addLayer(name, "PriorBox", layerParams);
2225             layer_id[name] = id;
2226             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2227             connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
2228             data_layouts[name] = DATA_LAYOUT_UNKNOWN;
2229         }
2230         else if (type == "Softmax")
2231         {
2232             CV_CheckGT(num_inputs, 0, "");
2233             if (hasLayerAttr(layer, "axis"))
2234                 layerParams.set("axis", getLayerAttr(layer, "axis").i());
2235
2236             int id = dstNet.addLayer(name, "Softmax", layerParams);
2237             layer_id[name] = id;
2238             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
2239         }
2240         else if (type == "CropAndResize")
2241         {
2242             // op: "CropAndResize"
2243             // input: "input"
2244             // input: "boxes"
2245             // input: "sizes"
2246             CV_CheckEQ(num_inputs, 3, "");
2247
2248             Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2));
2249             CV_CheckTypeEQ(cropSize.type(), CV_32SC1, ""); CV_CheckEQ(cropSize.total(), (size_t)2, "");
2250
2251             layerParams.set("height", cropSize.at<int>(0));
2252             layerParams.set("width", cropSize.at<int>(1));
2253
2254             int id = dstNet.addLayer(name, "CropAndResize", layerParams);
2255             layer_id[name] = id;
2256
2257             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2258             connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
2259         }
2260         else if (type == "Mean" || type == "Sum")
2261         {
2262             // Computes the mean of elements across dimensions of a tensor.
2263             // If keepdims is false (default) reduces input_tensor along the dimensions given in axis,
2264             // else the reduced dimensions are retained with length 1.
2265             // if indices = [1, 2] in NHWC layout we use global pooling: NxCxHxW --Pooling--> NxCx1x1
2266             // if keepdims is false we use Flatten after Pooling: out_shape = NxC
2267             // if indices = [0] we apply global pooling along that axis.
2268             // To return the correct shape we use Reshape after Pooling; to determine the target shape we apply Slice to the input,
2269             // and if keepdims is false we use Flatten after the Slice.
2270             // Example: input_shape = NxCxHxW
2271             // determine out shape: NxCxHxW --Slice--> 1xCxHxW
2272             //                      out_shape = 1xCxHxW if keepDims else (1xCxHxW --Flatten--> CxHxW)
2273             // global pool: NxCxHxW --Flatten--> Nx(C*H*W) --Reshape--> 1x1xNx(C*H*W) --Pooling--> 1x1x1x(C*H*W) --Reshape--> out_shape
2274             CV_CheckGT(num_inputs, 0, "");
2275
2276             Mat indices = getTensorContent(getConstBlob(layer, value_id, 1));
2277             CV_Assert(indices.type() == CV_32SC1);
2278
2279             // There are two attributes, "keepdims" and a deprecated "keep_dims".
2280             bool keepDims = false;
2281             if (hasLayerAttr(layer, "keepdims"))
2282                 keepDims = getLayerAttr(layer, "keepdims").b();
2283             else if (hasLayerAttr(layer, "keep_dims"))
2284                 keepDims = getLayerAttr(layer, "keep_dims").b();
2285
2286             if (indices.total() == 1 && indices.at<int>(0) == 0)
2287             {
2288                 LayerParams flattenLp;
2289                 std::string flattenName = name + "/flatten";
2290                 CV_Assert(layer_id.find(flattenName) == layer_id.end());
2291                 int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
2292                 layer_id[flattenName] = flattenId;
2293                 connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0);
2294
2295                 LayerParams reshapeLp;
2296                 std::string reshapeName = name + "/reshape";
2297                 CV_Assert(layer_id.find(reshapeName) == layer_id.end());
2298                 reshapeLp.set("axis", 0);
2299                 reshapeLp.set("num_axes", 1);
2300                 int newShape[] = {1, 1, -1};
2301                 reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3));
2302
2303                 int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp);
2304                 layer_id[reshapeName] = reshapeId;
2305                 connect(layer_id, dstNet, Pin(flattenName), reshapeId, 0);
2306
2307                 LayerParams avgLp;
2308                 std::string avgName = name + "/avg";
2309                 CV_Assert(layer_id.find(avgName) == layer_id.end());
2310                 avgLp.set("pool", type == "Mean" ? "ave" : "sum");
2311                 // pooling kernel H x 1
2312                 avgLp.set("global_pooling_h", true);
2313                 avgLp.set("kernel_w", 1);
2314                 int avgId = dstNet.addLayer(avgName, "Pooling", avgLp);
2315                 layer_id[avgName] = avgId;
2316                 connect(layer_id, dstNet, Pin(reshapeName), avgId, 0);
2317
2318                 LayerParams sliceLp;
2319                 std::string layerShapeName = name + "/slice";
2320                 CV_Assert(layer_id.find(layerShapeName) == layer_id.end());
2321                 sliceLp.set("axis", 0);
2322                 int begin[] = {0};
2323                 int size[] = {1};
2324                 sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1));
2325                 sliceLp.set("size", DictValue::arrayInt(&size[0], 1));
2326                 int sliceId = dstNet.addLayer(layerShapeName, "Slice", sliceLp);
2327                 layer_id[layerShapeName] = sliceId;
2328                 connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0);
2329
2330                 if (!keepDims)
2331                 {
2332                     LayerParams squeezeLp;
2333                     std::string squeezeName = name + "/squeeze";
2334                     CV_Assert(layer_id.find(squeezeName) == layer_id.end());
2335                     squeezeLp.set("axis", 0);
2336                     squeezeLp.set("end_axis", 1);
2337                     int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp);
2338                     layer_id[squeezeName] = squeezeId;
2339                     connect(layer_id, dstNet, Pin(layerShapeName), squeezeId, 0);
2340                     layerShapeName = squeezeName;
2341                 }
2342
2343                 int id = dstNet.addLayer(name, "Reshape", layerParams);
2344                 layer_id[name] = id;
2345                 connect(layer_id, dstNet, Pin(avgName), id, 0);
2346                 connect(layer_id, dstNet, Pin(layerShapeName), id, 1);
2347             } else if (indices.total() == 1) {
2348                 int axis = toNCHW(indices.at<int>(0));
2349                 if (axis == 2 || axis == 3)
2350                 {
2351                     layerParams.set("pool", type == "Mean" ? "ave" : "sum");
2352                     layerParams.set(axis == 2 ? "kernel_w" : "kernel_h", 1);
2353                     layerParams.set(axis == 2 ? "global_pooling_h" : "global_pooling_w", true);
2354                     int id = dstNet.addLayer(name, "Pooling", layerParams);
2355                     layer_id[name] = id;
2356                     connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2357
2358                     if (!keepDims)
2359                     {
2360                         // To keep the correct order after squeezing dims we first need to change the layout from NCHW to NHWC
2361                         LayerParams permLP;
2362                         int order[] = {0, 2, 3, 1};  // From OpenCV's NCHW to NHWC.
2363                         std::string permName = name + "/nchw";
2364                         Pin inpId = Pin(name);
2365                         addPermuteLayer(order, permName, inpId);
2366
2367                         LayerParams squeezeLp;
2368                         std::string squeezeName = name + "/squeeze";
2369                         CV_Assert(layer_id.find(squeezeName) == layer_id.end());
2370                         squeezeLp.set("axis", indices.at<int>(0));
2371                         squeezeLp.set("end_axis", indices.at<int>(0) + 1);
2372                         int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp);
2373                         layer_id[squeezeName] = squeezeId;
2374                         connect(layer_id, dstNet, Pin(permName), squeezeId, 0);
2375                     }
2376                 }
2377                 else if (axis == 1)
2378                 {
2379                     int order[] = {0, 2, 3, 1};  // From OpenCV's NCHW to NHWC.
2380                     Pin inpId = parsePin(layer.input(0));
2381                     addPermuteLayer(order, name + "/nhwc", inpId);
2382
2383                     layerParams.set("pool", type == "Mean" ? "ave" : "sum");
2384                     layerParams.set("kernel_h", 1);
2385                     layerParams.set("global_pooling_w", true);
2386                     int id = dstNet.addLayer(name, "Pooling", layerParams);
2387                     layer_id[name] = id;
2388                     connect(layer_id, dstNet, inpId, id, 0);
2389
2390                     if (!keepDims)
2391                     {
2392                         LayerParams squeezeLp;
2393                         std::string squeezeName = name + "/squeeze";
2394                         CV_Assert(layer_id.find(squeezeName) == layer_id.end());
2395                         int channel_id = 3; // TF NHWC layout
2396                         squeezeLp.set("axis", channel_id - 1);
2397                         squeezeLp.set("end_axis", channel_id);
2398                         int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp);
2399                         layer_id[squeezeName] = squeezeId;
2400                         connect(layer_id, dstNet, Pin(name), squeezeId, 0);
2401                     }
2402                     else
2403                     {
2404                         int order[] = {0, 3, 1, 2};  // From NHWC to OpenCV's NCHW.
2405                         Pin inpId = parsePin(name);
2406                         addPermuteLayer(order, name + "/nchw", inpId);
2407                     }
2408                 }
2409             } else {
2410                 if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
2411                     CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean or reduce_sum operation.");
2412
2413                 layerParams.set("pool", type == "Mean" ? "ave" : "sum");
2414                 layerParams.set("global_pooling", true);
2415                 int id = dstNet.addLayer(name, "Pooling", layerParams);
2416                 layer_id[name] = id;
2417                 connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2418
2419                 if (!keepDims)
2420                 {
2421                     LayerParams flattenLp;
2422                     std::string flattenName = name + "/flatten";
2423                     CV_Assert(layer_id.find(flattenName) == layer_id.end());
2424                     int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
2425                     layer_id[flattenName] = flattenId;
2426                     connect(layer_id, dstNet, Pin(name), flattenId, 0);
2427                 }
2428             }
2429         }
2430         else if (type == "Pack")
2431         {
2432             // op: tf.stack(list of tensors, axis=0)
2433             // Join a list of inputs along a new axis.
2434             // The "axis" specifies the index of the new axis in the dimensions of the output.
2435             // Example: given a list with "N" tensors of shape (C, H, W):
2436             // if axis == 0 then the output tensor will have the shape (N, C, H, W),
2437             // if axis == 1 then the output tensor will have the shape (C, N, H, W).
2438             CV_CheckGT(num_inputs, 0, "");
2439             CV_Assert(hasLayerAttr(layer, "axis"));
2440             int dim = (int)getLayerAttr(layer, "axis").i();
2441             if (dim != 0)
2442                 CV_Error(Error::StsNotImplemented, "Unsupported mode of pack operation.");
2443
2444             CV_Assert(hasLayerAttr(layer, "N"));
2445             int num = (int)getLayerAttr(layer, "N").i();
2446             CV_CheckEQ(num_inputs, num, "");
2447             std::string base_name = name + "/reshape_";
2448             std::vector<int> reshape_ids;
2449             for (int i = 0; i < num; i++) {
2450                 std::ostringstream ss;
2451                 ss << i;
2452                 std::string reshape_name = base_name + ss.str();
2453                 LayerParams reshapeLP;
2454                 reshapeLP.set("axis", dim);
2455                 reshapeLP.set("num_axes", 1);
2456                 int outShape[] = {1, -1};
2457                 reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2));
2458                 int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP);
2459                 layer_id[reshape_name] = id;
2460                 reshape_ids.push_back(id);
2461                 connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0);
2462             }
2463
2464             layerParams.set("axis", dim);
2465             int id = dstNet.addLayer(name, "Concat", layerParams);
2466             layer_id[name] = id;
2467
2468             for (int li = 0; li < num; li++)
2469                 dstNet.connect(reshape_ids[li], 0, id, li);
2470         }
2471         else if (type == "ClipByValue")
2472         {
2473             // op: "ClipByValue"
2474             // input: "input"
2475             // input: "min"
2476             // input: "max"
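                 // A clip with scalar bounds is expressed through the ReLU6 layer, whose
                 // min_value/max_value parameters generalize its default [0, 6] range.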
2477             CV_CheckEQ(num_inputs, 3, "");
2478
2479             Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1));
2480             Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2));
2481             CV_CheckEQ(minValue.total(), (size_t)1, ""); CV_CheckTypeEQ(minValue.type(), CV_32FC1, "");
2482             CV_CheckEQ(maxValue.total(), (size_t)1, ""); CV_CheckTypeEQ(maxValue.type(), CV_32FC1, "");
2483
2484             layerParams.set("min_value", minValue.at<float>(0));
2485             layerParams.set("max_value", maxValue.at<float>(0));
2486
2487             int id = dstNet.addLayer(name, "ReLU6", layerParams);
2488             layer_id[name] = id;
2489
2490             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
2491         }
2492         else if (type == "LeakyRelu")
2493         {
2494             CV_CheckGT(num_inputs, 0, "");
2495             CV_Assert(hasLayerAttr(layer, "alpha"));
2496             layerParams.set("negative_slope", getLayerAttr(layer, "alpha").f());
2497
2498             int id = dstNet.addLayer(name, "ReLU", layerParams);
2499             layer_id[name] = id;
2500             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
2501         }
2502         else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
2503                  type == "Relu" || type == "Elu" || type == "Exp" ||
2504                  type == "Identity" || type == "Relu6")
2505         {
2506             CV_CheckGT(num_inputs, 0, "");
2507             std::string dnnType = type;
2508             if (type == "Abs") dnnType = "AbsVal";
2509             else if (type == "Tanh") dnnType = "TanH";
2510             else if (type == "Relu") dnnType = "ReLU";
2511             else if (type == "Relu6") dnnType = "ReLU6";
2512             else if (type == "Elu") dnnType = "ELU";
2513
2514             int id = dstNet.addLayer(name, dnnType, layerParams);
2515             layer_id[name] = id;
2516             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
2517         }
2518         else
2519         {
2520             // The importer does not know how to map this TensorFlow operation onto an OpenCV layer.
2521             // However, we create a layer with the same type and rely on the user having registered a custom layer.
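                 // A minimal user-side registration sketch (the layer class name here is hypothetical):
                 //     CV_DNN_REGISTER_LAYER_CLASS(SomeCustomOp, SomeCustomOpLayer);
                 // With such a registration in place, nodes of type "SomeCustomOp" are built from
                 // the user's class, receiving the attributes and constant inputs collected below.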
2522
2523             // All the attributes are added to LayerParams.
2524             const google::protobuf::Map<std::string, tensorflow::AttrValue>& attr = layer.attr();
2525             for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
2526                  ai != attr.end(); ++ai)
2527             {
2528                 if (ai->second.value_case() == tensorflow::AttrValue::kS)  // string
2529                     layerParams.set(ai->first, ai->second.s());
2530                 if (ai->second.value_case() == tensorflow::AttrValue::kI)  // int64
2531                     layerParams.set(ai->first, ai->second.i());
2532                 if (ai->second.value_case() == tensorflow::AttrValue::kF)  // float
2533                     layerParams.set(ai->first, ai->second.f());
2534                 if (ai->second.value_case() == tensorflow::AttrValue::kB)  // bool
2535                     layerParams.set(ai->first, ai->second.b());
2536             }
2537
2538             // All the Const input nodes are added to the layer's blobs.
2539             std::vector<std::string> inputsNames;
2540             for (int i = 0; i < num_inputs; ++i)
2541             {
2542                 // Check if input is a Const node.
2543                 if (value_id.find(layer.input(i)) != value_id.end())
2544                 {
2545                     Mat blob = getTensorContent(getConstBlob(layer, value_id, i));
2546                     layerParams.blobs.push_back(blob);
2547                 }
2548                 else
2549                     inputsNames.push_back(layer.input(i));
2550             }
2551             int id = dstNet.addLayer(name, type, layerParams);
2552             layer_id[name] = id;
2553
2554             for (int i = 0; i < (int)inputsNames.size(); ++i)
2555             {
2556                 connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i);
2557             }
2558         }
2559     }
2560     catch (const std::exception& e)
2561     {
2562         CV_LOG_ERROR(NULL, "DNN/TF: Can't parse layer for node='" << name << "'. Exception: " << e.what());
2563         throw;
2564     }
2565 }
2566
2567 } // namespace
2568
2569 #endif //HAVE_PROTOBUF
2570
2571 Net readNetFromTensorflow(const String &model, const String &config)
2572 {
2573     Net net;
2574     TFImporter importer(net, model.c_str(), config.c_str());
2575     return net;
2576 }
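     // Typical use of the overload above, as a sketch (file names are placeholders and the
     // input blob is assumed to be prepared elsewhere, e.g. with blobFromImage):
     //     Net net = readNetFromTensorflow("frozen_inference_graph.pb", "graph.pbtxt");
     //     net.setInput(blob);
     //     Mat out = net.forward();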
2577
2578 Net readNetFromTensorflow(const char* bufferModel, size_t lenModel,
2579                           const char* bufferConfig, size_t lenConfig)
2580 {
2581     Net net;
2582     TFImporter importer(net, bufferModel, lenModel, bufferConfig, lenConfig);
2583     return net;
2584 }
2585
2586 Net readNetFromTensorflow(const std::vector<uchar>& bufferModel, const std::vector<uchar>& bufferConfig)
2587 {
2588     const char* bufferModelPtr = reinterpret_cast<const char*>(&bufferModel[0]);
2589     const char* bufferConfigPtr = bufferConfig.empty() ? NULL :
2590                                   reinterpret_cast<const char*>(&bufferConfig[0]);
2591     return readNetFromTensorflow(bufferModelPtr, bufferModel.size(),
2592                                  bufferConfigPtr, bufferConfig.size());
2593 }
2594
2595 void writeTextGraph(const String& _model, const String& output)
2596 {
2597     String model = _model;
2598     const std::string modelExt = model.substr(model.rfind('.') + 1);
2599     if (modelExt != "pb")
2600         CV_Error(Error::StsNotImplemented, "Only TensorFlow models support export to text file");
2601
2602     tensorflow::GraphDef net;
2603     ReadTFNetParamsFromBinaryFileOrDie(model.c_str(), &net);
2604
2605     sortByExecutionOrder(net);
2606
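         // Strip the raw tensor data from every Const node so the dumped text graph stays
         // compact; only the graph structure and node attributes are kept.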
2607     RepeatedPtrField<tensorflow::NodeDef>::iterator it;
2608     for (it = net.mutable_node()->begin(); it != net.mutable_node()->end(); ++it)
2609     {
2610         if (it->op() == "Const")
2611         {
2612             it->mutable_attr()->at("value").mutable_tensor()->clear_tensor_content();
2613         }
2614     }
2615
2616     std::string content;
2617     google::protobuf::TextFormat::PrintToString(net, &content);
2618
2619     std::ofstream ofs(output.c_str());
2620     ofs << content;
2621     ofs.close();
2622 }
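     // Example call, as a sketch (paths are placeholders):
     //     writeTextGraph("frozen_inference_graph.pb", "frozen_inference_graph.pbtxt");
     // The resulting .pbtxt keeps the graph structure but drops all Const tensor contents.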
2623
2624 CV__DNN_EXPERIMENTAL_NS_END
2625 }} // namespace