modules/dnn/src/torch/torch_importer.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  14 // Third party copyrights are property of their respective owners.
  15 //
  16 // Redistribution and use in source and binary forms, with or without modification,
  17 // are permitted provided that the following conditions are met:
  18 //
  19 //   * Redistribution's of source code must retain the above copyright notice,
  20 //     this list of conditions and the following disclaimer.
  21 //
  22 //   * Redistribution's in binary form must reproduce the above copyright notice,
  23 //     this list of conditions and the following disclaimer in the documentation
  24 //     and/or other materials provided with the distribution.
  25 //
  26 //   * The name of the copyright holders may not be used to endorse or promote products
  27 //     derived from this software without specific prior written permission.
  28 //
  29 // This software is provided by the copyright holders and contributors "as is" and
  30 // any express or implied warranties, including, but not limited to, the implied
  31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32 // In no event shall the Intel Corporation or contributors be liable for any direct,
  33 // indirect, incidental, special, exemplary, or consequential damages
  34 // (including, but not limited to, procurement of substitute goods or services;
  35 // loss of use, data, or profits; or business interruption) however caused
  36 // and on any theory of liability, whether in contract, strict liability,
  37 // or tort (including negligence or otherwise) arising in any way out of
  38 // the use of this software, even if advised of the possibility of such damage.
  39 //
  40 //M*/
  41
  42 #include "../precomp.hpp"
  43 #include <limits>
  44 #include <set>
  45 #include <map>
  46 #include <algorithm>
  47 #include <iostream>
  48 #include <fstream>
  49
  50 #if defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER
  51 #include "THDiskFile.h"
  52 #endif
  53
  54 namespace cv {
  55 namespace dnn {
  56 CV__DNN_EXPERIMENTAL_NS_BEGIN
  57
  58 #if defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER
  59 using namespace TH;
  60
  61 //#ifdef NDEBUG
  62 static bool dbgPrint = false;
  63 //#else
  64 //static bool dbgPrint = true;
  65 //#endif
  66
  67 enum LuaType
  68 {
  69     TYPE_NIL      = 0,
  70     TYPE_NUMBER   = 1,
  71     TYPE_STRING   = 2,
  72     TYPE_TABLE    = 3,
  73     TYPE_TORCH    = 4,
  74     TYPE_BOOLEAN  = 5,
  75     TYPE_FUNCTION = 6,
  76     TYPE_RECUR_FUNCTION = 8,
  77     LEGACY_TYPE_RECUR_FUNCTION = 7
  78 };
  79
  80 template<typename T>
  81 static String toString(const T &v)
  82 {
  83     std::ostringstream ss;
  84     ss << v;
  85     return ss.str();
  86 }
  87
  88 static inline bool startsWith(const String &str, const char *substr)
  89 {
  90     return str.find(substr) == 0;
  91 }
  92
  93 static inline bool endsWith(const String &str, const char *substr)
  94 {
  95     return str.rfind(substr) == str.length() - strlen(substr);
  96 }
  97
  98 struct TorchImporter : public ::cv::dnn::Importer
  99 {
 100     typedef std::map<String, std::pair<int, Mat> > TensorsMap;
 101     Net net;
 102
 103     cv::Ptr<THFile> file;
 104     std::set<int> readedIndexes;
 105     std::map<int, Mat> storages;
 106     std::map<int, Mat> tensors;
 107
 108     struct Module
 109     {
 110         String thName, apiType;
 111         dnn::LayerParams params;
 112         std::vector<cv::Ptr<Module> > modules;
 113
 114         Module(const String &_thName, const String &_apiType = String())
 115             : thName(_thName), apiType(_apiType) {}
 116     };
 117
 118     Module *rootModule;
 119     Module *curModule;
 120     int moduleCounter;
 121
 122     TorchImporter(String filename, bool isBinary)
 123     {
 124         CV_TRACE_FUNCTION();
 125
 126         rootModule = curModule = NULL;
 127         moduleCounter = 0;
 128
 129         file = cv::Ptr<THFile>(THDiskFile_new(filename, "r", 0), THFile_free);
 130         CV_Assert(file && THFile_isOpened(file));
 131
 132         if (isBinary)
 133             THFile_binary(file);
 134         else
 135             THFile_ascii(file);
 136     }
 137
 138     /* Simple readers */
 139
 140     inline int readInt()
 141     {
 142         return THFile_readIntScalar(file);
 143     }
 144
 145     inline long readLong()
 146     {
 147         return THFile_readLongScalar(file);
 148     }
 149
 150     inline bool readBool()
 151     {
 152         return readInt() != 0;
 153     }
 154
 155     inline double readDouble()
 156     {
 157         return THFile_readDoubleScalar(file);
 158     }
 159
 160     inline String readString()
 161     {
 162         int size = THFile_readIntScalar(file);
 163         String str(size, '\0');
 164         THFile_readCharRaw(file, const_cast<char*>(str.c_str()), size);
 165         return str;
 166     }
 167
 168     inline String readTorchClassName()
 169     {
 170         String version = readString();
 171         return startsWith(version, "V ") ? readString() : version;
 172     }
 173
 174     inline void readFunction()
 175     {
 176         readString();
 177         readObject();
 178     }
 179
 180     void readTable(int index = -1)
 181     {
 182         index = (index < 0) ? readInt() : index;
 183
 184         if (readedIndexes.count(index))
 185             return;
 186
 187         readedIndexes.insert(index);
 188
 189         int size = readInt();
 190
 191         for (int i = 0; i < size; i++)
 192         {
 193             readObject(); //key
 194             readObject(); //value
 195         }
 196     }
 197
 198     /* Special readers */
 199
 200     static inline int parseTorchType(const String &str, const char *suffix, const char *prefix = "torch.")
 201     {
 202         if (startsWith(str, prefix) && endsWith(str, suffix))
 203         {
 204            String typeStr = str.substr(strlen(prefix), str.length() - strlen(prefix) - strlen(suffix));
 205
 206            if (typeStr == "Double")
 207                return CV_64F;
 208            else if (typeStr == "Float" || typeStr == "Cuda")
 209                return CV_32F;
 210            else if (typeStr == "Byte")
 211                return CV_8U;
 212            else if (typeStr == "Char")
 213                return CV_8S;
 214            else if (typeStr == "Short")
 215                return CV_16S;
 216            else if (typeStr == "Int")
 217                return CV_32S;
 218            else if (typeStr == "Long") //Carefully! CV_64S type coded as CV_USRTYPE1
 219                return CV_USRTYPE1;
 220            else
 221                CV_Error(Error::StsNotImplemented, "Unknown type \"" + typeStr + "\" of torch class \"" + str + "\"");
 222         }
 223
 224         return -1;
 225     }
 226
 227     static int parseTensorType(const String &className)
 228     {
 229         return parseTorchType(className, "Tensor");
 230     }
 231
 232     static int parseStorageType(const String &className)
 233     {
 234         return parseTorchType(className, "Storage");
 235     }
 236
 237     void readTorchStorage(int index, int type = -1)
 238     {
 239         long size = readLong();
 240         Mat storageMat(1, size, (type != CV_USRTYPE1) ? type : CV_64F); //handle LongStorage as CV_64F Mat
 241
 242         switch (type)
 243         {
 244         case CV_32F:
 245             THFile_readFloatRaw(file, (float*)storageMat.data, size);
 246             break;
 247         case CV_64F:
 248             THFile_readDoubleRaw(file, (double*)storageMat.data, size);
 249             break;
 250         case CV_8S:
 251         case CV_8U:
 252             THFile_readByteRaw(file, (uchar*)storageMat.data, size);
 253             break;
 254         case CV_16S:
 255         case CV_16U:
 256             THFile_readShortRaw(file, (short*)storageMat.data, size);
 257             break;
 258         case CV_32S:
 259             THFile_readIntRaw(file, (int*)storageMat.data, size);
 260             break;
 261         case CV_USRTYPE1:
 262         {
 263             double *buf = storageMat.ptr<double>();
 264             THFile_readLongRaw(file, (int64*)buf, size);
 265
 266             for (size_t i = (size_t)size; i-- > 0; )
 267                 buf[i] = ((int64*)buf)[i];
 268         }
 269             break;
 270         default:
 271             CV_Error(Error::StsInternal, "");
 272             break;
 273         }
 274
 275         storages.insert(std::make_pair(index, storageMat));
 276     }
 277
 278     void readTorchTable(Dict &scalarParams, TensorsMap &tensorParams)
 279     {
 280         int luaType = readInt();
 281         int index = readInt();
 282
 283         CV_Assert(luaType == TYPE_TABLE && readedIndexes.count(index) == 0);
 284         readedIndexes.insert(index);
 285
 286         long fpos;
 287         int numPairs = readInt();
 288
 289         for (int i = 0; i < numPairs; i++)
 290         {
 291             fpos = THFile_position(file);
 292             int ktype = readInt();
 293
 294             if (ktype != TYPE_STRING) //skip non-string fileds
 295             {
 296                 THFile_seek(file, fpos);
 297                 readObject(); //key
 298                 readObject(); //value
 299                 continue;
 300             }
 301
 302             String key = readString();
 303             if (dbgPrint)
 304                 std::cout << i << "th key: " << key << "\n";
 305
 306             fpos = THFile_position(file);
 307             int vtype = readInt();
 308
 309             if (vtype == TYPE_TORCH)
 310             {
 311                 int index = readInt();
 312                 int numModules = curModule->modules.size();
 313                 readTorchObject(index);
 314
 315                 if (tensors.count(index)) //tensor was readed
 316                 {
 317                     tensorParams.insert(std::make_pair(key, std::make_pair(index, tensors[index])));
 318                 }
 319                 else if (storages.count(index)) //storage was readed
 320                 {
 321                     Mat &matStorage = storages[index];
 322                     Mat matCasted;
 323                     matStorage.convertTo(matCasted, CV_64F);
 324
 325                     DictValue scalar = DictValue::arrayReal(matCasted.ptr<double>(), matCasted.total());
 326                     scalarParams.set(key, scalar);
 327                 }
 328                 else
 329                 {
 330                     // Only tensors and scalars are supported for table fields.
 331                     // i.e. nn.Inception has field `transfer` which is an
 332                     // activation layer. So we remove added modules as readTorchObject(index).
 333                     while (curModule->modules.size() > numModules)
 334                         curModule->modules.pop_back();
 335                 }
 336             }
 337             else if (vtype == TYPE_NUMBER)
 338             {
 339                 scalarParams.set(key, readDouble());
 340             }
 341             else if (vtype == TYPE_STRING)
 342             {
 343                 scalarParams.set(key, readString());
 344             }
 345             else if (vtype == TYPE_BOOLEAN)
 346             {
 347                 scalarParams.set(key, readBool());
 348             }
 349             else
 350             {
 351                 THFile_seek(file, fpos);
 352                 readObject();
 353             }
 354         }
 355
 356         //Debug output
 357         if (dbgPrint)
 358         {
 359             std::cout << "scalarParams:\n";
 360             std::cout << scalarParams;
 361
 362             std::cout << "#" << tensorParams.size() << " tensorParams:\n";
 363             std::map<String,std::pair<int, Mat> >::const_iterator it;
 364             for (it = tensorParams.begin(); it != tensorParams.end(); it++)
 365                 std::cout << it->first << ": Tensor " << it->second.second.size << "\n";
 366         }
 367     }
 368
 369     void readTorchTensor(int indexTensor, int typeTensor)
 370     {
 371         int ndims = readInt();
 372         AutoBuffer<int64, 4> sizes(ndims);
 373         AutoBuffer<int64, 4> steps(ndims);
 374         THFile_readLongRaw(file, sizes, ndims);
 375         THFile_readLongRaw(file, steps, ndims);
 376         long offset = readLong() - 1;
 377
 378         //read Storage
 379         int typeidx = readInt();
 380         CV_Assert(typeidx == TYPE_TORCH || (typeidx == TYPE_NIL && ndims == 0));
 381
 382         if (typeidx == TYPE_NIL)
 383         {
 384             tensors.insert(std::make_pair(indexTensor, Mat()));
 385             return;
 386         }
 387
 388         int indexStorage = readInt();
 389         if (readedIndexes.count(indexStorage) == 0)
 390         {
 391             String className = readTorchClassName();
 392             int typeStorage = parseStorageType(className);
 393             CV_Assert(typeStorage >= 0 && typeTensor == typeStorage);
 394             readTorchStorage(indexStorage, typeStorage);
 395             typeTensor = storages[indexStorage].type();
 396             readedIndexes.insert(indexStorage);
 397         }
 398
 399         //small check
 400         size_t requireElems = (size_t)offset + (size_t)steps[0] * (size_t)sizes[0];
 401         size_t storageElems = storages[indexStorage].total();
 402         if (requireElems > storageElems)
 403             CV_Error(Error::StsBadSize, "Storage has insufficent number of elemements for requested Tensor");
 404
 405         //convert sizes
 406         AutoBuffer<int, 4> isizes(ndims);
 407         AutoBuffer<size_t, 4> ssteps(ndims);
 408         for (int i = ndims - 1; i >= 0; i--)
 409         {
 410             isizes[i] = (int)sizes[i];
 411             ssteps[i] = (size_t)steps[i] * CV_ELEM_SIZE(typeTensor);
 412         }
 413
 414         //allocate Blob
 415         Mat srcMat(ndims, (int*)isizes, typeTensor , storages[indexStorage].ptr() + offset*CV_ELEM_SIZE(typeTensor), (size_t*)ssteps);
 416         int dstType = CV_32F;
 417
 418         Mat blob;
 419         srcMat.convertTo(blob, dstType);
 420
 421         tensors.insert(std::make_pair(indexTensor, blob));
 422     }
 423
 424     static bool isNNClass(const String &className, String &nnName)
 425     {
 426         const char *prefixes[] = {"nn.", "cunn.", "cudnn.", "fbcunn.", NULL};
 427
 428         for (int i = 0; prefixes[i]; i++)
 429         {
 430             if (startsWith(className, prefixes[i]))
 431             {
 432                 nnName = className.substr(strlen(prefixes[i]));
 433                 return true;
 434             }
 435         }
 436
 437         return false;
 438     }
 439
 440     static void convertTorchKernelsParams(const Dict &torchParams, cv::dnn::LayerParams &layerParams)
 441     {
 442         layerParams.set("kernel_h", torchParams.get<int>("kH"));
 443         layerParams.set("kernel_w", torchParams.get<int>("kW"));
 444         layerParams.set("stride_h", torchParams.get<int>("dH"));
 445         layerParams.set("stride_w", torchParams.get<int>("dW"));
 446         layerParams.set("pad_h", torchParams.get<int>("padH", 0));
 447         layerParams.set("pad_w", torchParams.get<int>("padW", 0));
 448     }
 449
 450     void readTorchObject(int index)
 451     {
 452         if(readedIndexes.count(index))
 453             return;
 454
 455         String className = readTorchClassName();
 456         String nnName;
 457
 458         if (dbgPrint)
 459             std::cout << "Class: " << className << std::endl;
 460
 461         int type;
 462         if ( (type = parseTensorType(className)) >= 0 ) //is Tensor
 463         {
 464             readTorchTensor(index, type);
 465         }
 466         else if ( (type = parseStorageType(className)) >= 0 ) //is Storage
 467         {
 468             readTorchStorage(index, type);
 469         }
 470         else if (isNNClass(className, nnName))
 471         {
 472             Dict scalarParams;
 473             TensorsMap tensorParams;
 474
 475             cv::Ptr<Module> newModule(new Module(nnName));
 476             cv::dnn::LayerParams &layerParams = newModule->params;
 477
 478             layerParams.set("torch_index", index);
 479
 480             if (nnName == "Sequential" || nnName == "Parallel" ||
 481                 nnName == "Concat" || nnName == "ConcatTable" || nnName == "JoinTable" ||
 482                 nnName == "DepthConcat" || nnName == "Inception")
 483             {
 484                 Module *parentModule = curModule;
 485                 curModule->modules.push_back(newModule);
 486                 curModule = newModule;
 487                 readTorchTable(scalarParams, tensorParams);
 488                 curModule = parentModule;
 489
 490                 if (nnName == "Parallel")
 491                 {
 492                     layerParams.set("inputDimension", scalarParams.get<int>("inputDimension"));
 493                     layerParams.set("outputDimension", scalarParams.get<int>("outputDimension"));
 494                 }
 495                 if (nnName == "Concat")
 496                 {
 497                     layerParams.set("dimension", scalarParams.get<int>("dimension"));
 498                 }
 499                 if (nnName == "JoinTable")
 500                 {
 501                     layerParams.set("dimension", scalarParams.get<int>("dimension"));
 502                 }
 503                 if (nnName == "DepthConcat")
 504                 {
 505                     layerParams.set("dimension", scalarParams.get<int>("dimension"));
 506                 }
 507             }
 508             else if (nnName == "SpatialConvolution" || nnName == "SpatialConvolutionMM")
 509             {
 510                 newModule->apiType = "Convolution";
 511                 readTorchTable(scalarParams, tensorParams);
 512
 513                 CV_Assert(tensorParams.count("weight"));
 514                 layerParams.blobs.push_back(tensorParams["weight"].second);
 515
 516                 bool bias = tensorParams.count("bias") != 0;
 517                 layerParams.set("bias_term", bias);
 518                 if (bias)
 519                     layerParams.blobs.push_back(tensorParams["bias"].second);
 520
 521                 layerParams.set("num_output", scalarParams.get<int>("nOutputPlane"));
 522                 convertTorchKernelsParams(scalarParams, layerParams);
 523
 524                 if (nnName == "SpatialConvolutionMM")
 525                 {
 526                     // Split weights from a [ outCh x inCh*kH*kW ] 2D matrix
 527                     // onto a 4D [ outCh x inCh x kH x kW ] blob.
 528                     CV_Assert(layerParams.blobs[0].dims == 2);
 529                     const int kernel = layerParams.blobs[0].size[1];  // inCh * kH * kW
 530                     MatShape kernelShape(4);
 531                     kernelShape[0] = layerParams.blobs[0].size[0];  // outCh.
 532                     kernelShape[2] = layerParams.get<int>("kernel_h");
 533                     kernelShape[3] = layerParams.get<int>("kernel_w");
 534                     kernelShape[1] = kernel / (kernelShape[2] * kernelShape[3]);  // inCh.
 535                     layerParams.blobs[0] = layerParams.blobs[0].reshape(1, kernelShape);
 536                 }
 537                 curModule->modules.push_back(newModule);
 538             }
 539             else if (nnName == "SpatialLPPooling")
 540             {
 541                 // nn.Sequential {
 542                 //     [input -> (1) -> (2) -> output]
 543                 //     (1): nn.Sequential {
 544                 //       [input -> (1) -> (2) -> (3) -> (4) -> output]
 545                 //       (1): nn.Power
 546                 //       (2): nn.SpatialAveragePooling(...)
 547                 //       (3): nn.MulConstant
 548                 //       (4): nn.Power
 549                 //     }
 550                 //     (2): nn.Sigmoid
 551                 // }
 552                 // nn.SpatialLPPooling is just a table so we skip it.
 553                 readTorchTable(scalarParams, tensorParams);
 554             }
 555             else if (nnName == "SpatialMaxPooling" || nnName == "SpatialAveragePooling")
 556             {
 557                 newModule->apiType = "Pooling";
 558                 readTorchTable(scalarParams, tensorParams);
 559
 560                 if (nnName == "SpatialMaxPooling") {
 561                     layerParams.set("pool", "MAX");
 562                     layerParams.set("indices_blob_id", tensorParams["indices"].first);
 563                 }
 564                 if (nnName == "SpatialAveragePooling")
 565                     layerParams.set("pool", "AVE");
 566                 convertTorchKernelsParams(scalarParams, layerParams);
 567
 568                 CV_Assert(scalarParams.has("ceil_mode"));
 569                 layerParams.set("ceil_mode", scalarParams.get<bool>("ceil_mode"));
 570
 571                 curModule->modules.push_back(newModule);
 572             }
 573             else if (nnName == "Linear")
 574             {
 575                 newModule->apiType = "InnerProduct";
 576                 readTorchTable(scalarParams, tensorParams);
 577
 578                 CV_Assert(tensorParams.count("weight"));
 579                 Mat weightBlob = tensorParams["weight"].second;
 580                 layerParams.blobs.push_back(weightBlob);
 581
 582                 bool bias = tensorParams.count("bias") != 0;
 583                 if (bias)
 584                     layerParams.blobs.push_back(tensorParams["bias"].second);
 585                 layerParams.set("bias_term", bias);
 586
 587                 layerParams.set("num_output", weightBlob.size[0]);
 588                 curModule->modules.push_back(newModule);
 589             }
 590             else if (nnName == "Reshape" || nnName == "View")
 591             {
 592                 newModule->apiType = "Reshape";
 593
 594                 readTorchTable(scalarParams, tensorParams);
 595                 CV_Assert(scalarParams.has("size"));
 596
 597                 DictValue dimParam = scalarParams.get("size");
 598                 layerParams.set("dim", dimParam);
 599
 600                 if (scalarParams.has("batchMode") && scalarParams.get<bool>("batchMode"))
 601                     layerParams.set("axis", 1);
 602
 603                 curModule->modules.push_back(newModule);
 604             }
 605             else if (nnName == "ReLU")
 606             {
 607                 curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "ReLU")));
 608                 readObject();
 609             }
 610             else if (nnName == "Tanh")
 611             {
 612                 curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "TanH")));
 613                 readObject();
 614             }
 615             else if (nnName == "Sigmoid")
 616             {
 617                 curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "Sigmoid")));
 618                 readObject();
 619             }
 620             else if (nnName == "SpatialBatchNormalization" || nnName == "InstanceNormalization" ||
 621                      nnName == "BatchNormalization")
 622             {
 623                 newModule->apiType = "BatchNorm";
 624                 readTorchTable(scalarParams, tensorParams);
 625
 626                 CV_Assert(scalarParams.has("eps"));
 627                 float eps = float(scalarParams.get<double>("eps"));
 628                 layerParams.set("eps", eps);
 629
 630                 if (tensorParams.count("running_mean"))
 631                 {
 632                     layerParams.blobs.push_back(tensorParams["running_mean"].second);
 633                 }
 634                 else
 635                 {
 636                     CV_Assert(scalarParams.has("nOutput"));
 637                     layerParams.blobs.push_back(Mat::zeros(1, scalarParams.get<int>("nOutput"), CV_32F));
 638                 }
 639
 640                 if (tensorParams.count("running_var"))
 641                 {
 642                     layerParams.blobs.push_back(tensorParams["running_var"].second);
 643                 }
 644                 else if (tensorParams.count("running_std"))
 645                 {
 646                     layerParams.blobs.push_back(tensorParams["running_std"].second);
 647                     pow(layerParams.blobs.back(), -2, layerParams.blobs.back());
 648                     subtract(layerParams.blobs.back(), eps, layerParams.blobs.back());
 649                 }
 650                 else
 651                 {
 652                     CV_Assert(scalarParams.has("nOutput"));
 653                     layerParams.blobs.push_back(Mat::ones(1, scalarParams.get<int>("nOutput"), CV_32F));
 654                 }
 655
 656                 if (tensorParams.count("weight"))
 657                 {
 658                     layerParams.set("has_weight", true);
 659                     layerParams.blobs.push_back(tensorParams["weight"].second);
 660                 }
 661
 662                 if (tensorParams.count("bias"))
 663                 {
 664                     layerParams.set("has_bias", true);
 665                     layerParams.blobs.push_back(tensorParams["bias"].second);
 666                 }
 667
 668                 if (nnName == "InstanceNormalization")
 669                 {
 670                     cv::Ptr<Module> mvnModule(new Module(nnName));
 671                     mvnModule->apiType = "MVN";
 672                     curModule->modules.push_back(mvnModule);
 673
 674                     layerParams.blobs[0].setTo(0);  // batch norm's mean
 675                     layerParams.blobs[1].setTo(1);  // batch norm's std
 676                 }
 677
 678                 curModule->modules.push_back(newModule);
 679             }
 680             else if (nnName == "PReLU")
 681             {
 682                 readTorchTable(scalarParams, tensorParams);
 683
 684                 CV_Assert(tensorParams.count("weight"));
 685
 686                 size_t outputChannels = static_cast<int>(scalarParams.get<double>("nOutputPlane"));
 687                 if (outputChannels) {
 688
 689                     CV_Assert(tensorParams["weight"].second.total() == outputChannels);
 690                     layerParams.blobs.push_back(tensorParams["weight"].second);
 691
 692                     newModule->apiType = "ChannelsPReLU";
 693                 }
 694                 else {
 695                     CV_Assert(tensorParams["weight"].second.total() == 1);
 696                     float negative_slope = *tensorParams["weight"].second.ptr<float>();
 697                     layerParams.set("negative_slope", negative_slope);
 698
 699                     newModule->apiType = "ReLU";
 700                 }
 701
 702                 curModule->modules.push_back(newModule);
 703             }
 704             else if (nnName == "SpatialDropout" || nnName == "Dropout")
 705             {
 706                 readTorchTable(scalarParams, tensorParams);
 707                 CV_Assert(scalarParams.has("p"));
 708
 709                 if (scalarParams.has("v2") && scalarParams.get<bool>("v2"))
 710                 {
 711                     newModule->apiType = "Identity";
 712                 }
 713                 else
 714                 {
 715                     float scale = 1 -  scalarParams.get<double>("p");
 716
 717                     CV_Assert(scale > 0);
 718
 719                     newModule->apiType = "Power";
 720                     layerParams.set("scale", scale);
 721                 }
 722                 curModule->modules.push_back(newModule);
 723             }
 724             // TotalVariation layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
 725             // It's a loss function that has an Identity forward.
 726             else if (nnName == "Identity" || nnName == "TotalVariation")
 727             {
 728                 readTorchTable(scalarParams, tensorParams);
 729                 newModule->apiType = "Identity";
 730                 curModule->modules.push_back(newModule);
 731             }
 732             else if (nnName == "Normalize")
 733             {
 734                 readTorchTable(scalarParams, tensorParams);
 735                 CV_Assert(scalarParams.has("p"));
 736
 737                 layerParams.set("p", scalarParams.get<float>("p"));
 738                 if (scalarParams.has("eps"))
 739                     layerParams.set("eps", scalarParams.get<float>("eps"));
 740
 741                 newModule->apiType = "Normalize";
 742                 curModule->modules.push_back(newModule);
 743             }
 744             else if (nnName == "Padding")
 745             {
 746                 readTorchTable(scalarParams, tensorParams);
 747                 newModule->apiType = "Padding";
 748
 749                 CV_Assert(scalarParams.has("pad") && scalarParams.has("dim"));
 750                 if (scalarParams.has("index") && scalarParams.get<int>("index") != 1)
 751                     CV_Error(Error::StsNotImplemented, "Padding with offset is not implemented");
 752
 753                 if (scalarParams.has("value"))
 754                     layerParams.set("value", scalarParams.get<float>("value"));
 755
 756                 if (scalarParams.has("nInputDim"))
 757                     layerParams.set("input_dims", scalarParams.get<int>("nInputDim"));
 758
 759                 int dim = scalarParams.get<int>("dim") - 1;  // In Lua we start from 1.
 760                 int pad = scalarParams.get<int>("pad");
 761
 762                 std::vector<int> paddings((dim + 1) * 2, 0);
 763                 if (pad > 0)
 764                     paddings[dim * 2 + 1] = pad;  // Pad after (right).
 765                 else
 766                     paddings[dim * 2] = -pad;  // Pad before (left).
 767                 layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
 768
 769                 curModule->modules.push_back(newModule);
 770             }
 771             else if (nnName == "CAddTable")
 772             {
 773                 curModule->modules.push_back(newModule);
 774                 readObject();
 775             }
 776             else if (nnName == "SpatialDilatedConvolution")
 777             {
 778                 readTorchTable(scalarParams, tensorParams);
 779                 newModule->apiType = "Convolution";
 780                 CV_Assert(scalarParams.has("padW") &&
 781                           scalarParams.has("padH")&&
 782                           scalarParams.has("dW")&&
 783                           scalarParams.has("dH")&&
 784                           scalarParams.has("dilationW")&&
 785                           scalarParams.has("dilationH")&&
 786                           scalarParams.has("kW")&&
 787                           scalarParams.has("kH")&&
 788                           scalarParams.has("nOutputPlane"));
 789
 790                 layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));
 791                 layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));
 792                 layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));
 793                 layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));
 794                 layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));
 795                 layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));
 796                 layerParams.set("dilation_w", static_cast<int>(scalarParams.get<double>("dilationW")));
 797                 layerParams.set("dilation_h", static_cast<int>(scalarParams.get<double>("dilationH")));
 798                 layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));
 799
 800                 layerParams.blobs.push_back(tensorParams["weight"].second);
 801
 802                 bool bias = tensorParams.count("bias");
 803                 layerParams.set("bias_term", bias);
 804                 if (bias)
 805                     layerParams.blobs.push_back(tensorParams["bias"].second);
 806
 807                 curModule->modules.push_back(newModule);
 808             }
 809             else if (nnName == "SpatialFullConvolution")
 810             {
 811                 readTorchTable(scalarParams, tensorParams);
 812                 newModule->apiType = "Deconvolution";
 813                 CV_Assert(scalarParams.has("padW") &&
 814                           scalarParams.has("padH")&&
 815                           scalarParams.has("dW")&&
 816                           scalarParams.has("dH")&&
 817                           scalarParams.has("adjW")&&
 818                           scalarParams.has("adjH")&&
 819                           scalarParams.has("kW")&&
 820                           scalarParams.has("kH")&&
 821                           scalarParams.has("nOutputPlane"));
 822
 823                 layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));
 824                 layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));
 825                 layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));
 826                 layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));
 827                 layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));
 828                 layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));
 829                 layerParams.set("adj_w", static_cast<int>(scalarParams.get<double>("adjW")));
 830                 layerParams.set("adj_h", static_cast<int>(scalarParams.get<double>("adjH")));
 831                 layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));
 832
 833                 layerParams.blobs.push_back(tensorParams["weight"].second);
 834
 835                 bool bias = tensorParams.count("bias");
 836                 layerParams.set("bias_term", bias);
 837                 if (bias)
 838                     layerParams.blobs.push_back(tensorParams["bias"].second);
 839
 840                 curModule->modules.push_back(newModule);
 841             }
 842             else if (nnName == "SpatialMaxUnpooling")
 843             {
 844                 readTorchTable(scalarParams, tensorParams);
 845                 CV_Assert(tensorParams.count("indices"));
 846
 847                 layerParams.set("indices_blob_id", tensorParams["indices"].first);
 848                 curModule->modules.push_back(newModule);
 849             }
 850             else if (nnName == "SoftMax")
 851             {
 852                 newModule->apiType = "SoftMax";
 853                 curModule->modules.push_back(newModule);
 854             }
 855             else if (nnName == "LogSoftMax")
 856             {
 857                 newModule->apiType = "SoftMax";
 858                 layerParams.set("log_softmax", true);
 859                 curModule->modules.push_back(newModule);
 860             }
 861             else if (nnName == "SpatialCrossMapLRN")
 862             {
 863                 newModule->apiType = "LRN";
 864                 readTorchTable(scalarParams, tensorParams);
 865
 866                 CV_Assert(scalarParams.has("alpha"));
 867                 CV_Assert(scalarParams.has("beta"));
 868                 CV_Assert(scalarParams.has("k"));
 869                 CV_Assert(scalarParams.has("size"));
 870
 871                 layerParams.set("norm_region", "ACROSS_CHANNELS");
 872                 layerParams.set("alpha", scalarParams.get<float>("alpha"));
 873                 layerParams.set("beta", scalarParams.get<float>("beta"));
 874                 layerParams.set("bias", scalarParams.get<float>("k"));
 875                 layerParams.set("local_size", scalarParams.get<int>("size"));
 876                 layerParams.set("norm_by_size", true);
 877
 878                 curModule->modules.push_back(newModule);
 879             }
 880             else if (nnName == "Square" || nnName == "Sqrt" || nnName == "Power")
 881             {
 882                 readTorchTable(scalarParams, tensorParams);
 883
 884                 float power;
 885                 if (nnName == "Square") power = 2.0f;
 886                 else if (nnName == "Sqrt") power = 0.5f;
 887                 else if (nnName == "Power") power = scalarParams.get<float>("pow", 1.0f);
 888
 889                 newModule->apiType = "Power";
 890                 layerParams.set("power", power);
 891                 curModule->modules.push_back(newModule);
 892             }
 893             else if (nnName == "MulConstant")
 894             {
 895                 readTorchTable(scalarParams, tensorParams);
 896                 CV_Assert(scalarParams.has("constant_scalar"));
 897                 newModule->apiType = "Power";
 898                 layerParams.set("scale", scalarParams.get<float>("constant_scalar"));
 899                 curModule->modules.push_back(newModule);
 900             }
 901             else if (nnName == "SpatialZeroPadding" || nnName == "SpatialReflectionPadding")
 902             {
 903                 readTorchTable(scalarParams, tensorParams);
 904                 CV_Assert(scalarParams.has("pad_l"), scalarParams.has("pad_r"),
 905                           scalarParams.has("pad_t"), scalarParams.has("pad_b"));
 906                 int padTop = scalarParams.get<int>("pad_t");
 907                 int padLeft = scalarParams.get<int>("pad_l");
 908                 int padRight = scalarParams.get<int>("pad_r");
 909                 int padBottom = scalarParams.get<int>("pad_b");
 910                 if (padTop < 0 || padLeft < 0 || padRight < 0 || padBottom < 0)
 911                     CV_Error(Error::StsNotImplemented, "SpatialZeroPadding in cropping mode is not implemented");
 912
 913                 newModule->apiType = "Padding";
 914
 915                 // Torch's SpatialZeroPadding works with 3- or 4-dimensional input.
 916                 // So we add parameter input_dims=3 to ignore batch dimension if it will be.
 917                 std::vector<int> paddings(6, 0);  // CHW
 918                 paddings[2] = padTop;
 919                 paddings[3] = padBottom;
 920                 paddings[4] = padLeft;
 921                 paddings[5] = padRight;
 922                 layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
 923                 layerParams.set("input_dims", 3);
 924
 925                 if (nnName == "SpatialReflectionPadding")
 926                     layerParams.set("type", "reflect");
 927
 928                 curModule->modules.push_back(newModule);
 929             }
 930             else if (nnName == "ShaveImage")
 931             {
 932                 // ShaveImage layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
 933                 // It may be mapped to Slice layer.
 934                 readTorchTable(scalarParams, tensorParams);
 935                 CV_Assert(scalarParams.has("size"));
 936                 int size = scalarParams.get<int>("size");
 937
 938                 int begins[] = {0, 0, size, size};
 939                 int ends[] = {-1, -1, -size - 1, -size - 1};
 940
 941                 newModule->apiType = "Slice";
 942                 layerParams.set("begin", DictValue::arrayInt<int*>(&begins[0], 4));
 943                 layerParams.set("end", DictValue::arrayInt<int*>(&ends[0], 4));
 944                 curModule->modules.push_back(newModule);
 945             }
 946             else
 947             {
 948                 CV_Error(Error::StsNotImplemented, "Unknown nn class \"" + className + "\"");
 949             }
 950         }
 951         else
 952         {
 953             CV_Error(Error::StsNotImplemented, "Unsupported Torch class \"" + className + "\"");
 954         }
 955
 956         readedIndexes.insert(index);
 957     }
 958
 959     void readObject()
 960     {
 961         int typeidx = readInt();
 962
 963         if (typeidx == TYPE_TORCH)
 964         {
 965             int index = readInt();
 966             readTorchObject(index);
 967             readedIndexes.insert(index);
 968         }
 969         else if (typeidx == TYPE_NIL)
 970             return;
 971         else if (typeidx == TYPE_NUMBER)
 972             readDouble();
 973         else if (typeidx == TYPE_BOOLEAN)
 974             readBool();
 975         else if (typeidx == TYPE_STRING)
 976             readString();
 977         else if (typeidx == TYPE_TABLE)
 978             readTable();
 979         else
 980             CV_Error(Error::StsNotImplemented, "Unsupported Lua type");
 981     }
 982
 983     inline String generateLayerName(const String &label = String())
 984     {
 985         return "l" + toString(++this->moduleCounter) + "_" + label;
 986     }
 987
 988     int fill(Module *module, std::vector<std::pair<int, Module*> >& addedModules, int prevLayerId = 0, int prevOutNum = 0)
 989     {
 990         if (module == NULL)
 991             return prevLayerId;
 992
 993         if (module->apiType.length())
 994         {
 995             int newLayerId = net.addLayer(generateLayerName(module->apiType), module->apiType, module->params);
 996             net.connect(prevLayerId, prevOutNum, newLayerId, 0);
 997             addedModules.push_back(std::make_pair(newLayerId, module));
 998             return newLayerId;
 999         }
1000         else
1001         {
1002             if (module->thName == "Sequential" || module->thName == "Inception")
1003             {
1004                 for (size_t i = 0; i < module->modules.size(); i++)
1005                 {
1006                     prevLayerId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
1007                     prevOutNum = 0;
1008                 }
1009                 return prevLayerId;
1010             }
1011             else if (module->thName == "Concat")
1012             {
1013                 int newId, mergeId;
1014                 LayerParams mergeParams;
1015                 mergeParams.set("axis", module->params.get<int>("dimension") - 1);
1016
1017                 std::vector<int> branchIds;
1018                 for (int i = 0; i < (int)module->modules.size(); i++)
1019                 {
1020                     newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
1021                     branchIds.push_back(newId);
1022                 }
1023
1024                 moduleCounter += 1;  // Skip split layer creation. See https://github.com/opencv/opencv/pull/9384.
1025                 mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
1026
1027                 for (int i = 0; i < branchIds.size(); i++)
1028                 {
1029                     net.connect(branchIds[i], 0, mergeId, i);
1030                 }
1031
1032                 addedModules.push_back(std::make_pair(mergeId, module));
1033                 return mergeId;
1034             }
1035             else if (module->thName == "DepthConcat")
1036             {
1037                 int newId, mergeId;
1038                 LayerParams mergeParams;
1039                 mergeParams.set("axis", module->params.get<int>("dimension") - 1);
1040                 mergeParams.set("padding", true);
1041
1042                 std::vector<int> branchIds;
1043                 for (int i = 0; i < (int)module->modules.size(); i++)
1044                 {
1045                     newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
1046                     branchIds.push_back(newId);
1047                 }
1048
1049                 mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
1050
1051                 for (int i = 0; i < branchIds.size(); i++)
1052                 {
1053                     net.connect(branchIds[i], 0, mergeId, i);
1054                 }
1055
1056                 addedModules.push_back(std::make_pair(mergeId, module));
1057                 return mergeId;
1058             }
1059             else if (module->thName == "Parallel")
1060             {
1061                 int newId, splitId, mergeId, reshapeId;
1062
1063                 LayerParams splitParams, mergeParams, reshapeParams;
1064                 splitParams.set("axis", module->params.get<int>("inputDimension") - 1);
1065                 mergeParams.set("axis", module->params.get<int>("outputDimension") - 1);
1066                 reshapeParams.set("axis", splitParams.get<int>("axis"));
1067                 reshapeParams.set("num_axes", 1);
1068
1069                 splitId = net.addLayer(generateLayerName("torchSplit"), "Slice", splitParams);
1070                 reshapeId = net.addLayer(generateLayerName("torchReshape"), "Reshape", reshapeParams);
1071                 net.connect(prevLayerId, prevOutNum, splitId, 0);
1072
1073                 std::vector<int> branchIds;
1074                 for (int i = 0; i < (int)module->modules.size(); i++)
1075                 {
1076                     net.connect(splitId, i, reshapeId, i);
1077                     newId = fill(module->modules[i], addedModules, reshapeId, i);
1078                     branchIds.push_back(newId);
1079                 }
1080
1081                 mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
1082
1083                 for (int i = 0; i < branchIds.size(); i++)
1084                 {
1085                     net.connect(branchIds[i], 0, mergeId, i);
1086                 }
1087
1088                 addedModules.push_back(std::make_pair(mergeId, module));
1089                 return mergeId;
1090             }
1091             else if (module->thName == "ConcatTable") {
1092                 int newId = -1;
1093                 moduleCounter += 1;  // Skip split layer creation. See https://github.com/opencv/opencv/pull/9384.
1094                 for (int i = 0; i < (int)module->modules.size(); i++)
1095                 {
1096                     newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
1097                 }
1098                 return newId;
1099             }
1100             else if (module->thName == "JoinTable") {
1101                 std::vector<int> ids = net.getUnconnectedOutLayers();
1102
1103                 int mergeId;
1104                 LayerParams mergeParams;
1105                 mergeParams.set("axis", module->params.get<int>("dimension") - 1);
1106
1107                 mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
1108                 addedModules.push_back(std::make_pair(mergeId, module));
1109
1110                 for (int i = 0; i < ids.size(); i++)
1111                 {
1112                     net.connect(ids[i], 0, mergeId, i);
1113                 }
1114
1115                 return mergeId;
1116             }
1117             else if (module->thName == "CAddTable") {
1118                 String name = generateLayerName("torchCAddTable");
1119                 std::vector<int> ids = net.getUnconnectedOutLayers();
1120                 LayerParams params;
1121                 params.set("operation", "sum");
1122
1123
1124                 int id = net.addLayer(name, "Eltwise", params);
1125
1126                 for (int i = 0; i < ids.size(); i++)
1127                 {
1128                     net.connect(ids[i], 0, id, i);
1129                 }
1130
1131                 addedModules.push_back(std::make_pair(id, module));
1132                 return id;
1133             }
1134             else if (module->thName == "SpatialMaxUnpooling") {
1135                 CV_Assert(module->params.has("indices_blob_id"));
1136                 int indicesBlobId = module->params.get<int>("indices_blob_id");
1137                 std::pair<int, Module*> poolingLayer;
1138                 poolingLayer.first = -1;
1139
1140                 for(int i = 0; i < addedModules.size(); i++)
1141                 {
1142                     if (addedModules[i].second->apiType == "Pooling" &&
1143                         addedModules[i].second->params.has("indices_blob_id") &&
1144                         addedModules[i].second->params.get<int>("indices_blob_id") == indicesBlobId)
1145                     {
1146                         poolingLayer = addedModules[i];
1147                         break;
1148                     }
1149                 }
1150
1151                 module->params.set("pool_k_h", poolingLayer.second->params.get<int>("kernel_h"));
1152                 module->params.set("pool_k_w", poolingLayer.second->params.get<int>("kernel_w"));
1153                 module->params.set("pool_stride_h", poolingLayer.second->params.get<int>("stride_h"));
1154                 module->params.set("pool_stride_w", poolingLayer.second->params.get<int>("stride_w"));
1155                 module->params.set("pool_pad_h", poolingLayer.second->params.get<int>("pad_h"));
1156                 module->params.set("pool_pad_w", poolingLayer.second->params.get<int>("pad_w"));
1157
1158                 String name = generateLayerName("torchMaxUnpooling");
1159                 int id = net.addLayer(name, "MaxUnpool", module->params);
1160                 net.connect(prevLayerId, 0, id, 0);
1161
1162                 CV_Assert(poolingLayer.first != -1);
1163                 net.connect(poolingLayer.first, 1, id, 1);
1164
1165                 return id;
1166             }
1167         }
1168
1169         CV_Error(Error::StsInternal, "Unexpected torch container: " + module->thName);
1170         return -1;
1171     }
1172
1173     void populateNet(Net net_)
1174     {
1175         CV_TRACE_FUNCTION();
1176
1177         CV_Assert(rootModule == NULL);
1178         cv::Ptr<Module> rootModule_ = cv::makePtr<Module>("Sequential");
1179         rootModule = rootModule_.get();
1180         curModule = rootModule;
1181
1182         THFile_seek(file, 0);
1183         readObject();
1184
1185         net = net_;
1186         std::vector<std::pair<int, Module*> > addedModules;
1187         fill(rootModule, addedModules);
1188
1189         rootModule = NULL;
1190         curModule = NULL;
1191     }
1192 };
1193
1194 Ptr<Importer> createTorchImporter(const String &filename, bool isBinary)
1195 {
1196     return Ptr<Importer>(new TorchImporter(filename, isBinary));
1197 }
1198
1199
1200 Mat readTorchBlob(const String &filename, bool isBinary)
1201 {
1202     Ptr<TorchImporter> importer(new TorchImporter(filename, isBinary));
1203     importer->readObject();
1204     CV_Assert(importer->tensors.size() == 1);
1205
1206     return importer->tensors.begin()->second;
1207 }
1208
1209 Net readNetFromTorch(const String &model, bool isBinary)
1210 {
1211     CV_TRACE_FUNCTION();
1212
1213     TorchImporter importer(model, isBinary);
1214     Net net;
1215     importer.populateNet(net);
1216     return net;
1217 }
1218
1219 #else
1220
1221 Ptr<Importer> createTorchImporter(const String&, bool)
1222 {
1223     CV_Error(Error::StsNotImplemented, "Torch importer is disabled in current build");
1224     return Ptr<Importer>();
1225 }
1226
1227 Mat readTorchBlob(const String&, bool)
1228 {
1229     CV_Error(Error::StsNotImplemented, "Torch importer is disabled in current build");
1230     return Mat();
1231 }
1232
1233 Net readNetFromTorch(const String &model, bool isBinary)
1234 {
1235     CV_Error(Error::StsNotImplemented, "Torch importer is disabled in current build");
1236     return Net();
1237 }
1238
1239 #endif //defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER
1240
1241 CV__DNN_EXPERIMENTAL_NS_END
1242 }} // namespace