/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "../precomp.hpp"
#include <limits>
#include <set>
#include <map>
#include <algorithm>
#include <iostream>
#include <fstream>

#if defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER
#include "THDiskFile.h"
#endif

namespace cv {
namespace dnn {
CV__DNN_EXPERIMENTAL_NS_BEGIN

#if defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER
using namespace TH;

//#ifdef NDEBUG
static bool dbgPrint = false;
//#else
//static bool dbgPrint = true;
//#endif

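// Type tags of Torch7's serialization format: every serialized value is
// preceded by one of these integers, and reference types (tables, Torch
// objects, functions) are additionally followed by a heap index, so shared
// objects are stored only once (see File.lua in the torch7 sources).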
enum LuaType
{
    TYPE_NIL      = 0,
    TYPE_NUMBER   = 1,
    TYPE_STRING   = 2,
    TYPE_TABLE    = 3,
    TYPE_TORCH    = 4,
    TYPE_BOOLEAN  = 5,
    TYPE_FUNCTION = 6,
    TYPE_RECUR_FUNCTION = 8,
    LEGACY_TYPE_RECUR_FUNCTION = 7
};

template<typename T>
static String toString(const T &v)
{
    std::ostringstream ss;
    ss << v;
    return ss.str();
}

static inline bool startsWith(const String &str, const char *substr)
{
    return str.find(substr) == 0;
}

static inline bool endsWith(const String &str, const char *substr)
{
    // Guard against unsigned underflow when substr is longer than str.
    return str.length() >= strlen(substr) &&
           str.rfind(substr) == str.length() - strlen(substr);
}

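// The importer works in two phases. readObject() walks the serialized Torch
// object graph and builds a tree of Module nodes, caching decoded tensors and
// storages in the maps below under their serialization indexes. populateNet()
// then calls fill() to translate that tree into dnn::Net layers.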
struct TorchImporter : public ::cv::dnn::Importer
{
    typedef std::map<String, std::pair<int, Mat> > TensorsMap;
    Net net;

    cv::Ptr<THFile> file;
    std::set<int> readedIndexes;
    std::map<int, Mat> storages;
    std::map<int, Mat> tensors;

    struct Module
    {
        String thName, apiType;
        dnn::LayerParams params;
        std::vector<cv::Ptr<Module> > modules;

        Module(const String &_thName, const String &_apiType = String())
            : thName(_thName), apiType(_apiType) {}
    };

    Module *rootModule;
    Module *curModule;
    int moduleCounter;

    TorchImporter(String filename, bool isBinary)
    {
        CV_TRACE_FUNCTION();

        rootModule = curModule = NULL;
        moduleCounter = 0;

        file = cv::Ptr<THFile>(THDiskFile_new(filename, "r", 0), THFile_free);
        CV_Assert(file && THFile_isOpened(file));

        if (isBinary)
            THFile_binary(file);
        else
            THFile_ascii(file);
    }

    /* Simple readers */

    inline int readInt()
    {
        return THFile_readIntScalar(file);
    }

    inline long readLong()
    {
        return THFile_readLongScalar(file);
    }

    inline bool readBool()
    {
        return readInt() != 0;
    }

    inline double readDouble()
    {
        return THFile_readDoubleScalar(file);
    }

    inline String readString()
    {
        int size = THFile_readIntScalar(file);
        String str(size, '\0');
        THFile_readCharRaw(file, const_cast<char*>(str.c_str()), size);
        return str;
    }

    inline String readTorchClassName()
    {
        String version = readString();
        return startsWith(version, "V ") ? readString() : version;
    }

    inline void readFunction()
    {
        readString();
        readObject();
    }

    void readTable(int index = -1)
    {
        index = (index < 0) ? readInt() : index;

        if (readedIndexes.count(index))
            return;

        readedIndexes.insert(index);

        int size = readInt();

        for (int i = 0; i < size; i++)
        {
            readObject(); //key
            readObject(); //value
        }
    }

    /* Special readers */

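    // parseTorchType() maps a Torch class name to an OpenCV depth constant by
    // stripping the given prefix and suffix, e.g.
    //     parseTorchType("torch.FloatTensor", "Tensor")    -> CV_32F
    //     parseTorchType("torch.DoubleStorage", "Storage") -> CV_64F
    // It returns -1 if the name does not match the prefix/suffix pattern.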
    static inline int parseTorchType(const String &str, const char *suffix, const char *prefix = "torch.")
    {
        if (startsWith(str, prefix) && endsWith(str, suffix))
        {
           String typeStr = str.substr(strlen(prefix), str.length() - strlen(prefix) - strlen(suffix));

           if (typeStr == "Double")
               return CV_64F;
           else if (typeStr == "Float" || typeStr == "Cuda")
               return CV_32F;
           else if (typeStr == "Byte")
               return CV_8U;
           else if (typeStr == "Char")
               return CV_8S;
           else if (typeStr == "Short")
               return CV_16S;
           else if (typeStr == "Int")
               return CV_32S;
           else if (typeStr == "Long") // Careful: the 64-bit integer type is coded as CV_USRTYPE1
               return CV_USRTYPE1;
           else
               CV_Error(Error::StsNotImplemented, "Unknown type \"" + typeStr + "\" of torch class \"" + str + "\"");
        }

        return -1;
    }

    static int parseTensorType(const String &className)
    {
        return parseTorchType(className, "Tensor");
    }

    static int parseStorageType(const String &className)
    {
        return parseTorchType(className, "Storage");
    }

    void readTorchStorage(int index, int type = -1)
    {
        long size = readLong();
        Mat storageMat(1, (int)size, (type != CV_USRTYPE1) ? type : CV_64F); //handle LongStorage as CV_64F Mat

        switch (type)
        {
        case CV_32F:
            THFile_readFloatRaw(file, (float*)storageMat.data, size);
            break;
        case CV_64F:
            THFile_readDoubleRaw(file, (double*)storageMat.data, size);
            break;
        case CV_8S:
        case CV_8U:
            THFile_readByteRaw(file, (uchar*)storageMat.data, size);
            break;
        case CV_16S:
        case CV_16U:
            THFile_readShortRaw(file, (short*)storageMat.data, size);
            break;
        case CV_32S:
            THFile_readIntRaw(file, (int*)storageMat.data, size);
            break;
        case CV_USRTYPE1:
        {
            // Longs are read in place and then widened element-wise to double;
            // both types occupy 8 bytes, so the in-place conversion is safe.
            double *buf = storageMat.ptr<double>();
            THFile_readLongRaw(file, (int64*)buf, size);

            for (size_t i = (size_t)size; i-- > 0; )
                buf[i] = ((int64*)buf)[i];
        }
            break;
        default:
            CV_Error(Error::StsInternal, "");
            break;
        }

        storages.insert(std::make_pair(index, storageMat));
    }

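    // Reads a serialized Lua table holding a module's fields. String-keyed
    // numbers, strings, booleans and numeric storages land in scalarParams;
    // tensors land in tensorParams together with their serialization index.
    // Entries with non-string keys are read and discarded.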
    void readTorchTable(Dict &scalarParams, TensorsMap &tensorParams)
    {
        int luaType = readInt();
        int index = readInt();

        CV_Assert(luaType == TYPE_TABLE && readedIndexes.count(index) == 0);
        readedIndexes.insert(index);

        long fpos;
        int numPairs = readInt();

        for (int i = 0; i < numPairs; i++)
        {
            fpos = THFile_position(file);
            int ktype = readInt();

            if (ktype != TYPE_STRING) //skip non-string fields
            {
                THFile_seek(file, fpos);
                readObject(); //key
                readObject(); //value
                continue;
            }

            String key = readString();
            if (dbgPrint)
                std::cout << i << "-th key: " << key << "\n";

            fpos = THFile_position(file);
            int vtype = readInt();

            if (vtype == TYPE_TORCH)
            {
                int index = readInt();
                size_t numModules = curModule->modules.size();
                readTorchObject(index);

                if (tensors.count(index)) //tensor was read
                {
                    tensorParams.insert(std::make_pair(key, std::make_pair(index, tensors[index])));
                }
                else if (storages.count(index)) //storage was read
                {
                    Mat &matStorage = storages[index];
                    Mat matCasted;
                    matStorage.convertTo(matCasted, CV_64F);

                    DictValue scalar = DictValue::arrayReal(matCasted.ptr<double>(), matCasted.total());
                    scalarParams.set(key, scalar);
                }
                else
                {
                    // Only tensors and scalars are supported for table fields,
                    // but some fields hold whole sub-modules: e.g. nn.Inception
                    // has a field `transfer` which is an activation layer. So we
                    // drop the modules that readTorchObject(index) has added.
                    while (curModule->modules.size() > numModules)
                        curModule->modules.pop_back();
                }
            }
            else if (vtype == TYPE_NUMBER)
            {
                scalarParams.set(key, readDouble());
            }
            else if (vtype == TYPE_STRING)
            {
                scalarParams.set(key, readString());
            }
            else if (vtype == TYPE_BOOLEAN)
            {
                scalarParams.set(key, readBool());
            }
            else
            {
                THFile_seek(file, fpos);
                readObject();
            }
        }

        //Debug output
        if (dbgPrint)
        {
            std::cout << "scalarParams:\n";
            std::cout << scalarParams;

            std::cout << "#" << tensorParams.size() << " tensorParams:\n";
            std::map<String, std::pair<int, Mat> >::const_iterator it;
            for (it = tensorParams.begin(); it != tensorParams.end(); it++)
                std::cout << it->first << ": Tensor " << it->second.second.size << "\n";
        }
    }

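    // Reads a serialized torch.*Tensor: its number of dimensions, sizes,
    // strides and 1-based storage offset, followed by a reference to the
    // underlying storage. The tensor is wrapped as a Mat view over the storage
    // data using explicit per-dimension steps, then converted to a dense
    // CV_32F blob.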
    void readTorchTensor(int indexTensor, int typeTensor)
    {
        int ndims = readInt();
        AutoBuffer<int64, 4> sizes(ndims);
        AutoBuffer<int64, 4> steps(ndims);
        THFile_readLongRaw(file, sizes, ndims);
        THFile_readLongRaw(file, steps, ndims);
        long offset = readLong() - 1;

        //read Storage
        int typeidx = readInt();
        CV_Assert(typeidx == TYPE_TORCH || (typeidx == TYPE_NIL && ndims == 0));

        if (typeidx == TYPE_NIL)
        {
            tensors.insert(std::make_pair(indexTensor, Mat()));
            return;
        }

        int indexStorage = readInt();
        if (readedIndexes.count(indexStorage) == 0)
        {
            String className = readTorchClassName();
            int typeStorage = parseStorageType(className);
            CV_Assert(typeStorage >= 0 && typeTensor == typeStorage);
            readTorchStorage(indexStorage, typeStorage);
            typeTensor = storages[indexStorage].type();
            readedIndexes.insert(indexStorage);
        }

        //small check
        size_t requireElems = (size_t)offset + (size_t)steps[0] * (size_t)sizes[0];
        size_t storageElems = storages[indexStorage].total();
        if (requireElems > storageElems)
            CV_Error(Error::StsBadSize, "Storage has insufficient number of elements for requested Tensor");

        //convert sizes
        AutoBuffer<int, 4> isizes(ndims);
        AutoBuffer<size_t, 4> ssteps(ndims);
        for (int i = ndims - 1; i >= 0; i--)
        {
            isizes[i] = (int)sizes[i];
            ssteps[i] = (size_t)steps[i] * CV_ELEM_SIZE(typeTensor);
        }

        //allocate Blob
        Mat srcMat(ndims, (int*)isizes, typeTensor, storages[indexStorage].ptr() + offset*CV_ELEM_SIZE(typeTensor), (size_t*)ssteps);
        int dstType = CV_32F;

        Mat blob;
        srcMat.convertTo(blob, dstType);

        tensors.insert(std::make_pair(indexTensor, blob));
    }

    static bool isNNClass(const String &className, String &nnName)
    {
        const char *prefixes[] = {"nn.", "cunn.", "cudnn.", "fbcunn.", NULL};

        for (int i = 0; prefixes[i]; i++)
        {
            if (startsWith(className, prefixes[i]))
            {
                nnName = className.substr(strlen(prefixes[i]));
                return true;
            }
        }

        return false;
    }

    static void convertTorchKernelsParams(const Dict &torchParams, cv::dnn::LayerParams &layerParams)
    {
        layerParams.set("kernel_h", torchParams.get<int>("kH"));
        layerParams.set("kernel_w", torchParams.get<int>("kW"));
        layerParams.set("stride_h", torchParams.get<int>("dH"));
        layerParams.set("stride_w", torchParams.get<int>("dW"));
        layerParams.set("pad_h", torchParams.get<int>("padH", 0));
        layerParams.set("pad_w", torchParams.get<int>("padW", 0));
    }

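    // Reads one serialized Torch object identified by its heap index. Tensors
    // and storages are decoded into Mats; nn.* classes are translated by name
    // into Module nodes carrying dnn::LayerParams; anything else is rejected
    // with an error.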
    void readTorchObject(int index)
    {
        if (readedIndexes.count(index))
            return;

        String className = readTorchClassName();
        String nnName;

        if (dbgPrint)
            std::cout << "Class: " << className << std::endl;

        int type;
        if ( (type = parseTensorType(className)) >= 0 ) //is Tensor
        {
            readTorchTensor(index, type);
        }
        else if ( (type = parseStorageType(className)) >= 0 ) //is Storage
        {
            readTorchStorage(index, type);
        }
        else if (isNNClass(className, nnName))
        {
            Dict scalarParams;
            TensorsMap tensorParams;

            cv::Ptr<Module> newModule(new Module(nnName));
            cv::dnn::LayerParams &layerParams = newModule->params;

            layerParams.set("torch_index", index);

            if (nnName == "Sequential" || nnName == "Parallel" ||
                nnName == "Concat" || nnName == "ConcatTable" || nnName == "JoinTable" ||
                nnName == "DepthConcat" || nnName == "Inception")
            {
                Module *parentModule = curModule;
                curModule->modules.push_back(newModule);
                curModule = newModule;
                readTorchTable(scalarParams, tensorParams);
                curModule = parentModule;

                if (nnName == "Parallel")
                {
                    layerParams.set("inputDimension", scalarParams.get<int>("inputDimension"));
                    layerParams.set("outputDimension", scalarParams.get<int>("outputDimension"));
                }
                if (nnName == "Concat" || nnName == "JoinTable" || nnName == "DepthConcat")
                {
                    layerParams.set("dimension", scalarParams.get<int>("dimension"));
                }
            }
            else if (nnName == "SpatialConvolution" || nnName == "SpatialConvolutionMM")
            {
                newModule->apiType = "Convolution";
                readTorchTable(scalarParams, tensorParams);

                CV_Assert(tensorParams.count("weight"));
                layerParams.blobs.push_back(tensorParams["weight"].second);

                bool bias = tensorParams.count("bias") != 0;
                layerParams.set("bias_term", bias);
                if (bias)
                    layerParams.blobs.push_back(tensorParams["bias"].second);

                layerParams.set("num_output", scalarParams.get<int>("nOutputPlane"));
                convertTorchKernelsParams(scalarParams, layerParams);

                if (nnName == "SpatialConvolutionMM")
                {
                    // Reshape weights from a [ outCh x inCh*kH*kW ] 2D matrix
                    // into a 4D [ outCh x inCh x kH x kW ] blob.
                    CV_Assert(layerParams.blobs[0].dims == 2);
                    const int kernel = layerParams.blobs[0].size[1];  // inCh * kH * kW
                    MatShape kernelShape(4);
                    kernelShape[0] = layerParams.blobs[0].size[0];  // outCh.
                    kernelShape[2] = layerParams.get<int>("kernel_h");
                    kernelShape[3] = layerParams.get<int>("kernel_w");
                    kernelShape[1] = kernel / (kernelShape[2] * kernelShape[3]);  // inCh.
                    layerParams.blobs[0] = layerParams.blobs[0].reshape(1, kernelShape);
                }
                curModule->modules.push_back(newModule);
            }
            else if (nnName == "SpatialLPPooling")
            {
                // nn.Sequential {
                //     [input -> (1) -> (2) -> output]
                //     (1): nn.Sequential {
                //       [input -> (1) -> (2) -> (3) -> (4) -> output]
                //       (1): nn.Power
                //       (2): nn.SpatialAveragePooling(...)
                //       (3): nn.MulConstant
                //       (4): nn.Power
                //     }
                //     (2): nn.Sigmoid
                // }
                // nn.SpatialLPPooling is just a table so we skip it.
                readTorchTable(scalarParams, tensorParams);
            }
            else if (nnName == "SpatialMaxPooling" || nnName == "SpatialAveragePooling")
            {
                newModule->apiType = "Pooling";
                readTorchTable(scalarParams, tensorParams);

                if (nnName == "SpatialMaxPooling") {
                    layerParams.set("pool", "MAX");
                    layerParams.set("indices_blob_id", tensorParams["indices"].first);
                }
                if (nnName == "SpatialAveragePooling")
                    layerParams.set("pool", "AVE");
                convertTorchKernelsParams(scalarParams, layerParams);

                CV_Assert(scalarParams.has("ceil_mode"));
                layerParams.set("ceil_mode", scalarParams.get<bool>("ceil_mode"));

                curModule->modules.push_back(newModule);
            }
            else if (nnName == "Linear")
            {
                newModule->apiType = "InnerProduct";
                readTorchTable(scalarParams, tensorParams);

                CV_Assert(tensorParams.count("weight"));
                Mat weightBlob = tensorParams["weight"].second;
                layerParams.blobs.push_back(weightBlob);

                bool bias = tensorParams.count("bias") != 0;
                if (bias)
                    layerParams.blobs.push_back(tensorParams["bias"].second);
                layerParams.set("bias_term", bias);

                layerParams.set("num_output", weightBlob.size[0]);
                curModule->modules.push_back(newModule);
            }
            else if (nnName == "Reshape" || nnName == "View")
            {
                newModule->apiType = "Reshape";

                readTorchTable(scalarParams, tensorParams);
                CV_Assert(scalarParams.has("size"));

                DictValue dimParam = scalarParams.get("size");
                layerParams.set("dim", dimParam);

                if (scalarParams.has("batchMode") && scalarParams.get<bool>("batchMode"))
                    layerParams.set("axis", 1);

                curModule->modules.push_back(newModule);
            }
            else if (nnName == "ReLU")
            {
                curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "ReLU")));
                readObject();
            }
            else if (nnName == "Tanh")
            {
                curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "TanH")));
                readObject();
            }
            else if (nnName == "Sigmoid")
            {
                curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "Sigmoid")));
                readObject();
            }
            else if (nnName == "SpatialBatchNormalization" || nnName == "InstanceNormalization" ||
                     nnName == "BatchNormalization")
            {
                newModule->apiType = "BatchNorm";
                readTorchTable(scalarParams, tensorParams);

                CV_Assert(scalarParams.has("eps"));
                float eps = float(scalarParams.get<double>("eps"));
                layerParams.set("eps", eps);

                if (tensorParams.count("running_mean"))
                {
                    layerParams.blobs.push_back(tensorParams["running_mean"].second);
                }
                else
                {
                    CV_Assert(scalarParams.has("nOutput"));
                    layerParams.blobs.push_back(Mat::zeros(1, scalarParams.get<int>("nOutput"), CV_32F));
                }

                if (tensorParams.count("running_var"))
                {
                    layerParams.blobs.push_back(tensorParams["running_var"].second);
                }
                else if (tensorParams.count("running_std"))
                {
                    layerParams.blobs.push_back(tensorParams["running_std"].second);
                    pow(layerParams.blobs.back(), -2, layerParams.blobs.back());
                    subtract(layerParams.blobs.back(), eps, layerParams.blobs.back());
                }
                else
                {
                    CV_Assert(scalarParams.has("nOutput"));
                    layerParams.blobs.push_back(Mat::ones(1, scalarParams.get<int>("nOutput"), CV_32F));
                }

                if (tensorParams.count("weight"))
                {
                    layerParams.set("has_weight", true);
                    layerParams.blobs.push_back(tensorParams["weight"].second);
                }

                if (tensorParams.count("bias"))
                {
                    layerParams.set("has_bias", true);
                    layerParams.blobs.push_back(tensorParams["bias"].second);
                }

                if (nnName == "InstanceNormalization")
                {
                    cv::Ptr<Module> mvnModule(new Module(nnName));
                    mvnModule->apiType = "MVN";
                    curModule->modules.push_back(mvnModule);

                    layerParams.blobs[0].setTo(0);  // batch norm's mean
                    layerParams.blobs[1].setTo(1);  // batch norm's std
                }

                curModule->modules.push_back(newModule);
            }
            else if (nnName == "PReLU")
            {
                readTorchTable(scalarParams, tensorParams);

                CV_Assert(tensorParams.count("weight"));

                size_t outputChannels = static_cast<size_t>(scalarParams.get<double>("nOutputPlane"));
                if (outputChannels) {
                    CV_Assert(tensorParams["weight"].second.total() == outputChannels);
                    layerParams.blobs.push_back(tensorParams["weight"].second);

                    newModule->apiType = "ChannelsPReLU";
                }
                else {
                    CV_Assert(tensorParams["weight"].second.total() == 1);
                    float negative_slope = *tensorParams["weight"].second.ptr<float>();
                    layerParams.set("negative_slope", negative_slope);

                    newModule->apiType = "ReLU";
                }

                curModule->modules.push_back(newModule);
            }
            else if (nnName == "SpatialDropout" || nnName == "Dropout")
            {
                readTorchTable(scalarParams, tensorParams);
                CV_Assert(scalarParams.has("p"));

                if (scalarParams.has("v2") && scalarParams.get<bool>("v2"))
                {
                    newModule->apiType = "Identity";
                }
                else
                {
                    float scale = 1.0f - (float)scalarParams.get<double>("p");

                    CV_Assert(scale > 0);

                    newModule->apiType = "Power";
                    layerParams.set("scale", scale);
                }
                curModule->modules.push_back(newModule);
            }
            // The TotalVariation layer is from the fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
            // It's a loss function that has an Identity forward.
            else if (nnName == "Identity" || nnName == "TotalVariation")
            {
                readTorchTable(scalarParams, tensorParams);
                newModule->apiType = "Identity";
                curModule->modules.push_back(newModule);
            }
            else if (nnName == "Normalize")
            {
                readTorchTable(scalarParams, tensorParams);
                CV_Assert(scalarParams.has("p"));

                layerParams.set("p", scalarParams.get<float>("p"));
                if (scalarParams.has("eps"))
                    layerParams.set("eps", scalarParams.get<float>("eps"));

                newModule->apiType = "Normalize";
                curModule->modules.push_back(newModule);
            }
            else if (nnName == "Padding")
            {
                readTorchTable(scalarParams, tensorParams);
                newModule->apiType = "Padding";

                CV_Assert(scalarParams.has("pad") && scalarParams.has("dim"));
                if (scalarParams.has("index") && scalarParams.get<int>("index") != 1)
                    CV_Error(Error::StsNotImplemented, "Padding with offset is not implemented");

                if (scalarParams.has("value"))
                    layerParams.set("value", scalarParams.get<float>("value"));

                if (scalarParams.has("nInputDim"))
                    layerParams.set("input_dims", scalarParams.get<int>("nInputDim"));

                int dim = scalarParams.get<int>("dim") - 1;  // In Lua we start from 1.
                int pad = scalarParams.get<int>("pad");

                std::vector<int> paddings((dim + 1) * 2, 0);
                if (pad > 0)
                    paddings[dim * 2 + 1] = pad;  // Pad after (right).
                else
                    paddings[dim * 2] = -pad;  // Pad before (left).
                layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));

                curModule->modules.push_back(newModule);
            }
            else if (nnName == "CAddTable")
            {
                curModule->modules.push_back(newModule);
                readObject();
            }
            else if (nnName == "SpatialDilatedConvolution")
            {
                readTorchTable(scalarParams, tensorParams);
                newModule->apiType = "Convolution";
                CV_Assert(scalarParams.has("padW") &&
                          scalarParams.has("padH") &&
                          scalarParams.has("dW") &&
                          scalarParams.has("dH") &&
                          scalarParams.has("dilationW") &&
                          scalarParams.has("dilationH") &&
                          scalarParams.has("kW") &&
                          scalarParams.has("kH") &&
                          scalarParams.has("nOutputPlane"));

                layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));
                layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));
                layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));
                layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));
                layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));
                layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));
                layerParams.set("dilation_w", static_cast<int>(scalarParams.get<double>("dilationW")));
                layerParams.set("dilation_h", static_cast<int>(scalarParams.get<double>("dilationH")));
                layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));

                layerParams.blobs.push_back(tensorParams["weight"].second);

                bool bias = tensorParams.count("bias") != 0;
                layerParams.set("bias_term", bias);
                if (bias)
                    layerParams.blobs.push_back(tensorParams["bias"].second);

                curModule->modules.push_back(newModule);
            }
            else if (nnName == "SpatialFullConvolution")
            {
                readTorchTable(scalarParams, tensorParams);
                newModule->apiType = "Deconvolution";
                CV_Assert(scalarParams.has("padW") &&
                          scalarParams.has("padH") &&
                          scalarParams.has("dW") &&
                          scalarParams.has("dH") &&
                          scalarParams.has("adjW") &&
                          scalarParams.has("adjH") &&
                          scalarParams.has("kW") &&
                          scalarParams.has("kH") &&
                          scalarParams.has("nOutputPlane"));

                layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));
                layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));
                layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));
                layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));
                layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));
                layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));
                layerParams.set("adj_w", static_cast<int>(scalarParams.get<double>("adjW")));
                layerParams.set("adj_h", static_cast<int>(scalarParams.get<double>("adjH")));
                layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));

                layerParams.blobs.push_back(tensorParams["weight"].second);

                bool bias = tensorParams.count("bias") != 0;
                layerParams.set("bias_term", bias);
                if (bias)
                    layerParams.blobs.push_back(tensorParams["bias"].second);

                curModule->modules.push_back(newModule);
            }
            else if (nnName == "SpatialMaxUnpooling")
            {
                readTorchTable(scalarParams, tensorParams);
                CV_Assert(tensorParams.count("indices"));

                layerParams.set("indices_blob_id", tensorParams["indices"].first);
                curModule->modules.push_back(newModule);
            }
            else if (nnName == "SoftMax")
            {
                newModule->apiType = "SoftMax";
                curModule->modules.push_back(newModule);
            }
            else if (nnName == "LogSoftMax")
            {
                newModule->apiType = "SoftMax";
                layerParams.set("log_softmax", true);
                curModule->modules.push_back(newModule);
            }
            else if (nnName == "SpatialCrossMapLRN")
            {
                newModule->apiType = "LRN";
                readTorchTable(scalarParams, tensorParams);

                CV_Assert(scalarParams.has("alpha"));
                CV_Assert(scalarParams.has("beta"));
                CV_Assert(scalarParams.has("k"));
                CV_Assert(scalarParams.has("size"));

                layerParams.set("norm_region", "ACROSS_CHANNELS");
                layerParams.set("alpha", scalarParams.get<float>("alpha"));
                layerParams.set("beta", scalarParams.get<float>("beta"));
                layerParams.set("bias", scalarParams.get<float>("k"));
                layerParams.set("local_size", scalarParams.get<int>("size"));
                layerParams.set("norm_by_size", true);

                curModule->modules.push_back(newModule);
            }
            else if (nnName == "Square" || nnName == "Sqrt" || nnName == "Power")
            {
                readTorchTable(scalarParams, tensorParams);

                // Final `else` (rather than `else if`) guarantees `power` is initialized.
                float power;
                if (nnName == "Square") power = 2.0f;
                else if (nnName == "Sqrt") power = 0.5f;
                else power = scalarParams.get<float>("pow", 1.0f);  // "Power"

                newModule->apiType = "Power";
                layerParams.set("power", power);
                curModule->modules.push_back(newModule);
            }
            else if (nnName == "MulConstant")
            {
                readTorchTable(scalarParams, tensorParams);
                CV_Assert(scalarParams.has("constant_scalar"));
                newModule->apiType = "Power";
                layerParams.set("scale", scalarParams.get<float>("constant_scalar"));
                curModule->modules.push_back(newModule);
            }
            else if (nnName == "SpatialZeroPadding" || nnName == "SpatialReflectionPadding")
            {
                readTorchTable(scalarParams, tensorParams);
                CV_Assert(scalarParams.has("pad_l") && scalarParams.has("pad_r") &&
                          scalarParams.has("pad_t") && scalarParams.has("pad_b"));
                int padTop = scalarParams.get<int>("pad_t");
                int padLeft = scalarParams.get<int>("pad_l");
                int padRight = scalarParams.get<int>("pad_r");
                int padBottom = scalarParams.get<int>("pad_b");
                if (padTop < 0 || padLeft < 0 || padRight < 0 || padBottom < 0)
                    CV_Error(Error::StsNotImplemented, "SpatialZeroPadding in cropping mode is not implemented");

                newModule->apiType = "Padding";

                // Torch's SpatialZeroPadding works with 3- or 4-dimensional input,
                // so we add the parameter input_dims=3 to skip the batch dimension
                // if it is present.
                std::vector<int> paddings(6, 0);  // CHW
                paddings[2] = padTop;
                paddings[3] = padBottom;
                paddings[4] = padLeft;
                paddings[5] = padRight;
                layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
                layerParams.set("input_dims", 3);

                if (nnName == "SpatialReflectionPadding")
                    layerParams.set("type", "reflect");

                curModule->modules.push_back(newModule);
            }
            else if (nnName == "ShaveImage")
            {
                // The ShaveImage layer is from the fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
                // It can be mapped to a Slice layer.
                readTorchTable(scalarParams, tensorParams);
                CV_Assert(scalarParams.has("size"));
                int size = scalarParams.get<int>("size");

                int begins[] = {0, 0, size, size};
                int ends[] = {-1, -1, -size - 1, -size - 1};

                newModule->apiType = "Slice";
                layerParams.set("begin", DictValue::arrayInt<int*>(&begins[0], 4));
                layerParams.set("end", DictValue::arrayInt<int*>(&ends[0], 4));
                curModule->modules.push_back(newModule);
            }
            else
            {
                CV_Error(Error::StsNotImplemented, "Unknown nn class \"" + className + "\"");
            }
        }
        else
        {
            CV_Error(Error::StsNotImplemented, "Unsupported Torch class \"" + className + "\"");
        }

        readedIndexes.insert(index);
    }

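    // Reads a single value of any Lua type and dispatches on its type tag.
    // TYPE_TORCH values are forwarded to readTorchObject() with the heap index
    // that follows the tag, so objects shared by reference are decoded once.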
    void readObject()
    {
        int typeidx = readInt();

        if (typeidx == TYPE_TORCH)
        {
            int index = readInt();
            readTorchObject(index);
            readedIndexes.insert(index);
        }
        else if (typeidx == TYPE_NIL)
            return;
        else if (typeidx == TYPE_NUMBER)
            readDouble();
        else if (typeidx == TYPE_BOOLEAN)
            readBool();
        else if (typeidx == TYPE_STRING)
            readString();
        else if (typeidx == TYPE_TABLE)
            readTable();
        else
            CV_Error(Error::StsNotImplemented, "Unsupported Lua type");
    }

    inline String generateLayerName(const String &label = String())
    {
        return "l" + toString(++this->moduleCounter) + "_" + label;
    }

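    // Recursively converts the Module tree into net layers. A module with a
    // non-empty apiType becomes a single layer connected to the previous one;
    // container modules (Sequential, Concat, Parallel, ConcatTable, JoinTable,
    // CAddTable, SpatialMaxUnpooling) are expanded into the corresponding
    // subgraph of Slice, Concat, Eltwise and MaxUnpool layers. Returns the id
    // of the last layer added.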
    int fill(Module *module, std::vector<std::pair<int, Module*> >& addedModules, int prevLayerId = 0, int prevOutNum = 0)
    {
        if (module == NULL)
            return prevLayerId;

        if (module->apiType.length())
        {
            int newLayerId = net.addLayer(generateLayerName(module->apiType), module->apiType, module->params);
            net.connect(prevLayerId, prevOutNum, newLayerId, 0);
            addedModules.push_back(std::make_pair(newLayerId, module));
            return newLayerId;
        }
        else
        {
            if (module->thName == "Sequential" || module->thName == "Inception")
            {
                for (size_t i = 0; i < module->modules.size(); i++)
                {
                    prevLayerId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
                    prevOutNum = 0;
                }
                return prevLayerId;
            }
            else if (module->thName == "Concat")
            {
                int newId, mergeId;
                LayerParams mergeParams;
                mergeParams.set("axis", module->params.get<int>("dimension") - 1);

                std::vector<int> branchIds;
                for (int i = 0; i < (int)module->modules.size(); i++)
                {
                    newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
                    branchIds.push_back(newId);
                }

                moduleCounter += 1;  // Skip split layer creation. See https://github.com/opencv/opencv/pull/9384.
                mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);

                for (int i = 0; i < (int)branchIds.size(); i++)
                {
                    net.connect(branchIds[i], 0, mergeId, i);
                }

                addedModules.push_back(std::make_pair(mergeId, module));
                return mergeId;
            }
            else if (module->thName == "DepthConcat")
            {
                int newId, mergeId;
                LayerParams mergeParams;
                mergeParams.set("axis", module->params.get<int>("dimension") - 1);
                mergeParams.set("padding", true);

                std::vector<int> branchIds;
                for (int i = 0; i < (int)module->modules.size(); i++)
                {
                    newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
                    branchIds.push_back(newId);
                }

                mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);

                for (int i = 0; i < (int)branchIds.size(); i++)
                {
                    net.connect(branchIds[i], 0, mergeId, i);
                }

                addedModules.push_back(std::make_pair(mergeId, module));
                return mergeId;
            }
            else if (module->thName == "Parallel")
            {
                int newId, splitId, mergeId, reshapeId;

                LayerParams splitParams, mergeParams, reshapeParams;
                splitParams.set("axis", module->params.get<int>("inputDimension") - 1);
                mergeParams.set("axis", module->params.get<int>("outputDimension") - 1);
                reshapeParams.set("axis", splitParams.get<int>("axis"));
                reshapeParams.set("num_axes", 1);

                splitId = net.addLayer(generateLayerName("torchSplit"), "Slice", splitParams);
                reshapeId = net.addLayer(generateLayerName("torchReshape"), "Reshape", reshapeParams);
                net.connect(prevLayerId, prevOutNum, splitId, 0);

                std::vector<int> branchIds;
                for (int i = 0; i < (int)module->modules.size(); i++)
                {
                    net.connect(splitId, i, reshapeId, i);
                    newId = fill(module->modules[i], addedModules, reshapeId, i);
                    branchIds.push_back(newId);
                }

                mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);

                for (int i = 0; i < (int)branchIds.size(); i++)
                {
                    net.connect(branchIds[i], 0, mergeId, i);
                }

                addedModules.push_back(std::make_pair(mergeId, module));
                return mergeId;
            }
            else if (module->thName == "ConcatTable") {
                int newId = -1;
                moduleCounter += 1;  // Skip split layer creation. See https://github.com/opencv/opencv/pull/9384.
                for (int i = 0; i < (int)module->modules.size(); i++)
                {
                    newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
                }
                return newId;
            }
            else if (module->thName == "JoinTable") {
                std::vector<int> ids = net.getUnconnectedOutLayers();

                int mergeId;
                LayerParams mergeParams;
                mergeParams.set("axis", module->params.get<int>("dimension") - 1);

                mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
                addedModules.push_back(std::make_pair(mergeId, module));

                for (int i = 0; i < (int)ids.size(); i++)
                {
                    net.connect(ids[i], 0, mergeId, i);
                }

                return mergeId;
            }
            else if (module->thName == "CAddTable") {
                String name = generateLayerName("torchCAddTable");
                std::vector<int> ids = net.getUnconnectedOutLayers();
                LayerParams params;
                params.set("operation", "sum");

                int id = net.addLayer(name, "Eltwise", params);

                for (int i = 0; i < (int)ids.size(); i++)
                {
                    net.connect(ids[i], 0, id, i);
                }

                addedModules.push_back(std::make_pair(id, module));
                return id;
            }
            else if (module->thName == "SpatialMaxUnpooling") {
                CV_Assert(module->params.has("indices_blob_id"));
                int indicesBlobId = module->params.get<int>("indices_blob_id");
                std::pair<int, Module*> poolingLayer;
                poolingLayer.first = -1;

                for (int i = 0; i < (int)addedModules.size(); i++)
                {
                    if (addedModules[i].second->apiType == "Pooling" &&
                        addedModules[i].second->params.has("indices_blob_id") &&
                        addedModules[i].second->params.get<int>("indices_blob_id") == indicesBlobId)
                    {
                        poolingLayer = addedModules[i];
                        break;
                    }
                }
                // Check the match before poolingLayer.second is dereferenced below.
                CV_Assert(poolingLayer.first != -1);

                module->params.set("pool_k_h", poolingLayer.second->params.get<int>("kernel_h"));
                module->params.set("pool_k_w", poolingLayer.second->params.get<int>("kernel_w"));
                module->params.set("pool_stride_h", poolingLayer.second->params.get<int>("stride_h"));
                module->params.set("pool_stride_w", poolingLayer.second->params.get<int>("stride_w"));
                module->params.set("pool_pad_h", poolingLayer.second->params.get<int>("pad_h"));
                module->params.set("pool_pad_w", poolingLayer.second->params.get<int>("pad_w"));

                String name = generateLayerName("torchMaxUnpooling");
                int id = net.addLayer(name, "MaxUnpool", module->params);
                net.connect(prevLayerId, 0, id, 0);
                net.connect(poolingLayer.first, 1, id, 1);

                return id;
            }
        }

        CV_Error(Error::StsInternal, "Unexpected torch container: " + module->thName);
        return -1;
    }

    void populateNet(Net net_)
    {
        CV_TRACE_FUNCTION();

        CV_Assert(rootModule == NULL);
        cv::Ptr<Module> rootModule_ = cv::makePtr<Module>("Sequential");
        rootModule = rootModule_.get();
        curModule = rootModule;

        THFile_seek(file, 0);
        readObject();

        net = net_;
        std::vector<std::pair<int, Module*> > addedModules;
        fill(rootModule, addedModules);

        rootModule = NULL;
        curModule = NULL;
    }
};

Ptr<Importer> createTorchImporter(const String &filename, bool isBinary)
{
    return Ptr<Importer>(new TorchImporter(filename, isBinary));
}

Mat readTorchBlob(const String &filename, bool isBinary)
{
    Ptr<TorchImporter> importer(new TorchImporter(filename, isBinary));
    importer->readObject();
    CV_Assert(importer->tensors.size() == 1);

    return importer->tensors.begin()->second;
}

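// A minimal usage sketch (the file name "model.t7" and the input blob are
// hypothetical; isBinary=true corresponds to torch.save's default binary
// format):
//
//     Net net = readNetFromTorch("model.t7", true);
//     net.setInput(blob);
//     Mat prob = net.forward();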
Net readNetFromTorch(const String &model, bool isBinary)
{
    CV_TRACE_FUNCTION();

    TorchImporter importer(model, isBinary);
    Net net;
    importer.populateNet(net);
    return net;
}

#else

Ptr<Importer> createTorchImporter(const String&, bool)
{
    CV_Error(Error::StsNotImplemented, "Torch importer is disabled in current build");
    return Ptr<Importer>();
}

Mat readTorchBlob(const String&, bool)
{
    CV_Error(Error::StsNotImplemented, "Torch importer is disabled in current build");
    return Mat();
}

Net readNetFromTorch(const String&, bool)
{
    CV_Error(Error::StsNotImplemented, "Torch importer is disabled in current build");
    return Net();
}

#endif //defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER

CV__DNN_EXPERIMENTAL_NS_END
}} // namespace