1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "../precomp.hpp"
50 #if defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER
51 #include "THDiskFile.h"
56 CV__DNN_EXPERIMENTAL_NS_BEGIN
58 #if defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER
// Debug switch: when true, the importer dumps parsed class names,
// table keys and tensor shapes to std::cout (see uses further below).
62 static bool dbgPrint = false;
64 //static bool dbgPrint = true;
// Tail of the Lua/Torch serialization type-tag enum. NOTE(review): the
// earlier enumerators (TYPE_NIL/TYPE_NUMBER/TYPE_STRING/TYPE_TABLE/
// TYPE_TORCH/TYPE_BOOLEAN) are referenced below but their declarations
// are not visible in this chunk.
76 TYPE_RECUR_FUNCTION = 8,
77 LEGACY_TYPE_RECUR_FUNCTION = 7
// Generic value -> String conversion via an output string stream.
// NOTE(review): the template header and the `ss << v; return ss.str();`
// tail are elided from this view.
81 static String toString(const T &v)
83 std::ostringstream ss;
88 static inline bool startsWith(const String &str, const char *substr)
90 return str.find(substr) == 0;
93 static inline bool endsWith(const String &str, const char *substr)
95 return str.rfind(substr) == str.length() - strlen(substr);
// Deserializes a Torch7 (.t7) model file into the cv::dnn module graph.
98 struct TorchImporter : public ::cv::dnn::Importer
// Maps a Torch table field name to (torch object index, tensor data).
100 typedef std::map<String, std::pair<int, Mat> > TensorsMap;
// Handle to the file being parsed (THDiskFile C API).
103 cv::Ptr<THFile> file;
// Object indexes already deserialized; the Torch format references
// previously-read objects by index instead of re-serializing them.
104 std::set<int> readedIndexes;
// index -> raw storage buffer, and index -> constructed tensor Mat.
105 std::map<int, Mat> storages;
106 std::map<int, Mat> tensors;
// NOTE(review): the following members belong to a nested Module struct
// whose header line is elided from this view: the original Torch class
// name, the cv::dnn layer type it maps to, layer parameters, and (for
// container modules) the child modules.
110 String thName, apiType;
111 dnn::LayerParams params;
112 std::vector<cv::Ptr<Module> > modules;
114 Module(const String &_thName, const String &_apiType = String())
115 : thName(_thName), apiType(_apiType) {}
// Opens `filename` for reading via the THDiskFile backend; THFile_free
// is registered as the Ptr deleter so the handle is released with the
// importer. Fails fast (CV_Assert) when the file cannot be opened.
122 TorchImporter(String filename, bool isBinary)
126 rootModule = curModule = NULL;
129 file = cv::Ptr<THFile>(THDiskFile_new(filename, "r", 0), THFile_free);
130 CV_Assert(file && THFile_isOpened(file));
/* Scalar readers: thin wrappers over the THFile C API.
   NOTE(review): the readInt() signature line is elided; only its body
   (the first line below) is visible. */
142 return THFile_readIntScalar(file);
145 inline long readLong()
147 return THFile_readLongScalar(file);
// Booleans are serialized as ints; nonzero means true.
150 inline bool readBool()
152 return readInt() != 0;
155 inline double readDouble()
157 return THFile_readDoubleScalar(file);
// Reads a length-prefixed character sequence into a String.
160 inline String readString()
162 int size = THFile_readIntScalar(file);
163 String str(size, '\0');
// Writing through const_cast<char*>(c_str()) relies on the string's
// buffer being writable; kept as-is from the original code.
164 THFile_readCharRaw(file, const_cast<char*>(str.c_str()), size);
// Torch class records may start with a version string "V <ver>"; when
// present, the actual class name is the next serialized string.
168 inline String readTorchClassName()
170 String version = readString();
171 return startsWith(version, "V ") ? readString() : version;
// NOTE(review): readFunction()'s body is elided from this view.
174 inline void readFunction()
// Reads (and discards into readObject) a serialized Lua table; `index`
// identifies the table for reference tracking. index < 0 means the
// index has not been read from the stream yet.
180 void readTable(int index = -1)
182 index = (index < 0) ? readInt() : index;
// Already-seen table: nothing further to read for it.
184 if (readedIndexes.count(index))
187 readedIndexes.insert(index);
189 int size = readInt();
// Each table entry is a key/value pair of serialized objects.
191 for (int i = 0; i < size; i++)
194 readObject(); //value
198 /* Special readers */
// Maps a Torch class name of the form "<prefix><Type><suffix>" (e.g.
// "torch.FloatTensor") to an OpenCV depth constant, or a negative
// value / CV_Error depending on the elided branches.
// NOTE(review): the `return CV_*;` lines of each branch are elided.
200 static inline int parseTorchType(const String &str, const char *suffix, const char *prefix = "torch.")
202 if (startsWith(str, prefix) && endsWith(str, suffix))
204 String typeStr = str.substr(strlen(prefix), str.length() - strlen(prefix) - strlen(suffix));
206 if (typeStr == "Double")
208 else if (typeStr == "Float" || typeStr == "Cuda")
210 else if (typeStr == "Byte")
212 else if (typeStr == "Char")
214 else if (typeStr == "Short")
216 else if (typeStr == "Int")
218 else if (typeStr == "Long") //Carefully! CV_64S type coded as CV_USRTYPE1
221 CV_Error(Error::StsNotImplemented, "Unknown type \"" + typeStr + "\" of torch class \"" + str + "\"");
// Convenience wrappers for the two class families we care about.
227 static int parseTensorType(const String &className)
229 return parseTorchType(className, "Tensor");
232 static int parseStorageType(const String &className)
234 return parseTorchType(className, "Storage");
// Reads a torch.*Storage record (a flat element buffer) into a 1 x size
// Mat and registers it in `storages` under the object index.
// NOTE(review): the switch/case framing around the THFile_read*Raw
// calls is elided; each call corresponds to one storage element type.
237 void readTorchStorage(int index, int type = -1)
239 long size = readLong();
240 Mat storageMat(1, size, (type != CV_USRTYPE1) ? type : CV_64F); //handle LongStorage as CV_64F Mat
245 THFile_readFloatRaw(file, (float*)storageMat.data, size);
248 THFile_readDoubleRaw(file, (double*)storageMat.data, size);
252 THFile_readByteRaw(file, (uchar*)storageMat.data, size);
256 THFile_readShortRaw(file, (short*)storageMat.data, size);
259 THFile_readIntRaw(file, (int*)storageMat.data, size);
// Long storage: raw int64 values are read into the double buffer and
// then converted in place back-to-front, which is safe because
// sizeof(double) == sizeof(int64) and the loop runs from the end.
263 double *buf = storageMat.ptr<double>();
264 THFile_readLongRaw(file, (int64*)buf, size);
266 for (size_t i = (size_t)size; i-- > 0; )
267 buf[i] = ((int64*)buf)[i];
// Unknown element type: internal error.
271 CV_Error(Error::StsInternal, "");
275 storages.insert(std::make_pair(index, storageMat));
// Reads a serialized module parameter table, splitting its string-keyed
// fields into scalars (`scalarParams`) and tensors (`tensorParams`).
// Non-string keys and unsupported value types are skipped by seeking
// back (THFile_seek) and consuming the entry with readObject().
278 void readTorchTable(Dict &scalarParams, TensorsMap &tensorParams)
280 int luaType = readInt();
281 int index = readInt();
283 CV_Assert(luaType == TYPE_TABLE && readedIndexes.count(index) == 0);
284 readedIndexes.insert(index);
287 int numPairs = readInt();
289 for (int i = 0; i < numPairs; i++)
// Remember the stream position so we can rewind if the key is not
// a string and must be re-read generically.
291 fpos = THFile_position(file);
292 int ktype = readInt();
294 if (ktype != TYPE_STRING) //skip non-string fileds
296 THFile_seek(file, fpos);
298 readObject(); //value
302 String key = readString();
304 std::cout << i << "th key: " << key << "\n";
306 fpos = THFile_position(file);
307 int vtype = readInt();
// Torch object value: either a tensor, a storage (treated as an
// array of scalars), or a nested module we must roll back.
309 if (vtype == TYPE_TORCH)
311 int index = readInt();
312 int numModules = curModule->modules.size();
313 readTorchObject(index);
315 if (tensors.count(index)) //tensor was readed
317 tensorParams.insert(std::make_pair(key, std::make_pair(index, tensors[index])));
319 else if (storages.count(index)) //storage was readed
321 Mat &matStorage = storages[index];
323 matStorage.convertTo(matCasted, CV_64F);
325 DictValue scalar = DictValue::arrayReal(matCasted.ptr<double>(), matCasted.total());
326 scalarParams.set(key, scalar);
330 // Only tensors and scalars are supported for table fields.
331 // i.e. nn.Inception has field `transfer` which is an
332 // activation layer. So we remove added modules as readTorchObject(index).
333 while (curModule->modules.size() > numModules)
334 curModule->modules.pop_back();
337 else if (vtype == TYPE_NUMBER)
339 scalarParams.set(key, readDouble());
341 else if (vtype == TYPE_STRING)
343 scalarParams.set(key, readString());
345 else if (vtype == TYPE_BOOLEAN)
347 scalarParams.set(key, readBool());
// Unsupported value type: rewind and consume it generically.
351 THFile_seek(file, fpos);
// Debug dump of everything collected (guarded by dbgPrint upstream).
359 std::cout << "scalarParams:\n";
360 std::cout << scalarParams;
362 std::cout << "#" << tensorParams.size() << " tensorParams:\n";
363 std::map<String,std::pair<int, Mat> >::const_iterator it;
364 for (it = tensorParams.begin(); it != tensorParams.end(); it++)
365 std::cout << it->first << ": Tensor " << it->second.second.size << "\n";
// Reads a torch.*Tensor record: header (ndims, sizes, strides, offset)
// followed by a reference to its backing Storage. Builds a strided Mat
// view over the storage, converts it to CV_32F, and registers it in
// `tensors` under `indexTensor`.
369 void readTorchTensor(int indexTensor, int typeTensor)
371 int ndims = readInt();
372 AutoBuffer<int64, 4> sizes(ndims);
373 AutoBuffer<int64, 4> steps(ndims);
374 THFile_readLongRaw(file, sizes, ndims);
375 THFile_readLongRaw(file, steps, ndims);
// Torch offsets are 1-based; convert to 0-based.
376 long offset = readLong() - 1;
379 int typeidx = readInt();
380 CV_Assert(typeidx == TYPE_TORCH || (typeidx == TYPE_NIL && ndims == 0));
// A nil storage with 0 dims is an empty tensor.
382 if (typeidx == TYPE_NIL)
384 tensors.insert(std::make_pair(indexTensor, Mat()));
388 int indexStorage = readInt();
// First reference to this storage: read it now and take its actual
// element type (LongStorage is materialized as CV_64F).
389 if (readedIndexes.count(indexStorage) == 0)
391 String className = readTorchClassName();
392 int typeStorage = parseStorageType(className);
393 CV_Assert(typeStorage >= 0 && typeTensor == typeStorage);
394 readTorchStorage(indexStorage, typeStorage);
395 typeTensor = storages[indexStorage].type();
396 readedIndexes.insert(indexStorage);
// Bounds check: the strided view must fit inside the storage.
400 size_t requireElems = (size_t)offset + (size_t)steps[0] * (size_t)sizes[0];
401 size_t storageElems = storages[indexStorage].total();
402 if (requireElems > storageElems)
403 CV_Error(Error::StsBadSize, "Storage has insufficent number of elemements for requested Tensor");
// Convert int64 sizes / element strides into Mat's int sizes and
// byte strides.
406 AutoBuffer<int, 4> isizes(ndims);
407 AutoBuffer<size_t, 4> ssteps(ndims);
408 for (int i = ndims - 1; i >= 0; i--)
410 isizes[i] = (int)sizes[i];
411 ssteps[i] = (size_t)steps[i] * CV_ELEM_SIZE(typeTensor);
// Non-owning view over the storage; convertTo below makes the copy.
415 Mat srcMat(ndims, (int*)isizes, typeTensor , storages[indexStorage].ptr() + offset*CV_ELEM_SIZE(typeTensor), (size_t*)ssteps);
416 int dstType = CV_32F;
419 srcMat.convertTo(blob, dstType);
421 tensors.insert(std::make_pair(indexTensor, blob));
// Tests whether `className` belongs to one of the Torch nn package
// namespaces; on success, strips the prefix into `nnName`.
// NOTE(review): the return statements of both outcomes are elided.
424 static bool isNNClass(const String &className, String &nnName)
426 const char *prefixes[] = {"nn.", "cunn.", "cudnn.", "fbcunn.", NULL};
428 for (int i = 0; prefixes[i]; i++)
430 if (startsWith(className, prefixes[i]))
432 nnName = className.substr(strlen(prefixes[i]));
// Copies Torch's kernel geometry fields (kH/kW, dH/dW, padH/padW) to
// the cv::dnn naming convention; padding defaults to 0 when absent.
440 static void convertTorchKernelsParams(const Dict &torchParams, cv::dnn::LayerParams &layerParams)
442 layerParams.set("kernel_h", torchParams.get<int>("kH"));
443 layerParams.set("kernel_w", torchParams.get<int>("kW"));
444 layerParams.set("stride_h", torchParams.get<int>("dH"));
445 layerParams.set("stride_w", torchParams.get<int>("dW"));
446 layerParams.set("pad_h", torchParams.get<int>("padH", 0));
447 layerParams.set("pad_w", torchParams.get<int>("padW", 0));
// Core dispatcher: reads one serialized Torch object (tensor, storage,
// or nn.* module) and translates recognized nn classes into Module
// entries under `curModule`. Spans up to the final error branches.
450 void readTorchObject(int index)
452 if(readedIndexes.count(index))
455 String className = readTorchClassName();
459 std::cout << "Class: " << className << std::endl;
462 if ( (type = parseTensorType(className)) >= 0 ) //is Tensor
464 readTorchTensor(index, type);
466 else if ( (type = parseStorageType(className)) >= 0 ) //is Storage
468 readTorchStorage(index, type);
470 else if (isNNClass(className, nnName))
473 TensorsMap tensorParams;
475 cv::Ptr<Module> newModule(new Module(nnName));
476 cv::dnn::LayerParams &layerParams = newModule->params;
// Remember the source object index for cross-references (e.g. the
// pooling-indices link used by SpatialMaxUnpooling).
478 layerParams.set("torch_index", index);
// Container modules: recurse with curModule pointing at the new
// container so children attach beneath it.
480 if (nnName == "Sequential" || nnName == "Parallel" ||
481 nnName == "Concat" || nnName == "ConcatTable" || nnName == "JoinTable" ||
482 nnName == "DepthConcat" || nnName == "Inception")
484 Module *parentModule = curModule;
485 curModule->modules.push_back(newModule);
486 curModule = newModule;
487 readTorchTable(scalarParams, tensorParams);
488 curModule = parentModule;
// Container-specific attributes consumed later by fill().
490 if (nnName == "Parallel")
492 layerParams.set("inputDimension", scalarParams.get<int>("inputDimension"));
493 layerParams.set("outputDimension", scalarParams.get<int>("outputDimension"));
495 if (nnName == "Concat")
497 layerParams.set("dimension", scalarParams.get<int>("dimension"));
499 if (nnName == "JoinTable")
501 layerParams.set("dimension", scalarParams.get<int>("dimension"));
503 if (nnName == "DepthConcat")
505 layerParams.set("dimension", scalarParams.get<int>("dimension"));
// nn.SpatialConvolution / SpatialConvolutionMM -> dnn "Convolution".
508 else if (nnName == "SpatialConvolution" || nnName == "SpatialConvolutionMM")
510 newModule->apiType = "Convolution";
511 readTorchTable(scalarParams, tensorParams);
513 CV_Assert(tensorParams.count("weight"));
514 layerParams.blobs.push_back(tensorParams["weight"].second);
516 bool bias = tensorParams.count("bias") != 0;
517 layerParams.set("bias_term", bias);
// NOTE(review): presumably guarded by `if (bias)` on an elided line.
519 layerParams.blobs.push_back(tensorParams["bias"].second);
521 layerParams.set("num_output", scalarParams.get<int>("nOutputPlane"));
522 convertTorchKernelsParams(scalarParams, layerParams);
524 if (nnName == "SpatialConvolutionMM")
526 // Split weights from a [ outCh x inCh*kH*kW ] 2D matrix
527 // onto a 4D [ outCh x inCh x kH x kW ] blob.
528 CV_Assert(layerParams.blobs[0].dims == 2);
529 const int kernel = layerParams.blobs[0].size[1]; // inCh * kH * kW
530 MatShape kernelShape(4);
531 kernelShape[0] = layerParams.blobs[0].size[0]; // outCh.
532 kernelShape[2] = layerParams.get<int>("kernel_h");
533 kernelShape[3] = layerParams.get<int>("kernel_w");
534 kernelShape[1] = kernel / (kernelShape[2] * kernelShape[3]); // inCh.
535 layerParams.blobs[0] = layerParams.blobs[0].reshape(1, kernelShape);
537 curModule->modules.push_back(newModule);
// nn.SpatialLPPooling serializes as a container whose children (the
// pow/pool/mul sequence sketched below) do the actual work.
539 else if (nnName == "SpatialLPPooling")
542 // [input -> (1) -> (2) -> output]
543 // (1): nn.Sequential {
544 // [input -> (1) -> (2) -> (3) -> (4) -> output]
546 // (2): nn.SpatialAveragePooling(...)
547 // (3): nn.MulConstant
552 // nn.SpatialLPPooling is just a table so we skip it.
553 readTorchTable(scalarParams, tensorParams);
// Max/average pooling -> dnn "Pooling". Max pooling also records which
// indices tensor it produced so SpatialMaxUnpooling can pair with it.
555 else if (nnName == "SpatialMaxPooling" || nnName == "SpatialAveragePooling")
557 newModule->apiType = "Pooling";
558 readTorchTable(scalarParams, tensorParams);
560 if (nnName == "SpatialMaxPooling") {
561 layerParams.set("pool", "MAX");
562 layerParams.set("indices_blob_id", tensorParams["indices"].first);
564 if (nnName == "SpatialAveragePooling")
565 layerParams.set("pool", "AVE");
566 convertTorchKernelsParams(scalarParams, layerParams);
568 CV_Assert(scalarParams.has("ceil_mode"));
569 layerParams.set("ceil_mode", scalarParams.get<bool>("ceil_mode"));
571 curModule->modules.push_back(newModule);
// nn.Linear -> dnn "InnerProduct"; output count is the weight's rows.
573 else if (nnName == "Linear")
575 newModule->apiType = "InnerProduct";
576 readTorchTable(scalarParams, tensorParams);
578 CV_Assert(tensorParams.count("weight"));
579 Mat weightBlob = tensorParams["weight"].second;
580 layerParams.blobs.push_back(weightBlob);
582 bool bias = tensorParams.count("bias") != 0;
// NOTE(review): presumably guarded by `if (bias)` on an elided line.
584 layerParams.blobs.push_back(tensorParams["bias"].second);
585 layerParams.set("bias_term", bias);
587 layerParams.set("num_output", weightBlob.size[0]);
588 curModule->modules.push_back(newModule);
// nn.Reshape / nn.View -> dnn "Reshape"; batchMode means the first
// (batch) axis is preserved, hence axis = 1.
590 else if (nnName == "Reshape" || nnName == "View")
592 newModule->apiType = "Reshape";
594 readTorchTable(scalarParams, tensorParams);
595 CV_Assert(scalarParams.has("size"));
597 DictValue dimParam = scalarParams.get("size");
598 layerParams.set("dim", dimParam);
600 if (scalarParams.has("batchMode") && scalarParams.get<bool>("batchMode"))
601 layerParams.set("axis", 1);
603 curModule->modules.push_back(newModule);
// Parameter-free activations map 1:1 onto dnn layer types.
605 else if (nnName == "ReLU")
607 curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "ReLU")));
610 else if (nnName == "Tanh")
612 curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "TanH")));
615 else if (nnName == "Sigmoid")
617 curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "Sigmoid")));
// BatchNorm variants -> dnn "BatchNorm". Blob layout expected by the
// layer: [0] mean, [1] variance, then optional weight and bias.
620 else if (nnName == "SpatialBatchNormalization" || nnName == "InstanceNormalization" ||
621 nnName == "BatchNormalization")
623 newModule->apiType = "BatchNorm";
624 readTorchTable(scalarParams, tensorParams);
626 CV_Assert(scalarParams.has("eps"));
627 float eps = float(scalarParams.get<double>("eps"));
628 layerParams.set("eps", eps);
// Mean: stored running_mean, or zeros of size nOutput when absent.
630 if (tensorParams.count("running_mean"))
632 layerParams.blobs.push_back(tensorParams["running_mean"].second);
636 CV_Assert(scalarParams.has("nOutput"));
637 layerParams.blobs.push_back(Mat::zeros(1, scalarParams.get<int>("nOutput"), CV_32F));
// Variance: running_var directly; running_std is converted via
// var = std^-2 - eps; otherwise ones of size nOutput.
640 if (tensorParams.count("running_var"))
642 layerParams.blobs.push_back(tensorParams["running_var"].second);
644 else if (tensorParams.count("running_std"))
646 layerParams.blobs.push_back(tensorParams["running_std"].second);
647 pow(layerParams.blobs.back(), -2, layerParams.blobs.back());
648 subtract(layerParams.blobs.back(), eps, layerParams.blobs.back());
652 CV_Assert(scalarParams.has("nOutput"));
653 layerParams.blobs.push_back(Mat::ones(1, scalarParams.get<int>("nOutput"), CV_32F));
// Optional learned scale/shift.
656 if (tensorParams.count("weight"))
658 layerParams.set("has_weight", true);
659 layerParams.blobs.push_back(tensorParams["weight"].second);
662 if (tensorParams.count("bias"))
664 layerParams.set("has_bias", true);
665 layerParams.blobs.push_back(tensorParams["bias"].second);
// InstanceNormalization = per-sample MVN followed by a BatchNorm whose
// mean/std are neutralized (0 / 1) so only weight/bias apply.
668 if (nnName == "InstanceNormalization")
670 cv::Ptr<Module> mvnModule(new Module(nnName));
671 mvnModule->apiType = "MVN";
672 curModule->modules.push_back(mvnModule);
674 layerParams.blobs[0].setTo(0); // batch norm's mean
675 layerParams.blobs[1].setTo(1); // batch norm's std
678 curModule->modules.push_back(newModule);
// nn.PReLU: per-channel slopes -> "ChannelsPReLU"; a single shared
// slope degenerates to plain "ReLU" with negative_slope.
680 else if (nnName == "PReLU")
682 readTorchTable(scalarParams, tensorParams);
684 CV_Assert(tensorParams.count("weight"));
686 size_t outputChannels = static_cast<int>(scalarParams.get<double>("nOutputPlane"));
687 if (outputChannels) {
689 CV_Assert(tensorParams["weight"].second.total() == outputChannels);
690 layerParams.blobs.push_back(tensorParams["weight"].second);
692 newModule->apiType = "ChannelsPReLU";
695 CV_Assert(tensorParams["weight"].second.total() == 1);
696 float negative_slope = *tensorParams["weight"].second.ptr<float>();
697 layerParams.set("negative_slope", negative_slope);
699 newModule->apiType = "ReLU";
702 curModule->modules.push_back(newModule);
// Dropout at inference: v2 dropout already rescales at train time, so
// it becomes Identity; otherwise emulate with a Power layer scaling by
// the keep-probability (1 - p).
704 else if (nnName == "SpatialDropout" || nnName == "Dropout")
706 readTorchTable(scalarParams, tensorParams);
707 CV_Assert(scalarParams.has("p"));
709 if (scalarParams.has("v2") && scalarParams.get<bool>("v2"))
711 newModule->apiType = "Identity";
715 float scale = 1 - scalarParams.get<double>("p");
717 CV_Assert(scale > 0);
719 newModule->apiType = "Power";
720 layerParams.set("scale", scale);
722 curModule->modules.push_back(newModule);
724 // TotalVariation layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
725 // It's a loss function that has an Identity forward.
726 else if (nnName == "Identity" || nnName == "TotalVariation")
728 readTorchTable(scalarParams, tensorParams);
729 newModule->apiType = "Identity";
730 curModule->modules.push_back(newModule);
// nn.Normalize -> dnn "Normalize" with Lp norm order and optional eps.
732 else if (nnName == "Normalize")
734 readTorchTable(scalarParams, tensorParams);
735 CV_Assert(scalarParams.has("p"));
737 layerParams.set("p", scalarParams.get<float>("p"));
738 if (scalarParams.has("eps"))
739 layerParams.set("eps", scalarParams.get<float>("eps"));
741 newModule->apiType = "Normalize";
742 curModule->modules.push_back(newModule);
// nn.Padding: one-sided padding of a single axis; the sign of `pad`
// selects before/after (Lua axes are 1-based, hence dim - 1).
744 else if (nnName == "Padding")
746 readTorchTable(scalarParams, tensorParams);
747 newModule->apiType = "Padding";
749 CV_Assert(scalarParams.has("pad") && scalarParams.has("dim"));
750 if (scalarParams.has("index") && scalarParams.get<int>("index") != 1)
751 CV_Error(Error::StsNotImplemented, "Padding with offset is not implemented");
753 if (scalarParams.has("value"))
754 layerParams.set("value", scalarParams.get<float>("value"));
756 if (scalarParams.has("nInputDim"))
757 layerParams.set("input_dims", scalarParams.get<int>("nInputDim"));
759 int dim = scalarParams.get<int>("dim") - 1; // In Lua we start from 1.
760 int pad = scalarParams.get<int>("pad");
// paddings is laid out as [before0, after0, before1, after1, ...].
762 std::vector<int> paddings((dim + 1) * 2, 0);
764 paddings[dim * 2 + 1] = pad; // Pad after (right).
766 paddings[dim * 2] = -pad; // Pad before (left).
767 layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
769 curModule->modules.push_back(newModule);
// nn.CAddTable carries no parameters here; fill() wires it as an
// Eltwise sum over unconnected outputs.
771 else if (nnName == "CAddTable")
773 curModule->modules.push_back(newModule);
// nn.SpatialDilatedConvolution -> dnn "Convolution" with dilation.
776 else if (nnName == "SpatialDilatedConvolution")
778 readTorchTable(scalarParams, tensorParams);
779 newModule->apiType = "Convolution";
780 CV_Assert(scalarParams.has("padW") &&
781 scalarParams.has("padH")&&
782 scalarParams.has("dW")&&
783 scalarParams.has("dH")&&
784 scalarParams.has("dilationW")&&
785 scalarParams.has("dilationH")&&
786 scalarParams.has("kW")&&
787 scalarParams.has("kH")&&
788 scalarParams.has("nOutputPlane"));
// Values are serialized as Lua numbers (doubles); truncate to int.
790 layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));
791 layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));
792 layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));
793 layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));
794 layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));
795 layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));
796 layerParams.set("dilation_w", static_cast<int>(scalarParams.get<double>("dilationW")));
797 layerParams.set("dilation_h", static_cast<int>(scalarParams.get<double>("dilationH")));
798 layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));
800 layerParams.blobs.push_back(tensorParams["weight"].second);
802 bool bias = tensorParams.count("bias");
803 layerParams.set("bias_term", bias);
// NOTE(review): presumably guarded by `if (bias)` on an elided line.
805 layerParams.blobs.push_back(tensorParams["bias"].second);
807 curModule->modules.push_back(newModule);
// nn.SpatialFullConvolution (transposed conv) -> dnn "Deconvolution";
// adjW/adjH are the output-size adjustment terms.
809 else if (nnName == "SpatialFullConvolution")
811 readTorchTable(scalarParams, tensorParams);
812 newModule->apiType = "Deconvolution";
813 CV_Assert(scalarParams.has("padW") &&
814 scalarParams.has("padH")&&
815 scalarParams.has("dW")&&
816 scalarParams.has("dH")&&
817 scalarParams.has("adjW")&&
818 scalarParams.has("adjH")&&
819 scalarParams.has("kW")&&
820 scalarParams.has("kH")&&
821 scalarParams.has("nOutputPlane"));
823 layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));
824 layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));
825 layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));
826 layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));
827 layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));
828 layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));
829 layerParams.set("adj_w", static_cast<int>(scalarParams.get<double>("adjW")));
830 layerParams.set("adj_h", static_cast<int>(scalarParams.get<double>("adjH")));
831 layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));
833 layerParams.blobs.push_back(tensorParams["weight"].second);
835 bool bias = tensorParams.count("bias");
836 layerParams.set("bias_term", bias);
838 layerParams.blobs.push_back(tensorParams["bias"].second);
840 curModule->modules.push_back(newModule);
// nn.SpatialMaxUnpooling: records the indices blob id; the matching
// pooling layer's geometry is resolved later in fill().
842 else if (nnName == "SpatialMaxUnpooling")
844 readTorchTable(scalarParams, tensorParams);
845 CV_Assert(tensorParams.count("indices"));
847 layerParams.set("indices_blob_id", tensorParams["indices"].first);
848 curModule->modules.push_back(newModule);
// SoftMax / LogSoftMax share the dnn "SoftMax" layer; the latter just
// sets the log flag.
850 else if (nnName == "SoftMax")
852 newModule->apiType = "SoftMax";
853 curModule->modules.push_back(newModule);
855 else if (nnName == "LogSoftMax")
857 newModule->apiType = "SoftMax";
858 layerParams.set("log_softmax", true);
859 curModule->modules.push_back(newModule);
// nn.SpatialCrossMapLRN -> dnn "LRN" (across-channels normalization).
861 else if (nnName == "SpatialCrossMapLRN")
863 newModule->apiType = "LRN";
864 readTorchTable(scalarParams, tensorParams);
866 CV_Assert(scalarParams.has("alpha"));
867 CV_Assert(scalarParams.has("beta"));
868 CV_Assert(scalarParams.has("k"));
869 CV_Assert(scalarParams.has("size"));
871 layerParams.set("norm_region", "ACROSS_CHANNELS");
872 layerParams.set("alpha", scalarParams.get<float>("alpha"));
873 layerParams.set("beta", scalarParams.get<float>("beta"));
874 layerParams.set("bias", scalarParams.get<float>("k"));
875 layerParams.set("local_size", scalarParams.get<int>("size"));
876 layerParams.set("norm_by_size", true);
878 curModule->modules.push_back(newModule);
// Square / Sqrt / Power all collapse to a dnn "Power" layer with the
// corresponding exponent.
880 else if (nnName == "Square" || nnName == "Sqrt" || nnName == "Power")
882 readTorchTable(scalarParams, tensorParams);
885 if (nnName == "Square") power = 2.0f;
886 else if (nnName == "Sqrt") power = 0.5f;
887 else if (nnName == "Power") power = scalarParams.get<float>("pow", 1.0f);
889 newModule->apiType = "Power";
890 layerParams.set("power", power);
891 curModule->modules.push_back(newModule);
// nn.MulConstant -> "Power" layer with only a scale factor.
893 else if (nnName == "MulConstant")
895 readTorchTable(scalarParams, tensorParams);
896 CV_Assert(scalarParams.has("constant_scalar"));
897 newModule->apiType = "Power";
898 layerParams.set("scale", scalarParams.get<float>("constant_scalar"));
899 curModule->modules.push_back(newModule);
// SpatialZeroPadding / SpatialReflectionPadding -> dnn "Padding".
901 else if (nnName == "SpatialZeroPadding" || nnName == "SpatialReflectionPadding")
903 readTorchTable(scalarParams, tensorParams);
// Fix: the previous CV_Assert used commas between the four conditions;
// the comma operator discards all but the last expression, so only
// scalarParams.has("pad_b") was actually checked. Join with && so all
// four padding fields are verified.
904 CV_Assert(scalarParams.has("pad_l") && scalarParams.has("pad_r") &&
905 scalarParams.has("pad_t") && scalarParams.has("pad_b"));
906 int padTop = scalarParams.get<int>("pad_t");
907 int padLeft = scalarParams.get<int>("pad_l");
908 int padRight = scalarParams.get<int>("pad_r");
909 int padBottom = scalarParams.get<int>("pad_b");
// Negative pads mean cropping, which the Padding layer cannot express.
910 if (padTop < 0 || padLeft < 0 || padRight < 0 || padBottom < 0)
911 CV_Error(Error::StsNotImplemented, "SpatialZeroPadding in cropping mode is not implemented");
913 newModule->apiType = "Padding";
915 // Torch's SpatialZeroPadding works with 3- or 4-dimensional input.
916 // So we add parameter input_dims=3 to ignore batch dimension if it will be.
917 std::vector<int> paddings(6, 0); // CHW
918 paddings[2] = padTop;
919 paddings[3] = padBottom;
920 paddings[4] = padLeft;
921 paddings[5] = padRight;
922 layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
923 layerParams.set("input_dims", 3);
925 if (nnName == "SpatialReflectionPadding")
926 layerParams.set("type", "reflect");
928 curModule->modules.push_back(newModule);
930 else if (nnName == "ShaveImage")
932 // ShaveImage layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
933 // It may be mapped to Slice layer.
934 readTorchTable(scalarParams, tensorParams);
935 CV_Assert(scalarParams.has("size"));
936 int size = scalarParams.get<int>("size");
// Crop `size` pixels off every spatial border via Slice ranges.
938 int begins[] = {0, 0, size, size};
939 int ends[] = {-1, -1, -size - 1, -size - 1};
941 newModule->apiType = "Slice";
942 layerParams.set("begin", DictValue::arrayInt<int*>(&begins[0], 4));
943 layerParams.set("end", DictValue::arrayInt<int*>(&ends[0], 4));
944 curModule->modules.push_back(newModule);
// Unrecognized nn class / non-nn Torch class: hard errors.
948 CV_Error(Error::StsNotImplemented, "Unknown nn class \"" + className + "\"");
953 CV_Error(Error::StsNotImplemented, "Unsupported Torch class \"" + className + "\"");
956 readedIndexes.insert(index);
// readObject(): reads one serialized value of any Lua type, delegating
// Torch objects to readTorchObject. NOTE(review): the signature line
// and most branch bodies are elided from this view.
961 int typeidx = readInt();
963 if (typeidx == TYPE_TORCH)
965 int index = readInt();
966 readTorchObject(index);
967 readedIndexes.insert(index);
969 else if (typeidx == TYPE_NIL)
971 else if (typeidx == TYPE_NUMBER)
973 else if (typeidx == TYPE_BOOLEAN)
975 else if (typeidx == TYPE_STRING)
977 else if (typeidx == TYPE_TABLE)
980 CV_Error(Error::StsNotImplemented, "Unsupported Lua type");
// Produces a unique dnn layer name "l<counter>_<label>"; the counter
// increment also keeps layer ids in sync (see the "Skip split layer"
// moduleCounter bumps in fill()).
983 inline String generateLayerName(const String &label = String())
985 return "l" + toString(++this->moduleCounter) + "_" + label;
// Recursively materializes the Module tree into `net` layers, wiring
// each new layer to (prevLayerId, prevOutNum). Returns the id of the
// last layer added for the subtree. Leaf modules (non-empty apiType)
// become a single layer; containers are expanded per thName below.
988 int fill(Module *module, std::vector<std::pair<int, Module*> >& addedModules, int prevLayerId = 0, int prevOutNum = 0)
993 if (module->apiType.length())
995 int newLayerId = net.addLayer(generateLayerName(module->apiType), module->apiType, module->params);
996 net.connect(prevLayerId, prevOutNum, newLayerId, 0);
997 addedModules.push_back(std::make_pair(newLayerId, module));
// Sequential / Inception: children chained one after another.
1002 if (module->thName == "Sequential" || module->thName == "Inception")
1004 for (size_t i = 0; i < module->modules.size(); i++)
1006 prevLayerId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
// Concat: each child branch consumes the same input; branch outputs
// are merged by a Concat layer along (dimension - 1), converting from
// Lua's 1-based axis numbering.
1011 else if (module->thName == "Concat")
1014 LayerParams mergeParams;
1015 mergeParams.set("axis", module->params.get<int>("dimension") - 1);
1017 std::vector<int> branchIds;
1018 for (int i = 0; i < (int)module->modules.size(); i++)
1020 newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
1021 branchIds.push_back(newId);
1024 moduleCounter += 1; // Skip split layer creation. See https://github.com/opencv/opencv/pull/9384.
1025 mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
1027 for (int i = 0; i < branchIds.size(); i++)
1029 net.connect(branchIds[i], 0, mergeId, i);
1032 addedModules.push_back(std::make_pair(mergeId, module));
// DepthConcat: same as Concat but the merge layer pads mismatched
// spatial sizes (padding=true).
1035 else if (module->thName == "DepthConcat")
1038 LayerParams mergeParams;
1039 mergeParams.set("axis", module->params.get<int>("dimension") - 1);
1040 mergeParams.set("padding", true);
1042 std::vector<int> branchIds;
1043 for (int i = 0; i < (int)module->modules.size(); i++)
1045 newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
1046 branchIds.push_back(newId);
1049 mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
1051 for (int i = 0; i < branchIds.size(); i++)
1053 net.connect(branchIds[i], 0, mergeId, i);
1056 addedModules.push_back(std::make_pair(mergeId, module));
// Parallel: the input is sliced along inputDimension, each slice is
// reshaped and fed to one child branch, and branch outputs are merged
// by Concat along outputDimension (both converted to 0-based axes).
1059 else if (module->thName == "Parallel")
1061 int newId, splitId, mergeId, reshapeId;
1063 LayerParams splitParams, mergeParams, reshapeParams;
1064 splitParams.set("axis", module->params.get<int>("inputDimension") - 1);
1065 mergeParams.set("axis", module->params.get<int>("outputDimension") - 1);
1066 reshapeParams.set("axis", splitParams.get<int>("axis"));
1067 reshapeParams.set("num_axes", 1);
1069 splitId = net.addLayer(generateLayerName("torchSplit"), "Slice", splitParams);
1070 reshapeId = net.addLayer(generateLayerName("torchReshape"), "Reshape", reshapeParams);
1071 net.connect(prevLayerId, prevOutNum, splitId, 0);
1073 std::vector<int> branchIds;
// Output i of the split feeds input i of the shared reshape layer,
// whose output i feeds branch i.
1074 for (int i = 0; i < (int)module->modules.size(); i++)
1076 net.connect(splitId, i, reshapeId, i);
1077 newId = fill(module->modules[i], addedModules, reshapeId, i);
1078 branchIds.push_back(newId);
1081 mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
1083 for (int i = 0; i < branchIds.size(); i++)
1085 net.connect(branchIds[i], 0, mergeId, i);
1088 addedModules.push_back(std::make_pair(mergeId, module));
// ConcatTable: every child receives the same input; their outputs stay
// separate (to be consumed by a following JoinTable/CAddTable).
1091 else if (module->thName == "ConcatTable") {
1093 moduleCounter += 1; // Skip split layer creation. See https://github.com/opencv/opencv/pull/9384.
1094 for (int i = 0; i < (int)module->modules.size(); i++)
1096 newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
// JoinTable: concatenates all currently-unconnected outputs along
// (dimension - 1).
1100 else if (module->thName == "JoinTable") {
1101 std::vector<int> ids = net.getUnconnectedOutLayers();
1104 LayerParams mergeParams;
1105 mergeParams.set("axis", module->params.get<int>("dimension") - 1);
1107 mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
1108 addedModules.push_back(std::make_pair(mergeId, module));
1110 for (int i = 0; i < ids.size(); i++)
1112 net.connect(ids[i], 0, mergeId, i);
// CAddTable: element-wise sum of all unconnected outputs (Eltwise).
1117 else if (module->thName == "CAddTable") {
1118 String name = generateLayerName("torchCAddTable");
1119 std::vector<int> ids = net.getUnconnectedOutLayers();
1121 params.set("operation", "sum");
1124 int id = net.addLayer(name, "Eltwise", params);
1126 for (int i = 0; i < ids.size(); i++)
1128 net.connect(ids[i], 0, id, i);
1131 addedModules.push_back(std::make_pair(id, module));
// SpatialMaxUnpooling: locate the earlier Pooling layer that produced
// the matching indices blob (paired via indices_blob_id), copy its
// kernel/stride/pad geometry, then wire the unpool layer to both the
// previous layer (data) and the pooling layer's second output (mask).
1134 else if (module->thName == "SpatialMaxUnpooling") {
1135 CV_Assert(module->params.has("indices_blob_id"));
1136 int indicesBlobId = module->params.get<int>("indices_blob_id");
1137 std::pair<int, Module*> poolingLayer;
1138 poolingLayer.first = -1;
1140 for(int i = 0; i < addedModules.size(); i++)
1142 if (addedModules[i].second->apiType == "Pooling" &&
1143 addedModules[i].second->params.has("indices_blob_id") &&
1144 addedModules[i].second->params.get<int>("indices_blob_id") == indicesBlobId)
1146 poolingLayer = addedModules[i];
1151 module->params.set("pool_k_h", poolingLayer.second->params.get<int>("kernel_h"));
1152 module->params.set("pool_k_w", poolingLayer.second->params.get<int>("kernel_w"));
1153 module->params.set("pool_stride_h", poolingLayer.second->params.get<int>("stride_h"));
1154 module->params.set("pool_stride_w", poolingLayer.second->params.get<int>("stride_w"));
1155 module->params.set("pool_pad_h", poolingLayer.second->params.get<int>("pad_h"));
1156 module->params.set("pool_pad_w", poolingLayer.second->params.get<int>("pad_w"));
1158 String name = generateLayerName("torchMaxUnpooling");
1159 int id = net.addLayer(name, "MaxUnpool", module->params);
1160 net.connect(prevLayerId, 0, id, 0);
// Must have found the matching pooling layer.
1162 CV_Assert(poolingLayer.first != -1);
1163 net.connect(poolingLayer.first, 1, id, 1);
// Any other container name is a bug in the reader above.
1169 CV_Error(Error::StsInternal, "Unexpected torch container: " + module->thName);
// Entry point after construction: parses the whole file from offset 0
// into a Module tree rooted at an implicit Sequential, then converts
// that tree into `net` layers via fill(). May only be called once per
// importer (asserted via rootModule == NULL).
1173 void populateNet(Net net_)
1175 CV_TRACE_FUNCTION();
1177 CV_Assert(rootModule == NULL);
1178 cv::Ptr<Module> rootModule_ = cv::makePtr<Module>("Sequential");
1179 rootModule = rootModule_.get();
1180 curModule = rootModule;
1182 THFile_seek(file, 0);
1186 std::vector<std::pair<int, Module*> > addedModules;
1187 fill(rootModule, addedModules);
// Public factory for the Torch importer (enabled-build variant).
1194 Ptr<Importer> createTorchImporter(const String &filename, bool isBinary)
1196 return Ptr<Importer>(new TorchImporter(filename, isBinary));
// Reads a .t7 file expected to contain exactly one serialized tensor.
1200 Mat readTorchBlob(const String &filename, bool isBinary)
1202 Ptr<TorchImporter> importer(new TorchImporter(filename, isBinary));
1203 importer->readObject();
1204 CV_Assert(importer->tensors.size() == 1);
1206 return importer->tensors.begin()->second;
// Convenience wrapper: import `model` into a fresh Net and return it.
1209 Net readNetFromTorch(const String &model, bool isBinary)
1211 CV_TRACE_FUNCTION();
1213 TorchImporter importer(model, isBinary);
1215 importer.populateNet(net);
// Stubs compiled when ENABLE_TORCH_IMPORTER is off: every entry point
// raises StsNotImplemented so callers get a clear diagnostic.
1221 Ptr<Importer> createTorchImporter(const String&, bool)
1223 CV_Error(Error::StsNotImplemented, "Torch importer is disabled in current build");
1224 return Ptr<Importer>();
1227 Mat readTorchBlob(const String&, bool)
1229 CV_Error(Error::StsNotImplemented, "Torch importer is disabled in current build");
1233 Net readNetFromTorch(const String &model, bool isBinary)
1235 CV_Error(Error::StsNotImplemented, "Torch importer is disabled in current build");
1239 #endif //defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER
1241 CV__DNN_EXPERIMENTAL_NS_END