Custom layers for deep learning networks (#11129)
authorDmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Tue, 24 Apr 2018 11:59:59 +0000 (14:59 +0300)
committerVadim Pisarevsky <vadim.pisarevsky@gmail.com>
Tue, 24 Apr 2018 11:59:59 +0000 (14:59 +0300)
* Custom deep learning layers support

* Stack custom deep learning layers

19 files changed:
3rdparty/protobuf/src/google/protobuf/text_format.cc
3rdparty/protobuf/src/google/protobuf/text_format.h
doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md [new file with mode: 0644]
doc/tutorials/dnn/table_of_content_dnn.markdown
modules/dnn/include/opencv2/dnn/all_layers.hpp
modules/dnn/include/opencv2/dnn/dict.hpp
modules/dnn/include/opencv2/dnn/dnn.inl.hpp
modules/dnn/include/opencv2/dnn/layer.details.hpp
modules/dnn/include/opencv2/dnn/layer.hpp
modules/dnn/src/caffe/caffe_importer.cpp
modules/dnn/src/caffe/caffe_io.cpp
modules/dnn/src/dnn.cpp
modules/dnn/src/tensorflow/tf_importer.cpp
modules/dnn/src/torch/torch_importer.cpp
modules/dnn/test/test_layers.cpp
modules/dnn/test/test_misc.cpp
modules/dnn/test/test_tf_importer.cpp
modules/dnn/test/test_torch_importer.cpp
samples/cpp/tutorial_code/dnn/custom_layers.cpp [new file with mode: 0644]

index eed2a76..78f1acd 100644 (file)
@@ -469,8 +469,9 @@ class TextFormat::Parser::ParserImpl {
                       "\" has no field named \"" + field_name + "\".");
           return false;
         } else {
-          ReportWarning("Message type \"" + descriptor->full_name() +
-                        "\" has no field named \"" + field_name + "\".");
+          // No warnings to let user define custom layers (see https://github.com/opencv/opencv/pull/11129)
+          // ReportWarning("Message type \"" + descriptor->full_name() +
+          //               "\" has no field named \"" + field_name + "\".");
         }
       }
     }
@@ -485,10 +486,13 @@ class TextFormat::Parser::ParserImpl {
       // start with "{" or "<" which indicates the beginning of a message body.
       // If there is no ":" or there is a "{" or "<" after ":", this field has
       // to be a message or the input is ill-formed.
+      UnknownFieldSet* unknown_fields = reflection->MutableUnknownFields(message);
       if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
-        return SkipFieldValue();
+        UnknownFieldSet* unknown_field = unknown_fields->AddGroup(unknown_fields->field_count());
+        unknown_field->AddLengthDelimited(0, field_name);  // Add a field's name.
+        return SkipFieldValue(unknown_field);
       } else {
-        return SkipFieldMessage();
+        return SkipFieldMessage(unknown_fields);
       }
     }
 
@@ -571,7 +575,7 @@ label_skip_parsing:
   }
 
   // Skips the next field including the field's name and value.
-  bool SkipField() {
+  bool SkipField(UnknownFieldSet* unknown_fields) {
     string field_name;
     if (TryConsume("[")) {
       // Extension name.
@@ -588,9 +592,11 @@ label_skip_parsing:
     // If there is no ":" or there is a "{" or "<" after ":", this field has
     // to be a message or the input is ill-formed.
     if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
-      DO(SkipFieldValue());
+      UnknownFieldSet* unknown_field = unknown_fields->AddGroup(unknown_fields->field_count());
+      unknown_field->AddLengthDelimited(0, field_name);  // Add a field's name.
+      DO(SkipFieldValue(unknown_field));
     } else {
-      DO(SkipFieldMessage());
+      DO(SkipFieldMessage(unknown_fields));
     }
     // For historical reasons, fields may optionally be separated by commas or
     // semicolons.
@@ -625,11 +631,11 @@ label_skip_parsing:
 
   // Skips the whole body of a message including the beginning delimiter and
   // the ending delimiter.
-  bool SkipFieldMessage() {
+  bool SkipFieldMessage(UnknownFieldSet* unknown_fields) {
     string delimiter;
     DO(ConsumeMessageDelimiter(&delimiter));
     while (!LookingAt(">") &&  !LookingAt("}")) {
-      DO(SkipField());
+      DO(SkipField(unknown_fields));
     }
     DO(Consume(delimiter));
     return true;
@@ -769,7 +775,7 @@ label_skip_parsing:
     return true;
   }
 
-  bool SkipFieldValue() {
+  bool SkipFieldValue(UnknownFieldSet* unknown_field) {
     if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
       while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
         tokenizer_.Next();
@@ -779,9 +785,9 @@ label_skip_parsing:
     if (TryConsume("[")) {
       while (true) {
         if (!LookingAt("{") && !LookingAt("<")) {
-          DO(SkipFieldValue());
+          DO(SkipFieldValue(unknown_field));
         } else {
-          DO(SkipFieldMessage());
+          DO(SkipFieldMessage(unknown_field));
         }
         if (TryConsume("]")) {
           break;
@@ -833,6 +839,8 @@ label_skip_parsing:
         return false;
       }
     }
+    // Use a tag 1 because tag 0 is used for field's name.
+    unknown_field->AddLengthDelimited(1, tokenizer_.current().text);
     tokenizer_.Next();
     return true;
   }
@@ -1298,13 +1306,13 @@ class TextFormat::Printer::TextGenerator
 TextFormat::Finder::~Finder() {
 }
 
-TextFormat::Parser::Parser()
+TextFormat::Parser::Parser(bool allow_unknown_field)
   : error_collector_(NULL),
     finder_(NULL),
     parse_info_tree_(NULL),
     allow_partial_(false),
     allow_case_insensitive_field_(false),
-    allow_unknown_field_(false),
+    allow_unknown_field_(allow_unknown_field),
     allow_unknown_enum_(false),
     allow_field_number_(false),
     allow_relaxed_whitespace_(false),
index a2670d6..74d89a5 100644 (file)
@@ -457,7 +457,7 @@ class LIBPROTOBUF_EXPORT TextFormat {
   // For more control over parsing, use this class.
   class LIBPROTOBUF_EXPORT Parser {
    public:
-    Parser();
+    Parser(bool allow_unknown_field = false);
     ~Parser();
 
     // Like TextFormat::Parse().
diff --git a/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md b/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md
new file mode 100644 (file)
index 0000000..f0e4197
--- /dev/null
@@ -0,0 +1,192 @@
+# Custom deep learning layers support {#tutorial_dnn_custom_layers}
+
+## Introduction
+Deep learning is a fast-growing area. New approaches to building neural networks
+usually introduce new types of layers. They could be modifications of existing
+ones or implementations of outstanding research ideas.
+
+OpenCV gives an opportunity to import and run networks from different deep learning
+frameworks. A number of the most popular layers are supported. However, you can face
+a problem that your network cannot be imported using OpenCV because of unimplemented layers.
+
+The first solution is to create a feature request at https://github.com/opencv/opencv/issues
+mentioning details such as the source of the model and the type of the new layer. A new
+layer could be implemented if the OpenCV community shares this need.
+
+The second way is to define a **custom layer** so that OpenCV's deep learning engine
+will know how to use it. This tutorial is dedicated to showing you the process of
+customizing the import of deep learning models.
+
+## Define a custom layer in C++
+A deep learning layer is a building block of a network's pipeline.
+It has connections to **input blobs** and produces results to **output blobs**.
+It may also have trained **weights** and **hyper-parameters**.
+Layers' names, types, weights and hyper-parameters are stored in files generated by
+native frameworks during training. If OpenCV meets an unknown layer type, it throws an
+exception while trying to read a model:
+
+```
+Unspecified error: Can't create layer "layer_name" of type "MyType" in function getLayerInstance
+```
+
+To import the model correctly you have to derive a class from cv::dnn::Layer with
+the following methods:
+
+@snippet dnn/custom_layers.cpp A custom layer interface
+
+And register it before the import:
+
+@snippet dnn/custom_layers.cpp Register a custom layer
+
+@note `MyType` is a type of unimplemented layer from the thrown exception.
+
+Let's see what all the methods do:
+
+- Constructor
+
+@snippet dnn/custom_layers.cpp MyLayer::MyLayer
+
+Retrieves hyper-parameters from cv::dnn::LayerParams. If your layer has trainable
+weights they will be already stored in the Layer's member cv::dnn::Layer::blobs.
+
+- A static method `create`
+
+@snippet dnn/custom_layers.cpp MyLayer::create
+
+This method should create an instance of your layer and return a cv::Ptr to it.
+
+- Output blobs' shape computation
+
+@snippet dnn/custom_layers.cpp MyLayer::getMemoryShapes
+
+Returns the layer's output shapes depending on the input shapes. You may request extra
+memory using `internals`.
+
+- Run a layer
+
+@snippet dnn/custom_layers.cpp MyLayer::forward
+
+Implement a layer's logic here. Compute outputs for given inputs.
+
+@note OpenCV manages memory allocated for layers. In most cases the same memory
+can be reused between layers. So your `forward` implementation should not rely on
+the second invocation of `forward` having the same data at `outputs` and `internals`.
+
+- Optional `finalize` method
+
+@snippet dnn/custom_layers.cpp MyLayer::finalize
+
+The chain of methods is the following: OpenCV's deep learning engine calls the `create`
+method once, then it calls `getMemoryShapes` for every created layer, then you
+can make some preparations that depend on the known input dimensions at cv::dnn::Layer::finalize.
+After the network is initialized, only the `forward` method is called for every network input.
+
+@note By varying input blobs' sizes such as height, width or batch size, you make OpenCV
+reallocate all the internal memory. That leads to efficiency gaps. Try to initialize
+and deploy models using a fixed batch size and image dimensions.
+
+## Example: custom layer from Caffe
+Let's create a custom layer `Interp` from https://github.com/cdmh/deeplab-public.
+It's just a simple resize that takes an input blob of size `N x C x Hi x Wi` and returns
+an output blob of size `N x C x Ho x Wo` where `N` is a batch size, `C` is a number of channels,
+`Hi x Wi` and `Ho x Wo` are input and output `height x width` correspondingly.
+This layer has no trainable weights but it has hyper-parameters to specify an output size.
+
+For example,
+~~~~~~~~~~~~~
+layer {
+  name: "output"
+  type: "Interp"
+  bottom: "input"
+  top: "output"
+  interp_param {
+    height: 9
+    width: 8
+  }
+}
+~~~~~~~~~~~~~
+
+This way our implementation can look like:
+
+@snippet dnn/custom_layers.cpp InterpLayer
+
+Next we need to register a new layer type and try to import the model.
+
+@snippet dnn/custom_layers.cpp Register InterpLayer
+
+## Example: custom layer from TensorFlow
+This is an example of how to import a network with [tf.image.resize_bilinear](https://www.tensorflow.org/versions/master/api_docs/python/tf/image/resize_bilinear)
+operation. This is also a resize but with an implementation different from OpenCV's or `Interp` above.
+
+Let's create a single layer network:
+~~~~~~~~~~~~~{.py}
+inp = tf.placeholder(tf.float32, [2, 3, 4, 5], 'input')
+resized = tf.image.resize_bilinear(inp, size=[9, 8], name='resize_bilinear')
+~~~~~~~~~~~~~
+OpenCV sees TensorFlow's graph in the following way:
+
+```
+node {
+  name: "input"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+node {
+  name: "resize_bilinear/size"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\t\000\000\000\010\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "resize_bilinear"
+  op: "ResizeBilinear"
+  input: "input:0"
+  input: "resize_bilinear/size"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "align_corners"
+    value {
+      b: false
+    }
+  }
+}
+library {
+}
+```
+Custom layers import from TensorFlow is designed to put all of a layer's `attr` into
+cv::dnn::LayerParams, and all input `Const` blobs into cv::dnn::Layer::blobs.
+In our case, the resize's output shape will be stored in the layer's `blobs[0]`.
+
+@snippet dnn/custom_layers.cpp ResizeBilinearLayer
+
+Next we register a layer and try to import the model.
+
+@snippet dnn/custom_layers.cpp Register ResizeBilinearLayer
index 94b818c..9a52f10 100644 (file)
@@ -48,3 +48,11 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn}
     *Author:* Dmitry Kurtaev
 
     In this tutorial we'll run deep learning models in browser using OpenCV.js.
+
+-   @subpage tutorial_dnn_custom_layers
+
+    *Compatibility:* \> OpenCV 3.4.1
+
+    *Author:* Dmitry Kurtaev
+
+    How to define custom layers to import networks.
index 9053842..f4e93b7 100644 (file)
@@ -555,7 +555,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
      * An every sample in the batch is normalized separately. Optionally,
      * output is scaled by the trained parameters.
      */
-    class NormalizeBBoxLayer : public Layer
+    class CV_EXPORTS NormalizeBBoxLayer : public Layer
     {
     public:
         float pnorm, epsilon;
index 43cb58a..69287dc 100644 (file)
@@ -142,6 +142,10 @@ public:
     const T &set(const String &key, const T &value);
 
     friend std::ostream &operator<<(std::ostream &stream, const Dict &dict);
+
+    std::map<String, DictValue>::const_iterator begin() const;
+
+    std::map<String, DictValue>::const_iterator end() const;
 };
 
 //! @}
index c30185b..be2e1c4 100644 (file)
@@ -102,9 +102,13 @@ inline int64 DictValue::get<int64>(int idx) const
 
         return (int64)doubleValue;
     }
+    else if (type == Param::STRING)
+    {
+        return std::atoi((*ps)[idx].c_str());
+    }
     else
     {
-        CV_Assert(isInt() || isReal());
+        CV_Assert(isInt() || isReal() || isString());
         return 0;
     }
 }
@@ -146,9 +150,13 @@ inline double DictValue::get<double>(int idx) const
     {
         return (double)(*pi)[idx];
     }
+    else if (type == Param::STRING)
+    {
+        return std::atof((*ps)[idx].c_str());
+    }
     else
     {
-        CV_Assert(isReal() || isInt());
+        CV_Assert(isReal() || isInt() || isString());
         return 0;
     }
 }
@@ -366,6 +374,16 @@ inline std::ostream &operator<<(std::ostream &stream, const Dict &dict)
     return stream;
 }
 
+inline std::map<String, DictValue>::const_iterator Dict::begin() const
+{
+    return dict.begin();
+}
+
+inline std::map<String, DictValue>::const_iterator Dict::end() const
+{
+    return dict.end();
+}
+
 CV__DNN_EXPERIMENTAL_NS_END
 }
 }
index 82bd3b1..619514e 100644 (file)
@@ -13,11 +13,11 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
 
 /** @brief Registers layer constructor in runtime.
 *   @param type string, containing type name of the layer.
-*   @param constuctorFunc pointer to the function of type LayerRegister::Constuctor, which creates the layer.
+*   @param constructorFunc pointer to the function of type LayerRegister::Constructor, which creates the layer.
 *   @details This macros must be placed inside the function code.
 */
-#define CV_DNN_REGISTER_LAYER_FUNC(type, constuctorFunc) \
-    cv::dnn::LayerFactory::registerLayer(#type, constuctorFunc);
+#define CV_DNN_REGISTER_LAYER_FUNC(type, constructorFunc) \
+    cv::dnn::LayerFactory::registerLayer(#type, constructorFunc);
 
 /** @brief Registers layer class in runtime.
  *  @param type string, containing type name of the layer.
@@ -29,11 +29,11 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
 
 /** @brief Registers layer constructor on module load time.
 *   @param type string, containing type name of the layer.
-*   @param constuctorFunc pointer to the function of type LayerRegister::Constuctor, which creates the layer.
+*   @param constructorFunc pointer to the function of type LayerRegister::Constructor, which creates the layer.
 *   @details This macros must be placed outside the function code.
 */
-#define CV_DNN_REGISTER_LAYER_FUNC_STATIC(type, constuctorFunc) \
-static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constuctorFunc);
+#define CV_DNN_REGISTER_LAYER_FUNC_STATIC(type, constructorFunc) \
+static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constructorFunc);
 
 /** @brief Registers layer class on module load time.
  *  @param type string, containing type name of the layer.
@@ -59,10 +59,10 @@ class _LayerStaticRegisterer
     String type;
 public:
 
-    _LayerStaticRegisterer(const String &layerType, LayerFactory::Constuctor layerConstuctor)
+    _LayerStaticRegisterer(const String &layerType, LayerFactory::Constructor layerConstructor)
     {
         this->type = layerType;
-        LayerFactory::registerLayer(layerType, layerConstuctor);
+        LayerFactory::registerLayer(layerType, layerConstructor);
     }
 
     ~_LayerStaticRegisterer()
index 3fb81f3..c4712b8 100644 (file)
@@ -58,10 +58,10 @@ class CV_EXPORTS LayerFactory
 public:
 
     //! Each Layer class must provide this function to the factory
-    typedef Ptr<Layer>(*Constuctor)(LayerParams &params);
+    typedef Ptr<Layer>(*Constructor)(LayerParams &params);
 
     //! Registers the layer class with typename @p type and specified @p constructor. Thread-safe.
-    static void registerLayer(const String &type, Constuctor constructor);
+    static void registerLayer(const String &type, Constructor constructor);
 
     //! Unregisters registered layer with specified type name. Thread-safe.
     static void unregisterLayer(const String &type);
index c13d3a5..1cc6d28 100644 (file)
@@ -103,6 +103,19 @@ public:
             ReadNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBinary);
     }
 
+    void extractCustomParams(const google::protobuf::UnknownFieldSet& unknownFields, cv::dnn::LayerParams &params)
+    {
+        const int numFields = unknownFields.field_count();
+        for (int i = 0; i < numFields; ++i)
+        {
+            const google::protobuf::UnknownField& field = unknownFields.field(i);
+            CV_Assert(field.type() == google::protobuf::UnknownField::TYPE_GROUP);
+            std::string fieldName = field.group().field(0).length_delimited();
+            std::string fieldValue = field.group().field(1).length_delimited();
+            params.set(fieldName, fieldValue);
+        }
+    }
+
     void addParam(const Message &msg, const FieldDescriptor *field, cv::dnn::LayerParams &params)
     {
         const Reflection *refl = msg.GetReflection();
@@ -187,12 +200,15 @@ public:
             if (!isInternal && !ends_with_param(fd->name()))
                 continue;
 
+            const google::protobuf::UnknownFieldSet& unknownFields = msgRefl->GetUnknownFields(msg);
             bool hasData =  fd->is_required() ||
                             (fd->is_optional() && msgRefl->HasField(msg, fd)) ||
-                            (fd->is_repeated() && msgRefl->FieldSize(msg, fd) > 0);
+                            (fd->is_repeated() && msgRefl->FieldSize(msg, fd) > 0) ||
+                            !unknownFields.empty();
             if (!hasData)
                 continue;
 
+            extractCustomParams(unknownFields, params);
             if (fd->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE)
             {
                 if (fd->is_repeated()) //Extract only first item!
@@ -258,7 +274,7 @@ public:
         }
     }
 
-    void extractBinaryLayerParms(const caffe::LayerParameter& layer, LayerParams& layerParams)
+    void extractBinaryLayerParams(const caffe::LayerParameter& layer, LayerParams& layerParams)
     {
         const std::string &name = layer.name();
 
@@ -319,7 +335,7 @@ public:
             LayerParams layerParams;
 
             extractLayerParams(layer, layerParams);
-            extractBinaryLayerParms(layer, layerParams);
+            extractBinaryLayerParams(layer, layerParams);
 
             int repetitions = layerCounter[name]++;
             if (repetitions)
index f6b4eb3..7fc5fea 100644 (file)
@@ -1120,7 +1120,7 @@ bool ReadProtoFromTextFile(const char* filename, Message* proto) {
     std::ifstream fs(filename, std::ifstream::in);
     CHECK(fs.is_open()) << "Can't open \"" << filename << "\"";
     IstreamInputStream input(&fs);
-    return google::protobuf::TextFormat::Parse(&input, proto);
+    return google::protobuf::TextFormat::Parser(true).Parse(&input, proto);
 }
 
 bool ReadProtoFromBinaryFile(const char* filename, Message* proto) {
index 86da4c0..edeca8e 100644 (file)
@@ -2790,7 +2790,7 @@ static Mutex& getLayerFactoryMutex()
     return *instance;
 }
 
-typedef std::map<String, LayerFactory::Constuctor> LayerFactory_Impl;
+typedef std::map<String, std::vector<LayerFactory::Constructor> > LayerFactory_Impl;
 
 static LayerFactory_Impl& getLayerFactoryImpl_()
 {
@@ -2813,21 +2813,22 @@ static LayerFactory_Impl& getLayerFactoryImpl()
     return *instance;
 }
 
-void LayerFactory::registerLayer(const String &type, Constuctor constructor)
+void LayerFactory::registerLayer(const String &type, Constructor constructor)
 {
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG_VALUE(type, "type", type.c_str());
 
     cv::AutoLock lock(getLayerFactoryMutex());
     String type_ = type.toLowerCase();
-    LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type_);
+    LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);
 
-    if (it != getLayerFactoryImpl().end() && it->second != constructor)
+    if (it != getLayerFactoryImpl().end())
     {
-        CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" already was registered");
+        if (it->second.back() == constructor)
+            CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" already was registered");
+        it->second.push_back(constructor);
     }
-
-    getLayerFactoryImpl().insert(std::make_pair(type_, constructor));
+    getLayerFactoryImpl().insert(std::make_pair(type_, std::vector<Constructor>(1, constructor)));
 }
 
 void LayerFactory::unregisterLayer(const String &type)
@@ -2837,7 +2838,15 @@ void LayerFactory::unregisterLayer(const String &type)
 
     cv::AutoLock lock(getLayerFactoryMutex());
     String type_ = type.toLowerCase();
-    getLayerFactoryImpl().erase(type_);
+
+    LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);
+    if (it != getLayerFactoryImpl().end())
+    {
+        if (it->second.size() > 1)
+            it->second.pop_back();
+        else
+            getLayerFactoryImpl().erase(it);
+    }
 }
 
 Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
@@ -2851,7 +2860,8 @@ Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& pa
 
     if (it != getLayerFactoryImpl().end())
     {
-        return it->second(params);
+        CV_Assert(!it->second.empty());
+        return it->second.back()(params);
     }
     else
     {
index a401f71..ea5d1e7 100644 (file)
@@ -1564,8 +1564,44 @@ void TFImporter::populateNet(Net dstNet)
         }
         else
         {
-            printLayerAttr(layer);
-            CV_Error_(Error::StsError, ("Unknown layer type %s in op %s", type.c_str(), name.c_str()));
+            // Importer does not know how to map this TensorFlow's operation onto OpenCV's layer.
+            // However we create a layer with the same type and rely that user defined a custom layer.
+
+            // All the attributes are added to LayerParams.
+            google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
+            for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
+                 ai != attr.end(); ++ai)
+            {
+                if (ai->second.value_case() == tensorflow::AttrValue::kS)  // string
+                    layerParams.set(ai->first, ai->second.s());
+                if (ai->second.value_case() == tensorflow::AttrValue::kI)  // int64
+                    layerParams.set(ai->first, ai->second.i());
+                if (ai->second.value_case() == tensorflow::AttrValue::kF)  // float
+                    layerParams.set(ai->first, ai->second.f());
+                if (ai->second.value_case() == tensorflow::AttrValue::kB)  // bool
+                    layerParams.set(ai->first, ai->second.b());
+            }
+
+            // All the Const input nodes are added to layer's blobs.
+            std::vector<std::string> inputsNames;
+            for (int i = 0; i < layer.input_size(); ++i)
+            {
+                // Check if input is a Const node.
+                if (value_id.find(layer.input(i)) != value_id.end())
+                {
+                    Mat blob = getTensorContent(getConstBlob(layer, value_id, i));
+                    layerParams.blobs.push_back(blob);
+                }
+                else
+                    inputsNames.push_back(layer.input(i));
+            }
+            int id = dstNet.addLayer(name, type, layerParams);
+            layer_id[name] = id;
+
+            for (int i = 0; i < inputsNames.size(); ++i)
+            {
+                connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i);
+            }
         }
     }
 }
index 83e4a48..813ee08 100644 (file)
@@ -940,7 +940,21 @@ struct TorchImporter
             }
             else
             {
-                CV_Error(Error::StsNotImplemented, "Unknown nn class \"" + className + "\"");
+                // Importer does not know how to map Torch's layer type to an OpenCV's one.
+                // However we parse all the parameters to let user create a custom layer.
+                readTorchTable(scalarParams, tensorParams);
+                for (std::map<String, DictValue>::const_iterator it = scalarParams.begin();
+                     it != scalarParams.end(); ++it)
+                {
+                    layerParams.set(it->first, it->second);
+                }
+                for (std::map<String, std::pair<int, Mat> >::iterator it = tensorParams.begin();
+                     it != tensorParams.end(); ++it)
+                {
+                    layerParams.blobs.push_back(it->second.second);
+                }
+                newModule->apiType = nnName;
+                curModule->modules.push_back(newModule);
             }
         }
         else
index 413e527..89c6ed8 100644 (file)
@@ -44,7 +44,7 @@
 #include "npy_blob.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 #include <opencv2/dnn/all_layers.hpp>
-#include <opencv2/ts/ocl_test.hpp>
+#include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS
 
 namespace opencv_test { namespace {
 
@@ -117,94 +117,50 @@ void testLayerUsingCaffeModels(String basename, int targetId = DNN_TARGET_CPU,
     normAssert(ref, out);
 }
 
-TEST(Layer_Test_Softmax, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_softmax");
-}
-
-OCL_TEST(Layer_Test_Softmax, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_softmax", DNN_TARGET_OPENCL);
-}
-
-TEST(Layer_Test_LRN_spatial, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_lrn_spatial");
-}
-
-OCL_TEST(Layer_Test_LRN_spatial, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_lrn_spatial", DNN_TARGET_OPENCL);
-}
-
-TEST(Layer_Test_LRN_channels, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_lrn_channels");
-}
-
-OCL_TEST(Layer_Test_LRN_channels, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_lrn_channels", DNN_TARGET_OPENCL);
-}
-
-TEST(Layer_Test_Convolution, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_convolution", DNN_TARGET_CPU, true);
-}
-
-OCL_TEST(Layer_Test_Convolution, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_convolution", DNN_TARGET_OPENCL, true);
-}
-
-TEST(Layer_Test_DeConvolution, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_deconvolution", DNN_TARGET_CPU, true, false);
-}
-
-OCL_TEST(Layer_Test_DeConvolution, Accuracy)
+typedef testing::TestWithParam<DNNTarget> Test_Caffe_layers;
+TEST_P(Test_Caffe_layers, Softmax)
 {
-    testLayerUsingCaffeModels("layer_deconvolution", DNN_TARGET_OPENCL, true, false);
+    testLayerUsingCaffeModels("layer_softmax", GetParam());
 }
 
-TEST(Layer_Test_InnerProduct, Accuracy)
+TEST_P(Test_Caffe_layers, LRN_spatial)
 {
-    testLayerUsingCaffeModels("layer_inner_product", DNN_TARGET_CPU, true);
+    testLayerUsingCaffeModels("layer_lrn_spatial", GetParam());
 }
 
-OCL_TEST(Layer_Test_InnerProduct, Accuracy)
+TEST_P(Test_Caffe_layers, LRN_channels)
 {
-    testLayerUsingCaffeModels("layer_inner_product", DNN_TARGET_OPENCL, true);
+    testLayerUsingCaffeModels("layer_lrn_channels", GetParam());
 }
 
-TEST(Layer_Test_Pooling_max, Accuracy)
+TEST_P(Test_Caffe_layers, Convolution)
 {
-    testLayerUsingCaffeModels("layer_pooling_max");
+    testLayerUsingCaffeModels("layer_convolution", GetParam(), true);
 }
 
-OCL_TEST(Layer_Test_Pooling_max, Accuracy)
+TEST_P(Test_Caffe_layers, DeConvolution)
 {
-    testLayerUsingCaffeModels("layer_pooling_max", DNN_TARGET_OPENCL);
+    testLayerUsingCaffeModels("layer_deconvolution", GetParam(), true, false);
 }
 
-TEST(Layer_Test_Pooling_ave, Accuracy)
+TEST_P(Test_Caffe_layers, InnerProduct)
 {
-    testLayerUsingCaffeModels("layer_pooling_ave");
+    testLayerUsingCaffeModels("layer_inner_product", GetParam(), true);
 }
 
-OCL_TEST(Layer_Test_Pooling_ave, Accuracy)
+TEST_P(Test_Caffe_layers, Pooling_max)
 {
-    testLayerUsingCaffeModels("layer_pooling_ave", DNN_TARGET_OPENCL);
+    testLayerUsingCaffeModels("layer_pooling_max", GetParam());
 }
 
-TEST(Layer_Test_MVN, Accuracy)
+TEST_P(Test_Caffe_layers, Pooling_ave)
 {
-    testLayerUsingCaffeModels("layer_mvn");
+    testLayerUsingCaffeModels("layer_pooling_ave", GetParam());
 }
 
-OCL_TEST(Layer_Test_MVN, Accuracy)
+TEST_P(Test_Caffe_layers, MVN)
 {
-    testLayerUsingCaffeModels("layer_mvn", DNN_TARGET_OPENCL);
+    testLayerUsingCaffeModels("layer_mvn", GetParam());
 }
 
 void testReshape(const MatShape& inputShape, const MatShape& targetShape,
@@ -257,14 +213,9 @@ TEST(Layer_Test_BatchNorm, local_stats)
     testLayerUsingCaffeModels("layer_batch_norm_local_stats", DNN_TARGET_CPU, true, false);
 }
 
-TEST(Layer_Test_ReLU, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_relu");
-}
-
-OCL_TEST(Layer_Test_ReLU, Accuracy)
+TEST_P(Test_Caffe_layers, ReLU)
 {
-    testLayerUsingCaffeModels("layer_relu", DNN_TARGET_OPENCL);
+    testLayerUsingCaffeModels("layer_relu", GetParam());
 }
 
 TEST(Layer_Test_Dropout, Accuracy)
@@ -272,14 +223,9 @@ TEST(Layer_Test_Dropout, Accuracy)
     testLayerUsingCaffeModels("layer_dropout");
 }
 
-TEST(Layer_Test_Concat, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_concat");
-}
-
-OCL_TEST(Layer_Test_Concat, Accuracy)
+TEST_P(Test_Caffe_layers, Concat)
 {
-    testLayerUsingCaffeModels("layer_concat", DNN_TARGET_OPENCL);
+    testLayerUsingCaffeModels("layer_concat", GetParam());
 }
 
 TEST(Layer_Test_Fused_Concat, Accuracy)
@@ -325,26 +271,16 @@ TEST(Layer_Test_Fused_Concat, Accuracy)
     testLayerUsingCaffeModels("layer_concat_shared_input", DNN_TARGET_CPU, true, false);
 }
 
-TEST(Layer_Test_Eltwise, Accuracy)
+TEST_P(Test_Caffe_layers, Eltwise)
 {
-    testLayerUsingCaffeModels("layer_eltwise");
+    testLayerUsingCaffeModels("layer_eltwise", GetParam());
 }
 
-OCL_TEST(Layer_Test_Eltwise, Accuracy)
+TEST_P(Test_Caffe_layers, PReLU)
 {
-    testLayerUsingCaffeModels("layer_eltwise", DNN_TARGET_OPENCL);
-}
-
-TEST(Layer_Test_PReLU, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_prelu", DNN_TARGET_CPU, true);
-    testLayerUsingCaffeModels("layer_prelu_fc", DNN_TARGET_CPU, true, false);
-}
-
-OCL_TEST(Layer_Test_PReLU, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_prelu", DNN_TARGET_OPENCL, true);
-    testLayerUsingCaffeModels("layer_prelu_fc", DNN_TARGET_OPENCL, true, false);
+    int targetId = GetParam();
+    testLayerUsingCaffeModels("layer_prelu", targetId, true);
+    testLayerUsingCaffeModels("layer_prelu_fc", targetId, true, false);
 }
 
 //template<typename XMat>
@@ -385,14 +321,9 @@ static void test_Reshape_Split_Slice_layers(int targetId)
     normAssert(input, output);
 }
 
-TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
-{
-    test_Reshape_Split_Slice_layers(DNN_TARGET_CPU);
-}
-
-OCL_TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
+TEST_P(Test_Caffe_layers, Reshape_Split_Slice)
 {
-    test_Reshape_Split_Slice_layers(DNN_TARGET_OPENCL);
+    test_Reshape_Split_Slice_layers(GetParam());
 }
 
 TEST(Layer_Conv_Elu, Accuracy)
@@ -602,7 +533,6 @@ TEST(Layer_Test_ROIPooling, Accuracy)
     normAssert(out, ref);
 }
 
-typedef testing::TestWithParam<DNNTarget> Test_Caffe_layers;
 TEST_P(Test_Caffe_layers, FasterRCNN_Proposal)
 {
     Net net = readNetFromCaffe(_tf("net_faster_rcnn_proposal.prototxt"));
@@ -906,4 +836,104 @@ TEST(Test_DLDT, two_inputs)
 }
 #endif  // HAVE_INF_ENGINE
 
+// Test a custom layer.
+class InterpLayer CV_FINAL : public Layer
+{
+public:
+    InterpLayer(const LayerParams &params) : Layer(params)
+    {
+        zoomFactor = params.get<int>("zoom_factor", 0);
+        outWidth = params.get<int>("width", 0);
+        outHeight = params.get<int>("height", 0);
+    }
+
+    static Ptr<InterpLayer> create(LayerParams& params)
+    {
+        return Ptr<InterpLayer>(new InterpLayer(params));
+    }
+
+    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
+                                 const int requiredOutputs,
+                                 std::vector<std::vector<int> > &outputs,
+                                 std::vector<std::vector<int> > &internals) const CV_OVERRIDE
+    {
+        const int batchSize = inputs[0][0];
+        const int numChannels = inputs[0][1];
+        const int inpHeight = inputs[0][2];
+        const int inpWidth = inputs[0][3];
+
+        std::vector<int> outShape(4);
+        outShape[0] = batchSize;
+        outShape[1] = numChannels;
+        outShape[2] = outHeight != 0 ? outHeight : (inpHeight + (inpHeight - 1) * (zoomFactor - 1));
+        outShape[3] = outWidth != 0 ? outWidth : (inpWidth + (inpWidth - 1) * (zoomFactor - 1));
+        outputs.assign(1, outShape);
+        return false;
+    }
+
+    virtual void finalize(const std::vector<Mat*>& inputs, std::vector<Mat> &outputs) CV_OVERRIDE
+    {
+        if (!outWidth && !outHeight)
+        {
+            outHeight = outputs[0].size[2];
+            outWidth = outputs[0].size[3];
+        }
+    }
+
+    // Implementation of this custom layer is based on https://github.com/cdmh/deeplab-public/blob/master/src/caffe/layers/interp_layer.cpp
+    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat>& internals) CV_OVERRIDE
+    {
+        Mat& inp = *inputs[0];
+        Mat& out = outputs[0];
+        const float* inpData = (float*)inp.data;
+        float* outData = (float*)out.data;
+
+        const int batchSize = inp.size[0];
+        const int numChannels = inp.size[1];
+        const int inpHeight = inp.size[2];
+        const int inpWidth = inp.size[3];
+
+        const float rheight = (outHeight > 1) ? static_cast<float>(inpHeight - 1) / (outHeight - 1) : 0.f;
+        const float rwidth = (outWidth > 1) ? static_cast<float>(inpWidth - 1) / (outWidth - 1) : 0.f;
+        for (int h2 = 0; h2 < outHeight; ++h2)
+        {
+            const float h1r = rheight * h2;
+            const int h1 = h1r;
+            const int h1p = (h1 < inpHeight - 1) ? 1 : 0;
+            const float h1lambda = h1r - h1;
+            const float h0lambda = 1.f - h1lambda;
+            for (int w2 = 0; w2 < outWidth; ++w2)
+            {
+                const float w1r = rwidth * w2;
+                const int w1 = w1r;
+                const int w1p = (w1 < inpWidth - 1) ? 1 : 0;
+                const float w1lambda = w1r - w1;
+                const float w0lambda = 1.f - w1lambda;
+                const float* pos1 = inpData + h1 * inpWidth + w1;
+                float* pos2 = outData + h2 * outWidth + w2;
+                for (int c = 0; c < batchSize * numChannels; ++c)
+                {
+                    pos2[0] =
+                      h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
+                      h1lambda * (w0lambda * pos1[h1p * inpWidth] + w1lambda * pos1[h1p * inpWidth + w1p]);
+                    pos1 += inpWidth * inpHeight;
+                    pos2 += outWidth * outHeight;
+                }
+            }
+        }
+    }
+
+    virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
+
+private:
+    int outWidth, outHeight, zoomFactor;
+};
+
// Registers the custom Interp layer, runs the Caffe reference-model test on
// CPU, then unregisters it so other tests see an unmodified factory.
TEST(Layer_Test_Interp, Accuracy)
{
    CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer);
    testLayerUsingCaffeModels("layer_interp", DNN_TARGET_CPU, false, false);
    LayerFactory::unregisterLayer("Interp");
}
+
 }} // namespace
index 2e92504..b2fb71b 100644 (file)
@@ -7,6 +7,8 @@
 
 #include "test_precomp.hpp"
 
+#include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS
+
 namespace opencv_test { namespace {
 
 TEST(blobFromImage_4ch, Regression)
@@ -75,4 +77,64 @@ TEST(readNet, Regression)
     EXPECT_FALSE(net.empty());
 }
 
+class FirstCustomLayer CV_FINAL : public Layer
+{
+public:
+    FirstCustomLayer(const LayerParams &params) : Layer(params) {}
+
+    static Ptr<Layer> create(LayerParams& params)
+    {
+        return Ptr<Layer>(new FirstCustomLayer(params));
+    }
+
+    virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
+    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat>& internals) CV_OVERRIDE
+    {
+        outputs[0].setTo(1);
+    }
+};
+
+class SecondCustomLayer CV_FINAL : public Layer
+{
+public:
+    SecondCustomLayer(const LayerParams &params) : Layer(params) {}
+
+    static Ptr<Layer> create(LayerParams& params)
+    {
+        return Ptr<Layer>(new SecondCustomLayer(params));
+    }
+
+    virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
+    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat>& internals) CV_OVERRIDE
+    {
+        outputs[0].setTo(2);
+    }
+};
+
// Verifies that custom layer registrations stack: registering a second layer
// under the same type overrides the first, and unregistering pops only the
// most recent registration, reverting to the previous one.
TEST(LayerFactory, custom_layers)
{
    LayerParams lp;
    lp.name = "name";
    lp.type = "CustomType";

    Mat inp(1, 1, CV_32FC1);
    for (int i = 0; i < 3; ++i)
    {
        // i == 0: FirstCustomLayer handles "CustomType" (output is 1).
        // i == 1: SecondCustomLayer is stacked on top (output is 2).
        // i == 2: unregister pops SecondCustomLayer, falling back to
        //         FirstCustomLayer (output is 1 again).
        if (i == 0)      { CV_DNN_REGISTER_LAYER_CLASS(CustomType, FirstCustomLayer); }
        else if (i == 1) { CV_DNN_REGISTER_LAYER_CLASS(CustomType, SecondCustomLayer); }
        else if (i == 2) { LayerFactory::unregisterLayer("CustomType"); }

        Net net;
        net.addLayerToPrev(lp.name, lp.type, lp);

        net.setInput(inp);
        Mat output = net.forward();

        if (i == 0)      EXPECT_EQ(output.at<float>(0), 1);
        else if (i == 1) EXPECT_EQ(output.at<float>(0), 2);
        else if (i == 2) EXPECT_EQ(output.at<float>(0), 1);
    }
    // Pop the remaining FirstCustomLayer registration.
    LayerFactory::unregisterLayer("CustomType");
}
+
 }} // namespace
index b7f3c5c..397aadf 100644 (file)
@@ -12,6 +12,8 @@ Test for Tensorflow models loading
 #include "test_precomp.hpp"
 #include "npy_blob.hpp"
 
+#include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS
+
 namespace opencv_test
 {
 
@@ -364,4 +366,95 @@ TEST(Test_TensorFlow, memory_read)
     runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
 }
 
+// Test a custom layer.
+class ResizeBilinearLayer CV_FINAL : public Layer
+{
+public:
+    ResizeBilinearLayer(const LayerParams &params) : Layer(params)
+    {
+        CV_Assert(!params.get<bool>("align_corners", false));
+        CV_Assert(blobs.size() == 1, blobs[0].type() == CV_32SC1);
+        outHeight = blobs[0].at<int>(0, 0);
+        outWidth = blobs[0].at<int>(0, 1);
+    }
+
+    static Ptr<Layer> create(LayerParams& params)
+    {
+        return Ptr<Layer>(new ResizeBilinearLayer(params));
+    }
+
+    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
+                                 const int requiredOutputs,
+                                 std::vector<std::vector<int> > &outputs,
+                                 std::vector<std::vector<int> > &internals) const CV_OVERRIDE
+    {
+        std::vector<int> outShape(4);
+        outShape[0] = inputs[0][0];  // batch size
+        outShape[1] = inputs[0][1];  // number of channels
+        outShape[2] = outHeight;
+        outShape[3] = outWidth;
+        outputs.assign(1, outShape);
+        return false;
+    }
+
+    // This implementation is based on a reference implementation from
+    // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) CV_OVERRIDE
+    {
+        Mat& inp = *inputs[0];
+        Mat& out = outputs[0];
+        const float* inpData = (float*)inp.data;
+        float* outData = (float*)out.data;
+
+        const int batchSize = inp.size[0];
+        const int numChannels = inp.size[1];
+        const int inpHeight = inp.size[2];
+        const int inpWidth = inp.size[3];
+
+        float heightScale = static_cast<float>(inpHeight) / outHeight;
+        float widthScale = static_cast<float>(inpWidth) / outWidth;
+        for (int b = 0; b < batchSize; ++b)
+        {
+            for (int y = 0; y < outHeight; ++y)
+            {
+                float input_y = y * heightScale;
+                int y0 = static_cast<int>(std::floor(input_y));
+                int y1 = std::min(y0 + 1, inpHeight - 1);
+                for (int x = 0; x < outWidth; ++x)
+                {
+                    float input_x = x * widthScale;
+                    int x0 = static_cast<int>(std::floor(input_x));
+                    int x1 = std::min(x0 + 1, inpWidth - 1);
+                    for (int c = 0; c < numChannels; ++c)
+                    {
+                        float interpolation =
+                            inpData[offset(inp.size, c, x0, y0, b)] * (1 - (input_y - y0)) * (1 - (input_x - x0)) +
+                            inpData[offset(inp.size, c, x0, y1, b)] * (input_y - y0) * (1 - (input_x - x0)) +
+                            inpData[offset(inp.size, c, x1, y0, b)] * (1 - (input_y - y0)) * (input_x - x0) +
+                            inpData[offset(inp.size, c, x1, y1, b)] * (input_y - y0) * (input_x - x0);
+                        outData[offset(out.size, c, x, y, b)] = interpolation;
+                    }
+                }
+            }
+        }
+    }
+
+    virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
+
+private:
+    static inline int offset(const MatSize& size, int c, int x, int y, int b)
+    {
+        return x + size[3] * (y + size[2] * (c + size[1] * b));
+    }
+
+    int outWidth, outHeight;
+};
+
// Registers the custom ResizeBilinear layer, runs the TensorFlow reference
// net, then unregisters it to leave the factory unchanged for other tests.
TEST(Test_TensorFlow, resize_bilinear)
{
    CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
    runTensorFlowNet("resize_bilinear");
    LayerFactory::unregisterLayer("ResizeBilinear");
}
+
 }
index f748758..33e0e94 100644 (file)
@@ -42,6 +42,7 @@
 #include "test_precomp.hpp"
 #include "npy_blob.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
+#include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS
 
 namespace opencv_test
 {
@@ -325,4 +326,62 @@ TEST(Torch_Importer, net_residual)
     runTorchNet("net_residual", DNN_TARGET_CPU, "", false, true);
 }
 
+// Test a custom layer
+// https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest
+class SpatialUpSamplingNearestLayer CV_FINAL : public Layer
+{
+public:
+    SpatialUpSamplingNearestLayer(const LayerParams &params) : Layer(params)
+    {
+        scale = params.get<int>("scale_factor");
+    }
+
+    static Ptr<Layer> create(LayerParams& params)
+    {
+        return Ptr<Layer>(new SpatialUpSamplingNearestLayer(params));
+    }
+
+    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
+                                 const int requiredOutputs,
+                                 std::vector<std::vector<int> > &outputs,
+                                 std::vector<std::vector<int> > &internals) const CV_OVERRIDE
+    {
+        std::vector<int> outShape(4);
+        outShape[0] = inputs[0][0];  // batch size
+        outShape[1] = inputs[0][1];  // number of channels
+        outShape[2] = scale * inputs[0][2];
+        outShape[3] = scale * inputs[0][3];
+        outputs.assign(1, outShape);
+        return false;
+    }
+
+    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) CV_OVERRIDE
+    {
+        Mat& inp = *inputs[0];
+        Mat& out = outputs[0];
+        const int outHeight = out.size[2];
+        const int outWidth = out.size[3];
+        for (size_t n = 0; n < inputs[0]->size[0]; ++n)
+        {
+            for (size_t ch = 0; ch < inputs[0]->size[1]; ++ch)
+            {
+                resize(getPlane(inp, n, ch), getPlane(out, n, ch),
+                       Size(outWidth, outHeight), 0, 0, INTER_NEAREST);
+            }
+        }
+    }
+
+    virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
+
+private:
+    int scale;
+};
+
// Registers the custom SpatialUpSamplingNearest layer, runs the Torch
// reference net, then unregisters it to restore the factory state.
TEST(Torch_Importer, upsampling_nearest)
{
    CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer);
    runTorchNet("net_spatial_upsampling_nearest", DNN_TARGET_CPU, "", false, true);
    LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
}
+
 }
diff --git a/samples/cpp/tutorial_code/dnn/custom_layers.cpp b/samples/cpp/tutorial_code/dnn/custom_layers.cpp
new file mode 100644 (file)
index 0000000..3fc9e61
--- /dev/null
@@ -0,0 +1,232 @@
+#include <opencv2/dnn.hpp>
+
+//! [A custom layer interface]
// Skeleton of a custom layer showing every method an implementation may
// override; trivial definitions appear at the bottom of this file. The
// `//! [...]` markers delimit snippets included in the tutorial — keep them.
class MyLayer : public cv::dnn::Layer
{
public:
    //! [MyLayer::MyLayer]
    // Reads layer hyper-parameters from params (trainable weights, if any,
    // arrive in this->blobs via the base class).
    MyLayer(const cv::dnn::LayerParams &params);
    //! [MyLayer::MyLayer]

    //! [MyLayer::create]
    // Factory method used by CV_DNN_REGISTER_LAYER_CLASS.
    static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams& params);
    //! [MyLayer::create]

    //! [MyLayer::getMemoryShapes]
    // Reports output (and internal buffer) shapes for given input shapes.
    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
                                 const int requiredOutputs,
                                 std::vector<std::vector<int> > &outputs,
                                 std::vector<std::vector<int> > &internals) const;
    //! [MyLayer::getMemoryShapes]

    //! [MyLayer::forward]
    // Computes outputs from inputs; called on every network inference.
    virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals);
    //! [MyLayer::forward]

    //! [MyLayer::finalize]
    // One-time setup after input/output blobs are allocated.
    virtual void finalize(const std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs);
    //! [MyLayer::finalize]

    virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals);
};
+//! [A custom layer interface]
+
+//! [InterpLayer]
// Bilinear interpolation layer (Caffe's "Interp") with a fixed output size
// given by the "width"/"height" layer parameters.
class InterpLayer : public cv::dnn::Layer
{
public:
    InterpLayer(const cv::dnn::LayerParams &params) : Layer(params)
    {
        outWidth = params.get<int>("width", 0);
        outHeight = params.get<int>("height", 0);
    }

    static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams& params)
    {
        return cv::Ptr<cv::dnn::Layer>(new InterpLayer(params));
    }

    // Output keeps the input's batch/channel dims; spatial dims are fixed.
    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
                                 const int requiredOutputs,
                                 std::vector<std::vector<int> > &outputs,
                                 std::vector<std::vector<int> > &internals) const
    {
        CV_UNUSED(requiredOutputs); CV_UNUSED(internals);
        std::vector<int> outShape(4);
        outShape[0] = inputs[0][0];  // batch size
        outShape[1] = inputs[0][1];  // number of channels
        outShape[2] = outHeight;
        outShape[3] = outWidth;
        outputs.assign(1, outShape);
        return false;
    }

    // Implementation of this custom layer is based on https://github.com/cdmh/deeplab-public/blob/master/src/caffe/layers/interp_layer.cpp
    virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals)
    {
        CV_UNUSED(internals);
        cv::Mat& inp = *inputs[0];
        cv::Mat& out = outputs[0];
        // Data is assumed to be 32-bit float NCHW — TODO confirm against the
        // importer's blob layout.
        const float* inpData = (float*)inp.data;
        float* outData = (float*)out.data;

        const int batchSize = inp.size[0];
        const int numChannels = inp.size[1];
        const int inpHeight = inp.size[2];
        const int inpWidth = inp.size[3];

        const float rheight = (outHeight > 1) ? static_cast<float>(inpHeight - 1) / (outHeight - 1) : 0.f;
        const float rwidth = (outWidth > 1) ? static_cast<float>(inpWidth - 1) / (outWidth - 1) : 0.f;
        for (int h2 = 0; h2 < outHeight; ++h2)
        {
            const float h1r = rheight * h2;
            const int h1 = static_cast<int>(h1r);
            const int h1p = (h1 < inpHeight - 1) ? 1 : 0;
            const float h1lambda = h1r - h1;
            const float h0lambda = 1.f - h1lambda;
            for (int w2 = 0; w2 < outWidth; ++w2)
            {
                const float w1r = rwidth * w2;
                const int w1 = static_cast<int>(w1r);
                const int w1p = (w1 < inpWidth - 1) ? 1 : 0;
                const float w1lambda = w1r - w1;
                const float w0lambda = 1.f - w1lambda;
                const float* pos1 = inpData + h1 * inpWidth + w1;
                float* pos2 = outData + h2 * outWidth + w2;
                // Blend the four neighbors for every batch*channel plane.
                for (int c = 0; c < batchSize * numChannels; ++c)
                {
                    pos2[0] =
                      h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
                      h1lambda * (w0lambda * pos1[h1p * inpWidth] + w1lambda * pos1[h1p * inpWidth + w1p]);
                    pos1 += inpWidth * inpHeight;
                    pos2 += outWidth * outHeight;
                }
            }
        }
    }

    virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {}

private:
    int outWidth, outHeight;
};
+//! [InterpLayer]
+
+//! [ResizeBilinearLayer]
+class ResizeBilinearLayer : public cv::dnn::Layer
+{
+public:
+    ResizeBilinearLayer(const cv::dnn::LayerParams &params) : Layer(params)
+    {
+        CV_Assert(!params.get<bool>("align_corners", false));
+        CV_Assert(blobs.size() == 1, blobs[0].type() == CV_32SC1);
+        outHeight = blobs[0].at<int>(0, 0);
+        outWidth = blobs[0].at<int>(0, 1);
+    }
+
+    static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams& params)
+    {
+        return cv::Ptr<cv::dnn::Layer>(new ResizeBilinearLayer(params));
+    }
+
+    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
+                                 const int requiredOutputs,
+                                 std::vector<std::vector<int> > &outputs,
+                                 std::vector<std::vector<int> > &internals) const
+    {
+        CV_UNUSED(requiredOutputs); CV_UNUSED(internals);
+        std::vector<int> outShape(4);
+        outShape[0] = inputs[0][0];  // batch size
+        outShape[1] = inputs[0][1];  // number of channels
+        outShape[2] = outHeight;
+        outShape[3] = outWidth;
+        outputs.assign(1, outShape);
+        return false;
+    }
+
+    // This implementation is based on a reference implementation from
+    // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+    virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals)
+    {
+        CV_UNUSED(internals);
+        cv::Mat& inp = *inputs[0];
+        cv::Mat& out = outputs[0];
+        const float* inpData = (float*)inp.data;
+        float* outData = (float*)out.data;
+
+        const int batchSize = inp.size[0];
+        const int numChannels = inp.size[1];
+        const int inpHeight = inp.size[2];
+        const int inpWidth = inp.size[3];
+
+        float heightScale = static_cast<float>(inpHeight) / outHeight;
+        float widthScale = static_cast<float>(inpWidth) / outWidth;
+        for (int b = 0; b < batchSize; ++b)
+        {
+            for (int y = 0; y < outHeight; ++y)
+            {
+                float input_y = y * heightScale;
+                int y0 = static_cast<int>(std::floor(input_y));
+                int y1 = std::min(y0 + 1, inpHeight - 1);
+                for (int x = 0; x < outWidth; ++x)
+                {
+                    float input_x = x * widthScale;
+                    int x0 = static_cast<int>(std::floor(input_x));
+                    int x1 = std::min(x0 + 1, inpWidth - 1);
+                    for (int c = 0; c < numChannels; ++c)
+                    {
+                        float interpolation =
+                            inpData[offset(inp.size, c, x0, y0, b)] * (1 - (input_y - y0)) * (1 - (input_x - x0)) +
+                            inpData[offset(inp.size, c, x0, y1, b)] * (input_y - y0) * (1 - (input_x - x0)) +
+                            inpData[offset(inp.size, c, x1, y0, b)] * (1 - (input_y - y0)) * (input_x - x0) +
+                            inpData[offset(inp.size, c, x1, y1, b)] * (input_y - y0) * (input_x - x0);
+                        outData[offset(out.size, c, x, y, b)] = interpolation;
+                    }
+                }
+            }
+        }
+    }
+
+    virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {}
+
+private:
+    static inline int offset(const cv::MatSize& size, int c, int x, int y, int b)
+    {
+        return x + size[3] * (y + size[2] * (c + size[1] * b));
+    }
+
+    int outWidth, outHeight;
+};
+//! [ResizeBilinearLayer]
+
+//! [Register a custom layer]
+#include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS macro
+
int main(int argc, char** argv)
{
    // Layers must be registered BEFORE a model referencing them is parsed.
    CV_DNN_REGISTER_LAYER_CLASS(MyType, MyLayer);
    // ...
    //! [Register a custom layer]
    CV_UNUSED(argc); CV_UNUSED(argv);
    //! [Register InterpLayer]
    CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer);
    cv::dnn::Net caffeNet = cv::dnn::readNet("/path/to/config.prototxt", "/path/to/weights.caffemodel");
    //! [Register InterpLayer]

    //! [Register ResizeBilinearLayer]
    CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
    cv::dnn::Net tfNet = cv::dnn::readNet("/path/to/graph.pb");
    //! [Register ResizeBilinearLayer]
}
+
// Trivial out-of-line definitions for the MyLayer interface sketch above:
// the tutorial only needs MyLayer to be registrable, not to do real work.
cv::Ptr<cv::dnn::Layer> MyLayer::create(cv::dnn::LayerParams& params)
{
    return cv::Ptr<cv::dnn::Layer>(new MyLayer(params));
}
MyLayer::MyLayer(const cv::dnn::LayerParams&) {}
bool MyLayer::getMemoryShapes(const std::vector<std::vector<int> >&, const int,
                              std::vector<std::vector<int> >&,
                              std::vector<std::vector<int> >&) const { return false; }
void MyLayer::forward(std::vector<cv::Mat*>&, std::vector<cv::Mat>&, std::vector<cv::Mat>&) {}
void MyLayer::finalize(const std::vector<cv::Mat*>&, std::vector<cv::Mat>&) {}
void MyLayer::forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {}