From 4ec456f0a0993457120b916b36bde1d6df93e37b Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 24 Apr 2018 14:59:59 +0300 Subject: [PATCH] Custom layers for deep learning networks (#11129) * Custom deep learning layers support * Stack custom deep learning layers --- .../protobuf/src/google/protobuf/text_format.cc | 36 ++-- .../protobuf/src/google/protobuf/text_format.h | 2 +- .../dnn/dnn_custom_layers/dnn_custom_layers.md | 192 +++++++++++++++++ doc/tutorials/dnn/table_of_content_dnn.markdown | 8 + modules/dnn/include/opencv2/dnn/all_layers.hpp | 2 +- modules/dnn/include/opencv2/dnn/dict.hpp | 4 + modules/dnn/include/opencv2/dnn/dnn.inl.hpp | 22 +- modules/dnn/include/opencv2/dnn/layer.details.hpp | 16 +- modules/dnn/include/opencv2/dnn/layer.hpp | 4 +- modules/dnn/src/caffe/caffe_importer.cpp | 22 +- modules/dnn/src/caffe/caffe_io.cpp | 2 +- modules/dnn/src/dnn.cpp | 28 ++- modules/dnn/src/tensorflow/tf_importer.cpp | 40 +++- modules/dnn/src/torch/torch_importer.cpp | 16 +- modules/dnn/test/test_layers.cpp | 234 ++++++++++++--------- modules/dnn/test/test_misc.cpp | 62 ++++++ modules/dnn/test/test_tf_importer.cpp | 93 ++++++++ modules/dnn/test/test_torch_importer.cpp | 59 ++++++ samples/cpp/tutorial_code/dnn/custom_layers.cpp | 232 ++++++++++++++++++++ 19 files changed, 928 insertions(+), 146 deletions(-) create mode 100644 doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md create mode 100644 samples/cpp/tutorial_code/dnn/custom_layers.cpp diff --git a/3rdparty/protobuf/src/google/protobuf/text_format.cc b/3rdparty/protobuf/src/google/protobuf/text_format.cc index eed2a76..78f1acd 100644 --- a/3rdparty/protobuf/src/google/protobuf/text_format.cc +++ b/3rdparty/protobuf/src/google/protobuf/text_format.cc @@ -469,8 +469,9 @@ class TextFormat::Parser::ParserImpl { "\" has no field named \"" + field_name + "\"."); return false; } else { - ReportWarning("Message type \"" + descriptor->full_name() + - "\" has no field named \"" + field_name + "\"."); + // No warnings to let user define custom layers (see https://github.com/opencv/opencv/pull/11129) + // ReportWarning("Message type \"" + descriptor->full_name() + + // "\" has no field named \"" + field_name + "\"."); } } } @@ -485,10 +486,13 @@ class TextFormat::Parser::ParserImpl { // start with "{" or "<" which indicates the beginning of a message body. // If there is no ":" or there is a "{" or "<" after ":", this field has // to be a message or the input is ill-formed. + UnknownFieldSet* unknown_fields = reflection->MutableUnknownFields(message); if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) { - return SkipFieldValue(); + UnknownFieldSet* unknown_field = unknown_fields->AddGroup(unknown_fields->field_count()); + unknown_field->AddLengthDelimited(0, field_name); // Add a field's name. + return SkipFieldValue(unknown_field); } else { - return SkipFieldMessage(); + return SkipFieldMessage(unknown_fields); } } @@ -571,7 +575,7 @@ label_skip_parsing: } // Skips the next field including the field's name and value. - bool SkipField() { + bool SkipField(UnknownFieldSet* unknown_fields) { string field_name; if (TryConsume("[")) { // Extension name. @@ -588,9 +592,11 @@ label_skip_parsing: // If there is no ":" or there is a "{" or "<" after ":", this field has // to be a message or the input is ill-formed. 
if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) { - DO(SkipFieldValue()); + UnknownFieldSet* unknown_field = unknown_fields->AddGroup(unknown_fields->field_count()); + unknown_field->AddLengthDelimited(0, field_name); // Add a field's name. + DO(SkipFieldValue(unknown_field)); } else { - DO(SkipFieldMessage()); + DO(SkipFieldMessage(unknown_fields)); } // For historical reasons, fields may optionally be separated by commas or // semicolons. @@ -625,11 +631,11 @@ label_skip_parsing: // Skips the whole body of a message including the beginning delimiter and // the ending delimiter. - bool SkipFieldMessage() { + bool SkipFieldMessage(UnknownFieldSet* unknown_fields) { string delimiter; DO(ConsumeMessageDelimiter(&delimiter)); while (!LookingAt(">") && !LookingAt("}")) { - DO(SkipField()); + DO(SkipField(unknown_fields)); } DO(Consume(delimiter)); return true; @@ -769,7 +775,7 @@ label_skip_parsing: return true; } - bool SkipFieldValue() { + bool SkipFieldValue(UnknownFieldSet* unknown_field) { if (LookingAtType(io::Tokenizer::TYPE_STRING)) { while (LookingAtType(io::Tokenizer::TYPE_STRING)) { tokenizer_.Next(); @@ -779,9 +785,9 @@ label_skip_parsing: if (TryConsume("[")) { while (true) { if (!LookingAt("{") && !LookingAt("<")) { - DO(SkipFieldValue()); + DO(SkipFieldValue(unknown_field)); } else { - DO(SkipFieldMessage()); + DO(SkipFieldMessage(unknown_field)); } if (TryConsume("]")) { break; @@ -833,6 +839,8 @@ label_skip_parsing: return false; } } + // Use a tag 1 because tag 0 is used for field's name. + unknown_field->AddLengthDelimited(1, tokenizer_.current().text); tokenizer_.Next(); return true; } @@ -1298,13 +1306,13 @@ class TextFormat::Printer::TextGenerator TextFormat::Finder::~Finder() { } -TextFormat::Parser::Parser() +TextFormat::Parser::Parser(bool allow_unknown_field) : error_collector_(NULL), finder_(NULL), parse_info_tree_(NULL), allow_partial_(false), allow_case_insensitive_field_(false), - allow_unknown_field_(false), + allow_unknown_field_(allow_unknown_field), allow_unknown_enum_(false), allow_field_number_(false), allow_relaxed_whitespace_(false), diff --git a/3rdparty/protobuf/src/google/protobuf/text_format.h b/3rdparty/protobuf/src/google/protobuf/text_format.h index a2670d6..74d89a5 100644 --- a/3rdparty/protobuf/src/google/protobuf/text_format.h +++ b/3rdparty/protobuf/src/google/protobuf/text_format.h @@ -457,7 +457,7 @@ class LIBPROTOBUF_EXPORT TextFormat { // For more control over parsing, use this class. class LIBPROTOBUF_EXPORT Parser { public: - Parser(); + Parser(bool allow_unknown_field = false); ~Parser(); // Like TextFormat::Parse(). diff --git a/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md b/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md new file mode 100644 index 0000000..f0e4197 --- /dev/null +++ b/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md @@ -0,0 +1,192 @@ +# Custom deep learning layers support {#tutorial_dnn_custom_layers} + +## Introduction +Deep learning is a fast growing area. The new approaches to build neural networks +usually introduce new types of layers. They could be modifications of existing +ones or implement outstanding researching ideas. + +OpenCV gives an opportunity to import and run networks from different deep learning +frameworks. There are a number of the most popular layers. However you can face +a problem that your network cannot be imported using OpenCV because of unimplemented layers. 
+
+The first solution is to create a feature request at https://github.com/opencv/opencv/issues
+mentioning details such as the source of the model and the type of the new layer. The layer
+may be implemented if the OpenCV community shares this need.
+
+The second way is to define a **custom layer** so OpenCV's deep learning engine
+will know how to use it. This tutorial shows how to customize the import of deep
+learning models.
+
+## Define a custom layer in C++
+A deep learning layer is a building block of a network's pipeline.
+It has connections to **input blobs** and produces results to **output blobs**.
+It may also have trained **weights** and **hyper-parameters**.
+Layers' names, types, weights and hyper-parameters are stored in files generated by
+native frameworks during training. If OpenCV encounters an unknown layer type, it throws an
+exception while trying to read the model:
+
+```
+Unspecified error: Can't create layer "layer_name" of type "MyType" in function getLayerInstance
+```
+
+To import the model correctly you have to derive a class from cv::dnn::Layer with
+the following methods:
+
+@snippet dnn/custom_layers.cpp A custom layer interface
+
+And register it before the import:
+
+@snippet dnn/custom_layers.cpp Register a custom layer
+
+@note `MyType` is the type of the unimplemented layer from the thrown exception.
+
+Let's see what all the methods do:
+
+- Constructor
+
+@snippet dnn/custom_layers.cpp MyLayer::MyLayer
+
+Retrieves hyper-parameters from cv::dnn::LayerParams. If your layer has trainable
+weights, they are already stored in the Layer's member cv::dnn::Layer::blobs.
+
+- A static method `create`
+
+@snippet dnn/custom_layers.cpp MyLayer::create
+
+This method should create an instance of your layer and return a cv::Ptr to it.
+
+- Output blobs' shape computation
+
+@snippet dnn/custom_layers.cpp MyLayer::getMemoryShapes
+
+Returns the layer's output shapes depending on the input shapes. You may request
+extra memory using `internals`.
+
+- Run a layer
+
+@snippet dnn/custom_layers.cpp MyLayer::forward
+
+Implement the layer's logic here: compute outputs for the given inputs.
+
+@note OpenCV manages the memory allocated for layers. In most cases the same memory
+can be reused between layers, so your `forward` implementation should not rely on
+the second invocation of `forward` receiving the same data at `outputs` and `internals`.
+
+- Optional `finalize` method
+
+@snippet dnn/custom_layers.cpp MyLayer::finalize
+
+The chain of methods is the following: the OpenCV deep learning engine calls the `create`
+method once, then calls `getMemoryShapes` for every created layer, and then you can make
+preparations that depend on the known input dimensions at cv::dnn::Layer::finalize.
+After the network is initialized, only the `forward` method is called for every network input.
+
+@note Varying the input blobs' sizes, such as height, width or batch size, makes OpenCV
+reallocate all the internal memory, which leads to efficiency gaps. Try to initialize
+and deploy models using a fixed batch size and image dimensions.
+
+## Example: custom layer from Caffe
+Let's create a custom layer `Interp` from https://github.com/cdmh/deeplab-public.
+It's just a simple resize that takes an input blob of size `N x C x Hi x Wi` and returns
+an output blob of size `N x C x Ho x Wo`, where `N` is a batch size, `C` is a number of channels,
+and `Hi x Wi` and `Ho x Wo` are the input and output `height x width` correspondingly.
+This layer has no trainable weights, but it has hyper-parameters to specify the output size.
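+
+Note that fields of a block which is unknown to the Caffe parser (such as `interp_param` below)
+are passed to the layer as string values of cv::dnn::LayerParams, and cv::dnn::Dict::get
+converts them to numbers on request. The following lines are only a minimal sketch of reading
+these hyper-parameters; the helper name `interpOutputSize` is hypothetical and is not part of
+OpenCV, and the complete `Interp` layer is shown after the example configuration below:
+~~~~~~~~~~~~~{.cpp}
+#include <opencv2/dnn.hpp>
+
+// Hypothetical helper: reads the Interp hyper-parameters from LayerParams.
+// The Caffe importer stores fields of an unknown interp_param block as strings;
+// get<int> converts them to numbers. A default of 0 means "not specified".
+static cv::Size interpOutputSize(const cv::dnn::LayerParams& params)
+{
+    const int outHeight = params.get<int>("height", 0);
+    const int outWidth  = params.get<int>("width", 0);
+    return cv::Size(outWidth, outHeight);
+}
+~~~~~~~~~~~~~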
+
+For example,
+~~~~~~~~~~~~~
+layer {
+  name: "output"
+  type: "Interp"
+  bottom: "input"
+  top: "output"
+  interp_param {
+    height: 9
+    width: 8
+  }
+}
+~~~~~~~~~~~~~
+
+With that, our implementation can look like this:
+
+@snippet dnn/custom_layers.cpp InterpLayer
+
+Next we need to register the new layer type and try to import the model.
+
+@snippet dnn/custom_layers.cpp Register InterpLayer
+
+## Example: custom layer from TensorFlow
+This is an example of how to import a network with a [tf.image.resize_bilinear](https://www.tensorflow.org/versions/master/api_docs/python/tf/image/resize_bilinear)
+operation. It is also a resize, but with an implementation different from OpenCV's or the `Interp` layer above.
+
+Let's create a single-layer network:
+~~~~~~~~~~~~~{.py}
+inp = tf.placeholder(tf.float32, [2, 3, 4, 5], 'input')
+resized = tf.image.resize_bilinear(inp, size=[9, 8], name='resize_bilinear')
+~~~~~~~~~~~~~
+OpenCV sees the TensorFlow graph in the following way:
+
+```
+node {
+  name: "input"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+node {
+  name: "resize_bilinear/size"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\t\000\000\000\010\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "resize_bilinear"
+  op: "ResizeBilinear"
+  input: "input:0"
+  input: "resize_bilinear/size"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "align_corners"
+    value {
+      b: false
+    }
+  }
+}
+library {
+}
+```
+Custom layer import from TensorFlow is designed to put all of a layer's `attr` values into
+cv::dnn::LayerParams and its `Const` input blobs into cv::dnn::Layer::blobs.
+In our case the resize's output shape will be stored in the layer's `blobs[0]`.
+
+@snippet dnn/custom_layers.cpp ResizeBilinearLayer
+
+Next we register the layer and try to import the model.
+
+@snippet dnn/custom_layers.cpp Register ResizeBilinearLayer
diff --git a/doc/tutorials/dnn/table_of_content_dnn.markdown b/doc/tutorials/dnn/table_of_content_dnn.markdown
index 94b818c..9a52f10 100644
--- a/doc/tutorials/dnn/table_of_content_dnn.markdown
+++ b/doc/tutorials/dnn/table_of_content_dnn.markdown
@@ -48,3 +48,11 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn}
   *Author:* Dmitry Kurtaev
 
   In this tutorial we'll run deep learning models in browser using OpenCV.js.
+
+- @subpage tutorial_dnn_custom_layers
+
+  *Compatibility:* \> OpenCV 3.4.1
+
+  *Author:* Dmitry Kurtaev
+
+  How to define custom layers to import networks.
diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 9053842..f4e93b7 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -555,7 +555,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
      * An every sample in the batch is normalized separately. Optionally,
      * output is scaled by the trained parameters.
*/ - class NormalizeBBoxLayer : public Layer + class CV_EXPORTS NormalizeBBoxLayer : public Layer { public: float pnorm, epsilon; diff --git a/modules/dnn/include/opencv2/dnn/dict.hpp b/modules/dnn/include/opencv2/dnn/dict.hpp index 43cb58a..69287dc 100644 --- a/modules/dnn/include/opencv2/dnn/dict.hpp +++ b/modules/dnn/include/opencv2/dnn/dict.hpp @@ -142,6 +142,10 @@ public: const T &set(const String &key, const T &value); friend std::ostream &operator<<(std::ostream &stream, const Dict &dict); + + std::map::const_iterator begin() const; + + std::map::const_iterator end() const; }; //! @} diff --git a/modules/dnn/include/opencv2/dnn/dnn.inl.hpp b/modules/dnn/include/opencv2/dnn/dnn.inl.hpp index c30185b..be2e1c4 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.inl.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.inl.hpp @@ -102,9 +102,13 @@ inline int64 DictValue::get(int idx) const return (int64)doubleValue; } + else if (type == Param::STRING) + { + return std::atoi((*ps)[idx].c_str()); + } else { - CV_Assert(isInt() || isReal()); + CV_Assert(isInt() || isReal() || isString()); return 0; } } @@ -146,9 +150,13 @@ inline double DictValue::get(int idx) const { return (double)(*pi)[idx]; } + else if (type == Param::STRING) + { + return std::atof((*ps)[idx].c_str()); + } else { - CV_Assert(isReal() || isInt()); + CV_Assert(isReal() || isInt() || isString()); return 0; } } @@ -366,6 +374,16 @@ inline std::ostream &operator<<(std::ostream &stream, const Dict &dict) return stream; } +inline std::map::const_iterator Dict::begin() const +{ + return dict.begin(); +} + +inline std::map::const_iterator Dict::end() const +{ + return dict.end(); +} + CV__DNN_EXPERIMENTAL_NS_END } } diff --git a/modules/dnn/include/opencv2/dnn/layer.details.hpp b/modules/dnn/include/opencv2/dnn/layer.details.hpp index 82bd3b1..619514e 100644 --- a/modules/dnn/include/opencv2/dnn/layer.details.hpp +++ b/modules/dnn/include/opencv2/dnn/layer.details.hpp @@ -13,11 +13,11 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN /** @brief Registers layer constructor in runtime. * @param type string, containing type name of the layer. -* @param constuctorFunc pointer to the function of type LayerRegister::Constuctor, which creates the layer. +* @param constructorFunc pointer to the function of type LayerRegister::Constructor, which creates the layer. * @details This macros must be placed inside the function code. */ -#define CV_DNN_REGISTER_LAYER_FUNC(type, constuctorFunc) \ - cv::dnn::LayerFactory::registerLayer(#type, constuctorFunc); +#define CV_DNN_REGISTER_LAYER_FUNC(type, constructorFunc) \ + cv::dnn::LayerFactory::registerLayer(#type, constructorFunc); /** @brief Registers layer class in runtime. * @param type string, containing type name of the layer. @@ -29,11 +29,11 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN /** @brief Registers layer constructor on module load time. * @param type string, containing type name of the layer. -* @param constuctorFunc pointer to the function of type LayerRegister::Constuctor, which creates the layer. +* @param constructorFunc pointer to the function of type LayerRegister::Constructor, which creates the layer. * @details This macros must be placed outside the function code. 
*/ -#define CV_DNN_REGISTER_LAYER_FUNC_STATIC(type, constuctorFunc) \ -static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constuctorFunc); +#define CV_DNN_REGISTER_LAYER_FUNC_STATIC(type, constructorFunc) \ +static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constructorFunc); /** @brief Registers layer class on module load time. * @param type string, containing type name of the layer. @@ -59,10 +59,10 @@ class _LayerStaticRegisterer String type; public: - _LayerStaticRegisterer(const String &layerType, LayerFactory::Constuctor layerConstuctor) + _LayerStaticRegisterer(const String &layerType, LayerFactory::Constructor layerConstructor) { this->type = layerType; - LayerFactory::registerLayer(layerType, layerConstuctor); + LayerFactory::registerLayer(layerType, layerConstructor); } ~_LayerStaticRegisterer() diff --git a/modules/dnn/include/opencv2/dnn/layer.hpp b/modules/dnn/include/opencv2/dnn/layer.hpp index 3fb81f3..c4712b8 100644 --- a/modules/dnn/include/opencv2/dnn/layer.hpp +++ b/modules/dnn/include/opencv2/dnn/layer.hpp @@ -58,10 +58,10 @@ class CV_EXPORTS LayerFactory public: //! Each Layer class must provide this function to the factory - typedef Ptr(*Constuctor)(LayerParams ¶ms); + typedef Ptr(*Constructor)(LayerParams ¶ms); //! Registers the layer class with typename @p type and specified @p constructor. Thread-safe. - static void registerLayer(const String &type, Constuctor constructor); + static void registerLayer(const String &type, Constructor constructor); //! Unregisters registered layer with specified type name. Thread-safe. static void unregisterLayer(const String &type); diff --git a/modules/dnn/src/caffe/caffe_importer.cpp b/modules/dnn/src/caffe/caffe_importer.cpp index c13d3a5..1cc6d28 100644 --- a/modules/dnn/src/caffe/caffe_importer.cpp +++ b/modules/dnn/src/caffe/caffe_importer.cpp @@ -103,6 +103,19 @@ public: ReadNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBinary); } + void extractCustomParams(const google::protobuf::UnknownFieldSet& unknownFields, cv::dnn::LayerParams ¶ms) + { + const int numFields = unknownFields.field_count(); + for (int i = 0; i < numFields; ++i) + { + const google::protobuf::UnknownField& field = unknownFields.field(i); + CV_Assert(field.type() == google::protobuf::UnknownField::TYPE_GROUP); + std::string fieldName = field.group().field(0).length_delimited(); + std::string fieldValue = field.group().field(1).length_delimited(); + params.set(fieldName, fieldValue); + } + } + void addParam(const Message &msg, const FieldDescriptor *field, cv::dnn::LayerParams ¶ms) { const Reflection *refl = msg.GetReflection(); @@ -187,12 +200,15 @@ public: if (!isInternal && !ends_with_param(fd->name())) continue; + const google::protobuf::UnknownFieldSet& unknownFields = msgRefl->GetUnknownFields(msg); bool hasData = fd->is_required() || (fd->is_optional() && msgRefl->HasField(msg, fd)) || - (fd->is_repeated() && msgRefl->FieldSize(msg, fd) > 0); + (fd->is_repeated() && msgRefl->FieldSize(msg, fd) > 0) || + !unknownFields.empty(); if (!hasData) continue; + extractCustomParams(unknownFields, params); if (fd->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { if (fd->is_repeated()) //Extract only first item! 
@@ -258,7 +274,7 @@ public: } } - void extractBinaryLayerParms(const caffe::LayerParameter& layer, LayerParams& layerParams) + void extractBinaryLayerParams(const caffe::LayerParameter& layer, LayerParams& layerParams) { const std::string &name = layer.name(); @@ -319,7 +335,7 @@ public: LayerParams layerParams; extractLayerParams(layer, layerParams); - extractBinaryLayerParms(layer, layerParams); + extractBinaryLayerParams(layer, layerParams); int repetitions = layerCounter[name]++; if (repetitions) diff --git a/modules/dnn/src/caffe/caffe_io.cpp b/modules/dnn/src/caffe/caffe_io.cpp index f6b4eb3..7fc5fea 100644 --- a/modules/dnn/src/caffe/caffe_io.cpp +++ b/modules/dnn/src/caffe/caffe_io.cpp @@ -1120,7 +1120,7 @@ bool ReadProtoFromTextFile(const char* filename, Message* proto) { std::ifstream fs(filename, std::ifstream::in); CHECK(fs.is_open()) << "Can't open \"" << filename << "\""; IstreamInputStream input(&fs); - return google::protobuf::TextFormat::Parse(&input, proto); + return google::protobuf::TextFormat::Parser(true).Parse(&input, proto); } bool ReadProtoFromBinaryFile(const char* filename, Message* proto) { diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 86da4c0..edeca8e 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -2790,7 +2790,7 @@ static Mutex& getLayerFactoryMutex() return *instance; } -typedef std::map LayerFactory_Impl; +typedef std::map > LayerFactory_Impl; static LayerFactory_Impl& getLayerFactoryImpl_() { @@ -2813,21 +2813,22 @@ static LayerFactory_Impl& getLayerFactoryImpl() return *instance; } -void LayerFactory::registerLayer(const String &type, Constuctor constructor) +void LayerFactory::registerLayer(const String &type, Constructor constructor) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(type, "type", type.c_str()); cv::AutoLock lock(getLayerFactoryMutex()); String type_ = type.toLowerCase(); - LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type_); + LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_); - if (it != getLayerFactoryImpl().end() && it->second != constructor) + if (it != getLayerFactoryImpl().end()) { - CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" already was registered"); + if (it->second.back() == constructor) + CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" already was registered"); + it->second.push_back(constructor); } - - getLayerFactoryImpl().insert(std::make_pair(type_, constructor)); + getLayerFactoryImpl().insert(std::make_pair(type_, std::vector(1, constructor))); } void LayerFactory::unregisterLayer(const String &type) @@ -2837,7 +2838,15 @@ void LayerFactory::unregisterLayer(const String &type) cv::AutoLock lock(getLayerFactoryMutex()); String type_ = type.toLowerCase(); - getLayerFactoryImpl().erase(type_); + + LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_); + if (it != getLayerFactoryImpl().end()) + { + if (it->second.size() > 1) + it->second.pop_back(); + else + getLayerFactoryImpl().erase(it); + } } Ptr LayerFactory::createLayerInstance(const String &type, LayerParams& params) @@ -2851,7 +2860,8 @@ Ptr LayerFactory::createLayerInstance(const String &type, LayerParams& pa if (it != getLayerFactoryImpl().end()) { - return it->second(params); + CV_Assert(!it->second.empty()); + return it->second.back()(params); } else { diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index a401f71..ea5d1e7 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ 
b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1564,8 +1564,44 @@ void TFImporter::populateNet(Net dstNet) } else { - printLayerAttr(layer); - CV_Error_(Error::StsError, ("Unknown layer type %s in op %s", type.c_str(), name.c_str())); + // Importer does not know how to map this TensorFlow's operation onto OpenCV's layer. + // However we create a layer with the same type and rely that user defined a custom layer. + + // All the attributes are added to LayerParams. + google::protobuf::Map attr = layer.attr(); + for (google::protobuf::Map::const_iterator ai = attr.begin(); + ai != attr.end(); ++ai) + { + if (ai->second.value_case() == tensorflow::AttrValue::kS) // string + layerParams.set(ai->first, ai->second.s()); + if (ai->second.value_case() == tensorflow::AttrValue::kI) // int64 + layerParams.set(ai->first, ai->second.i()); + if (ai->second.value_case() == tensorflow::AttrValue::kF) // float + layerParams.set(ai->first, ai->second.f()); + if (ai->second.value_case() == tensorflow::AttrValue::kB) // bool + layerParams.set(ai->first, ai->second.b()); + } + + // All the Const input nodes are added to layer's blobs. + std::vector inputsNames; + for (int i = 0; i < layer.input_size(); ++i) + { + // Check if input is a Const node. + if (value_id.find(layer.input(i)) != value_id.end()) + { + Mat blob = getTensorContent(getConstBlob(layer, value_id, i)); + layerParams.blobs.push_back(blob); + } + else + inputsNames.push_back(layer.input(i)); + } + int id = dstNet.addLayer(name, type, layerParams); + layer_id[name] = id; + + for (int i = 0; i < inputsNames.size(); ++i) + { + connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i); + } } } } diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp index 83e4a48..813ee08 100644 --- a/modules/dnn/src/torch/torch_importer.cpp +++ b/modules/dnn/src/torch/torch_importer.cpp @@ -940,7 +940,21 @@ struct TorchImporter } else { - CV_Error(Error::StsNotImplemented, "Unknown nn class \"" + className + "\""); + // Importer does not know how to map Torch's layer type to an OpenCV's one. + // However we parse all the parameters to let user create a custom layer. 
+ readTorchTable(scalarParams, tensorParams); + for (std::map::const_iterator it = scalarParams.begin(); + it != scalarParams.end(); ++it) + { + layerParams.set(it->first, it->second); + } + for (std::map >::iterator it = tensorParams.begin(); + it != tensorParams.end(); ++it) + { + layerParams.blobs.push_back(it->second.second); + } + newModule->apiType = nnName; + curModule->modules.push_back(newModule); } } else diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 413e527..89c6ed8 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -44,7 +44,7 @@ #include "npy_blob.hpp" #include #include -#include +#include // CV_DNN_REGISTER_LAYER_CLASS namespace opencv_test { namespace { @@ -117,94 +117,50 @@ void testLayerUsingCaffeModels(String basename, int targetId = DNN_TARGET_CPU, normAssert(ref, out); } -TEST(Layer_Test_Softmax, Accuracy) -{ - testLayerUsingCaffeModels("layer_softmax"); -} - -OCL_TEST(Layer_Test_Softmax, Accuracy) -{ - testLayerUsingCaffeModels("layer_softmax", DNN_TARGET_OPENCL); -} - -TEST(Layer_Test_LRN_spatial, Accuracy) -{ - testLayerUsingCaffeModels("layer_lrn_spatial"); -} - -OCL_TEST(Layer_Test_LRN_spatial, Accuracy) -{ - testLayerUsingCaffeModels("layer_lrn_spatial", DNN_TARGET_OPENCL); -} - -TEST(Layer_Test_LRN_channels, Accuracy) -{ - testLayerUsingCaffeModels("layer_lrn_channels"); -} - -OCL_TEST(Layer_Test_LRN_channels, Accuracy) -{ - testLayerUsingCaffeModels("layer_lrn_channels", DNN_TARGET_OPENCL); -} - -TEST(Layer_Test_Convolution, Accuracy) -{ - testLayerUsingCaffeModels("layer_convolution", DNN_TARGET_CPU, true); -} - -OCL_TEST(Layer_Test_Convolution, Accuracy) -{ - testLayerUsingCaffeModels("layer_convolution", DNN_TARGET_OPENCL, true); -} - -TEST(Layer_Test_DeConvolution, Accuracy) -{ - testLayerUsingCaffeModels("layer_deconvolution", DNN_TARGET_CPU, true, false); -} - -OCL_TEST(Layer_Test_DeConvolution, Accuracy) +typedef testing::TestWithParam Test_Caffe_layers; +TEST_P(Test_Caffe_layers, Softmax) { - testLayerUsingCaffeModels("layer_deconvolution", DNN_TARGET_OPENCL, true, false); + testLayerUsingCaffeModels("layer_softmax", GetParam()); } -TEST(Layer_Test_InnerProduct, Accuracy) +TEST_P(Test_Caffe_layers, LRN_spatial) { - testLayerUsingCaffeModels("layer_inner_product", DNN_TARGET_CPU, true); + testLayerUsingCaffeModels("layer_lrn_spatial", GetParam()); } -OCL_TEST(Layer_Test_InnerProduct, Accuracy) +TEST_P(Test_Caffe_layers, LRN_channels) { - testLayerUsingCaffeModels("layer_inner_product", DNN_TARGET_OPENCL, true); + testLayerUsingCaffeModels("layer_lrn_channels", GetParam()); } -TEST(Layer_Test_Pooling_max, Accuracy) +TEST_P(Test_Caffe_layers, Convolution) { - testLayerUsingCaffeModels("layer_pooling_max"); + testLayerUsingCaffeModels("layer_convolution", GetParam(), true); } -OCL_TEST(Layer_Test_Pooling_max, Accuracy) +TEST_P(Test_Caffe_layers, DeConvolution) { - testLayerUsingCaffeModels("layer_pooling_max", DNN_TARGET_OPENCL); + testLayerUsingCaffeModels("layer_deconvolution", GetParam(), true, false); } -TEST(Layer_Test_Pooling_ave, Accuracy) +TEST_P(Test_Caffe_layers, InnerProduct) { - testLayerUsingCaffeModels("layer_pooling_ave"); + testLayerUsingCaffeModels("layer_inner_product", GetParam(), true); } -OCL_TEST(Layer_Test_Pooling_ave, Accuracy) +TEST_P(Test_Caffe_layers, Pooling_max) { - testLayerUsingCaffeModels("layer_pooling_ave", DNN_TARGET_OPENCL); + testLayerUsingCaffeModels("layer_pooling_max", GetParam()); } -TEST(Layer_Test_MVN, Accuracy) +TEST_P(Test_Caffe_layers, 
Pooling_ave) { - testLayerUsingCaffeModels("layer_mvn"); + testLayerUsingCaffeModels("layer_pooling_ave", GetParam()); } -OCL_TEST(Layer_Test_MVN, Accuracy) +TEST_P(Test_Caffe_layers, MVN) { - testLayerUsingCaffeModels("layer_mvn", DNN_TARGET_OPENCL); + testLayerUsingCaffeModels("layer_mvn", GetParam()); } void testReshape(const MatShape& inputShape, const MatShape& targetShape, @@ -257,14 +213,9 @@ TEST(Layer_Test_BatchNorm, local_stats) testLayerUsingCaffeModels("layer_batch_norm_local_stats", DNN_TARGET_CPU, true, false); } -TEST(Layer_Test_ReLU, Accuracy) -{ - testLayerUsingCaffeModels("layer_relu"); -} - -OCL_TEST(Layer_Test_ReLU, Accuracy) +TEST_P(Test_Caffe_layers, ReLU) { - testLayerUsingCaffeModels("layer_relu", DNN_TARGET_OPENCL); + testLayerUsingCaffeModels("layer_relu", GetParam()); } TEST(Layer_Test_Dropout, Accuracy) @@ -272,14 +223,9 @@ TEST(Layer_Test_Dropout, Accuracy) testLayerUsingCaffeModels("layer_dropout"); } -TEST(Layer_Test_Concat, Accuracy) -{ - testLayerUsingCaffeModels("layer_concat"); -} - -OCL_TEST(Layer_Test_Concat, Accuracy) +TEST_P(Test_Caffe_layers, Concat) { - testLayerUsingCaffeModels("layer_concat", DNN_TARGET_OPENCL); + testLayerUsingCaffeModels("layer_concat", GetParam()); } TEST(Layer_Test_Fused_Concat, Accuracy) @@ -325,26 +271,16 @@ TEST(Layer_Test_Fused_Concat, Accuracy) testLayerUsingCaffeModels("layer_concat_shared_input", DNN_TARGET_CPU, true, false); } -TEST(Layer_Test_Eltwise, Accuracy) +TEST_P(Test_Caffe_layers, Eltwise) { - testLayerUsingCaffeModels("layer_eltwise"); + testLayerUsingCaffeModels("layer_eltwise", GetParam()); } -OCL_TEST(Layer_Test_Eltwise, Accuracy) +TEST_P(Test_Caffe_layers, PReLU) { - testLayerUsingCaffeModels("layer_eltwise", DNN_TARGET_OPENCL); -} - -TEST(Layer_Test_PReLU, Accuracy) -{ - testLayerUsingCaffeModels("layer_prelu", DNN_TARGET_CPU, true); - testLayerUsingCaffeModels("layer_prelu_fc", DNN_TARGET_CPU, true, false); -} - -OCL_TEST(Layer_Test_PReLU, Accuracy) -{ - testLayerUsingCaffeModels("layer_prelu", DNN_TARGET_OPENCL, true); - testLayerUsingCaffeModels("layer_prelu_fc", DNN_TARGET_OPENCL, true, false); + int targetId = GetParam(); + testLayerUsingCaffeModels("layer_prelu", targetId, true); + testLayerUsingCaffeModels("layer_prelu_fc", targetId, true, false); } //template @@ -385,14 +321,9 @@ static void test_Reshape_Split_Slice_layers(int targetId) normAssert(input, output); } -TEST(Layer_Test_Reshape_Split_Slice, Accuracy) -{ - test_Reshape_Split_Slice_layers(DNN_TARGET_CPU); -} - -OCL_TEST(Layer_Test_Reshape_Split_Slice, Accuracy) +TEST_P(Test_Caffe_layers, Reshape_Split_Slice) { - test_Reshape_Split_Slice_layers(DNN_TARGET_OPENCL); + test_Reshape_Split_Slice_layers(GetParam()); } TEST(Layer_Conv_Elu, Accuracy) @@ -602,7 +533,6 @@ TEST(Layer_Test_ROIPooling, Accuracy) normAssert(out, ref); } -typedef testing::TestWithParam Test_Caffe_layers; TEST_P(Test_Caffe_layers, FasterRCNN_Proposal) { Net net = readNetFromCaffe(_tf("net_faster_rcnn_proposal.prototxt")); @@ -906,4 +836,104 @@ TEST(Test_DLDT, two_inputs) } #endif // HAVE_INF_ENGINE +// Test a custom layer. 
+class InterpLayer CV_FINAL : public Layer +{ +public: + InterpLayer(const LayerParams ¶ms) : Layer(params) + { + zoomFactor = params.get("zoom_factor", 0); + outWidth = params.get("width", 0); + outHeight = params.get("height", 0); + } + + static Ptr create(LayerParams& params) + { + return Ptr(new InterpLayer(params)); + } + + virtual bool getMemoryShapes(const std::vector > &inputs, + const int requiredOutputs, + std::vector > &outputs, + std::vector > &internals) const CV_OVERRIDE + { + const int batchSize = inputs[0][0]; + const int numChannels = inputs[0][1]; + const int inpHeight = inputs[0][2]; + const int inpWidth = inputs[0][3]; + + std::vector outShape(4); + outShape[0] = batchSize; + outShape[1] = numChannels; + outShape[2] = outHeight != 0 ? outHeight : (inpHeight + (inpHeight - 1) * (zoomFactor - 1)); + outShape[3] = outWidth != 0 ? outWidth : (inpWidth + (inpWidth - 1) * (zoomFactor - 1)); + outputs.assign(1, outShape); + return false; + } + + virtual void finalize(const std::vector& inputs, std::vector &outputs) CV_OVERRIDE + { + if (!outWidth && !outHeight) + { + outHeight = outputs[0].size[2]; + outWidth = outputs[0].size[3]; + } + } + + // Implementation of this custom layer is based on https://github.com/cdmh/deeplab-public/blob/master/src/caffe/layers/interp_layer.cpp + virtual void forward(std::vector &inputs, std::vector &outputs, std::vector& internals) CV_OVERRIDE + { + Mat& inp = *inputs[0]; + Mat& out = outputs[0]; + const float* inpData = (float*)inp.data; + float* outData = (float*)out.data; + + const int batchSize = inp.size[0]; + const int numChannels = inp.size[1]; + const int inpHeight = inp.size[2]; + const int inpWidth = inp.size[3]; + + const float rheight = (outHeight > 1) ? static_cast(inpHeight - 1) / (outHeight - 1) : 0.f; + const float rwidth = (outWidth > 1) ? static_cast(inpWidth - 1) / (outWidth - 1) : 0.f; + for (int h2 = 0; h2 < outHeight; ++h2) + { + const float h1r = rheight * h2; + const int h1 = h1r; + const int h1p = (h1 < inpHeight - 1) ? 1 : 0; + const float h1lambda = h1r - h1; + const float h0lambda = 1.f - h1lambda; + for (int w2 = 0; w2 < outWidth; ++w2) + { + const float w1r = rwidth * w2; + const int w1 = w1r; + const int w1p = (w1 < inpWidth - 1) ? 
1 : 0; + const float w1lambda = w1r - w1; + const float w0lambda = 1.f - w1lambda; + const float* pos1 = inpData + h1 * inpWidth + w1; + float* pos2 = outData + h2 * outWidth + w2; + for (int c = 0; c < batchSize * numChannels; ++c) + { + pos2[0] = + h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) + + h1lambda * (w0lambda * pos1[h1p * inpWidth] + w1lambda * pos1[h1p * inpWidth + w1p]); + pos1 += inpWidth * inpHeight; + pos2 += outWidth * outHeight; + } + } + } + } + + virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {} + +private: + int outWidth, outHeight, zoomFactor; +}; + +TEST(Layer_Test_Interp, Accuracy) +{ + CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer); + testLayerUsingCaffeModels("layer_interp", DNN_TARGET_CPU, false, false); + LayerFactory::unregisterLayer("Interp"); +} + }} // namespace diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index 2e92504..b2fb71b 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -7,6 +7,8 @@ #include "test_precomp.hpp" +#include // CV_DNN_REGISTER_LAYER_CLASS + namespace opencv_test { namespace { TEST(blobFromImage_4ch, Regression) @@ -75,4 +77,64 @@ TEST(readNet, Regression) EXPECT_FALSE(net.empty()); } +class FirstCustomLayer CV_FINAL : public Layer +{ +public: + FirstCustomLayer(const LayerParams ¶ms) : Layer(params) {} + + static Ptr create(LayerParams& params) + { + return Ptr(new FirstCustomLayer(params)); + } + + virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {} + virtual void forward(std::vector &inputs, std::vector &outputs, std::vector& internals) CV_OVERRIDE + { + outputs[0].setTo(1); + } +}; + +class SecondCustomLayer CV_FINAL : public Layer +{ +public: + SecondCustomLayer(const LayerParams ¶ms) : Layer(params) {} + + static Ptr create(LayerParams& params) + { + return Ptr(new SecondCustomLayer(params)); + } + + virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {} + virtual void forward(std::vector &inputs, std::vector &outputs, std::vector& internals) CV_OVERRIDE + { + outputs[0].setTo(2); + } +}; + +TEST(LayerFactory, custom_layers) +{ + LayerParams lp; + lp.name = "name"; + lp.type = "CustomType"; + + Mat inp(1, 1, CV_32FC1); + for (int i = 0; i < 3; ++i) + { + if (i == 0) { CV_DNN_REGISTER_LAYER_CLASS(CustomType, FirstCustomLayer); } + else if (i == 1) { CV_DNN_REGISTER_LAYER_CLASS(CustomType, SecondCustomLayer); } + else if (i == 2) { LayerFactory::unregisterLayer("CustomType"); } + + Net net; + net.addLayerToPrev(lp.name, lp.type, lp); + + net.setInput(inp); + Mat output = net.forward(); + + if (i == 0) EXPECT_EQ(output.at(0), 1); + else if (i == 1) EXPECT_EQ(output.at(0), 2); + else if (i == 2) EXPECT_EQ(output.at(0), 1); + } + LayerFactory::unregisterLayer("CustomType"); +} + }} // namespace diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index b7f3c5c..397aadf 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -12,6 +12,8 @@ Test for Tensorflow models loading #include "test_precomp.hpp" #include "npy_blob.hpp" +#include // CV_DNN_REGISTER_LAYER_CLASS + namespace opencv_test { @@ -364,4 +366,95 @@ TEST(Test_TensorFlow, memory_read) runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true); } +// Test a custom layer. 
+class ResizeBilinearLayer CV_FINAL : public Layer +{ +public: + ResizeBilinearLayer(const LayerParams ¶ms) : Layer(params) + { + CV_Assert(!params.get("align_corners", false)); + CV_Assert(blobs.size() == 1, blobs[0].type() == CV_32SC1); + outHeight = blobs[0].at(0, 0); + outWidth = blobs[0].at(0, 1); + } + + static Ptr create(LayerParams& params) + { + return Ptr(new ResizeBilinearLayer(params)); + } + + virtual bool getMemoryShapes(const std::vector > &inputs, + const int requiredOutputs, + std::vector > &outputs, + std::vector > &internals) const CV_OVERRIDE + { + std::vector outShape(4); + outShape[0] = inputs[0][0]; // batch size + outShape[1] = inputs[0][1]; // number of channels + outShape[2] = outHeight; + outShape[3] = outWidth; + outputs.assign(1, outShape); + return false; + } + + // This implementation is based on a reference implementation from + // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h + virtual void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) CV_OVERRIDE + { + Mat& inp = *inputs[0]; + Mat& out = outputs[0]; + const float* inpData = (float*)inp.data; + float* outData = (float*)out.data; + + const int batchSize = inp.size[0]; + const int numChannels = inp.size[1]; + const int inpHeight = inp.size[2]; + const int inpWidth = inp.size[3]; + + float heightScale = static_cast(inpHeight) / outHeight; + float widthScale = static_cast(inpWidth) / outWidth; + for (int b = 0; b < batchSize; ++b) + { + for (int y = 0; y < outHeight; ++y) + { + float input_y = y * heightScale; + int y0 = static_cast(std::floor(input_y)); + int y1 = std::min(y0 + 1, inpHeight - 1); + for (int x = 0; x < outWidth; ++x) + { + float input_x = x * widthScale; + int x0 = static_cast(std::floor(input_x)); + int x1 = std::min(x0 + 1, inpWidth - 1); + for (int c = 0; c < numChannels; ++c) + { + float interpolation = + inpData[offset(inp.size, c, x0, y0, b)] * (1 - (input_y - y0)) * (1 - (input_x - x0)) + + inpData[offset(inp.size, c, x0, y1, b)] * (input_y - y0) * (1 - (input_x - x0)) + + inpData[offset(inp.size, c, x1, y0, b)] * (1 - (input_y - y0)) * (input_x - x0) + + inpData[offset(inp.size, c, x1, y1, b)] * (input_y - y0) * (input_x - x0); + outData[offset(out.size, c, x, y, b)] = interpolation; + } + } + } + } + } + + virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {} + +private: + static inline int offset(const MatSize& size, int c, int x, int y, int b) + { + return x + size[3] * (y + size[2] * (c + size[1] * b)); + } + + int outWidth, outHeight; +}; + +TEST(Test_TensorFlow, resize_bilinear) +{ + CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer); + runTensorFlowNet("resize_bilinear"); + LayerFactory::unregisterLayer("ResizeBilinear"); +} + } diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index f748758..33e0e94 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -42,6 +42,7 @@ #include "test_precomp.hpp" #include "npy_blob.hpp" #include +#include // CV_DNN_REGISTER_LAYER_CLASS namespace opencv_test { @@ -325,4 +326,62 @@ TEST(Torch_Importer, net_residual) runTorchNet("net_residual", DNN_TARGET_CPU, "", false, true); } +// Test a custom layer +// https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest +class SpatialUpSamplingNearestLayer CV_FINAL : public Layer +{ +public: + 
SpatialUpSamplingNearestLayer(const LayerParams ¶ms) : Layer(params) + { + scale = params.get("scale_factor"); + } + + static Ptr create(LayerParams& params) + { + return Ptr(new SpatialUpSamplingNearestLayer(params)); + } + + virtual bool getMemoryShapes(const std::vector > &inputs, + const int requiredOutputs, + std::vector > &outputs, + std::vector > &internals) const CV_OVERRIDE + { + std::vector outShape(4); + outShape[0] = inputs[0][0]; // batch size + outShape[1] = inputs[0][1]; // number of channels + outShape[2] = scale * inputs[0][2]; + outShape[3] = scale * inputs[0][3]; + outputs.assign(1, outShape); + return false; + } + + virtual void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) CV_OVERRIDE + { + Mat& inp = *inputs[0]; + Mat& out = outputs[0]; + const int outHeight = out.size[2]; + const int outWidth = out.size[3]; + for (size_t n = 0; n < inputs[0]->size[0]; ++n) + { + for (size_t ch = 0; ch < inputs[0]->size[1]; ++ch) + { + resize(getPlane(inp, n, ch), getPlane(out, n, ch), + Size(outWidth, outHeight), 0, 0, INTER_NEAREST); + } + } + } + + virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {} + +private: + int scale; +}; + +TEST(Torch_Importer, upsampling_nearest) +{ + CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer); + runTorchNet("net_spatial_upsampling_nearest", DNN_TARGET_CPU, "", false, true); + LayerFactory::unregisterLayer("SpatialUpSamplingNearest"); +} + } diff --git a/samples/cpp/tutorial_code/dnn/custom_layers.cpp b/samples/cpp/tutorial_code/dnn/custom_layers.cpp new file mode 100644 index 0000000..3fc9e61 --- /dev/null +++ b/samples/cpp/tutorial_code/dnn/custom_layers.cpp @@ -0,0 +1,232 @@ +#include + +//! [A custom layer interface] +class MyLayer : public cv::dnn::Layer +{ +public: + //! [MyLayer::MyLayer] + MyLayer(const cv::dnn::LayerParams ¶ms); + //! [MyLayer::MyLayer] + + //! [MyLayer::create] + static cv::Ptr create(cv::dnn::LayerParams& params); + //! [MyLayer::create] + + //! [MyLayer::getMemoryShapes] + virtual bool getMemoryShapes(const std::vector > &inputs, + const int requiredOutputs, + std::vector > &outputs, + std::vector > &internals) const; + //! [MyLayer::getMemoryShapes] + + //! [MyLayer::forward] + virtual void forward(std::vector &inputs, std::vector &outputs, std::vector &internals); + //! [MyLayer::forward] + + //! [MyLayer::finalize] + virtual void finalize(const std::vector &inputs, std::vector &outputs); + //! [MyLayer::finalize] + + virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals); +}; +//! [A custom layer interface] + +//! 
[InterpLayer] +class InterpLayer : public cv::dnn::Layer +{ +public: + InterpLayer(const cv::dnn::LayerParams ¶ms) : Layer(params) + { + outWidth = params.get("width", 0); + outHeight = params.get("height", 0); + } + + static cv::Ptr create(cv::dnn::LayerParams& params) + { + return cv::Ptr(new InterpLayer(params)); + } + + virtual bool getMemoryShapes(const std::vector > &inputs, + const int requiredOutputs, + std::vector > &outputs, + std::vector > &internals) const + { + CV_UNUSED(requiredOutputs); CV_UNUSED(internals); + std::vector outShape(4); + outShape[0] = inputs[0][0]; // batch size + outShape[1] = inputs[0][1]; // number of channels + outShape[2] = outHeight; + outShape[3] = outWidth; + outputs.assign(1, outShape); + return false; + } + + // Implementation of this custom layer is based on https://github.com/cdmh/deeplab-public/blob/master/src/caffe/layers/interp_layer.cpp + virtual void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) + { + CV_UNUSED(internals); + cv::Mat& inp = *inputs[0]; + cv::Mat& out = outputs[0]; + const float* inpData = (float*)inp.data; + float* outData = (float*)out.data; + + const int batchSize = inp.size[0]; + const int numChannels = inp.size[1]; + const int inpHeight = inp.size[2]; + const int inpWidth = inp.size[3]; + + const float rheight = (outHeight > 1) ? static_cast(inpHeight - 1) / (outHeight - 1) : 0.f; + const float rwidth = (outWidth > 1) ? static_cast(inpWidth - 1) / (outWidth - 1) : 0.f; + for (int h2 = 0; h2 < outHeight; ++h2) + { + const float h1r = rheight * h2; + const int h1 = static_cast(h1r); + const int h1p = (h1 < inpHeight - 1) ? 1 : 0; + const float h1lambda = h1r - h1; + const float h0lambda = 1.f - h1lambda; + for (int w2 = 0; w2 < outWidth; ++w2) + { + const float w1r = rwidth * w2; + const int w1 = static_cast(w1r); + const int w1p = (w1 < inpWidth - 1) ? 1 : 0; + const float w1lambda = w1r - w1; + const float w0lambda = 1.f - w1lambda; + const float* pos1 = inpData + h1 * inpWidth + w1; + float* pos2 = outData + h2 * outWidth + w2; + for (int c = 0; c < batchSize * numChannels; ++c) + { + pos2[0] = + h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) + + h1lambda * (w0lambda * pos1[h1p * inpWidth] + w1lambda * pos1[h1p * inpWidth + w1p]); + pos1 += inpWidth * inpHeight; + pos2 += outWidth * outHeight; + } + } + } + } + + virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {} + +private: + int outWidth, outHeight; +}; +//! [InterpLayer] + +//! 
[ResizeBilinearLayer] +class ResizeBilinearLayer : public cv::dnn::Layer +{ +public: + ResizeBilinearLayer(const cv::dnn::LayerParams ¶ms) : Layer(params) + { + CV_Assert(!params.get("align_corners", false)); + CV_Assert(blobs.size() == 1, blobs[0].type() == CV_32SC1); + outHeight = blobs[0].at(0, 0); + outWidth = blobs[0].at(0, 1); + } + + static cv::Ptr create(cv::dnn::LayerParams& params) + { + return cv::Ptr(new ResizeBilinearLayer(params)); + } + + virtual bool getMemoryShapes(const std::vector > &inputs, + const int requiredOutputs, + std::vector > &outputs, + std::vector > &internals) const + { + CV_UNUSED(requiredOutputs); CV_UNUSED(internals); + std::vector outShape(4); + outShape[0] = inputs[0][0]; // batch size + outShape[1] = inputs[0][1]; // number of channels + outShape[2] = outHeight; + outShape[3] = outWidth; + outputs.assign(1, outShape); + return false; + } + + // This implementation is based on a reference implementation from + // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h + virtual void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) + { + CV_UNUSED(internals); + cv::Mat& inp = *inputs[0]; + cv::Mat& out = outputs[0]; + const float* inpData = (float*)inp.data; + float* outData = (float*)out.data; + + const int batchSize = inp.size[0]; + const int numChannels = inp.size[1]; + const int inpHeight = inp.size[2]; + const int inpWidth = inp.size[3]; + + float heightScale = static_cast(inpHeight) / outHeight; + float widthScale = static_cast(inpWidth) / outWidth; + for (int b = 0; b < batchSize; ++b) + { + for (int y = 0; y < outHeight; ++y) + { + float input_y = y * heightScale; + int y0 = static_cast(std::floor(input_y)); + int y1 = std::min(y0 + 1, inpHeight - 1); + for (int x = 0; x < outWidth; ++x) + { + float input_x = x * widthScale; + int x0 = static_cast(std::floor(input_x)); + int x1 = std::min(x0 + 1, inpWidth - 1); + for (int c = 0; c < numChannels; ++c) + { + float interpolation = + inpData[offset(inp.size, c, x0, y0, b)] * (1 - (input_y - y0)) * (1 - (input_x - x0)) + + inpData[offset(inp.size, c, x0, y1, b)] * (input_y - y0) * (1 - (input_x - x0)) + + inpData[offset(inp.size, c, x1, y0, b)] * (1 - (input_y - y0)) * (input_x - x0) + + inpData[offset(inp.size, c, x1, y1, b)] * (input_y - y0) * (input_x - x0); + outData[offset(out.size, c, x, y, b)] = interpolation; + } + } + } + } + } + + virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {} + +private: + static inline int offset(const cv::MatSize& size, int c, int x, int y, int b) + { + return x + size[3] * (y + size[2] * (c + size[1] * b)); + } + + int outWidth, outHeight; +}; +//! [ResizeBilinearLayer] + +//! [Register a custom layer] +#include // CV_DNN_REGISTER_LAYER_CLASS macro + +int main(int argc, char** argv) +{ + CV_DNN_REGISTER_LAYER_CLASS(MyType, MyLayer); + // ... + //! [Register a custom layer] + CV_UNUSED(argc); CV_UNUSED(argv); + //! [Register InterpLayer] + CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer); + cv::dnn::Net caffeNet = cv::dnn::readNet("/path/to/config.prototxt", "/path/to/weights.caffemodel"); + //! [Register InterpLayer] + + //! [Register ResizeBilinearLayer] + CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer); + cv::dnn::Net tfNet = cv::dnn::readNet("/path/to/graph.pb"); + //! 
[Register ResizeBilinearLayer] +} + +cv::Ptr MyLayer::create(cv::dnn::LayerParams& params) +{ + return cv::Ptr(new MyLayer(params)); +} +MyLayer::MyLayer(const cv::dnn::LayerParams&) {} +bool MyLayer::getMemoryShapes(const std::vector >&, const int, + std::vector >&, + std::vector >&) const { return false; } +void MyLayer::forward(std::vector&, std::vector&, std::vector&) {} +void MyLayer::finalize(const std::vector&, std::vector&) {} +void MyLayer::forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {} -- 2.7.4