1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #ifndef OPENCV_DNN_DNN_ALL_LAYERS_HPP
43 #define OPENCV_DNN_DNN_ALL_LAYERS_HPP
44 #include <opencv2/dnn.hpp>
48 CV__DNN_EXPERIMENTAL_NS_BEGIN
52 /** @defgroup dnnLayerList Partial List of Implemented Layers
54 This subsection of dnn module contains information about built-in layers and their descriptions.
56 Classes listed here, in fact, provide a C++ API for creating instances of built-in layers.
57 In addition to this way of instantiating layers, there is a more common factory API (see @ref dnnLayerFactory); it allows layers to be created dynamically (by name) and new ones to be registered.
58 You can use both APIs, but the factory API is less convenient for native C++ programming and is basically designed for use inside importers (see @ref readNetFromCaffe(), @ref readNetFromTorch(), @ref readNetFromTensorflow()).
60 Built-in layers partially reproduce functionality of corresponding Caffe and Torch7 layers.
61 In particular, the following layers and Caffe @ref Importer were tested to reproduce <a href="http://caffe.berkeleyvision.org/tutorial/layers.html">Caffe</a> functionality:
66 - TanH, ReLU, Sigmoid, BNLL, Power, AbsVal
68 - Reshape, Flatten, Slice, Split
71 - Dropout (since it does nothing on forward pass -))
//! Blank layer; instantiated through the create() factory below.
74 class CV_EXPORTS BlankLayer : public Layer
//! Creates an instance of BlankLayer configured by @p params.
77 static Ptr<BlankLayer> create(const LayerParams &params);
80 //! LSTM recurrent layer
81 class CV_EXPORTS LSTMLayer : public Layer
84 /** Creates instance of LSTM layer */
85 static Ptr<LSTMLayer> create(const LayerParams& params);
87 /** @deprecated Use LayerParams::blobs instead.
88 @brief Set trained weights for LSTM layer.
90 LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights.
92 Let @f$x_t@f$ be current input, @f$h_t@f$ be current output, @f$c_t@f$ be current state.
93 Then the current output and current cell state are computed as follows:
95 h_t &= o_t \odot tanh(c_t), \\
96 c_t &= f_t \odot c_{t-1} + i_t \odot g_t, \\
98 where @f$\odot@f$ is the per-element multiply operation and @f$i_t, f_t, o_t, g_t@f$ are internal gates that are computed using learned weights.
100 Gates are computed as follows:
102 i_t &= sigmoid&(W_{xi} x_t + W_{hi} h_{t-1} + b_i), \\
103 f_t &= sigmoid&(W_{xf} x_t + W_{hf} h_{t-1} + b_f), \\
104 o_t &= sigmoid&(W_{xo} x_t + W_{ho} h_{t-1} + b_o), \\
105 g_t &= tanh &(W_{xg} x_t + W_{hg} h_{t-1} + b_g), \\
107 where @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
108 @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
110 For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}; W_{xg}] @f$
111 (i.e. @f$W_x@f$ is the vertical concatenation of @f$ W_{x?} @f$), @f$ W_x \in R^{4N_h \times N_x} @f$.
112 The same for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}; W_{hg}], W_h \in R^{4N_h \times N_h} @f$
113 and for @f$ b = [b_i; b_f; b_o; b_g]@f$, @f$b \in R^{4N_h} @f$.
115 @param Wh is matrix defining how previous output is transformed to internal gates (i.e. according to the above-mentioned notation is @f$ W_h @f$)
116 @param Wx is matrix defining how current input is transformed to internal gates (i.e. according to the above-mentioned notation is @f$ W_x @f$)
117 @param b is bias vector (i.e. according to the above-mentioned notation is @f$ b @f$)
119 CV_DEPRECATED virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0;
121 /** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
122 * @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
123 * where `Wh` is parameter from setWeights().
125 virtual void setOutShape(const MatShape &outTailShape = MatShape()) = 0;
127 /** @deprecated Use flag `use_timestamp_dim` in LayerParams.
128 * @brief Specifies whether to interpret the first dimension of the input blob as the timestamp dimension or as the sample dimension.
130 * If flag is set to true then shape of input blob will be interpreted as [`T`, `N`, `[data dims]`] where `T` specifies number of timestamps, `N` is number of independent streams.
131 * In this case each forward() call will iterate through `T` timestamps and update layer's state `T` times.
133 * If flag is set to false then shape of input blob will be interpreted as [`N`, `[data dims]`].
134 * In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
136 CV_DEPRECATED virtual void setUseTimstampsDim(bool use = true) = 0;
138 /** @deprecated Use flag `produce_cell_output` in LayerParams.
139 * @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
140 * @details Shape of the second output is the same as first output.
142 CV_DEPRECATED virtual void setProduceCellOutput(bool produce = false) = 0;
144 /* In common case it uses a single input with @f$x_t@f$ values to compute output(s) @f$h_t@f$ (and @f$c_t@f$).
145 * @param input should contain packed values @f$x_t@f$
146 * @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if setProduceCellOutput() flag was set to true).
148 * If setUseTimstampsDim() is set to true then @p input[0] should have at least two dimensions with the following shape: [`T`, `N`, `[data dims]`],
149 * where `T` specifies number of timestamps, `N` is number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is stored inside @p input[0][t, stream, ...]).
151 * If setUseTimstampsDim() is set to false then @p input[0] should contain a single timestamp, its shape should have the form [`N`, `[data dims]`] with at least one dimension.
152 * (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]).
//! Maps an input name to its index. NOTE(review): the recognised names are not visible here; confirm in the implementation.
155 int inputNameToIndex(String inputName);
//! Maps an output name to its index. NOTE(review): the recognised names are not visible here; confirm in the implementation.
156 int outputNameToIndex(String outputName);
159 /** @brief Classical recurrent layer
161 Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and computes two outputs @f$o_t@f$ and @f$h_t@f$.
163 - input: should contain packed input @f$x_t@f$.
164 - output: should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
166 input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` are the number of timestamps and the number of independent samples of @f$x_t@f$ respectively.
168 output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is number of rows in @f$ W_{ho} @f$ matrix.
170 If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix.
172 class CV_EXPORTS RNNLayer : public Layer
175 /** Creates instance of RNNLayer */
176 static Ptr<RNNLayer> create(const LayerParams& params);
178 /** Sets up learned weights.
180 Recurrent-layer behavior on each step is defined by current input @f$ x_t @f$, previous state @f$ h_{t-1} @f$ and learned weights as follows:
182 h_t &= tanh&(W_{hh} h_{t-1} + W_{xh} x_t + b_h), \\
183 o_t &= tanh&(W_{ho} h_t + b_o),
186 @param Wxh is @f$ W_{xh} @f$ matrix
187 @param bh is @f$ b_{h} @f$ vector
188 @param Whh is @f$ W_{hh} @f$ matrix
189 @param Who is @f$ W_{ho} @f$ matrix
190 @param bo is @f$ b_{o} @f$ vector
192 virtual void setWeights(const Mat &Wxh, const Mat &bh, const Mat &Whh, const Mat &Who, const Mat &bo) = 0;
194 /** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output.
195 * @details Shape of the second output is [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix.
197 virtual void setProduceHiddenOutput(bool produce = false) = 0;
//! Common base class of ConvolutionLayer and DeconvolutionLayer.
201 class CV_EXPORTS BaseConvolutionLayer : public Layer
//! Geometry settings shared by the derived layers. NOTE(review): the exact meaning of each field is defined by the implementation; confirm there.
204 Size kernel, stride, pad, dilation, adjustPad;
//! Convolution layer; inherits its fields from BaseConvolutionLayer.
209 class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer
//! Creates a layer instance configured by @p params; the returned handle is typed as BaseConvolutionLayer.
212 static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
//! Deconvolution layer; inherits its fields from BaseConvolutionLayer.
215 class CV_EXPORTS DeconvolutionLayer : public BaseConvolutionLayer
//! Creates a layer instance configured by @p params; the returned handle is typed as BaseConvolutionLayer.
218 static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
//! LRN layer.
221 class CV_EXPORTS LRNLayer : public Layer
//! Normalization coefficients. NOTE(review): presumably the usual local-response-normalization alpha/beta/bias terms; confirm against the implementation.
232 float alpha, beta, bias;
//! Creates an instance of LRNLayer configured by @p params.
235 static Ptr<LRNLayer> create(const LayerParams& params);
//! Pooling layer.
238 class CV_EXPORTS PoolingLayer : public Layer
//! Pooling geometry. NOTE(review): presumably window size, step and padding; confirm against the implementation.
249 Size kernel, stride, pad;
//! Creates an instance of PoolingLayer configured by @p params.
255 static Ptr<PoolingLayer> create(const LayerParams& params);
//! Softmax layer.
258 class CV_EXPORTS SoftmaxLayer : public Layer
//! Creates an instance of SoftmaxLayer configured by @p params.
263 static Ptr<SoftmaxLayer> create(const LayerParams& params);
//! Lp-normalization layer.
266 class CV_EXPORTS LPNormalizeLayer : public Layer
//! Normalization settings. NOTE(review): presumably the norm order p and a small stabilising constant; confirm against the implementation.
269 float pnorm, epsilon;
//! Creates an instance of LPNormalizeLayer configured by @p params.
271 static Ptr<LPNormalizeLayer> create(const LayerParams& params);
//! Inner-product layer.
274 class CV_EXPORTS InnerProductLayer : public Layer
//! Creates an instance of InnerProductLayer configured by @p params.
278 static Ptr<InnerProductLayer> create(const LayerParams& params);
//! MVN layer.
281 class CV_EXPORTS MVNLayer : public Layer
//! Mode flags. NOTE(review): presumably toggle variance normalization and whether statistics span channels; confirm against the implementation.
285 bool normVariance, acrossChannels;
//! Creates an instance of MVNLayer configured by @p params.
287 static Ptr<MVNLayer> create(const LayerParams& params);
//! Reshape layer.
292 class CV_EXPORTS ReshapeLayer : public Layer
//! Description of the target shape. NOTE(review): handling of any special values is defined by the implementation; confirm there.
295 MatShape newShapeDesc;
//! Creates an instance of ReshapeLayer configured by @p params.
298 static Ptr<ReshapeLayer> create(const LayerParams& params);
//! Flatten layer.
301 class CV_EXPORTS FlattenLayer : public Layer
//! Creates an instance of FlattenLayer configured by @p params.
304 static Ptr<FlattenLayer> create(const LayerParams &params);
//! Concatenation layer.
307 class CV_EXPORTS ConcatLayer : public Layer
312 * @brief Add zero padding in case of concatenation of blobs with different
315 * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat
//! Creates an instance of ConcatLayer configured by @p params.
319 static Ptr<ConcatLayer> create(const LayerParams &params);
//! Split layer; produces copies of its input (see outputsCount).
322 class CV_EXPORTS SplitLayer : public Layer
325 int outputsCount; //!< Number of copies that will be produced (is ignored when negative).
//! Creates an instance of SplitLayer configured by @p params.
327 static Ptr<SplitLayer> create(const LayerParams &params);
331 * Slice layer has several modes:
333 * @param[in] axis Axis of split operation
334 * @param[in] slice_point Array of split points
336 * Number of output blobs equals to number of split points plus one. The
337 * first blob is a slice on input from 0 to @p slice_point[0] - 1 by @p axis,
338 * the second output blob is a slice of input from @p slice_point[0] to
339 * @p slice_point[1] - 1 by @p axis and the last output blob is a slice of
340 * input from @p slice_point[-1] up to the end of @p axis size.
343 * @param begin Vector of start indices
344 * @param size Vector of sizes
346 * More convenient numpy-like slice. One and only output blob
347 * is a slice `input[begin[0]:begin[0]+size[0], begin[1]:begin[1]+size[1], ...]`
350 * @param axis Axis of split operation
352 * Split input blob on the equal parts by @p axis.
354 class CV_EXPORTS SliceLayer : public Layer
358 * @brief Vector of slice ranges.
360 * The first dimension equals number of output blobs.
361 * Inner vector has slice ranges for the first number of input dimensions.
363 std::vector<std::vector<Range> > sliceRanges;
//! Creates an instance of SliceLayer configured by @p params.
366 static Ptr<SliceLayer> create(const LayerParams &params);
//! Permute layer.
369 class CV_EXPORTS PermuteLayer : public Layer
//! Creates an instance of PermuteLayer configured by @p params.
372 static Ptr<PermuteLayer> create(const LayerParams& params);
376 * @brief Adds extra values for specific axes.
377 * @param paddings Vector of paddings in format
379 * [ pad_before, pad_after, // [0]th dimension
380 * pad_before, pad_after, // [1]st dimension
382 * pad_before, pad_after ] // [n]th dimension
384 * that represents number of padded values at every dimension
385 * starting from the first one. The rest of dimensions won't
387 * @param value Value to be padded. Defaults to zero.
388 * @param input_dims Torch's parameter. If @p input_dims is not equal to the
389 * actual input dimensionality then the `[0]th` dimension
390 * is considered as a batch dimension and @p paddings are shifted
391 * by one dimension. Defaults to `-1`, which means padding
392 * corresponding to @p paddings.
394 class CV_EXPORTS PaddingLayer : public Layer
//! Creates an instance of PaddingLayer configured by @p params.
397 static Ptr<PaddingLayer> create(const LayerParams& params);
//! Base class of the activation layers declared after it (ReLU, ReLU6, ChannelsPReLU, ELU, TanH, Sigmoid, BNLL, Abs, Power).
401 class CV_EXPORTS ActivationLayer : public Layer
//! Pure-virtual hook that each concrete activation implements: reads from @p src and writes to @p dst.
//! NOTE(review): the meaning of @p len, @p outPlaneSize, @p cn0 and @p cn1 is not visible here
//! (presumably element count, plane stride and a channel range); confirm against callers.
404 virtual void forwardSlice(const float* src, float* dst, int len,
405 size_t outPlaneSize, int cn0, int cn1) const = 0;
//! ReLU activation layer.
408 class CV_EXPORTS ReLULayer : public ActivationLayer
//! Creates an instance of ReLULayer configured by @p params.
413 static Ptr<ReLULayer> create(const LayerParams &params);
//! ReLU6 activation layer.
416 class CV_EXPORTS ReLU6Layer : public ActivationLayer
//! Creates an instance of ReLU6Layer configured by @p params.
419 static Ptr<ReLU6Layer> create(const LayerParams &params);
//! Channel-wise PReLU activation layer.
422 class CV_EXPORTS ChannelsPReLULayer : public ActivationLayer
//! Creates a layer instance configured by @p params; unlike the sibling activations, the returned handle is typed as the generic Layer.
425 static Ptr<Layer> create(const LayerParams& params);
//! ELU activation layer.
428 class CV_EXPORTS ELULayer : public ActivationLayer
//! Creates an instance of ELULayer configured by @p params.
431 static Ptr<ELULayer> create(const LayerParams &params);
//! TanH activation layer.
434 class CV_EXPORTS TanHLayer : public ActivationLayer
//! Creates an instance of TanHLayer configured by @p params.
437 static Ptr<TanHLayer> create(const LayerParams &params);
//! Sigmoid activation layer.
440 class CV_EXPORTS SigmoidLayer : public ActivationLayer
//! Creates an instance of SigmoidLayer configured by @p params.
443 static Ptr<SigmoidLayer> create(const LayerParams &params);
//! BNLL activation layer.
446 class CV_EXPORTS BNLLLayer : public ActivationLayer
//! Creates an instance of BNLLLayer configured by @p params.
449 static Ptr<BNLLLayer> create(const LayerParams &params);
//! Absolute-value activation layer.
452 class CV_EXPORTS AbsLayer : public ActivationLayer
//! Creates an instance of AbsLayer configured by @p params.
455 static Ptr<AbsLayer> create(const LayerParams &params);
//! Power activation layer.
458 class CV_EXPORTS PowerLayer : public ActivationLayer
//! Activation coefficients. NOTE(review): presumably y = (shift + scale * x)^power as in Caffe's Power layer; confirm against the implementation.
461 float power, scale, shift;
//! Creates an instance of PowerLayer configured by @p params.
463 static Ptr<PowerLayer> create(const LayerParams &params);
466 /* Layers used in semantic segmentation */
//! Crop layer.
468 class CV_EXPORTS CropLayer : public Layer
//! Crop offsets. NOTE(review): presumably one entry per cropped axis; confirm against the implementation.
472 std::vector<int> offset;
//! Creates an instance of CropLayer configured by @p params.
474 static Ptr<CropLayer> create(const LayerParams &params);
//! Element-wise operation layer.
477 class CV_EXPORTS EltwiseLayer : public Layer
//! Creates an instance of EltwiseLayer configured by @p params.
487 static Ptr<EltwiseLayer> create(const LayerParams &params);
//! Batch-normalization layer.
490 class CV_EXPORTS BatchNormLayer : public Layer
//! Flags telling whether the layer carries weights and a bias. NOTE(review): who sets them is not visible here; presumably create().
493 bool hasWeights, hasBias;
//! Pure-virtual accessor that fills @p scale and @p shift (output parameters).
//! NOTE(review): presumably the folded per-channel multiplier and offset; confirm against the implementation.
496 virtual void getScaleShift(Mat& scale, Mat& shift) const = 0;
//! Creates an instance of BatchNormLayer configured by @p params.
497 static Ptr<BatchNormLayer> create(const LayerParams &params);
//! Max-unpooling layer.
500 class CV_EXPORTS MaxUnpoolLayer : public Layer
//! Creates an instance of MaxUnpoolLayer configured by @p params.
507 static Ptr<MaxUnpoolLayer> create(const LayerParams &params);
//! Scale layer.
510 class CV_EXPORTS ScaleLayer : public Layer
//! Creates an instance of ScaleLayer configured by @p params.
515 static Ptr<ScaleLayer> create(const LayerParams& params);
//! Shift layer.
518 class CV_EXPORTS ShiftLayer : public Layer
//! Creates an instance of ShiftLayer configured by @p params.
521 static Ptr<ShiftLayer> create(const LayerParams& params);
//! Prior-box layer.
524 class CV_EXPORTS PriorBoxLayer : public Layer
//! Creates an instance of PriorBoxLayer configured by @p params.
527 static Ptr<PriorBoxLayer> create(const LayerParams& params);
//! Reorg layer.
530 class CV_EXPORTS ReorgLayer : public Layer
//! Creates an instance of ReorgLayer configured by @p params.
533 static Ptr<ReorgLayer> create(const LayerParams& params);
//! Region layer.
536 class CV_EXPORTS RegionLayer : public Layer
//! Creates an instance of RegionLayer configured by @p params.
539 static Ptr<RegionLayer> create(const LayerParams& params);
//! Detection-output layer.
542 class CV_EXPORTS DetectionOutputLayer : public Layer
//! Creates an instance of DetectionOutputLayer configured by @p params.
545 static Ptr<DetectionOutputLayer> create(const LayerParams& params);
//! Bounding-box normalization layer.
//! Marked CV_EXPORTS like every other layer class in this header, so that create() is visible to code linking against the shared library.
548 class CV_EXPORTS NormalizeBBoxLayer : public Layer
//! Creates an instance of NormalizeBBoxLayer configured by @p params.
551 static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
555 * @brief Resize input 4-dimensional blob by nearest neighbor strategy.
557 * Layer is used to support TensorFlow's resize_nearest_neighbor op.
559 class CV_EXPORTS ResizeNearestNeighborLayer : public Layer
//! Creates an instance of ResizeNearestNeighborLayer configured by @p params.
562 static Ptr<ResizeNearestNeighborLayer> create(const LayerParams& params);
567 CV__DNN_EXPERIMENTAL_NS_END