modules/dnn/include/opencv2/dnn/all_layers.hpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  14 // Third party copyrights are property of their respective owners.
  15 //
  16 // Redistribution and use in source and binary forms, with or without modification,
  17 // are permitted provided that the following conditions are met:
  18 //
  19 //   * Redistribution's of source code must retain the above copyright notice,
  20 //     this list of conditions and the following disclaimer.
  21 //
  22 //   * Redistribution's in binary form must reproduce the above copyright notice,
  23 //     this list of conditions and the following disclaimer in the documentation
  24 //     and/or other materials provided with the distribution.
  25 //
  26 //   * The name of the copyright holders may not be used to endorse or promote products
  27 //     derived from this software without specific prior written permission.
  28 //
  29 // This software is provided by the copyright holders and contributors "as is" and
  30 // any express or implied warranties, including, but not limited to, the implied
  31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32 // In no event shall the Intel Corporation or contributors be liable for any direct,
  33 // indirect, incidental, special, exemplary, or consequential damages
  34 // (including, but not limited to, procurement of substitute goods or services;
  35 // loss of use, data, or profits; or business interruption) however caused
  36 // and on any theory of liability, whether in contract, strict liability,
  37 // or tort (including negligence or otherwise) arising in any way out of
  38 // the use of this software, even if advised of the possibility of such damage.
  39 //
  40 //M*/
  41
  42 #ifndef OPENCV_DNN_DNN_ALL_LAYERS_HPP
  43 #define OPENCV_DNN_DNN_ALL_LAYERS_HPP
  44 #include <opencv2/dnn.hpp>
  45
  46 namespace cv {
  47 namespace dnn {
  48 CV__DNN_EXPERIMENTAL_NS_BEGIN
  49 //! @addtogroup dnn
  50 //! @{
  51
  52 /** @defgroup dnnLayerList Partial List of Implemented Layers
  53   @{
  This subsection of the dnn module contains information about built-in layers and their descriptions.
  55
  Classes listed here, in fact, provide a C++ API for creating instances of built-in layers.
  In addition to this way of instantiating layers, there is a more common factory API (see @ref dnnLayerFactory); it allows creating layers dynamically (by name) and registering new ones.
  You can use both APIs, but the factory API is less convenient for native C++ programming and is basically designed for use inside importers (see @ref readNetFromCaffe(), @ref readNetFromTorch(), @ref readNetFromTensorflow()).
  59
  Built-in layers partially reproduce functionality of the corresponding Caffe and Torch7 layers.
  In particular, the following layers and the Caffe @ref Importer were tested to reproduce <a href="http://caffe.berkeleyvision.org/tutorial/layers.html">Caffe</a> functionality:
  62   - Convolution
  63   - Deconvolution
  64   - Pooling
  65   - InnerProduct
  66   - TanH, ReLU, Sigmoid, BNLL, Power, AbsVal
  67   - Softmax
  68   - Reshape, Flatten, Slice, Split
  69   - LRN
  70   - MVN
  71   - Dropout (since it does nothing on forward pass -))
  72 */
  73
  74     class CV_EXPORTS BlankLayer : public Layer
  75     {
  76     public:
  77         static Ptr<BlankLayer> create(const LayerParams &params);
  78     };
  79
  80     //! LSTM recurrent layer
  81     class CV_EXPORTS LSTMLayer : public Layer
  82     {
  83     public:
  84         /** Creates instance of LSTM layer */
  85         static Ptr<LSTMLayer> create(const LayerParams& params);
  86
  87         /** Set trained weights for LSTM layer.
  88         LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights.
  89
  90         Let @f$x_t@f$ be current input, @f$h_t@f$ be current output, @f$c_t@f$ be current state.
  91         Than current output and current cell state is computed as follows:
  92         @f{eqnarray*}{
  93         h_t &= o_t \odot tanh(c_t),               \\
  94         c_t &= f_t \odot c_{t-1} + i_t \odot g_t, \\
  95         @f}
  96         where @f$\odot@f$ is per-element multiply operation and @f$i_t, f_t, o_t, g_t@f$ is internal gates that are computed using learned wights.
  97
  98         Gates are computed as follows:
  99         @f{eqnarray*}{
 100         i_t &= sigmoid&(W_{xi} x_t + W_{hi} h_{t-1} + b_i), \\
 101         f_t &= sigmoid&(W_{xf} x_t + W_{hf} h_{t-1} + b_f), \\
 102         o_t &= sigmoid&(W_{xo} x_t + W_{ho} h_{t-1} + b_o), \\
 103         g_t &= tanh   &(W_{xg} x_t + W_{hg} h_{t-1} + b_g), \\
 104         @f}
 105         where @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
 106         @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
 107
 108         For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}, W_{xg}] @f$
 109         (i.e. @f$W_x@f$ is vertical contacentaion of @f$ W_{x?} @f$), @f$ W_x \in R^{4N_h \times N_x} @f$.
 110         The same for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}, W_{hg}], W_h \in R^{4N_h \times N_h} @f$
 111         and for @f$ b = [b_i; b_f, b_o, b_g]@f$, @f$b \in R^{4N_h} @f$.
 112
 113         @param Wh is matrix defining how previous output is transformed to internal gates (i.e. according to abovemtioned notation is @f$ W_h @f$)
 114         @param Wx is matrix defining how current input is transformed to internal gates (i.e. according to abovemtioned notation is @f$ W_x @f$)
 115         @param b  is bias vector (i.e. according to abovemtioned notation is @f$ b @f$)
 116         */
 117         virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0;
 118
 119         /** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
 120           * @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
 121           * where `Wh` is parameter from setWeights().
 122           */
 123         virtual void setOutShape(const MatShape &outTailShape = MatShape()) = 0;
 124
 125         /** @brief Specifies either interpet first dimension of input blob as timestamp dimenion either as sample.
 126           *
 127           * If flag is set to true then shape of input blob will be interpeted as [`T`, `N`, `[data dims]`] where `T` specifies number of timpestamps, `N` is number of independent streams.
 128           * In this case each forward() call will iterate through `T` timestamps and update layer's state `T` times.
 129           *
 130           * If flag is set to false then shape of input blob will be interpeted as [`N`, `[data dims]`].
 131           * In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
 132           */
 133         virtual void setUseTimstampsDim(bool use = true) = 0;
 134
 135         /** @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
 136          * @details Shape of the second output is the same as first output.
 137          */
 138         virtual void setProduceCellOutput(bool produce = false) = 0;
 139
 140         /* In common case it use single input with @f$x_t@f$ values to compute output(s) @f$h_t@f$ (and @f$c_t@f$).
 141          * @param input should contain packed values @f$x_t@f$
 142          * @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if setProduceCellOutput() flag was set to true).
 143          *
 144          * If setUseTimstampsDim() is set to true then @p input[0] should has at least two dimensions with the following shape: [`T`, `N`, `[data dims]`],
 145          * where `T` specifies number of timpestamps, `N` is number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is stored inside @p input[0][t, stream, ...]).
 146          *
 147          * If setUseTimstampsDim() is set to fase then @p input[0] should contain single timestamp, its shape should has form [`N`, `[data dims]`] with at least one dimension.
 148          * (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]).
 149         */
 150
 151         int inputNameToIndex(String inputName);
 152         int outputNameToIndex(String outputName);
 153     };
 154
 155     /** @brief Classical recurrent layer
 156
 157     Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and compute two outputs @f$o_t@f$ and @f$h_t@f$.
 158
 159     - input: should contain packed input @f$x_t@f$.
 160     - output: should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
 161
 162     input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` is number of timestamps and number of independent samples of @f$x_t@f$ respectively.
 163
 164     output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is number of rows in @f$ W_{xo} @f$ matrix.
 165
 166     If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix.
 167     */
 168     class CV_EXPORTS RNNLayer : public Layer
 169     {
 170     public:
 171         /** Creates instance of RNNLayer */
 172         static Ptr<RNNLayer> create(const LayerParams& params);
 173
 174         /** Setups learned weights.
 175
 176         Recurrent-layer behavior on each step is defined by current input @f$ x_t @f$, previous state @f$ h_t @f$ and learned weights as follows:
 177         @f{eqnarray*}{
 178         h_t &= tanh&(W_{hh} h_{t-1} + W_{xh} x_t + b_h),  \\
 179         o_t &= tanh&(W_{ho} h_t + b_o),
 180         @f}
 181
 182         @param Wxh is @f$ W_{xh} @f$ matrix
 183         @param bh  is @f$ b_{h}  @f$ vector
 184         @param Whh is @f$ W_{hh} @f$ matrix
 185         @param Who is @f$ W_{xo} @f$ matrix
 186         @param bo  is @f$ b_{o}  @f$ vector
 187         */
 188         virtual void setWeights(const Mat &Wxh, const Mat &bh, const Mat &Whh, const Mat &Who, const Mat &bo) = 0;
 189
 190         /** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output.
 191          * @details Shape of the second output is the same as first output.
 192          */
 193         virtual void setProduceHiddenOutput(bool produce = false) = 0;
 194
 195     };
 196
 197     class CV_EXPORTS BaseConvolutionLayer : public Layer
 198     {
 199     public:
 200         Size kernel, stride, pad, dilation, adjustPad;
 201         String padMode;
 202     };
 203
 204     class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer
 205     {
 206     public:
 207         static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
 208     };
 209
 210     class CV_EXPORTS DeconvolutionLayer : public BaseConvolutionLayer
 211     {
 212     public:
 213         static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
 214     };
 215
 216     class CV_EXPORTS LRNLayer : public Layer
 217     {
 218     public:
 219         enum Type
 220         {
 221             CHANNEL_NRM,
 222             SPATIAL_NRM
 223         };
 224         int type;
 225
 226         int size;
 227         float alpha, beta, bias;
 228         bool normBySize;
 229
 230         static Ptr<LRNLayer> create(const LayerParams& params);
 231     };
 232
 233     class CV_EXPORTS PoolingLayer : public Layer
 234     {
 235     public:
 236         enum Type
 237         {
 238             MAX,
 239             AVE,
 240             STOCHASTIC
 241         };
 242
 243         int type;
 244         Size kernel, stride, pad;
 245         bool globalPooling;
 246         bool computeMaxIdx;
 247         String padMode;
 248         bool ceilMode;
 249
 250         static Ptr<PoolingLayer> create(const LayerParams& params);
 251     };
 252
 253     class CV_EXPORTS SoftmaxLayer : public Layer
 254     {
 255     public:
 256         bool logSoftMax;
 257
 258         static Ptr<SoftmaxLayer> create(const LayerParams& params);
 259     };
 260
 261     class CV_EXPORTS LPNormalizeLayer : public Layer
 262     {
 263     public:
 264         float pnorm, epsilon;
 265
 266         static Ptr<LPNormalizeLayer> create(const LayerParams& params);
 267     };
 268
 269     class CV_EXPORTS InnerProductLayer : public Layer
 270     {
 271     public:
 272         int axis;
 273         static Ptr<InnerProductLayer> create(const LayerParams& params);
 274     };
 275
 276     class CV_EXPORTS MVNLayer : public Layer
 277     {
 278     public:
 279         float eps;
 280         bool normVariance, acrossChannels;
 281
 282         static Ptr<MVNLayer> create(const LayerParams& params);
 283     };
 284
 285     /* Reshaping */
 286
 287     class CV_EXPORTS ReshapeLayer : public Layer
 288     {
 289     public:
 290         MatShape newShapeDesc;
 291         Range newShapeRange;
 292
 293         static Ptr<ReshapeLayer> create(const LayerParams& params);
 294     };
 295
 296     class CV_EXPORTS FlattenLayer : public Layer
 297     {
 298     public:
 299         static Ptr<FlattenLayer> create(const LayerParams &params);
 300     };
 301
 302     class CV_EXPORTS ConcatLayer : public Layer
 303     {
 304     public:
 305         int axis;
 306         /**
 307          * @brief Add zero padding in case of concatenation of blobs with different
 308          * spatial sizes.
 309          *
 310          * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat
 311          */
 312         bool padding;
 313
 314         static Ptr<ConcatLayer> create(const LayerParams &params);
 315     };
 316
 317     class CV_EXPORTS SplitLayer : public Layer
 318     {
 319     public:
 320         int outputsCount; //!< Number of copies that will be produced (is ignored when negative).
 321
 322         static Ptr<SplitLayer> create(const LayerParams &params);
 323     };
 324
 325     class CV_EXPORTS SliceLayer : public Layer
 326     {
 327     public:
 328         int axis;
 329         std::vector<int> sliceIndices;
 330
 331         static Ptr<SliceLayer> create(const LayerParams &params);
 332     };
 333
 334     class CV_EXPORTS PermuteLayer : public Layer
 335     {
 336     public:
 337         static Ptr<PermuteLayer> create(const LayerParams& params);
 338     };
 339
 340     class CV_EXPORTS PaddingLayer : public Layer
 341     {
 342     public:
 343         static Ptr<PaddingLayer> create(const LayerParams& params);
 344     };
 345
 346     /* Activations */
 347     class CV_EXPORTS ActivationLayer : public Layer
 348     {
 349     public:
 350         virtual void forwardSlice(const float* src, float* dst, int len,
 351                                   size_t outPlaneSize, int cn0, int cn1) const = 0;
 352     };
 353
 354     class CV_EXPORTS ReLULayer : public ActivationLayer
 355     {
 356     public:
 357         float negativeSlope;
 358
 359         static Ptr<ReLULayer> create(const LayerParams &params);
 360     };
 361
 362     class CV_EXPORTS ReLU6Layer : public ActivationLayer
 363     {
 364     public:
 365         static Ptr<ReLU6Layer> create(const LayerParams &params);
 366     };
 367
 368     class CV_EXPORTS ChannelsPReLULayer : public ActivationLayer
 369     {
 370     public:
 371         static Ptr<ChannelsPReLULayer> create(const LayerParams& params);
 372     };
 373
 374     class CV_EXPORTS ELULayer : public ActivationLayer
 375     {
 376     public:
 377         static Ptr<ELULayer> create(const LayerParams &params);
 378     };
 379
 380     class CV_EXPORTS TanHLayer : public ActivationLayer
 381     {
 382     public:
 383         static Ptr<TanHLayer> create(const LayerParams &params);
 384     };
 385
 386     class CV_EXPORTS SigmoidLayer : public ActivationLayer
 387     {
 388     public:
 389         static Ptr<SigmoidLayer> create(const LayerParams &params);
 390     };
 391
 392     class CV_EXPORTS BNLLLayer : public ActivationLayer
 393     {
 394     public:
 395         static Ptr<BNLLLayer> create(const LayerParams &params);
 396     };
 397
 398     class CV_EXPORTS AbsLayer : public ActivationLayer
 399     {
 400     public:
 401         static Ptr<AbsLayer> create(const LayerParams &params);
 402     };
 403
 404     class CV_EXPORTS PowerLayer : public ActivationLayer
 405     {
 406     public:
 407         float power, scale, shift;
 408
 409         static Ptr<PowerLayer> create(const LayerParams &params);
 410     };
 411
 412     /* Layers used in semantic segmentation */
 413
 414     class CV_EXPORTS CropLayer : public Layer
 415     {
 416     public:
 417         int startAxis;
 418         std::vector<int> offset;
 419
 420         static Ptr<CropLayer> create(const LayerParams &params);
 421     };
 422
 423     class CV_EXPORTS EltwiseLayer : public Layer
 424     {
 425     public:
 426         enum EltwiseOp
 427         {
 428             PROD = 0,
 429             SUM = 1,
 430             MAX = 2,
 431         };
 432
 433         static Ptr<EltwiseLayer> create(const LayerParams &params);
 434     };
 435
 436     class CV_EXPORTS BatchNormLayer : public Layer
 437     {
 438     public:
 439         bool hasWeights, hasBias;
 440         float epsilon;
 441
 442         virtual void getScaleShift(Mat& scale, Mat& shift) const = 0;
 443         static Ptr<BatchNormLayer> create(const LayerParams &params);
 444     };
 445
 446     class CV_EXPORTS MaxUnpoolLayer : public Layer
 447     {
 448     public:
 449         Size poolKernel;
 450         Size poolPad;
 451         Size poolStride;
 452
 453         static Ptr<MaxUnpoolLayer> create(const LayerParams &params);
 454     };
 455
 456     class CV_EXPORTS ScaleLayer : public Layer
 457     {
 458     public:
 459         bool hasBias;
 460
 461         static Ptr<ScaleLayer> create(const LayerParams& params);
 462     };
 463
 464     class CV_EXPORTS ShiftLayer : public Layer
 465     {
 466     public:
 467         static Ptr<ShiftLayer> create(const LayerParams& params);
 468     };
 469
 470     class CV_EXPORTS PriorBoxLayer : public Layer
 471     {
 472     public:
 473         static Ptr<PriorBoxLayer> create(const LayerParams& params);
 474     };
 475
 476     class CV_EXPORTS DetectionOutputLayer : public Layer
 477     {
 478     public:
 479         static Ptr<DetectionOutputLayer> create(const LayerParams& params);
 480     };
 481
 482     class NormalizeBBoxLayer : public Layer
 483     {
 484     public:
 485         static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
 486     };
 487
 488 //! @}
 489 //! @}
 490 CV__DNN_EXPERIMENTAL_NS_END
 491 }
 492 }
 493 #endif