modules/dnn/include/opencv2/dnn/all_layers.hpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  14 // Third party copyrights are property of their respective owners.
  15 //
  16 // Redistribution and use in source and binary forms, with or without modification,
  17 // are permitted provided that the following conditions are met:
  18 //
  19 //   * Redistribution's of source code must retain the above copyright notice,
  20 //     this list of conditions and the following disclaimer.
  21 //
  22 //   * Redistribution's in binary form must reproduce the above copyright notice,
  23 //     this list of conditions and the following disclaimer in the documentation
  24 //     and/or other materials provided with the distribution.
  25 //
  26 //   * The name of the copyright holders may not be used to endorse or promote products
  27 //     derived from this software without specific prior written permission.
  28 //
  29 // This software is provided by the copyright holders and contributors "as is" and
  30 // any express or implied warranties, including, but not limited to, the implied
  31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32 // In no event shall the Intel Corporation or contributors be liable for any direct,
  33 // indirect, incidental, special, exemplary, or consequential damages
  34 // (including, but not limited to, procurement of substitute goods or services;
  35 // loss of use, data, or profits; or business interruption) however caused
  36 // and on any theory of liability, whether in contract, strict liability,
  37 // or tort (including negligence or otherwise) arising in any way out of
  38 // the use of this software, even if advised of the possibility of such damage.
  39 //
  40 //M*/
  41
  42 #ifndef OPENCV_DNN_DNN_ALL_LAYERS_HPP
  43 #define OPENCV_DNN_DNN_ALL_LAYERS_HPP
  44 #include <opencv2/dnn.hpp>
  45
  46 namespace cv {
  47 namespace dnn {
  48 CV__DNN_EXPERIMENTAL_NS_BEGIN
  49 //! @addtogroup dnn
  50 //! @{
  51
  52 /** @defgroup dnnLayerList Partial List of Implemented Layers
  53   @{
   54   This subsection of the dnn module contains information about built-in layers and their descriptions.
   55
   56   Classes listed here, in fact, provide a C++ API for creating instances of built-in layers.
   57   In addition to this way of instantiating layers, there is a more common factory API (see @ref dnnLayerFactory); it allows layers to be created dynamically (by name) and new ones to be registered.
   58   You can use both APIs, but the factory API is less convenient for native C++ programming and is basically designed for use inside importers (see @ref readNetFromCaffe(), @ref readNetFromTorch(), @ref readNetFromTensorflow()).
  59
   60   Built-in layers partially reproduce functionality of corresponding Caffe and Torch7 layers.
   61   In particular, the following layers and Caffe @ref Importer were tested to reproduce <a href="http://caffe.berkeleyvision.org/tutorial/layers.html">Caffe</a> functionality:
  62   - Convolution
  63   - Deconvolution
  64   - Pooling
  65   - InnerProduct
  66   - TanH, ReLU, Sigmoid, BNLL, Power, AbsVal
  67   - Softmax
  68   - Reshape, Flatten, Slice, Split
  69   - LRN
  70   - MVN
  71   - Dropout (since it does nothing on forward pass -))
  72 */
   73
          /** @brief Layer type that declares nothing beyond its static factory create().
           *
           * NOTE(review): the name suggests a pass-through (no-op) layer; confirm against the implementation.
           */
   74     class CV_EXPORTS BlankLayer : public Layer
   75     {
   76     public:
              /** Creates instance of BlankLayer from the given layer parameters. */
   77         static Ptr<BlankLayer> create(const LayerParams &params);
   78     };
  79
   80     //! LSTM recurrent layer
   81     class CV_EXPORTS LSTMLayer : public Layer
   82     {
   83     public:
   84         /** Creates instance of LSTM layer */
   85         static Ptr<LSTMLayer> create(const LayerParams& params);
   86
   87         /** @deprecated Use LayerParams::blobs instead.
   88         @brief Set trained weights for LSTM layer.
   89
   90         LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights.
   91
   92         Let @f$x_t@f$ be current input, @f$h_t@f$ be current output, @f$c_t@f$ be current state.
   93         Then current output and current cell state are computed as follows:
   94         @f{eqnarray*}{
   95         h_t &= o_t \odot tanh(c_t),               \\
   96         c_t &= f_t \odot c_{t-1} + i_t \odot g_t, \\
   97         @f}
   98         where @f$\odot@f$ is per-element multiply operation and @f$i_t, f_t, o_t, g_t@f$ are internal gates that are computed using learned weights.
   99
  100         Gates are computed as follows:
  101         @f{eqnarray*}{
  102         i_t &= sigmoid&(W_{xi} x_t + W_{hi} h_{t-1} + b_i), \\
  103         f_t &= sigmoid&(W_{xf} x_t + W_{hf} h_{t-1} + b_f), \\
  104         o_t &= sigmoid&(W_{xo} x_t + W_{ho} h_{t-1} + b_o), \\
  105         g_t &= tanh   &(W_{xg} x_t + W_{hg} h_{t-1} + b_g), \\
  106         @f}
  107         where @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
  108         @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
  109
  110         For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}; W_{xg}] @f$
  111         (i.e. @f$W_x@f$ is vertical concatenation of @f$ W_{x?} @f$), @f$ W_x \in R^{4N_h \times N_x} @f$.
  112         The same for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}; W_{hg}], W_h \in R^{4N_h \times N_h} @f$
  113         and for @f$ b = [b_i; b_f; b_o; b_g]@f$, @f$b \in R^{4N_h} @f$.
  114
  115         @param Wh is matrix defining how previous output is transformed to internal gates (i.e. according to the above-mentioned notation is @f$ W_h @f$)
  116         @param Wx is matrix defining how current input is transformed to internal gates (i.e. according to the above-mentioned notation is @f$ W_x @f$)
  117         @param b  is bias vector (i.e. according to the above-mentioned notation is @f$ b @f$)
  118         */
  119         CV_DEPRECATED virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0;
  120
  121         /** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
  122           * @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
  123           * where `Wh` is parameter from setWeights().
  124           */
  125         virtual void setOutShape(const MatShape &outTailShape = MatShape()) = 0;
  126
  127         /** @deprecated Use flag `use_timestamp_dim` in LayerParams.
  128           * @brief Specifies whether to interpret the first dimension of the input blob as the timestamp dimension or as the sample dimension.
  129           *
  130           * If flag is set to true then shape of input blob will be interpreted as [`T`, `N`, `[data dims]`] where `T` specifies number of timestamps, `N` is number of independent streams.
  131           * In this case each forward() call will iterate through `T` timestamps and update layer's state `T` times.
  132           *
  133           * If flag is set to false then shape of input blob will be interpreted as [`N`, `[data dims]`].
  134           * In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
  135           */
  136         CV_DEPRECATED virtual void setUseTimstampsDim(bool use = true) = 0;
  137
  138         /** @deprecated Use flag `produce_cell_output` in LayerParams.
  139          * @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
  140          * @details Shape of the second output is the same as first output.
  141          */
  142         CV_DEPRECATED virtual void setProduceCellOutput(bool produce = false) = 0;
  143
  144         /* In the common case the layer uses a single input with @f$x_t@f$ values to compute output(s) @f$h_t@f$ (and @f$c_t@f$).
  145          * @param input should contain packed values @f$x_t@f$
  146          * @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if setProduceCellOutput() flag was set to true).
  147          *
  148          * If setUseTimstampsDim() is set to true then @p input[0] should have at least two dimensions with the following shape: [`T`, `N`, `[data dims]`],
  149          * where `T` specifies number of timestamps, `N` is number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is stored inside @p input[0][t, stream, ...]).
  150          *
  151          * If setUseTimstampsDim() is set to false then @p input[0] should contain single timestamp, its shape should have form [`N`, `[data dims]`] with at least one dimension.
  152          * (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]).
  153         */
  154
              /** Returns the index that corresponds to the input named @p inputName.
               *  NOTE(review): the name-to-index mapping is defined in the implementation, not in this header.
               */
  155         int inputNameToIndex(String inputName);
              /** Returns the index that corresponds to the output named @p outputName.
               *  NOTE(review): the name-to-index mapping is defined in the implementation, not in this header.
               */
  156         int outputNameToIndex(String outputName);
  157     };
 158
  159     /** @brief Classical recurrent layer
  160
  161     Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and computes two outputs @f$o_t@f$ and @f$h_t@f$.
  162
  163     - input: should contain packed input @f$x_t@f$.
  164     - output: should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
  165
  166     input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` are number of timestamps and number of independent samples of @f$x_t@f$ respectively.
  167
  168     output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is number of rows in @f$ W_{ho} @f$ matrix.
  169
  170     If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix.
  171     */
  172     class CV_EXPORTS RNNLayer : public Layer
  173     {
  174     public:
  175         /** Creates instance of RNNLayer */
  176         static Ptr<RNNLayer> create(const LayerParams& params);
  177
  178         /** Sets up learned weights.
  179
  180         Recurrent-layer behavior on each step is defined by current input @f$ x_t @f$, previous state @f$ h_{t-1} @f$ and learned weights as follows:
  181         @f{eqnarray*}{
  182         h_t &= tanh&(W_{hh} h_{t-1} + W_{xh} x_t + b_h),  \\
  183         o_t &= tanh&(W_{ho} h_t + b_o),
  184         @f}
  185
  186         @param Wxh is @f$ W_{xh} @f$ matrix
  187         @param bh  is @f$ b_{h}  @f$ vector
  188         @param Whh is @f$ W_{hh} @f$ matrix
  189         @param Who is @f$ W_{ho} @f$ matrix
  190         @param bo  is @f$ b_{o}  @f$ vector
  191         */
  192         virtual void setWeights(const Mat &Wxh, const Mat &bh, const Mat &Whh, const Mat &Who, const Mat &bo) = 0;
  193
  194         /** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output.
  195          * @details Shape of the second output is the same as first output.
               * NOTE(review): the class description gives the second output the shape [`T`, `N`, @f$N_h@f$],
               * which equals the first output's shape only when @f$N_h = N_o@f$; confirm which statement is intended.
  196          */
  197         virtual void setProduceHiddenOutput(bool produce = false) = 0;
  198
  199     };
  200
          /** @brief Base class holding the data members shared by ConvolutionLayer and DeconvolutionLayer.
           *
           * It declares no factory of its own; the create() methods of both derived classes
           * return their result as Ptr<BaseConvolutionLayer>.
           */
  201     class CV_EXPORTS BaseConvolutionLayer : public Layer
  202     {
  203     public:
              // NOTE(review): the member meanings below are inferred from their names; confirm against the implementation.
  204         Size kernel, stride, pad, dilation, adjustPad; //!< presumably kernel size, stride, padding, dilation and output-size adjustment
  205         String padMode; //!< presumably the name of the padding mode
  206         int numOutput; //!< presumably the number of output channels
  207     };
  208
          /** @brief Convolution layer; adds only a static factory on top of BaseConvolutionLayer. */
  209     class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer
  210     {
  211     public:
              /** Creates a layer from the given parameters.
               *  Note that the result is returned as Ptr<BaseConvolutionLayer>, not Ptr<ConvolutionLayer>.
               */
  212         static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
  213     };
  214
          /** @brief Deconvolution layer; adds only a static factory on top of BaseConvolutionLayer. */
  215     class CV_EXPORTS DeconvolutionLayer : public BaseConvolutionLayer
  216     {
  217     public:
              /** Creates a layer from the given parameters.
               *  Note that the result is returned as Ptr<BaseConvolutionLayer>, not Ptr<DeconvolutionLayer>.
               */
  218         static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
  219     };
  220
          /** @brief LRN layer: public parameters and static factory.
           *
           * NOTE(review): LRN presumably stands for local response normalization (as in Caffe);
           * the member meanings below are inferred from their names and should be confirmed.
           */
  221     class CV_EXPORTS LRNLayer : public Layer
  222     {
  223     public:
              /** Normalization kinds. No explicit values are given, so CHANNEL_NRM is 0 and SPATIAL_NRM is 1. */
  224         enum Type
  225         {
  226             CHANNEL_NRM,
  227             SPATIAL_NRM
  228         };
  229         int type; //!< stored as plain int; presumably holds one of the Type values
  230
  231         int size; //!< presumably the size of the normalization window
  232         float alpha, beta, bias; //!< presumably the coefficients of the normalization formula
  233         bool normBySize; //!< presumably whether the sum is normalized by the window size
  234
              /** Creates instance of LRNLayer from the given layer parameters. */
  235         static Ptr<LRNLayer> create(const LayerParams& params);
  236     };
  237
          /** @brief Pooling layer: public parameters and static factory.
           *
           * NOTE(review): the member meanings below are inferred from their names; confirm against the implementation.
           */
  238     class CV_EXPORTS PoolingLayer : public Layer
  239     {
  240     public:
              /** Pooling kinds. No explicit values are given, so MAX is 0, AVE is 1 and STOCHASTIC is 2. */
  241         enum Type
  242         {
  243             MAX,
  244             AVE,
  245             STOCHASTIC
  246         };
  247
  248         int type; //!< stored as plain int; presumably holds one of the Type values
  249         Size kernel, stride, pad; //!< presumably pooling window size, stride and padding
  250         bool globalPooling; //!< presumably pools over the whole spatial extent when true
  251         bool computeMaxIdx; //!< presumably requests indices of the maxima as an extra output
  252         String padMode; //!< presumably the name of the padding mode
  253         bool ceilMode; //!< presumably selects ceil instead of floor when computing the output size
  254
              /** Creates instance of PoolingLayer from the given layer parameters. */
  255         static Ptr<PoolingLayer> create(const LayerParams& params);
  256     };
  257
          /** @brief Softmax layer: one public flag and a static factory. */
  258     class CV_EXPORTS SoftmaxLayer : public Layer
  259     {
  260     public:
  261         bool logSoftMax; //!< presumably selects log-softmax output instead of plain softmax; confirm
  262
              /** Creates instance of SoftmaxLayer from the given layer parameters. */
  263         static Ptr<SoftmaxLayer> create(const LayerParams& params);
  264     };
  265
          /** @brief LP-normalization layer: public parameters and static factory.
           *
           * NOTE(review): presumably normalizes the input by its L_p norm; confirm against the implementation.
           */
  266     class CV_EXPORTS LPNormalizeLayer : public Layer
  267     {
  268     public:
  269         float pnorm, epsilon; //!< presumably the norm order p and a small stabilizing constant
  270
              /** Creates instance of LPNormalizeLayer from the given layer parameters. */
  271         static Ptr<LPNormalizeLayer> create(const LayerParams& params);
  272     };
  273
          /** @brief InnerProduct layer: public axis parameter and static factory. */
  274     class CV_EXPORTS InnerProductLayer : public Layer
  275     {
  276     public:
  277         int axis; //!< NOTE(review): presumably the first axis that is flattened into the product; confirm
              /** Creates instance of InnerProductLayer from the given layer parameters. */
  278         static Ptr<InnerProductLayer> create(const LayerParams& params);
  279     };
  280
          /** @brief MVN layer: public parameters and static factory.
           *
           * NOTE(review): MVN presumably stands for mean-variance normalization (as in Caffe); confirm.
           */
  281     class CV_EXPORTS MVNLayer : public Layer
  282     {
  283     public:
  284         float eps; //!< presumably a small constant guarding against division by zero
  285         bool normVariance, acrossChannels; //!< presumably: also normalize variance / compute statistics across channels
  286
              /** Creates instance of MVNLayer from the given layer parameters. */
  287         static Ptr<MVNLayer> create(const LayerParams& params);
  288     };
 289
 290     /* Reshaping */
  291
          /** @brief Reshape layer: public shape description and static factory.
           *
           * NOTE(review): the member meanings below are inferred from their names; confirm against the implementation.
           */
  292     class CV_EXPORTS ReshapeLayer : public Layer
  293     {
  294     public:
  295         MatShape newShapeDesc; //!< presumably the description of the requested new shape
  296         Range newShapeRange; //!< presumably the range of input axes that the new shape replaces
  297
              /** Creates instance of ReshapeLayer from the given layer parameters. */
  298         static Ptr<ReshapeLayer> create(const LayerParams& params);
  299     };
  300
          /** @brief Flatten layer; declares only a static factory. */
  301     class CV_EXPORTS FlattenLayer : public Layer
  302     {
  303     public:
              /** Creates instance of FlattenLayer from the given layer parameters. */
  304         static Ptr<FlattenLayer> create(const LayerParams &params);
  305     };
  306
          /** @brief Concatenation layer: public parameters and static factory. */
  307     class CV_EXPORTS ConcatLayer : public Layer
  308     {
  309     public:
  310         int axis; //!< NOTE(review): presumably the axis along which blobs are concatenated; confirm
  311         /**
  312          * @brief Add zero padding in case of concatenation of blobs with different
  313          * spatial sizes.
  314          *
  315          * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat
  316          */
  317         bool padding;
  318
              /** Creates instance of ConcatLayer from the given layer parameters. */
  319         static Ptr<ConcatLayer> create(const LayerParams &params);
  320     };
  321
          /** @brief Split layer: produces copies of its input (see outputsCount). */
  322     class CV_EXPORTS SplitLayer : public Layer
  323     {
  324     public:
  325         int outputsCount; //!< Number of copies that will be produced (is ignored when negative).
  326
              /** Creates instance of SplitLayer from the given layer parameters. */
  327         static Ptr<SplitLayer> create(const LayerParams &params);
  328     };
 329
  330     /**
  331      * Slice layer has several modes:
  332      * 1. Caffe mode
  333      * @param[in] axis Axis of split operation
  334      * @param[in] slice_point Array of split points
  335      *
  336      * Number of output blobs equals to number of split points plus one. The
  337      * first blob is a slice on input from 0 to @p slice_point[0] - 1 by @p axis,
  338      * the second output blob is a slice of input from @p slice_point[0] to
  339      * @p slice_point[1] - 1 by @p axis and the last output blob is a slice of
  340      * input from @p slice_point[-1] up to the end of @p axis size.
  341      *
  342      * 2. TensorFlow mode
  343      * @param begin Vector of start indices
  344      * @param size Vector of sizes
  345      *
  346      * More convenient numpy-like slice. One and only output blob
  347      * is a slice `input[begin[0]:begin[0]+size[0], begin[1]:begin[1]+size[1], ...]`
  348      *
  349      * 3. Torch mode
  350      * @param axis Axis of split operation
  351      *
  352      * Split input blob into equal parts by @p axis.
  353      */
  354     class CV_EXPORTS SliceLayer : public Layer
  355     {
  356     public:
  357         /**
  358          * @brief Vector of slice ranges.
  359          *
  360          * The first dimension equals number of output blobs.
  361          * Inner vector has slice ranges for the first number of input dimensions.
  362          */
  363         std::vector<std::vector<Range> > sliceRanges;
  364         int axis; //!< NOTE(review): presumably the axis of the split operation described in the class comment; confirm
  365
              /** Creates instance of SliceLayer from the given layer parameters. */
  366         static Ptr<SliceLayer> create(const LayerParams &params);
  367     };
  368
          /** @brief Permute layer; declares only a static factory.
           *
           * NOTE(review): presumably reorders the axes of the input blob; confirm against the implementation.
           */
  369     class CV_EXPORTS PermuteLayer : public Layer
  370     {
  371     public:
              /** Creates instance of PermuteLayer from the given layer parameters. */
  372         static Ptr<PermuteLayer> create(const LayerParams& params);
  373     };
 374
  375     /**
  376      * @brief Adds extra values for specific axes.
  377      * @param paddings Vector of paddings in format
  378      *                 @code
  379      *                 [ pad_before, pad_after,  // [0]th dimension
  380      *                   pad_before, pad_after,  // [1]st dimension
  381      *                   ...
  382      *                   pad_before, pad_after ] // [n]th dimension
  383      *                 @endcode
  384      *                 that represents number of padded values at every dimension
  385      *                 starting from the first one. The rest of dimensions won't
  386      *                 be padded.
  387      * @param value Value to be padded. Defaults to zero.
  388      * @param input_dims Torch's parameter. If @p input_dims is not equal to the
  389      *                   actual input dimensionality then the `[0]th` dimension
  390      *                   is considered as a batch dimension and @p paddings are shifted
  391      *                   to a one dimension. Defaults to `-1` that means padding
  392      *                   corresponding to @p paddings.
  393      */
  394     class CV_EXPORTS PaddingLayer : public Layer
  395     {
  396     public:
              /** Creates instance of PaddingLayer from the given layer parameters
               *  (the parameters listed in the class description are not members of this class).
               */
  397         static Ptr<PaddingLayer> create(const LayerParams& params);
  398     };
 399
  400     /* Activations */
          /** @brief Abstract base class of the activation layers declared below it in this header.
           *
           * Derived classes have to implement forwardSlice(), which is pure virtual.
           */
  401     class CV_EXPORTS ActivationLayer : public Layer
  402     {
  403     public:
              /** Processes values read from @p src and writes results to @p dst.
               *
               * NOTE(review): presumably applies the activation to @p len elements per plane for
               * channels in the range [@p cn0, @p cn1), with @p outPlaneSize as the distance between
               * consecutive channel planes; confirm against an implementation.
               */
  404         virtual void forwardSlice(const float* src, float* dst, int len,
  405                                   size_t outPlaneSize, int cn0, int cn1) const = 0;
  406     };
  407
          /** @brief ReLU activation layer: one public parameter and a static factory. */
  408     class CV_EXPORTS ReLULayer : public ActivationLayer
  409     {
  410     public:
  411         float negativeSlope; //!< NOTE(review): presumably the slope applied to negative inputs (leaky ReLU); confirm
  412
              /** Creates instance of ReLULayer from the given layer parameters. */
  413         static Ptr<ReLULayer> create(const LayerParams &params);
  414     };
  415
          /** @brief ReLU6 activation layer; declares only a static factory.
           *
           * NOTE(review): presumably clamps the ReLU output at 6; confirm against the implementation.
           */
  416     class CV_EXPORTS ReLU6Layer : public ActivationLayer
  417     {
  418     public:
              /** Creates instance of ReLU6Layer from the given layer parameters. */
  419         static Ptr<ReLU6Layer> create(const LayerParams &params);
  420     };
  421
          /** @brief Channel-wise PReLU activation layer; declares only a static factory.
           *
           * NOTE(review): presumably a ReLU with a learned negative slope per channel; confirm.
           */
  422     class CV_EXPORTS ChannelsPReLULayer : public ActivationLayer
  423     {
  424     public:
              /** Creates a layer from the given parameters.
               *  Note that the result is returned as the generic Ptr<Layer>, not Ptr<ChannelsPReLULayer>.
               */
  425         static Ptr<Layer> create(const LayerParams& params);
  426     };
  427
          /** @brief ELU activation layer; declares only a static factory. */
  428     class CV_EXPORTS ELULayer : public ActivationLayer
  429     {
  430     public:
              /** Creates instance of ELULayer from the given layer parameters. */
  431         static Ptr<ELULayer> create(const LayerParams &params);
  432     };
  433
          /** @brief TanH activation layer; declares only a static factory. */
  434     class CV_EXPORTS TanHLayer : public ActivationLayer
  435     {
  436     public:
              /** Creates instance of TanHLayer from the given layer parameters. */
  437         static Ptr<TanHLayer> create(const LayerParams &params);
  438     };
  439
          /** @brief Sigmoid activation layer; declares only a static factory. */
  440     class CV_EXPORTS SigmoidLayer : public ActivationLayer
  441     {
  442     public:
              /** Creates instance of SigmoidLayer from the given layer parameters. */
  443         static Ptr<SigmoidLayer> create(const LayerParams &params);
  444     };
  445
          /** @brief BNLL activation layer; declares only a static factory.
           *
           * NOTE(review): BNLL presumably stands for binomial normal log likelihood (as in Caffe); confirm.
           */
  446     class CV_EXPORTS BNLLLayer : public ActivationLayer
  447     {
  448     public:
              /** Creates instance of BNLLLayer from the given layer parameters. */
  449         static Ptr<BNLLLayer> create(const LayerParams &params);
  450     };
  451
          /** @brief Abs activation layer; declares only a static factory.
           *
           * NOTE(review): presumably computes the element-wise absolute value; confirm.
           */
  452     class CV_EXPORTS AbsLayer : public ActivationLayer
  453     {
  454     public:
              /** Creates instance of AbsLayer from the given layer parameters. */
  455         static Ptr<AbsLayer> create(const LayerParams &params);
  456     };
  457
          /** @brief Power activation layer: public parameters and static factory.
           *
           * NOTE(review): presumably computes (shift + scale * x) ^ power as in Caffe's Power layer; confirm.
           */
  458     class CV_EXPORTS PowerLayer : public ActivationLayer
  459     {
  460     public:
  461         float power, scale, shift; //!< parameters of the power function (see the review note on the class)
  462
              /** Creates instance of PowerLayer from the given layer parameters. */
  463         static Ptr<PowerLayer> create(const LayerParams &params);
  464     };
 465
 466     /* Layers used in semantic segmentation */
  467
          /** @brief Crop layer: public parameters and static factory.
           *
           * NOTE(review): the member meanings below are inferred from their names; confirm against the implementation.
           */
  468     class CV_EXPORTS CropLayer : public Layer
  469     {
  470     public:
  471         int startAxis; //!< presumably the first axis that is cropped
  472         std::vector<int> offset; //!< presumably the crop offsets for the cropped axes
  473
              /** Creates instance of CropLayer from the given layer parameters. */
  474         static Ptr<CropLayer> create(const LayerParams &params);
  475     };
  476
          /** @brief Element-wise layer: operation enumeration and static factory.
           *
           * This class declares no data member of type EltwiseOp.
           * NOTE(review): the operation is presumably selected through LayerParams; confirm.
           */
  477     class CV_EXPORTS EltwiseLayer : public Layer
  478     {
  479     public:
              /** Supported element-wise operations, with explicitly fixed numeric values. */
  480         enum EltwiseOp
  481         {
  482             PROD = 0,
  483             SUM = 1,
  484             MAX = 2,
  485         };
  486
              /** Creates instance of EltwiseLayer from the given layer parameters. */
  487         static Ptr<EltwiseLayer> create(const LayerParams &params);
  488     };
  489
          /** @brief Batch normalization layer: public parameters, scale/shift accessor and static factory.
           *
           * NOTE(review): the member meanings below are inferred from their names; confirm against the implementation.
           */
  490     class CV_EXPORTS BatchNormLayer : public Layer
  491     {
  492     public:
  493         bool hasWeights, hasBias; //!< presumably whether learned scale / bias blobs are present
  494         float epsilon; //!< presumably a small constant added to the variance
  495
              /** Writes the layer's scale and shift into the output arguments @p scale and @p shift.
               *  Pure virtual: implemented by the concrete layer.
               */
  496         virtual void getScaleShift(Mat& scale, Mat& shift) const = 0;
              /** Creates instance of BatchNormLayer from the given layer parameters. */
  497         static Ptr<BatchNormLayer> create(const LayerParams &params);
  498     };
  499
          /** @brief Max-unpooling layer: public parameters and static factory.
           *
           * NOTE(review): the members presumably describe the pooling operation being reverted; confirm.
           */
  500     class CV_EXPORTS MaxUnpoolLayer : public Layer
  501     {
  502     public:
  503         Size poolKernel; //!< presumably the kernel size of the corresponding pooling
  504         Size poolPad; //!< presumably the padding of the corresponding pooling
  505         Size poolStride; //!< presumably the stride of the corresponding pooling
  506
              /** Creates instance of MaxUnpoolLayer from the given layer parameters. */
  507         static Ptr<MaxUnpoolLayer> create(const LayerParams &params);
  508     };
  509
          /** @brief Scale layer: one public flag and a static factory. */
  510     class CV_EXPORTS ScaleLayer : public Layer
  511     {
  512     public:
  513         bool hasBias; //!< NOTE(review): presumably whether a bias term is added after scaling; confirm
  514
              /** Creates instance of ScaleLayer from the given layer parameters. */
  515         static Ptr<ScaleLayer> create(const LayerParams& params);
  516     };
  517
          /** @brief Shift layer; declares only a static factory. */
  518     class CV_EXPORTS ShiftLayer : public Layer
  519     {
  520     public:
              /** Creates instance of ShiftLayer from the given layer parameters. */
  521         static Ptr<ShiftLayer> create(const LayerParams& params);
  522     };
  523
          /** @brief PriorBox layer; declares only a static factory.
           *
           * NOTE(review): presumably generates prior (anchor) boxes for SSD-style detectors; confirm.
           */
  524     class CV_EXPORTS PriorBoxLayer : public Layer
  525     {
  526     public:
              /** Creates instance of PriorBoxLayer from the given layer parameters. */
  527         static Ptr<PriorBoxLayer> create(const LayerParams& params);
  528     };
  529
          /** @brief Reorg layer; declares only a static factory. */
  530     class CV_EXPORTS ReorgLayer : public Layer
  531     {
  532     public:
              /** Creates instance of ReorgLayer from the given layer parameters. */
  533         static Ptr<ReorgLayer> create(const LayerParams& params);
  534     };
  535
          /** @brief Region layer; declares only a static factory. */
  536     class CV_EXPORTS RegionLayer : public Layer
  537     {
  538     public:
              /** Creates instance of RegionLayer from the given layer parameters. */
  539         static Ptr<RegionLayer> create(const LayerParams& params);
  540     };
  541
          /** @brief DetectionOutput layer; declares only a static factory. */
  542     class CV_EXPORTS DetectionOutputLayer : public Layer
  543     {
  544     public:
              /** Creates instance of DetectionOutputLayer from the given layer parameters. */
  545         static Ptr<DetectionOutputLayer> create(const LayerParams& params);
  546     };
 547
 548     class NormalizeBBoxLayer : public Layer
 549     {
 550     public:
 551         static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
 552     };
 553
  554     /**
  555      * @brief Resize input 4-dimensional blob by nearest neighbor strategy.
  556      *
  557      * Layer is used to support TensorFlow's resize_nearest_neighbor op.
  558      */
  559     class CV_EXPORTS ResizeNearestNeighborLayer : public Layer
  560     {
  561     public:
              /** Creates instance of ResizeNearestNeighborLayer from the given layer parameters. */
  562         static Ptr<ResizeNearestNeighborLayer> create(const LayerParams& params);
  563     };
 564
 565 //! @}
 566 //! @}
 567 CV__DNN_EXPERIMENTAL_NS_END
 568 }
 569 }
 570 #endif