arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h

   1 /*
   2  * Copyright (c) 2017 ARM Limited.
   3  *
   4  * SPDX-License-Identifier: MIT
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to
   8  * deal in the Software without restriction, including without limitation the
   9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10  * sell copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in all
  14  * copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24 #ifndef __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__
  25 #define __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__
  26
  27 #include "arm_compute/runtime/IFunction.h"
  28
  29 #include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
  30 #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
  31 #include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
  32 #include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
  33 #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
  34 #include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
  35 #include "arm_compute/runtime/Tensor.h"
  36
  37 namespace arm_compute
  38 {
  39 /** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels:
  40  *  -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
  41  *  -# @ref NETransposeKernel (if @p transpose_weights flag is set to true) (called once)
  42  *  -# @ref NEGEMMTranspose1xWKernel (called once if we have a multi-batch input)
  43  *  -# @ref NEGEMMInterleave4x4Kernel (called if we have a multi-batch input)
  44  *  -# @ref NEGEMMMatrixMultiplyKernel
  45  *  -# @ref NEGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr)
  46  *
  47  * @note  The fully connected layer only accepts 2-dimensional "weights" tensors.
  48  */
  49 class NEFullyConnectedLayer : public IFunction
  50 {
  51 public:
  52     /** Default constructor (takes no arguments). */
  53     NEFullyConnectedLayer();
  54     /** Set the input, weights, (optional) biases and output tensors of the function.
  55      *
  56      * @param[in]  input             Source tensor. Data type supported: F32.
  57      * @param[in]  weights           Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input.
  58      * @param[in]  biases            Bias tensor. Can be nullptr, in which case no bias accumulation kernel is used. Data type supported: Same as @p input.
  59      * @param[out] output            Destination tensor. Data type supported: Same as @p input.
  60      * @param[in]  transpose_weights (Optional) Transpose weights if true. Defaults to true.
  61      */
  62     void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights = true);
  63
  64     // Inherited methods overridden (declared by the IFunction base class)
  65     void run() override;
  66
  67 private:
  68     void configure_fc_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output); /**< Private configuration helper. NOTE(review): name suggests "FC after FC, with batches" - confirm against the .cpp. */
  69     void configure_fc_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output); /**< Private configuration helper. NOTE(review): name suggests "FC after FC, no batches" - confirm against the .cpp. */
  70     void configure_conv_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output); /**< Private configuration helper. NOTE(review): name suggests "FC after convolution, with batches" - confirm against the .cpp. */
  71     void configure_conv_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output); /**< Private configuration helper. NOTE(review): name suggests "FC after convolution, no batches" - confirm against the .cpp. */
  72
  73     NEIm2ColKernel                     _im2col_kernel; /**< Im2col kernel (per the class description: called when the input comes from a convolutional layer). */
  74     NETransposeKernel                  _transpose_kernel; /**< Transpose kernel (per the class description: called once, if transpose_weights is true). */
  75     NEGEMMTranspose1xWKernel           _transpose1xW_kernel; /**< GEMM transpose 1xW kernel (per the class description: called once for a multi-batch input). */
  76     NEGEMMInterleave4x4Kernel          _interleave4x4_kernel; /**< GEMM interleave 4x4 kernel (per the class description: called for a multi-batch input). */
  77     NEGEMMMatrixMultiplyKernel         _mm_kernel; /**< GEMM matrix multiply kernel. */
  78     NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; /**< Bias accumulation kernel (per the class description: used if biases is not nullptr). */
  79     Tensor                             _im2col_output; /**< Function-owned tensor; presumably the output of _im2col_kernel - TODO confirm. */
  80     Tensor                             _interleave4x4_output; /**< Function-owned tensor; presumably the output of _interleave4x4_kernel - TODO confirm. */
  81     Tensor                             _transpose_output; /**< Function-owned tensor; presumably the output of _transpose_kernel - TODO confirm. */
  82     Tensor                             _transpose1xW_output; /**< Function-owned tensor; presumably the output of _transpose1xW_kernel - TODO confirm. */
  83     bool                               _is_first_run; /**< NOTE(review): presumably gates the kernels documented as "called once" to the first run() - confirm. */
  84     bool                               _transpose_weights; /**< NOTE(review): presumably stores the transpose_weights argument of configure() - confirm. */
  85     bool                               _fc_after_conv; /**< NOTE(review): presumably true when the input comes from a convolutional layer - confirm. */
  86     bool                               _batched_fc_layer; /**< NOTE(review): presumably true for a multi-batch input - confirm. */
  87     bool                               _accumulate_biases; /**< NOTE(review): presumably true when biases is not nullptr - confirm. */
  88 };
  89 } // namespace arm_compute
  90 #endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */