2 * Copyright (c) 2017 ARM Limited.
4 * SPDX-License-Identifier: MIT
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #ifndef __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__
25 #define __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__
27 #include "arm_compute/runtime/IFunction.h"
29 #include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
30 #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
31 #include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
32 #include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
33 #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
34 #include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
35 #include "arm_compute/runtime/Tensor.h"
39 /** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels:
40 * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
41 * -# @ref NETransposeKernel (if @p transpose_weights flag is set to true) (called once)
42 * -# @ref NEGEMMTranspose1xWKernel (called once if we have a multi-batch input)
43 * -# @ref NEGEMMInterleave4x4Kernel (called if we have a multi-batch input)
44 * -# @ref NEGEMMMatrixMultiplyKernel
45 * -# @ref NEGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr)
47 * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
49 class NEFullyConnectedLayer : public IFunction
// NOTE(review): this copy of the header shows no braces or access specifiers around the class
// members, and its embedded line numbers skip values; lines appear to have been dropped.
// Confirm every declaration below against the upstream file before relying on it.
// Default constructor (only declared in this header).
53 NEFullyConnectedLayer();
54 /** Set the input and output tensors.
56 * @param[in] input Source tensor. Data type supported: F32.
57 * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input.
58 * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p input.
59 * @param[out] output Destination tensor. Data type supported: Same as @p input.
60 * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true.
62 void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights = true);
64 // Inherited methods overridden
// NOTE(review): no overriding declaration follows the comment above in this copy of the
// header; verify which IFunction member(s) are overridden in the upstream file.
// Configuration helpers, one per input scenario. None of them takes the biases tensor.
// NOTE(review): the suffixes presumably encode the scenario -- "conv_fc" / "fc_fc" for an input
// coming from a convolutional / fully connected layer, "wb" / "nb" for with / no batches --
// confirm against the implementation file.
68 void configure_fc_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output);
69 void configure_fc_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output);
70 void configure_conv_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output);
71 void configure_conv_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output);
// Kernel members: one instance of each NEON kernel type whose header is included by this file.
73 NEIm2ColKernel _im2col_kernel;
74 NETransposeKernel _transpose_kernel;
75 NEGEMMTranspose1xWKernel _transpose1xW_kernel;
76 NEGEMMInterleave4x4Kernel _interleave4x4_kernel;
77 NEGEMMMatrixMultiplyKernel _mm_kernel;
78 NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
// Tensor members. NOTE(review): presumably the intermediate outputs of the like-named
// kernels above -- TODO confirm against the implementation file.
79 Tensor _im2col_output;
80 Tensor _interleave4x4_output;
81 Tensor _transpose_output;
82 Tensor _transpose1xW_output;
// Boolean state flags. NOTE(review): presumably recorded during configure() to select which
// kernels are executed (weights transposition, multi-batch path, bias accumulation) -- confirm.
84 bool _transpose_weights;
86 bool _batched_fc_layer;
87 bool _accumulate_biases;
90 #endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */