compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /*
  18  * Copyright (c) 2017-2019 ARM Limited.
  19  *
  20  * SPDX-License-Identifier: MIT
  21  *
  22  * Permission is hereby granted, free of charge, to any person obtaining a copy
  23  * of this software and associated documentation files (the "Software"), to
  24  * deal in the Software without restriction, including without limitation the
  25  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  26  * sell copies of the Software, and to permit persons to whom the Software is
  27  * furnished to do so, subject to the following conditions:
  28  *
  29  * The above copyright notice and this permission notice shall be included in all
  30  * copies or substantial portions of the Software.
  31  *
  32  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  33  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  34  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  35  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  36  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  37  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  38  * SOFTWARE.
  39  */
  40
  41 #ifndef __ARM_COMPUTE_CLFULLYCONNECTEDLAYEREX_H__
  42 #define __ARM_COMPUTE_CLFULLYCONNECTEDLAYEREX_H__
  43
  44 #include "arm_compute/runtime/CL/ICLSimpleFunction.h"
  45
  46 #include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
  47 #include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
  48 #include "arm_compute/runtime/CL/CLTensor.h"
  49 #include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
  50 #include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
  51 #include "arm_compute/runtime/CL/functions/CLGEMM.h"
  52 #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
  53 #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
  54 #include "arm_compute/runtime/IWeightsManager.h"
  55 #include "arm_compute/runtime/MemoryGroup.h"
  56
  57 namespace arm_compute
  58 {
  59 /** Basic function to reshape the weights of a fully connected layer with OpenCL. It is an
  60  * @ref ICLSimpleFunction that calls the following kernel:
  61  *
  62  *  -# @ref CLTransposeKernel
  63  *
  64  * @note  The fully connected layer accepts "weights" tensors only with 2 dimensions.
  65  */
  66 class CLFullyConnectedLayerReshapeWeightsEx : public ICLSimpleFunction
  67 {
  68 public:
  69   /** Set the input and output tensors.
  70    *
  71    * @param[in]  input  Weights tensor. The weights must be 2 dimensional. Data types supported:
  72    * QASYMM8/F16/F32.
  73    * @param[out] output Destination tensor which stores the transposed input tensor. Data type
  74    * supported: Same as @p input.
  75    */
  76   void configure(const ICLTensor *input, ICLTensor *output);
  77   /** Static function to check whether the given tensor infos lead to a valid configuration of
  78    * @ref CLFullyConnectedLayerReshapeWeightsEx. Both arguments are const tensor infos.
  79    *
  80    * @param[in] input  Weights tensor info. The weights must be 2 dimensional. Data types
  81    * supported: QASYMM8/F16/F32.
  82    * @param[in] output Destination tensor info, describing the transposed input tensor. Data
  83    * type supported: Same as @p input.
  84    *
  85    * @return a status
  86    */
  87   static Status validate(const ITensorInfo *input, const ITensorInfo *output);
  88 };
  89
  90 namespace weights_transformations
  91 {
  92 /** Weights transformation (@ref ITransformWeights) that wraps @ref
  93  * CLFullyConnectedLayerReshapeWeightsEx and holds the tensor the reshaped weights are written to */
  94 class CLFullyConnectedLayerReshapeWeightsExManaged : public ITransformWeights
  95 {
  96 public:
  97   // Inherited method override: allocates the output tensor, runs the reshape, sets _reshape_run
  98   void run() override
  99   {
 100     _output.allocator()->allocate();
 101     _func.run();
 102     _reshape_run = true; // not declared in this class; presumably an ITransformWeights member
 103   }
 104
 105   // Inherited method override: calls free() on the allocator of the output tensor
 106   void release() override { _output.allocator()->free(); }
 107
 108   // Inherited method override: returns a pointer to the output tensor held by this object
 109   ICLTensor *get_weights() override { return &_output; }
 110
 111   // Inherited method override: returns the class-wide constant _uid
 112   uint32_t uid() override { return _uid; }
 113
 114   /** Configures the @ref CLFullyConnectedLayerReshapeWeightsEx function, using the internally
 115    * held output tensor as its destination.
 116    * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
 117    */
 118   void configure(const ICLTensor *input) { _func.configure(input, &_output); }
 119
 120 private:
 121   static constexpr uint32_t _uid = 0x0; // value reported by uid()
 122   CLTensor _output{}; // destination tensor of _func; exposed through get_weights()
 123   CLFullyConnectedLayerReshapeWeightsEx _func{}; // reshape function set up by configure()
 124 };
 125 } // namespace weights_transformations
 126
 127 /** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following
 128  * OpenCL kernels:
 129  *
 130  *  -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer)
 131  *  -# @ref CLFullyConnectedLayerReshapeWeightsEx (if @p are_weights_reshaped is set to false and
 132  * transpose_weights is set to true) (called once)
 133  *  -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized
 134  * asymmetric)
 135  *  -# @ref CLGEMMMatrixAccumulateBiasesKernel or @ref
 136  * CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is
 137  * not equal to nullptr)
 138  * @note  This class declares a CLFlattenLayer member and no im2col kernel - TODO confirm item 1.
 139  * @note  The fully connected layer accepts "weights" tensors only with 2 dimensions.
 140  */
 141 class CLFullyConnectedLayerEx : public IFunction
 142 {
 143 public:
 144   /** Constructor. Both arguments are optional and default to nullptr. */
 145   CLFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr,
 146                           IWeightsManager *weights_manager = nullptr);
 147   /** Prevent instances of this class from being copied (As this class contains pointers) */
 148   CLFullyConnectedLayerEx(const CLFullyConnectedLayerEx &) = delete;
 149   /** Default move constructor */
 150   CLFullyConnectedLayerEx(CLFullyConnectedLayerEx &&) = default;
 151   /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
 152   CLFullyConnectedLayerEx &operator=(const CLFullyConnectedLayerEx &) = delete;
 153   /** Default move assignment operator */
 154   CLFullyConnectedLayerEx &operator=(CLFullyConnectedLayerEx &&) = default;
 155   /** Set the input and output tensors.
 156    *
 157    * @param[in]  input   Source tensor. Data type supported: QASYMM8/F16/F32.
 158    * @param[in]  weights Weights tensor. The weights must be 2 dimensional.
 159    *                     If this function is called after a Convolution Layer, the (transposed)
 160    * weights will have as many rows as the product of the first 3 input's dimensions.
 161    *                     If it is called after another FullyConnected Layer, the (transposed)
 162    * weights will have as many rows as the input's first dimension.
 163    *                     Data type supported: Same as @p input.
 164    * @param[in]  biases  Bias tensor. Can be nullptr. Data type supported: Same as @p input.
 165    * @param[out] output  Destination tensor. Its shape should be equal to the output of a matrix
 166    * multiplication between:
 167    *                     - The output of im2col on the input and the (transposed) 2D weights, if the
 168    * function is called after a Convolution Layer
 169    *                     - The input tensor and the (transposed) 2D weights, if the function is
 170    * called after another FullyConnected Layer.
 171    *                     Data type supported: Same as @p input.
 172    * @param[in]  fc_info (Optional) Fully connected layer additional info
 173    */
 174   void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,
 175                  ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
 176   /** Static function to check if given info will lead to a valid configuration of @ref
 177    * CLFullyConnectedLayerEx
 178    *
 179    * @param[in]  input   Source tensor info. Data type supported: QASYMM8/F16/F32.
 180    * @param[in]  weights Weights tensor info. The weights must be 2 dimensional.
 181    *                     If this function is called after a Convolution Layer, the (transposed)
 182    * weights will have as many rows as the product of the first 3 input's dimensions.
 183    *                     If it is called after another FullyConnected Layer, the (transposed)
 184    * weights will have as many rows as the input's first dimension.
 185    *                     Data type supported: Same as @p input.
 186    * @param[in]  biases  Bias tensor info. Can be nullptr. Data type supported: Same as @p input.
 187    * @param[in]  output  Destination tensor info (const, only read). Its shape should be equal to
 188    * the output of a matrix multiplication between:
 189    *                     - The output of im2col on the input and the (transposed) 2D weights, if the
 190    * function is called after a Convolution Layer
 191    *                     - The input tensor and the (transposed) 2D weights, if the function is
 192    * called after another FullyConnected Layer.
 193    *                     Data type supported: Same as @p input.
 194    * @param[in]  fc_info (Optional) Fully connected layer additional info
 195    *
 196    * @return a status
 197    */
 198   static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
 199                          const ITensorInfo *biases, const ITensorInfo *output,
 200                          FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
 201
 202   // Inherited methods override (declared only; definitions are outside this header)
 203   void run() override;
 204   void prepare() override;
 205
 206 private: // The configure_* helpers are only declared here; definitions are outside this header
 207   void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias,
 208                        ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
 209   void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias,
 210                          ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
 211   void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias,
 212                     ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
 213
 214   MemoryGroup _memory_group;
 215   IWeightsManager *_weights_manager;
 216   CLConvertFullyConnectedWeights _convert_weights;
 217   weights_transformations::CLConvertFullyConnectedWeightsManaged _convert_weights_managed;
 218   weights_transformations::CLFullyConnectedLayerReshapeWeightsExManaged
 219     _reshape_weights_managed_function;
 220   CLFlattenLayer _flatten_layer;
 221   CLFullyConnectedLayerReshapeWeightsEx _reshape_weights_function;
 222   CLGEMM _mm_gemm;
 223   CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
 224   CLTensor _flatten_output;
 225   CLTensor _converted_weights_output;
 226   CLTensor _reshape_weights_output;
 227   bool _are_weights_converted;
 228   bool _are_weights_reshaped;
 229   bool _is_fc_after_conv;
 230   bool _is_quantized;
 231   bool _is_prepared;
 232   const ICLTensor *_original_weights;
 233 };
 234 } // namespace arm_compute
 235 #endif /* __ARM_COMPUTE_CLFULLYCONNECTEDLAYEREX_H__ */