compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /*
  18  * Copyright (c) 2019-2020 ARM Limited.
  19  *
  20  * SPDX-License-Identifier: MIT
  21  *
  22  * Permission is hereby granted, free of charge, to any person obtaining a copy
  23  * of this software and associated documentation files (the "Software"), to
  24  * deal in the Software without restriction, including without limitation the
  25  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  26  * sell copies of the Software, and to permit persons to whom the Software is
  27  * furnished to do so, subject to the following conditions:
  28  *
  29  * The above copyright notice and this permission notice shall be included in all
  30  * copies or substantial portions of the Software.
  31  *
  32  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  33  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  34  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  35  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  36  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  37  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  38  * SOFTWARE.
  39  */
  40 #ifndef __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__
  41 #define __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__
  42
  43 #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
  44 #include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
  45 #include "arm_compute/runtime/CL/functions/CLReverse.h"
  46 #include "arm_compute/runtime/CL/functions/CLTranspose.h"
  47
  48 #include "arm_compute/runtime/CL/CLTensor.h"
  49 #include "arm_compute/runtime/IFunction.h"
  50 #include "arm_compute/runtime/IMemoryManager.h"
  51 #include "arm_compute/runtime/MemoryGroup.h"
  52
  53 #include <memory>
  54
  55 namespace arm_compute
  56 {
  57 class ICLTensor;
  58 /** Function to run the deconvolution layer.
  59  *
  60  * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input
  61  * depending on the stride and pad info and then perform a 1x1
  62  * convolution pass. Input stride defines how many zeroes we should put between each element of the
  63  * input and pad is the amount of padding.
  64  *
  65  *  The relation between input to output is as follows:
  66  *  \f[
  67  *       width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
  68  *  \f]
  69  *  \f[
  70  *       height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
  71  *  \f]
  72  *
  73  *  where:
  74  *      width_input is the size of the first input dimension.
  75  *      height_input is the size of the second input dimension.
  76  *      width_output is the size of the first output dimension.
  77  *      height_output is the size of the second output dimension.
  78  *      kernel_x and kernel_y are the convolution sizes in x and y.
   79  *      stride_x and stride_y are the input strides of the first and second dimensions.
  80  *
  81  * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
  82  * Therefore, it will be necessary to use the weights in the
  83  * reverse order to perform an actual convolution. This is achieved by using @ref CLReverse.
  84  *
  85  * This function calls the following OpenCL kernels/functions:
  86  *
  87  * -# @ref CLDeconvolutionLayerUpsample
  88  * -# @ref CLConvolutionLayer
  89  *
   90  * And the following OpenCL function, used to reverse the weights:
  91  * -# @ref CLReverse
  92  *
  93  */
   94 class CLDirectTransposeConvLayer : public IFunction
   95 {
   96 public:
   97   /** Constructor taking an optional memory manager (defaults to nullptr) */
   98   CLDirectTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
   99   /** Prevent instances of this class from being copied (As this class contains pointers) */
  100   CLDirectTransposeConvLayer(const CLDirectTransposeConvLayer &) = delete;
  101   /** Default move constructor */
  102   CLDirectTransposeConvLayer(CLDirectTransposeConvLayer &&) = default;
  103   /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
  104   CLDirectTransposeConvLayer &operator=(const CLDirectTransposeConvLayer &) = delete;
  105   /** Default move assignment operator */
  106   CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default;
  107   /** Set the input, weights, biases and output tensors.
  108    *
  109    * @param[in,out] input          Input tensor. 3 lower dimensions represent a single input, and an
  110    *                               optional 4th dimension for batch of inputs.
  111    *                               Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
  112    * @param[in]     weights        The 4d weights with dimensions [width, height, IFM, OFM]. Data type
  113    *                               supported: Same as @p input.
  114    * @param[in]     bias           (Optional) The biases have one dimension.
  115    *                               Data type supported: Should match @p input data type, except for
  116    *                               input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
  117    * @param[out]    output         Output tensor. The output has the same number of dimensions as the
  118    *                               @p input.
  119    * @param[in]     info           Contains padding and policies to be used in the deconvolution, this
  120    *                               is described in @ref PadStrideInfo.
  121    * @param[in]     invalid_right  The number of zeros added to the right edge of the output.
  122    * @param[in]     invalid_bottom The number of zeros added to the bottom edge of the output.
  123    * @param[in]     weights_info   (Optional) Weights information needed for @ref CLConvolutionLayer,
  124    *                               specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
  125    *
  126    */
  127   void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
  128                  const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom,
  129                  const WeightsInfo &weights_info = WeightsInfo());
  130   /** Set the input, weights, biases and output tensors.
  131    *
  132    * @param[in]     compile_context The compile context to be used.
  133    * @param[in,out] input           Input tensor. 3 lower dimensions represent a single input, and
  134    *                                an optional 4th dimension for batch of inputs.
  135    *                                Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
  136    * @param[in]     weights         The 4d weights with dimensions [width, height, IFM, OFM]. Data
  137    *                                type supported: Same as @p input.
  138    * @param[in]     bias            (Optional) The biases have one dimension.
  139    *                                Data type supported: Should match @p input data type, except for
  140    *                                input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
  141    * @param[out]    output          Output tensor. The output has the same number of dimensions as
  142    *                                the @p input.
  143    * @param[in]     info            Contains padding and policies to be used in the deconvolution,
  144    *                                this is described in @ref PadStrideInfo.
  145    * @param[in]     invalid_right   The number of zeros added to the right edge of the output.
  146    * @param[in]     invalid_bottom  The number of zeros added to the bottom edge of the output.
  147    * @param[in]     weights_info    (Optional) Weights information needed for @ref
  148    *                                CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
  149    *                                CLWeightsReshapeKernel.
  150    *
  151    */
  152   void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
  153                  const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
  154                  unsigned int invalid_right, unsigned int invalid_bottom,
  155                  const WeightsInfo &weights_info = WeightsInfo());
  156   /** Static function to check if given info will lead to a valid configuration of @ref
  157    * CLDirectTransposeConvLayer
  158    *
  159    * @param[in] input          Input tensor info. 3 lower dimensions represent a single input, and an
  160    *                           optional 4th dimension for batch of inputs.
  161    *                           Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
  162    * @param[in] weights        The 4d weights info with dimensions [width, height, IFM, OFM]. Data
  163    *                           type supported: Same as @p input.
  164    * @param[in] bias           (Optional) The biases have one dimension.
  165    *                           Data type supported: Should match @p input data type, except for input
  166    *                           of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
  167    * @param[in] output         Output tensor info. The output has the same number of dimensions as the
  168    *                           @p input.
  169    * @param[in] info           Contains padding and policies to be used in the deconvolution, this is
  170    *                           described in @ref PadStrideInfo.
  171    * @param[in] invalid_right  The number of zeros added to the right edge of the output.
  172    * @param[in] invalid_bottom The number of zeros added to the bottom edge of the output.
  173    * @param[in] weights_info   (Optional) Weights information needed for @ref CLConvolutionLayer,
  174    *                           specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
  175    *
  176    * @return a status
  177    */
  178   static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
  179                          const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
  180                          unsigned int invalid_right, unsigned int invalid_bottom,
  181                          const WeightsInfo &weights_info = WeightsInfo());
  182
  183   // Inherited methods overridden:
  184   void run() override;
  185   void prepare() override;
  186
  187 private:
  188   MemoryGroup _memory_group; // NOTE(review): presumably manages the intermediate tensors below - confirm in the .cpp
  189   CLDeconvolutionLayerUpsample _scale_f; // Upsampling function
  190   CLConvolutionLayer _conv_f; // Convolution function
  191   CLReverse _flip_weights; // Function used to reverse the weights
  192
  193   CLTensor _scaled_output; // NOTE(review): presumably the upsampled input fed to _conv_f - confirm
  194   ICLTensor *_original_weights; // Raw pointer; presumably the weights passed to configure() - confirm ownership
  195   CLTensor _weights_flipped; // NOTE(review): presumably the output of _flip_weights - confirm
  196   CLTensor _flip_axis; // NOTE(review): presumably the axis tensor consumed by _flip_weights - confirm
  197
  198   bool _is_prepared; // NOTE(review): presumably set once prepare() has completed - confirm
  199 };
 200 } // namespace arm_compute
 201 #endif /* __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ */