compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /*
  18  * Copyright (c) 2019-2020 ARM Limited.
  19  *
  20  * SPDX-License-Identifier: MIT
  21  *
  22  * Permission is hereby granted, free of charge, to any person obtaining a copy
  23  * of this software and associated documentation files (the "Software"), to
  24  * deal in the Software without restriction, including without limitation the
  25  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  26  * sell copies of the Software, and to permit persons to whom the Software is
  27  * furnished to do so, subject to the following conditions:
  28  *
  29  * The above copyright notice and this permission notice shall be included in all
  30  * copies or substantial portions of the Software.
  31  *
  32  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  33  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  34  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  35  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  36  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  37  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  38  * SOFTWARE.
  39  */
  40 #ifndef __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__
  41 #define __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__
  42
  43 #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
  44 #include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
  45 #include "arm_compute/runtime/CL/functions/CLReverse.h"
  46 #include "arm_compute/runtime/CL/functions/CLTranspose.h"
  47
  48 #include "arm_compute/runtime/CL/CLTensor.h"
  49 #include "arm_compute/runtime/IFunction.h"
  50 #include "arm_compute/runtime/IMemoryManager.h"
  51 #include "arm_compute/runtime/MemoryGroup.h"
  52
  53 #include <memory>
  54
  55 namespace arm_compute
  56 {
  57 class ICLTensor;
  58 /** Function to run the deconvolution layer.
  59  *
  60  * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input
  61  * depending on the stride and pad info and then perform a 1x1
  62  * convolution pass. Input stride defines how many zeroes we should put between each element of the
  63  * input and pad is the amount of padding.
  64  *
  65  *  The relation between input and output is as follows:
  66  *  \f[
  67  *       width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
  68  *  \f]
  69  *  \f[
  70  *       height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
  71  *  \f]
  72  *
  73  *  where:
  74  *      width_input is the size of the first input dimension.
  75  *      height_input is the size of the second input dimension.
  76  *      width_output is the size of the first output dimension.
  77  *      height_output is the size of the second output dimension.
  78  *      kernel_x and kernel_y are the convolution sizes in x and y.
  79  *      stride_x and stride_y are the input strides of the first and second dimension.
  80  *
  81  * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
  82  * Therefore, it will be necessary to use the weights in the
  83  * reverse order to perform an actual convolution. This is achieved by using @ref CLReverse.
  84  *
  85  * This function calls the following OpenCL kernels/functions:
  86  *
  87  * -# @ref CLDeconvolutionLayerUpsample
  88  * -# @ref CLConvolutionLayer
  89  *
  90  * And the following OpenCL function, used to reverse the weights:
  91  * -# @ref CLReverse
  92  *
  93  */
  94 class CLDirectTransposeConvLayer : public IFunction
  95 {
  96 public:
  97   /** Constructor; @p memory_manager is optional and defaults to nullptr */
  98   CLDirectTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
  99   /** Prevent instances of this class from being copied (As this class contains pointers) */
 100   CLDirectTransposeConvLayer(const CLDirectTransposeConvLayer &) = delete;
 101   /** Default move constructor */
 102   CLDirectTransposeConvLayer(CLDirectTransposeConvLayer &&) = default;
 103   /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
 104   CLDirectTransposeConvLayer &operator=(const CLDirectTransposeConvLayer &) = delete;
 105   /** Default move assignment operator */
 106   CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default;
 107   /** Set the input, weights, biases and output tensors.
 108    *
 109    * @param[in,out] input           Input tensor. 3 lower dimensions represent a single input,
 110    *                                and an optional 4th dimension for batch of inputs.
 111    *                                Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
 112    * @param[in]     weights         The 4d weights with dimensions [width, height, IFM, OFM].
 113    *                                Data type supported: Same as @p input.
 114    * @param[in]     bias            (Optional) The biases have one dimension.
 115    *                                Data type supported: Should match @p input data type,
 116    *                                except for input of QASYMM8 and QASYMM8_SIGNED type
 117    *                                where biases should be of S32 type
 118    * @param[out]    output          Output tensor.
 119    *                                The output has the same number of dimensions as the @p input.
 120    * @param[in]     info            Contains padding and policies to be used in the deconvolution,
 121    *                                this is described in @ref PadStrideInfo.
 122    * @param[in]     invalid_right   The number of zeros added to the right edge of the output.
 123    * @param[in]     invalid_bottom  The number of zeros added to the bottom edge of the output.
 124    * @param[in]     weights_info    (Optional) Weights information needed for
 125    *                                @ref CLConvolutionLayer, specifies if the weights tensor has
 126    *                                been reshaped with @ref CLWeightsReshapeKernel.
 127    *
 128    */
 129   void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
 130                  const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom,
 131                  const WeightsInfo &weights_info = WeightsInfo());
 132   /** Set the input, weights, biases and output tensors.
 133    *
 134    * @param[in]     compile_context The compile context to be used.
 135    * @param[in,out] input           Input tensor. 3 lower dimensions represent a single input,
 136    *                                and an optional 4th dimension for batch of inputs.
 137    *                                Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
 138    * @param[in]     weights         The 4d weights with dimensions [width, height, IFM, OFM].
 139    *                                Data type supported: Same as @p input.
 140    * @param[in]     bias            (Optional) The biases have one dimension.
 141    *                                Data type supported: Should match @p input data type, except for
 142    *                                input of QASYMM8 and QASYMM8_SIGNED type
 143    *                                where biases should be of S32 type
 144    * @param[out]    output          Output tensor. The output has the same number of dimensions as
 145    *                                the @p input.
 146    * @param[in]     info            Contains padding and policies to be used in the deconvolution,
 147    *                                this is described in @ref PadStrideInfo.
 148    * @param[in]     invalid_right   The number of zeros added to the right edge of the output.
 149    * @param[in]     invalid_bottom  The number of zeros added to the bottom edge of the output.
 150    * @param[in]     weights_info    (Optional) Weights information needed for
 151    *                                @ref CLConvolutionLayer, specifies if the weights tensor has
 152    *                                been reshaped with @ref CLWeightsReshapeKernel.
 153    *
 154    */
 155   void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
 156                  const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
 157                  unsigned int invalid_right, unsigned int invalid_bottom,
 158                  const WeightsInfo &weights_info = WeightsInfo());
 159   /** Static function to check if given info will lead to a valid configuration of @ref
 160    * CLDirectTransposeConvLayer
 161    *
 162    * @param[in] input           Input tensor info. 3 lower dimensions represent a single input,
 163    *                            and an optional 4th dimension for batch of inputs.
 164    *                            Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
 165    * @param[in] weights         The 4d weights info with dimensions [width, height, IFM, OFM].
 166    *                            Data type supported: Same as @p input.
 167    * @param[in] bias            (Optional) The biases have one dimension.
 168    *                            Data type supported: Should match @p input data type,
 169    *                            except for input of QASYMM8 and QASYMM8_SIGNED type
 170    *                            where biases should be of S32 type
 171    * @param[in] output          Output tensor info. The output has the same number of dimensions
 172    *                            as the @p input.
 173    * @param[in] info            Contains padding and policies to be used in the deconvolution,
 174    *                            this is described in @ref PadStrideInfo.
 175    * @param[in] invalid_right   The number of zeros added to the right edge of the output.
 176    * @param[in] invalid_bottom  The number of zeros added to the bottom edge of the output.
 177    * @param[in] weights_info    (Optional) Weights information needed for @ref CLConvolutionLayer,
 178    *                            specifies if the weights tensor has been reshaped
 179    *                            with @ref CLWeightsReshapeKernel.
 180    *
 181    * @return a status
 182    */
 183   static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
 184                          const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
 185                          unsigned int invalid_right, unsigned int invalid_bottom,
 186                          const WeightsInfo &weights_info = WeightsInfo());
 187
 188   // Inherited methods overridden:
 189   void run() override;
 190   void prepare() override;
 191
 192 private:
 193   MemoryGroup _memory_group; /**< Memory group; presumably manages the intermediate tensors - TODO confirm */
 194   CLDeconvolutionLayerUpsample _scale_f; /**< Upsample function listed in the class description */
 195   CLConvolutionLayer _conv_f; /**< Convolution function listed in the class description */
 196   CLReverse _flip_weights; /**< Reverse function used on the weights (see class description) */
 197
 198   CLTensor _scaled_output; /**< Presumably the upsampled input fed to the convolution - TODO confirm */
 199   ICLTensor *_original_weights; /**< Raw pointer; presumably the weights given to configure() - TODO confirm */
 200   CLTensor _weights_flipped; /**< Presumably the output of _flip_weights - TODO confirm */
 201   CLTensor _flip_axis; /**< Presumably the axes along which the weights are reversed - TODO confirm */
 202
 203   bool _is_prepared; /**< Presumably records whether prepare() has already run - TODO confirm */
 204 };
 205 } // namespace arm_compute
 206 #endif /* __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ */