From: 오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 Date: Wed, 3 Jul 2019 09:29:03 +0000 (+0900) Subject: Fix arm compute cl kernel for transpose conv (#5543) X-Git-Tag: submit/tizen/20190809.050447~601 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=af6d63198f65525ae840f95becf2dedd2cde3634;p=platform%2Fcore%2Fml%2Fnnfw.git Fix arm compute cl kernel for transpose conv (#5543) * Fix arm compute cl kernel for transpose conv Fix arm compute cl kernel for transpose conv - Padding calculation - Allow asymmetric padding - Get and fix upsample layer and kernel - Rename DeconvolutionXXX to TransposeConvXXX - Enable all transpose conv tests Signed-off-by: Hyeongseok Oh * Fix pacl --- diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h new file mode 100644 index 0000000..c5ef730 --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ +#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the Upsampling layer kernel for transpose convolution on OpenCL. + */ +class CLTransposeConvLayerUpsampleKernel : public ICLKernel +{ +public: + /** Constructor */ + CLTransposeConvLayerUpsampleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTransposeConvLayerUpsampleKernel(const CLTransposeConvLayerUpsampleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTransposeConvLayerUpsampleKernel & + operator=(const CLTransposeConvLayerUpsampleKernel &) = delete; + /** Default Move Constructor. */ + CLTransposeConvLayerUpsampleKernel(CLTransposeConvLayerUpsampleKernel &&) = default; + /** Default move assignment operator */ + CLTransposeConvLayerUpsampleKernel &operator=(CLTransposeConvLayerUpsampleKernel &&) = default; + /** Default destructor */ + ~CLTransposeConvLayerUpsampleKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. All but + * the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only + * performed within the XY-plane. + * @param[in] inner_border Top and right inner border sizes. These rows and columns will be + * filled with zero. + * @param[in] info Contains padding and stride information described in @ref + * PadStrideInfo. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border, + const PadStrideInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref + * CLTransposeConvLayerUpsample + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] output Destination tensor info. Data types supported: same as @p input. All + * but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is + * only performed within the XY-plane. + * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled + * with zero. + * @param[in] info Contains padding and stride information described in @ref + * PadStrideInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const BorderSize &inner_border, const PadStrideInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + BorderSize _inner_border; + PadStrideInfo _info; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/UtilsEx.h b/libs/ARMComputeEx/arm_compute/core/UtilsEx.h index 3fb3955..39026e6 100644 --- a/libs/ARMComputeEx/arm_compute/core/UtilsEx.h +++ b/libs/ARMComputeEx/arm_compute/core/UtilsEx.h @@ -19,37 +19,29 @@ #include +#include "arm_compute/core/Types.h" + namespace arm_compute { -/** Returns expected width and height of the deconvolution's output tensor. +/** Returns expected width and height of the transpose convolution's output tensor. * * @note This function was copied in order to fix a bug computing to wrong output dimensions. - * The formula for computing the output dimension is: o = s*(i - 1) + a + k - 2*p - * k: kernel size - * s: stride - * i: input size - * o: output size - * p: padding - * a: inner border - * Refer to : https://github.com/ARM-software/ComputeLibrary/issues/523#issuecomment-414606797 * * @param[in] in_width Width of input tensor (Number of columns) * @param[in] in_height Height of input tensor (Number of rows) * @param[in] kernel_width Kernel width. * @param[in] kernel_height Kernel height. - * @param[in] padx X axis padding. - * @param[in] pady Y axis padding. - * @param[in] stride_x X axis input stride. - * @param[in] stride_y Y axis input stride. - * @param[in] inner_border_right The number of zeros added to right edge of the input. - * @param[in] inner_border_top The number of zeros added to top edge of the input. + * @param[in] info padding and stride info. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_top The number of zeros added to bottom edge of the output. * * @return A pair with the new width in the first position and the new height in the second. 
*/ -const std::pair deconvolution_output_dimensions_ex( - unsigned int in_width, unsigned int in_height, unsigned int kernel_width, - unsigned int kernel_height, unsigned int padx, unsigned int pady, unsigned int stride_x, - unsigned int stride_y, unsigned int inner_border_right = 0, unsigned int inner_border_top = 0); +const std::pair +transposeconv_output_dimensions(unsigned int in_width, unsigned int in_height, + unsigned int kernel_width, unsigned int kernel_height, + const PadStrideInfo &info, unsigned int invalid_right, + unsigned int invalid_top); } #endif /*__ARM_COMPUTE_UTILSEX_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h b/libs/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h new file mode 100644 index 0000000..367129e --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__ +#define __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__ + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/core/Utils.h" + +#include "arm_compute/core/utils/helpers/tensor_transform.h" + +#include + +namespace arm_compute +{ +namespace misc +{ +namespace shape_calculator +{ + +/** Calculate the upsampled output shape used for transpose convolution + * + * @param[in] input Input tensor info + * @param[in] weights Weights tensor shape + * @param[in] info Padding and stride info + * @param[in] out_dims Output shape dimensions + * @param[in] pad_left Padding on left + * @param[in] pad_right Padding on right + * @param[in] pad_top Padding on top + * @param[in] pad_bottom Padding on bottom + * + * @return the calculated shape + */ +inline TensorShape compute_transposeconv_upsampled_shape( + const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info, + std::pair &out_dims, unsigned int &pad_left, + unsigned int &pad_right, unsigned int &pad_top, unsigned int &pad_bottom) +{ + unsigned int sx = info.stride().first; + unsigned int sy = info.stride().second; + const DataLayout data_layout = input.data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + // Find the upsampled dimensions + // transpose conv out: + // tconv_out + pad = 1 + (in - 1) * stride + invalid + // tconv_out = 1 + (in - 1) * stride + invalid - pad + // upsample out: + // upsample_out = 1 + (in - 1) * stride + unsigned int out_x = (input.dimension(idx_w) - 1) * sx + 1; + unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1; + + // Find the padding needed for the convolution with stride 1 in order to match output shape + // upsample+pad out: + // upsample_out + pad = tconv_out + kernel - 1 + 
// pad = tconv_out + kernel - 1 - upsample_out + unsigned int padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1); + unsigned int pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1); + out_x += padx; + out_y += pady; + + unsigned int padx_all = padx + info.pad_left() + info.pad_right(); + unsigned int pady_all = pady + info.pad_top() + info.pad_bottom(); + pad_left = (padx_all + 1) / 2 - info.pad_left(); + pad_right = padx_all / 2 - info.pad_right(); + pad_top = (pady_all + 1) / 2 - info.pad_top(); + pad_bottom = pady_all / 2 - info.pad_bottom(); + + TensorShape scale_out_shape(input.tensor_shape()); + scale_out_shape.set(idx_w, out_x); + scale_out_shape.set(idx_h, out_y); + + return scale_out_shape; +} + +/** Calculate the output shape of the transpose convolution layer + * + * @param[in] out_dims Output x and y shape dimensions + * @param[in] input Input tensor info + * @param[in] weights Weights tensor shape + * + * @return the calculated shape + */ +inline TensorShape +compute_transposeconv_output_shape(const std::pair &out_dims, + const ITensorInfo &input, const ITensorInfo &weights) +{ + const TensorShape input_shape{input.tensor_shape()}; + const TensorShape weights_shape{weights.tensor_shape()}; + + const DataLayout data_layout = input.data_layout(); + const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int channel_idx = + get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); + + TensorShape out_shape{input_shape}; + out_shape.set(width_idx, out_dims.first); + out_shape.set(height_idx, out_dims.second); + out_shape.set(channel_idx, weights_shape[batch_idx]); + return out_shape; +} + +} // namespace shape_calculator +} // namespace misc +} // namespace arm_compute + +#endif // __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h index 277e1e7..3b7fcde 100644 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h +++ b/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -38,5 +37,6 @@ #include #include #include +#include #endif // __ARM_COMPUTE_CLFUNCTIONSEX_H__ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDeconvolutionLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h similarity index 53% rename from libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDeconvolutionLayerEx.h rename to libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h index 2c8ccc3..7b58ba2 100644 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDeconvolutionLayerEx.h +++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h @@ -22,11 +22,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef __ARM_COMPUTE_CLDECONVOLUTIONLAYEREX_H__ -#define __ARM_COMPUTE_CLDECONVOLUTIONLAYEREX_H__ +#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ +#define __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h" +#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h" #include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h" @@ -40,11 +40,11 @@ namespace arm_compute { class ICLTensor; -/** Function to run the deconvolution layer. +/** Function to run the transpose convolution layer. * * @note This layer was copied in order to fix a bug computing to wrong output dimensions. * - * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input + * TransposeConv Layer is the backward pass of Convolution Layer. First we transform the input * depending on the stride and pad info and then perform a 1x1 * convolution pass. Input stride defines how many zeroes we should put between each element of the * input, pad is the amount of padding and finally a is a user @@ -53,10 +53,10 @@ class ICLTensor; * * The relation between input to output is as follows: * \f[ - * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x + * width\_output = (width\_input - 1) \cdot stride\_x - \cdot padding\_x + kernel\_x * \f] * \f[ - * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y + * height\_output = (height\_input - 1) \cdot stride\_y - \cdot padding\_y + kernel\_y * \f] * * where: @@ -74,69 +74,69 @@ class ICLTensor; * * This function calls the following OpenCL kernels/functions: * - * -# @ref CLDeconvolutionLayerUpsample + * -# @ref CLTransposeConvLayerUpsample * -# @ref CLConvolutionLayer * */ -class CLDeconvolutionLayerEx : public IFunction +class CLTransposeConvLayer : public IFunction { public: /** Constructor */ - CLDeconvolutionLayerEx(std::shared_ptr memory_manager = nullptr); + CLTransposeConvLayer(std::shared_ptr memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDeconvolutionLayerEx(const CLDeconvolutionLayerEx &) = delete; + CLTransposeConvLayer(const CLTransposeConvLayer &) = delete; /** Default move constructor */ - CLDeconvolutionLayerEx(CLDeconvolutionLayerEx &&) = default; + CLTransposeConvLayer(CLTransposeConvLayer &&) = default; /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDeconvolutionLayerEx &operator=(const CLDeconvolutionLayerEx &) = delete; + CLTransposeConvLayer &operator=(const CLTransposeConvLayer &) = delete; /** Default move assignment operator */ - CLDeconvolutionLayerEx &operator=(CLDeconvolutionLayerEx &&) = default; + CLTransposeConvLayer &operator=(CLTransposeConvLayer &&) = default; /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, - * and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. - * Data type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type - * supported: Same as @p input. - * @param[out] output Output tensor. The output has the same number of dimensions - * as the @p input. 
- * @param[in] info Contains padding and policies to be used in the - * deconvolution, this is decribed in @ref PadStrideInfo. - * @param[in] inner_border_right The number of zeros added to right edge of the input. - * @param[in] inner_border_top The number of zeros added to top edge of the input. - * @param[in] weights_info (Optional) Weights information needed for @ref - * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref - * CLWeightsReshapeKernel. - * + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: + * Same as @p input. + * @param[out] output Output tensor. The output has the same number of dimensions + * as the @p input. + * @param[in] info Contains padding and policies to be used in the + * deconvolution, this is decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the input. + * @param[in] invalid_bottom The number of zeros added to top edge of the input. + * @param[in] weights_info (Optional) Weights information needed for @ref + * CLConvolutionLayer, specifies if the weights tensor has been + * reshaped with @ref CLWeightsReshapeKernel. */ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, - const PadStrideInfo &info, unsigned int inner_border_right, - unsigned int inner_border_top, const WeightsInfo &weights_info = WeightsInfo()); + const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom, + const WeightsInfo &weights_info = WeightsInfo()); /** Static function to check if given info will lead to a valid configuration of @ref - * CLDeconvolutionLayerEx - * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, - * and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8/F16/F32. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. - * Data type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: - * Same as @p input. - * @param[in] output Output tensor info. The output has the same number of dimensions - * as the @p input. - * @param[in] info Contains padding and policies to be used in the deconvolution, - * this is decribed in @ref PadStrideInfo. - * @param[in] inner_border_right The number of zeros added to right edge of the input. - * @param[in] inner_border_top The number of zeros added to top edge of the input. - * @param[in] weights_info (Optional) Weights information needed for @ref - * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref - * CLWeightsReshapeKernel. + * CLTransposeConvLayer * + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8/F16/F32. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: + * Same as @p input. + * @param[in] output Output tensor info. The output has the same number of dimensions + * as the @p input. 
+ * @param[in] info Contains padding and policies to be used in the deconvolution, + * this is decribed in @ref PadStrideInfo. + * @param[in] innvalid_right The number of zeros added to right edge of the input. + * @param[in] invalid_bottom The number of zeros added to top edge of the input. + * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped with @ref + * CLWeightsReshapeKernel. * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info, - unsigned int inner_border_right, unsigned int inner_border_top, + unsigned int innvalid_right, unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo()); // Inherited methods overridden: @@ -145,7 +145,7 @@ public: private: CLMemoryGroup _memory_group; - CLDeconvolutionLayerUpsample _scale_f; + CLTransposeConvLayerUpsample _scale_f; CLConvolutionLayer _conv_f; CPPFlipWeightsKernel _flip_weights; CLTensor _scaled_output; @@ -154,4 +154,4 @@ private: bool _is_prepared; }; } -#endif /* __ARM_COMPUTE_CLDECONVOLUTIONLAYEREX_H__ */ +#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h new file mode 100644 index 0000000..4ae0e18 --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2017-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ +#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLTransposeConvLayerUpsampleKernel */ +class CLTransposeConvLayerUpsample : public IFunction +{ +public: + /** Default constructor */ + CLTransposeConvLayerUpsample(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTransposeConvLayerUpsample(const CLTransposeConvLayerUpsample &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTransposeConvLayerUpsample &operator=(const CLTransposeConvLayerUpsample &) = delete; + /** Allow instances of this class to be moved */ + CLTransposeConvLayerUpsample(CLTransposeConvLayerUpsample &&) = default; + /** Allow instances of this class to be moved */ + CLTransposeConvLayerUpsample &operator=(CLTransposeConvLayerUpsample &&) = default; + /** Default destructor */ + virtual ~CLTransposeConvLayerUpsample() = default; + + /** Initialize the function's source, destination, interpolation type and border_mode. + * + * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] inner_border The number of zeros added to right and top edges of the input. + * @param[in] info Contains padding and policies to be used in the deconvolution. + */ + void configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border, + const PadStrideInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref + * CLTransposeConvLayerUpsample + * + * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input. + * @param[in] inner_border The number of zeros added to right and top edges of the input. + * @param[in] info Contains padding and policies to be used in the deconvolution. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const BorderSize &inner_border, const PadStrideInfo &info); + + // Inherited methods overridden: + void run() override; + +private: + CLTransposeConvLayerUpsampleKernel _upsample; + ICLTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ */ diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp new file mode 100644 index 0000000..6cc8d9d --- /dev/null +++ b/libs/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2017-2019 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/CLValidate.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; + +CLTransposeConvLayerUpsampleKernel::CLTransposeConvLayerUpsampleKernel() + : _input(nullptr), _output(nullptr), _inner_border(), _info() +{ +} + +Status CLTransposeConvLayerUpsampleKernel::validate(const ITensorInfo *input, + const ITensorInfo *output, + const BorderSize &inner_border, + const PadStrideInfo &info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, + DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); + + const DataLayout data_layout = input->data_layout(); + + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_w) == 0); + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_h) == 0); + + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_c) != output->dimension(idx_c)); + for (size_t i = 3; i < Coordinates::num_max_dimensions; ++i) + { + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i)); + } + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.right > info.stride().first - 1, + "inner_border_right must be smaller that stride_x"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.top > info.stride().second - 1, + "inner_border_top must be smaller that stride_y"); + + return Status{}; +} + +void CLTransposeConvLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output, + const BorderSize &inner_border, + const PadStrideInfo &info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + + _input = input; + _output = output; + _inner_border = inner_border; + _info = info; + + // Perform validation step + ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayerUpsampleKernel::validate( + input->info(), output->info(), inner_border, info)); + + // Create kernel + CLBuildOptions build_opts; + build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + _kernel = static_cast( + CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options())); + + constexpr unsigned int num_elems_processed_per_iteration = 1; + + // Configure kernel window + Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal output_access(output->info(), 0, 
num_elems_processed_per_iteration); + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + + ICLKernel::configure_internal(win); +} + +void CLTransposeConvLayerUpsampleKernel::run(const Window &window, cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + + const DataLayout data_layout = _input->info()->data_layout(); + + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + const int out_start_x = _info.pad_left(); + const int out_end_x = _output->info()->dimension(idx_w) - _inner_border.right - + _info.pad_right() + _info.stride().first - 1; + const int out_step_x = _info.stride().first; + + const int out_start_y = _inner_border.top + _info.pad_top(); + const int out_end_y = + _output->info()->dimension(idx_h) - _info.pad_bottom() + _info.stride().second - 1; + const int out_step_y = _info.stride().second; + + switch (data_layout) + { + case DataLayout::NCHW: + { + Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); + + Window slice_out = collapsed.first_slice_window_3D(); + slice_out.set(Window::DimX, Window::Dimension(out_start_x, out_end_x, out_step_x)); + slice_out.set(Window::DimY, Window::Dimension(out_start_y, out_end_y, out_step_y)); + + Window slice_in = collapsed.first_slice_window_3D(); + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, slice_in); + add_3D_tensor_argument(idx, _output, slice_out); + enqueue(queue, *this, slice_out); + } while (collapsed.slide_window_slice_3D(slice_in) && + collapsed.slide_window_slice_3D(slice_out)); + break; + } + case DataLayout::NHWC: + { + // NOTE: not collapsing in NHWC + Window slice_out = window.first_slice_window_3D(); + slice_out.set(Window::DimY, Window::Dimension(out_start_x, out_end_x, out_step_x)); + slice_out.set(Window::DimZ, Window::Dimension(out_start_y, out_end_y, out_step_y)); + + Window slice_in = window.first_slice_window_3D(); + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, slice_in); + add_3D_tensor_argument(idx, _output, slice_out); + enqueue(queue, *this, slice_out); + } while (window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out)); + break; + } + default: + ARM_COMPUTE_ERROR("Unsupported data layout"); + } +} diff --git a/libs/ARMComputeEx/src/core/UtilsEx.cpp b/libs/ARMComputeEx/src/core/UtilsEx.cpp index 3322796..94242b5 100644 --- a/libs/ARMComputeEx/src/core/UtilsEx.cpp +++ b/libs/ARMComputeEx/src/core/UtilsEx.cpp @@ -19,16 +19,27 @@ using namespace arm_compute; -const std::pair arm_compute::deconvolution_output_dimensions_ex( - unsigned int in_width, unsigned int in_height, unsigned int kernel_width, - unsigned int kernel_height, unsigned int padx, unsigned int pady, unsigned int stride_x, - unsigned int stride_y, unsigned int inner_border_right, unsigned int inner_border_top) +const std::pair +arm_compute::transposeconv_output_dimensions(unsigned int in_width, unsigned int in_height, + unsigned int kernel_width, unsigned int kernel_height, + const PadStrideInfo &info, unsigned int invalid_right, + unsigned int invalid_bottom) { + const unsigned int stride_x = info.stride().first; + const unsigned int stride_y = info.stride().second; + const unsigned int padx = info.pad_left() + info.pad_right(); + const unsigned int pady = 
info.pad_top() + info.pad_bottom(); + ARM_COMPUTE_ERROR_ON(in_width < 1 || in_height < 1); - ARM_COMPUTE_ERROR_ON(((in_width - 1) * stride_x + kernel_width) < 2 * padx); - ARM_COMPUTE_ERROR_ON(((in_height - 1) * stride_y + kernel_height) < 2 * pady); - const int w = stride_x * (in_width - 1) + kernel_width - 2 * padx + inner_border_right; - const int h = stride_y * (in_height - 1) + kernel_height - 2 * pady + inner_border_top; + ARM_COMPUTE_ERROR_ON(kernel_width <= padx); + ARM_COMPUTE_ERROR_ON(kernel_height <= pady); + + // Find the transpose conv out dimensions + // transpose conv out: + // tconv_out + pad = 1 + (in - 1) * stride + invalid + // tconv_out = 1 + (in - 1) * stride + invalid - pad + const int w = stride_x * (in_width - 1) + kernel_width - padx + invalid_right; + const int h = stride_y * (in_height - 1) + kernel_height - pady + invalid_bottom; return std::make_pair(w, h); } diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLDeconvolutionLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp similarity index 69% rename from libs/ARMComputeEx/src/runtime/CL/functions/CLDeconvolutionLayerEx.cpp rename to libs/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp index 2d7c36d..55eb3ad 100644 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLDeconvolutionLayerEx.cpp +++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp @@ -22,7 +22,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerEx.h" +#include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h" +#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" @@ -38,8 +39,7 @@ using namespace arm_compute; using namespace arm_compute::misc::shape_calculator; -CLDeconvolutionLayerEx::CLDeconvolutionLayerEx( - std::shared_ptr memory_manager) // NOLINT +CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), _scale_f(), _conv_f(), @@ -51,11 +51,10 @@ CLDeconvolutionLayerEx::CLDeconvolutionLayerEx( { } -Status CLDeconvolutionLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *bias, ITensorInfo *output, - const PadStrideInfo &info, unsigned int inner_border_right, - unsigned int inner_border_top, - const WeightsInfo &weights_info) +Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, + const ITensorInfo *bias, ITensorInfo *output, + const PadStrideInfo &info, unsigned int invalid_right, + unsigned int invalid_bottom, const WeightsInfo &weights_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, @@ -70,24 +69,21 @@ Status CLDeconvolutionLayerEx::validate(const ITensorInfo *input, const ITensorI ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h)); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1); - ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric()); - const unsigned int stride_x = info.stride().first; - const unsigned int stride_y = info.stride().second; + const unsigned int kernel_x = weights->dimension(idx_w); + const unsigned int kernel_y = weights->dimension(idx_h); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_right > stride_x - 1, - "inner_border_right must be smaller 
than stride_x"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_top > stride_y - 1, - "inner_border_top must be smaller than stride_y"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_right > kernel_x - 1, + "invalid_right must be smaller than kernel_x"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_bottom > kernel_y - 1, + "inner_border_top must be smaller than kernel_y"); - // NOTE From the existing CLDeconvolutionLayer, inner_border_right and inner_border_top were - // added. - auto out_dims = deconvolution_output_dimensions_ex( + // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were added. + auto out_dims = transposeconv_output_dimensions( input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), - weights->dimension(idx_h), info.pad().first, info.pad().second, stride_x, stride_y, - inner_border_right, inner_border_top); + weights->dimension(idx_h), info, invalid_right, invalid_bottom); - const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input, *weights); + const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); @@ -111,11 +107,12 @@ Status CLDeconvolutionLayerEx::validate(const ITensorInfo *input, const ITensorI ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c], "Output's depth is invalid."); - unsigned int padx = 0; - unsigned int pady = 0; - const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape( - *input, *weights, stride_x, stride_y, inner_border_right, inner_border_top, out_dims, padx, - pady); + unsigned int pad_left = 0; + unsigned int pad_right = 0; + unsigned int pad_top = 0; + unsigned int pad_bottom = 0; + const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( + *input, *weights, info, out_dims, pad_left, pad_right, pad_top, pad_bottom); TensorInfo scale_out_info(input->clone() ->set_is_resizable(true) .reset_padding() @@ -123,19 +120,18 @@ Status CLDeconvolutionLayerEx::validate(const ITensorInfo *input, const ITensorI .set_data_layout(data_layout)); const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate( - input, &scale_out_info, BorderSize(inner_border_right, inner_border_top), info)); + ARM_COMPUTE_RETURN_ON_ERROR( + CLTransposeConvLayerUpsample::validate(input, &scale_out_info, BorderSize(0, 0), info)); ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, weights_info)); return Status{}; } -void CLDeconvolutionLayerEx::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, - ICLTensor *output, const PadStrideInfo &info, - unsigned int inner_border_right, - unsigned int inner_border_top, - const WeightsInfo &weights_info) +void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, + ICLTensor *output, const PadStrideInfo &info, + unsigned int invalid_right, unsigned int invalid_bottom, + const WeightsInfo &weights_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -151,15 +147,15 @@ void CLDeconvolutionLayerEx::configure(ICLTensor *input, ICLTensor *weights, con _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); _flip_weights.configure(weights, &_weights_flipped); - // NOTE From the existing CLDeconvolutionLayer, inner_border_right and inner_border_top were + // 
NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were // added. - auto out_dims = deconvolution_output_dimensions_ex( + auto out_dims = transposeconv_output_dimensions( input->info()->dimension(idx_w), input->info()->dimension(idx_h), - weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info.pad().first, - info.pad().second, stride_x, stride_y, inner_border_right, inner_border_top); + weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right, + invalid_bottom); const TensorShape output_shape = - compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info()); + compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); // Output auto initialization if not yet initialized auto_init_if_empty( @@ -167,9 +163,9 @@ void CLDeconvolutionLayerEx::configure(ICLTensor *input, ICLTensor *weights, con input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout)); // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayerEx::validate( + ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayer::validate( input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), - info, inner_border_right, inner_border_top)); + info, invalid_right, invalid_bottom)); _is_prepared = weights_info.retain_internal_weights(); @@ -177,11 +173,12 @@ void CLDeconvolutionLayerEx::configure(ICLTensor *input, ICLTensor *weights, con // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order // to match output shape - unsigned int padx = 0; - unsigned int pady = 0; - const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape( - *input->info(), *weights->info(), stride_x, stride_y, inner_border_right, inner_border_top, - out_dims, padx, pady); + unsigned int pad_left = 0; + unsigned int pad_right = 0; + unsigned int pad_top = 0; + unsigned int pad_bottom = 0; + const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( + *input->info(), *weights->info(), info, out_dims, pad_left, pad_right, pad_top, pad_bottom); TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info()); @@ -189,9 +186,9 @@ void CLDeconvolutionLayerEx::configure(ICLTensor *input, ICLTensor *weights, con _scaled_output.allocator()->init(scale_out_info); // configure scale function - const PadStrideInfo upsample_info(stride_x, stride_y, padx / 2, pady / 2); - _scale_f.configure(input, &_scaled_output, BorderSize(inner_border_top, inner_border_right), - upsample_info); + const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, + DimensionRoundingType::FLOOR); + _scale_f.configure(input, &_scaled_output, BorderSize(0, 0), upsample_info); // setup the function to convolve the upscaled output const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); @@ -199,7 +196,7 @@ void CLDeconvolutionLayerEx::configure(ICLTensor *input, ICLTensor *weights, con _scaled_output.allocator()->allocate(); } -void CLDeconvolutionLayerEx::run() +void CLTransposeConvLayer::run() { prepare(); @@ -211,7 +208,7 @@ void CLDeconvolutionLayerEx::run() _memory_group.release(); } -void CLDeconvolutionLayerEx::prepare() +void CLTransposeConvLayer::prepare() { if (!_is_prepared) { diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp new 
file mode 100644 index 0000000..0ce3e67 --- /dev/null +++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2017-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h" + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include +#include +#include + +using namespace arm_compute; + +CLTransposeConvLayerUpsample::CLTransposeConvLayerUpsample() // NOLINT + : _upsample(), + _output(nullptr) +{ +} + +Status CLTransposeConvLayerUpsample::validate(const ITensorInfo *input, const ITensorInfo *output, + const BorderSize &inner_border, + const PadStrideInfo &info) +{ + return CLTransposeConvLayerUpsampleKernel::validate(input, output, inner_border, info); +} + +void CLTransposeConvLayerUpsample::configure(ICLTensor *input, ICLTensor *output, + const BorderSize &inner_border, + const PadStrideInfo &info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + + _output = output; + _upsample.configure(input, _output, inner_border, info); +} + +void CLTransposeConvLayerUpsample::run() +{ + _output->map(CLScheduler::get().queue(), true); + if (is_data_type_quantized_asymmetric(_output->info()->data_type())) + { + const uint8_t quantized_zero = _output->info()->quantization_info().offset; + std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero); + } + else + { + memset(_output->buffer(), 0, _output->info()->total_size()); + } + _output->unmap(CLScheduler::get().queue()); + + CLScheduler::get().enqueue(_upsample, false); +} diff --git a/runtimes/neurun/backend/acl_cl/StageGenerator.cc b/runtimes/neurun/backend/acl_cl/StageGenerator.cc index 9ba4c1a..7304c34 100644 --- a/runtimes/neurun/backend/acl_cl/StageGenerator.cc +++ b/runtimes/neurun/backend/acl_cl/StageGenerator.cc @@ -2403,6 +2403,8 @@ void StageGenerator::visit(const model::operation::TransposeConvNode &node) model::ExplicitPadding padding; model::Stride stride; + uint32_t invalid_horizontal; + uint32_t invalid_vertical; }; Param param; @@ -2417,6 +2419,18 @@ void StageGenerator::visit(const model::operation::TransposeConvNode &node) (node.param().padding.type == model::PaddingType::VALID)); param.padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape, param.stride, ker_shape.W, ker_shape.H); + if (node.param().padding.type == model::PaddingType::VALID) + { + param.invalid_horizontal = + ofm_shape.W - (1 + (ifm_shape.W - 1) * param.stride.horizontal) - (ker_shape.W - 1); + param.invalid_vertical = + ofm_shape.H - (1 + (ifm_shape.H - 1) * param.stride.vertical) - (ker_shape.H - 1); + } + else + { + param.invalid_horizontal = 0; + param.invalid_vertical = 0; + } auto tensors = _tensor_builder; @@ -2425,21 +2439,14 @@ void StageGenerator::visit(const model::operation::TransposeConvNode 
&node) auto ifm_alloc = tensors->at(param.ifm_index).get(); auto ker_alloc = tensors->at(param.ker_index).get(); - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLDeconvolutionLayerEx>(); + const auto tconv_info = acl_common::asPadStrideInfo(param.padding, param.stride); - auto padding = param.padding; - auto inner_border_right = padding.right - padding.left; - auto inner_border_top = padding.bottom - padding.top; + std::unique_ptr<::arm_compute::IFunction> fn; - padding.left = padding.right; - padding.top = padding.bottom; - auto symmetric_tconv_info = - ::neurun::backend::acl_common::asPadStrideInfo(padding, param.stride); + auto l = nnfw::cpp14::make_unique<::arm_compute::CLTransposeConvLayer>(); - l->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), - symmetric_tconv_info, inner_border_right, inner_border_top); + l->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info, + param.invalid_vertical, param.invalid_horizontal); fn = std::move(l); diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc index b2b6505..666d27e 100644 --- a/runtimes/pure_arm_compute/src/compilation.cc +++ b/runtimes/pure_arm_compute/src/compilation.cc @@ -4011,6 +4011,8 @@ void Planner::visit(const ::internal::tflite::op::TransposeConv::Node &node) int ker_index; Padding padding; Stride stride; + uint32_t invalid_horizontal; + uint32_t invalid_vertical; }; Param param; @@ -4026,6 +4028,15 @@ void Planner::visit(const ::internal::tflite::op::TransposeConv::Node &node) ? same_padding(ofm_shape, ifm_shape, param.stride, ker_shape.W, ker_shape.H) : valid_padding(); + param.invalid_horizontal = + (padding_type == ANEURALNETWORKS_PADDING_SAME) + ? 0 + : ofm_shape.W - (1 + (ifm_shape.W - 1) * hstride) - (ker_shape.W - 1); + param.invalid_vertical = + (padding_type == ANEURALNETWORKS_PADDING_SAME) + ? 
0 + : ofm_shape.H - (1 + (ifm_shape.H - 1) * param.stride.vertical) - (ker_shape.H - 1); + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); @@ -4036,19 +4047,13 @@ void Planner::visit(const ::internal::tflite::op::TransposeConv::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDeconvolutionLayerEx>(); - - auto padding = param.padding; - auto inner_border_right = padding.right - padding.left; - auto inner_border_top = padding.bottom - padding.top; + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLTransposeConvLayer>(); - padding.left = padding.right; - padding.top = padding.bottom; - auto symmetric_tconv_info = asPadStrideInfo(padding, param.stride); + auto symmetric_tconv_info = asPadStrideInfo(param.padding, param.stride); // TODO Support WeightInfo in some cases in order to performance improvement fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), nullptr, CAST_CL(ofm_alloc), - symmetric_tconv_info, inner_border_right, inner_border_top); + symmetric_tconv_info, param.invalid_horizontal, param.invalid_vertical); builder.append("TransposeConv", std::move(fn)); } else diff --git a/tests/framework/tests/transpose_conv/config.sh b/tests/framework/tests/transpose_conv/config.sh index 2b19611..2cca86e 100644 --- a/tests/framework/tests/transpose_conv/config.sh +++ b/tests/framework/tests/transpose_conv/config.sh @@ -1,2 +1 @@ MODELFILE_NAME="transpose_conv_test.tflite" -STATUS="disabled" diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux b/tests/nnapi/nnapi_gtest.skip.armv7l-linux index ee76672..88ea9df 100644 --- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux +++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux @@ -11,7 +11,6 @@ GeneratedTests.prelu_ex_quant8_1 GeneratedTests.prelu_ex_broadcast_quant8_1 # Unexpected result GeneratedTests.pack* -GeneratedTests.transpose_conv_ex_float_4 # Not support broadcast GeneratedTests.logical_or_ex_broadcast_4D_2D # Unsupported optional input that has shape
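
For reference, the output-size rule implemented by the renamed transposeconv_output_dimensions() helper in this patch (tconv_out = stride * (in - 1) + kernel - pad_total + invalid) can be checked with a small standalone sketch. The helper name and the example shapes below are illustrative assumptions only and are not part of the patched sources:

#include <cstdio>
#include <utility>

// Illustrative only: mirrors the formula used by the patch,
// tconv_out = stride * (in - 1) + kernel - pad_total + invalid,
// where invalid_* is nonzero only for VALID padding when the requested
// output is larger than 1 + (in - 1) * stride + (kernel - 1).
std::pair<unsigned, unsigned> tconv_out_size(unsigned in_w, unsigned in_h,
                                             unsigned k_w, unsigned k_h,
                                             unsigned stride_x, unsigned stride_y,
                                             unsigned pad_x_total, unsigned pad_y_total,
                                             unsigned invalid_right, unsigned invalid_bottom)
{
  const unsigned w = stride_x * (in_w - 1) + k_w - pad_x_total + invalid_right;
  const unsigned h = stride_y * (in_h - 1) + k_h - pad_y_total + invalid_bottom;
  return {w, h};
}

int main()
{
  // 2x2 input, 3x3 kernel, stride 2, VALID padding (pad = 0, invalid = 0): expect 5x5 output
  const auto dims = tconv_out_size(2, 2, 3, 3, 2, 2, 0, 0, 0, 0);
  std::printf("%u x %u\n", dims.first, dims.second);
  return 0;
}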