From 67d060a8cb648a9951375a744cd99e1640436cbc Mon Sep 17 00:00:00 2001
From: Jiseob Jang/On-Device Lab(SR)/Engineer/Samsung Electronics
Date: Fri, 2 Aug 2019 10:40:52 +0900
Subject: [PATCH] Support TransposeConv op for acl_neon (#5786)

This commit supports the TransposeConv op for the acl_neon backend.
- Introduce NETransposeConvLayer
- Introduce CPPUpsampleEx
- Introduce CPPUpsampleKernelEx
- Apply NETransposeConvLayer for acl_neon backend
- Enable nnapi tests for TransposeConv

Signed-off-by: jiseob.jang
---
 .../core/CPP/kernels/CPPUpsampleKernelEx.h         |  72 +++++
 .../runtime/CPP/functions/CPPUpsampleEx.h          |  49 ++++
 .../runtime/NEON/functions/NETransposeConvLayer.h  | 162 +++++++++++
 .../src/core/CPP/kernels/CPPUpsampleKernelEx.cpp   | 102 +++++++
 .../src/runtime/CPP/functions/CPPUpsampleEx.cpp    |  37 +++
 .../NEON/functions/NETransposeConvLayer.cpp        | 307 +++++++++++++++++++++
 .../neurun/backend/acl_neon/ConstantInitializer.cc |   7 +
 .../neurun/backend/acl_neon/ConstantInitializer.h  |   1 +
 .../neurun/backend/acl_neon/KernelGenerator.cc     |  49 ++++
 runtimes/neurun/backend/acl_neon/KernelGenerator.h |   1 +
 runtimes/neurun/backend/acl_neon/ShapeFixer.cc     |   2 +
 runtimes/neurun/backend/acl_neon/ShapeFixer.h      |   1 +
 tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon |   1 -
 .../neurun_frameworktest_list.armv7l.acl_neon.txt  |   1 +
 14 files changed, 791 insertions(+), 1 deletion(-)
 create mode 100644 runtimes/libs/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h
 create mode 100644 runtimes/libs/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h
 create mode 100644 runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
 create mode 100644 runtimes/libs/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
 create mode 100644 runtimes/libs/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp
 create mode 100644 runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp

diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h
new file mode 100644
index 0000000..d093c22
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__
+#define __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__
+
+#include "arm_compute/core/CPP/ICPPKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** CPP kernel to perform tensor upsample. */
+class CPPUpsampleKernelEx : public ICPPKernel
+{
+public:
+  const char *name() const override { return "CPPUpsampleKernelEx"; }
+  /** Default constructor */
+  CPPUpsampleKernelEx();
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CPPUpsampleKernelEx(const CPPUpsampleKernelEx &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CPPUpsampleKernelEx &operator=(const CPPUpsampleKernelEx &) = delete;
+  /** Allow instances of this class to be moved */
+  CPPUpsampleKernelEx(CPPUpsampleKernelEx &&) = default;
+  /** Allow instances of this class to be moved */
+  CPPUpsampleKernelEx &operator=(CPPUpsampleKernelEx &&) = default;
+  /** Default destructor */
+  ~CPPUpsampleKernelEx() = default;
+
+  /** Set the input and output of the kernel.
+   *
+   * @param[in]  input  The input tensor to upsample. Data types supported: F32/F16/QASYMM8
+   * @param[out] output The output tensor. Data types supported: Same as @p input
+   * @param[in]  info   Padding info.
+   */
+  void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info);
+
+  // Inherited methods overridden:
+  void run(const Window &window, const ThreadInfo &info) override;
+  bool is_parallelisable() const override;
+
+private:
+  const ITensor *_input;
+  ITensor *_output;
+  PadStrideInfo _info;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h
new file mode 100644
index 0000000..8e7e2f9
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CPPUPSAMPLE_EX_H__
+#define __ARM_COMPUTE_CPPUPSAMPLE_EX_H__
+
+#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref CPPUpsampleKernelEx */
+class CPPUpsampleEx : public ICPPSimpleFunction
+{
+public:
+  /** Configure the upsample CPP kernel
+   *
+   * @param[in]  input  The input tensor to upsample. Data types supported: F32/F16/QASYMM8
+   * @param[out] output The output tensor. Data types supported: Same as @p input
+   * @param[in]  info   Padding information
+   */
+  void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CPPUPSAMPLE_EX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
new file mode 100644
index 0000000..a50b9ea
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
+#define __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
+
+#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h"
+#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEPermute.h"
+
+#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Function to run the transpose convolution (deconvolution) layer.
+ *
+ * Transpose convolution is the backward pass of convolution. First we transform the input
+ * depending on the stride and pad info and then perform a 1x1 convolution pass. The input stride
+ * defines how many zeroes we should put between each element of the input, pad is the amount of
+ * padding, and finally a is a user-specified value, where a < stride - 1, that increases the
+ * padding at the top and right of the input image.
+ *
+ * The relation between input and output is as follows:
+ * \f[
+ *       width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
+ * \f]
+ * \f[
+ *       height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
+ * \f]
+ *
+ * where:
+ *      width is the size of the first input dimension.
+ *      height is the size of the second input dimension.
+ *      width_output is the size of the first output dimension.
+ *      height_output is the size of the second output dimension.
+ *      kernel_x and kernel_y are the convolution sizes in x and y.
+ *      stride_x and stride_y are the input strides of the first and second dimension.
+ *
+ * The weights used by transpose convolution are supposed to be the same as the ones used for
+ * convolution. Therefore, it is necessary to use the weights in reverse order to perform an
+ * actual convolution. This is achieved by using the @ref CPPFlipWeightsKernel.
+ *
+ * This function calls the following NEON kernels/functions:
+ *
+ * -# @ref CPPUpsampleEx
+ * -# @ref NEConvolutionLayer
+ *
+ */
+class NETransposeConvLayer : public IFunction
+{
+public:
+  /** Default constructor */
+  NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  NETransposeConvLayer(const NETransposeConvLayer &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  NETransposeConvLayer &operator=(const NETransposeConvLayer &) = delete;
+  /** Allow instances of this class to be moved */
+  NETransposeConvLayer(NETransposeConvLayer &&) = default;
+  /** Allow instances of this class to be moved */
+  NETransposeConvLayer &operator=(NETransposeConvLayer &&) = default;
+  /** Default destructor */
+  virtual ~NETransposeConvLayer() = default;
+
+  /** Set the input, weights, biases and output tensors.
+   *
+   * @param[in,out] input          Input tensor. 3 lower dimensions represent a single input, and
+   *                               an optional 4th dimension for batch of inputs. Data types
+   *                               supported: F32/F16/QASYMM8.
+   * @param[in]     weights        The 4d weights with dimensions [width, height, IFM, OFM]. Data
+   *                               type supported: Same as @p input.
+   * @param[in]     bias           Optional, ignored if NULL. The biases have one dimension. Data
+   *                               types supported: S32 for QASYMM8 input, F32 for F32 input, F16
+   *                               for F16 input.
+   * @param[out]    output         Output tensor. The output has the same number of dimensions as
+   *                               the @p input.
+   * @param[in]     info           Contains padding and policies to be used in the deconvolution,
+   *                               this is described in @ref PadStrideInfo.
+   * @param[in]     invalid_right  The number of zeros added to the right edge of the output.
+   * @param[in]     invalid_bottom The number of zeros added to the bottom edge of the output.
+   *
+   */
+  void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
+                 const PadStrideInfo &info, unsigned int invalid_right,
+                 unsigned int invalid_bottom);
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * NETransposeConvLayer
+   *
+   * @param[in] input          Input tensor info. 3 lower dimensions represent a single input, and
+   *                           an optional 4th dimension for batch of inputs. Data types supported:
+   *                           F32/F16/QASYMM8.
+   * @param[in] weights        The 4d weights info with dimensions [width, height, IFM, OFM]. Data
+   *                           type supported: Same as @p input.
+   * @param[in] bias           (Optional) The biases have one dimension. Data types supported:
+   *                           S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
+   * @param[in] output         Output tensor info. The output has the same number of dimensions as
+   *                           the @p input.
+   * @param[in] info           Contains padding and policies to be used in the deconvolution,
+   *                           this is described in @ref PadStrideInfo.
+   * @param[in] invalid_right  The number of zeros added to the right edge of the output.
+   * @param[in] invalid_bottom The number of zeros added to the bottom edge of the output.
+   *
+   * @return a status
+   */
+  static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+                         const ITensorInfo *bias, const ITensorInfo *output,
+                         const PadStrideInfo &info, unsigned int invalid_right,
+                         unsigned int invalid_bottom);
+
+  // Inherited methods overridden:
+  void run() override;
+  void prepare() override;
+
+private:
+  MemoryGroup _memory_group;
+  NEConvolutionLayer _conv_f;
+  CPPUpsampleEx _upsample_f;
+  CPPFlipWeightsKernel _flip_weights;
+  NEPermute _permute_input;
+  NEPermute _permute_weights;
+  NEPermute _permute_output;
+  Tensor _scaled_output;
+  Tensor _weights_flipped;
+  Tensor _permuted_input;
+  Tensor _permuted_weights;
+  Tensor _permuted_output;
+  bool _is_nchw;
+  const ITensor *_original_weights;
+  ITensor *_input;
+  PadStrideInfo _info;
+  bool _is_prepared;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ */
diff --git a/runtimes/libs/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp b/runtimes/libs/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
new file mode 100644
index 0000000..8ac667c
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+CPPUpsampleKernelEx::CPPUpsampleKernelEx() : _input(nullptr), _output(nullptr), _info() {}
+
+bool CPPUpsampleKernelEx::is_parallelisable() const { return false; }
+
+void CPPUpsampleKernelEx::configure(const ITensor *input, ITensor *output,
+                                    const PadStrideInfo &info)
+{
+  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+  _input = input;
+  _output = output;
+  _info = info;
+
+  // Configure kernel window
+  Window win = calculate_max_window(*input->info(), Steps());
+
+  // The CPPUpsampleKernelEx doesn't need padding so update_window_and_padding() can be skipped
+  Coordinates coord;
+  coord.set_num_dimensions(output->info()->num_dimensions());
+  output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
+
+  ICPPKernel::configure(win);
+}
+
+void CPPUpsampleKernelEx::run(const Window &window, const ThreadInfo &info)
+{
+  ARM_COMPUTE_UNUSED(info);
+  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
+
+  // Initialize the output buffer
+  const int width_scaled = _output->info()->dimension(0);
+  const int height_scaled = _output->info()->dimension(1);
+  const int stride_x = _info.stride().first;
+  const int stride_y = _info.stride().second;
+  const int start_x = _info.pad_left();
+  const int start_y = _info.pad_top();
+  const int end_y = height_scaled - _info.pad_bottom();
+  const int end_x = width_scaled - _info.pad_right();
+  const size_t element_size = _input->info()->element_size();
+
+  // The fill value is normally 0, but for QASYMM8 the '0' corresponds to the offset
+  const uint8_t fill_value =
+      _output->info()->data_type() == DataType::QASYMM8
+          ? utility::clamp<uint8_t>(_output->info()->quantization_info().offset)
+          : 0;
+  // Filling a value different than 0 works only for QASYMM8 datatype since we are filling 1byte
+  // values in a buffer of uint8_ts
+  std::fill_n(_output->buffer(), _output->info()->total_size(), fill_value);
+
+  // Create window
+  Window window_out(window);
+  window_out.set(Window::DimX, Window::Dimension(start_x, end_x, stride_x));
+  window_out.set(Window::DimY, Window::Dimension(start_y, end_y, stride_y));
+
+  // Create iterators
+  Iterator in(_input, window);
+  Iterator out(_output, window_out);
+
+  execute_window_loop(
+      window, [&](const Coordinates &) { memcpy(out.ptr(), in.ptr(), element_size); }, in, out);
+}
+} // namespace arm_compute
diff --git a/runtimes/libs/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp b/runtimes/libs/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp
new file mode 100644
index 0000000..f8e0ef8
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h"
+
+#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+void CPPUpsampleEx::configure(const ITensor *input, ITensor *output, const PadStrideInfo &info)
+{
+  auto k = arm_compute::support::cpp14::make_unique<CPPUpsampleKernelEx>();
+  k->configure(input, output, info);
+  _kernel = std::move(k);
+}
diff --git a/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
new file mode 100644
index 0000000..fd15ef0
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/UtilsEx.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute::misc::shape_calculator;
+
+namespace arm_compute
+{
+NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+    : _memory_group(std::move(memory_manager)),
+      _conv_f(),
+      _upsample_f(),
+      _flip_weights(),
+      _permute_input(),
+      _permute_weights(),
+      _permute_output(),
+      _scaled_output(),
+      _weights_flipped(),
+      _permuted_input(),
+      _permuted_weights(),
+      _permuted_output(),
+      _is_nchw(false),
+      _original_weights(nullptr),
+      _input(nullptr),
+      _info(),
+      _is_prepared(false)
+{
+}
+
+Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
+                                      const ITensorInfo *bias, const ITensorInfo *output,
+                                      const PadStrideInfo &info, unsigned int invalid_right,
+                                      unsigned int invalid_bottom)
+{
+  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16,
+                                                       DataType::QASYMM8);
+  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
+  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input);
+  const unsigned int width_idx =
+      get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
+  const unsigned int height_idx =
+      get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
+  ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx));
+  ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1);
+
+  auto out_dims = transposeconv_output_dimensions(
+      input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx),
+      weights->dimension(height_idx), info, invalid_right, invalid_bottom);
+
+  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+  if (is_data_type_quantized_asymmetric(input->data_type()) && bias)
+  {
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+  }
+  else if (bias)
+  {
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+  }
+
+  if (output->tensor_shape().total_size() > 0)
+  {
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+
+    const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) < output_shape.x(),
+                                    "Output's dim 0 is invalid.");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) < output_shape.y(),
+                                    "Output's dim 1 is invalid.");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) < output_shape.z(),
+                                    "Output's dim 2 is invalid.");
+  }
+
+  unsigned int pad_left = 0;
+  unsigned int pad_right = 0;
+  unsigned int pad_top = 0;
+  unsigned int pad_bottom = 0;
+  const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+      *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
+      pad_bottom);
+  TensorInfo scale_out_info(
+      input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
+  scale_out_info.set_data_layout(input->data_layout());
+  const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+  const unsigned int batches_idx =
+      get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
+  const unsigned int channel_idx =
+      get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
+  ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(batches_idx) !=
+                              scale_out_info.dimension(batches_idx));
+  ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) !=
+                              scale_out_info.dimension(channel_idx));
+
+  ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output,
+                                                           conv_info, WeightsInfo()));
+
+  return Status{};
+}
+
+void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias,
+                                     ITensor *output, const PadStrideInfo &info,
+                                     unsigned int invalid_right, unsigned int invalid_bottom)
+{
+  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+  const DataLayout data_layout = input->info()->data_layout();
+
+  _input = input;
+  _original_weights = weights;
+  _info = info;
+  _is_prepared = false;
+  _is_nchw = data_layout == DataLayout::NCHW;
+
+  const unsigned int stride_x = info.stride().first;
+  const unsigned int stride_y = info.stride().second;
+
+  const unsigned int width_idx =
+      get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+  const unsigned int height_idx =
+      get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+  auto out_dims = transposeconv_output_dimensions(
+      input->info()->dimension(width_idx), input->info()->dimension(height_idx),
+      weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info,
+      invalid_right, invalid_bottom);
+
+  const TensorShape output_shape =
+      compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+  // Output auto initialization if not yet initialized
+  auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(),
+                     input->info()->quantization_info());
+
+  // Perform validation step
+  ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate(
+      input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
+      info, invalid_right, invalid_bottom));
+
+  _memory_group.manage(&_scaled_output);
+
+  if (!_is_nchw)
+  {
+    _memory_group.manage(&_permuted_input);
+    _memory_group.manage(&_permuted_weights);
+    _memory_group.manage(&_permuted_output);
+
+    // Configure the function to transform the input tensor from NHWC -> NCHW
+    _permuted_input.info()->set_quantization_info(input->info()->quantization_info());
+    _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
+    _permuted_input.info()->set_data_layout(DataLayout::NCHW);
+
+    // Configure the function to transform the weights tensor from NHWC -> NCHW
+    _permuted_weights.info()->set_quantization_info(weights->info()->quantization_info());
+    _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
+    _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
+
+    // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in
+    // order to match output shape
+
+    unsigned int pad_left = 0;
+    unsigned int pad_right = 0;
+    unsigned int pad_top = 0;
+    unsigned int pad_bottom = 0;
+    const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+        *_permuted_input.info(), *_permuted_weights.info(), info, out_dims, invalid_right,
+        invalid_bottom, pad_left, pad_right, pad_top, pad_bottom);
+
+    TensorInfo scale_out_info(scale_out_shape, 1, _permuted_input.info()->data_type(),
+                              _permuted_input.info()->quantization_info());
+    scale_out_info.set_data_layout(DataLayout::NCHW);
+    _scaled_output.allocator()->init(scale_out_info);
+
+    const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
+                                      DimensionRoundingType::CEIL);
+    _upsample_f.configure(&_permuted_input, &_scaled_output, upsample_info);
+
+    _weights_flipped.allocator()->init(*_permuted_weights.info()->clone());
+    _weights_flipped.info()->set_quantization_info(weights->info()->quantization_info());
+    _flip_weights.configure(&_permuted_weights, &_weights_flipped);
+
+    // setup the function to convolve the upscaled output
+    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+    const auto out_shape = output->info()->tensor_shape();
+    TensorShape permuted_out_shape{out_shape[1], out_shape[2], out_shape[0], out_shape[3]};
+    TensorInfo permuted_out_info(permuted_out_shape, 1, output->info()->data_type(),
+                                 output->info()->quantization_info());
+    _permuted_output.allocator()->init(permuted_out_info);
+    _permuted_output.info()->set_data_layout(DataLayout::NCHW);
+    _conv_f.configure(&_scaled_output, &_weights_flipped, bias, &_permuted_output, conv_info);
+
+    // Configure the function to transform the convoluted output to NHWC
+    _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
+
+    _permuted_input.allocator()->allocate();
+    _permuted_weights.allocator()->allocate();
+    _permuted_output.allocator()->allocate();
+  }
+  else
+  {
+    // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in
+    // order to match output shape
+    unsigned int pad_left = 0;
+    unsigned int pad_right = 0;
+    unsigned int pad_top = 0;
+    unsigned int pad_bottom = 0;
+    const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+        *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+        pad_right, pad_top, pad_bottom);
+
+    TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
+                              input->info()->quantization_info());
+    _scaled_output.allocator()->init(scale_out_info);
+    const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
+                                      DimensionRoundingType::FLOOR);
+    _upsample_f.configure(input, &_scaled_output, upsample_info);
+
+    _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
+    _flip_weights.configure(weights, &_weights_flipped);
+
+    // setup the function to convolve the upscaled output
+    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+    _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
+  }
+  _scaled_output.allocator()->allocate();
+}
+
+void NETransposeConvLayer::run()
+{
+  prepare();
+
+  // MemoryGroupResourceScope scope_mg(_memory_group);
+
+  // Permute input
+  if (!_is_nchw)
+  {
+    _permute_input.run();
+  }
+
+  _upsample_f.run();
+  _conv_f.run();
+
+  // Permute output
+  if (!_is_nchw)
+  {
+    _permute_output.run();
+  }
+}
+
+void NETransposeConvLayer::prepare()
+{
+  if (!_is_prepared)
+  {
+    ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+    // Run weights flipping and mark original weights tensor as unused
+    _weights_flipped.allocator()->allocate();
+    // Permute weights
+    if (!_is_nchw)
+    {
+      _permute_weights.run();
+    }
+    NEScheduler::get().schedule(&_flip_weights, Window::DimZ);
+    _original_weights->mark_as_unused();
+
+    // Prepare convolution
+    _conv_f.prepare();
+
+    if (!_weights_flipped.is_used())
+    {
+      _weights_flipped.allocator()->free();
+    }
+
+    _is_prepared = true;
+  }
+}
+} // namespace arm_compute
diff --git a/runtimes/neurun/backend/acl_neon/ConstantInitializer.cc b/runtimes/neurun/backend/acl_neon/ConstantInitializer.cc
index 98be80b..c4ff292 100644
--- a/runtimes/neurun/backend/acl_neon/ConstantInitializer.cc
+++ b/runtimes/neurun/backend/acl_neon/ConstantInitializer.cc
@@ -78,6 +78,13 @@ void ConstantInitializer::visit(const model::operation::FullyConnectedNode &node
   registerCopyInitializer(bias_index, bias_obj);
 }
 
+void ConstantInitializer::visit(const model::operation::TransposeConvNode &node)
+{
+  const auto &kernel_index = node.getInputs().at(model::operation::TransposeConvNode::KERNEL);
+  const auto &kernel_obj = _operands.at(kernel_index);
+  registerPermuteInitializer(kernel_index, kernel_obj);
+}
+
 } // namespace acl_neon
 } // namespace backend
 } // namespace neurun
diff --git a/runtimes/neurun/backend/acl_neon/ConstantInitializer.h b/runtimes/neurun/backend/acl_neon/ConstantInitializer.h
index 62e889c..e608c7b 100644
--- a/runtimes/neurun/backend/acl_neon/ConstantInitializer.h
+++ b/runtimes/neurun/backend/acl_neon/ConstantInitializer.h
@@ -41,6 +41,7 @@ public:
   void visit(const model::operation::Conv2DNode &) override;
   void visit(const model::operation::DepthwiseConv2DNode &) override;
   void visit(const model::operation::FullyConnectedNode &) override;
+  void visit(const model::operation::TransposeConvNode &) override;
 
 private:
   const model::Operands &_operands;
diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
index e7d7eda..a4bb098 100644
--- a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
+++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include <arm_compute/runtime/NEON/functions/NETransposeConvLayer.h>
 
 #include "kernel/ConcatLayer.h"
 #include "util/Padding.h"
@@ -836,6 +837,54 @@ void KernelGenerator::visit(const model::operation::StridedSliceNode &node)
   throw std::runtime_error("Not supported, yet");
 }
 
+void KernelGenerator::visit(const model::operation::TransposeConvNode &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto output_shape_index{
+      node.getInputs().at(model::operation::TransposeConvNode::Input::OUTPUT_SHAPE)};
+  const auto ker_index{node.getInputs().at(model::operation::TransposeConvNode::Input::KERNEL)};
+  const auto ifm_index{node.getInputs().at(model::operation::TransposeConvNode::Input::INPUT)};
+
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+  const auto ker_shape = _ctx.at(ker_index).shape().asFeature();
+
+  const auto stride = node.param().stride;
+
+  assert((node.param().padding.type == model::PaddingType::SAME) ||
+         (node.param().padding.type == model::PaddingType::VALID));
+  auto padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
+                                                ker_shape.W, ker_shape.H);
+
+  uint32_t invalid_horizontal = 0;
+  uint32_t invalid_vertical = 0;
+  if (node.param().padding.type == model::PaddingType::VALID)
+  {
+    invalid_horizontal =
+        ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
+    invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
+  }
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  auto ker_alloc = _tensor_builder->at(ker_index).get();
+
+  const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
+
+  std::unique_ptr<::arm_compute::IFunction> fn;
+
+  auto l = nnfw::cpp14::make_unique<::arm_compute::NETransposeConvLayer>();
+
+  l->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
+               invalid_horizontal, invalid_vertical);
+
+  fn = std::move(l);
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
 void KernelGenerator::visit(const model::operation::TransposeNode &node)
 {
   (void)node;
diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.h b/runtimes/neurun/backend/acl_neon/KernelGenerator.h
index a823981..429d0fd 100644
--- a/runtimes/neurun/backend/acl_neon/KernelGenerator.h
+++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.h
@@ -59,6 +59,7 @@ public:
   void visit(const model::operation::SquaredDifferenceNode &) override;
   void visit(const model::operation::SubNode &) override;
   void visit(const model::operation::StridedSliceNode &) override;
+  void visit(const model::operation::TransposeConvNode &) override;
   void visit(const model::operation::TransposeNode &) override;
   void visit(const model::operation::AddNode &) override;
   void visit(const model::operation::DivNode &) override;
diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
index c3d158e..2afc3de 100644
--- a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
+++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
@@ -195,6 +195,8 @@ void ShapeFixer::visit(const model::operation::SubNode &node)
   }
 }
 
+void ShapeFixer::visit(const model::operation::TransposeConvNode &) { /* DO NOTHING */ }
+
 void ShapeFixer::visit(const model::operation::AddNode &node)
 {
   const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)};
diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.h b/runtimes/neurun/backend/acl_neon/ShapeFixer.h
index e038d37..60f7a2e 100644
--- a/runtimes/neurun/backend/acl_neon/ShapeFixer.h
+++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.h
@@ -57,6 +57,7 @@ public:
   void visit(const model::operation::SQRTNode &) override;
   void visit(const model::operation::SquaredDifferenceNode &) override;
   void visit(const model::operation::SubNode &) override;
+  void visit(const model::operation::TransposeConvNode &) override;
   void visit(const model::operation::AddNode &) override;
   void visit(const model::operation::DivNode &) override;
   void visit(const model::operation::ComparisonNode &) override;
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
index fe4625d..5678c7d 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
@@ -62,7 +62,6 @@ GeneratedTests.reduce_sum_ex*
 GeneratedTests.topk_v2*
 # Unexpected result
 GeneratedTests.split*
-GeneratedTests.transpose_conv*
 GeneratedTests.pack*
 GeneratedTests.unpack*
 generatedtests.logical_not_ex*
diff --git a/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt b/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt
index 2d3dd19..7dda80a 100644
--- a/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt
+++ b/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt
@@ -12,5 +12,6 @@ reshape
 softmax
 sqrt
 tanh
+transpose_conv
 MODELS/inception_module
 MODELS/mobilenet
-- 
2.7.4
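
Appendix (not part of the patch): a minimal standalone sketch of the shape arithmetic this change relies on -- the transposed-convolution output-size relation documented in NETransposeConvLayer.h, and the invalid-region computation that KernelGenerator.cc performs for VALID padding. The function and variable names below are illustrative only, not taken from the runtime.

// sketch.cpp -- illustrative only; compile with any C++11 compiler (e.g. g++ sketch.cpp)
#include <cassert>
#include <cstdio>

// width_output = (width_input - 1) * stride_x - 2 * padding_x + kernel_x
// (the same formula applies to the height dimension)
static unsigned transposeconv_out_dim(unsigned in, unsigned kernel, unsigned stride, unsigned pad)
{
  return (in - 1) * stride - 2 * pad + kernel;
}

int main()
{
  // Example: 2x2 input, 3x3 kernel, stride 2, no padding -> 5x5 output.
  const unsigned ifm_w = 2, ker_w = 3, stride_w = 2, pad_w = 0;
  const unsigned ofm_w = transposeconv_out_dim(ifm_w, ker_w, stride_w, pad_w);
  assert(ofm_w == 5);

  // For VALID padding, the kernel generator trims the zeros that upsampling
  // would otherwise leave past the last input sample on the right/bottom edge:
  //   invalid_horizontal = ofm_W - (1 + (ifm_W - 1) * stride) - (ker_W - 1)
  const unsigned invalid_horizontal = ofm_w - (1 + (ifm_w - 1) * stride_w) - (ker_w - 1);

  std::printf("ofm_w = %u, invalid_horizontal = %u\n", ofm_w, invalid_horizontal);
  return 0;
}

With these numbers the upsample step inserts stride - 1 zeros between input samples, and the stride-1 convolution against the flipped weights then produces the 5x5 result, which is why the layer is implemented as CPPUpsampleEx followed by NEConvolutionLayer.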