--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
+#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the Upsampling layer kernel for transpose convolution on OpenCL.
+ */
+class CLTransposeConvLayerUpsampleKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLTransposeConvLayerUpsampleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTransposeConvLayerUpsampleKernel(const CLTransposeConvLayerUpsampleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTransposeConvLayerUpsampleKernel &
+ operator=(const CLTransposeConvLayerUpsampleKernel &) = delete;
+ /** Default Move Constructor. */
+ CLTransposeConvLayerUpsampleKernel(CLTransposeConvLayerUpsampleKernel &&) = default;
+ /** Default move assignment operator */
+ CLTransposeConvLayerUpsampleKernel &operator=(CLTransposeConvLayerUpsampleKernel &&) = default;
+ /** Default destructor */
+ ~CLTransposeConvLayerUpsampleKernel() = default;
+
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[out] output Destination tensor. Data types supported: same as @p input. All but
+ * the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only
+ * performed within the XY-plane.
+ * @param[in] inner_border Top and right inner border sizes. These rows and columns will be
+ * filled with zero.
+ * @param[in] info Contains padding and stride information described in @ref
+ * PadStrideInfo.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
+ const PadStrideInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLTransposeConvLayerUpsample
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
+ * @param[in] output Destination tensor info. Data types supported: same as @p input. All
+ * but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is
+ * only performed within the XY-plane.
+ * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled
+ * with zero.
+ * @param[in] info Contains padding and stride information described in @ref
+ * PadStrideInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const BorderSize &inner_border, const PadStrideInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ BorderSize _inner_border;
+ PadStrideInfo _info;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ */
#include <utility>
+#include "arm_compute/core/Types.h"
+
namespace arm_compute
{
-/** Returns expected width and height of the deconvolution's output tensor.
+/** Returns expected width and height of the transpose convolution's output tensor.
*
* @note This function was copied in order to fix a bug computing to wrong output dimensions.
- * The formula for computing the output dimension is: o = s*(i - 1) + a + k - 2*p
- * k: kernel size
- * s: stride
- * i: input size
- * o: output size
- * p: padding
- * a: inner border
- * Refer to : https://github.com/ARM-software/ComputeLibrary/issues/523#issuecomment-414606797
*
* @param[in] in_width Width of input tensor (Number of columns)
* @param[in] in_height Height of input tensor (Number of rows)
* @param[in] kernel_width Kernel width.
* @param[in] kernel_height Kernel height.
- * @param[in] padx X axis padding.
- * @param[in] pady Y axis padding.
- * @param[in] stride_x X axis input stride.
- * @param[in] stride_y Y axis input stride.
- * @param[in] inner_border_right The number of zeros added to right edge of the input.
- * @param[in] inner_border_top The number of zeros added to top edge of the input.
+ * @param[in] info padding and stride info.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
* @return A pair with the new width in the first position and the new height in the second.
*/
-const std::pair<unsigned int, unsigned int> deconvolution_output_dimensions_ex(
- unsigned int in_width, unsigned int in_height, unsigned int kernel_width,
- unsigned int kernel_height, unsigned int padx, unsigned int pady, unsigned int stride_x,
- unsigned int stride_y, unsigned int inner_border_right = 0, unsigned int inner_border_top = 0);
+const std::pair<unsigned int, unsigned int>
+transposeconv_output_dimensions(unsigned int in_width, unsigned int in_height,
+ unsigned int kernel_width, unsigned int kernel_height,
+ const PadStrideInfo &info, unsigned int invalid_right,
+                                unsigned int invalid_bottom);
}
#endif /*__ARM_COMPUTE_UTILSEX_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__
+#define __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Utils.h"
+
+#include "arm_compute/core/utils/helpers/tensor_transform.h"
+
+#include <cmath>
+
+namespace arm_compute
+{
+namespace misc
+{
+namespace shape_calculator
+{
+
+/** Calculate the upsampled output shape used for transpose convolution
+ *
+ * @param[in] input Input tensor info
+ * @param[in] weights Weights tensor shape
+ * @param[in] info Padding and stride info
+ * @param[in] out_dims Output shape dimensions
+ * @param[in] pad_left Padding on left
+ * @param[in] pad_right Padding on right
+ * @param[in] pad_top Padding on top
+ * @param[in] pad_bottom Padding on bottom
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_transposeconv_upsampled_shape(
+ const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info,
+ std::pair<unsigned int, unsigned int> &out_dims, unsigned int &pad_left,
+ unsigned int &pad_right, unsigned int &pad_top, unsigned int &pad_bottom)
+{
+ unsigned int sx = info.stride().first;
+ unsigned int sy = info.stride().second;
+ const DataLayout data_layout = input.data_layout();
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+ // Find the upsampled dimensions
+ // transpose conv out:
+ // tconv_out + pad = 1 + (in - 1) * stride + invalid
+ // tconv_out = 1 + (in - 1) * stride + invalid - pad
+ // upsample out:
+ // upsample_out = 1 + (in - 1) * stride
+ unsigned int out_x = (input.dimension(idx_w) - 1) * sx + 1;
+ unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1;
+
+ // Find the padding needed for the convolution with stride 1 in order to match output shape
+ // upsample+pad out:
+ // upsample_out + pad = tconv_out + kernel - 1
+ // pad = tconv_out + kernel - 1 - upsample_out
+ unsigned int padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1);
+ unsigned int pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1);
+ out_x += padx;
+ out_y += pady;
+
+ unsigned int padx_all = padx + info.pad_left() + info.pad_right();
+ unsigned int pady_all = pady + info.pad_top() + info.pad_bottom();
+ pad_left = (padx_all + 1) / 2 - info.pad_left();
+ pad_right = padx_all / 2 - info.pad_right();
+ pad_top = (pady_all + 1) / 2 - info.pad_top();
+ pad_bottom = pady_all / 2 - info.pad_bottom();
+
+ TensorShape scale_out_shape(input.tensor_shape());
+ scale_out_shape.set(idx_w, out_x);
+ scale_out_shape.set(idx_h, out_y);
+
+ return scale_out_shape;
+}
+
+/** Calculate the output shape of the transpose convolution layer
+ *
+ * @param[in] out_dims Output x and y shape dimensions
+ * @param[in] input Input tensor info
+ * @param[in] weights Weights tensor shape
+ *
+ * @return the calculated shape
+ */
+inline TensorShape
+compute_transposeconv_output_shape(const std::pair<unsigned int, unsigned int> &out_dims,
+ const ITensorInfo &input, const ITensorInfo &weights)
+{
+ const TensorShape input_shape{input.tensor_shape()};
+ const TensorShape weights_shape{weights.tensor_shape()};
+
+ const DataLayout data_layout = input.data_layout();
+ const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const int channel_idx =
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
+
+ TensorShape out_shape{input_shape};
+ out_shape.set(width_idx, out_dims.first);
+ out_shape.set(height_idx, out_dims.second);
+ out_shape.set(channel_idx, weights_shape[batch_idx]);
+ return out_shape;
+}
+
+} // namespace shape_calculator
+} // namespace misc
+} // namespace arm_compute
+
+#endif // __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__
#include <arm_compute/runtime/CL/functions/CLBatchToSpaceND.h>
#include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h>
#include <arm_compute/runtime/CL/functions/CLCast.h>
-#include <arm_compute/runtime/CL/functions/CLDeconvolutionLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLDepthToSpace.h>
#include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/CL/functions/CLSplit.h>
#include <arm_compute/runtime/CL/functions/CLStridedSliceEx.h>
#include <arm_compute/runtime/CL/functions/CLTopKV2.h>
+#include <arm_compute/runtime/CL/functions/CLTransposeConvLayer.h>
#endif // __ARM_COMPUTE_CLFUNCTIONSEX_H__
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef __ARM_COMPUTE_CLDECONVOLUTIONLAYEREX_H__
-#define __ARM_COMPUTE_CLDECONVOLUTIONLAYEREX_H__
+#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
+#define __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
-#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
+#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h"
#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
namespace arm_compute
{
class ICLTensor;
-/** Function to run the deconvolution layer.
+/** Function to run the transpose convolution layer.
*
* @note This layer was copied in order to fix a bug computing to wrong output dimensions.
*
- * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input
+ * TransposeConv Layer is the backward pass of Convolution Layer. First we transform the input
* depending on the stride and pad info and then perform a 1x1
* convolution pass. Input stride defines how many zeroes we should put between each element of the
* input, pad is the amount of padding and finally a is a user
*
* The relation between input to output is as follows:
* \f[
- * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
+ * width\_output = (width\_input - 1) \cdot stride\_x - padding\_x + kernel\_x
* \f]
* \f[
- * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
+ * height\_output = (height\_input - 1) \cdot stride\_y - padding\_y + kernel\_y
* \f]
*
* where:
*
* This function calls the following OpenCL kernels/functions:
*
- * -# @ref CLDeconvolutionLayerUpsample
+ * -# @ref CLTransposeConvLayerUpsample
* -# @ref CLConvolutionLayer
*
*/
-class CLDeconvolutionLayerEx : public IFunction
+class CLTransposeConvLayer : public IFunction
{
public:
/** Constructor */
- CLDeconvolutionLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDeconvolutionLayerEx(const CLDeconvolutionLayerEx &) = delete;
+ CLTransposeConvLayer(const CLTransposeConvLayer &) = delete;
/** Default move constructor */
- CLDeconvolutionLayerEx(CLDeconvolutionLayerEx &&) = default;
+ CLTransposeConvLayer(CLTransposeConvLayer &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDeconvolutionLayerEx &operator=(const CLDeconvolutionLayerEx &) = delete;
+ CLTransposeConvLayer &operator=(const CLTransposeConvLayer &) = delete;
/** Default move assignment operator */
- CLDeconvolutionLayerEx &operator=(CLDeconvolutionLayerEx &&) = default;
+ CLTransposeConvLayer &operator=(CLTransposeConvLayer &&) = default;
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
- * and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
- * Data type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type
- * supported: Same as @p input.
- * @param[out] output Output tensor. The output has the same number of dimensions
- * as the @p input.
- * @param[in] info Contains padding and policies to be used in the
- * deconvolution, this is decribed in @ref PadStrideInfo.
- * @param[in] inner_border_right The number of zeros added to right edge of the input.
- * @param[in] inner_border_top The number of zeros added to top edge of the input.
- * @param[in] weights_info (Optional) Weights information needed for @ref
- * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
- * CLWeightsReshapeKernel.
- *
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported:
+ * Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] info Contains padding and policies to be used in the
+ * deconvolution, this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the input.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the input.
+ * @param[in] weights_info (Optional) Weights information needed for @ref
+ * CLConvolutionLayer, specifies if the weights tensor has been
+ * reshaped with @ref CLWeightsReshapeKernel.
*/
void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
- const PadStrideInfo &info, unsigned int inner_border_right,
- unsigned int inner_border_top, const WeightsInfo &weights_info = WeightsInfo());
+ const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLDeconvolutionLayerEx
- *
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
- * and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8/F16/F32.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
- * Data type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported:
- * Same as @p input.
- * @param[in] output Output tensor info. The output has the same number of dimensions
- * as the @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution,
- * this is decribed in @ref PadStrideInfo.
- * @param[in] inner_border_right The number of zeros added to right edge of the input.
- * @param[in] inner_border_top The number of zeros added to top edge of the input.
- * @param[in] weights_info (Optional) Weights information needed for @ref
- * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
- * CLWeightsReshapeKernel.
+ * CLTransposeConvLayer
*
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported:
+ * Same as @p input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the input.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the input.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref
+ * CLWeightsReshapeKernel.
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
- unsigned int inner_border_right, unsigned int inner_border_top,
+                        unsigned int invalid_right, unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
// Inherited methods overridden:
private:
CLMemoryGroup _memory_group;
- CLDeconvolutionLayerUpsample _scale_f;
+ CLTransposeConvLayerUpsample _scale_f;
CLConvolutionLayer _conv_f;
CPPFlipWeightsKernel _flip_weights;
CLTensor _scaled_output;
bool _is_prepared;
};
}
-#endif /* __ARM_COMPUTE_CLDECONVOLUTIONLAYEREX_H__ */
+#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
+#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLTransposeConvLayerUpsampleKernel */
+class CLTransposeConvLayerUpsample : public IFunction
+{
+public:
+ /** Default constructor */
+ CLTransposeConvLayerUpsample();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTransposeConvLayerUpsample(const CLTransposeConvLayerUpsample &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTransposeConvLayerUpsample &operator=(const CLTransposeConvLayerUpsample &) = delete;
+ /** Allow instances of this class to be moved */
+ CLTransposeConvLayerUpsample(CLTransposeConvLayerUpsample &&) = default;
+ /** Allow instances of this class to be moved */
+ CLTransposeConvLayerUpsample &operator=(CLTransposeConvLayerUpsample &&) = default;
+ /** Default destructor */
+ virtual ~CLTransposeConvLayerUpsample() = default;
+
+ /** Initialize the function's source, destination, interpolation type and border_mode.
+ *
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] inner_border The number of zeros added to right and top edges of the input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
+ const PadStrideInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLTransposeConvLayerUpsample
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input.
+ * @param[in] inner_border The number of zeros added to right and top edges of the input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const BorderSize &inner_border, const PadStrideInfo &info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLTransposeConvLayerUpsampleKernel _upsample;
+ ICLTensor *_output;
+};
+}
+#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLValidate.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+using namespace arm_compute;
+
+CLTransposeConvLayerUpsampleKernel::CLTransposeConvLayerUpsampleKernel()
+ : _input(nullptr), _output(nullptr), _inner_border(), _info()
+{
+}
+
+Status CLTransposeConvLayerUpsampleKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const BorderSize &inner_border,
+ const PadStrideInfo &info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
+ DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
+
+ const DataLayout data_layout = input->data_layout();
+
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_w) == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_h) == 0);
+
+ ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_c) != output->dimension(idx_c));
+ for (size_t i = 3; i < Coordinates::num_max_dimensions; ++i)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
+ }
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.right > info.stride().first - 1,
+                                  "inner_border_right must be smaller than stride_x");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.top > info.stride().second - 1,
+                                  "inner_border_top must be smaller than stride_y");
+
+ return Status{};
+}
+
+void CLTransposeConvLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output,
+ const BorderSize &inner_border,
+ const PadStrideInfo &info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ _input = input;
+ _output = output;
+ _inner_border = inner_border;
+ _info = info;
+
+ // Perform validation step
+ ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayerUpsampleKernel::validate(
+ input->info(), output->info(), inner_border, info));
+
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options()));
+
+ constexpr unsigned int num_elems_processed_per_iteration = 1;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
+ AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
+
+ ICLKernel::configure_internal(win);
+}
+
+void CLTransposeConvLayerUpsampleKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ const DataLayout data_layout = _input->info()->data_layout();
+
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+ const int out_start_x = _info.pad_left();
+ const int out_end_x = _output->info()->dimension(idx_w) - _inner_border.right -
+ _info.pad_right() + _info.stride().first - 1;
+ const int out_step_x = _info.stride().first;
+
+ const int out_start_y = _inner_border.top + _info.pad_top();
+ const int out_end_y =
+ _output->info()->dimension(idx_h) - _info.pad_bottom() + _info.stride().second - 1;
+ const int out_step_y = _info.stride().second;
+
+ switch (data_layout)
+ {
+ case DataLayout::NCHW:
+ {
+ Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+
+ Window slice_out = collapsed.first_slice_window_3D();
+ slice_out.set(Window::DimX, Window::Dimension(out_start_x, out_end_x, out_step_x));
+ slice_out.set(Window::DimY, Window::Dimension(out_start_y, out_end_y, out_step_y));
+
+ Window slice_in = collapsed.first_slice_window_3D();
+
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _input, slice_in);
+ add_3D_tensor_argument(idx, _output, slice_out);
+ enqueue(queue, *this, slice_out);
+ } while (collapsed.slide_window_slice_3D(slice_in) &&
+ collapsed.slide_window_slice_3D(slice_out));
+ break;
+ }
+ case DataLayout::NHWC:
+ {
+ // NOTE: not collapsing in NHWC
+ Window slice_out = window.first_slice_window_3D();
+ slice_out.set(Window::DimY, Window::Dimension(out_start_x, out_end_x, out_step_x));
+ slice_out.set(Window::DimZ, Window::Dimension(out_start_y, out_end_y, out_step_y));
+
+ Window slice_in = window.first_slice_window_3D();
+
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _input, slice_in);
+ add_3D_tensor_argument(idx, _output, slice_out);
+ enqueue(queue, *this, slice_out);
+ } while (window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out));
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Unsupported data layout");
+ }
+}
using namespace arm_compute;
-const std::pair<unsigned int, unsigned int> arm_compute::deconvolution_output_dimensions_ex(
- unsigned int in_width, unsigned int in_height, unsigned int kernel_width,
- unsigned int kernel_height, unsigned int padx, unsigned int pady, unsigned int stride_x,
- unsigned int stride_y, unsigned int inner_border_right, unsigned int inner_border_top)
+const std::pair<unsigned int, unsigned int>
+arm_compute::transposeconv_output_dimensions(unsigned int in_width, unsigned int in_height,
+ unsigned int kernel_width, unsigned int kernel_height,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom)
{
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
+ const unsigned int padx = info.pad_left() + info.pad_right();
+ const unsigned int pady = info.pad_top() + info.pad_bottom();
+
ARM_COMPUTE_ERROR_ON(in_width < 1 || in_height < 1);
- ARM_COMPUTE_ERROR_ON(((in_width - 1) * stride_x + kernel_width) < 2 * padx);
- ARM_COMPUTE_ERROR_ON(((in_height - 1) * stride_y + kernel_height) < 2 * pady);
- const int w = stride_x * (in_width - 1) + kernel_width - 2 * padx + inner_border_right;
- const int h = stride_y * (in_height - 1) + kernel_height - 2 * pady + inner_border_top;
+ ARM_COMPUTE_ERROR_ON(kernel_width <= padx);
+ ARM_COMPUTE_ERROR_ON(kernel_height <= pady);
+
+ // Find the transpose conv out dimensions
+ // transpose conv out:
+ // tconv_out + pad = 1 + (in - 1) * stride + invalid
+ // tconv_out = 1 + (in - 1) * stride + invalid - pad
+ const int w = stride_x * (in_width - 1) + kernel_width - padx + invalid_right;
+ const int h = stride_y * (in_height - 1) + kernel_height - pady + invalid_bottom;
return std::make_pair<unsigned int, unsigned int>(w, h);
}
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerEx.h"
+#include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h"
+#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
using namespace arm_compute;
using namespace arm_compute::misc::shape_calculator;
-CLDeconvolutionLayerEx::CLDeconvolutionLayerEx(
- std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
_scale_f(),
_conv_f(),
{
}
-Status CLDeconvolutionLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights,
-                                        const ITensorInfo *bias, ITensorInfo *output,
-                                        const PadStrideInfo &info, unsigned int inner_border_right,
-                                        unsigned int inner_border_top,
-                                        const WeightsInfo &weights_info)
+// Validates a transpose convolution configuration: data types, kernel size vs
+// total padding, the invalid-border bounds, the expected output shape, and the
+// two internal stages (upsample followed by a stride-1 convolution).
+Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
+                                      const ITensorInfo *bias, ITensorInfo *output,
+                                      const PadStrideInfo &info, unsigned int invalid_right,
+                                      unsigned int invalid_bottom, const WeightsInfo &weights_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h));
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1);
-  ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric());
-  const unsigned int stride_x = info.stride().first;
-  const unsigned int stride_y = info.stride().second;
+  // Asymmetric padding is now supported, so the padding_is_symmetric() check of
+  // the original CLDeconvolutionLayer has been dropped.
+  const unsigned int kernel_x = weights->dimension(idx_w);
+  const unsigned int kernel_y = weights->dimension(idx_h);
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_right > stride_x - 1,
-                                  "inner_border_right must be smaller than stride_x");
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_top > stride_y - 1,
-                                  "inner_border_top must be smaller than stride_y");
+  // The cropped (invalid) region on each axis must fit inside the kernel extent.
+  ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_right > kernel_x - 1,
+                                  "invalid_right must be smaller than kernel_x");
+  ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_bottom > kernel_y - 1,
+                                  "invalid_bottom must be smaller than kernel_y");
-  // NOTE From the existing CLDeconvolutionLayer, inner_border_right and inner_border_top were
-  // added.
-  auto out_dims = deconvolution_output_dimensions_ex(
+  // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were added.
+  auto out_dims = transposeconv_output_dimensions(
input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
-      weights->dimension(idx_h), info.pad().first, info.pad().second, stride_x, stride_y,
-      inner_border_right, inner_border_top);
+      weights->dimension(idx_h), info, invalid_right, invalid_bottom);
-  const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input, *weights);
+  const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c],
"Output's depth is invalid.");
-  unsigned int padx = 0;
-  unsigned int pady = 0;
-  const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(
-      *input, *weights, stride_x, stride_y, inner_border_right, inner_border_top, out_dims, padx,
-      pady);
+  // Out-params filled by the shape helper: per-side padding the upsample stage
+  // must add so the stride-1 convolution yields exactly out_dims.
+  unsigned int pad_left = 0;
+  unsigned int pad_right = 0;
+  unsigned int pad_top = 0;
+  unsigned int pad_bottom = 0;
+  const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+      *input, *weights, info, out_dims, pad_left, pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(input->clone()
->set_is_resizable(true)
.reset_padding()
.set_data_layout(data_layout));
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-  ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(
-      input, &scale_out_info, BorderSize(inner_border_right, inner_border_top), info));
+  // The inner-border mechanism is no longer used: the upsample runs with a zero
+  // border and cropping is expressed via the invalid_* checks above.
+  ARM_COMPUTE_RETURN_ON_ERROR(
+      CLTransposeConvLayerUpsample::validate(input, &scale_out_info, BorderSize(0, 0), info));
ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output,
conv_info, weights_info));
return Status{};
}
-void CLDeconvolutionLayerEx::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias,
-                                       ICLTensor *output, const PadStrideInfo &info,
-                                       unsigned int inner_border_right,
-                                       unsigned int inner_border_top,
-                                       const WeightsInfo &weights_info)
+// Configures the transpose convolution as two stages: an upsample (zero insert
+// plus padding) of the input, followed by a stride-1 convolution with the
+// flipped weights. invalid_right/invalid_bottom describe the surplus region
+// produced by VALID-padding transpose convolutions.
+void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias,
+                                     ICLTensor *output, const PadStrideInfo &info,
+                                     unsigned int invalid_right, unsigned int invalid_bottom,
+                                     const WeightsInfo &weights_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
_weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
_flip_weights.configure(weights, &_weights_flipped);
-  // NOTE From the existing CLDeconvolutionLayer, inner_border_right and inner_border_top were
+  // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were
// added.
-  auto out_dims = deconvolution_output_dimensions_ex(
+  auto out_dims = transposeconv_output_dimensions(
input->info()->dimension(idx_w), input->info()->dimension(idx_h),
-      weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info.pad().first,
-      info.pad().second, stride_x, stride_y, inner_border_right, inner_border_top);
+      weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
+      invalid_bottom);
const TensorShape output_shape =
-      compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info());
+      compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
// Output auto initialization if not yet initialized
auto_init_if_empty(
input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
// Perform validation step
-  ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayerEx::validate(
+  ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayer::validate(
input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
-      info, inner_border_right, inner_border_top));
+      info, invalid_right, invalid_bottom));
_is_prepared = weights_info.retain_internal_weights();
// Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order
// to match output shape
-  unsigned int padx = 0;
-  unsigned int pady = 0;
-  const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(
-      *input->info(), *weights->info(), stride_x, stride_y, inner_border_right, inner_border_top,
-      out_dims, padx, pady);
+  // Out-params filled by the shape helper: per-side padding for the upsample.
+  unsigned int pad_left = 0;
+  unsigned int pad_right = 0;
+  unsigned int pad_top = 0;
+  unsigned int pad_bottom = 0;
+  const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+      *input->info(), *weights->info(), info, out_dims, pad_left, pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
input->info()->quantization_info());
_scaled_output.allocator()->init(scale_out_info);
// configure scale function
-  const PadStrideInfo upsample_info(stride_x, stride_y, padx / 2, pady / 2);
-  _scale_f.configure(input, &_scaled_output, BorderSize(inner_border_top, inner_border_right),
-                     upsample_info);
+  // Asymmetric per-side padding replaces the old symmetric padx/2, pady/2 split;
+  // the inner border is no longer used (always zero).
+  const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
+                                    DimensionRoundingType::FLOOR);
+  _scale_f.configure(input, &_scaled_output, BorderSize(0, 0), upsample_info);
// setup the function to convolve the upscaled output
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
_scaled_output.allocator()->allocate();
}
-void CLDeconvolutionLayerEx::run()
+// Runs prepare() (one-shot weight flip) and then the scheduled stages, releasing
+// the memory group when done.
+void CLTransposeConvLayer::run()
{
prepare();
_memory_group.release();
}
-void CLDeconvolutionLayerEx::prepare()
+void CLTransposeConvLayer::prepare()
{
if (!_is_prepared)
{
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h"
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include <algorithm> // std::fill_n
+#include <cmath>
+#include <cstring> // std::memset
+#include <memory>
+#include <tuple>
+
+using namespace arm_compute;
+
+CLTransposeConvLayerUpsample::CLTransposeConvLayerUpsample() // NOLINT
+ : _upsample(),
+ _output(nullptr)
+{
+}
+
+// Static validation entry point: forwards directly to the kernel's validate(),
+// which performs all tensor/border/stride checks.
+Status CLTransposeConvLayerUpsample::validate(const ITensorInfo *input, const ITensorInfo *output,
+                                              const BorderSize &inner_border,
+                                              const PadStrideInfo &info)
+{
+  return CLTransposeConvLayerUpsampleKernel::validate(input, output, inner_border, info);
+}
+
+// Binds the output tensor and configures the underlying upsample kernel.
+void CLTransposeConvLayerUpsample::configure(ICLTensor *input, ICLTensor *output,
+                                             const BorderSize &inner_border,
+                                             const PadStrideInfo &info)
+{
+  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+  // Keep a handle to the destination: run() zero-fills it on the host before the
+  // kernel scatters the input values into it.
+  _output = output;
+  _upsample.configure(input, output, inner_border, info);
+}
+
+// Zero-fills the output on the host (using the quantization zero-point for
+// asymmetric quantized tensors), then enqueues the upsample kernel, which writes
+// only the strided positions and relies on the pre-filled background.
+void CLTransposeConvLayerUpsample::run()
+{
+  _output->map(CLScheduler::get().queue(), true);
+  if (is_data_type_quantized_asymmetric(_output->info()->data_type()))
+  {
+    // For asymmetric quantized types the neutral value is the zero-point, not 0.
+    // The offset is an int; a QASYMM8 zero-point fits in uint8_t, so the
+    // narrowing conversion is made explicit here.
+    const uint8_t quantized_zero =
+        static_cast<uint8_t>(_output->info()->quantization_info().offset);
+    std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero);
+  }
+  else
+  {
+    // total_size() is in bytes; an all-zero byte pattern is also 0.0 for F16/F32.
+    // Qualified std::memset (from <cstring>) instead of relying on a transitive
+    // declaration of the unqualified name.
+    std::memset(_output->buffer(), 0, _output->info()->total_size());
+  }
+  _output->unmap(CLScheduler::get().queue());
+
+  CLScheduler::get().enqueue(_upsample, false);
+}
model::ExplicitPadding padding;
model::Stride stride;
+ uint32_t invalid_horizontal;
+ uint32_t invalid_vertical;
};
Param param;
(node.param().padding.type == model::PaddingType::VALID));
param.padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape,
param.stride, ker_shape.W, ker_shape.H);
+  // For VALID padding the TFLite transpose-conv output can be larger than what
+  // the plain deconvolution formula produces; the surplus ("invalid") region is
+  // forwarded to the ACL function so it can crop it.
+  if (node.param().padding.type == model::PaddingType::VALID)
+  {
+    // invalid = ofm - (1 + (ifm - 1) * stride) - (ker - 1)
+    // NOTE(review): these uint32_t expressions underflow if the shapes make them
+    // negative -- presumably upstream shape validation guarantees ofm is large
+    // enough; confirm.
+    param.invalid_horizontal =
+        ofm_shape.W - (1 + (ifm_shape.W - 1) * param.stride.horizontal) - (ker_shape.W - 1);
+    param.invalid_vertical =
+        ofm_shape.H - (1 + (ifm_shape.H - 1) * param.stride.vertical) - (ker_shape.H - 1);
+  }
+  else
+  {
+    // SAME padding produces no surplus region.
+    param.invalid_horizontal = 0;
+    param.invalid_vertical = 0;
+  }
auto tensors = _tensor_builder;
auto ifm_alloc = tensors->at(param.ifm_index).get();
auto ker_alloc = tensors->at(param.ker_index).get();
-    std::unique_ptr<::arm_compute::IFunction> fn;
-
-    auto l = nnfw::cpp14::make_unique<::arm_compute::CLDeconvolutionLayerEx>();
+  const auto tconv_info = acl_common::asPadStrideInfo(param.padding, param.stride);
-    auto padding = param.padding;
-    auto inner_border_right = padding.right - padding.left;
-    auto inner_border_top = padding.bottom - padding.top;
+  std::unique_ptr<::arm_compute::IFunction> fn;
-    padding.left = padding.right;
-    padding.top = padding.bottom;
-    auto symmetric_tconv_info =
-        ::neurun::backend::acl_common::asPadStrideInfo(padding, param.stride);
+  auto l = nnfw::cpp14::make_unique<::arm_compute::CLTransposeConvLayer>();
+  // CLTransposeConvLayer::configure() takes (invalid_right, invalid_bottom):
+  // the horizontal (width-direction) surplus first, then the vertical one.
+  // The arguments were previously passed swapped (vertical, horizontal), which
+  // flips the cropping axes for non-square surpluses; this matches the order
+  // used by the other backend's call site.
+  l->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
+               param.invalid_horizontal, param.invalid_vertical);
fn = std::move(l);
int ker_index;
Padding padding;
Stride stride;
+ uint32_t invalid_horizontal;
+ uint32_t invalid_vertical;
};
Param param;
? same_padding(ofm_shape, ifm_shape, param.stride, ker_shape.W, ker_shape.H)
: valid_padding();
+  // Surplus ("invalid") output region for VALID padding; zero for SAME padding.
+  // NOTE(review): the horizontal formula reads a bare `hstride` while the
+  // vertical one reads `param.stride.vertical` -- presumably both should come
+  // from param.stride; confirm `hstride` equals param.stride.horizontal.
+  param.invalid_horizontal =
+      (padding_type == ANEURALNETWORKS_PADDING_SAME)
+          ? 0
+          : ofm_shape.W - (1 + (ifm_shape.W - 1) * hstride) - (ker_shape.W - 1);
+  param.invalid_vertical =
+      (padding_type == ANEURALNETWORKS_PADDING_SAME)
+          ? 0
+          : ofm_shape.H - (1 + (ifm_shape.H - 1) * param.stride.vertical) - (ker_shape.H - 1);
+
auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
if (::internal::arm_compute::isGpuMode())
{
-      auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDeconvolutionLayerEx>();
-
-      auto padding = param.padding;
-      auto inner_border_right = padding.right - padding.left;
-      auto inner_border_top = padding.bottom - padding.top;
+      auto fn = nnfw::cpp14::make_unique<::arm_compute::CLTransposeConvLayer>();
-      padding.left = padding.right;
-      padding.top = padding.bottom;
-      auto symmetric_tconv_info = asPadStrideInfo(padding, param.stride);
+      // NOTE(review): the name `symmetric_tconv_info` is stale -- the padding is
+      // no longer symmetrized here; it is now the plain pad/stride info.
+      auto symmetric_tconv_info = asPadStrideInfo(param.padding, param.stride);
// TODO Support WeightInfo in some cases in order to performance improvement
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), nullptr, CAST_CL(ofm_alloc),
-                    symmetric_tconv_info, inner_border_right, inner_border_top);
+                    symmetric_tconv_info, param.invalid_horizontal, param.invalid_vertical);
builder.append("TransposeConv", std::move(fn));
}
else
MODELFILE_NAME="transpose_conv_test.tflite"
-STATUS="disabled"
GeneratedTests.prelu_ex_broadcast_quant8_1
# Unexpected result
GeneratedTests.pack*
-GeneratedTests.transpose_conv_ex_float_4
# Not support broadcast
GeneratedTests.logical_or_ex_broadcast_4D_2D
# Unsupported optional input that has shape