namespace arm_compute
{
+namespace
+{
+/** Build the pad/stride description used to upsample the input ahead of the stride-1 convolution.
+ *
+ * For each axis the asymmetry of the deconvolution padding is mirrored (extra right padding becomes
+ * extra left padding and vice versa, likewise for top/bottom) and whatever is left of the total
+ * padding is shared equally between the two sides.
+ *
+ * @param[in] info         Padding and stride information of the deconvolution.
+ * @param[in] deconv_pad_x Total horizontal padding to distribute. Must be at least |pad_right - pad_left|
+ *                         and leave an even remainder once that difference is removed.
+ * @param[in] deconv_pad_y Total vertical padding to distribute. Must be at least |pad_bottom - pad_top|
+ *                         and leave an even remainder once that difference is removed.
+ *
+ * @return A PadStrideInfo carrying the strides of @p info, the per-side padding computed here and FLOOR rounding.
+ */
+PadStrideInfo compute_upsample_info(const PadStrideInfo &info, uint32_t deconv_pad_x, uint32_t deconv_pad_y)
+{
+    const unsigned int pad_left   = info.pad_left();
+    const unsigned int pad_right  = info.pad_right();
+    const unsigned int pad_top    = info.pad_top();
+    const unsigned int pad_bottom = info.pad_bottom();
+    const unsigned int stride_x   = info.stride().first;
+    const unsigned int stride_y   = info.stride().second;
+
+    // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape
+    unsigned int deconv_pad_left  = pad_right > pad_left ? pad_right - pad_left : 0;
+    unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0;
+    // The subtraction below is unsigned: reject inputs that would wrap around instead of silently
+    // producing a huge (and possibly even) remainder that slips past the parity check.
+    ARM_COMPUTE_ERROR_ON(deconv_pad_x < deconv_pad_left + deconv_pad_right);
+    deconv_pad_x -= deconv_pad_left + deconv_pad_right;
+    ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0);
+    deconv_pad_left += deconv_pad_x / 2;
+    deconv_pad_right += deconv_pad_x / 2;
+
+    unsigned int deconv_pad_top    = pad_bottom > pad_top ? pad_bottom - pad_top : 0;
+    unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0;
+    // Same wrap-around guard for the vertical axis.
+    ARM_COMPUTE_ERROR_ON(deconv_pad_y < deconv_pad_top + deconv_pad_bottom);
+    deconv_pad_y -= deconv_pad_top + deconv_pad_bottom;
+    ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0);
+    deconv_pad_top += deconv_pad_y / 2;
+    deconv_pad_bottom += deconv_pad_y / 2;
+
+    return PadStrideInfo(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom, DimensionRoundingType::FLOOR);
+}
+
+} // namespace
+
NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
_conv_f(),
_upsample_f(),
_flip_weights(),
- _permute_input(),
- _permute_weights(),
- _permute_output(),
_scaled_output(),
_weights_flipped(),
- _permuted_input(),
- _permuted_weights(),
- _permuted_output(),
_flip_axis(),
- _is_nchw(false),
_original_weights(nullptr),
_input(nullptr),
_info(),
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), "Output's depth is invalid.");
}
- unsigned int deconv_pad_x = 0;
- unsigned int deconv_pad_y = 0;
+ uint32_t deconv_pad_x = 0;
+ uint32_t deconv_pad_y = 0;
const unsigned int stride_x = info.stride().first;
const unsigned int stride_y = info.stride().second;
const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input, *weights, stride_x, stride_y, out_dims, deconv_pad_x, deconv_pad_y);
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(NEDeconvolutionLayer::validate(input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(), info));
- const DataLayout data_layout = input->info()->data_layout();
+ const DataLayout data_layout = input->info()->data_layout();
+ const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ auto out_dims = deconvolution_output_dimensions(input->info()->dimension(width_idx), input->info()->dimension(height_idx),
+ weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info);
+
+ const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info());
_input = input;
_original_weights = weights;
_info = info;
_is_prepared = false;
- _is_nchw = data_layout == DataLayout::NCHW;
- _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
-
- const unsigned int pad_left = info.pad_left();
- const unsigned int pad_right = info.pad_right();
- const unsigned int pad_top = info.pad_top();
- const unsigned int pad_bottom = info.pad_bottom();
- const unsigned int stride_x = info.stride().first;
- const unsigned int stride_y = info.stride().second;
- const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- auto out_dims = deconvolution_output_dimensions(input->info()->dimension(width_idx), input->info()->dimension(height_idx),
- weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info);
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
- const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info());
// Output auto initialization if not yet initialized
auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info());
_flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
_memory_group.manage(&_scaled_output);
+ _memory_group.manage(&_flip_axis);
- if(!_is_nchw)
- {
- _memory_group.manage(&_permuted_input);
- _memory_group.manage(&_permuted_output);
-
- // Configure the function to transform the input tensor from NHWC -> NCHW
- _permuted_input.info()->set_quantization_info(input->info()->quantization_info());
- _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
- _permuted_input.info()->set_data_layout(DataLayout::NCHW);
-
- // Configure the function to transform the weights tensor from NHWC -> NCHW
- _permuted_weights.info()->set_quantization_info(weights->info()->quantization_info());
- _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
- _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
-
- // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape
- unsigned int deconv_pad_x = 0;
- unsigned int deconv_pad_y = 0;
- const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*_permuted_input.info(), *_permuted_weights.info(), stride_x, stride_y, out_dims,
- deconv_pad_x, deconv_pad_y);
-
- unsigned int deconv_pad_left = pad_right > pad_left ? pad_right - pad_left : 0;
- unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0;
- deconv_pad_x -= deconv_pad_left + deconv_pad_right;
- ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0);
- deconv_pad_left += deconv_pad_x / 2;
- deconv_pad_right += deconv_pad_x / 2;
-
- unsigned int deconv_pad_top = pad_bottom > pad_top ? pad_bottom - pad_top : 0;
- unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0;
- deconv_pad_y -= deconv_pad_top + deconv_pad_bottom;
- ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0);
- deconv_pad_top += deconv_pad_y / 2;
- deconv_pad_bottom += deconv_pad_y / 2;
-
- TensorInfo scale_out_info(scale_out_shape, 1, _permuted_input.info()->data_type(), _permuted_input.info()->quantization_info());
- scale_out_info.set_data_layout(DataLayout::NCHW);
- _scaled_output.allocator()->init(scale_out_info);
-
- const PadStrideInfo upsample_info(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom, DimensionRoundingType::FLOOR);
- _upsample_f.configure(&_permuted_input, &_scaled_output, upsample_info);
-
- _weights_flipped.allocator()->init(*_permuted_weights.info()->clone());
- _weights_flipped.info()->set_quantization_info(weights->info()->quantization_info());
- _flip_weights.configure(&_permuted_weights, &_weights_flipped, &_flip_axis);
-
- // setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-
- _permuted_output.info()->set_quantization_info(output->info()->quantization_info());
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, &_permuted_output, conv_info);
-
- // Configure the function to transform the convoluted output to NHWC
- _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
- _permuted_output.info()->set_data_layout(DataLayout::NCHW);
-
- _permuted_input.allocator()->allocate();
- _permuted_output.allocator()->allocate();
- }
- else
- {
- // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape
- unsigned int deconv_pad_x = 0;
- unsigned int deconv_pad_y = 0;
- const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), stride_x, stride_y,
- out_dims, deconv_pad_x, deconv_pad_y);
-
- unsigned int deconv_pad_left = pad_right > pad_left ? pad_right - pad_left : 0;
- unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0;
- deconv_pad_x -= deconv_pad_left + deconv_pad_right;
- ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0);
- deconv_pad_left += deconv_pad_x / 2;
- deconv_pad_right += deconv_pad_x / 2;
-
- unsigned int deconv_pad_top = pad_bottom > pad_top ? pad_bottom - pad_top : 0;
- unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0;
- deconv_pad_y -= deconv_pad_top + deconv_pad_bottom;
- ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0);
- deconv_pad_top += deconv_pad_y / 2;
- deconv_pad_bottom += deconv_pad_y / 2;
-
- TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info());
- scale_out_info.set_data_layout(data_layout);
- _scaled_output.allocator()->init(scale_out_info);
-
- const PadStrideInfo upsample_info(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom, DimensionRoundingType::FLOOR);
- _upsample_f.configure(input, &_scaled_output, upsample_info);
-
- _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
- _flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
-
- // setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
- }
- _scaled_output.allocator()->allocate();
+ _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
+ _flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
+
+ // setup the function to convolve the upscaled output
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+ uint32_t deconv_pad_x = 0;
+ uint32_t deconv_pad_y = 0;
+
+ const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(),
+ stride_x, stride_y,
+ out_dims, deconv_pad_x, deconv_pad_y);
+
+ const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y);
+
+ TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info());
+ scale_out_info.set_data_layout(data_layout);
+ _scaled_output.allocator()->init(scale_out_info);
+
+ _upsample_f.configure(input, &_scaled_output, upsample_info);
+
+ _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
// Setup flip axis data
_flip_axis.allocator()->allocate();
auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
- axis_data[0] = 0;
- axis_data[1] = 1;
+ axis_data[0] = static_cast<uint32_t>(width_idx);
+ axis_data[1] = static_cast<uint32_t>(height_idx);
+
+ _scaled_output.allocator()->allocate();
}
void NEDeconvolutionLayer::run()
MemoryGroupResourceScope scope_mg(_memory_group);
- // Permute input
- if(!_is_nchw)
- {
- _permute_input.run();
- }
-
_upsample_f.run();
_conv_f.run();
-
- // Permute output
- if(!_is_nchw)
- {
- _permute_output.run();
- }
}
void NEDeconvolutionLayer::prepare()
if(!_is_prepared)
{
ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
- // Permute weights
- if(!_is_nchw)
- {
- // Manually manage _permuted_weights
- _permuted_weights.allocator()->allocate();
- _permute_weights.run();
- }
// Run weights flipping and mark original weights tensor as unused
_weights_flipped.allocator()->allocate();
// Prepare convolution
_conv_f.prepare();
- // Unused weights are already released in _conv_f
-
- if(!_is_nchw)
- {
- // Manually manage _permuted_weights
- // Free _permuted_weights as it not used after this method (prepare)
- _permuted_weights.allocator()->free();
- }
-
_is_prepared = true;
}
}
const auto data3x3_asymm = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadLeft", 0, 1)
* framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels", { 3 });
+/** Small asymmetric-padding dataset (named for the 9x9 kernel tests): a single 10x10x1x1 input shape,
+ *  stride 2 on both axes, padding 3 on left/top and 4 on right/bottom, one kernel. */
+const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorShape{ 10U, 10U, 1U, 1U })
+                                 * framework::dataset::make("StrideX", 2)
+                                 * framework::dataset::make("StrideY", 2)
+                                 * framework::dataset::make("PadLeft", 3)
+                                 * framework::dataset::make("PadRight", 4)
+                                 * framework::dataset::make("PadTop", 3)
+                                 * framework::dataset::make("PadBottom", 4)
+                                 * framework::dataset::make("NumKernels", { 1 });
+
+/** Large asymmetric-padding dataset (named for the 9x9 kernel tests): a single 640x360x56x1 input shape,
+ *  stride 2 on both axes, padding 3 on left/top and 4 on right/bottom, one kernel. */
+const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape{ 640U, 360U, 56U, 1U })
+                                 * framework::dataset::make("StrideX", 2)
+                                 * framework::dataset::make("StrideY", 2)
+                                 * framework::dataset::make("PadLeft", 3)
+                                 * framework::dataset::make("PadRight", 4)
+                                 * framework::dataset::make("PadTop", 3)
+                                 * framework::dataset::make("PadBottom", 4)
+                                 * framework::dataset::make("NumKernels", { 1 });
+
const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadX", 0, 2)
* framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 });
template <typename T>
using NEDeconvolutionLayerAsymmFixture3x3 = DeconvolutionValidationAsymmFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 3, 3>;
+/** Asymmetric-padding deconvolution validation fixture bound to NEDeconvolutionLayer, with the two
+ *  trailing template arguments fixed to 9 and 9 (presumably the kernel width and height). */
+template <typename T>
+using NEDeconvolutionLayerAsymmFixture9x9 =
+    DeconvolutionValidationAsymmFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 9, 9>;
+
template <typename T>
using NEDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 1, 1>;
validate(Accessor(_target), _reference, tolerance_fp32);
}
TEST_SUITE_END() // W1x1
+TEST_SUITE(W9x9)
+// Small asymmetric-padding case: F32 data, NHWC layout, no bias; enabled for every dataset mode.
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerAsymmFixture9x9<float>, framework::DatasetMode::ALL,
+                       combine(combine(combine(data9x9_small_asymm,
+                                               framework::dataset::make("DataType", DataType::F32)),
+                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                               framework::dataset::make("AddBias", { false })))
+{
+    // Check the target output against the reference within the FP32 tolerance
+    validate(Accessor(_target), _reference, tolerance_fp32);
+}
+// Large asymmetric-padding case: same configuration as RunSmall, restricted to nightly runs.
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerAsymmFixture9x9<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(data9x9_large_asymm,
+                                               framework::dataset::make("DataType", DataType::F32)),
+                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                               framework::dataset::make("AddBias", { false })))
+{
+    // Check the target output against the reference within the FP32 tolerance
+    validate(Accessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END() // W9x9
TEST_SUITE_END() // FP32
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC