Imported Upstream version 1.8.0
[platform/core/ml/nnfw.git] / compute / ARMComputeEx / src / runtime / NEON / functions / NETransposeConvLayer.cpp
index df06892..09f1780 100644 (file)
@@ -1,21 +1,5 @@
 /*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-
 #include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h"
 
 #include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
 #include "arm_compute/core/UtilsEx.h"
 #include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 
@@ -52,20 +33,15 @@ using namespace arm_compute::misc::shape_calculator;
 
 namespace arm_compute
 {
+
 NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
     : _memory_group(std::move(memory_manager)),
       _conv_f(),
       _upsample_f(),
       _flip_weights(),
-      _permute_input(),
-      _permute_weights(),
-      _permute_output(),
       _scaled_output(),
       _weights_flipped(),
-      _permuted_input(),
-      _permuted_weights(),
-      _permuted_output(),
-      _is_nchw(false),
+      _flip_axis(),
       _original_weights(nullptr),
       _input(nullptr),
       _info(),
@@ -80,7 +56,7 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
 {
   ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
   ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16,
-                                                       DataType::QASYMM8);
+                                                       DataType::QASYMM8, DataType::QASYMM8_SIGNED);
   ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
   ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input);
   const unsigned int width_idx =
@@ -95,13 +71,16 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
       weights->dimension(height_idx), info, invalid_right, invalid_bottom);
 
   ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
-  if (is_data_type_quantized_asymmetric(input->data_type()) && bias)
+  if (bias != nullptr)
   {
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
-  }
-  else if (bias)
-  {
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+    if (is_data_type_quantized_asymmetric(input->data_type()))
+    {
+      ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+    }
+    else
+    {
+      ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+    }
   }
 
   if (output->tensor_shape().total_size() > 0)
@@ -110,12 +89,12 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
 
     const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
 
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) < output_shape.x(),
-                                    "Output's dim 0 is invalid.");
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) < output_shape.y(),
-                                    "Output's dim 1 is invalid.");
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) < output_shape.z(),
-                                    "Output's dim 2 is invalid.");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(),
+                                    "Output's width is invalid.");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(),
+                                    "Output's height is invalid.");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(),
+                                    "Output's depth is invalid.");
   }
 
   unsigned int pad_left = 0;
@@ -127,7 +106,6 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
       pad_bottom);
   TensorInfo scale_out_info(
       input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
-  scale_out_info.set_data_layout(input->data_layout());
   const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
 
   const unsigned int batches_idx =
@@ -149,19 +127,13 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con
                                      ITensor *output, const PadStrideInfo &info,
                                      unsigned int invalid_right, unsigned int invalid_bottom)
 {
+  // Perform validation step
   ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+  ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate(
+      input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(),
+      info, invalid_right, invalid_bottom));
 
   const DataLayout data_layout = input->info()->data_layout();
-
-  _input = input;
-  _original_weights = weights;
-  _info = info;
-  _is_prepared = false;
-  _is_nchw = data_layout == DataLayout::NCHW;
-
-  const unsigned int stride_x = info.stride().first;
-  const unsigned int stride_y = info.stride().second;
-
   const unsigned int width_idx =
       get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
   const unsigned int height_idx =
@@ -173,101 +145,54 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con
 
   const TensorShape output_shape =
       compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+
+  _input = input;
+  _original_weights = weights;
+  _info = info;
+  _is_prepared = false;
+
+  unsigned int pad_left = 0;
+  unsigned int pad_right = 0;
+  unsigned int pad_top = 0;
+  unsigned int pad_bottom = 0;
+  const unsigned int stride_x = info.stride().first;
+  const unsigned int stride_y = info.stride().second;
+
   // Output auto initialization if not yet initialized
   auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(),
                      input->info()->quantization_info());
 
-  // Perform validation step
-  ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate(
-      input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
-      info, invalid_right, invalid_bottom));
-
+  _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
   _memory_group.manage(&_scaled_output);
 
-  if (!_is_nchw)
-  {
-    _memory_group.manage(&_permuted_input);
-    _memory_group.manage(&_permuted_weights);
-    _memory_group.manage(&_permuted_output);
-
-    // Configure the function to transform the input tensor from NHWC -> NCHW
-    _permuted_input.info()->set_quantization_info(input->info()->quantization_info());
-    _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
-    _permuted_input.info()->set_data_layout(DataLayout::NCHW);
-
-    // Configure the function to transform the weights tensor from NHWC -> NCHW
-    _permuted_weights.info()->set_quantization_info(weights->info()->quantization_info());
-    _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
-    _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
-
-    // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in
-    // order to match output shape
-
-    unsigned int pad_left = 0;
-    unsigned int pad_right = 0;
-    unsigned int pad_top = 0;
-    unsigned int pad_bottom = 0;
-    const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
-        *_permuted_input.info(), *_permuted_weights.info(), info, out_dims, invalid_right,
-        invalid_bottom, pad_left, pad_right, pad_top, pad_bottom);
-
-    TensorInfo scale_out_info(scale_out_shape, 1, _permuted_input.info()->data_type(),
-                              _permuted_input.info()->quantization_info());
-    scale_out_info.set_data_layout(DataLayout::NCHW);
-    _scaled_output.allocator()->init(scale_out_info);
-
-    const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
-                                      DimensionRoundingType::CEIL);
-    _upsample_f.configure(&_permuted_input, &_scaled_output, upsample_info);
-
-    _weights_flipped.allocator()->init(*_permuted_weights.info()->clone());
-    _weights_flipped.info()->set_quantization_info(weights->info()->quantization_info());
-    _flip_weights.configure(&_permuted_weights, &_weights_flipped);
-
-    // setup the function to convolve the upscaled output
-    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-
-    const auto out_shape = output->info()->tensor_shape();
-    TensorShape permuted_out_shape{out_shape[1], out_shape[2], out_shape[0], out_shape[3]};
-    TensorInfo permuted_out_info(permuted_out_shape, 1, output->info()->data_type(),
-                                 output->info()->quantization_info());
-    _permuted_output.allocator()->init(permuted_out_info);
-    _permuted_output.info()->set_data_layout(DataLayout::NCHW);
-    _conv_f.configure(&_scaled_output, &_weights_flipped, bias, &_permuted_output, conv_info);
-
-    // Configure the function to transform the convoluted output to NHWC
-    _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
-
-    _permuted_input.allocator()->allocate();
-    _permuted_weights.allocator()->allocate();
-    _permuted_output.allocator()->allocate();
-  }
-  else
-  {
-    // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in
-    // order to match output shape
-    unsigned int pad_left = 0;
-    unsigned int pad_right = 0;
-    unsigned int pad_top = 0;
-    unsigned int pad_bottom = 0;
-    const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
-        *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
-        pad_right, pad_top, pad_bottom);
-
-    TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
-                              input->info()->quantization_info());
-    _scaled_output.allocator()->init(scale_out_info);
-    const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
-                                      DimensionRoundingType::FLOOR);
-    _upsample_f.configure(input, &_scaled_output, upsample_info);
-
-    _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
-    _flip_weights.configure(weights, &_weights_flipped);
-
-    // setup the function to convolve the upscaled output
-    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-    _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
-  }
+  _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
+  _flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
+
+  // setup the function to convolve the upscaled output
+  const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+  const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+      *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+      pad_right, pad_top, pad_bottom);
+
+  const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
+                                    DimensionRoundingType::FLOOR);
+
+  TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
+                            input->info()->quantization_info());
+  scale_out_info.set_data_layout(data_layout);
+  _scaled_output.allocator()->init(scale_out_info);
+
+  _upsample_f.configure(input, &_scaled_output, upsample_info);
+
+  _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
+
+  // Setup flip axis data
+  _flip_axis.allocator()->allocate();
+  auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
+  axis_data[0] = static_cast<uint32_t>(width_idx);
+  axis_data[1] = static_cast<uint32_t>(height_idx);
+
   _scaled_output.allocator()->allocate();
 }
 
@@ -275,22 +200,10 @@ void NETransposeConvLayer::run()
 {
   prepare();
 
-  // MemoryGroupResourceScope scope_mg(_memory_group);
-
-  // Permute input
-  if (!_is_nchw)
-  {
-    _permute_input.run();
-  }
+  MemoryGroupResourceScope scope_mg(_memory_group);
 
   _upsample_f.run();
   _conv_f.run();
-
-  // Permute output
-  if (!_is_nchw)
-  {
-    _permute_output.run();
-  }
 }
 
 void NETransposeConvLayer::prepare()
@@ -301,22 +214,12 @@ void NETransposeConvLayer::prepare()
 
     // Run weights flipping and mark original weights tensor as unused
     _weights_flipped.allocator()->allocate();
-    // Permute weights
-    if (!_is_nchw)
-    {
-      _permute_weights.run();
-    }
-    NEScheduler::get().schedule(&_flip_weights, Window::DimZ);
+    _flip_weights.run();
     _original_weights->mark_as_unused();
 
     // Prepare convolution
     _conv_f.prepare();
 
-    if (!_weights_flipped.is_used())
-    {
-      _weights_flipped.allocator()->free();
-    }
-
     _is_prepared = true;
   }
 }