Fix arm compute cl kernel for transpose conv (#5543)
author오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>
Wed, 3 Jul 2019 09:29:03 +0000 (18:29 +0900)
committer이춘석/On-Device Lab(SR)/Staff Engineer/삼성전자 <chunseok.lee@samsung.com>
Wed, 3 Jul 2019 09:29:03 +0000 (18:29 +0900)
* Fix arm compute cl kernel for transpose conv

Fix arm compute cl kernel for transpose conv
- Padding calculation
- Allow asymmetric padding
- Get and fix upsample layer and kernel
- Rename DeconvolutionXXX to TransposeConvXXX
- Enable all transpose conv tests

Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
* Fix pacl

14 files changed:
libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h [new file with mode: 0644]
libs/ARMComputeEx/arm_compute/core/UtilsEx.h
libs/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h [new file with mode: 0644]
libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h [moved from libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDeconvolutionLayerEx.h with 53% similarity]
libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h [new file with mode: 0644]
libs/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp [new file with mode: 0644]
libs/ARMComputeEx/src/core/UtilsEx.cpp
libs/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp [moved from libs/ARMComputeEx/src/runtime/CL/functions/CLDeconvolutionLayerEx.cpp with 69% similarity]
libs/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp [new file with mode: 0644]
runtimes/neurun/backend/acl_cl/StageGenerator.cc
runtimes/pure_arm_compute/src/compilation.cc
tests/framework/tests/transpose_conv/config.sh
tests/nnapi/nnapi_gtest.skip.armv7l-linux

diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h
new file mode 100644 (file)
index 0000000..c5ef730
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
+#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the Upsampling layer kernel for transpose convolution on OpenCL.
+ */
+class CLTransposeConvLayerUpsampleKernel : public ICLKernel
+{
+public:
+  /** Constructor */
+  CLTransposeConvLayerUpsampleKernel();
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CLTransposeConvLayerUpsampleKernel(const CLTransposeConvLayerUpsampleKernel &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CLTransposeConvLayerUpsampleKernel &
+  operator=(const CLTransposeConvLayerUpsampleKernel &) = delete;
+  /** Default Move Constructor. */
+  CLTransposeConvLayerUpsampleKernel(CLTransposeConvLayerUpsampleKernel &&) = default;
+  /** Default move assignment operator */
+  CLTransposeConvLayerUpsampleKernel &operator=(CLTransposeConvLayerUpsampleKernel &&) = default;
+  /** Default destructor */
+  ~CLTransposeConvLayerUpsampleKernel() = default;
+
+  /** Initialise the kernel's input and output.
+   *
+   * @param[in]  input        Source tensor. Data types supported: QASYMM8/F16/F32.
+   * @param[out] output       Destination tensor. Data types supported: same as @p input. All but
+   * the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only
+   * performed within the XY-plane.
+   * @param[in]  inner_border Top and right inner border sizes. These rows and columns will be
+   * filled with zero.
+   * @param[in]  info         Contains padding and stride information described in @ref
+   * PadStrideInfo.
+   */
+  void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
+                 const PadStrideInfo &info);
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * CLTransposeConvLayerUpsample
+   *
+   * @param[in] input        Source tensor info. Data types supported: QASYMM8/F16/F32.
+   * @param[in] output       Destination tensor info. Data types supported: same as @p input. All
+   * but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is
+   * only performed within the XY-plane.
+   * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled
+   * with zero.
+   * @param[in] info         Contains padding and stride information described in @ref
+   * PadStrideInfo.
+   *
+   * @return a status
+   */
+  static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+                         const BorderSize &inner_border, const PadStrideInfo &info);
+
+  // Inherited methods overridden:
+  void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+  const ICLTensor *_input;
+  ICLTensor *_output;
+  BorderSize _inner_border;
+  PadStrideInfo _info;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ */
index 3fb3955..39026e6 100644 (file)
 
 #include <utility>
 
+#include "arm_compute/core/Types.h"
+
 namespace arm_compute
 {
 
-/** Returns expected width and height of the deconvolution's output tensor.
+/** Returns expected width and height of the transpose convolution's output tensor.
  *
  * @note This function was copied in order to fix a bug computing to wrong output dimensions.
- *       The formula for computing the output dimension is: o = s*(i - 1) + a + k - 2*p
- *         k: kernel size
- *         s: stride
- *         i: input size
- *         o: output size
- *         p: padding
- *         a: inner border
- *       Refer to : https://github.com/ARM-software/ComputeLibrary/issues/523#issuecomment-414606797
  *
  * @param[in] in_width      Width of input tensor (Number of columns)
  * @param[in] in_height     Height of input tensor (Number of rows)
  * @param[in] kernel_width  Kernel width.
  * @param[in] kernel_height Kernel height.
- * @param[in] padx          X axis padding.
- * @param[in] pady          Y axis padding.
- * @param[in] stride_x      X axis input stride.
- * @param[in] stride_y      Y axis input stride.
- * @param[in] inner_border_right  The number of zeros added to right edge of the input.
- * @param[in] inner_border_top    The number of zeros added to top edge of the input.
+ * @param[in] info          padding and stride info.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_top   The number of zeros added to top edge of the output.
  *
  * @return A pair with the new width in the first position and the new height in the second.
  */
-const std::pair<unsigned int, unsigned int> deconvolution_output_dimensions_ex(
-    unsigned int in_width, unsigned int in_height, unsigned int kernel_width,
-    unsigned int kernel_height, unsigned int padx, unsigned int pady, unsigned int stride_x,
-    unsigned int stride_y, unsigned int inner_border_right = 0, unsigned int inner_border_top = 0);
+const std::pair<unsigned int, unsigned int>
+transposeconv_output_dimensions(unsigned int in_width, unsigned int in_height,
+                                unsigned int kernel_width, unsigned int kernel_height,
+                                const PadStrideInfo &info, unsigned int invalid_right,
+                                unsigned int invalid_top);
 }
 #endif /*__ARM_COMPUTE_UTILSEX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h b/libs/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
new file mode 100644 (file)
index 0000000..367129e
--- /dev/null
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__
+#define __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Utils.h"
+
+#include "arm_compute/core/utils/helpers/tensor_transform.h"
+
+#include <cmath>
+
+namespace arm_compute
+{
+namespace misc
+{
+namespace shape_calculator
+{
+
+/** Calculate the upsampled output shape used for transpose convolution
+ *
+ * @param[in] input              Input tensor info
+ * @param[in] weights            Weights tensor shape
+ * @param[in] info               Padding and stride info
+ * @param[in] out_dims           Output shape dimensions
+ * @param[in] pad_left           Padding on left
+ * @param[in] pad_right          Padding on right
+ * @param[in] pad_top            Padding on top
+ * @param[in] pad_bottom         Padding on bottom
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_transposeconv_upsampled_shape(
+    const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info,
+    std::pair<unsigned int, unsigned int> &out_dims, unsigned int &pad_left,
+    unsigned int &pad_right, unsigned int &pad_top, unsigned int &pad_bottom)
+{
+  unsigned int sx = info.stride().first;
+  unsigned int sy = info.stride().second;
+  const DataLayout data_layout = input.data_layout();
+  const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+  const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+  // Find the upsampled dimensions
+  // transpose conv out:
+  //    tconv_out + pad = 1 + (in - 1) * stride + invalid
+  //    tconv_out = 1 + (in - 1) * stride + invalid - pad
+  // upsample out:
+  //    upsample_out = 1 + (in - 1) * stride
+  unsigned int out_x = (input.dimension(idx_w) - 1) * sx + 1;
+  unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1;
+
+  // Find the padding needed for the convolution with stride 1 in order to match output shape
+  // upsample+pad out:
+  //    upsample_out + pad = tconv_out + kernel - 1
+  //    pad = tconv_out + kernel - 1 - upsample_out
+  unsigned int padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1);
+  unsigned int pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1);
+  out_x += padx;
+  out_y += pady;
+
+  unsigned int padx_all = padx + info.pad_left() + info.pad_right();
+  unsigned int pady_all = pady + info.pad_top() + info.pad_bottom();
+  pad_left = (padx_all + 1) / 2 - info.pad_left();
+  pad_right = padx_all / 2 - info.pad_right();
+  pad_top = (pady_all + 1) / 2 - info.pad_top();
+  pad_bottom = pady_all / 2 - info.pad_bottom();
+
+  TensorShape scale_out_shape(input.tensor_shape());
+  scale_out_shape.set(idx_w, out_x);
+  scale_out_shape.set(idx_h, out_y);
+
+  return scale_out_shape;
+}
+
+/** Calculate the output shape of the transpose convolution layer
+ *
+ * @param[in] out_dims Output x and y shape dimensions
+ * @param[in] input    Input tensor info
+ * @param[in] weights  Weights tensor shape
+ *
+ * @return the calculated shape
+ */
+inline TensorShape
+compute_transposeconv_output_shape(const std::pair<unsigned int, unsigned int> &out_dims,
+                                   const ITensorInfo &input, const ITensorInfo &weights)
+{
+  const TensorShape input_shape{input.tensor_shape()};
+  const TensorShape weights_shape{weights.tensor_shape()};
+
+  const DataLayout data_layout = input.data_layout();
+  const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+  const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+  const int channel_idx =
+      get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+  const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
+
+  TensorShape out_shape{input_shape};
+  out_shape.set(width_idx, out_dims.first);
+  out_shape.set(height_idx, out_dims.second);
+  out_shape.set(channel_idx, weights_shape[batch_idx]);
+  return out_shape;
+}
+
+} // namespace shape_calculator
+} // namespace misc
+} // namespace arm_compute
+
+#endif // __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__
index 277e1e7..3b7fcde 100644 (file)
@@ -20,7 +20,6 @@
 #include <arm_compute/runtime/CL/functions/CLBatchToSpaceND.h>
 #include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h>
 #include <arm_compute/runtime/CL/functions/CLCast.h>
-#include <arm_compute/runtime/CL/functions/CLDeconvolutionLayerEx.h>
 #include <arm_compute/runtime/CL/functions/CLDepthToSpace.h>
 #include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h>
 #include <arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h>
@@ -38,5 +37,6 @@
 #include <arm_compute/runtime/CL/functions/CLSplit.h>
 #include <arm_compute/runtime/CL/functions/CLStridedSliceEx.h>
 #include <arm_compute/runtime/CL/functions/CLTopKV2.h>
+#include <arm_compute/runtime/CL/functions/CLTransposeConvLayer.h>
 
 #endif // __ARM_COMPUTE_CLFUNCTIONSEX_H__
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_CLDECONVOLUTIONLAYEREX_H__
-#define __ARM_COMPUTE_CLDECONVOLUTIONLAYEREX_H__
+#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
+#define __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
 
 #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
-#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
+#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h"
 
 #include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
 
 namespace arm_compute
 {
 class ICLTensor;
-/** Function to run the deconvolution layer.
+/** Function to run the transpose convolution layer.
  *
  * @note This layer was copied in order to fix a bug computing to wrong output dimensions.
  *
- * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input
+ * TransposeConv Layer is the backward pass of Convolution Layer. First we transform the input
  * depending on the stride and pad info and then perform a 1x1
  * convolution pass. Input stride defines how many zeroes we should put between each element of the
  * input, pad is the amount of padding and finally a is a user
@@ -53,10 +53,10 @@ class ICLTensor;
  *
  *  The relation between input to output is as follows:
  *  \f[
- *       width\_output = (width\_input - 1) \cdot stride\_x - \cdot padding\_x + kernel\_x
+ *       width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
  *  \f]
  *  \f[
- *       height\_output = (height\_input - 1) \cdot stride\_y - \cdot padding\_y + kernel\_y
+ *       height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
  *  \f]
  *
  *  where:
@@ -74,69 +74,69 @@ class ICLTensor;
  *
  * This function calls the following OpenCL kernels/functions:
  *
- * -# @ref CLDeconvolutionLayerUpsample
+ * -# @ref CLTransposeConvLayerUpsample
  * -# @ref CLConvolutionLayer
  *
  */
-class CLDeconvolutionLayerEx : public IFunction
+class CLTransposeConvLayer : public IFunction
 {
 public:
   /** Constructor */
-  CLDeconvolutionLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+  CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
   /** Prevent instances of this class from being copied (As this class contains pointers) */
-  CLDeconvolutionLayerEx(const CLDeconvolutionLayerEx &) = delete;
+  CLTransposeConvLayer(const CLTransposeConvLayer &) = delete;
   /** Default move constructor */
-  CLDeconvolutionLayerEx(CLDeconvolutionLayerEx &&) = default;
+  CLTransposeConvLayer(CLTransposeConvLayer &&) = default;
   /** Prevent instances of this class from being copied (As this class contains pointers) */
-  CLDeconvolutionLayerEx &operator=(const CLDeconvolutionLayerEx &) = delete;
+  CLTransposeConvLayer &operator=(const CLTransposeConvLayer &) = delete;
   /** Default move assignment operator */
-  CLDeconvolutionLayerEx &operator=(CLDeconvolutionLayerEx &&) = default;
+  CLTransposeConvLayer &operator=(CLTransposeConvLayer &&) = default;
   /** Set the input, weights, biases and output tensors.
    *
-   * @param[in,out] input              Input tensor. 3 lower dimensions represent a single input,
-   * and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8/F16/F32.
-   * @param[in]     weights            The 4d weights with dimensions [width, height, IFM, OFM].
-   * Data type supported: Same as @p input.
-   * @param[in]     bias               (Optional) The biases have one dimension. Data type
-   * supported: Same as @p input.
-   * @param[out]    output             Output tensor. The output has the same number of dimensions
-   * as the @p input.
-   * @param[in]     info               Contains padding and policies to be used in the
-   * deconvolution, this is decribed in @ref PadStrideInfo.
-   * @param[in]     inner_border_right The number of zeros added to right edge of the input.
-   * @param[in]     inner_border_top   The number of zeros added to top edge of the input.
-   * @param[in]     weights_info       (Optional) Weights information needed for @ref
-   * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
-   * CLWeightsReshapeKernel.
-   *
+   * @param[in,out] input          Input tensor. 3 lower dimensions represent a single input,
+   *                               and an optional 4th dimension for batch of inputs.
+   *                               Data types supported: QASYMM8/F16/F32.
+   * @param[in]     weights        The 4d weights with dimensions [width, height, IFM, OFM].
+   *                               Data type supported: Same as @p input.
+   * @param[in]     bias           (Optional) The biases have one dimension. Data type supported:
+   *                               Same as @p input.
+   * @param[out]    output         Output tensor. The output has the same number of dimensions
+   *                               as the @p input.
+   * @param[in]     info           Contains padding and policies to be used in the
+   *                               deconvolution, this is described in @ref PadStrideInfo.
+   * @param[in]     invalid_right  The number of zeros added to right edge of the input.
+   * @param[in]     invalid_bottom The number of zeros added to bottom edge of the input.
+   * @param[in]     weights_info   (Optional) Weights information needed for @ref
+   *                               CLConvolutionLayer, specifies if the weights tensor has been
+   *                               reshaped with @ref CLWeightsReshapeKernel.
    */
   void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
-                 const PadStrideInfo &info, unsigned int inner_border_right,
-                 unsigned int inner_border_top, const WeightsInfo &weights_info = WeightsInfo());
+                 const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom,
+                 const WeightsInfo &weights_info = WeightsInfo());
   /** Static function to check if given info will lead to a valid configuration of @ref
-   * CLDeconvolutionLayerEx
-   *
-   * @param[in] input              Input tensor info. 3 lower dimensions represent a single input,
-   * and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8/F16/F32.
-   * @param[in] weights            The 4d weights info with dimensions [width, height, IFM, OFM].
-   * Data type supported: Same as @p input.
-   * @param[in] bias               (Optional) The biases have one dimension. Data type supported:
-   * Same as @p input.
-   * @param[in] output             Output tensor info. The output has the same number of dimensions
-   * as the @p input.
-   * @param[in] info               Contains padding and policies to be used in the deconvolution,
-   * this is decribed in @ref PadStrideInfo.
-   * @param[in] inner_border_right The number of zeros added to right edge of the input.
-   * @param[in] inner_border_top   The number of zeros added to top edge of the input.
-   * @param[in] weights_info       (Optional) Weights information needed for @ref
-   * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
-   * CLWeightsReshapeKernel.
+   * CLTransposeConvLayer
    *
+   * @param[in] input           Input tensor info. 3 lower dimensions represent a single input,
+   *                            and an optional 4th dimension for batch of inputs.
+   *                            Data types supported: QASYMM8/F16/F32.
+   * @param[in] weights         The 4d weights info with dimensions [width, height, IFM, OFM].
+   *                            Data type supported: Same as @p input.
+   * @param[in] bias            (Optional) The biases have one dimension. Data type supported:
+   *                            Same as @p input.
+   * @param[in] output          Output tensor info. The output has the same number of dimensions
+   *                            as the @p input.
+   * @param[in] info            Contains padding and policies to be used in the deconvolution,
+   *                            this is described in @ref PadStrideInfo.
+   * @param[in] innvalid_right  The number of zeros added to right edge of the input.
+   * @param[in] invalid_bottom  The number of zeros added to bottom edge of the input.
+   * @param[in] weights_info    (Optional) Weights information needed for @ref CLConvolutionLayer,
+   *                            specifies if the weights tensor has been reshaped with @ref
+   *                            CLWeightsReshapeKernel.
    * @return a status
    */
   static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
                          const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
-                         unsigned int inner_border_right, unsigned int inner_border_top,
+                         unsigned int innvalid_right, unsigned int invalid_bottom,
                          const WeightsInfo &weights_info = WeightsInfo());
 
   // Inherited methods overridden:
@@ -145,7 +145,7 @@ public:
 
 private:
   CLMemoryGroup _memory_group;
-  CLDeconvolutionLayerUpsample _scale_f;
+  CLTransposeConvLayerUpsample _scale_f;
   CLConvolutionLayer _conv_f;
   CPPFlipWeightsKernel _flip_weights;
   CLTensor _scaled_output;
@@ -154,4 +154,4 @@ private:
   bool _is_prepared;
 };
 }
-#endif /* __ARM_COMPUTE_CLDECONVOLUTIONLAYEREX_H__ */
+#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h
new file mode 100644 (file)
index 0000000..4ae0e18
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
+#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLTransposeConvLayerUpsampleKernel */
+class CLTransposeConvLayerUpsample : public IFunction
+{
+public:
+  /** Default constructor */
+  CLTransposeConvLayerUpsample();
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CLTransposeConvLayerUpsample(const CLTransposeConvLayerUpsample &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CLTransposeConvLayerUpsample &operator=(const CLTransposeConvLayerUpsample &) = delete;
+  /** Allow instances of this class to be moved */
+  CLTransposeConvLayerUpsample(CLTransposeConvLayerUpsample &&) = default;
+  /** Allow instances of this class to be moved */
+  CLTransposeConvLayerUpsample &operator=(CLTransposeConvLayerUpsample &&) = default;
+  /** Default destructor */
+  virtual ~CLTransposeConvLayerUpsample() = default;
+
+  /** Initialize the function's source, destination, interpolation type and border_mode.
+   *
+   * @param[in, out] input        Source tensor. Data type supported: QASYMM8/F16/F32.
+   * @param[out]     output       Destination tensor. Data type supported: same as @p input.
+   * @param[in]      inner_border The number of zeros added to right and top edges of the input.
+   * @param[in]      info         Contains padding and policies to be used in the deconvolution.
+   */
+  void configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
+                 const PadStrideInfo &info);
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * CLTransposeConvLayerUpsample
+   *
+   * @param[in] input        Source tensor info. Data type supported: QASYMM8/F16/F32.
+   * @param[in] output       Destination tensor info. Data type supported: same as @p input.
+   * @param[in] inner_border The number of zeros added to right and top edges of the input.
+   * @param[in] info         Contains padding and policies to be used in the deconvolution.
+   *
+   * @return a status
+   */
+  static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+                         const BorderSize &inner_border, const PadStrideInfo &info);
+
+  // Inherited methods overridden:
+  void run() override;
+
+private:
+  CLTransposeConvLayerUpsampleKernel _upsample;
+  ICLTensor *_output;
+};
+}
+#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ */
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp
new file mode 100644 (file)
index 0000000..6cc8d9d
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLValidate.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+using namespace arm_compute;
+
+CLTransposeConvLayerUpsampleKernel::CLTransposeConvLayerUpsampleKernel()
+    : _input(nullptr), _output(nullptr), _inner_border(), _info()
+{
+}
+
+Status CLTransposeConvLayerUpsampleKernel::validate(const ITensorInfo *input,
+                                                    const ITensorInfo *output,
+                                                    const BorderSize &inner_border,
+                                                    const PadStrideInfo &info)
+{
+  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+  ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
+  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
+                                                       DataType::F32);
+  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
+
+  const DataLayout data_layout = input->data_layout();
+
+  const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+  const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+  const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+
+  ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_w) == 0);
+  ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_h) == 0);
+
+  ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_c) != output->dimension(idx_c));
+  for (size_t i = 3; i < Coordinates::num_max_dimensions; ++i)
+  {
+    ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
+  }
+
+  ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.right > info.stride().first - 1,
+                                  "inner_border_right must be smaller than stride_x");
+  ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.top > info.stride().second - 1,
+                                  "inner_border_top must be smaller than stride_y");
+
+  return Status{};
+}
+
+void CLTransposeConvLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output,
+                                                   const BorderSize &inner_border,
+                                                   const PadStrideInfo &info)
+{
+  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+  _input = input;
+  _output = output;
+  _inner_border = inner_border;
+  _info = info;
+
+  // Perform validation step
+  ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayerUpsampleKernel::validate(
+      input->info(), output->info(), inner_border, info));
+
+  // Create kernel
+  CLBuildOptions build_opts;
+  build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+  _kernel = static_cast<cl::Kernel>(
+      CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options()));
+
+  constexpr unsigned int num_elems_processed_per_iteration = 1;
+
+  // Configure kernel window
+  Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
+  AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
+  output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
+
+  ICLKernel::configure_internal(win);
+}
+
+void CLTransposeConvLayerUpsampleKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+  const DataLayout data_layout = _input->info()->data_layout();
+
+  const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+  const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+  const int out_start_x = _info.pad_left();
+  const int out_end_x = _output->info()->dimension(idx_w) - _inner_border.right -
+                        _info.pad_right() + _info.stride().first - 1;
+  const int out_step_x = _info.stride().first;
+
+  const int out_start_y = _inner_border.top + _info.pad_top();
+  const int out_end_y =
+      _output->info()->dimension(idx_h) - _info.pad_bottom() + _info.stride().second - 1;
+  const int out_step_y = _info.stride().second;
+
+  switch (data_layout)
+  {
+    case DataLayout::NCHW:
+    {
+      Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+
+      Window slice_out = collapsed.first_slice_window_3D();
+      slice_out.set(Window::DimX, Window::Dimension(out_start_x, out_end_x, out_step_x));
+      slice_out.set(Window::DimY, Window::Dimension(out_start_y, out_end_y, out_step_y));
+
+      Window slice_in = collapsed.first_slice_window_3D();
+
+      do
+      {
+        unsigned int idx = 0;
+        add_3D_tensor_argument(idx, _input, slice_in);
+        add_3D_tensor_argument(idx, _output, slice_out);
+        enqueue(queue, *this, slice_out);
+      } while (collapsed.slide_window_slice_3D(slice_in) &&
+               collapsed.slide_window_slice_3D(slice_out));
+      break;
+    }
+    case DataLayout::NHWC:
+    {
+      // NOTE: not collapsing in NHWC
+      Window slice_out = window.first_slice_window_3D();
+      slice_out.set(Window::DimY, Window::Dimension(out_start_x, out_end_x, out_step_x));
+      slice_out.set(Window::DimZ, Window::Dimension(out_start_y, out_end_y, out_step_y));
+
+      Window slice_in = window.first_slice_window_3D();
+
+      do
+      {
+        unsigned int idx = 0;
+        add_3D_tensor_argument(idx, _input, slice_in);
+        add_3D_tensor_argument(idx, _output, slice_out);
+        enqueue(queue, *this, slice_out);
+      } while (window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out));
+      break;
+    }
+    default:
+      ARM_COMPUTE_ERROR("Unsupported data layout");
+  }
+}
index 3322796..94242b5 100644 (file)
 
 using namespace arm_compute;
 
-const std::pair<unsigned int, unsigned int> arm_compute::deconvolution_output_dimensions_ex(
-    unsigned int in_width, unsigned int in_height, unsigned int kernel_width,
-    unsigned int kernel_height, unsigned int padx, unsigned int pady, unsigned int stride_x,
-    unsigned int stride_y, unsigned int inner_border_right, unsigned int inner_border_top)
+const std::pair<unsigned int, unsigned int>
+arm_compute::transposeconv_output_dimensions(unsigned int in_width, unsigned int in_height,
+                                             unsigned int kernel_width, unsigned int kernel_height,
+                                             const PadStrideInfo &info, unsigned int invalid_right,
+                                             unsigned int invalid_bottom)
 {
+  const unsigned int stride_x = info.stride().first;
+  const unsigned int stride_y = info.stride().second;
+  const unsigned int padx = info.pad_left() + info.pad_right();
+  const unsigned int pady = info.pad_top() + info.pad_bottom();
+
   ARM_COMPUTE_ERROR_ON(in_width < 1 || in_height < 1);
-  ARM_COMPUTE_ERROR_ON(((in_width - 1) * stride_x + kernel_width) < 2 * padx);
-  ARM_COMPUTE_ERROR_ON(((in_height - 1) * stride_y + kernel_height) < 2 * pady);
-  const int w = stride_x * (in_width - 1) + kernel_width - 2 * padx + inner_border_right;
-  const int h = stride_y * (in_height - 1) + kernel_height - 2 * pady + inner_border_top;
+  ARM_COMPUTE_ERROR_ON(kernel_width <= padx);
+  ARM_COMPUTE_ERROR_ON(kernel_height <= pady);
+
+  // Find the transpose conv out dimensions
+  // transpose conv out:
+  //    tconv_out + pad = 1 + (in - 1) * stride + invalid
+  //    tconv_out = 1 + (in - 1) * stride + invalid - pad
+  const int w = stride_x * (in_width - 1) + kernel_width - padx + invalid_right;
+  const int h = stride_y * (in_height - 1) + kernel_height - pady + invalid_bottom;
 
   return std::make_pair<unsigned int, unsigned int>(w, h);
 }
@@ -22,7 +22,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerEx.h"
+#include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h"
+#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
 
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/Utils.h"
@@ -38,8 +39,7 @@
 using namespace arm_compute;
 using namespace arm_compute::misc::shape_calculator;
 
-CLDeconvolutionLayerEx::CLDeconvolutionLayerEx(
-    std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
     : _memory_group(std::move(memory_manager)),
       _scale_f(),
       _conv_f(),
@@ -51,11 +51,10 @@ CLDeconvolutionLayerEx::CLDeconvolutionLayerEx(
 {
 }
 
-Status CLDeconvolutionLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights,
-                                        const ITensorInfo *bias, ITensorInfo *output,
-                                        const PadStrideInfo &info, unsigned int inner_border_right,
-                                        unsigned int inner_border_top,
-                                        const WeightsInfo &weights_info)
+Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
+                                      const ITensorInfo *bias, ITensorInfo *output,
+                                      const PadStrideInfo &info, unsigned int invalid_right,
+                                      unsigned int invalid_bottom, const WeightsInfo &weights_info)
 {
   ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
   ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
@@ -70,24 +69,21 @@ Status CLDeconvolutionLayerEx::validate(const ITensorInfo *input, const ITensorI
 
   ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h));
   ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1);
-  ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric());
 
-  const unsigned int stride_x = info.stride().first;
-  const unsigned int stride_y = info.stride().second;
+  const unsigned int kernel_x = weights->dimension(idx_w);
+  const unsigned int kernel_y = weights->dimension(idx_h);
 
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_right > stride_x - 1,
-                                  "inner_border_right must be smaller than stride_x");
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_top > stride_y - 1,
-                                  "inner_border_top must be smaller than stride_y");
+  ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_right > kernel_x - 1,
+                                  "invalid_right must be smaller than kernel_x");
+  ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_bottom > kernel_y - 1,
+                                  "invalid_bottom must be smaller than kernel_y");
 
-  // NOTE From the existing CLDeconvolutionLayer, inner_border_right and inner_border_top were
-  // added.
-  auto out_dims = deconvolution_output_dimensions_ex(
+  // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were added.
+  auto out_dims = transposeconv_output_dimensions(
       input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
-      weights->dimension(idx_h), info.pad().first, info.pad().second, stride_x, stride_y,
-      inner_border_right, inner_border_top);
+      weights->dimension(idx_h), info, invalid_right, invalid_bottom);
 
-  const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input, *weights);
+  const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
 
   ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
 
@@ -111,11 +107,12 @@ Status CLDeconvolutionLayerEx::validate(const ITensorInfo *input, const ITensorI
   ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c],
                                   "Output's depth is invalid.");
 
-  unsigned int padx = 0;
-  unsigned int pady = 0;
-  const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(
-      *input, *weights, stride_x, stride_y, inner_border_right, inner_border_top, out_dims, padx,
-      pady);
+  unsigned int pad_left = 0;
+  unsigned int pad_right = 0;
+  unsigned int pad_top = 0;
+  unsigned int pad_bottom = 0;
+  const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+      *input, *weights, info, out_dims, pad_left, pad_right, pad_top, pad_bottom);
   TensorInfo scale_out_info(input->clone()
                                 ->set_is_resizable(true)
                                 .reset_padding()
@@ -123,19 +120,18 @@ Status CLDeconvolutionLayerEx::validate(const ITensorInfo *input, const ITensorI
                                 .set_data_layout(data_layout));
   const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
 
-  ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(
-      input, &scale_out_info, BorderSize(inner_border_right, inner_border_top), info));
+  ARM_COMPUTE_RETURN_ON_ERROR(
+      CLTransposeConvLayerUpsample::validate(input, &scale_out_info, BorderSize(0, 0), info));
   ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output,
                                                            conv_info, weights_info));
 
   return Status{};
 }
 
-void CLDeconvolutionLayerEx::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias,
-                                       ICLTensor *output, const PadStrideInfo &info,
-                                       unsigned int inner_border_right,
-                                       unsigned int inner_border_top,
-                                       const WeightsInfo &weights_info)
+void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias,
+                                     ICLTensor *output, const PadStrideInfo &info,
+                                     unsigned int invalid_right, unsigned int invalid_bottom,
+                                     const WeightsInfo &weights_info)
 {
   ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
 
@@ -151,15 +147,15 @@ void CLDeconvolutionLayerEx::configure(ICLTensor *input, ICLTensor *weights, con
   _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
   _flip_weights.configure(weights, &_weights_flipped);
 
-  // NOTE From the existing CLDeconvolutionLayer, inner_border_right and inner_border_top were
+  // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were
   // added.
-  auto out_dims = deconvolution_output_dimensions_ex(
+  auto out_dims = transposeconv_output_dimensions(
       input->info()->dimension(idx_w), input->info()->dimension(idx_h),
-      weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info.pad().first,
-      info.pad().second, stride_x, stride_y, inner_border_right, inner_border_top);
+      weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
+      invalid_bottom);
 
   const TensorShape output_shape =
-      compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info());
+      compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
 
   // Output auto initialization if not yet initialized
   auto_init_if_empty(
@@ -167,9 +163,9 @@ void CLDeconvolutionLayerEx::configure(ICLTensor *input, ICLTensor *weights, con
       input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
 
   // Perform validation step
-  ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayerEx::validate(
+  ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayer::validate(
       input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
-      info, inner_border_right, inner_border_top));
+      info, invalid_right, invalid_bottom));
 
   _is_prepared = weights_info.retain_internal_weights();
 
@@ -177,11 +173,12 @@ void CLDeconvolutionLayerEx::configure(ICLTensor *input, ICLTensor *weights, con
 
   // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order
   // to match output shape
-  unsigned int padx = 0;
-  unsigned int pady = 0;
-  const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(
-      *input->info(), *weights->info(), stride_x, stride_y, inner_border_right, inner_border_top,
-      out_dims, padx, pady);
+  unsigned int pad_left = 0;
+  unsigned int pad_right = 0;
+  unsigned int pad_top = 0;
+  unsigned int pad_bottom = 0;
+  const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+      *input->info(), *weights->info(), info, out_dims, pad_left, pad_right, pad_top, pad_bottom);
 
   TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
                             input->info()->quantization_info());
@@ -189,9 +186,9 @@ void CLDeconvolutionLayerEx::configure(ICLTensor *input, ICLTensor *weights, con
   _scaled_output.allocator()->init(scale_out_info);
 
   // configure scale function
-  const PadStrideInfo upsample_info(stride_x, stride_y, padx / 2, pady / 2);
-  _scale_f.configure(input, &_scaled_output, BorderSize(inner_border_top, inner_border_right),
-                     upsample_info);
+  const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
+                                    DimensionRoundingType::FLOOR);
+  _scale_f.configure(input, &_scaled_output, BorderSize(0, 0), upsample_info);
 
   // setup the function to convolve the upscaled output
   const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
@@ -199,7 +196,7 @@ void CLDeconvolutionLayerEx::configure(ICLTensor *input, ICLTensor *weights, con
   _scaled_output.allocator()->allocate();
 }
 
-void CLDeconvolutionLayerEx::run()
+void CLTransposeConvLayer::run()
 {
   prepare();
 
@@ -211,7 +208,7 @@ void CLDeconvolutionLayerEx::run()
   _memory_group.release();
 }
 
-void CLDeconvolutionLayerEx::prepare()
+void CLTransposeConvLayer::prepare()
 {
   if (!_is_prepared)
   {
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp
new file mode 100644 (file)
index 0000000..0ce3e67
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h"
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include <cmath>
+#include <memory>
+#include <tuple>
+
+using namespace arm_compute;
+
+CLTransposeConvLayerUpsample::CLTransposeConvLayerUpsample() // NOLINT
+    : _upsample(),
+      _output(nullptr)
+{
+}
+
+Status CLTransposeConvLayerUpsample::validate(const ITensorInfo *input, const ITensorInfo *output,
+                                              const BorderSize &inner_border,
+                                              const PadStrideInfo &info)
+{
+  return CLTransposeConvLayerUpsampleKernel::validate(input, output, inner_border, info);
+}
+
+void CLTransposeConvLayerUpsample::configure(ICLTensor *input, ICLTensor *output,
+                                             const BorderSize &inner_border,
+                                             const PadStrideInfo &info)
+{
+  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+  _output = output;
+  _upsample.configure(input, _output, inner_border, info);
+}
+
+void CLTransposeConvLayerUpsample::run()
+{
+  _output->map(CLScheduler::get().queue(), true);
+  if (is_data_type_quantized_asymmetric(_output->info()->data_type()))
+  {
+    const uint8_t quantized_zero = _output->info()->quantization_info().offset;
+    std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero);
+  }
+  else
+  {
+    memset(_output->buffer(), 0, _output->info()->total_size());
+  }
+  _output->unmap(CLScheduler::get().queue());
+
+  CLScheduler::get().enqueue(_upsample, false);
+}
index 9ba4c1a..7304c34 100644 (file)
@@ -2403,6 +2403,8 @@ void StageGenerator::visit(const model::operation::TransposeConvNode &node)
 
     model::ExplicitPadding padding;
     model::Stride stride;
+    uint32_t invalid_horizontal;
+    uint32_t invalid_vertical;
   };
 
   Param param;
@@ -2417,6 +2419,18 @@ void StageGenerator::visit(const model::operation::TransposeConvNode &node)
          (node.param().padding.type == model::PaddingType::VALID));
   param.padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape,
                                                  param.stride, ker_shape.W, ker_shape.H);
+  if (node.param().padding.type == model::PaddingType::VALID)
+  {
+    param.invalid_horizontal =
+        ofm_shape.W - (1 + (ifm_shape.W - 1) * param.stride.horizontal) - (ker_shape.W - 1);
+    param.invalid_vertical =
+        ofm_shape.H - (1 + (ifm_shape.H - 1) * param.stride.vertical) - (ker_shape.H - 1);
+  }
+  else
+  {
+    param.invalid_horizontal = 0;
+    param.invalid_vertical = 0;
+  }
 
   auto tensors = _tensor_builder;
 
@@ -2425,21 +2439,14 @@ void StageGenerator::visit(const model::operation::TransposeConvNode &node)
     auto ifm_alloc = tensors->at(param.ifm_index).get();
     auto ker_alloc = tensors->at(param.ker_index).get();
 
-    std::unique_ptr<::arm_compute::IFunction> fn;
-
-    auto l = nnfw::cpp14::make_unique<::arm_compute::CLDeconvolutionLayerEx>();
+    const auto tconv_info = acl_common::asPadStrideInfo(param.padding, param.stride);
 
-    auto padding = param.padding;
-    auto inner_border_right = padding.right - padding.left;
-    auto inner_border_top = padding.bottom - padding.top;
+    std::unique_ptr<::arm_compute::IFunction> fn;
 
-    padding.left = padding.right;
-    padding.top = padding.bottom;
-    auto symmetric_tconv_info =
-        ::neurun::backend::acl_common::asPadStrideInfo(padding, param.stride);
+    auto l = nnfw::cpp14::make_unique<::arm_compute::CLTransposeConvLayer>();
 
-    l->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(),
-                 symmetric_tconv_info, inner_border_right, inner_border_top);
+    l->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
+                 param.invalid_horizontal, param.invalid_vertical);
 
     fn = std::move(l);
 
index b2b6505..666d27e 100644 (file)
@@ -4011,6 +4011,8 @@ void Planner::visit(const ::internal::tflite::op::TransposeConv::Node &node)
     int ker_index;
     Padding padding;
     Stride stride;
+    uint32_t invalid_horizontal;
+    uint32_t invalid_vertical;
   };
 
   Param param;
@@ -4026,6 +4028,15 @@ void Planner::visit(const ::internal::tflite::op::TransposeConv::Node &node)
                       ? same_padding(ofm_shape, ifm_shape, param.stride, ker_shape.W, ker_shape.H)
                       : valid_padding();
 
+  param.invalid_horizontal =
+      (padding_type == ANEURALNETWORKS_PADDING_SAME)
+          ? 0
+          : ofm_shape.W - (1 + (ifm_shape.W - 1) * param.stride.horizontal) - (ker_shape.W - 1);
+  param.invalid_vertical =
+      (padding_type == ANEURALNETWORKS_PADDING_SAME)
+          ? 0
+          : ofm_shape.H - (1 + (ifm_shape.H - 1) * param.stride.vertical) - (ker_shape.H - 1);
+
   auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
     auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
     auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
@@ -4036,19 +4047,13 @@ void Planner::visit(const ::internal::tflite::op::TransposeConv::Node &node)
 
     if (::internal::arm_compute::isGpuMode())
     {
-      auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDeconvolutionLayerEx>();
-
-      auto padding = param.padding;
-      auto inner_border_right = padding.right - padding.left;
-      auto inner_border_top = padding.bottom - padding.top;
+      auto fn = nnfw::cpp14::make_unique<::arm_compute::CLTransposeConvLayer>();
 
-      padding.left = padding.right;
-      padding.top = padding.bottom;
-      auto symmetric_tconv_info = asPadStrideInfo(padding, param.stride);
+      auto symmetric_tconv_info = asPadStrideInfo(param.padding, param.stride);
 
       // TODO Support WeightInfo in some cases in order to performance improvement
       fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), nullptr, CAST_CL(ofm_alloc),
-                    symmetric_tconv_info, inner_border_right, inner_border_top);
+                    symmetric_tconv_info, param.invalid_horizontal, param.invalid_vertical);
       builder.append("TransposeConv", std::move(fn));
     }
     else
index ee76672..88ea9df 100644 (file)
@@ -11,7 +11,6 @@ GeneratedTests.prelu_ex_quant8_1
 GeneratedTests.prelu_ex_broadcast_quant8_1
 # Unexpected result
 GeneratedTests.pack*
-GeneratedTests.transpose_conv_ex_float_4
 # Not support broadcast
 GeneratedTests.logical_or_ex_broadcast_4D_2D
 # Unsupported optional input that has shape