From 2789d8fcea80e6ce9a38759238b4009eeff258c5 Mon Sep 17 00:00:00 2001
From: =?utf8?q?=EC=98=A4=ED=98=95=EC=84=9D/On-Device=20Lab=28SR=29/Staff?=
 =?utf8?q?=20Engineer/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?=
 <hseok82.oh@samsung.com>
Date: Wed, 19 Jun 2019 16:37:18 +0900
Subject: [PATCH] Remove pad in armcompute-ex (#5448)

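Remove the unused pad implementation from armcompute-ex: the
CLPadLayerEx function, the CLPadLayerKernel kernel class and the pad.cl
OpenCL kernel, together with the CLFunctionsEx.h include and the
CLKernelLibraryEx kernel/program map entries that referenced them.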

Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
---
 .../arm_compute/core/CL/kernels/CLPadLayerKernel.h |  60 ---------
 .../arm_compute/runtime/CL/CLFunctionsEx.h         |   1 -
 .../runtime/CL/functions/CLPadLayerEx.h            |  47 -------
 libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp  |   5 -
 libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl    |  99 ---------------
 .../src/core/CL/kernels/CLPadLayerKernel.cpp       | 135 ---------------------
 .../src/runtime/CL/functions/CLPadLayerEx.cpp      |  28 -----
 7 files changed, 375 deletions(-)
 delete mode 100644 libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h
 delete mode 100644 libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h
 delete mode 100644 libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl
 delete mode 100644 libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp
 delete mode 100644 libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp

diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h
deleted file mode 100644
index cbaa2ad..0000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
-* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-* Copyright (c) 2016-2018 ARM Limited.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*      http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-#ifndef __ARM_COMPUTE_CLPADLAYERKERNEL_H__
-#define __ARM_COMPUTE_CLPADLAYERKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform PAD operation */
-class CLPadLayerKernel : public ICLKernel
-{
-public:
-  /** Default constructor */
-  CLPadLayerKernel();
-  /** Prevent instances of this class from being copied (As this class contains pointers) */
-  CLPadLayerKernel(const CLPadLayerKernel &) = delete;
-  /** Prevent instances of this class from being copied (As this class contains pointers) */
-  CLPadLayerKernel &operator=(const CLPadLayerKernel &) = delete;
-  /** Allow instances of this class to be moved */
-  CLPadLayerKernel(CLPadLayerKernel &&) = default;
-  /** Allow instances of this class to be moved */
-  CLPadLayerKernel &operator=(CLPadLayerKernel &&) = default;
-  /** Default destructor */
-  ~CLPadLayerKernel() = default;
-  /** Initialise the kernel's input and output.
-   *
-   * @param[in]  input  Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
-   * @param[in]  output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
-   * @param[in]  pad_size Padding Size tensor. Data types supported : S32
-   */
-  void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *pad_size);
-
-  // Inherited methods overridden:
-  void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
-  const ICLTensor *_input; /**< Source tensor */
-  ICLTensor *_output;      /**< Destination tensor */
-  ICLTensor *_pad_size;    /**< Padding Size tensor */
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLPADLAYERKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
index bc14406..d2463fa 100644
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
@@ -30,7 +30,6 @@
 #include <arm_compute/runtime/CL/functions/CLHashtableLookup.h>
 #include <arm_compute/runtime/CL/functions/CLLogicalNot.h>
 #include <arm_compute/runtime/CL/functions/CLNeg.h>
-#include <arm_compute/runtime/CL/functions/CLPadLayerEx.h>
 #include <arm_compute/runtime/CL/functions/CLPermuteEx.h>
 #include <arm_compute/runtime/CL/functions/CLPixelWiseDivision.h>
 #include <arm_compute/runtime/CL/functions/CLPReLU.h>
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h
deleted file mode 100644
index d6ea486..0000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
-* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-* Copyright (c) 2016-2018 ARM Limited.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*      http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-#ifndef __ARM_COMPUTE_CLPADLAYEREX_H__
-#define __ARM_COMPUTE_CLPADLAYEREX_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLPadLayerKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLPadLayerEx : public ICLSimpleFunction
-{
-public:
-  /** Initialise the kernel's input and output.
-   *
-   * @param[in]           input     Input tensor. Data types supported:
-   *                                U8/QASYMM8/S16/S32/F16/F32.
-   * @param[out]          output    Output tensor. Data types supported:
-   *                                U8/QASYMM8/S16/S32/F16/F32.
-   * @param[in]           pad_size  Tensor for Padding values in NHWC format shape [n, 2],
-   *                                where n is the rank of tensor . Data types supported: S32
-   */
-  void configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size);
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLPADLAYEREX_H__ */
diff --git a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
index f65fc08..05d30d1 100644
--- a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
+++ b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
@@ -57,7 +57,6 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map
     {"gather_1d_out", "gather.cl"},
     {"hashtable_lookup", "hashtable_lookup.cl"},
     {"neg_tensor", "neg_tensor.cl"},
-    {"pad", "pad.cl"},
     {"permute_generic", "permute_ex.cl"},
     {"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"},
     {"prelu", "prelu.cl"},
@@ -131,10 +130,6 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map
 #include "./cl_kernels/neg_tensor.clembed"
     },
     {
-        "pad.cl",
-#include "./cl_kernels/pad.clembed"
-    },
-    {
         "prelu.cl",
 #include "./cl_kernels/prelu.clembed"
     },
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl
deleted file mode 100644
index e74c6b5..0000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(IW) && defined(IH) && defined(ID) && defined(IB) && defined(DEPTH_OUT) && \
-    defined(ZERO_VALUE)
-/** Basic function to pad a tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size.
- *            e.g. -DDEPTH_OUT=16
- * @attention Input dimensions should be passed as a preprocessor argument using -DIW(width),
- *            -DIH(height), -DID(depth) and -DIB(batch). e.g. -DIW = 4
- * @attention The value to be set by pad value using -DZERO_VALUE=value. e.g. -DZERO_VALUE=0
- *
- * @param[in]  input_ptr                            Pointer to the source tensor. Supported data
- *                                                  types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in
- *                                                  bytes)
- * @param[in]  input_step_x                         input_stride_x * number of elements along X
- *                                                  processed per workitem(in  bytes)
- * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in
- *                                                  bytes)
- * @param[in]  input_step_y                         input_stride_y * number of elements along Y
- *                                                  processed per workitem(in  bytes)
- * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in
- *                                                  bytes)
- * @param[in]  input_step_z                         input_stride_z * number of elements along Z
- *                                                  processed per workitem(in  bytes)
- * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source
- *                                                  tensor
- * @param[out] output_ptr                           Pointer to the destination tensor. Supported
- *                                                  data types: same as @p input_ptr
- * @param[in]  output_stride_x                      Stride of the destination tensor in X dimension
- *                                                  (in bytes)
- * @param[in]  output_step_x                        output_stride_x * number of elements along X
- *                                                  processed per workitem(in bytes)
- * @param[in]  output_stride_y                      Stride of the destination tensor in Y dimension
- *                                                  (in bytes)
- * @param[in]  output_step_y                        output_stride_y * number of elements along Y
- *                                                  processed per workitem(in bytes)
- * @param[in]  output_stride_z                      Stride of the destination tensor in Z dimension
- *                                                  (in bytes)
- * @param[in]  output_step_z                        output_stride_z * number of elements along Z
- *                                                  processed per workitem(in bytes)
- * @param[in]  output_stride_w                      Stride of the destination tensor in W dimension
- *                                                  (in bytes)
- * @param[in]  output_step_w                        output_stride_w * number of elements along W
- *                                                  processed per workitem(in bytes)
- * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the
- *                                                  destination tensor
- * @param[in]  pad_values                           Padding values for each of the dimensions. Only
- *                                                  pad values for Up(for batch), Top(for height),
- *                                                  Left(for width) and Front(for depth) are
- *                                                  required. Supported data type: S32
- */
-
-__kernel void pad(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output), const int4 pad_values)
-{
-  Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
-  Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
-  int index[4] = {0};
-
-  index[0] = get_global_id(0);             // W
-  index[1] = get_global_id(1);             // H
-  index[2] = get_global_id(2) % DEPTH_OUT; // C
-  index[3] = get_global_id(2) / DEPTH_OUT; // N
-
-  if (index[0] < pad_values.x || index[0] >= (IW + pad_values.x) || index[1] < pad_values.y ||
-      index[1] >= (IH + pad_values.y) || index[2] < pad_values.z ||
-      index[2] >= (ID + pad_values.z) || index[3] < pad_values.w || index[3] >= (IB + pad_values.w))
-  {
-    *((__global DATA_TYPE *)out.ptr) = (DATA_TYPE)ZERO_VALUE;
-  }
-  else
-  {
-    *((__global DATA_TYPE *)out.ptr) = *(
-        (__global DATA_TYPE *)tensor4D_offset(&in, index[0] - pad_values.x, index[1] - pad_values.y,
-                                              index[2] - pad_values.z, index[3] - pad_values.w));
-  }
-}
-
-#endif // if defined(IW) && defined(IH) && defined(ID) && defined(IB) && defined(DEPTH_OUT) &&
-       // defined(ZERO_VALUE)
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp
deleted file mode 100644
index 50f8a39..0000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input_info, const ITensorInfo *output_info,
-                          const ITensorInfo *pad_size_info)
-{
-  ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_info, 1, DataType::U8, DataType::QASYMM8,
-                                                DataType::S16, DataType::S32, DataType::F16,
-                                                DataType::F32);
-  ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_info, 1, DataType::U8, DataType::QASYMM8,
-                                                DataType::S16, DataType::S32, DataType::F16,
-                                                DataType::F32);
-  ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(pad_size_info, 1, DataType::S32);
-
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(
-      !(input_info->num_dimensions() > 0 && input_info->num_dimensions() <= 4),
-      "Pad kernel supports upto 4-D input tensor");
-
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_info->num_dimensions() != output_info->num_dimensions(),
-                                  "Output and input should have same number of dimensions");
-
-  if (input_info->data_type() == DataType::QASYMM8)
-  {
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_info->quantization_info() !=
-                                        output_info->quantization_info(),
-                                    "The input and output quantization info are different!");
-  }
-
-  return Status{};
-}
-
-} // namespace
-
-CLPadLayerKernel::CLPadLayerKernel() : _input(nullptr), _output(nullptr), _pad_size(nullptr) {}
-
-void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *pad_size)
-{
-  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, pad_size);
-  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pad_size->info()));
-
-  _input = input;
-  _output = output;
-  _pad_size = pad_size;
-
-  // Set kernel build options
-  std::set<std::string> build_opts;
-  build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
-  build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
-  build_opts.emplace("-DIW=" + support::cpp11::to_string(input->info()->dimension(0)));
-  build_opts.emplace("-DIH=" + support::cpp11::to_string(input->info()->dimension(1)));
-  build_opts.emplace("-DID=" + support::cpp11::to_string(input->info()->dimension(2)));
-  build_opts.emplace("-DIB=" + support::cpp11::to_string(input->info()->dimension(3)));
-  if (input->info()->data_type() == DataType::QASYMM8)
-  {
-    build_opts.emplace("-DZERO_VALUE=" +
-                       support::cpp11::to_string(input->info()->quantization_info().offset));
-  }
-  else
-  {
-    build_opts.emplace("-DZERO_VALUE=" + support::cpp11::to_string(0));
-  }
-
-  // Create kernel
-  _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("pad", build_opts));
-
-  // Configure  kernel window
-  Window win = calculate_max_window(*output->info(), Steps());
-
-  Coordinates coord;
-  coord.set_num_dimensions(output->info()->num_dimensions());
-  output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
-  ICLKernel::configure_internal(win);
-}
-
-void CLPadLayerKernel::run(const Window &window, cl::CommandQueue &queue)
-{
-  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-  ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
-  _pad_size->map(queue);
-
-  auto pad_width_left = *reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, 0}));
-  auto pad_height_top = *reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, 1}));
-  auto pad_depth_front = *reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, 2}));
-  auto pad_batch_up = *reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, 3}));
-
-  _pad_size->unmap(queue);
-
-  // Pad_values which needs to be passed
-  const cl_int4 paddingValues = {
-      {static_cast<cl_int>(pad_width_left), static_cast<cl_int>(pad_height_top),
-       static_cast<cl_int>(pad_depth_front), static_cast<cl_int>(pad_batch_up)}};
-
-  Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
-  // Setup output slice
-  Window slice_in(slice_out);
-  slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
-  slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
-  slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
-  slice_in.set(3, Window::Dimension(0, 0, 0));
-
-  do
-  {
-    unsigned int idx = 0;
-    add_4D_tensor_argument(idx, _input, slice_in);
-    add_4D_tensor_argument(idx, _output, slice_out);
-    _kernel.setArg<cl_int4>(idx++, paddingValues);
-    enqueue(queue, *this, slice_out, lws_hint());
-  } while (window.slide_window_slice_4D(slice_out) && window.slide_window_slice_4D(slice_in));
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
deleted file mode 100644
index 5265b6c..0000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
-* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-* Copyright (c) 2016-2018 ARM Limited.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*      http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h"
-
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-
-using namespace arm_compute;
-
-void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size)
-{
-  auto k = arm_compute::support::cpp14::make_unique<CLPadLayerKernel>();
-  k->configure(input, output, pad_size);
-  _kernel = std::move(k);
-}
-- 
2.7.4