From ab53b619ad3bbf20d871ac75337d3f2164f8e1cf Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EC=98=A4=ED=98=95=EC=84=9D/On-Device=20Lab=28SR=29/Staff?= =?utf8?q?=20Engineer/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Fri, 6 Sep 2019 14:53:44 +0900 Subject: [PATCH] Remove CLPermuteEx layer and kernel (#7253) CLPermuteEx is not used anymore in neurun - Remove this implementation - Fix pacl to use CLPermute (used in neurun acl-cl backend) Signed-off-by: Hyeongseok Oh --- .../core/CL/kernels/CLPermuteExKernel.h | 73 ----------- .../arm_compute/runtime/CL/CLFunctionsEx.h | 1 - .../arm_compute/runtime/CL/functions/CLPermuteEx.h | 51 -------- .../ARMComputeEx/src/core/CL/CLKernelLibrary.cpp | 4 - .../src/core/CL/cl_kernels/permute_ex.cl | 82 ------------- .../src/core/CL/kernels/CLPermuteExKernel.cpp | 136 --------------------- .../src/runtime/CL/functions/CLPermuteEx.cpp | 36 ------ runtimes/pure_arm_compute/src/compilation.cc | 2 +- 8 files changed, 1 insertion(+), 384 deletions(-) delete mode 100644 runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h delete mode 100644 runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h delete mode 100644 runtimes/libs/ARMComputeEx/src/core/CL/cl_kernels/permute_ex.cl delete mode 100644 runtimes/libs/ARMComputeEx/src/core/CL/kernels/CLPermuteExKernel.cpp delete mode 100644 runtimes/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h deleted file mode 100644 index 3434dee..0000000 --- a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLPERMUTEEXKERNEL_H__ -#define __ARM_COMPUTE_CLPERMUTEEXKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform tensor permutation. - * - * Permutes given a permutation vector - */ -class CLPermuteExKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLPermuteExKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPermuteExKernel(const CLPermuteExKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPermuteExKernel &operator=(const CLPermuteExKernel &) = delete; - /** Allow instances of this class to be moved */ - CLPermuteExKernel(CLPermuteExKernel &&) = default; - /** Allow instances of this class to be moved */ - CLPermuteExKernel &operator=(CLPermuteExKernel &&) = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to permute. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] perm Permutation vector - */ - void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLPermuteKernel - * - * @param[in] input First tensor input info. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] output Output tensor info. Data types supported: same as @p input. - * @param[in] perm Permutation vector - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const PermutationVector &perm); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - PermutationVector _perm; -}; -} // arm_compute -#endif /*__ARM_COMPUTE_CLPERMUTEEXKERNEL_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h index a060832..5fbbb25 100644 --- a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h deleted file mode 100644 index 9a0cc21..0000000 --- a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLPERMUTEEX_H__ -#define __ARM_COMPUTE_CLPERMUTEEX_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to execute an @ref CLPermuteKernel. */ -class CLPermuteEx : public ICLSimpleFunction -{ -public: - /** Set the input and output tensors. - * - * @param[in] input The input tensor to permute. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] perm Permutation vector - */ - void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); - /** Static function to check if given info will lead to a valid configuration of @ref CLPermute. - * - * @param[in] input First tensor input info. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] output Output tensor info. Data types supported: same as @p input. - * @param[in] perm Permutation vector - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const PermutationVector &perm); -}; -} -#endif /*__ARM_COMPUTE_CLPERMUTEEX_H__ */ diff --git a/runtimes/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/runtimes/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp index 3a1a8a0..8081256 100644 --- a/runtimes/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp +++ b/runtimes/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp @@ -150,10 +150,6 @@ const std::map CLKernelLibraryEx::_program_source_map "topkv2_quicksort.cl", #include "./cl_kernels/topkv2_quicksort.clembed" }, - { - "permute_ex.cl", -#include "./cl_kernels/permute_ex.clembed" - }, #endif /* EMBEDDED_KERNELS */ }; diff --git a/runtimes/libs/ARMComputeEx/src/core/CL/cl_kernels/permute_ex.cl b/runtimes/libs/ARMComputeEx/src/core/CL/cl_kernels/permute_ex.cl deleted file mode 100644 index 637788a..0000000 --- a/runtimes/libs/ARMComputeEx/src/core/CL/cl_kernels/permute_ex.cl +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(P1) && defined(P2) && defined(P3) && \ - defined(P4) -/** Perform a Generic permute operation on an input tensor of Shape DCHW. - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size. - e.g. -DDEPTH_IN=16 - * @attention Permutation vector is passed as a preprocessor arguement using -DP1, -DP2, -DP3 and - -DP4=int, e.g. -DP1=2 - * - * @param[in] input_ptr Pointer to the source image. Supported data - * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * image - * @param[out] output_ptr Pointer to the destination image. Supported data - * types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination image - */ -__kernel void permute_generic(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output)) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH_IN); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0); - - // WHCN format - int in_index[] = { - get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_IN, get_global_id(2) / DEPTH_IN, - }; - - // New locations based on Permuted index calc as out_index[index] = in_index[new_index] - *((__global DATA_TYPE *)tensor4D_offset(&out, in_index[P1], in_index[P2], in_index[P3], - in_index[P4])) = *((__global DATA_TYPE *)in.ptr); -} -#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(P1) && defined(P2) && defined(P3) && - // defined(P4) diff --git a/runtimes/libs/ARMComputeEx/src/core/CL/kernels/CLPermuteExKernel.cpp b/runtimes/libs/ARMComputeEx/src/core/CL/kernels/CLPermuteExKernel.cpp deleted file mode 100644 index 0941682..0000000 --- a/runtimes/libs/ARMComputeEx/src/core/CL/kernels/CLPermuteExKernel.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLPermuteExKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const PermutationVector &perm) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() < 1 || input->num_dimensions() > 4, - "Permutation upto 4-D input tensor is supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(perm.num_dimensions() < 1 || perm.num_dimensions() > 4, - "Permutation vector size should be less than or equal to 4"); - for (unsigned int i = 0; i < perm.num_dimensions(); ++i) - { - auto &p = perm[i]; - ARM_COMPUTE_RETURN_ERROR_ON_MSG(p >= perm.num_dimensions(), - "Permutation vector has invalid values"); - } - - // Validate configured output - if (output->total_size() != 0) - { - const TensorShape output_shape = - misc::shape_calculator::compute_permutation_output_shape(*input, perm); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - } - return Status{}; -} -} // namespace - -CLPermuteExKernel::CLPermuteExKernel() : _input(nullptr), _output(nullptr), _perm() {} - -void CLPermuteExKernel::configure(const ICLTensor *input, ICLTensor *output, - const PermutationVector &perm) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), perm)); - - _input = input; - _output = output; - _perm = perm; - - const TensorShape output_shape = - misc::shape_calculator::compute_permutation_output_shape(*input->info(), perm); - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape)); - - // Create kernel - std::set build_opts; - - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DDEPTH_IN=" + support::cpp11::to_string(input->info()->dimension(2))); - - // New positions of batch(D), height(H), width(w) and channel(C) based on permutation vector - build_opts.emplace("-DP1=" + - support::cpp11::to_string((perm.num_dimensions() >= 1) ? perm[0] : 0)); - build_opts.emplace("-DP2=" + - support::cpp11::to_string((perm.num_dimensions() >= 2) ? perm[1] : 1)); - build_opts.emplace("-DP3=" + - support::cpp11::to_string((perm.num_dimensions() >= 3) ? perm[2] : 2)); - build_opts.emplace("-DP4=" + - support::cpp11::to_string((perm.num_dimensions() >= 4) ? perm[3] : 3)); - - _kernel = static_cast( - CLKernelLibraryEx::get().create_kernel("permute_generic", build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps()); - - // The CLPermute doesn't need padding so update_window_and_padding() can be skipped - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -Status CLPermuteExKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const PermutationVector &perm) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, perm)); - - return Status{}; -} - -void CLPermuteExKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - - Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup output slice - Window slice_out(slice_in); - slice_out.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_out.set(3, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_in); - } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out)); -} diff --git a/runtimes/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp b/runtimes/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp deleted file mode 100644 index fb36327..0000000 --- a/runtimes/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLPermuteEx.h" - -#include "arm_compute/core/CL/kernels/CLPermuteExKernel.h" - -using namespace arm_compute; - -void CLPermuteEx::configure(const ICLTensor *input, ICLTensor *output, - const PermutationVector &perm) -{ - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, output, perm); - _kernel = std::move(k); -} - -Status CLPermuteEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const PermutationVector &perm) -{ - ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteExKernel::validate(input, output, perm)); - return Status{}; -} diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc index eaea97a..8cc86eb 100644 --- a/runtimes/pure_arm_compute/src/compilation.cc +++ b/runtimes/pure_arm_compute/src/compilation.cc @@ -3636,7 +3636,7 @@ void Planner::visit(const ::internal::tflite::op::Transpose::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPermuteEx>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPermute>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), getARMComputePermutationVector(param.rank, param.pv)); -- 2.7.4