From: Hyeongseok Oh/On-Device Lab(SR)/Staff Engineer/Samsung Electronics
Date: Wed, 19 Jun 2019 07:37:18 +0000 (+0900)
Subject: Remove pad in armcompute-ex (#5448)
X-Git-Tag: submit/tizen/20190809.050447~661
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2789d8fcea80e6ce9a38759238b4009eeff258c5;p=platform%2Fcore%2Fml%2Fnnfw.git

Remove pad in armcompute-ex (#5448)

Remove unused pad implementation in armcompute-ex

Signed-off-by: Hyeongseok Oh
---
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h
deleted file mode 100644
index cbaa2ad..0000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
-* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-* Copyright (c) 2016-2018 ARM Limited.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-#ifndef __ARM_COMPUTE_CLPADLAYERKERNEL_H__
-#define __ARM_COMPUTE_CLPADLAYERKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a PAD operation */
-class CLPadLayerKernel : public ICLKernel
-{
-public:
-  /** Default constructor */
-  CLPadLayerKernel();
-  /** Prevent instances of this class from being copied (As this class contains pointers) */
-  CLPadLayerKernel(const CLPadLayerKernel &) = delete;
-  /** Prevent instances of this class from being copied (As this class contains pointers) */
-  CLPadLayerKernel &operator=(const CLPadLayerKernel &) = delete;
-  /** Allow instances of this class to be moved */
-  CLPadLayerKernel(CLPadLayerKernel &&) = default;
-  /** Allow instances of this class to be moved */
-  CLPadLayerKernel &operator=(CLPadLayerKernel &&) = default;
-  /** Default destructor */
-  ~CLPadLayerKernel() = default;
-  /** Initialise the kernel's input and output.
-   *
-   * @param[in]  input    Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
-   * @param[out] output   Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
-   * @param[in]  pad_size Padding Size tensor. Data types supported: S32
-   */
-  void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *pad_size);
-
-  // Inherited methods overridden:
-  void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
-  const ICLTensor *_input; /**< Source tensor */
-  ICLTensor *_output;      /**< Destination tensor */
-  ICLTensor *_pad_size;    /**< Padding Size tensor */
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLPADLAYERKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
index bc14406..d2463fa 100644
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
@@ -30,7 +30,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h
deleted file mode 100644
index d6ea486..0000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
-* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-* Copyright (c) 2016-2018 ARM Limited.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-#ifndef __ARM_COMPUTE_CLPADLAYEREX_H__
-#define __ARM_COMPUTE_CLPADLAYEREX_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLPadLayerKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLPadLayerEx : public ICLSimpleFunction
-{
-public:
-  /** Initialise the kernel's input and output.
-   *
-   * @param[in]  input    Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
-   * @param[out] output   Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
-   * @param[in]  pad_size Tensor for padding values in NHWC format, shape [n, 2] where n is the
-   *                      rank of the tensor. Data types supported: S32
-   */
-  void configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size);
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLPADLAYEREX_H__ */
diff --git a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
index f65fc08..05d30d1 100644
--- a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
+++ b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
@@ -57,7 +57,6 @@ const std::map CLKernelLibraryEx::_kernel_program_map
     {"gather_1d_out", "gather.cl"},
     {"hashtable_lookup", "hashtable_lookup.cl"},
     {"neg_tensor", "neg_tensor.cl"},
-    {"pad", "pad.cl"},
     {"permute_generic", "permute_ex.cl"},
     {"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"},
     {"prelu", "prelu.cl"},
@@ -131,10 +130,6 @@
 #include "./cl_kernels/neg_tensor.clembed"
     },
     {
-      "pad.cl",
-#include "./cl_kernels/pad.clembed"
-    },
-    {
       "prelu.cl",
 #include "./cl_kernels/prelu.clembed"
     },
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl
deleted file mode 100644
index e74c6b5..0000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(IW) && defined(IH) && defined(ID) && defined(IB) && defined(DEPTH_OUT) && \
-    defined(ZERO_VALUE)
-/** Basic function to pad a tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size,
- *            e.g. -DDEPTH_OUT=16
- * @attention Input dimensions should be passed as preprocessor arguments using -DIW (width),
- *            -DIH (height), -DID (depth) and -DIB (batch), e.g. -DIW=4
- * @attention The padding value should be passed using -DZERO_VALUE=value, e.g. -DZERO_VALUE=0
- *
- * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
- * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem (in bytes)
- * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
- * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem (in bytes)
- * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in bytes)
- * @param[in]  input_step_z                         input_stride_z * number of elements along Z processed per workitem (in bytes)
- * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source tensor
- * @param[out] output_ptr                           Pointer to the destination tensor. Supported data types: same as @p input_ptr
- * @param[in]  output_stride_x                      Stride of the destination tensor in X dimension (in bytes)
- * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem (in bytes)
- * @param[in]  output_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
- * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem (in bytes)
- * @param[in]  output_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
- * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem (in bytes)
- * @param[in]  output_stride_w                      Stride of the destination tensor in W dimension (in bytes)
- * @param[in]  output_step_w                        output_stride_w * number of elements along W processed per workitem (in bytes)
- * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in]  pad_values                           Padding values for each of the dimensions. Only the pad values for Up (for batch),
- *                                                  Top (for height), Left (for width) and Front (for depth) are required.
- *                                                  Supported data type: S32
- */
-
-__kernel void pad(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output), const int4 pad_values)
-{
-  Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
-  Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
-  int index[4] = {0};
-
-  index[0] = get_global_id(0);             // W
-  index[1] = get_global_id(1);             // H
-  index[2] = get_global_id(2) % DEPTH_OUT; // C
-  index[3] = get_global_id(2) / DEPTH_OUT; // N
-
-  if (index[0] < pad_values.x || index[0] >= (IW + pad_values.x) || index[1] < pad_values.y ||
-      index[1] >= (IH + pad_values.y) || index[2] < pad_values.z ||
-      index[2] >= (ID + pad_values.z) || index[3] < pad_values.w || index[3] >= (IB + pad_values.w))
-  {
-    *((__global DATA_TYPE *)out.ptr) = (DATA_TYPE)ZERO_VALUE;
-  }
-  else
-  {
-    *((__global DATA_TYPE *)out.ptr) = *(
-        (__global DATA_TYPE *)tensor4D_offset(&in, index[0] - pad_values.x, index[1] - pad_values.y,
-                                              index[2] - pad_values.z, index[3] - pad_values.w));
-  }
-}
-
-#endif // if defined(IW) && defined(IH) && defined(ID) && defined(IB) && defined(DEPTH_OUT) &&
-       // defined(ZERO_VALUE)
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp
deleted file mode 100644
index 50f8a39..0000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input_info, const ITensorInfo *output_info,
-                          const ITensorInfo *pad_size_info)
-{
-  ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_info, 1, DataType::U8, DataType::QASYMM8,
-                                                DataType::S16, DataType::S32, DataType::F16,
-                                                DataType::F32);
-  ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_info, 1, DataType::U8, DataType::QASYMM8,
-                                                DataType::S16, DataType::S32, DataType::F16,
-                                                DataType::F32);
-  ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(pad_size_info, 1, DataType::S32);
-
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(
-      !(input_info->num_dimensions() > 0 && input_info->num_dimensions() <= 4),
-      "Pad kernel supports up to 4-D input tensors");
-
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_info->num_dimensions() != output_info->num_dimensions(),
-                                  "Output and input should have the same number of dimensions");
-
-  if (input_info->data_type() == DataType::QASYMM8)
-  {
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_info->quantization_info() !=
-                                        output_info->quantization_info(),
-                                    "The input and output quantization info are different!");
-  }
-
-  return Status{};
-}
-
-} // namespace
-
-CLPadLayerKernel::CLPadLayerKernel() : _input(nullptr), _output(nullptr), _pad_size(nullptr) {}
-
-void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *pad_size)
-{
-  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, pad_size);
-  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pad_size->info()));
-
-  _input = input;
-  _output = output;
-  _pad_size = pad_size;
-
-  // Set kernel build options
-  std::set build_opts;
-  build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
-  build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
-  build_opts.emplace("-DIW=" + support::cpp11::to_string(input->info()->dimension(0)));
-  build_opts.emplace("-DIH=" + support::cpp11::to_string(input->info()->dimension(1)));
-  build_opts.emplace("-DID=" + support::cpp11::to_string(input->info()->dimension(2)));
-  build_opts.emplace("-DIB=" + support::cpp11::to_string(input->info()->dimension(3)));
-  if (input->info()->data_type() == DataType::QASYMM8)
-  {
-    build_opts.emplace("-DZERO_VALUE=" +
-                       support::cpp11::to_string(input->info()->quantization_info().offset));
-  }
-  else
-  {
-    build_opts.emplace("-DZERO_VALUE=" + support::cpp11::to_string(0));
-  }
-
-  // Create kernel
-  _kernel = static_cast(CLKernelLibraryEx::get().create_kernel("pad", build_opts));
-
-  // Configure kernel window
-  Window win = calculate_max_window(*output->info(), Steps());
-
-  Coordinates coord;
-  coord.set_num_dimensions(output->info()->num_dimensions());
-  output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
-  ICLKernel::configure_internal(win);
-}
-
-void CLPadLayerKernel::run(const Window &window, cl::CommandQueue &queue)
-{
-  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-  ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
-  _pad_size->map(queue);
-
-  auto pad_width_left = *reinterpret_cast(_pad_size->ptr_to_element({0, 0}));
-  auto pad_height_top = *reinterpret_cast(_pad_size->ptr_to_element({0, 1}));
-  auto pad_depth_front = *reinterpret_cast(_pad_size->ptr_to_element({0, 2}));
-  auto pad_batch_up = *reinterpret_cast(_pad_size->ptr_to_element({0, 3}));
-
-  _pad_size->unmap(queue);
-
-  // Padding values to be passed to the kernel
-  const cl_int4 paddingValues = {
-      {static_cast(pad_width_left), static_cast(pad_height_top),
-       static_cast(pad_depth_front), static_cast(pad_batch_up)}};
-
-  Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
-  // Setup input slice
-  Window slice_in(slice_out);
-  slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
-  slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
-  slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
-  slice_in.set(3, Window::Dimension(0, 0, 0));
-
-  do
-  {
-    unsigned int idx = 0;
-    add_4D_tensor_argument(idx, _input, slice_in);
-    add_4D_tensor_argument(idx, _output, slice_out);
-    _kernel.setArg(idx++, paddingValues);
-    enqueue(queue, *this, slice_out, lws_hint());
-  } while (window.slide_window_slice_4D(slice_out) && window.slide_window_slice_4D(slice_in));
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
deleted file mode 100644
index 5265b6c..0000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
-* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-* Copyright (c) 2016-2018 ARM Limited.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h"
-
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-
-using namespace arm_compute;
-
-void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size)
-{
-  auto k = arm_compute::support::cpp14::make_unique();
-  k->configure(input, output, pad_size);
-  _kernel = std::move(k);
-}
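
Note (illustration, not part of the patch): the deleted pad.cl kernel performs a constant-value 4-D pad. Each work item computes one output element; if its (W, H, C, N) index falls inside the padded border it writes ZERO_VALUE, otherwise it copies the input element shifted by the "before" padding amounts that CLPadLayerKernel::run reads from the pad_size tensor (left, top, front, batch). The host-side C++ sketch below mirrors that logic only to document what is being removed; Shape4D and pad_constant_4d are hypothetical names, and the contiguous W-major layout is an assumption of the sketch, not of ARMComputeEx.

#include <cstdint>
#include <vector>

// Hypothetical 4-D shape in the W/H/C/N order used by the removed kernel.
struct Shape4D
{
  int w, h, c, n;
};

// Host-side sketch of the removed "pad" OpenCL kernel: output elements inside the
// padded border get zero_value; all others are copied from the input shifted by the
// per-dimension "before" padding (left, top, front, batch). The caller must size
// `out` to out_shape.w * out_shape.h * out_shape.c * out_shape.n elements.
template <typename T>
void pad_constant_4d(const std::vector<T> &in, const Shape4D &in_shape, std::vector<T> &out,
                     const Shape4D &out_shape, int pad_left, int pad_top, int pad_front,
                     int pad_batch, T zero_value)
{
  auto offset = [](const Shape4D &s, int w, int h, int c, int n) {
    return ((n * s.c + c) * s.h + h) * s.w + w; // contiguous W-major layout (assumption)
  };

  for (int n = 0; n < out_shape.n; ++n)
    for (int c = 0; c < out_shape.c; ++c)
      for (int h = 0; h < out_shape.h; ++h)
        for (int w = 0; w < out_shape.w; ++w)
        {
          // Same border test as the removed kernel's if-condition.
          const bool in_border = w < pad_left || w >= in_shape.w + pad_left || h < pad_top ||
                                 h >= in_shape.h + pad_top || c < pad_front ||
                                 c >= in_shape.c + pad_front || n < pad_batch ||
                                 n >= in_shape.n + pad_batch;

          out[offset(out_shape, w, h, c, n)] =
              in_border ? zero_value
                        : in[offset(in_shape, w - pad_left, h - pad_top, c - pad_front,
                                    n - pad_batch)];
        }
}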
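
For completeness, a hypothetical caller-side sketch of the function this patch deletes, based only on the removed CLPadLayerEx.h declaration (configure(input, output, pad_size)). The tensor shapes, includes, and the CLTensor/TensorInfo setup are illustrative assumptions in the style of ARM Compute Library examples, not code from this repository.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h" // removed by this patch

using namespace arm_compute;

void pad_layer_ex_example()
{
  CLScheduler::get().default_init();

  CLTensor input, output, pad_size;
  // 4-D input and an already-enlarged output; the removed kernel does not infer shapes.
  input.allocator()->init(TensorInfo(TensorShape(4U, 4U, 3U, 1U), 1, DataType::F32));
  output.allocator()->init(TensorInfo(TensorShape(6U, 6U, 3U, 1U), 1, DataType::F32));
  // S32 padding tensor of shape [rank, 2]; CLPadLayerKernel::run reads the "before"
  // entries (left, top, front, batch) from its first column.
  pad_size.allocator()->init(TensorInfo(TensorShape(2U, 4U), 1, DataType::S32));

  CLPadLayerEx pad;
  pad.configure(&input, &output, &pad_size);

  input.allocator()->allocate();
  output.allocator()->allocate();
  pad_size.allocator()->allocate();

  // ... fill input and pad_size on the host, then run the (now removed) function:
  pad.run();
}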