From: Jiseob Jang/On-Device Lab(SR)/Engineer/Samsung Electronics
Date: Mon, 30 Sep 2019 06:58:48 +0000 (+0900)
Subject: Make to support Gather op for acl neon (#7746)
X-Git-Tag: submit/tizen/20191205.083104~1018
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=52c73854d4d2a4a08a2ccea87e659f35a7188735;p=platform%2Fcore%2Fml%2Fnnfw.git

Make to support Gather op for acl neon (#7746)

This commit adds support for the Gather op on the ACL NEON backend.
- Introduce NEGatherEx and NEGatherKernelEx
- Apply the NEGatherEx layer to neurun

Signed-off-by: jiseob.jang
---

diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h b/runtimes/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h
new file mode 100644
index 0000000..3fa9c6e
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEGATHERKERNELEX_H__
+#define __ARM_COMPUTE_NEGATHERKERNELEX_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Kernel to perform the gather operation on NEON */
+class NEGatherKernelEx : public INEKernel
+{
+public:
+  /** Default constructor. */
+  NEGatherKernelEx();
+  /** Prevent instances of this class from being copied (As this class contains pointers). */
+  NEGatherKernelEx(const NEGatherKernelEx &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers). */
+  NEGatherKernelEx &operator=(const NEGatherKernelEx &) = delete;
+  /** Allow instances of this class to be moved. */
+  NEGatherKernelEx(NEGatherKernelEx &&) = default;
+  /** Allow instances of this class to be moved. */
+  NEGatherKernelEx &operator=(NEGatherKernelEx &&) = default;
+  /** Default destructor */
+  ~NEGatherKernelEx() = default;
+
+  /** Name of the kernel
+   *
+   * @return Kernel name
+   */
+  const char *name() const override { return "NEGatherKernelEx"; }
+  /** Initialise the kernel's inputs and outputs
+   *
+   * @param[in] input   Source tensor. Supported tensor rank: up to 4. Data type supported:
+   *                    U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+   * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+   *                    following types: U32/S32.
+   *                    Each value must be in range [0, input.shape[@p axis])
+   * @param[out] output Destination tensor. Data type supported: Same as @p input
+   * @param[in] axis    (Optional) The axis in @p input to gather @p indices from. Negative values
+   *                    wrap around. Defaults to 0
+   */
+  void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0);
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * NEGatherKernelEx
+   *
+   * @param[in] input   Source tensor info. Supported tensor rank: up to 4. Data type supported:
+   *                    U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+   * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the
+   *                    following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+   * @param[in] output  Destination tensor info. Data type supported: Same as @p input
+   * @param[in] axis    (Optional) The axis in @p input to gather @p indices from. Negative values
+   *                    wrap around. Defaults to 0
+   *
+   * @return a status
+   */
+  static Status validate(const ITensorInfo *input, const ITensorInfo *indices,
+                         const ITensorInfo *output, int axis);
+
+  // Inherited methods overridden:
+  void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+  /** Implementation of the gather operation for 0 axis.
+   *
+   * For gather on the 0 axis an element by element copy is performed.
+   *
+   * @param[in] window Region on which to execute the kernel. (Must be a region of the window
+   *                   returned by window())
+   * @param[in] info   Info about executing thread and CPU.
+   */
+  template <typename U> void gather_0_axis(const Window &window, const ThreadInfo &info);
+
+  /** Implementation of the gather operation.
+   *
+   * For axis >= 1, a row-wise copy takes place.
+   *
+   * @param[in] window Region on which to execute the kernel. (Must be a region of the window
+   *                   returned by window())
+   * @param[in] info   Info about executing thread and CPU.
+   */
+  template <typename U> void gather_n_axis(const Window &window, const ThreadInfo &info);
+
+  using kernel_ptr = void (NEGatherKernelEx::*)(const Window &window, const ThreadInfo &info);
+
+  const ITensor *_input;
+  const ITensor *_indices;
+  int _axis;
+  ITensor *_output;
+  kernel_ptr _func;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEGATHERKERNELEX_H__ */
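
The private section above binds one of the two templated implementations to the kernel_ptr member once, in configure(), so run() dispatches without re-branching on axis or index type. A minimal standalone sketch of this member-function-pointer pattern (hypothetical class and method names, not the ACL types):

#include <iostream>

class Kernel
{
public:
  void configure(bool zero_axis)
  {
    // Bind the concrete implementation once; run() stays branch-free afterwards.
    _func = zero_axis ? &Kernel::impl_axis0 : &Kernel::impl_axisN;
  }
  void run() { (this->*_func)(); }

private:
  void impl_axis0() { std::cout << "element-wise copy\n"; }
  void impl_axisN() { std::cout << "row-wise copy\n"; }

  using kernel_ptr = void (Kernel::*)();
  kernel_ptr _func = nullptr;
};

int main()
{
  Kernel k;
  k.configure(true);
  k.run(); // prints "element-wise copy"
}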
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h b/runtimes/libs/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
index 1a6978a..16fd40e 100644
--- a/runtimes/libs/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
@@ -171,6 +171,50 @@ inline TensorShape compute_space_to_depth_shape_ex(const ITensorInfo *input, int
   return output_shape;
 }
 
+/** Calculate the gather output shape of a tensor
+ *
+ * @param[in] input_shape   Input tensor shape
+ * @param[in] indices_shape Indices tensor shape
+ * @param[in] actual_axis   The axis to be gathered
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_gather_shape_ex(const TensorShape &input_shape,
+                                           const TensorShape &indices_shape, uint32_t actual_axis)
+{
+  ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 3);
+  ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() > 4);
+  ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() + indices_shape.num_dimensions() - 1 > 4);
+  ARM_COMPUTE_ERROR_ON(actual_axis >= input_shape.num_dimensions());
+
+  TensorShape output_shape = input_shape;
+  if (indices_shape.num_dimensions() == 1)
+  {
+    output_shape[actual_axis] = indices_shape[0];
+  }
+  else if (indices_shape.num_dimensions() > 1)
+  {
+    output_shape.shift_right(indices_shape.num_dimensions() - 1);
+
+    for (uint32_t i = 0, o = 0; o < output_shape.num_dimensions(); ++o, ++i)
+    {
+      if (o == actual_axis)
+      {
+        ++i;
+        for (uint32_t in = 0; in < indices_shape.num_dimensions(); ++in, ++o)
+        {
+          output_shape[o] = indices_shape[in];
+        }
+      }
+      else
+      {
+        output_shape[o] = input_shape[i];
+      }
+    }
+  }
+  return output_shape;
+}
+
 } // namespace shape_calculator
 } // namespace misc
 } // namespace arm_compute
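
For the common rank-1 indices case, compute_gather_shape_ex simply replaces the gathered axis length with the number of indices; higher-rank indices splice their whole shape in at that axis (subject to the overall rank-4 limit). A standalone sketch of the rank-1 case on plain vectors (hypothetical helper name; ACL shapes are written innermost-first):

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<uint32_t> gather_shape_1d_indices(const std::vector<uint32_t> &input_shape,
                                              uint32_t num_indices, uint32_t actual_axis)
{
  assert(actual_axis < input_shape.size());
  std::vector<uint32_t> output_shape = input_shape;
  output_shape[actual_axis] = num_indices; // the gathered axis length becomes the index count
  return output_shape;
}

int main()
{
  // Gathering 5 entries along axis 1 of a {16, 10, 3} tensor yields {16, 5, 3}.
  for (uint32_t d : gather_shape_1d_indices({16, 10, 3}, 5, 1))
    std::cout << d << ' ';
  std::cout << '\n';
}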
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
index 208d5df..4ea7b97 100644
--- a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include <arm_compute/runtime/NEON/functions/NEGatherEx.h>
 #include
 #include
 #include
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h
new file mode 100644
index 0000000..d95e6a8
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEGATHEREX_H__
+#define __ARM_COMPUTE_NEGATHEREX_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEGatherKernelEx */
+class NEGatherEx : public INESimpleFunctionNoBorder
+{
+public:
+  /** Initialise the kernel's inputs and outputs
+   *
+   * @param[in] input   Source tensor. Supported tensor rank: up to 4. Data type supported:
+   *                    U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+   * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+   *                    following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+   * @param[out] output Destination tensor. Data type supported: Same as @p input
+   * @param[in] axis    (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+   */
+  void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0);
+
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * NEGatherKernelEx
+   *
+   * @param[in] input   Source tensor info. Supported tensor rank: up to 4. Data type supported:
+   *                    U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+   * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the
+   *                    following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+   * @param[in] output  Destination tensor info. Data type supported: Same as @p input
+   * @param[in] axis    (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+   *
+   * @return a status
+   */
+  static Status validate(const ITensorInfo *input, const ITensorInfo *indices,
+                         const ITensorInfo *output, int axis);
+};
+
+} // namespace arm_compute
+
+#endif /* __ARM_COMPUTE_NEGATHEREX_H__ */
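
A minimal usage sketch of the new function, assuming the usual ACL runtime setup (the shapes, axis of 1, and allocation order are illustrative choices, not part of this commit):

#include "arm_compute/runtime/NEON/functions/NEGatherEx.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
  // Gather 5 entries along axis 1 of a {16, 10, 3} F32 tensor -> {16, 5, 3}.
  Tensor input, indices, output;
  input.allocator()->init(TensorInfo(TensorShape(16U, 10U, 3U), 1, DataType::F32));
  indices.allocator()->init(TensorInfo(TensorShape(5U), 1, DataType::S32));
  output.allocator()->init(TensorInfo(TensorShape(16U, 5U, 3U), 1, DataType::F32));

  // Check the configuration first, as the runtime backends do.
  auto s = NEGatherEx::validate(input.info(), indices.info(), output.info(), 1);
  if (!bool(s))
    return 1;

  NEGatherEx gather;
  gather.configure(&input, &indices, &output, /*axis=*/1);

  input.allocator()->allocate();
  indices.allocator()->allocate();
  output.allocator()->allocate();
  // ... fill input and indices buffers here ...
  gather.run();
}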
diff --git a/runtimes/libs/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp b/runtimes/libs/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
index c83ece0..718f615 100644
--- a/runtimes/libs/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
+++ b/runtimes/libs/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
@@ -19,6 +19,7 @@
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibraryEx.h"
 #include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
 #include "arm_compute/core/UtilsEx.h"
 
 using namespace arm_compute;
@@ -26,51 +27,6 @@ using namespace arm_compute;
 
 namespace
 {
-inline TensorShape compute_gather_shape(const TensorShape &input_shape,
-                                        const TensorShape &indices_shape, uint32_t actual_axis)
-{
-  ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 3);
-  ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() > 4);
-  ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() + indices_shape.num_dimensions() - 1 > 4);
-  ARM_COMPUTE_ERROR_ON(actual_axis >= input_shape.num_dimensions());
-
-  TensorShape output_shape = input_shape;
-  if (indices_shape.num_dimensions() == 1)
-  {
-    output_shape[actual_axis] = indices_shape[0];
-  }
-  else if (indices_shape.num_dimensions() > 1)
-  {
-    output_shape.shift_right(indices_shape.num_dimensions() - 1);
-
-    for (uint32_t i = 0, o = 0; o < output_shape.num_dimensions(); ++o, ++i)
-    {
-      if (o == actual_axis)
-      {
-        ++i;
-        for (uint32_t in = 0; in < indices_shape.num_dimensions(); ++in, ++o)
-        {
-          output_shape[o] = indices_shape[in];
-        }
-      }
-      else
-      {
-        output_shape[o] = input_shape[i];
-      }
-    }
-  }
-  return output_shape;
-}
-
-/** Wrap-around a number within the range 0 <= x < m
- *
- * @param[in] x Input value
- * @param[in] m Range
- *
- * @return the wrapped-around number
- */
-template <typename T> inline T wrap_around(T x, T m) { return x >= 0 ?
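
The CL kernel now uses the shared compute_gather_shape_ex above instead of its local copy, and keeps calling wrap_around from the headers it already includes. For reference, the removed helper implements a Python-style modulo, which is what makes negative axis values wrap; a standalone equivalent:

#include <cassert>

template <typename T> inline T wrap_around(T x, T m) { return x >= 0 ? x % m : (x % m + m) % m; }

int main()
{
  assert(wrap_around(-1, 4) == 3); // axis -1 of a rank-4 tensor is axis 3
  assert(wrap_around(2, 4) == 2);  // non-negative axes pass through unchanged
}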
x % m : (x % m + m) % m; }
-
 inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *indices,
                                  const ITensorInfo *output, int axis)
 {
@@ -88,8 +44,8 @@ inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *in
   {
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
-    TensorShape output_shape =
-        compute_gather_shape(input->tensor_shape(), indices->tensor_shape(), actual_axis);
+    TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex(
+        input->tensor_shape(), indices->tensor_shape(), actual_axis);
     ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
   }
 
@@ -104,8 +60,8 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
   ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices);
   const uint32_t actual_axis = wrap_around(axis, static_cast<int>(input->num_dimensions()));
   std::unique_ptr<ITensorInfo> output_info = input->clone();
-  output_info->set_tensor_shape(
-      compute_gather_shape(input->tensor_shape(), indices->tensor_shape(), actual_axis));
+  output_info->set_tensor_shape(arm_compute::misc::shape_calculator::compute_gather_shape_ex(
+      input->tensor_shape(), indices->tensor_shape(), actual_axis));
 
   // Output auto initialization if not yet initialized
   auto_init_if_empty((*output), output_info->tensor_shape(), 1, input->data_type());
diff --git a/runtimes/libs/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp b/runtimes/libs/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
new file mode 100644
index 0000000..ce2413d
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
+
+#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+
+namespace arm_compute
+{
+namespace
+{
+/** Validate the indices
+ *
+ * Validate that indices are not negative
+ *
+ * @param[in] indices Indices tensor.
+ */
+template <typename U> void validate_indices(const ITensor *indices)
+{
+  for (size_t i = 0; i < indices->info()->tensor_shape()[0]; ++i)
+  {
+    ARM_COMPUTE_ERROR_ON(*(reinterpret_cast<U *>(indices->ptr_to_element(Coordinates(i)))) < 0);
+  }
+}
+
+} // namespace
+
+NEGatherKernelEx::NEGatherKernelEx() : _input{}, _indices{}, _axis{}, _output{}, _func{} {}
+
+template <typename U>
+inline void NEGatherKernelEx::gather_0_axis(const Window &window, const ThreadInfo &info)
+{
+  ARM_COMPUTE_UNUSED(info);
+
+  // Validate that the indices are not negative
+  validate_indices<U>(_indices);
+
+  Iterator output_it(_output, window);
+  execute_window_loop(
+      window,
+      [&](const Coordinates &id) {
+        Coordinates gather_id(id);
+        gather_id.collapse(_indices->info()->num_dimensions(), 0);
+
+        U new_index;
+        switch (_indices->info()->num_dimensions())
+        {
+          case 1:
+            new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0]))));
+            break;
+          case 2:
+            new_index =
+                *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1]))));
+            break;
+          case 3:
+            new_index = *(
+                reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1], id[2]))));
+            break;
+          default:
+            ARM_COMPUTE_ERROR("Wrong num of dimensions");
+            break;
+        }
+
+        gather_id.set(0, new_index);
+
+        std::copy_n(_input->ptr_to_element(gather_id), _output->info()->element_size(),
+                    output_it.ptr());
+      },
+      output_it);
+}
+
+template <typename U>
+void NEGatherKernelEx::gather_n_axis(const Window &window, const ThreadInfo &info)
+{
+  ARM_COMPUTE_UNUSED(info);
+
+  // Validate that the indices are not negative
+  validate_indices<U>(_indices);
+
+  Window output_window{window};
+  output_window.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+  Iterator output_it(_output, output_window);
+  execute_window_loop(
+      output_window,
+      [&](const Coordinates &id) {
+        Coordinates gather_id(id);
+        gather_id.collapse(_indices->info()->num_dimensions(), _axis);
+
+        U new_index;
+        switch (_indices->info()->num_dimensions())
+        {
+          case 1:
+            new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[_axis]))));
+            break;
+          case 2:
+            new_index = *(reinterpret_cast<U *>(
+                _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1]))));
+            break;
+          case 3:
+            new_index = *(reinterpret_cast<U *>(
+                _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1], id[_axis + 2]))));
+            break;
+          default:
+            ARM_COMPUTE_ERROR("Wrong num of dimensions");
+            break;
+        }
+
+        gather_id.set(_axis, new_index);
+
+        std::copy_n(_input->ptr_to_element(gather_id),
+                    _input->info()->dimension(0) * _output->info()->element_size(),
+                    output_it.ptr());
+      },
+      output_it);
+}
+
+void NEGatherKernelEx::configure(const ITensor *input, const ITensor *indices, ITensor *output,
+                                 int axis)
+{
+  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices);
+  ARM_COMPUTE_ERROR_ON(indices->info()->num_dimensions() > 3);
+  ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32);
+  ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
+      input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+      DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+
+  _input = input;
+  _indices = indices;
+  _output = output;
+  _axis = axis;
+
+  if (_axis < 0)
+  {
+    _axis += input->info()->num_dimensions();
+  }
+  ARM_COMPUTE_ERROR_ON(0 > _axis || _axis >= static_cast<int32_t>(input->info()->num_dimensions()));
+
+  if (0 == _axis)
+  {
+    switch (_indices->info()->data_type())
+    {
+      case DataType::U32:
+        _func = &NEGatherKernelEx::gather_0_axis<uint32_t>;
+        break;
+      case DataType::S32:
+        _func = &NEGatherKernelEx::gather_0_axis<int32_t>;
+        break;
+      default:
+        ARM_COMPUTE_ERROR("Not supported");
+        break;
+    }
+  }
+  else
+  {
+    switch (_indices->info()->data_type())
+    {
+      case DataType::U32:
+        _func = &NEGatherKernelEx::gather_n_axis<uint32_t>;
+        break;
+      case DataType::S32:
+        _func = &NEGatherKernelEx::gather_n_axis<int32_t>;
+        break;
+      default:
+        ARM_COMPUTE_ERROR("Not supported");
+        break;
+    }
+  }
+  // Output auto initialization if not yet initialized
+  TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex(
+      input->info()->tensor_shape(), indices->info()->tensor_shape(), _axis);
+  auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
+
+  // Create window
+  Window win = calculate_max_window(*output->info(), Steps());
+  output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
+
+  INEKernel::configure(win);
+}
+
+Status NEGatherKernelEx::validate(const ITensorInfo *input, const ITensorInfo *indices,
+                                  const ITensorInfo *output, int axis)
+{
+  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output);
+  ARM_COMPUTE_RETURN_ERROR_ON(indices->num_dimensions() > 3);
+  ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
+  ARM_COMPUTE_ERROR_ON(input->num_dimensions() + indices->num_dimensions() - 1 > 4);
+
+  if (axis < 0)
+  {
+    axis += input->num_dimensions();
+  }
+
+  ARM_COMPUTE_RETURN_ERROR_ON(0 > axis || axis >= static_cast<int32_t>(input->num_dimensions()));
+  ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
+      input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+      DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+
+  if (output->total_size() != 0)
+  {
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
+    TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex(
+        input->tensor_shape(), indices->tensor_shape(), axis);
+    ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
+  }
+
+  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32);
+
+  return Status{};
+}
+
+void NEGatherKernelEx::run(const Window &window, const ThreadInfo &info)
+{
+  ARM_COMPUTE_UNUSED(info);
+  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+  ARM_COMPUTE_ERROR_ON(_func == nullptr);
+
+  (this->*_func)(window, info);
+}
+
+} // namespace arm_compute
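
To make the two code paths concrete: gather_0_axis remaps coordinate 0 of every output element through the index tensor and copies element by element, while gather_n_axis copies a whole innermost row (dimension(0) * element_size bytes) per gathered index. A scalar model of the axis-0 remapping on plain arrays (illustrative only, not the ACL windowed loop):

#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
  const std::vector<float> input = {10.f, 11.f, 12.f, 13.f};
  const std::vector<int32_t> indices = {3, 0, 3};

  // output[i] = input[indices[i]] -- the coordinate on the gathered axis is remapped.
  std::vector<float> output(indices.size());
  for (size_t i = 0; i < indices.size(); ++i)
    output[i] = input[indices[i]];

  for (float v : output)
    std::cout << v << ' '; // prints: 13 10 13
  std::cout << '\n';
}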
diff --git a/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
new file mode 100644
index 0000000..90dabb3
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEGatherEx.h"
+
+#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
+
+namespace arm_compute
+{
+void NEGatherEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis)
+{
+  auto k = arm_compute::support::cpp14::make_unique<NEGatherKernelEx>();
+  k->configure(input, indices, output, axis);
+  _kernel = std::move(k);
+}
+
+Status NEGatherEx::validate(const ITensorInfo *input, const ITensorInfo *indices,
+                            const ITensorInfo *output, int axis)
+{
+  return NEGatherKernelEx::validate(input, indices, output, axis);
+}
+
+} // namespace arm_compute
diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
index b714f7e..0508a95 100644
--- a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
+++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
@@ -716,6 +716,49 @@ void KernelGenerator::visit(const model::operation::HashtableLookupNode &node)
   _execution_builder->append(std::move(acl_fn));
 }
 
+void KernelGenerator::visit(const model::operation::GatherNode &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+
+  const auto ifm_index{node.getInputs().at(model::operation::GatherNode::Input::INPUT)};
+  const auto indices_index{node.getInputs().at(model::operation::GatherNode::Input::INDICES)};
+
+  const auto axis_index{node.param().axis_index};
+
+  const auto ifm_shape = _ctx.at(ifm_index).shape();
+
+  const auto axis_value = static_cast<int>(_ctx.at(axis_index).asScalar<int32_t>());
+  // Converting in reverse order
+  const int axis =
+      ::neurun::backend::acl_common::ToARMComputeAxis(ifm_shape.rank(), axis_value).value();
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  auto indices_alloc = _tensor_builder->at(indices_index).get();
+  const auto backend_layout = ofm_alloc->layout();
+  UNUSED_RELEASE(backend_layout);
+
+  // NOTE The frontend layout and backend layout must be the same for this operation.
+  //      If they are not the same, we would have to add a stage(?) to permute the output
+  //      tensor, which is not efficient even if it works. In that case it would be better
+  //      to set these backend tensors to the same layout.
+  //      There is one more thing to consider. This operation depends on the layout of the
+  //      model. For example, if a model in NHWC has this operation with output rank == 4,
+  //      indices rank == 2 and axis == 2, it should gather along the W and C axes, but W
+  //      and C are not sequential in NCHW. So a backend in NCHW cannot handle this case.
+  assert(backend_layout == ifm_alloc->layout());
+  assert(backend_layout == indices_alloc->layout());
+  assert(ifm_shape.rank() < 4 || _current_subg_layout == backend_layout);
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEGatherEx>();
+
+  fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis);
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
 void KernelGenerator::visit(const model::operation::L2NormalizationNode &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.h b/runtimes/neurun/backend/acl_neon/KernelGenerator.h
index fe1ff7c..2603860 100644
--- a/runtimes/neurun/backend/acl_neon/KernelGenerator.h
+++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.h
@@ -50,6 +50,7 @@ public:
   void visit(const model::operation::EmbeddingLookupNode &) override;
   void visit(const model::operation::FloorNode &) override;
   void visit(const model::operation::FullyConnectedNode &) override;
+  void visit(const model::operation::GatherNode &) override;
   void visit(const model::operation::HashtableLookupNode &) override;
   void visit(const model::operation::L2NormalizationNode &) override;
   void visit(const model::operation::L2Pool2DNode &) override;
diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
index 915c2e9..6ba16c8 100644
--- a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
+++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
@@ -122,6 +122,16 @@ void ShapeFixer::visit(const model::operation::HashtableLookupNode &node)
   _tensor_builder->dimCorrection(output_index, false);
 }
 
+void ShapeFixer::visit(const model::operation::GatherNode &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(model::operation::GatherNode::Input::INPUT)};
+  const auto indices_index{node.getInputs().at(model::operation::GatherNode::Input::INDICES)};
+  _tensor_builder->dimCorrection(ofm_index, false);
+  _tensor_builder->dimCorrection(ifm_index, false);
+  _tensor_builder->dimCorrection(indices_index, false);
+}
+
 void ShapeFixer::visit(const model::operation::L2NormalizationNode &) { /* DO NOTHING */}
 
 void ShapeFixer::visit(const model::operation::L2Pool2DNode &) { /* DO NOTHING */}
diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.h b/runtimes/neurun/backend/acl_neon/ShapeFixer.h
index 4a83bc6..ab7bd20 100644
--- a/runtimes/neurun/backend/acl_neon/ShapeFixer.h
+++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.h
@@ -52,6 +52,7 @@ public:
   void visit(const model::operation::ExpNode &) override;
   void visit(const model::operation::FloorNode &) override;
   void visit(const model::operation::FullyConnectedNode &) override;
+  void visit(const model::operation::GatherNode &) override;
   void visit(const model::operation::HashtableLookupNode &) override;
   void visit(const model::operation::L2NormalizationNode &) override;
   void visit(const model::operation::L2Pool2DNode &) override;
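
ACL's TensorShape numbers dimension 0 as the innermost, while the neurun frontend counts axes from the outermost dimension, hence the "Converting in reverse order" step in visit(GatherNode) above. Assuming no layout permutation, ToARMComputeAxis reduces to rank - 1 - axis; a minimal standalone equivalent (hypothetical function name, and the negative-axis wrap is an added assumption, since NEGatherKernelEx also accepts negative axes):

#include <cassert>

// Map a frontend axis (outermost-first) to an ACL axis (innermost-first).
inline int to_acl_axis(int rank, int axis)
{
  if (axis < 0)
    axis += rank;         // wrap negative axes first (assumption)
  return rank - 1 - axis; // then reverse the axis order
}

int main()
{
  assert(to_acl_axis(4, 0) == 3);  // outermost NHWC "N" becomes ACL dimension 3
  assert(to_acl_axis(4, -1) == 0); // the innermost axis becomes ACL dimension 0
}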
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
index cdec336..df06758 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
@@ -7,7 +7,6 @@ GeneratedTests.mobilenet*
 GeneratedTests.svdf*
 GeneratedTests.batch_to_space*
 GeneratedTests.space_to_batch*
-GeneratedTests.gather_ex*
 GeneratedTests.topk_v2*
 # Unexpected result
 GeneratedTests.pack*
diff --git a/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt b/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt
index fd3410e..8bcdaef 100644
--- a/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt
+++ b/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt
@@ -8,6 +8,7 @@ div
 embedding_lookup
 floor
 fullyconnected
+gather
 hashtable_lookup
 l2_normalization
 l2_pool_2d