From: 장지섭/On-Device Lab(SR)/Engineer/삼성전자 Date: Tue, 17 Sep 2019 08:23:58 +0000 (+0900) Subject: Make to support ArgMax op for acl neon (#7515) X-Git-Tag: accepted/tizen/unified/20190918.102349~19 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4d4f85e1561eaf7dfd9f4126e24b85af24168d55;p=platform%2Fcore%2Fml%2Fnnfw.git Make to support ArgMax op for acl neon (#7515) This commit adds support for the ArgMax op on acl neon, except for the int32 type. - Introduce NEArgMinMax layer - Apply NEArgMinMax layer for neurun Signed-off-by: jiseob.jang --- diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h index 6eb0830..fb5323d 100644 --- a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h @@ -16,6 +16,7 @@ #ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__ #define __ARM_COMPUTE_NEFUNCTIONSEX_H__ +#include #include #include #include diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h new file mode 100644 index 0000000..604cd93 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2018-2019 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__ +#define __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h" +#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to compute the index of the min/max element along one axis (arg min/max) */ +template <ReductionOperation op> class NEArgMinMaxStatic : public IFunction +{ +public: + /** Constructor; the optional @p memory_manager manages the memory of the intermediate tensor */ + NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32 + * @param[in] axis Reduction axis. A negative value is wrapped around by the rank of @p input. 
+ * @param[out] output Destination tensor holding the indices; its shape is the shape of @p input with @p axis removed. Data type supported: U32 + */ + void configure(ITensor *input, int axis, ITensor *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEArgMinMaxStatic + * + * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32 + * @param[in] axis Reduction axis. A negative value is wrapped around by the rank of @p input. + * @param[in] output Destination tensor info; must not be nullptr. Data type supported: U32 + * + * @return A status + */ + static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + MemoryGroup _memory_group; + NEArgMinMaxLayer _reduction_kernel; + Tensor _reduced_out; + NEReshapeLayer _reshape; +}; + +/** Basic function to run arg max. */ +using NEArgMax = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>; +/** Basic function to run arg min. */ +using NEArgMin = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__ */ diff --git a/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp new file mode 100644 index 0000000..5ba465b --- /dev/null +++ b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2018-2019 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEArgMinMax.h" + +#include "arm_compute/core/CPP/Validate.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +namespace arm_compute +{ + +template <ReductionOperation OP> +NEArgMinMaxStatic<OP>::NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(std::move(memory_manager)), _reduction_kernel(), _reduced_out(), _reshape() +{ +} + +template <ReductionOperation OP> +Status NEArgMinMaxStatic<OP>::validate(const ITensorInfo *input, int axis, + const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, + DataType::F32); + + TensorShape out_shape = input->tensor_shape(); + const int input_dims = input->num_dimensions(); + int axis_local = axis; + + // Convert negative axis into its non-negative equivalent + axis_local = wrap_around(axis_local, input_dims); + + ARM_COMPUTE_RETURN_ERROR_ON(axis_local > 3); + ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local) > input->num_dimensions() - 1); + out_shape.remove_dimension(axis_local); + + const TensorInfo out_info = output->clone()->set_tensor_shape(out_shape); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info); + + return Status{}; +} + +template <ReductionOperation OP> +void NEArgMinMaxStatic<OP>::configure(ITensor *input, int axis, ITensor *output) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + + int axis_local = axis; + const int input_dims = input->info()->num_dimensions(); + + // Convert negative axis into its non-negative equivalent + axis_local = wrap_around(axis_local, input_dims); + + // Perform reduction for axis; the intermediate result keeps the axis with extent 1 + TensorShape intermediate_shape = input->info()->tensor_shape(); + intermediate_shape.set(axis_local, 1); + auto in = input; + + _reduced_out.allocator()->init(TensorInfo(intermediate_shape, output->info()->num_channels(), + output->info()->data_type(), + output->info()->quantization_info())); + _memory_group.manage(&_reduced_out); + 
_reduction_kernel.configure(in, axis_local, &_reduced_out, OP); + + // Allocate intermediate tensor + _reduced_out.allocator()->allocate(); + + // Configure reshape layer to drop the reduced dimension from the output shape + TensorShape out_shape = input->info()->tensor_shape(); + out_shape.remove_dimension(axis_local); + auto_init_if_empty(*output->info(), output->info()->clone()->set_tensor_shape(out_shape)); + _reshape.configure(&_reduced_out, output); +} + +template <ReductionOperation OP> void NEArgMinMaxStatic<OP>::run() +{ + MemoryGroupResourceScope scope_mg(_memory_group); + + _reduction_kernel.run(); + _reshape.run(); +} + +// Explicit instantiations of the supported operations +template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>; +template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>; +} // namespace arm_compute diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc index 32222d1..0293b83 100644 --- a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc +++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc @@ -181,6 +181,51 @@ void KernelGenerator::visit(const model::operation::AbsNode &node) _execution_builder->append(std::move(acl_fn)); } +void KernelGenerator::visit(const model::operation::ArgMaxNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::ArgMaxNode::Input::INPUT)}; + const auto axis_index{node.param().axis_index}; + + auto ifm_shape = _ctx.at(ifm_index).shape(); + auto ofm_shape = _ctx.at(ofm_index).shape(); + auto axis_shape = _ctx.at(axis_index).shape(); + + assert(_ctx.at(axis_index).isConstant()); + // Axis rank is always 1. 
+ assert(axis_shape.rank() == 1); + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + const auto ifm_rank = ifm_shape.rank(); + auto frontend_layout = _current_subg_layout; + auto backend_layout = ifm_alloc->layout(); + int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>(); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + assert(axis_value >= 0 && axis_value < ifm_rank); + const auto fixed_axis = + acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); + + // auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMinMaxLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMax>(); + + // NOTE The output data type is always overridden to U32 here (the S32 check is commented out) + // if (ofm_alloc->info()->data_type() == arm_compute::DataType::S32) + //{ + ofm_alloc->info()->set_data_type(arm_compute::DataType::U32); + //} + fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle()); + // fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(), + // arm_compute::ReductionOperation::ARG_IDX_MAX); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + void KernelGenerator::visit(const model::operation::Conv2DNode &node) { using model::operation::Conv2DNode; diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.h b/runtimes/neurun/backend/acl_neon/KernelGenerator.h index 7b93c4f..28ef565 100644 --- a/runtimes/neurun/backend/acl_neon/KernelGenerator.h +++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.h @@ -37,6 +37,7 @@ public: void visit(const model::Subgraph &) override; void visit(const model::operation::AbsNode &) override; + void visit(const model::operation::ArgMaxNode &) override; void visit(const model::operation::Conv2DNode &) override; void visit(const model::operation::DepthwiseConv2DNode &) override; void visit(const model::operation::MaxPool2DNode &) override; diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc 
b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc index 1fdb5db..f78b566 100644 --- a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc +++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc @@ -61,6 +61,8 @@ ShapeFixer::ShapeFixer(const neurun::model::Operands &ctx, void ShapeFixer::visit(const model::operation::AbsNode &) { /* DO NOTHING */} +void ShapeFixer::visit(const model::operation::ArgMaxNode &) { /* DO NOTHING */} + void ShapeFixer::visit(const model::operation::Conv2DNode &) { /* DO NOTHING */} void ShapeFixer::visit(const model::operation::DepthwiseConv2DNode &) { /* DO NOTHING */} diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.h b/runtimes/neurun/backend/acl_neon/ShapeFixer.h index f5c6721..796ea39 100644 --- a/runtimes/neurun/backend/acl_neon/ShapeFixer.h +++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.h @@ -38,6 +38,7 @@ public: std::shared_ptr tensor_builder() override { return _tensor_builder; } void visit(const model::operation::AbsNode &) override; + void visit(const model::operation::ArgMaxNode &) override; void visit(const model::operation::Conv2DNode &) override; void visit(const model::operation::DepthwiseConv2DNode &) override; void visit(const model::operation::MaxPool2DNode &) override; diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon index cc93f97..255ef0b 100644 --- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon +++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon @@ -3,7 +3,6 @@ # # Not support operations TrivialTest.BroadcastMulTwo -GeneratedTests.argmax* GeneratedTests.depth_to_space* GeneratedTests.dequantize GeneratedTests.embedding_lookup* @@ -28,3 +27,6 @@ GeneratedTests.exp_ex_1D_float GeneratedTests.exp_ex_2D_float # Unsupported optional input that has shape GeneratedTests.lstm2* +# Unsupported data type (int32) +GeneratedTests.argmax_ex_int32 +GeneratedTests.argmax_ex_neg_axis_int32