From 0fe725cb4103df8ee02238bb3954864281d53f03 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EC=9E=A5=EC=A7=80=EC=84=AD/On-Device=20Lab=28SR=29/Enginee?= =?utf8?q?r/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Mon, 23 Sep 2019 14:32:15 +0900 Subject: [PATCH] Make to support EmbeddingLookup op for acl neon (#7663) This commit makes to support EmbeddingLookup op for acl neon. - Introduce NEEmbeddingLookup NEEmbeddingLookupKernel - Apply NEEmbeddingLookup layer for neurun Signed-off-by: jiseob.jang --- .../core/NEON/kernels/NEEmbeddingLookupKernel.h | 79 ++++++++++++++ .../arm_compute/runtime/NEON/NEFunctionsEx.h | 1 + .../runtime/NEON/functions/NEEmbeddingLookup.h | 65 ++++++++++++ .../core/NEON/kernels/NEEmbeddingLookupKernel.cpp | 118 +++++++++++++++++++++ .../runtime/NEON/functions/NEEmbeddingLookup.cpp | 29 +++++ .../neurun/backend/acl_neon/KernelGenerator.cc | 21 ++++ runtimes/neurun/backend/acl_neon/KernelGenerator.h | 1 + runtimes/neurun/backend/acl_neon/ShapeFixer.cc | 9 ++ runtimes/neurun/backend/acl_neon/ShapeFixer.h | 1 + tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon | 1 - .../neurun_frameworktest_list.armv7l.acl_neon.txt | 1 + 11 files changed, 325 insertions(+), 1 deletion(-) create mode 100644 runtimes/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h create mode 100644 runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h create mode 100644 runtimes/libs/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp create mode 100644 runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h new file mode 100644 index 0000000..1490e75 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., 
Ltd. All Rights Reserved + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__ +#define __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform EmbeddingLookup operation (copies the input slices selected by lookups into output) */ +class NEEmbeddingLookupKernel : public INEKernel +{ +public: + const char *name() const override { return "NEEmbeddingLookupKernel"; } + /** Default constructor */ + NEEmbeddingLookupKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEEmbeddingLookupKernel(const NEEmbeddingLookupKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + NEEmbeddingLookupKernel &operator=(const NEEmbeddingLookupKernel &) = delete; + /** Allow instances of this class to be moved */ + NEEmbeddingLookupKernel(NEEmbeddingLookupKernel &&) = default; + /** Allow instances of this class to be moved */ + NEEmbeddingLookupKernel &operator=(NEEmbeddingLookupKernel &&) = default; + /** Initialize the kernel's input, output and lookups. + * + * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] lookups 1D tensor whose values are indices into the first dimension of + * input. Data types supported: S32. + */ + void configure(const ITensor *input, ITensor *output, const ITensor *lookups); + /** Static function to check if given info will lead to a valid configuration of @ref + * NEEmbeddingLookupKernel + * + * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] output Destination tensor. Data types supported: same as @p input. + * @param[in] lookups Lookups info. Data types supported: S32. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const ITensorInfo *lookups); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + const ITensor *_lookups; + ITensor *_output; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h index 6be03f2..c6e80ba 100644 --- a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h new file mode 100644 index 0000000..0646f16 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file NEEmbeddingLookup.h + * @ingroup COM_AI_RUNTIME + * @brief This file contains arm_compute::NEEmbeddingLookup class + */ + +#ifndef __ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__ +#define __ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** + * @brief Class to perform EmbeddingLookup operation + */ +class NEEmbeddingLookup : public INESimpleFunctionNoBorder +{ +public: + /** + * @brief Set the input, output and lookups tensors. + * @param[in] input Source tensor. + * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p + * input. + * @param[in] lookups 1D tensor whose values are indices into the first dimension of + * input. Data types supported: S32. + * @return N/A + */ + void configure(const ITensor *input, ITensor *output, const ITensor *lookups); + /** Static function to check if given info will lead to a valid configuration of @ref NEEmbeddingLookup + * + * @param[in] input Source tensor info. Data types supported: + * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * @param[in] lookups Lookups tensor info. Data types supported: S32. + * + * @return a status. NOTE(review): the NEEmbeddingLookup.cpp added by this patch defines only configure() - confirm validate() is defined before calling it. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const ITensorInfo *lookups); +}; +} +#endif /*__ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__ */ diff --git a/runtimes/libs/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp b/runtimes/libs/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp new file mode 100644 index 0000000..5401afe --- /dev/null +++ b/runtimes/libs/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2018-2019 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; + +NEEmbeddingLookupKernel::NEEmbeddingLookupKernel() + : _input(nullptr), _lookups(nullptr), _output(nullptr) +{ +} + +void NEEmbeddingLookupKernel::configure(const ITensor *input, ITensor *output, + const ITensor *lookups) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), lookups->info())); + + _input = input; + _output = output; + _lookups = lookups; + + // Auto initialize output if not initialized + auto out_shape = input->info()->tensor_shape(); + out_shape.set(out_shape.num_dimensions() - 1, lookups->info()->num_dimensions()); + auto_init_if_empty(*output->info(), out_shape, 1, input->info()->data_type(), + input->info()->quantization_info()); + + INEKernel::configure(calculate_max_window(*output->info())); +} + +Status NEEmbeddingLookupKernel::validate(const arm_compute::ITensorInfo *input, + const arm_compute::ITensorInfo *output, + const arm_compute::ITensorInfo *lookups) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, lookups); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( + input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, + DataType::U32, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32); + + ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 2 && input->num_dimensions() > 4); + ARM_COMPUTE_ERROR_ON(lookups->num_dimensions() > 1); + + // Validate in case of configured output + if (output->total_size() > 0) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON(input->num_dimensions() != 
output->num_dimensions()); + ARM_COMPUTE_ERROR_ON(output->dimension(output->num_dimensions() - 1) != lookups->dimension(0)); + for (size_t i = 0; i < output->num_dimensions() - 1; ++i) + { + ARM_COMPUTE_ERROR_ON(input->dimension(i) != output->dimension(i)); + } + } + + return Status{}; +} + +void NEEmbeddingLookupKernel::run(const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); + + const size_t lookup_dim = _output->info()->num_dimensions() - 1; + + Window output_window{window}; + output_window.set(Window::DimX, + Window::Dimension(output_window.x().start(), output_window.x().end(), + _input->info()->dimension(0))); + + Window out_slice = output_window.first_slice_window_4D(); + do + { + Iterator output_it(_output, out_slice); + + execute_window_loop(out_slice, + [&](const Coordinates &id) { + const int32_t lookup = *reinterpret_cast( + _lookups->ptr_to_element(Coordinates{id[lookup_dim]})); + Coordinates input_id{id}; + input_id.set(lookup_dim, lookup); + memcpy(output_it.ptr(), _input->ptr_to_element(input_id), + _output->info()->dimension(0) * _output->info()->element_size()); + }, + output_it); + + } while (window.slide_window_slice_4D(out_slice)); +} diff --git a/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp new file mode 100644 index 0000000..00c3ed9 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h" + +#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +void NEEmbeddingLookup::configure(const ITensor *input, ITensor *output, const ITensor *lookups) +{ /* Create the kernel, configure it, then store it in _kernel (not declared in this class; presumably inherited from INESimpleFunctionNoBorder). */ + auto k = arm_compute::support::cpp14::make_unique<NEEmbeddingLookupKernel>(); + k->configure(input, output, lookups); + _kernel = std::move(k); +} diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc index a3930e0..cce6efc 100644 --- a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc +++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc @@ -567,6 +567,27 @@ void KernelGenerator::visit(const model::operation::ConcatNode &node) _execution_builder->append(std::move(acl_fn)); } +void KernelGenerator::visit(const model::operation::EmbeddingLookupNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lookups_index{ + node.getInputs().at(model::operation::EmbeddingLookupNode::Input::LOOKUPS)}; + const auto values_index{ + node.getInputs().at(model::operation::EmbeddingLookupNode::Input::VALUES)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto lookups_alloc = _tensor_builder->at(lookups_index).get(); + auto values_alloc = _tensor_builder->at(values_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEEmbeddingLookup>(); + + fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle()); /* argument order is (input, output, lookups) */ + + auto acl_fn = 
asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + void KernelGenerator::visit(const model::operation::FloorNode &node) { const auto ofm_index{node.getOutputs().at(0)}; diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.h b/runtimes/neurun/backend/acl_neon/KernelGenerator.h index 9d55777..fe8f312 100644 --- a/runtimes/neurun/backend/acl_neon/KernelGenerator.h +++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.h @@ -46,6 +46,7 @@ public: void visit(const model::operation::MeanNode &) override; void visit(const model::operation::AvgPool2DNode &) override; void visit(const model::operation::ConcatNode &) override; + void visit(const model::operation::EmbeddingLookupNode &) override; void visit(const model::operation::FloorNode &) override; void visit(const model::operation::FullyConnectedNode &) override; void visit(const model::operation::L2NormalizationNode &) override; diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc index f598d91..9e052d3 100644 --- a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc +++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc @@ -85,6 +85,15 @@ void ShapeFixer::visit(const model::operation::ConcatNode &node) _tensor_builder->dimCorrection(inputs, false); } +void ShapeFixer::visit(const model::operation::EmbeddingLookupNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto values_index{ + node.getInputs().at(model::operation::EmbeddingLookupNode::Input::VALUES)}; + _tensor_builder->dimCorrection(values_index, false); + _tensor_builder->dimCorrection(output_index, false); +} + void ShapeFixer::visit(const model::operation::ExpNode &) { /* DO NOTHING */} void ShapeFixer::visit(const model::operation::FloorNode &) { /* DO NOTHING */} diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.h b/runtimes/neurun/backend/acl_neon/ShapeFixer.h index 7ec54a5..e8dada6 100644 --- 
a/runtimes/neurun/backend/acl_neon/ShapeFixer.h +++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.h @@ -47,6 +47,7 @@ public: void visit(const model::operation::MeanNode &) override; void visit(const model::operation::AvgPool2DNode &) override; void visit(const model::operation::ConcatNode &) override; + void visit(const model::operation::EmbeddingLookupNode &) override; void visit(const model::operation::ExpNode &) override; void visit(const model::operation::FloorNode &) override; void visit(const model::operation::FullyConnectedNode &) override; diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon index 3cc1089..2a09308 100644 --- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon +++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon @@ -4,7 +4,6 @@ # Not support operations TrivialTest.BroadcastMulTwo GeneratedTests.dequantize -GeneratedTests.embedding_lookup* GeneratedTests.hashtable_lookup* GeneratedTests.lsh_projection* GeneratedTests.mobilenet* diff --git a/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt b/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt index bb6b322..cd98e66 100644 --- a/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt +++ b/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt @@ -4,6 +4,7 @@ concat conv_2d depthwise_conv_2d div +embedding_lookup floor fullyconnected/fc1 l2_normalization -- 2.7.4