From db8a3ca25f813578ce5a7c47e29e20b470faaad9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nikita=20Sizov/AI=20Tools=20Lab=20/SRR/Professional/?= =?utf8?q?=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Wed, 31 Jul 2019 13:07:29 +0300 Subject: [PATCH] ACL NEON Div Node (#5948) Add support of DivNode for ACL NEON Signed-off-by: Sizov Nikita --- .../neurun/backend/acl_neon/KernelGenerator.cc | 25 ++++++++++++++++++++-- runtimes/neurun/backend/acl_neon/ShapeFixer.cc | 17 +++++++++++++++ runtimes/neurun/backend/acl_neon/ShapeFixer.h | 1 + tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon | 1 - 4 files changed, 41 insertions(+), 3 deletions(-) diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc index 39e1173..e7d7eda 100644 --- a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc +++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc @@ -872,8 +872,29 @@ void KernelGenerator::visit(const model::operation::AddNode &node) void KernelGenerator::visit(const model::operation::DivNode &node) { - (void)node; - throw std::runtime_error("Not supported, yet"); + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(model::operation::DivNode::Input::LHS)}; + const auto rhs_index{node.getInputs().at(model::operation::DivNode::Input::RHS)}; + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseDivision>(); + + l->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); } void KernelGenerator::visit(const model::operation::ExpNode &node) diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc index 18455f6..c3d158e 100644 --- a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc +++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc @@ -209,6 +209,23 @@ void ShapeFixer::visit(const model::operation::AddNode &node) } } +void ShapeFixer::visit(const model::operation::DivNode &node) +{ + const auto lhs_index{node.getInputs().at(model::operation::DivNode::Input::LHS)}; + const auto rhs_index{node.getInputs().at(model::operation::DivNode::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + + // TODO remove const_cast later. For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<::neurun::model::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<::neurun::model::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + } // namespace acl_neon } // namespace backend } // namespace neurun diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.h b/runtimes/neurun/backend/acl_neon/ShapeFixer.h index 5d1758b..e038d37 100644 --- a/runtimes/neurun/backend/acl_neon/ShapeFixer.h +++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.h @@ -58,6 +58,7 @@ public: void visit(const model::operation::SquaredDifferenceNode &) override; void visit(const model::operation::SubNode &) override; void visit(const model::operation::AddNode &) override; + void visit(const model::operation::DivNode &) override; void visit(const model::operation::ComparisonNode &) override; private: diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon index be9e02c..fe4625d 100644 --- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon +++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon @@ -51,7 +51,6 @@ GeneratedTests.space_to_depth* GeneratedTests.svdf* GeneratedTests.tanh_ GeneratedTests.batch_to_space* -GeneratedTests.div_* GeneratedTests.space_to_batch* GeneratedTests.strided_slice* GeneratedTests.transpose* -- 2.7.4