From 999bccb29d429261390159440b8479852c3217eb Mon Sep 17 00:00:00 2001 From: =?utf8?q?Prasanna=20R/System=20SW=20/SRI-Bangalore/Engineer/?= =?utf8?q?=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Mon, 1 Oct 2018 11:38:57 +0530 Subject: [PATCH] Implement SquaredDifference in PACL (#2815) This patch implements Squared Difference using the Sub and Square kernels. It replaces the previous patch #2570, which realized Squared Difference using Sub and Mul. Signed-off-by: prasannar --- runtimes/pure_arm_compute/src/compilation.cc | 22 +++----------- .../internal/layers/SquaredDifferenceOperation.cc | 35 +++++++++++++++------- .../internal/layers/SquaredDifferenceOperation.h | 14 +++++---- 3 files changed, 37 insertions(+), 34 deletions(-) diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc index 54fa5eb..a74469c 100644 --- a/runtimes/pure_arm_compute/src/compilation.cc +++ b/runtimes/pure_arm_compute/src/compilation.cc @@ -3386,26 +3386,12 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node) auto lhs_alloc = ctx.at(::internal::tflite::operand::Index{param.lhs_index}); auto rhs_alloc = ctx.at(::internal::tflite::operand::Index{param.rhs_index}); - if (::internal::arm_compute::isGpuMode()) - { - auto fn = nnfw::make_unique<SquaredDifferenceOperation>(); - - // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification - fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE, 1.0, - ::arm_compute::RoundingPolicy::TO_NEAREST_EVEN); - - builder.append("SquaredDifference", std::move(fn)); - } - else // NEON - { - auto fn = nnfw::make_unique<SquaredDifferenceOperation>(); + auto fn = nnfw::make_unique<SquaredDifferenceOperation>(); - // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification - fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE, 1.0, - ::arm_compute::RoundingPolicy::TO_ZERO); + // TODO Decide ConvertPolicy (WARP? SATURATE?) 
according to NN API specification + fn->configure(lhs_alloc, rhs_alloc, ofm_alloc); - builder.append("SquaredDifference", std::move(fn)); - } + builder.append("SquaredDifference", std::move(fn)); ActivationBuilder{builder}.append(param.activation, ofm_alloc); }; diff --git a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc b/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc index 2b670bd..e9120a2 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc @@ -3,9 +3,7 @@ void SquaredDifferenceOperation::configure(::arm_compute::ITensor *input1, ::arm_compute::ITensor *input2, - ::arm_compute::ITensor *output, - ::arm_compute::ConvertPolicy ConvertPolicy, float scale, - ::arm_compute::RoundingPolicy RoundingPolicy) + ::arm_compute::ITensor *output) { _input1 = input1; _input2 = input2; @@ -13,14 +11,31 @@ void SquaredDifferenceOperation::configure(::arm_compute::ITensor *input1, if (::internal::arm_compute::isGpuMode()) { - _cl_sub.configure(CAST_CL(input1), CAST_CL(input2), CAST_CL(output), ConvertPolicy); - _cl_mul.configure(CAST_CL(output), CAST_CL(output), CAST_CL(output), scale, ConvertPolicy, - RoundingPolicy); + _cl_intermediate.allocator()->init(*input1->info()); + + _cl_sub.configure(CAST_CL(input1), CAST_CL(input2), &_cl_intermediate, + ::arm_compute::ConvertPolicy::SATURATE); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::SQUARE}; + + _cl_act.configure(&_cl_intermediate, CAST_CL(output), act_info); + + _cl_intermediate.allocator()->allocate(); } else { - _neon_sub.configure(input1, input2, output, ConvertPolicy); - _neon_mul.configure(output, output, output, scale, ConvertPolicy, RoundingPolicy); + _neon_intermediate.allocator()->init(*input1->info()); + + _neon_sub.configure(input1, input2, &_neon_intermediate, + 
::arm_compute::ConvertPolicy::SATURATE); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::SQUARE}; + + _neon_act.configure(&_neon_intermediate, output, act_info); + + _neon_intermediate.allocator()->allocate(); } } @@ -29,11 +44,11 @@ void SquaredDifferenceOperation::run(void) if (::internal::arm_compute::isGpuMode()) { _cl_sub.run(); - _cl_mul.run(); + _cl_act.run(); } else { _neon_sub.run(); - _neon_mul.run(); + _neon_act.run(); } } diff --git a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h b/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h index 3782c4e..b9738c9 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h +++ b/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h @@ -5,16 +5,15 @@ #include #include -#include +#include #include -#include +#include class SquaredDifferenceOperation : public ::arm_compute::IFunction { public: void configure(::arm_compute::ITensor *input1, ::arm_compute::ITensor *input2, - ::arm_compute::ITensor *output, ::arm_compute::ConvertPolicy ConvertPolicy, - float scale, ::arm_compute::RoundingPolicy RoundingPolicy); + ::arm_compute::ITensor *output); public: void run(void) override; @@ -23,13 +22,16 @@ private: ::arm_compute::ITensor *_input1; ::arm_compute::ITensor *_input2; + ::arm_compute::CLTensor _cl_intermediate; + ::arm_compute::Tensor _neon_intermediate; + ::arm_compute::ITensor *_output; private: ::arm_compute::CLArithmeticSubtraction _cl_sub; - ::arm_compute::CLPixelWiseMultiplication _cl_mul; + ::arm_compute::CLActivationLayer _cl_act; ::arm_compute::NEArithmeticSubtraction _neon_sub; - ::arm_compute::NEPixelWiseMultiplication _neon_mul; + ::arm_compute::NEActivationLayer _neon_act; }; #endif // __SQUARED_DIFFERENCE_OPERATION_H__ -- 2.7.4