From 19db3f9a095b6dd18d555bf18a40b515076c944c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Prasanna=20R/SNAP=20/SRI-Bangalore/Engineer/=EC=82=BC?= =?utf8?q?=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Mon, 3 Dec 2018 07:52:56 +0530 Subject: [PATCH] Implement SQRT_EX op in CPU (#3760) This patch implements SQRT_EX op in CPU. Related issue: #3459 Signed-off-by: prasannar --- runtimes/pure_arm_compute/src/compilation.cc | 25 +++++++--- .../src/internal/layers/SimpleSQRT.cc | 54 ++++++++++++++++++++++ .../src/internal/layers/SimpleSQRT.h | 41 ++++++++++++++++ 3 files changed, 114 insertions(+), 6 deletions(-) create mode 100644 runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc create mode 100644 runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc index 133f075..4f4d9da 100644 --- a/runtimes/pure_arm_compute/src/compilation.cc +++ b/runtimes/pure_arm_compute/src/compilation.cc @@ -104,6 +104,7 @@ #include "internal/layers/SimplePackLayer.h" #include "internal/layers/SimpleSpaceToBatchND.h" #include "internal/layers/SimpleNeg.h" +#include "internal/layers/SimpleSQRT.h" #include "internal/layers/ArgMinMax.h" #include "util/matrix/IndexIterator.h" @@ -3633,21 +3634,33 @@ void Planner::visit(const ::internal::tflite::op::SQRT::Node &node) const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; - if (::internal::arm_compute::isGpuMode()) + if (from_env(std::getenv("USE_SIMPLE_SQRT"))) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + // USE CPU VERSION OF SQRT + auto fn = nnfw::make_unique(); - fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), act_info); + fn->configure(input_alloc, output_alloc); builder.append("SQRT", std::move(fn)); } else { - auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); - fn->configure(input_alloc, output_alloc, act_info); + fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), act_info); - builder.append("SQRT", std::move(fn)); + builder.append("SQRT", std::move(fn)); + } + else + { + auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + + fn->configure(input_alloc, output_alloc, act_info); + + builder.append("SQRT", std::move(fn)); + } } }; diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc new file mode 100644 index 0000000..32d2828 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimpleSQRT.h" + +#include +#include + +void SimpleSQRT::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output) +{ + _input = input; + _output = output; +} + +void SimpleSQRT::run() +{ + auto &queue = ::arm_compute::CLScheduler::get().queue(); + if (::internal::arm_compute::isGpuMode()) + { + CAST_CL(_input)->map(queue); + CAST_CL(_output)->map(queue); + } + + arm_compute::Window window; + window.use_tensor_dimensions(_output->info()->tensor_shape()); + + execute_window_loop(window, [this](const arm_compute::Coordinates &id) { + // NOTE Must be two input tensors of identical type + // Must be output tensor of the same type as input0. + assert(_input->info()->data_type() == _output->info()->data_type()); + + const auto input_value = *reinterpret_cast(_input->ptr_to_element(id)); + *reinterpret_cast(_output->ptr_to_element(id)) = sqrt(input_value); + }); + + if (::internal::arm_compute::isGpuMode()) + { + CAST_CL(_input)->unmap(queue); + CAST_CL(_output)->unmap(queue); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h new file mode 100644 index 0000000..ec2e7b0 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SIMPLE_SQRT_H__ +#define __SIMPLE_SQRT_H__ + +#include "internal/arm_compute.h" +#include +#include + +class SimpleSQRT : public ::arm_compute::IFunction +{ +public: + SimpleSQRT(void) : _input(nullptr), _output(nullptr) + { + // DO NOTHING + } + + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_output; +}; + +#endif /*__SIMPLE_SQRT_H__ */ -- 2.7.4