From ad6c37d2bb56ad36fca25a73edaf122dec2964dd Mon Sep 17 00:00:00 2001 From: =?utf8?q?Shubham=20Gupta/System=20SW=20/SRI-Bangalore/Engineer/?= =?utf8?q?=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Tue, 23 Oct 2018 05:39:55 +0530 Subject: [PATCH] PAD op in PACL as CPU Fallback (#2857) This patch will add PAD op in PACL as CPU version Signed-off-by: shubham --- runtimes/pure_arm_compute/src/compilation.cc | 49 ++++--- .../src/internal/layers/SimplePadLayer.cc | 142 +++++++++++++++++++++ .../src/internal/layers/SimplePadLayer.h | 46 +++++++ 3 files changed, 222 insertions(+), 15 deletions(-) create mode 100644 runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc create mode 100644 runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc index e79b581..d589e04 100644 --- a/runtimes/pure_arm_compute/src/compilation.cc +++ b/runtimes/pure_arm_compute/src/compilation.cc @@ -88,9 +88,9 @@ #include "internal/arm_compute/tensor/View.h" #include "internal/layers/GenericReshapeLayer.h" #include "internal/layers/SimpleArithmeticAddition.h" +#include "internal/layers/SimplePadLayer.h" #include "internal/layers/SimpleCastLayer.h" #include "internal/layers/GenericFullyConnectedLayer.h" -#include "internal/layers/PadLayer.h" #include "internal/layers/SimpleSpaceToDepth.h" #include "internal/layers/SimpleEmbeddingLookup.h" #include "internal/layers/SimpleDepthToSpace.h" @@ -3636,6 +3636,10 @@ void Planner::visit(const ::internal::tflite::op::Pad::Node &node) const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; const ::internal::tflite::operand::Index paddings_index{node.param().paddings_index}; + const auto paddings_shape = _ctx.at(paddings_index).shape().asTensor(); + + assert(_ctx.at(paddings_index).hasData() == true); + // Set Shape Constraints and TensorInfo _builder.addShapeConstr( ifm_index, 
asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), @@ -3648,37 +3652,52 @@ void Planner::visit(const ::internal::tflite::op::Pad::Node &node) asTensorInfo(asTensorShape(_ctx.at(paddings_index).shape()), _ctx.at(paddings_index).type(), _ctx.at(paddings_index).scale(), _ctx.at(paddings_index).zeroPoint())); + // initializer for padding + { + auto pad_type = _ctx.at(paddings_index).type(); + + if (pad_type == ANEURALNETWORKS_TENSOR_INT32) + { + auto pad_base = _ctx.at(paddings_index).data().base(); + auto pad_size = _ctx.at(paddings_index).data().size(); + auto pad_shape = _ctx.at(paddings_index).shape().asMatrix(); + + // Supported padding for height and width only. + auto initializer = std::bind(initMatrixTensor, _1, pad_shape, pad_base, pad_size); + _builder.addInitializer(paddings_index, initializer); + } + else + { + throw std::runtime_error("Only Int32 datatype is supported for Pad values"); + } + } + // Construct operation parameters struct Param { int ofm_index; int ifm_index; - int32_t padding_size; + int padding_index; }; Param param; param.ofm_index = ofm_index.asInt(); param.ifm_index = ifm_index.asInt(); - - assert(_ctx.at(paddings_index).hasData() == true); - - // TODO: Currently we are supporting uniform padding for the tensor, so only a single - // value is being read. (TOP = BOTTOM = LEFT = RIGHT). 
- // Need to read padding values for all the sides (TOP, BOTTOM, LEFT & RIGHT) - - const auto &padding_data = _ctx.at(paddings_index).data(); - auto base = padding_data.base(); - auto padsize = reinterpret_cast(base) + 3; - param.padding_size = *padsize; + param.padding_index = paddings_index.asInt(); auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + auto pad_alloc = ctx.at(::internal::tflite::operand::Index{param.padding_index}); + + auto fn = nnfw::make_unique(); + + // only 4d Tensors are supported + int rank = 4; - auto fn = nnfw::make_unique(); + fn->configure(ifm_alloc, ofm_alloc, pad_alloc, getARMComputeAxises(rank)); - fn->configure(ifm_alloc, ofm_alloc, param.padding_size); builder.append("Pad", std::move(fn)); }; diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc new file mode 100644 index 0000000..65bb512 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "internal/layers/SimplePadLayer.h"
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+#include <cassert>
+#include <cstdint>
+
+/**
+ * @brief Validate the arguments and remember them for a later run()
+ *
+ * Nothing is copied or computed here. The tensor pointers are stored as given and are
+ * dereferenced in run(). All validation is done with assert(), so it is compiled out when
+ * NDEBUG is defined.
+ *
+ * @param[in]  input        Tensor to be padded (must not be null)
+ * @param[out] output       Tensor receiving the padded result (must not be null)
+ * @param[in]  padding_size Tensor holding the padding amounts (must not be null)
+ * @param[in]  axises       Axis mapping; must have exactly 4 entries, each within [0, 4)
+ */
+void SimplePadLayer::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+                               ::arm_compute::ITensor *padding_size,
+                               const ::arm_compute::Coordinates &axises)
+{
+  // Convert the dimension count once so that every comparison below is int against int.
+  const int rank = static_cast<int>(axises.num_dimensions());
+  assert(rank == 4);
+  assert(input != nullptr && output != nullptr && padding_size != nullptr);
+
+  for (int i = 0; i < rank; ++i)
+  {
+    assert(axises[i] >= 0);
+    assert(axises[i] < rank);
+  }
+
+  _input = input;
+  _output = output;
+  _padding_size = padding_size;
+  _axises = axises;
+}
+
+/**
+ * @brief Copy a tensor into a larger one, writing 0 into the border along height and width
+ *
+ * Elements are addressed as (batch, height, width, depth) and translated into tensor
+ * coordinates with asARMComputeCoordinates() and @p axises. Only height and width are padded;
+ * the batch and depth extents of input and output are asserted to be equal.
+ *
+ * The padding amounts are read as int32 from @p padding_size at coordinates
+ * {0, 1} (top), {1, 1} (bottom), {0, 2} (left) and {1, 2} (right).
+ *
+ * @tparam T Element type used to read @p input_data and to write @p output_data
+ *
+ * @param[in]  input_data   Source tensor
+ * @param[in]  input_shape  Shape of the source tensor
+ * @param[in]  padding_size Tensor holding the padding amounts
+ * @param[out] output_data  Destination tensor
+ * @param[in]  output_shape Shape of the destination tensor
+ * @param[in]  axises       Axis mapping handed to asARMComputeCoordinates()
+ */
+template <typename T>
+inline void ApplyPadding(const ::arm_compute::ITensor *input_data,
+                         const ::arm_compute::TensorShape &input_shape,
+                         const ::arm_compute::ITensor *padding_size,
+                         ::arm_compute::ITensor *output_data,
+                         const ::arm_compute::TensorShape &output_shape,
+                         const ::arm_compute::Coordinates &axises)
+{
+  const int input_height = input_shape[axises[1]];
+  const int input_width = input_shape[axises[2]];
+
+  const int batch = output_shape[axises[0]];
+  const int output_height = output_shape[axises[1]];
+  const int output_width = output_shape[axises[2]];
+  const int depth = output_shape[axises[3]];
+
+  // Supports only spatial padding.
+  // side: 0 selects the top/left amount, 1 the bottom/right amount.
+  // axis: 1 selects height, 2 selects width.
+  const auto read_pad = [padding_size](int side, int axis) -> int32_t {
+    return *reinterpret_cast<const int32_t *>(
+        padding_size->ptr_to_element(::arm_compute::Coordinates{side, axis}));
+  };
+
+  const int32_t pad_top = read_pad(0, 1);
+  const int32_t pad_left = read_pad(0, 2);
+
+  // Bottom/right amounts are needed only to cross-check the output shape, so they are read
+  // inside the assertions and leave no unused local behind when NDEBUG is defined.
+  assert(input_shape[axises[0]] == output_shape[axises[0]]);
+  assert(input_height + pad_top + read_pad(1, 1) == output_height);
+  assert(input_width + pad_left + read_pad(1, 2) == output_width);
+  assert(input_shape[axises[3]] == output_shape[axises[3]]);
+
+  // Walk the output extent (not the computed padded extent) so that a shape mismatch that
+  // slips past the assertions in a release build can never write outside the output tensor.
+  for (int b = 0; b < batch; ++b)
+  {
+    for (int h = 0; h < output_height; ++h)
+    {
+      const int src_h = h - pad_top;
+      const bool row_in_border = src_h < 0 || src_h >= input_height;
+
+      for (int w = 0; w < output_width; ++w)
+      {
+        const int src_w = w - pad_left;
+        const bool in_border = row_in_border || src_w < 0 || src_w >= input_width;
+
+        for (int d = 0; d < depth; ++d)
+        {
+          const auto output_id =
+              asARMComputeCoordinates(::arm_compute::Coordinates{b, h, w, d}, axises);
+          auto *dst = reinterpret_cast<T *>(output_data->ptr_to_element(output_id));
+
+          if (in_border)
+          {
+            *dst = 0;
+            continue;
+          }
+
+          const auto input_id =
+              asARMComputeCoordinates(::arm_compute::Coordinates{b, src_h, src_w, d}, axises);
+          *dst = *reinterpret_cast<const T *>(input_data->ptr_to_element(input_id));
+        }
+      }
+    }
+  }
+}
+
+/**
+ * @brief Pad the configured input into the configured output on the CPU
+ *
+ * U8 and QASYMM8 inputs are processed as uint8_t, F32 inputs as float. Any other data type is
+ * reported through ARM_COMPUTE_ERROR. When ::internal::arm_compute::isGpuMode() is true, the
+ * three tensors are mapped through the CL scheduler queue before ApplyPadding() runs and
+ * unmapped afterwards.
+ */
+void SimplePadLayer::run()
+{
+  assert(_input != nullptr && _output != nullptr && _padding_size != nullptr);
+
+  const auto data_type = _input->info()->data_type();
+  const bool is_8bit = data_type == ::arm_compute::DataType::U8 ||
+                       data_type == ::arm_compute::DataType::QASYMM8;
+  const bool is_f32 = data_type == ::arm_compute::DataType::F32;
+
+  // Reject unsupported types before mapping anything, so that the error path never leaves a
+  // tensor mapped.
+  if (!is_8bit && !is_f32)
+  {
+    ARM_COMPUTE_ERROR("DataType not supported");
+    return;
+  }
+
+  // Sample the mode once so that map and unmap are always paired.
+  const bool gpu_mode = ::internal::arm_compute::isGpuMode();
+
+  if (gpu_mode)
+  {
+    auto &q = ::arm_compute::CLScheduler::get().queue();
+
+    CAST_CL(_input)->map(q);
+    CAST_CL(_output)->map(q);
+    CAST_CL(_padding_size)->map(q);
+  }
+
+  if (is_8bit)
+  {
+    // NOTE(review): the border is filled with the raw value 0. For QASYMM8 the logical zero
+    // is presumably the tensor's zero point rather than 0 - confirm the intended pad value.
+    ApplyPadding<uint8_t>(_input, _input->info()->tensor_shape(), _padding_size, _output,
+                          _output->info()->tensor_shape(), _axises);
+  }
+  else
+  {
+    ApplyPadding<float>(_input, _input->info()->tensor_shape(), _padding_size, _output,
+                        _output->info()->tensor_shape(), _axises);
+  }
+
+  if (gpu_mode)
+  {
+    auto &q = ::arm_compute::CLScheduler::get().queue();
+
+    CAST_CL(_input)->unmap(q);
+    CAST_CL(_output)->unmap(q);
+    CAST_CL(_padding_size)->unmap(q);
+  }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h
new file mode 100644
index 0000000..e636a7c
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_PAD_LAYER_H__
+#define __SIMPLE_PAD_LAYER_H__
+
+#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/runtime/IFunction.h>
+
+/**
+ * @brief Function object that pads a tensor; used by the planner for the "Pad" operation
+ *
+ * Usage is two-phase: configure() records the tensors and the axis mapping, run() performs
+ * the padding. The tensor pointers are stored as given and are never deleted by this class.
+ */
+class SimplePadLayer : public ::arm_compute::IFunction
+{
+public:
+  /**
+   * @brief Construct an unconfigured layer (all tensor pointers null, empty axis mapping)
+   */
+  SimplePadLayer(void) : _input(nullptr), _output(nullptr), _padding_size(nullptr), _axises{}
+  {
+    // DO NOTHING
+  }
+
+  /**
+   * @brief Record the tensors and the axis mapping used by run()
+   *
+   * @param[in]  input        Tensor to be padded
+   * @param[out] output       Tensor receiving the padded result
+   * @param[in]  padding_size Tensor holding the padding amounts
+   * @param[in]  axises       Axis mapping; defaults to getARMComputeAxises(4)
+   */
+  void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+                 ::arm_compute::ITensor *padding_size,
+                 const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
+
+  /**
+   * @brief Execute the padding on the tensors given to configure()
+   */
+  void run(void) override;
+
+private:
+  ::arm_compute::ITensor *_input;        // Source tensor, set by configure()
+  ::arm_compute::ITensor *_output;       // Destination tensor, set by configure()
+  ::arm_compute::ITensor *_padding_size; // Padding amounts, set by configure()
+  ::arm_compute::Coordinates _axises;    // Axis mapping, set by configure()
+};
+
+#endif // __SIMPLE_PAD_LAYER_H__
-- 
2.7.4