From: Shubham Gupta/System SW /SRI-Bangalore/Engineer/삼성전자
Date: Wed, 10 Oct 2018 02:10:00 +0000 (+0530)
Subject: Implementation of Space_to_BatchND op in PACL (#2854)
X-Git-Tag: 0.3~695
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3f595627c9a3160184dd1f83a1fb83547123ccbf;p=platform%2Fcore%2Fml%2Fnnfw.git

Implementation of Space_to_BatchND op in PACL (#2854)

This patch adds a CPU implementation of SpaceToBatchND for rank-4 tensors.

Signed-off-by: shubham
---

diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index c3996ef..c6b0ef8 100644
--- a/runtimes/pure_arm_compute/src/compilation.cc
+++ b/runtimes/pure_arm_compute/src/compilation.cc
@@ -86,6 +86,7 @@
 #include "internal/layers/SquaredDifferenceOperation.h"
 #include "internal/layers/SimpleDepthToSpace.h"
 #include "internal/layers/HashtableLookupLayer.h"
+#include "internal/layers/SimpleSpaceToBatchND.h"
 
 #include "util/matrix/IndexIterator.h"
 #include "util/kernel/IndexIterator.h"
@@ -3595,8 +3596,59 @@ void Planner::visit(const ::internal::tflite::op::SpaceToDepth::Node &node)
 
 void Planner::visit(const ::internal::tflite::op::SpaceToBatchND::Node &node)
 {
-  // TODO Implement SpaceToBatch op
-  throw std::runtime_error("Not supported, yet");
+  const ::internal::tflite::operand::Index output_index{node.param().output_index};
+  const ::internal::tflite::operand::Index input_index{node.param().input_index};
+  const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index};
+  const ::internal::tflite::operand::Index padding_size_index{node.param().padding_size_index};
+
+  // Currently, only 4D NHWC input/output tensors are supported.
+  // The 4D array needs to have exactly 2 spatial dimensions.
+  // TODO: Support arbitrary dimensions in SpaceToBatchND.
+
+  assert(_ctx.at(input_index).shape().rank() == 4);
+  assert(_ctx.at(output_index).shape().rank() == 4);
+
+  // Set Shape Constraints and TensorInfo
+  _builder.addShapeConstr(output_index,
+                          asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false),
+                                       _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+                                       _ctx.at(output_index).zeroPoint()));
+  _builder.addShapeConstr(input_index,
+                          asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false),
+                                       _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+                                       _ctx.at(input_index).zeroPoint()));
+
+  // Construct operation parameters
+  struct Param
+  {
+    int output_index;
+    int input_index;
+    const int32_t *block_size;
+    const int32_t *padding_size;
+    int32_t rank;
+  };
+
+  Param param;
+
+  param.output_index = output_index.asInt();
+  param.input_index = input_index.asInt();
+  param.block_size = reinterpret_cast<const int32_t *>(_ctx.at(block_size_index).data().base());
+  param.padding_size = reinterpret_cast<const int32_t *>(_ctx.at(padding_size_index).data().base());
+  param.rank = _ctx.at(input_index).shape().rank();
+
+  auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+    auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+    auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+    auto fn = nnfw::make_unique<SimpleSpaceToBatchND>();
+
+    fn->configure(input_alloc, output_alloc, param.block_size, param.padding_size,
+                  getARMComputeAxises(param.rank));
+    builder.append("SpaceToBatchND", std::move(fn));
+
+  };
+
+  _builder.addStage(stage);
 }
 
 void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node)
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc
new file mode 100644
index 0000000..8282683
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleSpaceToBatchND.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleSpaceToBatchND::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+                                     const int32_t *block_size, const int32_t *padding_size,
+                                     const ::arm_compute::Coordinates &axises = {3, 1, 0, 2})
+{
+  const auto rank = axises.num_dimensions();
+  assert(rank == 4);
+
+  for (int i = 0; i < rank; ++i)
+  {
+    assert(axises[i] >= 0);
+    assert(axises[i] < rank);
+  }
+
+  _input = input;
+  _output = output;
+  _block_size = block_size;
+  _padding_size = padding_size;
+  _axises = axises;
+}
+
+inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w,
+                        int32_t d, const ::arm_compute::Coordinates &axises)
+{
+  // b, h, w, d >= 0
+  size_t indexes[4];
+  indexes[axises[0]] = b;
+  indexes[axises[1]] = h;
+  indexes[axises[2]] = w;
+  indexes[axises[3]] = d;
+
+  int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0];
+  offset += indexes[2] * shape[1] * shape[0];
+  offset += indexes[1] * shape[0];
+  offset += indexes[0];
+  return offset;
+}
+
+template <typename T>
+inline void SpaceToBatchND(const T *input_data, const ::arm_compute::TensorShape &input_shape,
+                           const int32_t *block_size_data, const int32_t *padding_size_data,
+                           T *output_data, const ::arm_compute::TensorShape &output_shape,
+                           const ::arm_compute::Coordinates &axises)
+{
+  const int input_batch = input_shape[axises[0]];
+  const int input_height = input_shape[axises[1]];
+  const int input_width = input_shape[axises[2]];
+
+  const int output_batch = output_shape[axises[0]];
+  const int output_height = output_shape[axises[1]];
+  const int output_width = output_shape[axises[2]];
+  const int depth = output_shape[axises[3]];
+
+  const int padded_height = input_height + padding_size_data[0] + padding_size_data[1];
+  const int padded_width = input_width + padding_size_data[2] + padding_size_data[3];
+
+  assert(padded_height % block_size_data[0] == 0);
+  assert(padded_width % block_size_data[1] == 0);
+
+  for (int in_b = 0; in_b < input_batch; ++in_b)
+  {
+    for (int in_h = 0; in_h < padded_height; ++in_h)
+    {
+      for (int in_w = 0; in_w < padded_width; ++in_w)
+      {
+        for (int in_d = 0; in_d < depth; ++in_d)
+        {
+          // Each (in_h % block_h, in_w % block_w) offset is scattered to its own output batch.
+          const int out_d = in_d;
+          const int out_h = in_h / block_size_data[0];
+          const int out_w = in_w / block_size_data[1];
+          const int out_b =
+              in_b +
+              ((in_h % block_size_data[0]) * block_size_data[1] + in_w % block_size_data[1]) *
+                  input_batch;
+
+          const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises);
+
+          // Positions inside the padded border are zero-filled.
+          if (in_h < padding_size_data[0] || in_h >= (input_height + padding_size_data[0]) ||
+              in_w < padding_size_data[2] || in_w >= (input_width + padding_size_data[2]))
+          {
+            output_data[output_index] = 0;
+          }
+          else
+          {
+            const int input_index = Offset4D(input_shape, in_b, in_h - padding_size_data[0],
+                                             in_w - padding_size_data[2], in_d, axises);
+            output_data[output_index] = input_data[input_index];
+          }
+        }
+      }
+    }
+  }
+}
+
+void SimpleSpaceToBatchND::run()
+{
+  if (::internal::arm_compute::isGpuMode())
+  {
+    // Map CL tensors into host memory before touching them on the CPU.
+    auto &q = ::arm_compute::CLScheduler::get().queue();
+
+    CAST_CL(_input)->map(q);
+    CAST_CL(_output)->map(q);
+  }
+
+  auto input_buf = _input->buffer();
+  auto output_buf = _output->buffer();
+  switch (_input->info()->data_type())
+  {
+    case ::arm_compute::DataType::U8:
+    case ::arm_compute::DataType::QASYMM8:
+      SpaceToBatchND(reinterpret_cast<const uint8_t *>(input_buf), _input->info()->tensor_shape(),
+                     _block_size, _padding_size, reinterpret_cast<uint8_t *>(output_buf),
+                     _output->info()->tensor_shape(), _axises);
+      break;
+    case ::arm_compute::DataType::F32:
+      SpaceToBatchND(reinterpret_cast<const float *>(input_buf), _input->info()->tensor_shape(),
+                     _block_size, _padding_size, reinterpret_cast<float *>(output_buf),
+                     _output->info()->tensor_shape(), _axises);
+      break;
+    default:
+      ARM_COMPUTE_ERROR("DataType not supported");
+      break;
+  }
+
+  if (::internal::arm_compute::isGpuMode())
+  {
+    auto &q = ::arm_compute::CLScheduler::get().queue();
+
+    CAST_CL(_input)->unmap(q);
+    CAST_CL(_output)->unmap(q);
+  }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h
new file mode 100644
index 0000000..0230b26
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_SPACE_TO_BATCHND_H__
+#define __SIMPLE_SPACE_TO_BATCHND_H__
+
+#include "internal/arm_compute.h"
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/runtime/IFunction.h>
+
+class SimpleSpaceToBatchND : public ::arm_compute::IFunction
+{
+public:
+  /** Initialise input and output
+   *
+   * @param[in]  input        Input tensor.
+   * @param[out] output       Output tensor.
+   * @param[in]  block_size   Block sizes for the spatial dimensions (height, width).
+   * @param[in]  padding_size Paddings for the spatial dimensions (top, bottom, left, right).
+   * @param[in]  axises       Coordinates mapping NHWC axes to the backend tensor layout.
+   */
+  void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+                 const int32_t *block_size, const int32_t *padding_size,
+                 const ::arm_compute::Coordinates &axises);
+
+  void run() override;
+
+private:
+  ::arm_compute::ITensor *_input;
+  ::arm_compute::ITensor *_output;
+  const int32_t *_block_size;
+  const int32_t *_padding_size;
+  ::arm_compute::Coordinates _axises;
+};
+
+#endif /* __SIMPLE_SPACE_TO_BATCHND_H__ */
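
Illustrative note (not part of the patch): the kernel above sends every padded input position
(in_h, in_w) of batch in_b to output batch
in_b + ((in_h % block_h) * block_w + in_w % block_w) * input_batch, at spatial position
(in_h / block_h, in_w / block_w), and zero-fills positions that fall inside the padding. The
standalone sketch below replays the same index math on plain NHWC float buffers, without the
ARM Compute tensor types used in the patch; the helper names offset_nhwc and space_to_batch_nd
are hypothetical and exist only for this example.

#include <array>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// Flattened offset into an NHWC buffer.
inline int offset_nhwc(int H, int W, int D, int b, int h, int w, int d)
{
  return ((b * H + h) * W + w) * D + d;
}

// Mirrors the loop structure of SpaceToBatchND in SimpleSpaceToBatchND.cc.
void space_to_batch_nd(const std::vector<float> &in, int batch, int height, int width, int depth,
                       const std::array<int32_t, 2> &block, // {block_h, block_w}
                       const std::array<int32_t, 4> &pad,   // {top, bottom, left, right}
                       std::vector<float> &out)
{
  const int padded_h = height + pad[0] + pad[1];
  const int padded_w = width + pad[2] + pad[3];
  assert(padded_h % block[0] == 0 && padded_w % block[1] == 0);

  const int out_batch = batch * block[0] * block[1];
  const int out_h = padded_h / block[0];
  const int out_w = padded_w / block[1];
  out.assign(static_cast<std::size_t>(out_batch) * out_h * out_w * depth, 0.0f);

  for (int b = 0; b < batch; ++b)
    for (int h = 0; h < padded_h; ++h)
      for (int w = 0; w < padded_w; ++w)
        for (int d = 0; d < depth; ++d)
        {
          // Same batch/spatial mapping as the kernel in the patch.
          const int ob = b + ((h % block[0]) * block[1] + w % block[1]) * batch;
          const int oi = offset_nhwc(out_h, out_w, depth, ob, h / block[0], w / block[1], d);
          if (h < pad[0] || h >= height + pad[0] || w < pad[2] || w >= width + pad[2])
            out[oi] = 0.0f; // padded border
          else
            out[oi] = in[offset_nhwc(height, width, depth, b, h - pad[0], w - pad[2], d)];
        }
}

int main()
{
  // 1x4x4x1 input holding 0..15, block {2, 2}, no padding -> 4x2x2x1 output:
  // batch 0: 0 2 8 10, batch 1: 1 3 9 11, batch 2: 4 6 12 14, batch 3: 5 7 13 15.
  std::vector<float> in(16);
  for (int i = 0; i < 16; ++i)
    in[i] = static_cast<float>(i);

  std::vector<float> out;
  space_to_batch_nd(in, 1, 4, 4, 1, {2, 2}, {0, 0, 0, 0}, out);

  for (std::size_t i = 0; i < out.size(); ++i)
    std::cout << out[i] << ((i % 4 == 3) ? '\n' : ' ');
  return 0;
}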