From f916d45179fd382b8f8f30022bba639472b27c30 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Prasanna=20R/System=20SW=20/SRI-Bangalore/Engineer/?=
 =?utf8?q?=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?=
Date: Tue, 16 Oct 2018 15:59:10 +0530
Subject: [PATCH] Implement BATCH_TO_SPACE_ND in runtime (#3101)

This patch implements the BATCH_TO_SPACE_ND operation in the
pure_arm_compute runtime.

Signed-off-by: prasannar
---
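Note for reviewers (not part of the commit): the sketch below is a minimal,
self-contained reference for the index mapping that the new BatchToSpaceND()
helper applies, assuming a plain NHWC layout with the identity axis order.
The name batch_to_space_ref and its signature are illustrative only; the
runtime layer additionally permutes axes via getARMComputeAxises().

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Reference-only sketch of BATCH_TO_SPACE_ND for a float NHWC tensor.
  static void batch_to_space_ref(const std::vector<float> &input, int batch, int height,
                                 int width, int depth, const int32_t block[2],
                                 std::vector<float> &output)
  {
    assert(input.size() == static_cast<size_t>(batch) * height * width * depth);
    const int out_batch = batch / (block[0] * block[1]);
    const int out_height = height * block[0];
    const int out_width = width * block[1];
    output.assign(static_cast<size_t>(out_batch) * out_height * out_width * depth, 0.0f);

    for (int ob = 0; ob < out_batch; ++ob)
      for (int oh = 0; oh < out_height; ++oh)
        for (int ow = 0; ow < out_width; ++ow)
          for (int od = 0; od < depth; ++od)
          {
            // Same mapping as BatchToSpaceND() in SimpleBatchToSpaceNd.cc: the
            // input batch index encodes the (oh % block[0], ow % block[1])
            // offset of each element inside its block.
            const int ib = ob + ((oh % block[0]) * block[1] + ow % block[1]) * out_batch;
            const int ih = oh / block[0];
            const int iw = ow / block[1];
            output[((static_cast<size_t>(ob) * out_height + oh) * out_width + ow) * depth + od] =
                input[((static_cast<size_t>(ib) * height + ih) * width + iw) * depth + od];
          }
  }

For example, with block = {2, 2} and an input of shape [4, 1, 1, 1], input
element b ends up at output position (h, w) = (b / 2, b % 2) of the single
output batch, which is consistent with the shape asserts added to
Planner::visit() for BatchToSpaceNd below.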
 runtimes/pure_arm_compute/src/compilation.cc      |  61 +++++++++-
 .../src/internal/layers/SimpleBatchToSpaceNd.cc   | 131 +++++++++++++++++++++
 .../src/internal/layers/SimpleBatchToSpaceNd.h    |  45 +++++++
 3 files changed, 235 insertions(+), 2 deletions(-)
 create mode 100644 runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc
 create mode 100644 runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h

diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index 05a9cba..588a284 100644
--- a/runtimes/pure_arm_compute/src/compilation.cc
+++ b/runtimes/pure_arm_compute/src/compilation.cc
@@ -85,6 +85,7 @@
 #include "internal/layers/SimpleSpaceToDepth.h"
 #include "internal/layers/SimpleEmbeddingLookup.h"
 #include "internal/layers/SimpleDepthToSpace.h"
+#include "internal/layers/SimpleBatchToSpaceNd.h"
 #include "internal/layers/HashtableLookupLayer.h"
 #include "internal/layers/SimpleSpaceToBatchND.h"
 #include "internal/layers/SimpleNeg.h"
@@ -3716,8 +3717,64 @@ void Planner::visit(const ::internal::tflite::op::SpaceToBatchND::Node &node)
 
 void Planner::visit(const ::internal::tflite::op::BatchToSpaceNd::Node &node)
 {
-  // TODO Implement BatchToSpace op
-  throw std::runtime_error("Not supported, yet");
+  const ::internal::tflite::operand::Index output_index{node.param().output_index};
+  const ::internal::tflite::operand::Index input_index{node.param().input_index};
+  const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index};
+
+  assert(_ctx.at(input_index).shape().rank() == 4);
+  assert(_ctx.at(output_index).shape().rank() == 4);
+
+  const int32_t *block_size =
+      reinterpret_cast<const int32_t *>(_ctx.at(block_size_index).data().base());
+
+  const auto &output_shape = _ctx.at(output_index).shape();
+  const auto &input_shape = _ctx.at(input_index).shape();
+
+  assert(block_size[0] > 0 && block_size[1] > 0);
+  {
+    assert(output_shape.dim(3) == input_shape.dim(3));
+    assert(output_shape.dim(1) == input_shape.dim(1) * block_size[0]);
+    assert(output_shape.dim(2) == input_shape.dim(2) * block_size[1]);
+    assert(output_shape.dim(0) == input_shape.dim(0) / (block_size[0] * block_size[1]));
+  }
+
+  // Set Shape Constraints and TensorInfo
+  _builder.addShapeConstr(
+      output_index, asTensorInfo(asTensorShape(output_shape, false), _ctx.at(output_index).type(),
+                                 _ctx.at(output_index).scale(), _ctx.at(output_index).zeroPoint()));
+  _builder.addShapeConstr(
+      input_index, asTensorInfo(asTensorShape(input_shape, false), _ctx.at(input_index).type(),
+                                _ctx.at(input_index).scale(), _ctx.at(input_index).zeroPoint()));
+
+  // Construct operation parameters
+  struct Param
+  {
+    int output_index;
+    int input_index;
+    const int32_t *block_size;
+    int32_t rank;
+  };
+
+  Param param;
+
+  param.output_index = output_index.asInt();
+  param.input_index = input_index.asInt();
+  param.block_size = block_size;
+  param.rank = _ctx.at(input_index).shape().rank();
+
+  auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+    auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+    auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+    auto fn = nnfw::make_unique<SimpleBatchToSpaceND>();
+
+    fn->configure(input_alloc, output_alloc, param.block_size, getARMComputeAxises(param.rank));
+
+    builder.append("BatchToSpaceND", std::move(fn));
+
+  };
+
+  _builder.addStage(stage);
 }
 
 void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node)
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc
new file mode 100644
index 0000000..d485e8a
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleBatchToSpaceNd.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleBatchToSpaceND::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+                                     const int32_t *block_size,
+                                     const ::arm_compute::Coordinates &axises = {3, 1, 0, 2})
+{
+  const auto rank = axises.num_dimensions();
+  assert(rank == 4);
+
+  for (int i = 0; i < rank; ++i)
+    assert(axises[i] >= 0 && axises[i] < rank);
+
+  _input = input;
+  _output = output;
+  _block_size = block_size;
+  _axises = axises;
+}
+
+inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w,
+                        int32_t d, const ::arm_compute::Coordinates &axises)
+{
+  // b, h, w, d >= 0
+  size_t indexes[4];
+  indexes[axises[0]] = b;
+  indexes[axises[1]] = h;
+  indexes[axises[2]] = w;
+  indexes[axises[3]] = d;
+
+  int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0];
+  offset += indexes[2] * shape[1] * shape[0];
+  offset += indexes[1] * shape[0];
+  offset += indexes[0];
+  return offset;
+}
+
+template <typename T>
+inline void BatchToSpaceND(const T *input_data, const ::arm_compute::TensorShape &input_shape,
+                           const int32_t *block_size_data, T *output_data,
+                           const ::arm_compute::TensorShape &output_shape,
+                           const ::arm_compute::Coordinates &axises)
+{
+  const int input_batch = input_shape[axises[0]];
+  const int input_height = input_shape[axises[1]];
+  const int input_width = input_shape[axises[2]];
+
+  const int output_batch = output_shape[axises[0]];
+  const int output_height = output_shape[axises[1]];
+  const int output_width = output_shape[axises[2]];
+  const int depth = output_shape[axises[3]];
+
+  for (int out_b = 0; out_b < output_batch; ++out_b)
+  {
+    for (int out_h = 0; out_h < output_height; ++out_h)
+    {
+      for (int out_w = 0; out_w < output_width; ++out_w)
+      {
+        for (int out_d = 0; out_d < depth; ++out_d)
+        {
+          const int in_d = out_d;
+          const int in_h = out_h / block_size_data[0];
+          const int in_w = out_w / block_size_data[1];
+          const int in_b =
+              out_b +
+              ((out_h % block_size_data[0]) * block_size_data[1] + out_w % block_size_data[1]) *
+                  output_batch;
+
+          const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises);
+          const int input_index = Offset4D(input_shape, in_b, in_h, in_w, in_d, axises);
+
+          output_data[output_index] = input_data[input_index];
+        }
+      }
+    }
+  }
+}
+void SimpleBatchToSpaceND::run()
+{
+  if (::internal::arm_compute::isGpuMode())
+  {
+    auto &q = ::arm_compute::CLScheduler::get().queue();
+
+    CAST_CL(_input)->map(q);
+    CAST_CL(_output)->map(q);
+  }
+
+  auto input_buf = _input->buffer();
+  auto output_buf = _output->buffer();
+  switch (_input->info()->data_type())
+  {
+    case ::arm_compute::DataType::U8:
+    case ::arm_compute::DataType::QASYMM8:
+      BatchToSpaceND(reinterpret_cast<const uint8_t *>(input_buf), _input->info()->tensor_shape(),
+                     _block_size, reinterpret_cast<uint8_t *>(output_buf),
+                     _output->info()->tensor_shape(), _axises);
+      break;
+    case ::arm_compute::DataType::F32:
+      BatchToSpaceND(reinterpret_cast<const float *>(input_buf), _input->info()->tensor_shape(),
+                     _block_size, reinterpret_cast<float *>(output_buf),
+                     _output->info()->tensor_shape(), _axises);
+      break;
+    default:
+      ARM_COMPUTE_ERROR("DataType not supported");
+      break;
+  }
+
+  if (::internal::arm_compute::isGpuMode())
+  {
+    auto &q = ::arm_compute::CLScheduler::get().queue();
+
+    CAST_CL(_input)->unmap(q);
+    CAST_CL(_output)->unmap(q);
+  }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h
new file mode 100644
index 0000000..52a1d35
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_BATCH_TO_SPACE_ND_H__
+#define __SIMPLE_BATCH_TO_SPACE_ND_H__
+
+#include "internal/arm_compute.h"
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/runtime/IFunction.h>
+
+class SimpleBatchToSpaceND : public ::arm_compute::IFunction
+{
+public:
+  /** Initialise input and output
+   *
+   * @param[in]  input       First tensor input.
+   * @param[out] output      Output tensor.
+   * @param[in]  block_size  Block size.
+   */
+  void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+                 const int32_t *block_size, const ::arm_compute::Coordinates &axises);
+
+  void run() override;
+
+private:
+  ::arm_compute::ITensor *_input;
+  ::arm_compute::ITensor *_output;
+  const int32_t *_block_size;
+  ::arm_compute::Coordinates _axises;
+};
+
+#endif /* __SIMPLE_BATCH_TO_SPACE_ND_H__ */
-- 
2.7.4