From 027d5b1292c5305b564e0531a26e4372771e23f8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EC=9E=A5=EC=A7=80=EC=84=AD/=EB=8F=99=EC=9E=91=EC=A0=9C?= =?utf8?q?=EC=96=B4Lab=28SR=29/Engineer/=EC=82=BC=EC=84=B1=EC=A0=84?= =?utf8?q?=EC=9E=90?= Date: Tue, 16 Oct 2018 15:14:46 +0900 Subject: [PATCH] Fix padding bug of some kernels. (#3161) This commit fixes a padding bug in some kernels by using the Coordinates of ACL. Signed-off-by: jiseob.jang --- .../src/internal/layers/SimpleDepthToSpace.cc | 58 ++++++++++------------ .../src/internal/layers/SimpleDepthToSpace.h | 3 +- .../src/internal/layers/SimpleSpaceToBatchND.cc | 53 ++++++++------------ .../src/internal/layers/SimpleSpaceToDepth.cc | 44 ++++++---------- .../src/internal/layers/SimpleSpaceToDepth.h | 3 +- 5 files changed, 64 insertions(+), 97 deletions(-) diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc index 8be5334..71d2aa5 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc @@ -19,8 +19,7 @@ #include void SimpleDepthToSpace::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, - int32_t block_size, - const ::arm_compute::Coordinates &axises = {3, 1, 0, 2}) + int32_t block_size, const ::arm_compute::Coordinates &axises) { const auto rank = axises.num_dimensions(); assert(rank == 4); @@ -36,26 +35,10 @@ void SimpleDepthToSpace::configure(::arm_compute::ITensor *input, ::arm_compute: _axises = axises; } -inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w, - int32_t d, const ::arm_compute::Coordinates &axises) -{ - // b, h, w, d >= 0 - size_t indexes[4]; - indexes[axises[0]] = b; - indexes[axises[1]] = h; - indexes[axises[2]] = w; - indexes[axises[3]] = d; - - int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0]; - offset += indexes[2] * shape[1] *
shape[0]; - offset += indexes[1] * shape[0]; - offset += indexes[0]; - return offset; -} - template -inline void DepthToSpace(const T *input_data, const ::arm_compute::TensorShape &input_shape, - int32_t block_size, T *output_data, +inline void DepthToSpace(const ::arm_compute::ITensor *input, + const ::arm_compute::TensorShape &input_shape, int32_t block_size, + ::arm_compute::ITensor *output, const ::arm_compute::TensorShape &output_shape, const ::arm_compute::Coordinates &axises) { @@ -83,10 +66,13 @@ inline void DepthToSpace(const T *input_data, const ::arm_compute::TensorShape & const int in_d = out_d + ((out_h % block_size) * block_size + out_w % block_size) * output_depth; - const int input_index = Offset4D(input_shape, in_b, in_h, in_w, in_d, axises); - const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises); + auto input_id = + asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises); + auto output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); - output_data[output_index] = input_data[input_index]; + *reinterpret_cast(output->ptr_to_element(output_id)) = + *reinterpret_cast(input->ptr_to_element(input_id)); } } } @@ -103,20 +89,28 @@ void SimpleDepthToSpace::run() CAST_CL(_output)->map(q); } - auto input_buf = _input->buffer(); - auto output_buf = _output->buffer(); switch (_input->info()->data_type()) { case ::arm_compute::DataType::U8: case ::arm_compute::DataType::QASYMM8: - DepthToSpace(reinterpret_cast(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast(output_buf), - _output->info()->tensor_shape(), _axises); + DepthToSpace(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); + break; + case ::arm_compute::DataType::S8: + DepthToSpace(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); + break; + case 
::arm_compute::DataType::U32: + DepthToSpace(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); + break; + case ::arm_compute::DataType::S32: + DepthToSpace(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); break; case ::arm_compute::DataType::F32: - DepthToSpace(reinterpret_cast(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast(output_buf), - _output->info()->tensor_shape(), _axises); + DepthToSpace(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); break; default: ARM_COMPUTE_ERROR("DataType not supported"); diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h index e4107e2..dac0beb 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h @@ -18,6 +18,7 @@ #define __SIMPLE_DEPTH_TO_SPACE_H__ #include "internal/arm_compute.h" +#include "internal/arm_compute/Cast.h" #include #include @@ -37,7 +38,7 @@ public: * @param[in] block_size Block size. 
*/ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size, - const ::arm_compute::Coordinates &axises); + const ::arm_compute::Coordinates &axises = getARMComputeAxises(4)); void run() override; diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc index 8282683..2f6a8c3 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc @@ -15,12 +15,13 @@ */ #include "internal/layers/SimpleSpaceToBatchND.h" +#include "internal/arm_compute/Cast.h" #include void SimpleSpaceToBatchND::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, const int32_t *block_size, const int32_t *padding_size, - const ::arm_compute::Coordinates &axises = {3, 1, 0, 2}) + const ::arm_compute::Coordinates &axises) { const auto rank = axises.num_dimensions(); assert(rank == 4); @@ -38,28 +39,12 @@ void SimpleSpaceToBatchND::configure(::arm_compute::ITensor *input, ::arm_comput _axises = axises; } -inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w, - int32_t d, const ::arm_compute::Coordinates &axises) -{ - // b, h, w, d >= 0 - size_t indexes[4]; - indexes[axises[0]] = b; - indexes[axises[1]] = h; - indexes[axises[2]] = w; - indexes[axises[3]] = d; - - int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0]; - offset += indexes[2] * shape[1] * shape[0]; - offset += indexes[1] * shape[0]; - offset += indexes[0]; - return offset; -} - template -inline void SpaceToBatchND(const T *input_data, const ::arm_compute::TensorShape &input_shape, - const int32_t *block_size_data, const int32_t *padding_size_data, - T *output_data, const ::arm_compute::TensorShape &output_shape, - const ::arm_compute::Coordinates &axises) +inline void +SpaceToBatchND(const ::arm_compute::ITensor *input, const 
::arm_compute::TensorShape &input_shape, + const int32_t *block_size_data, const int32_t *padding_size_data, + const ::arm_compute::ITensor *output, const ::arm_compute::TensorShape &output_shape, + const ::arm_compute::Coordinates &axises) { const int input_batch = input_shape[axises[0]]; const int input_height = input_shape[axises[1]]; @@ -92,18 +77,22 @@ inline void SpaceToBatchND(const T *input_data, const ::arm_compute::TensorShape ((in_h % block_size_data[0]) * block_size_data[1] + in_w % block_size_data[1]) * input_batch; - const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises); + auto output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); if (in_h < padding_size_data[0] || in_h >= (input_height + padding_size_data[0]) || in_w < padding_size_data[2] || in_w >= (input_width + padding_size_data[2])) { - output_data[output_index] = 0; + *reinterpret_cast(output->ptr_to_element(output_id)) = 0; } else { - const int input_index = Offset4D(input_shape, in_b, in_h - padding_size_data[0], - in_w - padding_size_data[2], in_d, axises); - output_data[output_index] = input_data[input_index]; + auto input_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{in_b, in_h - padding_size_data[0], + in_w - padding_size_data[2], in_d}, + axises); + *reinterpret_cast(output->ptr_to_element(output_id)) = + *reinterpret_cast(input->ptr_to_element(input_id)); } } } @@ -126,14 +115,12 @@ void SimpleSpaceToBatchND::run() { case ::arm_compute::DataType::U8: case ::arm_compute::DataType::QASYMM8: - SpaceToBatchND(reinterpret_cast(input_buf), _input->info()->tensor_shape(), - _block_size, _padding_size, reinterpret_cast(output_buf), - _output->info()->tensor_shape(), _axises); + SpaceToBatchND(_input, _input->info()->tensor_shape(), _block_size, _padding_size, + _output, _output->info()->tensor_shape(), _axises); break; case ::arm_compute::DataType::F32: - SpaceToBatchND(reinterpret_cast(input_buf), 
_input->info()->tensor_shape(), - _block_size, _padding_size, reinterpret_cast(output_buf), - _output->info()->tensor_shape(), _axises); + SpaceToBatchND(_input, _input->info()->tensor_shape(), _block_size, _padding_size, + _output, _output->info()->tensor_shape(), _axises); break; default: ARM_COMPUTE_ERROR("DataType not supported"); diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc index a035cd0..9da5d66 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc @@ -19,8 +19,7 @@ #include void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, - int32_t block_size, - const ::arm_compute::Coordinates &axises = {3, 1, 0, 2}) + int32_t block_size, const ::arm_compute::Coordinates &axises) { const auto rank = axises.num_dimensions(); assert(rank == 4); @@ -36,26 +35,10 @@ void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute: _axises = axises; } -inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w, - int32_t d, const ::arm_compute::Coordinates &axises) -{ - // b, h, w, d >= 0 - size_t indexes[4]; - indexes[axises[0]] = b; - indexes[axises[1]] = h; - indexes[axises[2]] = w; - indexes[axises[3]] = d; - - int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0]; - offset += indexes[2] * shape[1] * shape[0]; - offset += indexes[1] * shape[0]; - offset += indexes[0]; - return offset; -} - template -inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape &input_shape, - int32_t block_size, T *output_data, +inline void SpaceToDepth(const ::arm_compute::ITensor *input, + const ::arm_compute::TensorShape &input_shape, int32_t block_size, + ::arm_compute::ITensor *output, const ::arm_compute::TensorShape &output_shape, const 
::arm_compute::Coordinates &axises) { @@ -83,10 +66,13 @@ inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape & const int out_d = in_d + ((in_h % block_size) * block_size + in_w % block_size) * input_depth; - const int input_index = Offset4D(input_shape, in_b, in_h, in_w, in_d, axises); - const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises); + auto input_id = + asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises); + auto output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); - output_data[output_index] = input_data[input_index]; + *reinterpret_cast(output->ptr_to_element(output_id)) = + *reinterpret_cast(input->ptr_to_element(input_id)); } } } @@ -109,14 +95,12 @@ void SimpleSpaceToDepth::run() { case ::arm_compute::DataType::U8: case ::arm_compute::DataType::QASYMM8: - SpaceToDepth(reinterpret_cast(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast(output_buf), - _output->info()->tensor_shape(), _axises); + SpaceToDepth(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); break; case ::arm_compute::DataType::F32: - SpaceToDepth(reinterpret_cast(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast(output_buf), - _output->info()->tensor_shape(), _axises); + SpaceToDepth(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); break; default: ARM_COMPUTE_ERROR("DataType not supported"); diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h index 54fa227..98caf2e 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h @@ -18,6 +18,7 @@ #define __SIMPLE_SPACE_TO_DEPTH_H__ #include 
"internal/arm_compute.h" +#include "internal/arm_compute/Cast.h" #include #include @@ -36,7 +37,7 @@ public: * @param[in] block_size Block size. */ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size, - const ::arm_compute::Coordinates &axises); + const ::arm_compute::Coordinates &axises = getARMComputeAxises(4)); void run() override; -- 2.7.4