From ce36b15c70c6f0add7b47ad8bc03202a843479cb Mon Sep 17 00:00:00 2001
From: Jiseob Jang/Motion Control Lab(SR)/Engineer/Samsung Electronics
Date: Fri, 17 Aug 2018 15:03:08 +0900
Subject: [PATCH] Unify the order of inputs in STRIDED_SLICE operation &
 Update ACL (#2193)

* Unify the order of inputs in STRIDED_SLICE operation

This commit unifies the order of inputs in the STRIDED_SLICE operation:
- Reorder a scalar's mask bits to match the order of the input data's shape.
- Reorder a vector's data to match the order of the input data's shape.
- Unify the order of inputs in STRIDED_SLICE.

Signed-off-by: jiseob.jang

* Update ACL

This commit updates ACL:
- Extend the range of StridedSlice cases supported by CL.

Signed-off-by: jiseob.jang
---
 externals/acl                                    |  2 +-
 runtimes/pure_arm_compute/src/compilation.cc     | 62 +++++++++++++++++++-----
 runtimes/pure_arm_compute/src/internal/Model.h   | 11 +++++
 runtimes/pure_arm_compute/src/internal/Swizzle.h | 15 ++++++
 4 files changed, 78 insertions(+), 12 deletions(-)

diff --git a/externals/acl b/externals/acl
index b29bc9e..77e9913 160000
--- a/externals/acl
+++ b/externals/acl
@@ -1 +1 @@
-Subproject commit b29bc9ed09561c93f5cffb61e2e74222cd4a4542
+Subproject commit 77e9913a2bab598033c7d2c5179a9bcd80b959e6
diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index 0ce957f..0588af8 100644
--- a/runtimes/pure_arm_compute/src/compilation.cc
+++ b/runtimes/pure_arm_compute/src/compilation.cc
@@ -207,6 +207,23 @@ static void initMatrixTensor(::arm_compute::ITensor &tensor,
 }
 
 template <typename T>
+static void initReorderVectorTensor(::arm_compute::ITensor &tensor, const uint8_t *vec_base,
+                                    const size_t vec_size)
+{
+  for (uint32_t n = 0; n < vec_size; ++n)
+  {
+    const ::arm_compute::Coordinates coordinate{ToARMComputeAxis(vec_size, n).value()};
+
+    T *into = reinterpret_cast<T *>(tensor.ptr_to_element(coordinate));
+
+    const T *from = reinterpret_cast<const T *>(vec_base) + n;
+    const auto value = *from;
+
+    *into = value;
+  }
+}
+
+template <typename T>
 static void initKernelTensor(::arm_compute::ITensor &tensor,
                              const nnfw::util::kernel::Shape &kernel_shape,
                              const uint8_t *kernel_base, const size_t kernel_size)
@@ -2126,7 +2143,6 @@ void Planner::visit(const ::internal::tflite::op::StridedSlice::Node &node)
   const ::internal::tflite::operand::Index endMask_index{node.param().endMask_index};
   const ::internal::tflite::operand::Index shrinkAxisMask_index{node.param().shrinkAxisMask_index};
 
-  // TODO Should move to the place where the operand is handled, if it is possible.
   // Set Shape Constraints
   _builder.addShapeConstr(outputData_index,
                           asTensorInfo(_ctx.at(outputData_index).shape(), _ctx.at(outputData_index).type(),
@@ -2137,13 +2153,36 @@ void Planner::visit(const ::internal::tflite::op::StridedSlice::Node &node)
                                        _ctx.at(inputData_index).scale(),
                                        _ctx.at(inputData_index).zeroPoint()));
 
-  _builder.addShapeConstr(startData_index, asTensorInfo(_ctx.at(startData_index).shape().asVector(),
-                                                        _ctx.at(startData_index).type()));
-  _builder.addShapeConstr(endData_index, asTensorInfo(_ctx.at(endData_index).shape().asVector(),
-                                                      _ctx.at(endData_index).type()));
+  const auto startData_size = _ctx.at(startData_index).shape().asVector();
+  const auto endData_size = _ctx.at(endData_index).shape().asVector();
+  const auto stridesData_size = _ctx.at(stridesData_index).shape().asVector();
+  _builder.addShapeConstr(startData_index,
+                          asTensorInfo(startData_size, _ctx.at(startData_index).type()));
+  _builder.addShapeConstr(endData_index, asTensorInfo(endData_size, _ctx.at(endData_index).type()));
   _builder.addShapeConstr(stridesData_index,
-                          asTensorInfo(_ctx.at(stridesData_index).shape().asVector(),
-                                       _ctx.at(stridesData_index).type()));
+                          asTensorInfo(stridesData_size, _ctx.at(stridesData_index).type()));
+
+  // Set initializers for indices data such as order of inputData
+  {
+    auto startData_base = _ctx.at(startData_index).data().base();
+    auto endData_base = _ctx.at(endData_index).data().base();
+    auto stridesData_base = _ctx.at(stridesData_index).data().base();
+
+    assert(_ctx.at(startData_index).type() == ANEURALNETWORKS_TENSOR_INT32);
+    auto startData_initializer =
+        std::bind(initReorderVectorTensor<int32_t>, _1, startData_base, startData_size);
+    _builder.addInitializer(startData_index, startData_initializer);
+
+    assert(_ctx.at(endData_index).type() == ANEURALNETWORKS_TENSOR_INT32);
+    auto endData_initializer =
+        std::bind(initReorderVectorTensor<int32_t>, _1, endData_base, endData_size);
+    _builder.addInitializer(endData_index, endData_initializer);
+
+    assert(_ctx.at(stridesData_index).type() == ANEURALNETWORKS_TENSOR_INT32);
+    auto stridesData_initializer =
+        std::bind(initReorderVectorTensor<int32_t>, _1, stridesData_base, stridesData_size);
+    _builder.addInitializer(stridesData_index, stridesData_initializer);
+  }
 
   struct Param
   {
@@ -2167,15 +2206,16 @@ void Planner::visit(const ::internal::tflite::op::StridedSlice::Node &node)
   param.endData_index = endData_index.asInt();
   param.stridesData_index = stridesData_index.asInt();
 
-  param.beginMask = _ctx.at(beginMask_index).asScalar<int32_t>();
-  param.endMask = _ctx.at(endMask_index).asScalar<int32_t>();
-  param.shrinkAxisMask = _ctx.at(shrinkAxisMask_index).asScalar<int32_t>();
+  // Set mask bits such as order of inputData
+  const auto inputData_rank = _ctx.at(inputData_index).shape().rank();
+  param.beginMask = _ctx.at(beginMask_index).asReorderBits<int32_t>(inputData_rank);
+  param.endMask = _ctx.at(endMask_index).asReorderBits<int32_t>(inputData_rank);
+  param.shrinkAxisMask = _ctx.at(shrinkAxisMask_index).asReorderBits<int32_t>(inputData_rank);
 
   auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
     auto outputData_alloc = ctx.at(::internal::tflite::operand::Index{param.outputData_index});
     auto inputData_alloc = ctx.at(::internal::tflite::operand::Index{param.inputData_index});
-    // TODO: Let's alloc 1-D array for startData, endData and stridesData from operand
     auto startData_alloc = ctx.at(::internal::tflite::operand::Index{param.startData_index});
     auto endData_alloc = ctx.at(::internal::tflite::operand::Index{param.endData_index});
     auto stridesData_alloc =
         ctx.at(::internal::tflite::operand::Index{param.stridesData_index});
diff --git a/runtimes/pure_arm_compute/src/internal/Model.h b/runtimes/pure_arm_compute/src/internal/Model.h
index 6a332e9..86bbe66 100644
--- a/runtimes/pure_arm_compute/src/internal/Model.h
+++ b/runtimes/pure_arm_compute/src/internal/Model.h
@@ -120,6 +120,7 @@ private:
 #include
 #include
 #include
+#include "internal/Swizzle.h"
 
 namespace internal
 {
@@ -167,6 +168,16 @@ public:
     return *(reinterpret_cast<const T *>(_data->base()));
   }
 
+public:
+  template <typename T> T asReorderBits(size_t numOfBits) const
+  {
+    assert((_shape.rank() == 0) || ((_shape.rank() == 1) && (_shape.dim(0) == 1)));
+    assert(_data != nullptr);
+    assert((_data->base() != nullptr) && (_data->size() == sizeof(T)));
+
+    return ReorderBits<T>(asScalar<T>(), numOfBits);
+  }
+
 private:
   const Shape _shape;
   const int32_t _type;
diff --git a/runtimes/pure_arm_compute/src/internal/Swizzle.h b/runtimes/pure_arm_compute/src/internal/Swizzle.h
index 73c0d10..8c3d1ef 100644
--- a/runtimes/pure_arm_compute/src/internal/Swizzle.h
+++ b/runtimes/pure_arm_compute/src/internal/Swizzle.h
@@ -49,4 +49,19 @@ inline ARMComputeAxis ToARMComputeAxis(uint32_t rank, uint32_t axis)
   return reversed;
 }
 
+#include <cassert>
+
+template <typename T> inline T ReorderBits(T in, size_t numOfBits)
+{
+  assert(numOfBits > 0);
+  T out = 0;
+  for (int32_t i = numOfBits - 1; i >= 0; --i)
+  {
+    const uint32_t toShift = numOfBits - ToARMComputeAxis(numOfBits, i).value() - 1;
+    out += ((in & 1) << toShift);
+    in >>= 1;
+  }
+  return out;
+}
+
 #endif // __SWIZZLE_H__
-- 
2.7.4
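
Note on the bit reordering: an NNAPI STRIDED_SLICE mask uses bit i to refer to axis i of the input shape, while ARM Compute addresses dimensions in the opposite order (the `return reversed` path of ToARMComputeAxis above), so asReorderBits/ReorderBits move every mask bit to the position of its swizzled axis. The sketch below is a minimal, self-contained illustration assuming the mapping is a plain axis reversal; ToReversedAxis and ReorderBitsSketch are hypothetical stand-ins for this note, not names from the patch.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for ToARMComputeAxis(rank, axis).value(),
// assuming a plain axis reversal. The real Swizzle.h mapping may also
// permute rank-4 (NHWC) axes for ACL's layout.
inline uint32_t ToReversedAxis(uint32_t rank, uint32_t axis)
{
  assert(axis < rank);
  return rank - axis - 1;
}

// Same loop shape as ReorderBits in the patch: consume 'in' from the
// least-significant bit and deposit each bit at its reordered position.
// Under a plain reversal this reverses the low numOfBits bits; the
// patched version delegates the axis mapping to ToARMComputeAxis.
template <typename T> inline T ReorderBitsSketch(T in, size_t numOfBits)
{
  assert(numOfBits > 0);
  T out = 0;
  for (int32_t i = numOfBits - 1; i >= 0; --i)
  {
    const uint32_t toShift = numOfBits - ToReversedAxis(numOfBits, i) - 1;
    out += ((in & 1) << toShift);
    in >>= 1;
  }
  return out;
}

int main()
{
  // For a rank-4 input, a beginMask of 0b0011 (axes 0 and 1) becomes
  // 0b1100 (axes 2 and 3) after reordering.
  const uint32_t mask = 0b0011;
  std::printf("0x%x -> 0x%x\n", mask, ReorderBitsSketch<uint32_t>(mask, 4));
  return 0;
}

Expected output under this assumption: 0x3 -> 0xc, which is what handing an NNAPI-ordered mask to ACL's reversed coordinate system requires.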