From 3a7ed54157eaccd346974964e8c8c88248c6cb89 Mon Sep 17 00:00:00 2001
From: Jonghyun Park/Motion Control Lab(SR)/Staff Engineer/Samsung Electronics
Date: Thu, 31 May 2018 09:56:31 +0900
Subject: [PATCH] [Pure CL] Support Tensor Sinks (#1440)

This commit revises the pure CL runtime to support generic tensor outputs.
Outputs that previously failed with "Not supported, yet" are now handled by
the new TensorSink, which copies a tensor of arbitrary rank from the ARM
Compute layout back into the NNAPI (row-major) layout.

Signed-off-by: Jonghyun Park
---
 runtimes/pure_arm_compute/src/execution.cc         | 20 ++++--
 runtimes/pure_arm_compute/src/internal/Sinks.h     | 62 +++++++++++++++++
 .../src/internal/arm_compute/tensor/View.h         | 75 +++++++++++++++++++++
 .../src/internal/nnapi/tensor/View.h               | 77 ++++++++++++++++++++++
 4 files changed, 228 insertions(+), 6 deletions(-)
 create mode 100644 runtimes/pure_arm_compute/src/internal/Sinks.h
 create mode 100644 runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h
 create mode 100644 runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h
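
(Reviewer note; commentary placed between the diffstat and the diff is not
part of the commit and is ignored by git am.) The new
internal::nnapi::tensor::View addresses elements with the usual row-major
rule: the stride of an axis is the product of all dimensions that follow it,
so the last axis varies fastest. The standalone sketch below restates that
rule outside the runtime; the free function offset_of and the main() driver
are hypothetical illustrations, not code from this patch.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Row-major element offset: start from the first coordinate, then for each
// later axis multiply by that axis' extent and add its coordinate. This
// mirrors internal::nnapi::tensor::View::offset_of below.
uint32_t offset_of(const std::vector<uint32_t> &shape, const std::vector<uint32_t> &index)
{
  assert(shape.size() == index.size());

  if (shape.empty())
  {
    return 0; // A rank-0 (scalar) tensor holds exactly one element
  }

  uint32_t offset = index[0];

  for (std::size_t axis = 1; axis < shape.size(); ++axis)
  {
    offset *= shape[axis];
    offset += index[axis];
  }

  return offset;
}

int main()
{
  // In a 2x3x4 tensor, element (1, 2, 3) lives at ((1 * 3) + 2) * 4 + 3 = 23,
  // the last slot of a 24-element buffer.
  std::cout << offset_of({2, 3, 4}, {1, 2, 3}) << std::endl; // prints 23
  return 0;
}
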
diff --git a/runtimes/pure_arm_compute/src/execution.cc b/runtimes/pure_arm_compute/src/execution.cc
index f56a6bd..0214b6a 100644
--- a/runtimes/pure_arm_compute/src/execution.cc
+++ b/runtimes/pure_arm_compute/src/execution.cc
@@ -7,6 +7,7 @@
 
 #include "internal/nnapi/feature/View.h"
 #include "internal/arm_compute/feature/View.h"
+#include "internal/Sinks.h"
 
 #include "util/feature/IndexIterator.h"
 
@@ -191,14 +192,11 @@ int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int32_t index,
 
   // TODO Check type conflicts
 
-  // NOTE The current implemenation assumes that every output is a feature map.
-  // TODO Remove this assumption
   const auto operand_index = execution->plan().model().outputs.at(index);
 
-  if (operands.at(operand_index).shape().rank() == 2)
+  if ((operands.at(operand_index).shape().rank() == 2) &&
+      (operands.at(operand_index).shape().dim(0) == 1))
   {
-    assert(operands.at(operand_index).shape().dim(0) == 1);
-
     const auto len = operands.at(operand_index).shape().dim(1);
 
     execution->sink<VectorSink>(index, len, reinterpret_cast<uint8_t *>(buffer), length);
@@ -211,7 +209,17 @@ int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int32_t index,
   }
   else
   {
-    throw std::runtime_error{"Not supported, yet"};
+    // NOTE TensorSink is much slower than VectorSink and FeatureSink
+    const uint32_t rank = operands.at(operand_index).shape().rank();
+    // TODO Remove conversion
+    nnfw::util::tensor::Shape shape(rank);
+
+    for (uint32_t axis = 0; axis < rank; ++axis)
+    {
+      shape.dim(axis) = operands.at(operand_index).shape().dim(axis);
+    }
+
+    execution->sink<TensorSink>(index, shape, reinterpret_cast<uint8_t *>(buffer), length);
   }
 
   return ANEURALNETWORKS_NO_ERROR;
diff --git a/runtimes/pure_arm_compute/src/internal/Sinks.h b/runtimes/pure_arm_compute/src/internal/Sinks.h
new file mode 100644
index 0000000..ecbffe8
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/Sinks.h
@@ -0,0 +1,62 @@
+#ifndef __INTERNAL_SINKS_H__
+#define __INTERNAL_SINKS_H__
+
+// TODO Move Sink base class to here
+#include "execution.h"
+
+#include <arm_compute/core/ITensor.h>
+
+// TODO Move VectorSink and FeatureSink into here
+
+//
+// TensorSink
+//
+#include "internal/Swizzle.h"
+
+#include "internal/nnapi/tensor/View.h"
+#include "internal/arm_compute/tensor/View.h"
+
+#include "util/tensor/IndexIterator.h"
+
+class TensorSink final : public Sink
+{
+public:
+  TensorSink(const nnfw::util::tensor::Shape &shape, uint8_t *base, const size_t size)
+      : _shape{shape}, _base{base}, _size{size}
+  {
+    // DO NOTHING
+  }
+
+public:
+  void pull(::arm_compute::ITensor &tensor) const override
+  {
+    const ::internal::arm_compute::tensor::View<float> from{_shape, &tensor};
+    ::internal::nnapi::tensor::View<float> into{_shape, _base, _size};
+
+    using ::nnfw::util::tensor::iterate;
+    using ::nnfw::util::tensor::Index;
+
+    const uint32_t rank = _shape.rank();
+
+    ::nnfw::util::tensor::iterate(_shape) << [&](const Index &raw) {
+      Index permuted(raw.rank());
+
+      for (uint32_t axis = 0; axis < rank; ++axis)
+      {
+        permuted.at(ToARMComputeAxis(rank, axis).value()) = raw.at(axis);
+      }
+
+      const auto value = from.at(permuted);
+      into.at(raw) = value;
+    };
+  }
+
+private:
+  const nnfw::util::tensor::Shape _shape;
+
+private:
+  uint8_t *const _base;
+  const size_t _size;
+};
+
+#endif // __INTERNAL_SINKS_H__
diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h b/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h
new file mode 100644
index 0000000..a5af6ed
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h
@@ -0,0 +1,75 @@
+#ifndef __INTERNAL_ARM_COMPUTE_TENSOR_VIEW_H__
+#define __INTERNAL_ARM_COMPUTE_TENSOR_VIEW_H__
+
+#include "util/tensor/Shape.h"
+#include "util/tensor/Index.h"
+
+#include <arm_compute/core/ITensor.h>
+
+namespace internal
+{
+namespace arm_compute
+{
+namespace tensor
+{
+
+template <typename T> class View
+{
+public:
+  View(const nnfw::util::tensor::Shape &shape, ::arm_compute::ITensor *tensor)
+      : _shape{shape}, _tensor{tensor}
+  {
+    // DO NOTHING
+  }
+
+public:
+  const nnfw::util::tensor::Shape &shape(void) const { return _shape; }
+
+private:
+  uint32_t byte_offset_of(const nnfw::util::tensor::Index &index) const
+  {
+    const uint32_t rank = _shape.rank();
+
+    ::arm_compute::Coordinates coordinates;
+
+    coordinates.set_num_dimensions(rank);
+
+    for (uint32_t axis = 0; axis < rank; ++axis)
+    {
+      coordinates[axis] = index.at(axis);
+    }
+
+    return _tensor->info()->offset_element_in_bytes(coordinates);
+  }
+
+public:
+  T at(const nnfw::util::tensor::Index &index) const
+  {
+    const auto offset = byte_offset_of(index);
+
+    T *ptr = reinterpret_cast<T *>(_tensor->buffer() + offset);
+
+    return *ptr;
+  }
+
+  T &at(const nnfw::util::tensor::Index &index)
+  {
+    const auto offset = byte_offset_of(index);
+
+    T *ptr = reinterpret_cast<T *>(_tensor->buffer() + offset);
+
+    return *ptr;
+  }
+
+private:
+  const nnfw::util::tensor::Shape _shape;
+
+private:
+  ::arm_compute::ITensor *_tensor;
+};
+
+} // namespace tensor
+} // namespace arm_compute
+} // namespace internal
+
+#endif // __INTERNAL_ARM_COMPUTE_TENSOR_VIEW_H__
diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h
new file mode 100644
index 0000000..e521088
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h
@@ -0,0 +1,77 @@
+#ifndef __INTERNAL_NNAPI_TENSOR_VIEW_H__
+#define __INTERNAL_NNAPI_TENSOR_VIEW_H__
+
+#include "util/tensor/Shape.h"
+#include "util/tensor/Index.h"
+
+namespace internal
+{
+namespace nnapi
+{
+namespace tensor
+{
+
+template <typename T> class View
+{
+public:
+  View(const ::nnfw::util::tensor::Shape &shape, uint8_t *ptr, size_t len)
+      : _shape{shape}, _ptr{ptr}, _len{len}
+  {
+    // DO NOTHING
+  }
+
+public:
+  const nnfw::util::tensor::Shape &shape(void) const { return _shape; }
+
+private:
+  uint32_t offset_of(const nnfw::util::tensor::Index &index) const
+  {
+    if (_shape.rank() == 0)
+    {
+      return 0;
+    }
+
+    uint32_t offset = index.at(0);
+
+    // Stride decreases as axis increases in NNAPI
+    for (uint32_t axis = 1; axis < _shape.rank(); ++axis)
+    {
+      offset *= _shape.dim(axis);
+      offset += index.at(axis);
+    }
+
+    return offset;
+  }
+
+public:
+  T at(const nnfw::util::tensor::Index &index) const
+  {
+    const auto offset = offset_of(index);
+
+    T *arr = reinterpret_cast<T *>(_ptr);
+
+    return arr[offset];
+  }
+
+  T &at(const nnfw::util::tensor::Index &index)
+  {
+    const auto offset = offset_of(index);
+
+    T *arr = reinterpret_cast<T *>(_ptr);
+
+    return arr[offset];
+  }
+
+private:
+  nnfw::util::tensor::Shape _shape;
+
+private:
+  uint8_t *_ptr;
+  const size_t _len;
+};
+
+} // namespace tensor
+} // namespace nnapi
+} // namespace internal
+
+#endif // __INTERNAL_NNAPI_TENSOR_VIEW_H__
-- 
2.7.4
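
P.S. (reviewer illustration, not part of the patch): TensorSink::pull visits
every NNAPI index, permutes it with ToARMComputeAxis, reads that element
through the ARM Compute view, and writes it at the unpermuted index through
the NNAPI view. The standalone sketch below replays that permute-and-copy on
plain buffers. It assumes the permutation is a plain axis reversal; the real
Swizzle.h helper may treat some ranks specially, and iterate/offset_of here
are hypothetical stand-ins for the runtime's utilities.

#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

using Index = std::vector<uint32_t>;
using Shape = std::vector<uint32_t>;

// Visit every index of `shape` in row-major order (last axis fastest),
// playing the role of nnfw::util::tensor::iterate.
void iterate(const Shape &shape, const std::function<void(const Index &)> &fn)
{
  Index index(shape.size(), 0);

  std::function<void(std::size_t)> recurse = [&](std::size_t axis) {
    if (axis == shape.size())
    {
      fn(index);
      return;
    }

    for (index[axis] = 0; index[axis] < shape[axis]; ++index[axis])
    {
      recurse(axis + 1);
    }
  };

  recurse(0);
}

// Row-major element offset (the same rule as internal::nnapi::tensor::View)
uint32_t offset_of(const Shape &shape, const Index &index)
{
  uint32_t offset = 0;

  for (std::size_t axis = 0; axis < shape.size(); ++axis)
  {
    offset = offset * shape[axis] + index[axis];
  }

  return offset;
}

int main()
{
  // A 2x3 NNAPI output whose backing buffer on the ARM Compute side is
  // stored with the axes reversed (3x2).
  const Shape nnapi_shape{2, 3};
  const Shape acl_shape{3, 2};

  std::vector<float> acl_buffer(6);
  std::iota(acl_buffer.begin(), acl_buffer.end(), 0.0f); // 0, 1, 2, 3, 4, 5

  std::vector<float> nnapi_buffer(6, 0.0f);

  // For each NNAPI index, read the element at the axis-reversed index on the
  // ARM Compute side: the same permute-and-copy loop as TensorSink::pull.
  iterate(nnapi_shape, [&](const Index &raw) {
    const Index permuted(raw.rbegin(), raw.rend());

    nnapi_buffer[offset_of(nnapi_shape, raw)] = acl_buffer[offset_of(acl_shape, permuted)];
  });

  for (float v : nnapi_buffer)
  {
    std::cout << v << ' ';
  }
  std::cout << std::endl; // prints: 0 2 4 1 3 5

  return 0;
}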