From 340ef41b77622bb52d887296f5d7f63384ca085e Mon Sep 17 00:00:00 2001
From: =?utf8?q?=EB=B0=95=EC=A2=85=ED=98=84/=EB=8F=99=EC=9E=91=EC=A0=9C?=
 =?utf8?q?=EC=96=B4Lab=28SR=29/Staff=20Engineer/=EC=82=BC=EC=84=B1?=
 =?utf8?q?=EC=A0=84=EC=9E=90?=
Date: Mon, 11 Jun 2018 20:56:35 +0900
Subject: [PATCH] [Pure CL] Add 'MatrixSink' (#1647)

* [Pure CL] Add 'MatrixSink'

This commit adds 'MatrixSink' which efficiently copies output of matrix
shape (rank-2).

Signed-off-by: Jonghyun Park
---
NOTE(review): this text sits below the '---' separator, so it is not part of
the commit message. The angle-bracket spans in this patch (the template
arguments of sink/MatrixSink/reinterpret_cast, the six system #include names
and the template parameter list of MatrixSink) were inferred from the case
labels and from the names the code uses; confirm them against the upstream
commit before applying.

Summary of the change:
 - asMatrixSink() switches on the operand type and calls
   execution->sink<MatrixSink<T>>(index, H, W, buffer, length) with the
   matching element type; any other type throws std::runtime_error.
 - ANeuralNetworksExecution_setOutput() no longer squeezes the shape: rank 1
   goes to asVectorSink(), rank 2 to asMatrixSink(), and rank 4 with
   dim(0) == 1 takes the asFeature() path.
 - MatrixSink::pull() memcpy's _width elements per window step from the
   tensor into _base + row * _width.

 runtimes/pure_arm_compute/src/execution.cc         | 44 +++++++++++++++--
 .../pure_arm_compute/src/internal/MatrixSink.h     | 55 ++++++++++++++++++++++
 2 files changed, 95 insertions(+), 4 deletions(-)
 create mode 100644 runtimes/pure_arm_compute/src/internal/MatrixSink.h

diff --git a/runtimes/pure_arm_compute/src/execution.cc b/runtimes/pure_arm_compute/src/execution.cc
index 9810382..faf680e 100644
--- a/runtimes/pure_arm_compute/src/execution.cc
+++ b/runtimes/pure_arm_compute/src/execution.cc
@@ -10,6 +10,7 @@
 #include "internal/arm_compute/feature/View.h"
 
 #include "internal/Sinks.h"
+#include "internal/MatrixSink.h"
 
 #include "util/feature/IndexIterator.h"
 
@@ -190,6 +191,34 @@ static void asVectorSink(ANeuralNetworksExecution *execution, int32_t type, int3
   }
 }
 
+static void asMatrixSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
+                         int32_t H, int32_t W, void *buffer, size_t length)
+{
+  switch (type)
+  {
+    case ANEURALNETWORKS_FLOAT32:
+    case ANEURALNETWORKS_TENSOR_FLOAT32:
+      execution->sink<MatrixSink<float>>(index, H, W, reinterpret_cast<float *>(buffer), length);
+      break;
+    case ANEURALNETWORKS_INT32:
+    case ANEURALNETWORKS_TENSOR_INT32:
+      execution->sink<MatrixSink<int32_t>>(index, H, W, reinterpret_cast<int32_t *>(buffer),
+                                           length);
+      break;
+    case ANEURALNETWORKS_UINT32:
+      execution->sink<MatrixSink<uint32_t>>(index, H, W, reinterpret_cast<uint32_t *>(buffer),
+                                            length);
+      break;
+    case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+      execution->sink<MatrixSink<uint8_t>>(index, H, W, reinterpret_cast<uint8_t *>(buffer),
+                                           length);
+      break;
+    default:
+      throw std::runtime_error("Not supported, yet");
+      break;
+  }
+}
+
 static void asFeatureSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
                           const nnfw::util::feature::Shape &shape, void *buffer, size_t length)
 {
@@ -340,15 +369,22 @@ int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int3
 
   const auto operand_index = execution->plan().model().outputs.at(index);
   int32_t output_type = operands.at(operand_index).type();
-  const auto squeezed_shape = squeeze(operands.at(operand_index).shape());
+  const auto &output_shape = operands.at(operand_index).shape();
 
-  if (squeezed_shape.rank() == 1)
+  if (output_shape.rank() == 1)
   {
-    const auto len = squeezed_shape.dim(0);
+    const auto len = output_shape.dim(0);
 
     asVectorSink(execution, output_type, index, len, buffer, length);
   }
-  else if (squeezed_shape.rank() == 3)
+  else if (output_shape.rank() == 2)
+  {
+    const auto H = output_shape.dim(0);
+    const auto W = output_shape.dim(1);
+
+    asMatrixSink(execution, output_type, index, H, W, buffer, length);
+  }
+  else if ((output_shape.rank() == 4) && (output_shape.dim(0) == 1))
   {
     const auto &operand_shape = operands.at(operand_index).shape().asFeature();
 
diff --git a/runtimes/pure_arm_compute/src/internal/MatrixSink.h b/runtimes/pure_arm_compute/src/internal/MatrixSink.h
new file mode 100644
index 0000000..f37b540
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/MatrixSink.h
@@ -0,0 +1,55 @@
+#ifndef __INTERNAL_MATRIX_SINK_H__
+#define __INTERNAL_MATRIX_SINK_H__
+
+#include "internal/Sink.h"
+
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/core/Window.h>
+#include <arm_compute/core/Helpers.h>
+
+#include <cstdint>
+#include <cstring>
+#include <cassert>
+
+template <typename T> class MatrixSink final : public Sink
+{
+public:
+  MatrixSink(const int32_t H, const int32_t W, T *base, const size_t size)
+      : _height{H}, _width{W}, _base{base}
+  {
+    assert(size >= _height * _width * sizeof(T));
+  }
+
+public:
+  void pull(::arm_compute::ITensor &tensor) const override
+  {
+    assert(tensor.info()->dimension(0) == _width);
+    assert(tensor.info()->dimension(1) == _height);
+
+    using ::arm_compute::Window;
+    using ::arm_compute::Iterator;
+    using ::arm_compute::Coordinates;
+    using ::arm_compute::execute_window_loop;
+
+    Window window;
+
+    window.use_tensor_dimensions(tensor.info()->tensor_shape(), ::arm_compute::Window::DimY);
+
+    Iterator it(&tensor, window);
+    execute_window_loop(window,
+                        [&](const ::arm_compute::Coordinates &id) {
+                          const auto row = id.y();
+                          memcpy(_base + row * _width, it.ptr(), _width * sizeof(T));
+                        },
+                        it);
+  }
+
+private:
+  const int32_t _height;
+  const int32_t _width;
+
+private:
+  T *const _base;
+};
+
+#endif // __INTERNAL_MATRIX_SINK_H__
-- 
2.7.4