From: Hyun Sik Yoon / Motion Control Lab (SR) / Principal Engineer / Samsung Electronics
Date: Thu, 14 Jun 2018 11:09:58 +0000 (+0900)
Subject: Optimized TensorSink (#1665)
X-Git-Tag: 0.2~607
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d6971cbfaf3f5898970141badbb024177a42fbaa;p=platform%2Fcore%2Fml%2Fnnfw.git

Optimized TensorSink (#1665)

Parent issue: #1658

memcpy() is used, just as in #1647.
Tested with `tools/nnapi_unittests/test/mul_1` and `mul_2`.

Signed-off-by: Hyun Sik Yoon <hyunsik.yoon@samsung.com>
---

diff --git a/runtimes/pure_arm_compute/src/execution.cc b/runtimes/pure_arm_compute/src/execution.cc
index 12c3ec2..194464a 100644
--- a/runtimes/pure_arm_compute/src/execution.cc
+++ b/runtimes/pure_arm_compute/src/execution.cc
@@ -12,6 +12,7 @@
 #include "internal/arm_compute/feature/View.h"
 #include "internal/Sinks.h"
 #include "internal/MatrixSink.h"
+#include "internal/Tensor3DSink.h"
 
 #include "util/feature/IndexIterator.h"
 
@@ -285,20 +286,36 @@ static void asTensorSink(ANeuralNetworksExecution *execution, int32_t type, int3
   {
     case ANEURALNETWORKS_FLOAT32:
     case ANEURALNETWORKS_TENSOR_FLOAT32:
-      execution->sink<TensorSink<float>>(index, shape, reinterpret_cast<float *>(buffer), length);
+      if (shape.rank() == 3)
+        execution->sink<Tensor3DSink<float>>(index, shape, reinterpret_cast<float *>(buffer),
+                                             length);
+      else
+        execution->sink<TensorSink<float>>(index, shape, reinterpret_cast<float *>(buffer), length);
       break;
     case ANEURALNETWORKS_INT32:
     case ANEURALNETWORKS_TENSOR_INT32:
-      execution->sink<TensorSink<int32_t>>(index, shape, reinterpret_cast<int32_t *>(buffer),
-                                           length);
+      if (shape.rank() == 3)
+        execution->sink<Tensor3DSink<int32_t>>(index, shape, reinterpret_cast<int32_t *>(buffer),
+                                               length);
+      else
+        execution->sink<TensorSink<int32_t>>(index, shape, reinterpret_cast<int32_t *>(buffer),
+                                             length);
       break;
     case ANEURALNETWORKS_UINT32:
-      execution->sink<TensorSink<uint32_t>>(index, shape, reinterpret_cast<uint32_t *>(buffer),
-                                            length);
+      if (shape.rank() == 3)
+        execution->sink<Tensor3DSink<uint32_t>>(index, shape, reinterpret_cast<uint32_t *>(buffer),
+                                                length);
+      else
+        execution->sink<TensorSink<uint32_t>>(index, shape, reinterpret_cast<uint32_t *>(buffer),
+                                              length);
       break;
     case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
-      execution->sink<TensorSink<uint8_t>>(index, shape, reinterpret_cast<uint8_t *>(buffer),
-                                           length);
+      if (shape.rank() == 3)
+        execution->sink<Tensor3DSink<uint8_t>>(index, shape, reinterpret_cast<uint8_t *>(buffer),
+                                               length);
+      else
+        execution->sink<TensorSink<uint8_t>>(index, shape, reinterpret_cast<uint8_t *>(buffer),
+                                             length);
       break;
     default:
       throw std::runtime_error("Not supported, yet");
@@ -423,6 +440,7 @@ int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int3
   else
   {
     // NOTE TensorSink is much slower than VectorSink and FeatureSink
+    // In case of a 3D tensor, the optimized Tensor3DSink is used inside asTensorSink
     const auto &shape = operands.at(operand_index).shape();
     asTensorSink(execution, output_type, index, shape, buffer, length);
   }
diff --git a/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h b/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h
new file mode 100644
index 0000000..c5fad3f
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h
@@ -0,0 +1,54 @@
+#ifndef __TENSOR3D_SINK_H__
+#define __TENSOR3D_SINK_H__
+
+#include "internal/Sink.h"
+
+//
+// This is a memcpy() version of the generic TensorSink for 3D tensors
+//
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/core/Window.h>
+#include <arm_compute/core/Helpers.h>
+
+template <typename T> class Tensor3DSink final : public Sink
+{
+public:
+  Tensor3DSink(const nnfw::util::tensor::Shape &shape, T *base, const size_t size)
+      : _shape{shape}, _base{base}, _size{size}
+  {
+    // DO NOTHING
+  }
+
+public:
+  void pull(::arm_compute::ITensor &tensor) const override
+  {
+    using ::arm_compute::Window;
+    using ::arm_compute::Iterator;
+    using ::arm_compute::Coordinates;
+    using ::arm_compute::execute_window_loop;
+
+    Window window;
+
+    window.use_tensor_dimensions(tensor.info()->tensor_shape(), ::arm_compute::Window::DimY);
+    int32_t height_width = _shape.dim(1) * _shape.dim(2);
+    int32_t width = _shape.dim(2);
+
+    Iterator it(&tensor, window);
+    execute_window_loop(window,
+                        [&](const ::arm_compute::Coordinates &id) {
+                          const auto z = id.z();
+                          const auto y = id.y();
+                          memcpy(_base + z * height_width + y * width, it.ptr(), width * sizeof(T));
+                        },
+                        it);
+  }
+
+private:
+  const nnfw::util::tensor::Shape _shape;
+
+private:
+  T *const _base;
+  const size_t _size;
+};
+
+#endif // __TENSOR3D_SINK_H__
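
For reference, the optimization above replaces an element-wise copy with one memcpy() per innermost row: the innermost dimension is contiguous in both the ACL tensor and the output buffer, so one memcpy() of `width` elements replaces `width` individual copies. The self-contained sketch below illustrates that row-wise copy pattern outside of the ARM Compute Library; it is not part of the commit, and the function name copy_3d_rowwise and the padded source layout are made-up illustrations, not taken from the nnfw sources.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

// Copy a rank-3 tensor row by row. The source rows may be padded
// (src_row_stride >= width * sizeof(T)), which is why one memcpy() per
// row is used instead of a single memcpy() over the whole tensor.
template <typename T>
void copy_3d_rowwise(const uint8_t *src_base, size_t src_row_stride, size_t src_slice_stride,
                     T *dst, int32_t depth, int32_t height, int32_t width)
{
  for (int32_t z = 0; z < depth; ++z)
  {
    for (int32_t y = 0; y < height; ++y)
    {
      const uint8_t *src_row = src_base + z * src_slice_stride + y * src_row_stride;
      T *dst_row = dst + (z * height + y) * width;
      std::memcpy(dst_row, src_row, width * sizeof(T));
    }
  }
}

int main()
{
  // A 2 x 3 x 4 float tensor whose source rows carry 16 bytes of trailing padding.
  const int32_t depth = 2, height = 3, width = 4;
  const size_t row_stride = width * sizeof(float) + 16;
  const size_t slice_stride = height * row_stride;

  std::vector<uint8_t> src(depth * slice_stride, 0);
  for (int32_t z = 0; z < depth; ++z)
    for (int32_t y = 0; y < height; ++y)
    {
      auto *row = reinterpret_cast<float *>(src.data() + z * slice_stride + y * row_stride);
      for (int32_t x = 0; x < width; ++x)
        row[x] = static_cast<float>(z * 100 + y * 10 + x);
    }

  // Pull into a dense destination buffer, as Tensor3DSink::pull does.
  std::vector<float> dst(depth * height * width, 0.0f);
  copy_3d_rowwise(src.data(), row_stride, slice_stride, dst.data(), depth, height, width);

  assert(dst[(1 * height + 2) * width + 3] == 123.0f);
  return 0;
}

Inside Tensor3DSink::pull the same address arithmetic happens, except that the source row address comes from the ACL window iterator (it.ptr()) instead of explicit strides.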