From 8fe39916008ed6e088d7bdf4e728311a99d9f3d3 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EA=B9=80=EC=A0=95=ED=98=84/=EB=8F=99=EC=9E=91=EC=A0=9C?= =?utf8?q?=EC=96=B4Lab=28SR=29/Senior=20Engineer/=EC=82=BC=EC=84=B1?= =?utf8?q?=EC=A0=84=EC=9E=90?= Date: Fri, 4 May 2018 08:56:11 +0900 Subject: [PATCH] Apply CLUniqueTensor to Pooling and Softmax (#1051) This commit applies CLUniqueTensor instead of directly using ACL's Tensor. Signed-off-by: Junghyun Kim --- libs/kernel/acl/src/cl/Pooling.cpp | 37 +++++++++++++++++-------------------- libs/kernel/acl/src/cl/Softmax.cpp | 21 ++++++++++----------- 2 files changed, 27 insertions(+), 31 deletions(-) diff --git a/libs/kernel/acl/src/cl/Pooling.cpp b/libs/kernel/acl/src/cl/Pooling.cpp index 265d42e..5a280f8 100644 --- a/libs/kernel/acl/src/cl/Pooling.cpp +++ b/libs/kernel/acl/src/cl/Pooling.cpp @@ -19,6 +19,7 @@ #include #include "../IO_accessor.h" #include "../shape.h" +#include "../CLUniqueTensor.h" #include @@ -50,18 +51,16 @@ bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, arm_compute::Size2D(filter_width,filter_height), pad_info, false); - arm_compute::CLTensor input, output; - - input.allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); - output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); auto pool_f = std::make_shared(); - pool_f->configure(&input, &output, maxpool_info); + pool_f->configure(input.ptr(), output.ptr(), maxpool_info); fns.emplace_back(pool_f); - input.allocator()->allocate(); - output.allocator()->allocate(); + input.allocate(); + output.allocate(); if (ANEURALNETWORKS_FUSED_RELU == activation) { @@ -70,12 +69,12 @@ bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, const arm_compute::ActivationLayerInfo relu_info{arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; // Do in-place update - relu_f->configure(&output, nullptr, relu_info); + relu_f->configure(output.ptr(), nullptr, relu_info); fns.emplace_back(relu_f); } - TensorAccess(input, inputData, inputShape); + TensorAccess(input.ref(), inputData, inputShape); for (const auto &fn : fns) { @@ -84,7 +83,7 @@ bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, arm_compute::CLScheduler::get().sync(); - TensorAccess(output, outputData, outputShape); + TensorAccess(output.ref(), outputData, outputShape); return true; } @@ -113,18 +112,16 @@ bool averagePoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShap arm_compute::Size2D(filter_width,filter_height), pad_info, true); - arm_compute::CLTensor input, output; - - input.allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); - output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); auto pool_f = std::make_shared(); - pool_f->configure(&input, &output, pool_info); + pool_f->configure(input.ptr(), output.ptr(), pool_info); fns.emplace_back(pool_f); - input.allocator()->allocate(); - output.allocator()->allocate(); + input.allocate(); + output.allocate(); if (ANEURALNETWORKS_FUSED_RELU == activation) { @@ -133,12 +130,12 @@ bool averagePoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShap const arm_compute::ActivationLayerInfo relu_info{arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; // Do in-place update - relu_f->configure(&output, nullptr, relu_info); + relu_f->configure(output.ptr(), nullptr, relu_info); fns.emplace_back(relu_f); } - TensorAccess(input, inputData, inputShape); + TensorAccess(input.ref(), inputData, inputShape); for (const auto &fn : fns) { @@ -147,7 +144,7 @@ bool averagePoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShap arm_compute::CLScheduler::get().sync(); - TensorAccess(output, outputData, outputShape); + TensorAccess(output.ref(), outputData, outputShape); return true; } diff --git a/libs/kernel/acl/src/cl/Softmax.cpp b/libs/kernel/acl/src/cl/Softmax.cpp index 778347e..a628f05 100644 --- a/libs/kernel/acl/src/cl/Softmax.cpp +++ b/libs/kernel/acl/src/cl/Softmax.cpp @@ -21,6 +21,7 @@ #include #include "../IO_accessor.h" #include "../shape.h" +#include "../CLUniqueTensor.h" #include "../util.h" namespace nnfw { @@ -34,36 +35,34 @@ bool softmaxFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); - arm_compute::CLTensor input, output; - - input.allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); - output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); auto softmax_f = std::make_shared(); - softmax_f->configure(&input, &output, beta); + softmax_f->configure(input.ptr(), output.ptr(), beta); - input.allocator()->allocate(); - output.allocator()->allocate(); + input.allocate(); + output.allocate(); if (inputShape.dimensions.size() == 4) { - TensorAccess(input, inputData, inputShape); + TensorAccess(input.ref(), inputData, inputShape); softmax_f->run(); arm_compute::CLScheduler::get().sync(); - TensorAccess(output, outputData, outputShape); + TensorAccess(output.ref(), outputData, outputShape); } else if (inputShape.dimensions.size() == 2) { - TensorAccess(input, inputData, inputShape); + TensorAccess(input.ref(), inputData, inputShape); softmax_f->run(); arm_compute::CLScheduler::get().sync(); - TensorAccess(output, outputData, outputShape); + TensorAccess(output.ref(), outputData, outputShape); } else { -- 2.7.4