From b57269672046da31d40999efbedcb0cee159ca7b Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EA=B9=80=EC=A0=95=ED=98=84/=EB=8F=99=EC=9E=91=EC=A0=9C?= =?utf8?q?=EC=96=B4Lab=28SR=29/Senior=20Engineer/=EC=82=BC=EC=84=B1?= =?utf8?q?=EC=A0=84=EC=9E=90?= Date: Thu, 12 Apr 2018 11:03:06 +0900 Subject: [PATCH] [NNOP] The first version of NEON FC layer (#573) This commit introduces the first version of NEON FC Layer. Signed-off-by: Junghyun Kim --- include/kernel/acl/FullyConnected.h | 10 ++++ src/kernel/acl/CMakeLists.txt | 2 + src/kernel/acl/src/IO_accessor.h | 17 ++++++ src/kernel/acl/src/neon/FullyConnected.cpp | 79 +++++++++++++++++++++++++ src/kernel/acl/src/neon/FullyConnected.test.cpp | 45 ++++++++++++++ 5 files changed, 153 insertions(+) create mode 100644 src/kernel/acl/src/neon/FullyConnected.cpp create mode 100644 src/kernel/acl/src/neon/FullyConnected.test.cpp diff --git a/include/kernel/acl/FullyConnected.h b/include/kernel/acl/FullyConnected.h index 30e4d1c..445c600 100644 --- a/include/kernel/acl/FullyConnected.h +++ b/include/kernel/acl/FullyConnected.h @@ -12,6 +12,16 @@ bool fullyConnectedFloat32(const float* inputData, const android::nn::Shape& inp const float* biasData, const android::nn::Shape& biasShape, int32_t activation, float* outputData, const android::nn::Shape& outputShape); + +namespace neon { + +bool fullyConnectedFloat32(const float* inputData, const android::nn::Shape& inputShape, + const float* weightsData, const android::nn::Shape& weightsShape, + const float* biasData, const android::nn::Shape& biasShape, + int32_t activation, + float* outputData, const android::nn::Shape& outputShape); +} // namespace neon + } // namespace acl } // namespace kernel } // namespace nnfw diff --git a/src/kernel/acl/CMakeLists.txt b/src/kernel/acl/CMakeLists.txt index ba0baa6..05337ae 100644 --- a/src/kernel/acl/CMakeLists.txt +++ b/src/kernel/acl/CMakeLists.txt @@ -31,6 +31,7 @@ set(KERNELACL_SRCS "src/Init_acl.cpp" "src/cl/FullyConnected.cpp" 
"src/cl/Pooling.cpp" "src/cl/Reshape.cpp" + "src/neon/FullyConnected.cpp" ) add_library(${LIB_KERNELACL} SHARED ${KERNELACL_SRCS}) @@ -55,6 +56,7 @@ set(KERNELACL_TEST_SRCS "src/util.cpp" "src/cl/FullyConnected.test.cpp" "src/cl/Pooling.test.cpp" "src/cl/Reshape.test.cpp" + "src/neon/FullyConnected.test.cpp" ) add_executable(${LIB_KERNELACL_TEST} ${KERNELACL_TEST_SRCS}) diff --git a/src/kernel/acl/src/IO_accessor.h b/src/kernel/acl/src/IO_accessor.h index 476e079..c8db0bb 100644 --- a/src/kernel/acl/src/IO_accessor.h +++ b/src/kernel/acl/src/IO_accessor.h @@ -3,6 +3,7 @@ #include <arm_compute/graph/ITensorAccessor.h> #include <arm_compute/runtime/CLTensor.h> +#include <arm_compute/runtime/Tensor.h> #include <OperationsUtils.h> // for android::nn::Shape @@ -86,6 +87,22 @@ inline void TensorAccess(arm_compute::CLTensor& tensor, float* data, tensor.unmap(); } +template<typename AccessorType> +inline void TensorAccess(arm_compute::Tensor& tensor, const float* data, + const android::nn::Shape& shape) +{ + AccessorType accessor(data, shape); + accessor.access_tensor(tensor); +} + +template<typename AccessorType> +inline void TensorAccess(arm_compute::Tensor& tensor, float* data, + const android::nn::Shape& shape) +{ + AccessorType accessor(data, shape); + accessor.access_tensor(tensor); +} + } // namespace acl } // namespace kernel } // namespace nnfw diff --git a/src/kernel/acl/src/neon/FullyConnected.cpp b/src/kernel/acl/src/neon/FullyConnected.cpp new file mode 100644 index 0000000..b35c686 --- /dev/null +++ b/src/kernel/acl/src/neon/FullyConnected.cpp @@ -0,0 +1,79 @@ +#include <OperationsUtils.h> +#include <NeuralNetworks.h> +#include <arm_compute/runtime/NEON/NEFunctions.h> + +#include <cassert> + +// TODO: fix include path in CMakeFiles +#include "../IO_accessor.h" +#include "../shape.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +namespace neon { + +bool fullyConnectedFloat32(const float* inputData, const android::nn::Shape& inputShape, + const float* weightsData, const android::nn::Shape& weightsShape, + const float* biasData, const android::nn::Shape& biasShape, + int32_t activation, + float* outputData, const android::nn::Shape& outputShape) { + + auto input_shape = util::fromNNShape(inputShape); 
+ auto filter_shape = util::fromNNShape(weightsShape); + auto bias_shape = util::fromNNShape(biasShape); + auto output_shape = util::fromNNShape(outputShape); + + arm_compute::Tensor input, output, bias, filter; + + assert(activation == ANEURALNETWORKS_FUSED_NONE || activation == ANEURALNETWORKS_FUSED_RELU); + + std::vector<std::shared_ptr<arm_compute::IFunction>> fns; + + input.allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + bias.allocator()->init(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32)); + filter.allocator()->init(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32)); + + auto fc = std::make_shared<arm_compute::NEFullyConnectedLayer>(); + fc->configure(&input, &filter, &bias, &output); + + fns.emplace_back(fc); + + input.allocator()->allocate(); + output.allocator()->allocate(); + bias.allocator()->allocate(); + filter.allocator()->allocate(); + + if (ANEURALNETWORKS_FUSED_RELU == activation) + { + auto relu_f = std::make_shared<arm_compute::NEActivationLayer>(); + + const arm_compute::ActivationLayerInfo relu_info{arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + + // Do in-place update + relu_f->configure(&output, nullptr, relu_info); + + fns.emplace_back(relu_f); + } + + TensorAccess<InputAccessor>(input, inputData, inputShape); + TensorAccess<BiasAccessor>(bias, biasData, biasShape); + TensorAccess<WeightAccessor>(filter, weightsData, weightsShape); + + for (const auto &fn : fns) + { + fn->run(); + } + + TensorAccess<OutputAccessor>(output, outputData, outputShape); + + return true; +} + +} // namespace neon +} // namespace acl +} // namespace kernel +} // namespace nnfw + diff --git a/src/kernel/acl/src/neon/FullyConnected.test.cpp b/src/kernel/acl/src/neon/FullyConnected.test.cpp new file mode 100644 index 0000000..10befc9 --- /dev/null +++ b/src/kernel/acl/src/neon/FullyConnected.test.cpp @@ -0,0 +1,45 @@ +#include <gtest/gtest.h> +#include <OperationsUtils.h> +#include <kernel/acl/nnfw_kernel_acl.h> +#include <kernel/acl/FullyConnected.h> + +// TODO: fix include path in CMakeFiles +#include "../util.h" + +using namespace nnfw::kernel::acl; + 
+TEST(KernelACL_TC, neon_fcFloat32_1) { + const android::nn::Shape inputShape = {OperandType::FLOAT32, {1, 1, 1, 100}, 1.0, 0}; + float inputData[100]; + + const android::nn::Shape weightsShape = {OperandType::FLOAT32, {1, 1, 100, 1}, 1.0, 0}; + float weightsData[100]; + + const android::nn::Shape biasShape = {OperandType::FLOAT32, {1, 1, 1, 1}, 1.0, 0}; + float biasData[1]; + + const android::nn::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + float outputData[1]; + + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + + util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(weightsData, sizeof(weightsData) / sizeof(weightsData[0]), 1.0); + util::initData(biasData, sizeof(biasData) / sizeof(biasData[0]), 0.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bool bret = neon::fullyConnectedFloat32(inputData, inputShape, + weightsData, weightsShape, + biasData, biasShape, + activation, + outputData, outputShape); + + EXPECT_EQ(bret, true); + + float expectData[] = { + 100.0f + }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); + +} -- 2.7.4