From 30295da52a18e6c01d1ad420da12632d4d668e9a Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EA=B9=80=EC=A0=95=ED=98=84/=EB=8F=99=EC=9E=91=EC=A0=9C?= =?utf8?q?=EC=96=B4Lab=28SR=29/Senior=20Engineer/=EC=82=BC=EC=84=B1?= =?utf8?q?=EC=A0=84=EC=9E=90?= Date: Wed, 11 Apr 2018 16:19:59 +0900 Subject: [PATCH] Apply RELU to the FC layer. (#572) This commit adds support for the fused RELU activation to the FC layer. It also adds a test case for RELU. Signed-off-by: Junghyun Kim --- src/kernel/acl/src/cl/FullyConnected.cpp | 29 ++++++++++++++++++--- src/kernel/acl/src/cl/FullyConnected.test.cpp | 36 +++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/src/kernel/acl/src/cl/FullyConnected.cpp b/src/kernel/acl/src/cl/FullyConnected.cpp index e35d8e5..0b27a7f 100644 --- a/src/kernel/acl/src/cl/FullyConnected.cpp +++ b/src/kernel/acl/src/cl/FullyConnected.cpp @@ -2,6 +2,8 @@ #include #include +#include + // TODO: fix include path in CMakeFiles #include "../IO_accessor.h" #include "../shape.h" @@ -23,24 +25,45 @@ bool fullyConnectedFloat32(const float* inputData, const android::nn::Shape& inp arm_compute::CLTensor input, output, bias, filter; + assert(activation == ANEURALNETWORKS_FUSED_NONE || activation == ANEURALNETWORKS_FUSED_RELU); + + std::vector> fns; + input.allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); bias.allocator()->init(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32)); filter.allocator()->init(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32)); - arm_compute::CLFullyConnectedLayer fc; - fc.configure(&input, &filter, &bias, &output); + auto fc = std::make_shared(); + fc->configure(&input, &filter, &bias, &output); + + fns.emplace_back(fc); input.allocator()->allocate(); output.allocator()->allocate(); bias.allocator()->allocate(); filter.allocator()->allocate(); + if 
(ANEURALNETWORKS_FUSED_RELU == activation) + { + auto relu_f = std::make_shared(); + + const arm_compute::ActivationLayerInfo relu_info{arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + + // Do in-place update + relu_f->configure(&output, nullptr, relu_info); + + fns.emplace_back(relu_f); + } + TensorAccess(input, inputData, inputShape); TensorAccess(bias, biasData, biasShape); TensorAccess(filter, weightsData, weightsShape); - fc.run(); + for (const auto &fn : fns) + { + fn->run(); + } arm_compute::CLScheduler::get().sync(); diff --git a/src/kernel/acl/src/cl/FullyConnected.test.cpp b/src/kernel/acl/src/cl/FullyConnected.test.cpp index 20e1171..5ff7852 100644 --- a/src/kernel/acl/src/cl/FullyConnected.test.cpp +++ b/src/kernel/acl/src/cl/FullyConnected.test.cpp @@ -44,3 +44,39 @@ TEST(KernelACL_TC, fcFloat32_1) { } +TEST(KernelACL_TC, fcFloat32_relu) { + const android::nn::Shape inputShape = {OperandType::FLOAT32, {1, 1, 1, 100}, 1.0, 0}; + float inputData[100]; + + const android::nn::Shape weightsShape = {OperandType::FLOAT32, {1, 1, 100, 1}, 1.0, 0}; + float weightsData[100]; + + const android::nn::Shape biasShape = {OperandType::FLOAT32, {1, 1, 1, 1}, 1.0, 0}; + float biasData[1]; + + const android::nn::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + float outputData[1]; + + int32_t activation = ANEURALNETWORKS_FUSED_RELU; + + util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(weightsData, sizeof(weightsData) / sizeof(weightsData[0]), -1.0); + util::initData(biasData, sizeof(biasData) / sizeof(biasData[0]), 0.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bool bret = fullyConnectedFloat32(inputData, inputShape, + weightsData, weightsShape, + biasData, biasShape, + activation, + outputData, outputShape); + + EXPECT_EQ(bret, true); + + float expectData[] = { + 0.0f + }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, 
true); + +} + -- 2.7.4