From: Junghyun Kim (김정현) / Motion Control Lab (SR) / Senior Engineer / Samsung Electronics
Date: Wed, 11 Apr 2018 07:19:59 +0000 (+0900)
Subject: Apply RELU to the FC layer. (#572)
X-Git-Tag: 0.1~328
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=30295da52a18e6c01d1ad420da12632d4d668e9a;p=platform%2Fcore%2Fml%2Fnnfw.git

Apply RELU to the FC layer. (#572)

This commit adds support for the fused RELU activation to the FC layer.
It also adds a test case for RELU.

Signed-off-by: Junghyun Kim
---

diff --git a/src/kernel/acl/src/cl/FullyConnected.cpp b/src/kernel/acl/src/cl/FullyConnected.cpp
index e35d8e5..0b27a7f 100644
--- a/src/kernel/acl/src/cl/FullyConnected.cpp
+++ b/src/kernel/acl/src/cl/FullyConnected.cpp
@@ -2,6 +2,8 @@
 #include 
 #include 
+#include 
+
 // TODO: fix include path in CMakeFiles
 #include "../IO_accessor.h"
 #include "../shape.h"
 
@@ -23,24 +25,45 @@ bool fullyConnectedFloat32(const float* inputData, const android::nn::Shape& inp
 
   arm_compute::CLTensor input, output, bias, filter;
 
+  assert(activation == ANEURALNETWORKS_FUSED_NONE || activation == ANEURALNETWORKS_FUSED_RELU);
+
+  std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
   input.allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
   output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
   bias.allocator()->init(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32));
   filter.allocator()->init(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32));
 
-  arm_compute::CLFullyConnectedLayer fc;
-  fc.configure(&input, &filter, &bias, &output);
+  auto fc = std::make_shared<arm_compute::CLFullyConnectedLayer>();
+  fc->configure(&input, &filter, &bias, &output);
+
+  fns.emplace_back(fc);
 
   input.allocator()->allocate();
   output.allocator()->allocate();
   bias.allocator()->allocate();
   filter.allocator()->allocate();
 
+  if (ANEURALNETWORKS_FUSED_RELU == activation)
+  {
+    auto relu_f = std::make_shared<arm_compute::CLActivationLayer>();
+
+    const arm_compute::ActivationLayerInfo relu_info{arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+
+    // Do in-place update
+    relu_f->configure(&output, nullptr, relu_info);
+
+    fns.emplace_back(relu_f);
+  }
+
   TensorAccess(input, inputData, inputShape);
   TensorAccess(bias, biasData, biasShape);
   TensorAccess(filter, weightsData, weightsShape);
 
-  fc.run();
+  for (const auto &fn : fns)
+  {
+    fn->run();
+  }
 
   arm_compute::CLScheduler::get().sync();
 
diff --git a/src/kernel/acl/src/cl/FullyConnected.test.cpp b/src/kernel/acl/src/cl/FullyConnected.test.cpp
index 20e1171..5ff7852 100644
--- a/src/kernel/acl/src/cl/FullyConnected.test.cpp
+++ b/src/kernel/acl/src/cl/FullyConnected.test.cpp
@@ -44,3 +44,39 @@ TEST(KernelACL_TC, fcFloat32_1) {
 }
 
+TEST(KernelACL_TC, fcFloat32_relu) {
+  const android::nn::Shape inputShape = {OperandType::FLOAT32, {1, 1, 1, 100}, 1.0, 0};
+  float inputData[100];
+
+  const android::nn::Shape weightsShape = {OperandType::FLOAT32, {1, 1, 100, 1}, 1.0, 0};
+  float weightsData[100];
+
+  const android::nn::Shape biasShape = {OperandType::FLOAT32, {1, 1, 1, 1}, 1.0, 0};
+  float biasData[1];
+
+  const android::nn::Shape outputShape = {OperandType::FLOAT32, {1, 1, 1, 1}, 1.0, 0};
+  float outputData[1];
+
+  int32_t activation = ANEURALNETWORKS_FUSED_RELU;
+
+  util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+  util::initData(weightsData, sizeof(weightsData) / sizeof(weightsData[0]), -1.0);
+  util::initData(biasData, sizeof(biasData) / sizeof(biasData[0]), 0.0);
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bool bret = fullyConnectedFloat32(inputData, inputShape,
+                                    weightsData, weightsShape,
+                                    biasData, biasShape,
+                                    activation,
+                                    outputData, outputShape);
+
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = {
+    0.0f
+  };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+
+}
+
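
A note on the pattern the first hunk introduces: the kernel no longer runs one ACL function directly. It builds a list of IFunction objects (the FC layer, plus the activation when one is fused) and runs them in sequence, so adding another fused operation is just another emplace_back. Below is a minimal standalone sketch of the same idea, assuming the stock Arm Compute Library CL API (CLScheduler, CLTensor, CLFullyConnectedLayer, CLActivationLayer); the shapes, the data-fill step, and main() are illustrative and not part of this patch.

#include <memory>
#include <vector>

#include <arm_compute/core/TensorInfo.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/CL/CLScheduler.h>
#include <arm_compute/runtime/CL/CLTensor.h>
#include <arm_compute/runtime/CL/functions/CLActivationLayer.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h>
#include <arm_compute/runtime/IFunction.h>

int main()
{
  // Initialize the OpenCL scheduler once per process.
  arm_compute::CLScheduler::get().default_init();

  // Illustrative shapes: 100-element input, 100x1 weights, single bias/output.
  arm_compute::CLTensor input, weights, bias, output;
  input.allocator()->init(arm_compute::TensorInfo(arm_compute::TensorShape(100U), 1, arm_compute::DataType::F32));
  weights.allocator()->init(arm_compute::TensorInfo(arm_compute::TensorShape(100U, 1U), 1, arm_compute::DataType::F32));
  bias.allocator()->init(arm_compute::TensorInfo(arm_compute::TensorShape(1U), 1, arm_compute::DataType::F32));
  output.allocator()->init(arm_compute::TensorInfo(arm_compute::TensorShape(1U), 1, arm_compute::DataType::F32));

  // Collect the functions to run, in order, behind the common IFunction interface.
  std::vector<std::shared_ptr<arm_compute::IFunction>> fns;

  auto fc = std::make_shared<arm_compute::CLFullyConnectedLayer>();
  fc->configure(&input, &weights, &bias, &output);
  fns.emplace_back(fc);

  // Fused activation: a nullptr output tensor makes the update in place.
  auto relu = std::make_shared<arm_compute::CLActivationLayer>();
  relu->configure(&output, nullptr,
                  arm_compute::ActivationLayerInfo{arm_compute::ActivationLayerInfo::ActivationFunction::RELU});
  fns.emplace_back(relu);

  input.allocator()->allocate();
  weights.allocator()->allocate();
  bias.allocator()->allocate();
  output.allocator()->allocate();

  // (Fill input/weights/bias here, e.g. via map()/unmap(), before running.)

  for (const auto &fn : fns)
  {
    fn->run();
  }
  arm_compute::CLScheduler::get().sync();
  return 0;
}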
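On the new test's expected value: assuming util::initData(buf, n, v) fills the buffer with the constant v (an assumption about the test helper, which this patch does not show), the FC layer computes 100 * (1.0 * -1.0) + 0.0 = -100.0 before activation, and the fused RELU clamps that to 0.0, matching the single 0.0f in expectData. Without the activation applied, the same inputs would yield -100.0 and the comparison would fail.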