From: 박세희/동작제어Lab(SR)/Principal Engineer/삼성전자
Date: Thu, 12 Apr 2018 02:21:17 +0000 (+0900)
Subject: Initial version of ACL Conv2D with NEON (#602)
X-Git-Tag: 0.1~305
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0ca880cb5d3a87f1d2dafd08add841f14bf56a1e;p=platform%2Fcore%2Fml%2Fnnfw.git

Initial version of ACL Conv2D with NEON (#602)

This will add Conv2D with NEON backend.
- implementation will be moved to another module after common codes are moved

Signed-off-by: SaeHie Park
---

Reviewer notes (everything between the '---' line and the first 'diff --git'
line is ignored when the patch is applied):

- This patch was recovered from a whitespace-collapsed web export. Line breaks
  were rebuilt from the '+' markers; the rebuilt hunks contain exactly the
  number of lines announced by their '@@' headers (+19, +91 and +180).
- Leading indentation and column alignment inside the hunks are reconstructed,
  not recovered, so applying may require 'git apply --ignore-whitespace'.
- The export had stripped every angle-bracket span. Template arguments were
  restored as std::vector<std::shared_ptr<arm_compute::IFunction>>,
  std::make_shared<arm_compute::NEConvolutionLayer>,
  std::make_shared<arm_compute::NEActivationLayer>,
  TensorAccess<InputAccessor|BiasAccessor|WeightAccessor|OutputAccessor>,
  Reader<float>, TextFormatter<float> and static_cast<int32_t>.
  TODO confirm against upstream commit 0ca880cb.
- The e-mail address on the Signed-off-by line was also stripped and has not
  been guessed.
- What the change adds: nnfw::kernel::acl::neon::convFloat32, a float32 Conv2D
  that builds arm_compute::Tensor objects for input/filter/bias/output, runs a
  convolution function followed by an optional in-place RELU, and copies the
  result into outputData; plus four gtest cases that call it.
- NOTE(review): the fused activation is validated only with assert(). When
  assertions are compiled out, any value other than ANEURALNETWORKS_FUSED_RELU
  is treated as "no activation" and the function still returns true.
- NOTE(review): "namespace kernal" in the first hunk is an unchanged context
  line of the target file and is intentionally left as-is here.

diff --git a/include/kernel/acl/Conv2D.h b/include/kernel/acl/Conv2D.h
index 028d8ee..c31163c 100644
--- a/include/kernel/acl/Conv2D.h
+++ b/include/kernel/acl/Conv2D.h
@@ -16,6 +16,19 @@ bool convFloat32(const float* inputData, const android::nn::Shape& inputShape,
                  int32_t activation,
                  float* outputData, const android::nn::Shape& outputShape);
 
+namespace neon {
+
+bool convFloat32(const float* inputData, const android::nn::Shape& inputShape,
+                 const float* filterData, const android::nn::Shape& filterShape,
+                 const float* biasData, const android::nn::Shape& biasShape,
+                 int32_t padding_left, int32_t padding_right,
+                 int32_t padding_top, int32_t padding_bottom,
+                 int32_t stride_width, int32_t stride_height,
+                 int32_t activation,
+                 float* outputData, const android::nn::Shape& outputShape);
+
+} // namespace neon
+
 } // namespace acl
 } // namespace kernal
 } // namespace nnfw
diff --git a/src/kernel/acl/src/cl/Conv2D.cpp b/src/kernel/acl/src/cl/Conv2D.cpp
index ad805b6..38a0c85 100644
--- a/src/kernel/acl/src/cl/Conv2D.cpp
+++ b/src/kernel/acl/src/cl/Conv2D.cpp
@@ -228,6 +228,91 @@ bool convFloat32(const float* inputData, const android::nn::Shape& inputShape,
   return true;
 }
 
+// TODO move to separate module in neon folder
+namespace neon {
+
+bool convFloat32(const float* inputData, const android::nn::Shape& inputShape,
+                 const float* filterData, const android::nn::Shape& filterShape,
+                 const float* biasData, const android::nn::Shape& biasShape,
+                 int32_t padding_left, int32_t padding_right,
+                 int32_t padding_top, int32_t padding_bottom,
+                 int32_t stride_width, int32_t stride_height,
+                 int32_t activation,
+                 float* outputData, const android::nn::Shape& outputShape)
+{
+  arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+  arm_compute::TensorShape filter_shape = util::fromNNShape(filterShape);
+  arm_compute::TensorShape bias_shape = util::fromVectorNNShape(biasShape);
+  arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+  arm_compute::PadStrideInfo conv_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+                                                                    padding_left, padding_right,
+                                                                    padding_top, padding_bottom,
+                                                                    arm_compute::DimensionRoundingType::FLOOR);
+
+  // TODO introduce and use like CLUniqueTensor
+  arm_compute::Tensor input, output, bias, filter;
+
+  input.allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+  output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+  bias.allocator()->init(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32));
+  filter.allocator()->init(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32));
+
+  assert(activation == ANEURALNETWORKS_FUSED_NONE || activation == ANEURALNETWORKS_FUSED_RELU);
+
+  std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+  auto conv_f = std::make_shared<arm_compute::NEConvolutionLayer>();
+
+  conv_f->configure(&input, &filter, &bias, &output, conv_info);
+
+  fns.emplace_back(conv_f);
+
+  if (ANEURALNETWORKS_FUSED_RELU == activation)
+  {
+    auto relu_f = std::make_shared<arm_compute::NEActivationLayer>();
+
+    const arm_compute::ActivationLayerInfo relu_info{arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+
+    // Do in-place update
+    relu_f->configure(&output, nullptr, relu_info);
+
+    fns.emplace_back(relu_f);
+  }
+
+  input.allocator()->allocate();
+  output.allocator()->allocate();
+  bias.allocator()->allocate();
+  filter.allocator()->allocate();
+
+  TensorAccess<InputAccessor>(input, inputData, inputShape);
+  TensorAccess<BiasAccessor>(bias, biasData, biasShape);
+  TensorAccess<WeightAccessor>(filter, filterData, filterShape);
+
+  if (verbose.value())
+  {
+    auto ifm_shape = nnfw::support::nnapi::feature::asFeatureShape(inputShape);
+    nnfw::support::nnapi::feature::Reader<float> nnapi_ifm_reader{ifm_shape, inputData};
+    nnfw::support::acl::feature::Reader<float> acl_ifm_reader{ &input };
+
+    std::cout << "NNAPI IFM:" << std::endl;
+    std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, nnapi_ifm_reader} << std::endl;
+
+    std::cout << "ARM Compute IFM:" << std::endl;
+    std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, acl_ifm_reader} << std::endl;
+  }
+
+  for (const auto &fn : fns)
+  {
+    fn->run();
+  }
+
+  TensorAccess<OutputAccessor>(output, outputData, outputShape);
+
+  return true;
+}
+
+} // namespace neon
+
 } // namespace acl
 } // namespace kernel
 } // namespace nnfw
diff --git a/src/kernel/acl/src/cl/Conv2D.test.cpp b/src/kernel/acl/src/cl/Conv2D.test.cpp
index e25678e..0bd912c 100644
--- a/src/kernel/acl/src/cl/Conv2D.test.cpp
+++ b/src/kernel/acl/src/cl/Conv2D.test.cpp
@@ -195,3 +195,180 @@ TEST(KernelACL_TC, convFloat32_3x5to3x3)
   bret = util::compareData(outputData, expectData, outputShape);
   EXPECT_EQ(bret, true);
 }
+
+// TODO move to separate module in neon folder
+TEST(KernelACL_TC, neon_convFloat32_3x3to1x1)
+{
+  float inputData[9];
+  const android::nn::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float filterData[9];
+  const android::nn::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float biasData[1] = { 1.0 };
+  const android::nn::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+  int32_t padding_left = 0;
+  int32_t padding_right = 0;
+  int32_t padding_top = 0;
+  int32_t padding_bottom = 0;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+  float outputData[1];
+  const android::nn::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+  bool bret;
+
+  util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+  util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bret = neon::convFloat32(inputData, inputShape,
+                           filterData, filterShape,
+                           biasData, biasShape,
+                           padding_left, padding_right,
+                           padding_top, padding_bottom,
+                           stride_width, stride_height,
+                           activation,
+                           outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = { 10.0f };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+TEST(KernelACL_TC, neon_convFloat32_3x3to3x3)
+{
+  float inputData[9];
+  const android::nn::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float filterData[9];
+  const android::nn::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float biasData[1] = { 1.0 };
+  const android::nn::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+  int32_t padding_left = 1;
+  int32_t padding_right = 1;
+  int32_t padding_top = 1;
+  int32_t padding_bottom = 1;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+  float outputData[9];
+  const android::nn::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  bool bret;
+
+  util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+  util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bret = neon::convFloat32(inputData, inputShape,
+                           filterData, filterShape,
+                           biasData, biasShape,
+                           padding_left, padding_right,
+                           padding_top, padding_bottom,
+                           stride_width, stride_height,
+                           activation,
+                           outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = {
+    5.0f, 7.0f, 5.0f,
+    7.0f, 10.0f, 7.0f,
+    5.0f, 7.0f, 5.0f
+  };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, neon_convFloat32_3x3to3x3_RELU)
+{
+  float inputData[9];
+  const android::nn::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float filterData[9];
+  const android::nn::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float biasData[1] = { -5.0f };
+  const android::nn::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+  int32_t padding_left = 1;
+  int32_t padding_right = 1;
+  int32_t padding_top = 1;
+  int32_t padding_bottom = 1;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+  float outputData[9];
+  const android::nn::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  bool bret;
+
+  util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+  util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bret = neon::convFloat32(inputData, inputShape,
+                           filterData, filterShape,
+                           biasData, biasShape,
+                           padding_left, padding_right,
+                           padding_top, padding_bottom,
+                           stride_width, stride_height,
+                           activation,
+                           outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] =
+  {
+    0.0f, 1.0f, 0.0f,
+    1.0f, 4.0f, 1.0f,
+    0.0f, 1.0f, 0.0f
+  };
+
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, neon_convFloat32_3x5to3x3)
+{
+  float inputData[15] = {
+    1,2,3,4,5,
+    6,7,8,9,10,
+    11,12,13,14,15
+  };
+  const android::nn::Shape inputShape = { OperandType::FLOAT32, {1,3,5,1}, 1.0, 0 };
+  float filterData[18] = {
+    1,1,1, 1,1,1, 1,1,1,
+    2,2,2, 2,2,2, 2,2,2
+  };
+  const android::nn::Shape filterShape = { OperandType::FLOAT32, {2,3,3,1}, 1.0, 0 };
+  float biasData[2] = { 1.0, 1.0 };
+  const android::nn::Shape biasShape = { OperandType::FLOAT32, {2}, 1.0, 0 };
+  int32_t padding_left = 1;
+  int32_t padding_right = 1;
+  int32_t padding_top = 1;
+  int32_t padding_bottom = 1;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+  float outputData[30];
+  const android::nn::Shape outputShape = { OperandType::FLOAT32, {1,3,5,2}, 1.0, 0 };
+  bool bret;
+
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bret = neon::convFloat32(inputData, inputShape,
+                           filterData, filterShape,
+                           biasData, biasShape,
+                           padding_left, padding_right,
+                           padding_top, padding_bottom,
+                           stride_width, stride_height,
+                           activation,
+                           outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectNCHW[] = {
+    17.0f, 28.0f, 34.0f, 40.0f, 29.0f,
+    40.0f, 64.0f, 73.0f, 82.0f, 58.0f,
+    37.0f, 58.0f, 64.0f, 70.0f, 49.0f,
+
+    33.0f, 55.0f, 67.0f, 79.0f, 57.0f,
+    79.0f, 127.0f, 145.0f, 163.0f, 115.0f,
+    73.0f, 115.0f, 127.0f, 139.0f, 97.0f
+  };
+  float expectData[30];
+  NCHW2NHWC(expectNCHW, expectData, outputShape);
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}