From: 박세희/동작제어Lab(SR)/Principal Engineer/삼성전자 <saehie.park@samsung.com>
Date: Thu, 12 Apr 2018 02:21:17 +0000 (+0900)
Subject: Initial version of ACL Conv2D with NEON (#602)
X-Git-Tag: 0.1~305
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0ca880cb5d3a87f1d2dafd08add841f14bf56a1e;p=platform%2Fcore%2Fml%2Fnnfw.git

Initial version of ACL Conv2D with NEON (#602)

This adds Conv2D with the NEON backend.
- The implementation will be moved to a separate module once the common code has been moved.

Signed-off-by: SaeHie Park <saehie.park@samsung.com>
---

diff --git a/include/kernel/acl/Conv2D.h b/include/kernel/acl/Conv2D.h
index 028d8ee..c31163c 100644
--- a/include/kernel/acl/Conv2D.h
+++ b/include/kernel/acl/Conv2D.h
@@ -16,6 +16,19 @@ bool convFloat32(const float* inputData, const android::nn::Shape& inputShape,
                  int32_t activation,
                  float* outputData, const android::nn::Shape& outputShape);
 
+namespace neon {
+// Float32 Conv2D entry point for the NEON backend; writes the result into outputData.
+bool convFloat32(const float* inputData, const android::nn::Shape& inputShape,
+                 const float* filterData, const android::nn::Shape& filterShape,
+                 const float* biasData, const android::nn::Shape& biasShape,
+                 int32_t padding_left, int32_t padding_right,
+                 int32_t padding_top, int32_t padding_bottom,
+                 int32_t stride_width, int32_t stride_height,
+                 int32_t activation, // fused activation code; the definition asserts FUSED_NONE or FUSED_RELU
+                 float* outputData, const android::nn::Shape& outputShape);
+
+} // namespace neon
+
 } // namespace acl
 } // namespace kernal
 } // namespace nnfw
diff --git a/src/kernel/acl/src/cl/Conv2D.cpp b/src/kernel/acl/src/cl/Conv2D.cpp
index ad805b6..38a0c85 100644
--- a/src/kernel/acl/src/cl/Conv2D.cpp
+++ b/src/kernel/acl/src/cl/Conv2D.cpp
@@ -228,6 +228,91 @@ bool convFloat32(const float* inputData, const android::nn::Shape& inputShape,
   return true;
 }
 
+// TODO move to separate module in neon folder
+namespace neon {
+// Float32 Conv2D built from arm_compute::NEConvolutionLayer, followed by NEActivationLayer when RELU is fused.
+bool convFloat32(const float* inputData, const android::nn::Shape& inputShape,
+                 const float* filterData, const android::nn::Shape& filterShape,
+                 const float* biasData, const android::nn::Shape& biasShape,
+                 int32_t padding_left, int32_t padding_right,
+                 int32_t padding_top, int32_t padding_bottom,
+                 int32_t stride_width, int32_t stride_height,
+                 int32_t activation,
+                 float* outputData, const android::nn::Shape& outputShape)
+{
+  arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+  arm_compute::TensorShape filter_shape = util::fromNNShape(filterShape);
+  arm_compute::TensorShape bias_shape = util::fromVectorNNShape(biasShape); // bias goes through the vector-shape helper
+  arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+  arm_compute::PadStrideInfo conv_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+                                              padding_left, padding_right,
+                                              padding_top, padding_bottom,
+                                              arm_compute::DimensionRoundingType::FLOOR);
+
+  // TODO introduce and use a wrapper like CLUniqueTensor for these tensors
+  arm_compute::Tensor input, output, bias, filter;
+  // Describe every tensor as F32 with its shape; backing memory is allocated further down, after configure().
+  input.allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+  output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+  bias.allocator()->init(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32));
+  filter.allocator()->init(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32));
+  // Only "no activation" and fused RELU are handled by this function.
+  assert(activation == ANEURALNETWORKS_FUSED_NONE || activation == ANEURALNETWORKS_FUSED_RELU);
+  // Functions are queued here in the order they must run; the loop near the end executes them.
+  std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+  auto conv_f = std::make_shared<arm_compute::NEConvolutionLayer>();
+
+  conv_f->configure(&input, &filter, &bias, &output, conv_info);
+
+  fns.emplace_back(conv_f);
+
+  if (ANEURALNETWORKS_FUSED_RELU == activation)
+  {
+    auto relu_f = std::make_shared<arm_compute::NEActivationLayer>();
+
+    const arm_compute::ActivationLayerInfo relu_info{arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+
+    // Do in-place update: a nullptr destination makes the activation overwrite `output`
+    relu_f->configure(&output, nullptr, relu_info);
+
+    fns.emplace_back(relu_f);
+  }
+  // Allocate backing memory for all four tensors now that the functions are configured.
+  input.allocator()->allocate();
+  output.allocator()->allocate();
+  bias.allocator()->allocate();
+  filter.allocator()->allocate();
+  // Pass caller buffers to the accessor helpers (presumably these fill the ACL tensors; helpers are not shown here).
+  TensorAccess<InputAccessor>(input, inputData, inputShape);
+  TensorAccess<BiasAccessor>(bias, biasData, biasShape);
+  TensorAccess<WeightAccessor>(filter, filterData, filterShape);
+  // Debug aid: print the input feature map as read from the NNAPI buffer and from the ACL tensor.
+  if (verbose.value())
+  {
+    auto ifm_shape = nnfw::support::nnapi::feature::asFeatureShape(inputShape);
+    nnfw::support::nnapi::feature::Reader<float> nnapi_ifm_reader{ifm_shape, inputData};
+    nnfw::support::acl::feature::Reader<float> acl_ifm_reader{ &input };
+
+    std::cout << "NNAPI IFM:" << std::endl;
+    std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, nnapi_ifm_reader} << std::endl;
+
+    std::cout << "ARM Compute IFM:" << std::endl;
+    std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, acl_ifm_reader} << std::endl;
+  }
+  // Run the convolution, then the RELU if one was queued.
+  for (const auto &fn : fns)
+  {
+    fn->run();
+  }
+  // Pass the result tensor and outputData to OutputAccessor (presumably copies the result out; confirm).
+  TensorAccess<OutputAccessor>(output, outputData, outputShape);
+
+  return true; // always true; no failure path is reported to the caller
+}
+
+} // namespace neon
+
 } // namespace acl
 } // namespace kernel
 } // namespace nnfw
diff --git a/src/kernel/acl/src/cl/Conv2D.test.cpp b/src/kernel/acl/src/cl/Conv2D.test.cpp
index e25678e..0bd912c 100644
--- a/src/kernel/acl/src/cl/Conv2D.test.cpp
+++ b/src/kernel/acl/src/cl/Conv2D.test.cpp
@@ -195,3 +195,180 @@ TEST(KernelACL_TC, convFloat32_3x5to3x3)
   bret = util::compareData(outputData, expectData, outputShape);
   EXPECT_EQ(bret, true);
 }
+
+// TODO move to separate module in neon folder
+TEST(KernelACL_TC, neon_convFloat32_3x3to1x1)
+{ // NEON conv: 3x3 input, one 3x3 filter, zero padding, stride 1 -> a single output value
+  float inputData[9];
+  const android::nn::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float filterData[9];
+  const android::nn::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float biasData[1] = { 1.0 };
+  const android::nn::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+  int32_t padding_left = 0;
+  int32_t padding_right = 0;
+  int32_t padding_top = 0;
+  int32_t padding_bottom = 0;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+  float outputData[1];
+  const android::nn::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+  bool bret;
+  // initData(buf, count, v) presumably fills buf with v (helper not shown): input/filter = 1.0, output = 0.0
+  util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+  util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bret = neon::convFloat32(inputData, inputShape,
+                     filterData, filterShape,
+                     biasData, biasShape,
+                     padding_left, padding_right,
+                     padding_top, padding_bottom,
+                     stride_width, stride_height,
+                     activation,
+                     outputData, outputShape);
+  EXPECT_EQ(bret, true);
+  // Nine 1*1 products sum to 9; adding the bias of 1 gives 10 (RELU leaves it unchanged).
+  float expectData[] = { 10.0f };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+TEST(KernelACL_TC, neon_convFloat32_3x3to3x3)
+{ // NEON conv: 3x3 input, one 3x3 filter, padding 1 on every side, stride 1 -> 3x3 output
+  float inputData[9];
+  const android::nn::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float filterData[9];
+  const android::nn::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float biasData[1] = { 1.0 };
+  const android::nn::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+  int32_t padding_left = 1;
+  int32_t padding_right = 1;
+  int32_t padding_top = 1;
+  int32_t padding_bottom = 1;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); // RELU is requested, but every expected value is positive
+  float outputData[9];
+  const android::nn::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  bool bret;
+  // initData(buf, count, v) presumably fills buf with v (helper not shown): input/filter = 1.0, output = 0.0
+  util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+  util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bret = neon::convFloat32(inputData, inputShape,
+                     filterData, filterShape,
+                     biasData, biasShape,
+                     padding_left, padding_right,
+                     padding_top, padding_bottom,
+                     stride_width, stride_height,
+                     activation,
+                     outputData, outputShape);
+  EXPECT_EQ(bret, true);
+  // The window covers 4 / 6 / 9 real cells at corner / edge / centre; adding the bias of 1 gives 5 / 7 / 10.
+  float expectData[] = {
+    5.0f, 7.0f, 5.0f,
+    7.0f, 10.0f, 7.0f,
+    5.0f, 7.0f, 5.0f
+  };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, neon_convFloat32_3x3to3x3_RELU)
+{ // Same setup as the padded 3x3 case, but the negative bias makes the fused RELU observable
+  float inputData[9];
+  const android::nn::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float filterData[9];
+  const android::nn::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float biasData[1] = { -5.0f };
+  const android::nn::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+  int32_t padding_left = 1;
+  int32_t padding_right = 1;
+  int32_t padding_top = 1;
+  int32_t padding_bottom = 1;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+  float outputData[9];
+  const android::nn::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  bool bret;
+  // initData(buf, count, v) presumably fills buf with v (helper not shown): input/filter = 1.0, output = 0.0
+  util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+  util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bret = neon::convFloat32(inputData, inputShape,
+                     filterData, filterShape,
+                     biasData, biasShape,
+                     padding_left, padding_right,
+                     padding_top, padding_bottom,
+                     stride_width, stride_height,
+                     activation,
+                     outputData, outputShape);
+  EXPECT_EQ(bret, true);
+  // Window sums 4 / 6 / 9 plus the bias of -5 give -1 / 1 / 4; RELU turns the -1 corners into 0.
+  float expectData[] =
+  {
+    0.0f, 1.0f, 0.0f,
+    1.0f, 4.0f, 1.0f,
+    0.0f, 1.0f, 0.0f
+  };
+
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, neon_convFloat32_3x5to3x3)
+{ // NEON conv: 3x5 input, two 3x3 filters, padding 1 -> outputShape is 3x5 with 2 channels (NOTE(review): name says "to3x3" - confirm intent)
+  float inputData[15] = {
+    1,2,3,4,5,
+    6,7,8,9,10,
+    11,12,13,14,15
+  };
+  const android::nn::Shape inputShape = { OperandType::FLOAT32, {1,3,5,1}, 1.0, 0 };
+  float filterData[18] = {
+    1,1,1, 1,1,1, 1,1,1,
+    2,2,2, 2,2,2, 2,2,2
+  };
+  const android::nn::Shape filterShape = { OperandType::FLOAT32, {2,3,3,1}, 1.0, 0 };
+  float biasData[2] = { 1.0, 1.0 };
+  const android::nn::Shape biasShape = { OperandType::FLOAT32, {2}, 1.0, 0 };
+  int32_t padding_left = 1;
+  int32_t padding_right = 1;
+  int32_t padding_top = 1;
+  int32_t padding_bottom = 1;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+  float outputData[30];
+  const android::nn::Shape outputShape = { OperandType::FLOAT32, {1,3,5,2}, 1.0, 0 };
+  bool bret;
+  // Input and filter values are given inline above, so only the output buffer is passed to initData.
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bret = neon::convFloat32(inputData, inputShape,
+                     filterData, filterShape,
+                     biasData, biasShape,
+                     padding_left, padding_right,
+                     padding_top, padding_bottom,
+                     stride_width, stride_height,
+                     activation,
+                     outputData, outputShape);
+  EXPECT_EQ(bret, true);
+  // Expected values are listed one channel at a time, then reordered by NCHW2NHWC before comparing.
+  float expectNCHW[] = {
+    17.0f, 28.0f, 34.0f, 40.0f, 29.0f,
+    40.0f, 64.0f, 73.0f, 82.0f, 58.0f,
+    37.0f, 58.0f, 64.0f, 70.0f, 49.0f,
+    // second channel: filter of 2s, so each value is 2 * window sum + bias 1
+    33.0f, 55.0f, 67.0f, 79.0f, 57.0f,
+    79.0f, 127.0f, 145.0f, 163.0f, 115.0f,
+    73.0f, 115.0f, 127.0f, 139.0f, 97.0f
+  };
+  float expectData[30];
+  NCHW2NHWC(expectNCHW, expectData, outputShape);
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}