From f74a52c003daabc71e4882efab18a10b9202130a Mon Sep 17 00:00:00 2001
From: =?utf8?q?=EA=B9=80=EC=A0=95=ED=98=84/=EB=8F=99=EC=9E=91=EC=A0=9C?=
 =?utf8?q?=EC=96=B4Lab=28SR=29/Senior=20Engineer/=EC=82=BC=EC=84=B1?=
 =?utf8?q?=EC=A0=84=EC=9E=90?=
Date: Wed, 11 Apr 2018 10:55:51 +0900
Subject: [PATCH] [NNOP] The first version of fullyConnectedFloat32 (#547)

This commit introduces the first version of fullyConnectedFloat32
to integrate ACL with the NN runtime.

Signed-off-by: Junghyun Kim
---
 src/kernel/acl/CMakeLists.txt                 |  2 +
 src/kernel/acl/src/cl/FullyConnected.cpp      | 64 +++++++++++++++++++++++++++
 src/kernel/acl/src/cl/FullyConnected.test.cpp | 58 ++++++++++++++++++++++++
 3 files changed, 124 insertions(+)
 create mode 100644 src/kernel/acl/src/cl/FullyConnected.cpp
 create mode 100644 src/kernel/acl/src/cl/FullyConnected.test.cpp

diff --git a/src/kernel/acl/CMakeLists.txt b/src/kernel/acl/CMakeLists.txt
index 31253b2..22195ab 100644
--- a/src/kernel/acl/CMakeLists.txt
+++ b/src/kernel/acl/CMakeLists.txt
@@ -27,6 +27,7 @@ link_directories(${CMAKE_INSTALL_PREFIX}/lib)
 set(KERNELACL_SRCS "src/Init_acl.cpp"
                    "src/IO_accessor.cpp"
                    "src/Conv2D_acl.cpp"
+                   "src/cl/FullyConnected.cpp"
                    )
 
 add_library(${LIB_KERNELACL} SHARED ${KERNELACL_SRCS})
@@ -47,6 +48,7 @@ install(TARGETS ${LIB_KERNELACL} DESTINATION lib)
 set(KERNELACL_TEST_SRCS "src/Conv2D_acl.test.cpp"
                         "src/util.cpp"
                         "src/gtest_env.cpp"
+                        "src/cl/FullyConnected.test.cpp"
                         )
 
 add_executable(${LIB_KERNELACL_TEST} ${KERNELACL_TEST_SRCS})
diff --git a/src/kernel/acl/src/cl/FullyConnected.cpp b/src/kernel/acl/src/cl/FullyConnected.cpp
new file mode 100644
index 0000000..ec745be
--- /dev/null
+++ b/src/kernel/acl/src/cl/FullyConnected.cpp
@@ -0,0 +1,64 @@
+#include <OperationsUtils.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+
+namespace android {
+namespace nn {
+
+// TODO: remove from this source and use it from the runtime
+uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx);
+
+} // namespace nn
+} // namespace android
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+arm_compute::TensorShape fromNNShape(const android::nn::Shape& shape);
+
+bool fullyConnectedFloat32(const float* inputData, const android::nn::Shape& inputShape,
+                           const float* weightsData, const android::nn::Shape& weightsShape,
+                           const float* biasData, const android::nn::Shape& biasShape,
+                           int32_t activation,
+                           float* outputData, const android::nn::Shape& outputShape) {
+
+  auto input_shape = fromNNShape(inputShape);
+  auto filter_shape = fromNNShape(weightsShape);
+  auto bias_shape = fromNNShape(biasShape);
+  auto output_shape = fromNNShape(outputShape);
+
+  arm_compute::CLTensor input, output, bias, filter;
+
+  input.allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+  output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+  bias.allocator()->init(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32));
+  filter.allocator()->init(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32));
+
+  arm_compute::CLFullyConnectedLayer fc;
+  fc.configure(&input, &filter, &bias, &output);
+
+  input.allocator()->allocate();
+  output.allocator()->allocate();
+  bias.allocator()->allocate();
+  filter.allocator()->allocate();
+
+  TensorAccess<InputAccessor>(input, inputData, inputShape);
+  TensorAccess<BiasAccessor>(bias, biasData, biasShape);
+  TensorAccess<WeightAccessor>(filter, weightsData, weightsShape);
+
+  fc.run();
+
+  arm_compute::CLScheduler::get().sync();
+
+  TensorAccess<OutputAccessor>(output, outputData, outputShape);
+
+  return true;
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/src/kernel/acl/src/cl/FullyConnected.test.cpp b/src/kernel/acl/src/cl/FullyConnected.test.cpp
new file mode 100644
index 0000000..7647c38
--- /dev/null
+++ b/src/kernel/acl/src/cl/FullyConnected.test.cpp
@@ -0,0 +1,58 @@
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <NeuralNetworks.h>
+
+// TODO: fix include path in CMakeFiles
+#include "../util.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool fullyConnectedFloat32(const float* inputData, const android::nn::Shape& inputShape,
+                           const float* weightsData, const android::nn::Shape& weightsShape,
+                           const float* biasData, const android::nn::Shape& biasShape,
+                           int32_t activation,
+                           float* outputData, const android::nn::Shape& outputShape);
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, fcFloat32_1) {
+  const android::nn::Shape inputShape = { OperandType::FLOAT32, {1, 1, 1, 100}, 1.0, 0 };
+  float inputData[100];
+
+  const android::nn::Shape weightsShape = { OperandType::FLOAT32, {1, 1, 100, 1}, 1.0, 0 };
+  float weightsData[100];
+
+  const android::nn::Shape biasShape = { OperandType::FLOAT32, {1, 1, 1, 1}, 1.0, 0 };
+  float biasData[1];
+
+  const android::nn::Shape outputShape = { OperandType::FLOAT32, {1, 1, 1, 1}, 1.0, 0 };
+  float outputData[1];
+
+  int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+  util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+  util::initData(weightsData, sizeof(weightsData) / sizeof(weightsData[0]), 1.0);
+  util::initData(biasData, sizeof(biasData) / sizeof(biasData[0]), 0.0);
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bool bret = fullyConnectedFloat32(inputData, inputShape,
+                                    weightsData, weightsShape,
+                                    biasData, biasShape,
+                                    activation,
+                                    outputData, outputShape);
+
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = {
+    100.0f
+  };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+
+}
-- 
2.7.4
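
The expected value 100.0f in fcFloat32_1 follows directly from the fully connected computation that the ACL-backed kernel offloads to the GPU: each output element is the dot product of the input vector with one row of weights, plus the bias, optionally clamped by the fused activation. The standalone CPU sketch below, which is not part of the patch, illustrates that arithmetic; the helper name referenceFullyConnected and the row-major [numUnits x inputSize] weight layout are assumptions made for illustration only.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical CPU reference (not from the patch):
// output[u] = dot(input, weights row u) + bias[u], with an optional fused ReLU.
// The [numUnits][inputSize] row-major weight layout is an assumption for this sketch.
static void referenceFullyConnected(const float* input, const float* weights, const float* bias,
                                    uint32_t inputSize, uint32_t numUnits, bool fuseRelu,
                                    float* output) {
  for (uint32_t u = 0; u < numUnits; ++u) {
    float acc = bias[u];
    for (uint32_t i = 0; i < inputSize; ++i) {
      acc += input[i] * weights[u * inputSize + i];
    }
    output[u] = fuseRelu ? std::max(acc, 0.0f) : acc;
  }
}

int main() {
  // Mirrors the fcFloat32_1 test data: 100 inputs of 1.0, 100 weights of 1.0, bias 0.0, one output.
  std::vector<float> input(100, 1.0f);
  std::vector<float> weights(100, 1.0f);
  float bias[1] = {0.0f};
  float output[1] = {0.0f};

  referenceFullyConnected(input.data(), weights.data(), bias, 100, 1, /*fuseRelu=*/true, output);

  std::printf("expected 100.0, got %f\n", output[0]);  // prints 100.000000
  return 0;
}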