From 377ef929359a7b9e4e6475ed96b2f9b0a421ed1c Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EC=9C=A4=ED=98=84=EC=8B=9D/=EB=8F=99=EC=9E=91=EC=A0=9C?= =?utf8?q?=EC=96=B4Lab=28SR=29/Principal=20Engineer/=EC=82=BC=EC=84=B1?= =?utf8?q?=EC=A0=84=EC=9E=90?= Date: Wed, 16 May 2018 17:40:57 +0900 Subject: [PATCH] ADD vector operation for cl and neon (#1166) Added ADD operation. This commit handles vector addition. Scalar and matrix operation will be added in next commits. (It seems that format check works under more strick rule from this week. for this reason, util.h was changed) Signed-off-by: Hyun Sik Yoon --- include/kernel/acl/Add.h | 44 ++++++++++++++ libs/kernel/acl/CMakeLists.txt | 4 ++ libs/kernel/acl/src/Add.test.h | 102 +++++++++++++++++++++++++++++++++ libs/kernel/acl/src/cl/Add.cpp | 105 ++++++++++++++++++++++++++++++++++ libs/kernel/acl/src/cl/Add.test.cpp | 102 +++++++++++++++++++++++++++++++++ libs/kernel/acl/src/neon/Add.cpp | 105 ++++++++++++++++++++++++++++++++++ libs/kernel/acl/src/neon/Add.test.cpp | 102 +++++++++++++++++++++++++++++++++ libs/kernel/acl/src/util.cpp | 17 +++++- 8 files changed, 580 insertions(+), 1 deletion(-) create mode 100644 include/kernel/acl/Add.h create mode 100644 libs/kernel/acl/src/Add.test.h create mode 100644 libs/kernel/acl/src/cl/Add.cpp create mode 100644 libs/kernel/acl/src/cl/Add.test.cpp create mode 100644 libs/kernel/acl/src/neon/Add.cpp create mode 100644 libs/kernel/acl/src/neon/Add.test.cpp diff --git a/include/kernel/acl/Add.h b/include/kernel/acl/Add.h new file mode 100644 index 0000000..7cde685 --- /dev/null +++ b/include/kernel/acl/Add.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_KERNEL_ACL_ADD_H__ +#define __NNFW_KERNEL_ACL_ADD_H__ + +#include + +namespace nnfw { +namespace kernel { +namespace acl { + +bool addFloat32(const void* inputData1, const nnfw::rt::Shape& inputShape1, + const void* inputData2, const nnfw::rt::Shape& inputShape2, + int32_t activation, + void* outputData, const nnfw::rt::Shape& outputShape); + +namespace neon { + +bool addFloat32(const void* inputData1, const nnfw::rt::Shape& inputShape1, + const void* inputData2, const nnfw::rt::Shape& inputShape2, + int32_t activation, + void* outputData, const nnfw::rt::Shape& outputShape); + +} // namespace neon + +} // namespace acl +} // namespace kernel +} // namespace nnfw + +#endif // __NNFW_KERNEL_ACL_ADD_H__ diff --git a/libs/kernel/acl/CMakeLists.txt b/libs/kernel/acl/CMakeLists.txt index 8f0486e..977521d 100644 --- a/libs/kernel/acl/CMakeLists.txt +++ b/libs/kernel/acl/CMakeLists.txt @@ -34,6 +34,7 @@ set(KERNELACL_SRCS "src/Init_acl.cpp" "src/cl/Reshape.cpp" "src/cl/Softmax.cpp" "src/cl/Concatenation.cpp" + "src/cl/Add.cpp" "src/neon/Conv2D.cpp" "src/neon/DepthwiseConv2D.cpp" "src/neon/FullyConnected.cpp" @@ -41,6 +42,7 @@ set(KERNELACL_SRCS "src/Init_acl.cpp" "src/neon/Softmax.cpp" "src/neon/Reshape.cpp" "src/neon/Concatenation.cpp" + "src/neon/Add.cpp" ) add_library(${LIB_KERNELACL} SHARED ${KERNELACL_SRCS}) @@ -68,6 +70,7 @@ set(KERNELACL_TEST_SRCS "src/util.cpp" "src/cl/Reshape.test.cpp" "src/cl/Softmax.test.cpp" "src/cl/Concatenation.test.cpp" + "src/cl/Add.test.cpp" "src/neon/Conv2D.test.cpp" 
"src/neon/DepthwiseConv2D.test.cpp" "src/neon/FullyConnected.test.cpp" @@ -75,6 +78,7 @@ set(KERNELACL_TEST_SRCS "src/util.cpp" "src/neon/Softmax.test.cpp" "src/neon/Reshape.test.cpp" "src/neon/Concatenation.test.cpp" + "src/neon/Add.test.cpp" ) add_executable(${LIB_KERNELACL_TEST} ${KERNELACL_TEST_SRCS}) diff --git a/libs/kernel/acl/src/Add.test.h b/libs/kernel/acl/src/Add.test.h new file mode 100644 index 0000000..afc6beb --- /dev/null +++ b/libs/kernel/acl/src/Add.test.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_KERNEL_ACL_ADD_TEST_H__ +#define __NNFW_KERNEL_ACL_ADD_TEST_H__ + +// generated test case data +namespace nnfw +{ +namespace add_vector_test +{ +// 1. adding two vectors: [0], [0] +static float x1[1] = { + 1.0, +}; +static float y1[1] = { + 2.0, +}; +static float expected1[1] = { + 3.0, +}; +// 2. 
adding two vectors: 100 random numbers +static float x2[100] = { + -6.417759, -4.7358875, -11.074962, -4.2841253, -6.1764784, -6.128378, -2.1253011, + 1.8434654, -2.147656, 5.771069, -7.1907907, -2.7715101, 3.296604, -4.272622, + 6.584896, -2.3693078, -3.1644564, 0.37782526, 5.2104087, 0.18219212, -3.053298, + -3.440207, -3.6962276, 1.1613896, 14.312525, 0.19944154, -4.6005354, 2.5095024, + 0.8347582, -7.8343153, 10.99354, 1.7297767, 4.4280763, 8.32951, -6.5384965, + 0.48049495, 0.8144837, 1.119576, -4.013504, 0.060964614, 3.707489, 1.8875263, + -0.5693004, -10.763851, 7.6864552, -6.6477323, 2.2509155, 1.4910474, 7.988678, + -2.6148214, 4.3094416, -0.13960715, 0.7423223, 10.182125, -4.5443907, 7.9473042, + -4.4420857, -7.3940916, 5.6635313, -7.921539, -0.8845104, 1.0182618, 0.96004575, + 2.046311, -3.8806505, -4.5171704, 2.078031, 7.6286087, -10.246123, -2.3268178, + -1.8535945, 5.6277847, -4.0266237, 5.865689, 12.195534, 6.451518, 8.369051, + -1.9481331, 1.1049461, -6.0240774, -3.3728948, 0.11560962, -11.842209, -2.063498, + -2.36375, 4.902294, 15.913748, -8.045401, 1.4952235, -1.4233963, 0.23940584, + -2.360528, -4.6092496, -0.9090049, -3.2113476, -4.264017, -4.1235595, 9.532226, + 2.0181265, -0.15388536, +}; +static float y2[100] = { + -4.376235, -0.08368679, -0.6216589, -5.4909983, -0.04341768, 3.8880827, -6.337233, + -3.1458812, -3.9509559, 7.857975, -10.7834425, 4.5281, 0.9354341, -3.215732, + 3.4087672, -6.6825914, -6.841583, -0.00080897403, -4.719278, 2.5903013, 6.0486693, + 2.134149, 7.3574038, -0.2822968, 0.5588348, 0.9474025, 0.5909645, 0.1402733, + -1.3347642, 9.823734, 5.2648, -3.9894438, 5.6188626, 13.165532, 9.022314, + -3.3679013, 5.729391, 2.1637747, 2.0659118, -8.675354, 4.388403, -0.2468586, + 6.348771, 6.508556, -3.9684396, -11.304377, -13.8049965, 5.3364773, -4.633437, + 1.6888955, 2.6685429, 8.21828, -1.7674841, 10.709848, -5.9754534, -2.158992, + -4.8408647, 1.0743647, 2.2654505, -1.4429128, 8.646155, 4.9324765, 1.601925, + 4.0753536, 
-11.165701, -1.0478585, 6.369364, 2.2799797, -2.8299212, 10.3074255, + -6.802859, -1.543623, 0.73475134, -2.2130835, -4.313091, 9.970005, -1.3316114, + 0.12234986, -1.1443465, 0.31355476, 3.8333132, 4.4205723, -1.6774553, -2.464783, + 0.90474373, 1.48355, 6.109352, 0.6740269, -0.3855688, -1.1307039, -4.2242513, + -1.9623871, 4.5256195, 0.27741814, -10.554449, -9.463591, 4.7157593, 1.6424015, + -3.0457706, 1.535701, +}; +static float expected2[100] = { + -10.793994, -4.8195744, -11.696621, -9.775124, -6.219896, -2.2402952, -8.462534, + -1.3024157, -6.098612, 13.629044, -17.974234, 1.7565899, 4.232038, -7.488354, + 9.993664, -9.051899, -10.006039, 0.37701628, 0.49113083, 2.7724934, 2.9953713, + -1.3060579, 3.6611762, 0.8790928, 14.87136, 1.146844, -4.009571, 2.6497757, + -0.500006, 1.989419, 16.258339, -2.259667, 10.046939, 21.49504, 2.4838176, + -2.8874063, 6.5438747, 3.2833507, -1.9475923, -8.614389, 8.095892, 1.6406677, + 5.7794704, -4.2552953, 3.7180157, -17.952108, -11.554081, 6.8275247, 3.3552408, + -0.92592597, 6.9779844, 8.078672, -1.0251617, 20.891973, -10.519844, 5.788312, + -9.28295, -6.319727, 7.928982, -9.364451, 7.761645, 5.9507384, 2.5619707, + 6.1216645, -15.046351, -5.565029, 8.447395, 9.908588, -13.076044, 7.980608, + -8.656453, 4.0841618, -3.2918725, 3.6526053, 7.882443, 16.421524, 7.0374393, + -1.8257833, -0.03940034, -5.7105227, 0.46041846, 4.536182, -13.519664, -4.528281, + -1.4590063, 6.385844, 22.0231, -7.3713737, 1.1096547, -2.5541003, -3.9848454, + -4.322915, -0.083630085, -0.6315868, -13.765797, -13.727608, 0.5921998, 11.174627, + -1.0276442, 1.3818157, +}; +// 3. 
adding two vectors with RELU: 10 random numbers +static float x3[10] = { + 6.537531, -2.8453732, -7.6423984, 2.1252766, -0.19148353, + -0.8522757, -8.211992, -1.0764704, 2.3227465, 0.46851206, +}; +static float y3[10] = { + -0.14162672, 3.124883, 3.621642, 3.3335817, -2.766206, + 1.9977486, 3.4013243, 0.13386084, 1.7938671, 2.8548512, +}; +static float expected3[10] = { + 6.395904, 0.27950978, 0.0, 5.4588585, 0.0, 1.1454729, 0.0, 0.0, 4.1166134, 3.3233633, +}; +} // end of namespace add_vector_test +} // end of namespace nnfw + +#endif // __NNFW_KERNEL_ACL_ADD_TEST_H__ diff --git a/libs/kernel/acl/src/cl/Add.cpp b/libs/kernel/acl/src/cl/Add.cpp new file mode 100644 index 0000000..81e821b --- /dev/null +++ b/libs/kernel/acl/src/cl/Add.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +// TODO: fix include path in CMakeFiles +#include "../IO_accessor.h" +#include "../shape.h" +#include "../UniqueTensor.h" +#include "../util.h" + +namespace nnfw +{ +namespace kernel +{ +namespace acl +{ + +// TODO: refactor this just like reshape.h +namespace common +{ + +typedef std::function sync_scheduler_f; + +// TODO : Currently, handles only Vector addition. Scalar, matrix operation will be added in next +// commits. 
+ +bool addGeneric(const void *inputData1, const nnfw::rt::Shape &inputShape1, const void *inputData2, + const nnfw::rt::Shape &inputShape2, int32_t activation, void *outputData, + const nnfw::rt::Shape &outputShape, sync_scheduler_f sync_scheduler) +{ + + assert((inputShape1.dimensions.size() == 1) && + (inputShape2.dimensions.size() == 1)); // current version supports vector only + + auto input_shape1 = util::fromNNShape(inputShape1); + auto input_shape2 = util::fromNNShape(inputShape2); + auto output_shape = util::fromNNShape(outputShape); + + CLUniqueTensor input1(arm_compute::TensorInfo(input_shape1, arm_compute::Format::F32)); + CLUniqueTensor input2(arm_compute::TensorInfo(input_shape2, arm_compute::Format::F32)); + CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + std::vector> fns; + + auto add_f = std::make_shared(); + + // Regarding ConvertPolicy, refer to + // https://www.mathworks.com/help/fixedpoint/ug/saturation-and-wrapping.html + // TODO: check if using arm_compute::ConvertPolicy::SATURATE is correct + add_f->configure(input1.ptr(), input2.ptr(), output.ptr(), arm_compute::ConvertPolicy::SATURATE); + + fns.emplace_back(add_f); + + util::insertFusedActivationLayer(output, + activation, fns); + + input1.allocate(); + input2.allocate(); + output.allocate(); + + TensorAccess(input1.ref(), (float *)inputData1, inputShape1); + TensorAccess(input2.ref(), (float *)inputData2, inputShape2); + + for (const auto &fn : fns) + { + fn->run(); + } + + sync_scheduler(); + + TensorAccess(output.ref(), (float *)outputData, outputShape); + + return true; +} +} // namespace common + +static void sync_scheduler() { arm_compute::CLScheduler::get().sync(); } + +bool addFloat32(const void *inputData1, const nnfw::rt::Shape &inputShape1, const void *inputData2, + const nnfw::rt::Shape &inputShape2, int32_t activation, void *outputData, + const nnfw::rt::Shape &outputShape) +{ + return common::addGeneric(inputData1, inputShape1, 
inputData2, inputShape2, activation, + outputData, outputShape, sync_scheduler); +} + +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/cl/Add.test.cpp b/libs/kernel/acl/src/cl/Add.test.cpp new file mode 100644 index 0000000..1a281aa --- /dev/null +++ b/libs/kernel/acl/src/cl/Add.test.cpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +// TODO: fix include path in CMakeFiles +#include "../util.h" +#include "../Add.test.h" // test data + +using namespace nnfw::kernel::acl; +using namespace nnfw::add_vector_test; + +TEST(KernelACL_TC, addFloat32_vector_1) +{ + const int DIM = 1; + + const nnfw::rt::Shape inputShape1 = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + const nnfw::rt::Shape inputShape2 = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + + std::vector inputDataPtrs; + std::vector inputShapes; + float outputData[DIM]; + const nnfw::rt::Shape outputShape = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + bool bret; + int32_t activation = static_cast(FusedActivationFunc::NONE); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = addFloat32(nnfw::add_vector_test::x1, inputShape1, nnfw::add_vector_test::y1, inputShape2, + activation, outputData, outputShape); + + EXPECT_EQ(bret, true); + + bret = util::compareData(outputData, 
nnfw::add_vector_test::expected1, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, addFloat32_vector_100) +{ + const int DIM = 100; + + const nnfw::rt::Shape inputShape1 = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + const nnfw::rt::Shape inputShape2 = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + + std::vector inputDataPtrs; + std::vector inputShapes; + float outputData[DIM]; + const nnfw::rt::Shape outputShape = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + bool bret; + int32_t activation = static_cast(FusedActivationFunc::NONE); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = addFloat32(nnfw::add_vector_test::x2, inputShape1, nnfw::add_vector_test::y2, inputShape2, + activation, outputData, outputShape); + + EXPECT_EQ(bret, true); + + bret = util::compareData(outputData, nnfw::add_vector_test::expected2, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, addFloat32_relu_vector_100) +{ + const int DIM = 10; + + const nnfw::rt::Shape inputShape1 = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + const nnfw::rt::Shape inputShape2 = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + + std::vector inputDataPtrs; + std::vector inputShapes; + float outputData[DIM]; + const nnfw::rt::Shape outputShape = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + bool bret; + int32_t activation = static_cast(FusedActivationFunc::RELU); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = addFloat32(nnfw::add_vector_test::x3, inputShape1, nnfw::add_vector_test::y3, inputShape2, + activation, outputData, outputShape); + + EXPECT_EQ(bret, true); + + bret = util::compareData(outputData, nnfw::add_vector_test::expected3, outputShape); + EXPECT_EQ(bret, true); +} diff --git a/libs/kernel/acl/src/neon/Add.cpp b/libs/kernel/acl/src/neon/Add.cpp new file mode 100644 index 0000000..8d30909 --- /dev/null +++ b/libs/kernel/acl/src/neon/Add.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +// TODO: fix include path in CMakeFiles +#include "../IO_accessor.h" +#include "../shape.h" +#include "../UniqueTensor.h" +#include "../util.h" + +namespace nnfw +{ +namespace kernel +{ +namespace acl +{ +namespace neon +{ + +// this file is copied and modified from reshape.h. +// TODO: refactor this just like reshape.h +namespace common +{ + +// TODO : Currently, handles only Vector addition. Scalar, matrix operation will be added in next +// commits. 
+ +bool addGeneric(const void *inputData1, const nnfw::rt::Shape &inputShape1, const void *inputData2, + const nnfw::rt::Shape &inputShape2, int32_t activation, void *outputData, + const nnfw::rt::Shape &outputShape) +{ + + assert((inputShape1.dimensions.size() == 1) && + (inputShape2.dimensions.size() == 1)); // current version supports vector only + + auto input_shape1 = util::fromNNShape(inputShape1); + auto input_shape2 = util::fromNNShape(inputShape2); + auto output_shape = util::fromNNShape(outputShape); + + NEUniqueTensor input1(arm_compute::TensorInfo(input_shape1, arm_compute::Format::F32)); + NEUniqueTensor input2(arm_compute::TensorInfo(input_shape2, arm_compute::Format::F32)); + NEUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + std::vector> fns; + + auto add_f = std::make_shared(); + + // Regarding ConvertPolicy, refer to + // https://www.mathworks.com/help/fixedpoint/ug/saturation-and-wrapping.html + // TODO: check if using arm_compute::ConvertPolicy::SATURATE is correct + add_f->configure(input1.ptr(), input2.ptr(), output.ptr(), arm_compute::ConvertPolicy::SATURATE); + + fns.emplace_back(add_f); + + util::insertFusedActivationLayer(output, + activation, fns); + + input1.allocate(); + input2.allocate(); + output.allocate(); + + TensorAccess(input1.ref(), (float *)inputData1, inputShape1); + TensorAccess(input2.ref(), (float *)inputData2, inputShape2); + + for (const auto &fn : fns) + { + fn->run(); + } + + TensorAccess(output.ref(), (float *)outputData, outputShape); + + return true; +} +} // namespace common + +static void sync_scheduler() { arm_compute::CLScheduler::get().sync(); } + +bool addFloat32(const void *inputData1, const nnfw::rt::Shape &inputShape1, const void *inputData2, + const nnfw::rt::Shape &inputShape2, int32_t activation, void *outputData, + const nnfw::rt::Shape &outputShape) +{ + return common::addGeneric(inputData1, inputShape1, inputData2, inputShape2, activation, + outputData, 
outputShape); +} + +} // namespace neon +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/neon/Add.test.cpp b/libs/kernel/acl/src/neon/Add.test.cpp new file mode 100644 index 0000000..280312a --- /dev/null +++ b/libs/kernel/acl/src/neon/Add.test.cpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +// TODO: fix include path in CMakeFiles +#include "../util.h" +#include "../Add.test.h" // test data + +using namespace nnfw::kernel::acl; +// using nnfw::add_vector_test; + +TEST(KernelACL_TC, neon_addFloat32_vector_1) +{ + const int DIM = 1; + + const nnfw::rt::Shape inputShape1 = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + const nnfw::rt::Shape inputShape2 = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + + std::vector inputDataPtrs; + std::vector inputShapes; + float outputData[DIM]; + const nnfw::rt::Shape outputShape = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + bool bret; + int32_t activation = static_cast(FusedActivationFunc::NONE); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = neon::addFloat32(nnfw::add_vector_test::x1, inputShape1, nnfw::add_vector_test::y1, + inputShape2, activation, outputData, outputShape); + + EXPECT_EQ(bret, true); + + bret = util::compareData(outputData, nnfw::add_vector_test::expected1, outputShape); + 
EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, neon_addFloat32_vector_100) +{ + const int DIM = 100; + + const nnfw::rt::Shape inputShape1 = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + const nnfw::rt::Shape inputShape2 = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + + std::vector inputDataPtrs; + std::vector inputShapes; + float outputData[DIM]; + const nnfw::rt::Shape outputShape = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + bool bret; + int32_t activation = static_cast(FusedActivationFunc::NONE); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = neon::addFloat32(nnfw::add_vector_test::x2, inputShape1, nnfw::add_vector_test::y2, + inputShape2, activation, outputData, outputShape); + + EXPECT_EQ(bret, true); + + bret = util::compareData(outputData, nnfw::add_vector_test::expected2, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, neon_addFloat32_relu_vector_100) +{ + const int DIM = 10; + + const nnfw::rt::Shape inputShape1 = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + const nnfw::rt::Shape inputShape2 = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + + std::vector inputDataPtrs; + std::vector inputShapes; + float outputData[DIM]; + const nnfw::rt::Shape outputShape = {OperandType::FLOAT32, {DIM}, 1.0, 0}; + bool bret; + int32_t activation = static_cast(FusedActivationFunc::RELU); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = neon::addFloat32(nnfw::add_vector_test::x3, inputShape1, nnfw::add_vector_test::y3, + inputShape2, activation, outputData, outputShape); + + EXPECT_EQ(bret, true); + + bret = util::compareData(outputData, nnfw::add_vector_test::expected3, outputShape); + EXPECT_EQ(bret, true); +} diff --git a/libs/kernel/acl/src/util.cpp b/libs/kernel/acl/src/util.cpp index bf4f08b..b6ca58a 100644 --- a/libs/kernel/acl/src/util.cpp +++ b/libs/kernel/acl/src/util.cpp @@ -84,9 +84,24 @@ bool compareData(const float *result, const float *expected, const nnfw::rt::Sha } } } + else 
if (shape.dimensions.size() == 1) + { + uint32_t numitems = nnfw::rt::getSizeOfDimension(shape, 0); + for (int item = 0; item < numitems; item++) + { + if (!::nnfw::util::fp32::epsilon_equal(*(result + item), *(expected + item), 1)) + { + LOG(ERROR) << "compareData failed: result " << *(result + item) << ", expected " + << *(expected + item) << ", diff " + << ::nnfw::util::fp32::relative_diff(*(result + item), *(expected + item)) + << std::endl; + return false; + } + } + } else { - // TODO: add a handler for rank 1 and 3 + // TODO: add a handler for rank 3 LOG(ERROR) << "Unhandled shape: " << shape.dimensions.size() << std::endl; } return true; -- 2.7.4