Implement convolution kernel with ACL (#521)
author	SaeHie Park/Motion Control Lab (SR)/Principal Engineer/Samsung Electronics <saehie.park@samsung.com>
Tue, 10 Apr 2018 06:49:30 +0000 (15:49 +0900)
committer	김정현/Motion Control Lab (SR)/Senior Engineer/Samsung Electronics <jh0822.kim@samsung.com>
Tue, 10 Apr 2018 06:49:30 +0000 (15:49 +0900)
This implements a convolution kernel with ACL in the simplest possible way.

Signed-off-by: SaeHie Park <saehie.park@samsung.com>
src/kernel/acl/CMakeLists.txt
src/kernel/acl/src/Conv2D_acl.cpp
src/kernel/acl/src/Conv2D_acl.test.cpp
src/kernel/acl/src/IO_accessor.cpp [new file with mode: 0644]
src/kernel/acl/src/IO_accessor.h [new file with mode: 0644]
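
For context, here is a minimal usage sketch of the convFloat32() entry point this change fills in. The shapes, values, and the Initialize() call mirror the convFloat32_3x3to1x1 unit test added in this patch; only the wrapping main() is illustrative.

  // Hedged sketch: mirrors the 3x3 -> 1x1 unit test below; only this wrapper is illustrative.
  #include <kernel/acl/nnfw_kernel_acl.h>
  #include <OperationsUtils.h>

  int main()
  {
    nnfw::kernel::acl::Initialize(); // called once up front, as the global test environment does

    float inputData[9]  = { 1, 1, 1, 1, 1, 1, 1, 1, 1 };
    float filterData[9] = { 1, 1, 1, 1, 1, 1, 1, 1, 1 };
    float biasData[1]   = { 1.0f };
    float outputData[1] = { 0.0f };

    // OperandType and FusedActivationFunc come from OperationsUtils.h, as in the unit tests
    const android::nn::Shape inputShape  = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
    const android::nn::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
    const android::nn::Shape biasShape   = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
    const android::nn::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };

    // no padding, stride 1; the activation value is passed but not yet applied by this kernel
    nnfw::kernel::acl::convFloat32(inputData, inputShape, filterData, filterShape,
                                   biasData, biasShape,
                                   0, 0, 0, 0,  // padding left/right/top/bottom
                                   1, 1,        // stride width/height
                                   static_cast<int32_t>(FusedActivationFunc::RELU),
                                   outputData, outputShape);
    // outputData[0] == 10.0f  (sum of nine ones plus the bias of 1.0)
    return 0;
  }
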

index 21fda7f..372afc5 100644 (file)
@@ -20,15 +20,12 @@ SET(RUNTIME_INCLUDES ${PATH_RUNTIME_NN}/common/include
                      ${PATH_RUNTIME_NN}/depend/android-base/include
                      )
 
-# TODO move to top
-# gtest
-set(GTEST_LIBS libgtest.a libgtest_main.a pthread)
-
 # common
 link_directories(${CMAKE_INSTALL_PREFIX}/lib)
 
 # kernel library
 set(KERNELACL_SRCS "src/Init_acl.cpp"
+                   "src/IO_accessor.cpp"
                    "src/Conv2D_acl.cpp"
                    )
 
@@ -57,6 +54,6 @@ endif()
 target_link_libraries(${LIB_KERNELACL_TEST}
                       ${LIB_KERNELACL}
                       nnfw_util ${NNFW_ACL_LIBS}
-                      ${GTEST_LIBS}
+                      ${NNFW_GTEST_LIBS}
                       )
 install(TARGETS ${LIB_KERNELACL_TEST} DESTINATION unittest)
index e5bde29..44fea16 100644 (file)
@@ -1,9 +1,39 @@
-#include "OperationsUtils.h"
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include "IO_accessor.h"
+
+namespace android {
+namespace nn {
+
+// TODO: remove this from this source file and use the runtime's implementation instead
+uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx) {
+    if (dimensionIdx >= shape.dimensions.size()) {
+        // TODO: log the error
+        return 0;
+    }
+    return shape.dimensions[dimensionIdx];
+}
+
+} // namespace nn
+} // namespace android
 
 namespace nnfw {
 namespace kernel {
 namespace acl {
 
+arm_compute::TensorShape fromNNShape(const android::nn::Shape& shape)
+{
+  // NOTE: the dimension order may not be correct
+  // TODO: check and fix the order if it is incorrect
+  uint32_t c = android::nn::getSizeOfDimension(shape, 0);
+  uint32_t h = android::nn::getSizeOfDimension(shape, 1);
+  uint32_t w = android::nn::getSizeOfDimension(shape, 2);
+  uint32_t n = android::nn::getSizeOfDimension(shape, 3);
+
+  return arm_compute::TensorShape(w, h, c, n);
+}
+
 bool convFloat32(const float* inputData, const android::nn::Shape& inputShape,
                  const float* filterData, const android::nn::Shape& filterShape,
                  const float* biasData, const android::nn::Shape& biasShape,
@@ -13,7 +43,39 @@ bool convFloat32(const float* inputData, const android::nn::Shape& inputShape,
                  int32_t activation,
                  float* outputData, const android::nn::Shape& outputShape)
 {
-  // TODO implement this
+  arm_compute::TensorShape input_shape = fromNNShape(inputShape);
+  arm_compute::TensorShape filter_shape = fromNNShape(filterShape);
+  arm_compute::TensorShape bias_shape = fromNNShape(biasShape);
+  arm_compute::TensorShape output_shape = fromNNShape(outputShape);
+  arm_compute::PadStrideInfo conv_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+                                              padding_left, padding_right,
+                                              padding_top, padding_bottom,
+                                              arm_compute::DimensionRoundingType::FLOOR);
+
+  arm_compute::CLTensor input, output, bias, filter;
+
+  input.allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+  output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+  bias.allocator()->init(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32));
+  filter.allocator()->init(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32));
+
+  arm_compute::CLConvolutionLayer conv_f;
+  conv_f.configure(&input, &filter, &bias, &output, conv_info);
+
+  input.allocator()->allocate();
+  output.allocator()->allocate();
+  bias.allocator()->allocate();
+  filter.allocator()->allocate();
+
+  TensorAccess<InputAccessor>(input, inputData, inputShape);
+  TensorAccess<BiasAccessor>(bias, biasData, biasShape);
+  TensorAccess<WeightAccessor>(filter, filterData, filterShape);
+
+  conv_f.run();
+
+  arm_compute::CLScheduler::get().sync();
+
+  TensorAccess<OutputAccessor>(output, outputData, outputShape);
 
   return true;
 }
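
A note on the NOTE/TODO in fromNNShape() above: android::nn::Shape stores activation dimensions in NHWC order (batch, height, width, channels), while the arm_compute::TensorShape constructed here is ordered (width, height, channels, batches). Under that assumption the mapping would read as in the sketch below; the tests in this patch are unaffected either way, because their shapes all have N == C == 1.

  // Hedged sketch only: assumes android::nn::Shape follows the NN API's NHWC dimension order.
  arm_compute::TensorShape fromNNShape(const android::nn::Shape& shape)
  {
    uint32_t n = android::nn::getSizeOfDimension(shape, 0); // batches
    uint32_t h = android::nn::getSizeOfDimension(shape, 1); // height
    uint32_t w = android::nn::getSizeOfDimension(shape, 2); // width
    uint32_t c = android::nn::getSizeOfDimension(shape, 3); // channels
    return arm_compute::TensorShape(w, h, c, n);
  }
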
index 19589b7..73afa28 100644 (file)
@@ -1,5 +1,5 @@
-#include "gtest/gtest.h"
-#include "OperationsUtils.h"
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
 #include <kernel/acl/nnfw_kernel_acl.h>
 
 namespace nnfw {
@@ -26,12 +26,30 @@ static void initData(float* data, int num, float value)
   }
 }
 
+// compareData
+// returns true if 'result' matches 'expected' for the given shape,
+// false otherwise
+bool compareData(const float* result, const float* expected, const android::nn::Shape& shape)
+{
+  NN_CHECK_EQ(shape.dimensions.size(), 4);
+
+  uint32_t height = android::nn::getSizeOfDimension(shape, 1);
+  uint32_t width  = android::nn::getSizeOfDimension(shape, 2);
+  uint32_t numitems = height * width;
+  for (uint32_t item = 0; item < numitems; item++) {
+    if (*(result + item) != *(expected + item)) {
+      LOG(ERROR) << "compareData failed: result " << *(result + item)
+                 << ", expected " << *(expected + item) << std::endl;
+      return false;
+    }
+  }
+  return true;
+}
+
 using namespace nnfw::kernel::acl;
 
-TEST(KernelACL_TC, convFloat32_test)
+TEST(KernelACL_TC, convFloat32_3x3to1x1)
 {
-  nnfw::kernel::acl::Initialize();
-
   float inputData[9];
   const android::nn::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
   float filterData[9];
@@ -45,7 +63,7 @@ TEST(KernelACL_TC, convFloat32_test)
   int32_t stride_width = 1;
   int32_t stride_height = 1;
   int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
-  float* outputData = new float[9];
+  float outputData[1];
   const android::nn::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
   bool bret;
 
@@ -61,8 +79,70 @@ TEST(KernelACL_TC, convFloat32_test)
                      stride_width, stride_height,
                      activation,
                      outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = { 10.0f };
+  bret = compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, convFloat32_3x3to3x3)
+{
+  float inputData[9];
+  const android::nn::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float filterData[9];
+  const android::nn::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  float biasData[1] = { 1.0 };
+  const android::nn::Shape biasShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+  int32_t padding_left = 1;
+  int32_t padding_right = 1;
+  int32_t padding_top = 1;
+  int32_t padding_bottom = 1;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+  float outputData[9];
+  const android::nn::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  bool bret;
 
-  // TODO compare outputData with some expected data
+  initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+  initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+  initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
 
+  bret = convFloat32(inputData, inputShape,
+                     filterData, filterShape,
+                     biasData, biasShape,
+                     padding_left, padding_right,
+                     padding_top, padding_bottom,
+                     stride_width, stride_height,
+                     activation,
+                     outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = {
+    5.0f, 7.0f, 5.0f,
+    7.0f, 10.0f, 7.0f,
+    5.0f, 7.0f, 5.0f
+  };
+  bret = compareData(outputData, expectData, outputShape);
   EXPECT_EQ(bret, true);
 }
+
+class TestEnvironment : public ::testing::Environment
+{
+public:
+  virtual ~TestEnvironment() = default;
+
+  virtual void SetUp()
+  {
+    nnfw::kernel::acl::Initialize();
+  }
+
+  virtual void TearDown()
+  {
+    // DO NOTHING
+  }
+};
+
+static ::testing::Environment* const testingenv =
+  ::testing::AddGlobalTestEnvironment(new TestEnvironment);
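
For reference, the expected values in these tests follow from the all-ones 3x3 input and filter with a bias of 1.0: each output element is the number of input cells the filter window overlaps, plus the bias. With 1-pixel zero padding (convFloat32_3x3to3x3) that gives 2*2 + 1 = 5 at the corners, 2*3 + 1 = 7 at the edges and 3*3 + 1 = 10 at the center; without padding (convFloat32_3x3to1x1) the single output is 3*3 + 1 = 10.
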
diff --git a/src/kernel/acl/src/IO_accessor.cpp b/src/kernel/acl/src/IO_accessor.cpp
new file mode 100644 (file)
index 0000000..441ef47
--- /dev/null
@@ -0,0 +1,93 @@
+#include "IO_accessor.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+InputAccessor::InputAccessor(const float* inputData, const android::nn::Shape& inputShape)
+  : _inputData(inputData)
+  , _inputShape(inputShape)
+{
+}
+
+WeightAccessor::WeightAccessor(const float* filterData, const android::nn::Shape& filterShape)
+  : _filterData(filterData)
+  , _filterShape(filterShape)
+{
+}
+
+BiasAccessor::BiasAccessor(const float* biasData, const android::nn::Shape& biasShape)
+  : _biasData(biasData)
+  , _biasShape(biasShape)
+{
+}
+
+OutputAccessor::OutputAccessor(float* outputData, const android::nn::Shape& outputShape)
+  : _outputData(outputData)
+  , _outputShape(outputShape)
+{
+}
+
+bool InputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+  arm_compute::Window window;
+  window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+  execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+  {
+    uint32_t width  = android::nn::getSizeOfDimension(_inputShape, 2);
+    uint32_t offset = id.y() * width + id.x();
+    *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+        *(_inputData + offset);
+  });
+  return true;
+}
+
+bool WeightAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+  arm_compute::Window window;
+  window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+  execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+  {
+    uint32_t width  = android::nn::getSizeOfDimension(_filterShape, 2);
+    uint32_t offset = id.y() * width + id.x();
+    *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+        *(_filterData + offset);
+  });
+  return true;
+}
+
+bool BiasAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+  arm_compute::Window window;
+  window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+  execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+  {
+    uint32_t width  = android::nn::getSizeOfDimension(_biasShape, 2);
+    uint32_t offset = id.y() * width + id.x();
+    *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+        *(_biasData + offset);
+  });
+  return true;
+}
+
+bool OutputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+  arm_compute::Window window;
+  window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+  execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+  {
+    uint32_t width  = android::nn::getSizeOfDimension(_outputShape, 2);
+    uint32_t offset = id.y() * width + id.x();
+    *(_outputData + offset) =
+        *reinterpret_cast<float *>(tensor.ptr_to_element(id));
+  });
+  return false; // end the network
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
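
One observation on the accessors above: the offset id.y() * width + id.x() addresses a single 2-D plane, which is sufficient for the N == C == 1 shapes used in the tests. For general NHWC buffers the batch and channel coordinates would have to enter the offset as well; a hypothetical generalization, assuming the (W, H, C, N) tensor shapes built by fromNNShape(), might look like this:

  // Hypothetical helper (not part of this patch): row-major NHWC offset for a coordinate,
  // assuming ACL coordinates map as x -> W, y -> H, z -> C, [3] -> N for these tensors.
  uint32_t nhwcOffset(const arm_compute::Coordinates& id, const android::nn::Shape& shape)
  {
    uint32_t h = android::nn::getSizeOfDimension(shape, 1); // height
    uint32_t w = android::nn::getSizeOfDimension(shape, 2); // width
    uint32_t c = android::nn::getSizeOfDimension(shape, 3); // channels
    return ((id[3] * h + id.y()) * w + id.x()) * c + id.z();
  }
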
diff --git a/src/kernel/acl/src/IO_accessor.h b/src/kernel/acl/src/IO_accessor.h
new file mode 100644 (file)
index 0000000..476e079
--- /dev/null
@@ -0,0 +1,93 @@
+#ifndef __NNFW_KERNEL_ACL_IO_ACCESSOR_H__
+#define __NNFW_KERNEL_ACL_IO_ACCESSOR_H__
+
+#include <arm_compute/graph/ITensorAccessor.h>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+#include <OperationsUtils.h> // for android::nn::Shape
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+class InputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+    InputAccessor(const float* inputData, const android::nn::Shape& inputShape);
+    InputAccessor(InputAccessor&&) = default;
+
+    // Inherited methods overridden:
+    bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+    const float* _inputData;
+    const android::nn::Shape& _inputShape;
+};
+
+class WeightAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+    WeightAccessor(const float* filterData, const android::nn::Shape& filterShape);
+    WeightAccessor(WeightAccessor&&) = default;
+
+    // Inherited methods overridden:
+    bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+    const float* _filterData;
+    const android::nn::Shape& _filterShape;
+};
+
+class BiasAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+    BiasAccessor(const float* biasData, const android::nn::Shape& biasShape);
+    BiasAccessor(BiasAccessor&&) = default;
+
+    // Inherited methods overridden:
+    bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+    const float* _biasData;
+    const android::nn::Shape& _biasShape;
+};
+
+class OutputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+    OutputAccessor(float* outputData, const android::nn::Shape& outputShape);
+    OutputAccessor(OutputAccessor&&) = default;
+
+    // Inherited methods overridden:
+    bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+    float* _outputData;
+    const android::nn::Shape& _outputShape;
+};
+
+template<typename AccessorType>
+inline void TensorAccess(arm_compute::CLTensor& tensor, const float* data,
+                         const android::nn::Shape& shape)
+{
+  tensor.map();
+  AccessorType accessor(data, shape);
+  accessor.access_tensor(tensor);
+  tensor.unmap();
+}
+
+template<typename AccessorType>
+inline void TensorAccess(arm_compute::CLTensor& tensor, float* data,
+                         const android::nn::Shape& shape)
+{
+  tensor.map();
+  AccessorType accessor(data, shape);
+  accessor.access_tensor(tensor);
+  tensor.unmap();
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_IO_ACCESSOR_H__