#include <arm_compute/graph/Graph.h>
#include <arm_compute/graph/Nodes.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/Utils.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <arm_compute/runtime/CL/CLTensor.h>
+#include <arm_compute/runtime/CL/functions/CLConvolutionLayer.h>
+
#include "io_accessor.h"
#include "util/environment.h"
using arm_compute::TensorInfo;
using arm_compute::TensorShape;
+namespace acl_graph {
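+
+// Convolution implemented through the ACL graph API.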
+
bool convFloat32(const float* inputData, const Shape& inputShape,
const float* filterData, const Shape& filterShape,
const float* biasData, const Shape& biasShape,
                 int32_t padding_left, int32_t padding_right,
                 int32_t padding_top, int32_t padding_bottom,
int32_t stride_width, int32_t stride_height,
int32_t activation,
- float* outputData, const Shape& outputShape,
- bool useACL) {
- if (!useACL)
- return convFloat32(inputData, inputShape, filterData, filterShape,
- biasData, biasShape, padding_left, padding_right,
- padding_top, padding_bottom, stride_width, stride_height,
- activation, outputData, outputShape);
-
+ float* outputData, const Shape& outputShape)
+{
  // Try a simple build-and-run with the ACL graph API. For now this is a
  // stub: it only constructs an empty graph and reports success; the
  // convolution node has yet to be added.
  arm_compute::graph::Graph graph;
  return true;
}
+} // namespace acl_graph
+
+//-----------------------------------------------------------------------------
+
+namespace acl_runtime {
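+
+// Convolution implemented directly on the ACL CL runtime (CLConvolutionLayer).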
+
+TensorShape calculate_convolution_layer_output_shape(
+ const arm_compute::TensorShape &input_shape,
+ const arm_compute::TensorShape &weights_shape,
+ const arm_compute::PadStrideInfo &conv_info)
+{
+ unsigned int output_width = 0;
+ unsigned int output_height = 0;
+
+ // Get output width and height
+ std::tie(output_width, output_height) =
+ arm_compute::scaled_dimensions(
+ input_shape.x(), input_shape.y(),
+ weights_shape.x(), weights_shape.y(),
+ conv_info);
+
+  // Create output shape: ACL tensors are laid out W,H,C,N, so the channel
+  // dimension (index 2) of the output is the number of kernels, weights_shape[3]
+  TensorShape output_shape = input_shape;
+  output_shape.set(0, output_width);
+  output_shape.set(1, output_height);
+  output_shape.set(2, weights_shape[3]);
+
+ return output_shape;
+}
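+// e.g. a 3x3 input with a 3x3 kernel, stride 1 and no padding yields a
+// 1x1 output (the test_3x3_1x1_one case below).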
+
+bool convFloat32(const float* inputData, const Shape& inputShape,
+ const float* filterData, const Shape& filterShape,
+ const float* biasData, const Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const Shape& outputShape)
+{
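+  // Set up the default OpenCL context and command queue for the CL scheduler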
+ arm_compute::CLScheduler::get().default_init();
+
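+  // NOTE: this assumes the dimension order getSizeOfDimension() yields for
+  // these Shape objects; if the shapes are NHWC, the batch/channel indices
+  // (0 and 3) would need to be swapped.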
+ uint32_t tsi_c = getSizeOfDimension(inputShape, 0);
+ uint32_t tsi_h = getSizeOfDimension(inputShape, 1);
+ uint32_t tsi_w = getSizeOfDimension(inputShape, 2);
+ uint32_t tsi_n = getSizeOfDimension(inputShape, 3);
+
+ uint32_t tsk_h = getSizeOfDimension(filterShape, 1);
+ uint32_t tsk_w = getSizeOfDimension(filterShape, 2);
+ uint32_t tsk_n = getSizeOfDimension(filterShape, 3);
+
+  // ACL CL tensors are laid out W,H,C,N
+  TensorShape input_shape = TensorShape(tsi_w, tsi_h, tsi_c, tsi_n);
+  TensorShape filter_shape = TensorShape(tsk_w, tsk_h, tsi_c, tsk_n);
+  arm_compute::PadStrideInfo conv_info =
+      arm_compute::PadStrideInfo(stride_width, stride_height,
+                                 padding_left, padding_right,
+                                 padding_top, padding_bottom,
+                                 arm_compute::DimensionRoundingType::FLOOR);
+
+ TensorShape output_shape = calculate_convolution_layer_output_shape(
+ input_shape, filter_shape, conv_info);
+
+  uint32_t tso_w = output_shape[0];
+  uint32_t tso_h = output_shape[1];
+  uint32_t tso_c = output_shape[2];
+  uint32_t tso_n = output_shape[3];
+
+ arm_compute::CLTensor input, output, bias, filter;
+
+  // Initialise with the full 4D shapes; bias is 1D, one value per output channel
+  input.allocator()->init(TensorInfo(input_shape, arm_compute::Format::F32));
+  output.allocator()->init(TensorInfo(output_shape, arm_compute::Format::F32));
+  bias.allocator()->init(TensorInfo(TensorShape(tso_c), arm_compute::Format::F32));
+  filter.allocator()->init(TensorInfo(filter_shape, arm_compute::Format::F32));
+
+ input.allocator()->allocate();
+ output.allocator()->allocate();
+ bias.allocator()->allocate();
+ filter.allocator()->allocate();
+
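+  // Copy host buffers into the CL tensors; map()/unmap() give host-visible
+  // access to the underlying OpenCL buffers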
+ input.map();
+ InputAccessor ia(inputData, inputShape);
+ ia.access_tensor(input);
+ input.unmap();
+
+ bias.map();
+ BiasAccessor ba(biasData, biasShape);
+ ba.access_tensor(bias);
+ bias.unmap();
+
+ filter.map();
+ WeightAccessor fa(filterData, filterShape);
+ fa.access_tensor(filter);
+ filter.unmap();
+
+  arm_compute::CLConvolutionLayer conv_f;
+  conv_f.configure(&input, &filter, &bias, &output, conv_info);
+
+  conv_f.run();
+
+  // Block until the OpenCL queue has finished before reading the output back
+  arm_compute::CLScheduler::get().sync();
+
+ output.map();
+ OutputAccessor oa(outputData, outputShape);
+ oa.access_tensor(output);
+ output.unmap();
+
+ return true;
+}
+
+} // namespace acl_runtime
+
+//-----------------------------------------------------------------------------
+
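+// Selects which backend runs the convolution.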
+enum COMPUTE_TYPE {
+ COMPUTE_DEFAULT = 0,
+ COMPUTE_ACLGRAPH,
+ COMPUTE_ACLRT
+};
+
+bool convFloat32(const float* inputData, const Shape& inputShape,
+ const float* filterData, const Shape& filterShape,
+ const float* biasData, const Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const Shape& outputShape,
+ COMPUTE_TYPE compType) {
+
+ switch (compType)
+ {
+ case COMPUTE_DEFAULT :
+ return convFloat32(inputData, inputShape, filterData, filterShape,
+ biasData, biasShape, padding_left, padding_right,
+ padding_top, padding_bottom, stride_width, stride_height,
+ activation, outputData, outputShape);
+
+ case COMPUTE_ACLGRAPH :
+ return acl_graph::convFloat32(inputData, inputShape, filterData, filterShape,
+ biasData, biasShape, padding_left, padding_right,
+ padding_top, padding_bottom, stride_width, stride_height,
+ activation, outputData, outputShape);
+
+ case COMPUTE_ACLRT :
+ return acl_runtime::convFloat32(inputData, inputShape, filterData, filterShape,
+ biasData, biasShape, padding_left, padding_right,
+ padding_top, padding_bottom, stride_width, stride_height,
+ activation, outputData, outputShape);
+ }
+ return false;
+}
+
//-----------------------------------------------------------------------------
void dumpData(const char* name, const float* data, const Shape& shape)
return true;
}
-int test_3x3_1x1_one(void)
+int test_3x3_1x1_one(COMPUTE_TYPE comptype)
{
float inputData[9];
const Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
stride_width, stride_height,
activation,
expectData, outputShape,
- false);
+ COMPUTE_DEFAULT);
bret = convFloat32(inputData, inputShape,
filterData, filterShape,
stride_width, stride_height,
activation,
outputData, outputShape,
- true);
+ comptype);
dumpData("Input ", inputData, inputShape);
dumpData("Filter ", filterData, filterShape);
stride_width, stride_height,
activation,
expectData, outputShape,
- false);
+ COMPUTE_DEFAULT);
bret = convFloat32(inputData, inputShape,
filterData, filterShape,
stride_width, stride_height,
activation,
outputData, outputShape,
- true);
+ COMPUTE_ACLGRAPH);
dumpData("Input ", inputData, inputShape);
dumpData("Filter ", filterData, filterShape);
stride_width, stride_height,
activation,
expectData, outputShape,
- false);
+ COMPUTE_DEFAULT);
bret = convFloat32(inputData, inputShape,
filterData, filterShape,
stride_width, stride_height,
activation,
outputData, outputShape,
- true);
+ COMPUTE_ACLGRAPH);
dumpData("Input ", inputData, inputShape);
dumpData("Filter ", filterData, filterShape);
int result;
// input 3x3, output 1x1, all data 1.0
- result = test_3x3_1x1_one();
+ result = test_3x3_1x1_one(COMPUTE_ACLGRAPH);
+ if (result) return result;
+ result = test_3x3_1x1_one(COMPUTE_ACLRT);
if (result) return result;
// input 3x3, output 3x3, all data 1.0