From 3b83d7da2418c95011eece73be337525e9ac3e2d Mon Sep 17 00:00:00 2001
From: SaeHie Park/Motion Control Lab(SR)/Principal Engineer/Samsung Electronics
Date: Mon, 9 Apr 2018 13:10:49 +0900
Subject: [PATCH] Labs convolution with acl runtime (#500)

This updates the labs convolution test so it can also run through the ACL
runtime layer, in addition to the existing ACL graph layer.

Signed-off-by: SaeHie Park
---
 labs/kerneltesting/conv2d/io_accessor.h        |   2 +
 labs/kerneltesting/conv2d/nnfw_conv2d_test.cpp | 181 ++++++++++++++++++++++---
 2 files changed, 167 insertions(+), 16 deletions(-)

diff --git a/labs/kerneltesting/conv2d/io_accessor.h b/labs/kerneltesting/conv2d/io_accessor.h
index 71ab069..60940a4 100644
--- a/labs/kerneltesting/conv2d/io_accessor.h
+++ b/labs/kerneltesting/conv2d/io_accessor.h
@@ -2,6 +2,8 @@
 #define __CONV2D_IO_ACCESSOR_H__
 
 #include <arm_compute/graph/ITensorAccessor.h>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
 #include "types.h"
 
 class InputAccessor : public arm_compute::graph::ITensorAccessor
diff --git a/labs/kerneltesting/conv2d/nnfw_conv2d_test.cpp b/labs/kerneltesting/conv2d/nnfw_conv2d_test.cpp
index 4463d61..cfceda0 100644
--- a/labs/kerneltesting/conv2d/nnfw_conv2d_test.cpp
+++ b/labs/kerneltesting/conv2d/nnfw_conv2d_test.cpp
@@ -13,6 +13,9 @@
 #include <arm_compute/graph/Graph.h>
 #include <arm_compute/graph/Nodes.h>
 
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
 #include "io_accessor.h"
 #include "util/environment.h"
 
@@ -93,6 +96,8 @@ using arm_compute::graph::Graph;
 using arm_compute::TensorInfo;
 using arm_compute::TensorShape;
 
+namespace acl_graph {
+
 bool convFloat32(const float* inputData, const Shape& inputShape,
                  const float* filterData, const Shape& filterShape,
                  const float* biasData, const Shape& biasShape,
@@ -100,14 +105,8 @@ bool convFloat32(const float* inputData, const Shape& inputShape,
                  int32_t padding_top, int32_t padding_bottom,
                  int32_t stride_width, int32_t stride_height,
                  int32_t activation,
-                 float* outputData, const Shape& outputShape,
-                 bool useACL) {
-  if (!useACL)
-    return convFloat32(inputData, inputShape, filterData, filterShape,
-                       biasData, biasShape, padding_left, padding_right,
-                       padding_top, padding_bottom, stride_width, stride_height,
-                       activation, outputData, outputShape);
-
+                 float* outputData, const Shape& outputShape)
+{
   // Try with simple build-run with ACL Layer
   arm_compute::graph::Graph graph;
 
@@ -154,6 +153,154 @@ bool convFloat32(const float* inputData, const Shape& inputShape,
   return true;
 }
 
+} // namespace acl_graph
+
+//-----------------------------------------------------------------------------
+
+namespace acl_runtime {
+
+TensorShape calculate_convolution_layer_output_shape(
+    const arm_compute::TensorShape &input_shape,
+    const arm_compute::TensorShape &weights_shape,
+    const arm_compute::PadStrideInfo &conv_info)
+{
+  unsigned int output_width = 0;
+  unsigned int output_height = 0;
+
+  // Get output width and height
+  std::tie(output_width, output_height) =
+      arm_compute::scaled_dimensions(
+          input_shape.x(), input_shape.y(),
+          weights_shape.x(), weights_shape.y(),
+          conv_info);
+
+  // Create output shape: (width, height, #kernels, batch)
+  TensorShape output_shape = input_shape;
+  output_shape.set(0, output_width);
+  output_shape.set(1, output_height);
+  output_shape.set(2, weights_shape[3]);
+
+  return output_shape;
+}
+
+bool convFloat32(const float* inputData, const Shape& inputShape,
+                 const float* filterData, const Shape& filterShape,
+                 const float* biasData, const Shape& biasShape,
+                 int32_t padding_left, int32_t padding_right,
+                 int32_t padding_top, int32_t padding_bottom,
+                 int32_t stride_width, int32_t stride_height,
+                 int32_t activation,
+                 float* outputData, const Shape& outputShape)
+{
+  arm_compute::CLScheduler::get().default_init();
+
+  uint32_t tsi_c = getSizeOfDimension(inputShape, 0);
+  uint32_t tsi_h = getSizeOfDimension(inputShape, 1);
+  uint32_t tsi_w = getSizeOfDimension(inputShape, 2);
+  uint32_t tsi_n = getSizeOfDimension(inputShape, 3);
+
+  uint32_t tsk_h = getSizeOfDimension(filterShape, 1);
+  uint32_t tsk_w = getSizeOfDimension(filterShape, 2);
+  uint32_t tsk_n = getSizeOfDimension(filterShape, 3);
+
+  TensorShape input_shape = TensorShape(tsi_w, tsi_h, tsi_c, tsi_n);
+  // filter shape: (width, height, input channels, #kernels)
+  TensorShape filter_shape = TensorShape(tsk_w, tsk_h, tsi_c, tsk_n);
+  // PadStrideInfo takes (stride_x, stride_y, pad_x, pad_y); padding is
+  // assumed symmetric, so only the left/top values are passed
+  arm_compute::PadStrideInfo conv_info =
+      arm_compute::PadStrideInfo(stride_width, stride_height, padding_left, padding_top);
+
+  TensorShape output_shape = calculate_convolution_layer_output_shape(
+      input_shape, filter_shape, conv_info);
+
+  uint32_t tso_w = output_shape[0];
+  uint32_t tso_h = output_shape[1];
+  uint32_t tso_c = output_shape[2];
+  uint32_t tso_n = output_shape[3];
+
+  arm_compute::CLTensor input, output, bias, filter;
+
+  // 2-D (W, H) tensors suffice here: the tests below use batch = channels = 1
+  input.allocator()->init(TensorInfo(tsi_w, tsi_h, arm_compute::Format::F32));
+  output.allocator()->init(TensorInfo(tso_w, tso_h, arm_compute::Format::F32));
+  // bias carries one value per output channel, so it is 1-D
+  bias.allocator()->init(TensorInfo(TensorShape(tso_c), 1, arm_compute::DataType::F32));
+  filter.allocator()->init(TensorInfo(tsk_w, tsk_h, arm_compute::Format::F32));
+
+  input.allocator()->allocate();
+  output.allocator()->allocate();
+  bias.allocator()->allocate();
+  filter.allocator()->allocate();
+
+  // map() exposes each CL buffer to the host so the accessors can fill it
+  input.map();
+  InputAccessor ia(inputData, inputShape);
+  ia.access_tensor(input);
+  input.unmap();
+
+  bias.map();
+  BiasAccessor ba(biasData, biasShape);
+  ba.access_tensor(bias);
+  bias.unmap();
+
+  filter.map();
+  WeightAccessor fa(filterData, filterShape);
+  fa.access_tensor(filter);
+  filter.unmap();
+
+  arm_compute::CLConvolutionLayer conv_f;
+  conv_f.configure(&input, &filter, &bias, &output, conv_info);
+
+  conv_f.run();
+
+  // wait for the CL queue to finish before reading the result back
+  arm_compute::CLScheduler::get().sync();
+
+  output.map();
+  OutputAccessor oa(outputData, outputShape);
+  oa.access_tensor(output);
+  output.unmap();
+
+  return true;
+}
+
+} // namespace acl_runtime
+
+//-----------------------------------------------------------------------------
+
+enum COMPUTE_TYPE {
+  COMPUTE_DEFAULT = 0,
+  COMPUTE_ACLGRAPH,
+  COMPUTE_ACLRT
+};
+
+bool convFloat32(const float* inputData, const Shape& inputShape,
+                 const float* filterData, const Shape& filterShape,
+                 const float* biasData, const Shape& biasShape,
+                 int32_t padding_left, int32_t padding_right,
+                 int32_t padding_top, int32_t padding_bottom,
+                 int32_t stride_width, int32_t stride_height,
+                 int32_t activation,
+                 float* outputData, const Shape& outputShape,
+                 COMPUTE_TYPE compType) {
+
+  switch (compType)
+  {
+    case COMPUTE_DEFAULT:
+      return convFloat32(inputData, inputShape, filterData, filterShape,
+                         biasData, biasShape, padding_left, padding_right,
+                         padding_top, padding_bottom, stride_width, stride_height,
+                         activation, outputData, outputShape);
+
+    case COMPUTE_ACLGRAPH:
+      return acl_graph::convFloat32(inputData, inputShape, filterData, filterShape,
+                                    biasData, biasShape, padding_left, padding_right,
+                                    padding_top, padding_bottom, stride_width, stride_height,
+                                    activation, outputData, outputShape);
+
+    case COMPUTE_ACLRT:
+      return acl_runtime::convFloat32(inputData, inputShape, filterData, filterShape,
+                                      biasData, biasShape, padding_left, padding_right,
+                                      padding_top, padding_bottom, stride_width, stride_height,
+                                      activation, outputData, outputShape);
+  }
+  return false;
+}
+
 //-----------------------------------------------------------------------------
 
 void dumpData(const char* name, const float* data, const Shape& shape)
@@ -206,7 +353,7 @@ bool compareData(const float* result, const float* expected, const Shape& shape)
   return true;
 }
 
-int test_3x3_1x1_one(void)
+int test_3x3_1x1_one(COMPUTE_TYPE comptype)
 {
   float inputData[9];
   const Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
@@ -239,7 +386,7 @@
                      stride_width, stride_height,
                      activation,
                      expectData, outputShape,
-                     false);
+                     COMPUTE_DEFAULT);
 
   bret = convFloat32(inputData, inputShape,
                      filterData, filterShape,
@@ -249,7 +396,7 @@
                      stride_width, stride_height,
                      activation,
                      outputData, outputShape,
-                     true);
+                     comptype);
 
   dumpData("Input  ", inputData, inputShape);
   dumpData("Filter ", filterData, filterShape);
@@ -303,7 +450,7 @@
                      stride_width, stride_height,
                      activation,
                      expectData, outputShape,
-                     false);
+                     COMPUTE_DEFAULT);
 
   bret = convFloat32(inputData, inputShape,
                      filterData, filterShape,
@@ -313,7 +460,7 @@
                      stride_width, stride_height,
                      activation,
                      outputData, outputShape,
-                     true);
+                     COMPUTE_ACLGRAPH);
 
   dumpData("Input  ", inputData, inputShape);
   dumpData("Filter ", filterData, filterShape);
@@ -367,7 +514,7 @@
                      stride_width, stride_height,
                      activation,
                      expectData, outputShape,
-                     false);
+                     COMPUTE_DEFAULT);
 
   bret = convFloat32(inputData, inputShape,
                      filterData, filterShape,
@@ -377,7 +524,7 @@
                      stride_width, stride_height,
                      activation,
                      outputData, outputShape,
-                     true);
+                     COMPUTE_ACLGRAPH);
 
   dumpData("Input  ", inputData, inputShape);
   dumpData("Filter ", filterData, filterShape);
@@ -403,7 +550,9 @@ int main(int argc, char* argv[])
   int result;
 
   // input 3x3, output 1x1, all data 1.0
-  result = test_3x3_1x1_one();
+  result = test_3x3_1x1_one(COMPUTE_ACLGRAPH);
+  if (result) return result;
+  result = test_3x3_1x1_one(COMPUTE_ACLRT);
   if (result) return result;
 
   // input 3x3, output 3x3, all data 1.0
-- 
2.7.4
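
For reference, arm_compute::scaled_dimensions() used in
calculate_convolution_layer_output_shape() applies the usual FLOOR
convolution-size rule. A minimal sketch of that arithmetic, checked against
the sizes the tests expect (the helper name is illustrative, not ACL API; the
3x3 filter in test_3x3_1x1_one is inferred from its 3x3-input/1x1-output
naming):

    #include <cassert>

    // FLOOR rule for the default PadStrideInfo rounding:
    // out = floor((in + 2*pad - kernel) / stride) + 1
    static unsigned int conv_out_dim(unsigned int in, unsigned int kernel,
                                     unsigned int stride, unsigned int pad)
    {
      return (in + 2 * pad - kernel) / stride + 1; // integer division == floor
    }

    int main()
    {
      // 3x3 input, 3x3 filter, stride 1, no padding -> 1x1 (test_3x3_1x1_one)
      assert(conv_out_dim(3, 3, 1, 0) == 1);
      // same filter with pad 1 keeps the 3x3 spatial size
      assert(conv_out_dim(3, 3, 1, 1) == 3);
      return 0;
    }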
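
The map()/unmap() pairs in acl_runtime::convFloat32 are what make the
host-side copies work: map() brings the OpenCL buffer into host address
space, the accessor then walks it through the plain ITensor interface, and
unmap() hands it back to the device. The InputAccessor/BiasAccessor/
WeightAccessor/OutputAccessor bodies live in io_accessor.cpp, which this
patch does not touch; a hypothetical accessor in the same style could look
like this (ConstantAccessor is illustrative, not part of the patch):

    #include <arm_compute/core/Helpers.h>
    #include <arm_compute/core/ITensor.h>
    #include <arm_compute/core/Window.h>
    #include <arm_compute/graph/ITensorAccessor.h>

    class ConstantAccessor : public arm_compute::graph::ITensorAccessor
    {
    public:
      explicit ConstantAccessor(float value) : _value(value) {}

      bool access_tensor(arm_compute::ITensor &tensor) override
      {
        // Visit every element of the (already mapped) tensor and fill it
        arm_compute::Window window;
        window.use_tensor_dimensions(tensor.info()->tensor_shape());
        arm_compute::execute_window_loop(window,
            [&](const arm_compute::Coordinates &id) {
              *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = _value;
            });
        return true;
      }

    private:
      float _value;
    };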
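
With the COMPUTE_TYPE dispatcher, a caller compares backends by running the
same case twice, as the updated tests do. A condensed sketch in the spirit of
test_3x3_1x1_one (the data values, the bias/output Shape literals, and the
activation constant 0 meaning "no activation" are assumptions here, not taken
from the patch):

    float input[9]  = {1, 1, 1, 1, 1, 1, 1, 1, 1};
    float filter[9] = {1, 1, 1, 1, 1, 1, 1, 1, 1};
    float bias[1]   = {0.0f};
    float expect[1] = {0.0f};
    float actual[1] = {0.0f};

    // Shapes follow the {N,H,W,C} layout used by the tests
    const Shape inputShape  = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
    const Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
    const Shape biasShape   = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
    const Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };

    // reference result on the default CPU path, then the ACL runtime path
    convFloat32(input, inputShape, filter, filterShape, bias, biasShape,
                0, 0, 0, 0, 1, 1, 0 /* activation */,
                expect, outputShape, COMPUTE_DEFAULT);
    convFloat32(input, inputShape, filter, filterShape, bias, biasShape,
                0, 0, 0, 0, 1, 1, 0 /* activation */,
                actual, outputShape, COMPUTE_ACLRT);

    bool ok = compareData(actual, expect, outputShape);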