From 703db79b8efc5be3d9d1517e7b0a04806366c07a Mon Sep 17 00:00:00 2001
From: =?utf8?q?=EA=B9=80=EC=88=98=EC=A7=84/On-Device=20Lab=28SR=29/Enginee?=
 =?utf8?q?r/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?=
Date: Tue, 19 Feb 2019 10:11:39 +0900
Subject: [PATCH] [neurun] Use legacy tflite method in ConvolutionLayer for cpu (#4416)

Since we updated TensorFlow to v1.12, the `ConvolutionLayer` kernel for the
`cpu` backend has used the newly added ops. However, the `quant8` path of the
cpu `ConvolutionLayer` produces mismatched results when running the generated
tests, while the legacy methods still work correctly for `quant8`. The
`Android NN` master branch Conv2D also keeps using the legacy methods. For
consistency, and to support `quant8`, switch back to the legacy methods
instead of the newly added ones. (Added a TODO comment about moving to the
new version later.)

Signed-off-by: sjsujinkim
---
 .../src/backend/cpu/kernel/ConvolutionLayer.cc     | 164 ++++++++++-----------
 .../nnapi/nnapi_gtest.skip.armv7l-linux.neurun.cpu |   9 --
 2 files changed, 79 insertions(+), 94 deletions(-)

diff --git a/runtimes/neurun/src/backend/cpu/kernel/ConvolutionLayer.cc b/runtimes/neurun/src/backend/cpu/kernel/ConvolutionLayer.cc
index e341862..f8cfe1d 100644
--- a/runtimes/neurun/src/backend/cpu/kernel/ConvolutionLayer.cc
+++ b/runtimes/neurun/src/backend/cpu/kernel/ConvolutionLayer.cc
@@ -16,7 +16,8 @@
 
 #include "ConvolutionLayer.h"
 
-#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+// TODO : Discard legacy methods
+#include "tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h"
 #include "OperationUtils.h"
 
 #include <mutex>
@@ -35,51 +36,56 @@ static constexpr int kStaticBufferSize = 1605632;
 static char static_scratch_buffer[kStaticBufferSize];
 static std::mutex executionMutex;
 
-#define ANDROID_NN_CONV_PARAMETERS(Type) \
-  uint32_t height = getSizeOfDimension(_inputShape, 1); \
-  uint32_t width = getSizeOfDimension(_inputShape, 2); \
-  uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
-  uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
-  uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
-  uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
-  uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
-  \
-  uint32_t paddingHeight = (uint32_t)_paddingTop; \
-  uint32_t paddingWidth = (uint32_t)_paddingLeft; \
-  \
-  Shape im2colShape; \
-  im2colShape.dimensions.resize(4); \
-  im2colShape.dimensions[0] = getSizeOfDimension(_outputShape, 0); \
-  im2colShape.dimensions[1] = getSizeOfDimension(_outputShape, 1); \
-  im2colShape.dimensions[2] = getSizeOfDimension(_outputShape, 2); \
-  im2colShape.dimensions[3] = inDepth * kernelHeight * kernelWidth; \
-  \
-  Type *im2colData = nullptr; \
-  uint64_t im2colByteSize = sizeof(Type); \
-  std::unique_ptr<Type[]> im2colGuard; \
-  for (int i = 0; i < 4; i++) \
-  { \
-    im2colByteSize *= im2colShape.dimensions[i]; \
-  } \
-  /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
-  if (im2colByteSize >= 0x7fffffff) \
-  { \
-    std::cout << "Conv size is too large, not enough memory" << std::endl; \
-    return false; \
-  } \
-  if (im2colByteSize <= kStaticBufferSize) \
-  { \
-    im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
-  } \
-  else \
-  { \
-    im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
-    if (im2colData == nullptr) \
-    { \
-      std::cout << "Conv size is too large, not enough memory" << std::endl; \
-      return false; \
-    } \
-    im2colGuard.reset(im2colData); \
+#define ANDROID_NN_CONV_PARAMETERS(Type) \
+  uint32_t height = getSizeOfDimension(_inputShape, 1); \
+  uint32_t width = getSizeOfDimension(_inputShape, 2); \
+  uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
+  uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
+  uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
+  uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
+  uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
+  \
+  uint32_t paddingHeight = (uint32_t)_paddingTop; \
+  uint32_t paddingWidth = (uint32_t)_paddingLeft; \
+  \
+  tflite::Dims<4> im2colDim; \
+  im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \
+  im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \
+  im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \
+  im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \
+  \
+  im2colDim.strides[0] = 1; \
+  for (int i = 1; i < 4; i++) \
+  { \
+    im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \
+  } \
+  \
+  Type *im2colData = nullptr; \
+  uint64_t im2colByteSize = sizeof(Type); \
+  std::unique_ptr<Type[]> im2colGuard; \
+  for (int i = 0; i < 4; i++) \
+  { \
+    im2colByteSize *= im2colDim.sizes[i]; \
+  } \
+  /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
+  if (im2colByteSize >= 0x7fffffff) \
+  { \
+    std::cout << "Conv size is too large, not enough memory" << std::endl; \
+    return false; \
+  } \
+  if (im2colByteSize <= kStaticBufferSize) \
+  { \
+    im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
+  } \
+  else \
+  { \
+    im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
+    if (im2colData == nullptr) \
+    { \
+      std::cout << "Conv size is too large, not enough memory" << std::endl; \
+      return false; \
+    } \
+    im2colGuard.reset(im2colData); \
   }
 
 ConvolutionLayer::ConvolutionLayer()
@@ -111,22 +117,12 @@ bool ConvolutionLayer::convFloat32()
   CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
 
   int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
-  ::tflite::ConvParams op_params;
-  op_params.padding_type = ::tflite::PaddingType::kSame;
-  op_params.padding_values.width = paddingWidth;
-  op_params.padding_values.height = paddingHeight;
-  op_params.stride_width = _strideWidth;
-  op_params.stride_height = _strideHeight;
-  op_params.dilation_width_factor = dilationWidthFactor;
-  op_params.dilation_height_factor = dilationHeightFactor;
-  op_params.float_activation_min = output_activation_min;
-  op_params.float_activation_max = output_activation_max;
-
-  ::tflite::optimized_ops::Conv(op_params, convertShapeToTFLiteShape(_inputShape), _inputData.f,
-                                convertShapeToTFLiteShape(_kernelShape), _kernelData.f,
-                                convertShapeToTFLiteShape(_biasShape), _biasData.f,
-                                convertShapeToTFLiteShape(_outputShape), _outputData.f,
-                                convertShapeToTFLiteShape(im2colShape), im2colDataToPass);
+  tflite::optimized_ops::Conv(
+      _inputData.f, convertShapeToDims(_inputShape), _kernelData.f,
+      convertShapeToDims(_kernelShape), _biasData.f, convertShapeToDims(_biasShape), _strideWidth,
+      _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, paddingHeight,
+      output_activation_min, output_activation_max, _outputData.f, convertShapeToDims(_outputShape),
+      im2colDataToPass, im2colDim);
 
   return true;
 }
@@ -142,6 +138,19 @@ bool ConvolutionLayer::convQuant8()
   int32_t output_shift = 0;
   int32_t output_activation_min = 0;
   int32_t output_activation_max = 0;
+
+  const ::tflite::Dims<4> &kernel_dim = convertShapeToDims(_kernelShape);
+  const int kernel_width = ArraySize(kernel_dim, 1);
+  const int kernel_height = ArraySize(kernel_dim, 2);
+  const bool need_im2col =
+      _strideWidth != 1 || _strideHeight != 1 || kernel_width != 1 || kernel_height != 1;
+
+  uint8_t *im2colDataToPass = nullptr;
+  if (need_im2col)
+  {
+    im2colDataToPass = im2colData;
+  }
+
   if (!GetQuantizedConvolutionMultipler(_inputShape, _kernelShape, _biasShape, _outputShape,
                                         &real_multiplier) ||
       !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift))
@@ -152,33 +161,19 @@ bool ConvolutionLayer::convQuant8()
                                 &output_activation_max);
 
   int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
-  ::tflite::ConvParams op_params;
-  op_params.padding_type = ::tflite::PaddingType::kSame;
-  op_params.padding_values.width = paddingWidth;
-  op_params.padding_values.height = paddingHeight;
-  op_params.stride_width = _strideWidth;
-  op_params.stride_height = _strideHeight;
-  op_params.dilation_width_factor = dilationWidthFactor;
-  op_params.dilation_height_factor = dilationHeightFactor;
-  op_params.input_offset = inputOffset;
-  op_params.weights_offset = kernelOffset;
-  op_params.output_offset = outputOffset;
-  op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
   static gemmlowp::GemmContext gemm_context;
   // Prevent concurrent executions that may access the scratch buffer and
   // gemm_context.
   std::unique_lock<std::mutex> lock(executionMutex);
   // Alow gemmlowp automatically decide how many threads to use.
   gemm_context.set_max_num_threads(0);
-  ::tflite::optimized_ops::Conv(op_params, convertShapeToTFLiteShape(_inputShape), _inputData.u8,
-                                convertShapeToTFLiteShape(_kernelShape), _kernelData.u8,
-                                convertShapeToTFLiteShape(_biasShape), _biasData.i32,
-                                convertShapeToTFLiteShape(_outputShape), _outputData.u8,
-                                convertShapeToTFLiteShape(im2colShape), im2colData, &gemm_context);
+  tflite::optimized_ops::Conv(
+      _inputData.u8, convertShapeToDims(_inputShape), inputOffset, _kernelData.u8,
+      convertShapeToDims(_kernelShape), kernelOffset, _biasData.i32, convertShapeToDims(_biasShape),
+      _strideWidth, _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth,
+      paddingHeight, outputOffset, output_multiplier, output_shift, output_activation_min,
+      output_activation_max, _outputData.u8, convertShapeToDims(_outputShape), im2colDataToPass,
+      im2colDim, &gemm_context);
 
   return true;
 }
@@ -216,8 +211,7 @@ void ConvolutionLayer::run()
   }
   else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
   {
-    throw std::runtime_error{"ConvolutionLayer : Not tested for TENSOR_QUANT8_ASYMM"};
-    // convQuant8();
+    convQuant8();
   }
 }
 
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.neurun.cpu b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.neurun.cpu
index e229902..a6ce26c 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.neurun.cpu
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.neurun.cpu
@@ -22,14 +22,6 @@ ValidationTestExecution.StartCompute
 ValidationTestExecution.EventWait
 GeneratedTests.add*
 GeneratedTests.argmax*
-GeneratedTests.conv_quant8_channels
-GeneratedTests.conv_quant8_channels_weights_as_inputs
-GeneratedTests.conv_quant8_large
-GeneratedTests.conv_quant8_large_weights_as_inputs
-GeneratedTests.conv_quant8
-GeneratedTests.conv_quant8_overflow
-GeneratedTests.conv_quant8_overflow_weights_as_inputs
-GeneratedTests.conv_quant8_weights_as_inputs
 GeneratedTests.depth_to_space*
 GeneratedTests.depthwise_conv2d*
 GeneratedTests.depthwise_conv
@@ -82,7 +74,6 @@ GeneratedTests.tensorflowmax_ex*
 GeneratedTests.reduce_sum_ex*
 GeneratedTests.topk_v2*
 # Unexpected result
-GeneratedTests.conv_quant8_2
 GeneratedTests.split*
 GeneratedTests.transpose_conv*
 GeneratedTests.pack*
-- 
2.7.4
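
For reference, the standalone sketch below (not part of the patch) mirrors how the
ANDROID_NN_CONV_PARAMETERS macro above sizes the im2col buffer: the byte size is the product
of the four im2colDim sizes, anything at or above 2^31 - 1 bytes is rejected because
tflite::optimized_ops::Conv uses int offsets, and small buffers reuse the static scratch
buffer instead of the heap. The helper name allocateIm2col and the example dimensions are
invented for illustration; only kStaticBufferSize and the overall flow come from the code
above.

#include <cstdint>
#include <cstdio>
#include <memory>
#include <new>

static constexpr int kStaticBufferSize = 1605632; // same constant as the cpu backend
static char static_scratch_buffer[kStaticBufferSize];

// Returns a pointer to im2col storage for a float conv, or nullptr if the request cannot
// be satisfied. `guard` owns the buffer only when it is heap-allocated, like im2colGuard
// in the macro.
float *allocateIm2col(const int sizes[4], std::unique_ptr<float[]> &guard)
{
  uint64_t byteSize = sizeof(float);
  for (int i = 0; i < 4; i++)
    byteSize *= static_cast<uint64_t>(sizes[i]);

  // tflite::optimized_ops::Conv indexes with int, so oversized buffers are rejected.
  if (byteSize >= 0x7fffffff)
    return nullptr;

  if (byteSize <= kStaticBufferSize)
    return reinterpret_cast<float *>(static_scratch_buffer);

  float *data = new (std::nothrow) float[byteSize / sizeof(float)];
  guard.reset(data); // stays nullptr if the allocation failed
  return data;
}

int main()
{
  // sizes[0..3] = {inDepth * kernelH * kernelW, outW, outH, batch}, as in the macro.
  const int sizes[4] = {3 * 3 * 3, 112, 112, 1};
  std::unique_ptr<float[]> guard;
  float *im2col = allocateIm2col(sizes, guard);
  std::printf("im2col buffer %s\n", im2col ? "allocated" : "unavailable");
  return 0;
}

With these example dimensions the buffer needs 27 * 112 * 112 * 4 = 1,354,752 bytes, so it
fits in the 1,605,632-byte static scratch buffer and no heap allocation is made.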
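
convQuant8() keeps relying on GetQuantizedConvolutionMultipler and
QuantizeMultiplierSmallerThanOne from OperationUtils before setting up the legacy Conv call.
The sketch below shows the general idea only and is not the exact OperationUtils
implementation: the real multiplier combines the input, filter, and output scales (roughly
input_scale * filter_scale / output_scale), and the legacy quant8 Conv expects it re-encoded
as a 31-bit fixed-point output_multiplier plus an output_shift so the kernel can stay in
integer arithmetic. The scale values in main() are hypothetical.

#include <cmath>
#include <cstdint>
#include <cstdio>

// Re-encode a multiplier in (0, 1) as a Q31 fixed-point value and a right shift,
// in the spirit of QuantizeMultiplierSmallerThanOne (simplified sketch).
bool quantizeMultiplierSmallerThanOne(double real_multiplier, int32_t *quantized_multiplier,
                                      int32_t *right_shift)
{
  if (real_multiplier <= 0.0 || real_multiplier >= 1.0)
    return false;

  int exponent = 0;
  const double q = std::frexp(real_multiplier, &exponent); // real = q * 2^exponent, q in [0.5, 1)
  *right_shift = -exponent;

  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) // rounding can push q up to exactly 1.0
  {
    q_fixed /= 2;
    --*right_shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
  return *right_shift >= 0;
}

int main()
{
  // Hypothetical scales: real_multiplier = input_scale * filter_scale / output_scale.
  const double real_multiplier = (0.5 * 0.25) / 0.75;
  int32_t multiplier = 0, shift = 0;
  if (quantizeMultiplierSmallerThanOne(real_multiplier, &multiplier, &shift))
    std::printf("output_multiplier=%d output_shift=%d\n", multiplier, shift);
  return 0;
}

If the combined scale falls outside (0, 1) the helper reports failure, which is why
convQuant8() checks both helpers before calling the legacy quant8 Conv.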