From 703db79b8efc5be3d9d1517e7b0a04806366c07a Mon Sep 17 00:00:00 2001
From: =?utf8?q?=EA=B9=80=EC=88=98=EC=A7=84/On-Device=20Lab=28SR=29/Enginee?=
 =?utf8?q?r/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?=
Date: Tue, 19 Feb 2019 10:11:39 +0900
Subject: [PATCH] [neurun] Use legacy tflite method in ConvolutionLayer for cpu (#4416)

Since we updated TensorFlow to v1.12, the `ConvolutionLayer` kernel for the
`cpu` backend has used the newly added ops. However, the `quant8` path of the
cpu `ConvolutionLayer` produces mismatched results when running the generated
tests, while the legacy methods still work correctly for `quant8`. The
`Android NN` master branch Conv2D also keeps using the legacy methods. For
consistency, and to support `quant8`, switch back to the legacy methods
instead of the newly added ones. (Added a TODO comment about moving to the
new version later.)

Signed-off-by: sjsujinkim
---
 .../src/backend/cpu/kernel/ConvolutionLayer.cc     | 164 ++++++++++-----------
 .../nnapi/nnapi_gtest.skip.armv7l-linux.neurun.cpu |   9 --
 2 files changed, 79 insertions(+), 94 deletions(-)

diff --git a/runtimes/neurun/src/backend/cpu/kernel/ConvolutionLayer.cc b/runtimes/neurun/src/backend/cpu/kernel/ConvolutionLayer.cc
index e341862..f8cfe1d 100644
--- a/runtimes/neurun/src/backend/cpu/kernel/ConvolutionLayer.cc
+++ b/runtimes/neurun/src/backend/cpu/kernel/ConvolutionLayer.cc
@@ -16,7 +16,8 @@
 
 #include "ConvolutionLayer.h"
 
-#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+// TODO : Discard legacy methods
+#include "tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h"
 #include "OperationUtils.h"
 
 #include <mutex>
@@ -35,51 +36,56 @@ static constexpr int kStaticBufferSize = 1605632;
 static char static_scratch_buffer[kStaticBufferSize];
 static std::mutex executionMutex;
 
-#define ANDROID_NN_CONV_PARAMETERS(Type) \
-  uint32_t height = getSizeOfDimension(_inputShape, 1); \
-  uint32_t width = getSizeOfDimension(_inputShape, 2); \
-  uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
-  uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
-  uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
-  uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
-  uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
-  \
-  uint32_t paddingHeight = (uint32_t)_paddingTop; \
-  uint32_t paddingWidth = (uint32_t)_paddingLeft; \
-  \
-  Shape im2colShape; \
-  im2colShape.dimensions.resize(4); \
-  im2colShape.dimensions[0] = getSizeOfDimension(_outputShape, 0); \
-  im2colShape.dimensions[1] = getSizeOfDimension(_outputShape, 1); \
-  im2colShape.dimensions[2] = getSizeOfDimension(_outputShape, 2); \
-  im2colShape.dimensions[3] = inDepth * kernelHeight * kernelWidth; \
-  \
-  Type *im2colData = nullptr; \
-  uint64_t im2colByteSize = sizeof(Type); \
-  std::unique_ptr<Type[]> im2colGuard; \
-  for (int i = 0; i < 4; i++) \
-  { \
-    im2colByteSize *= im2colShape.dimensions[i]; \
-  } \
-  /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
-  if (im2colByteSize >= 0x7fffffff) \
-  { \
-    std::cout << "Conv size is too large, not enough memory" << std::endl; \
-    return false; \
-  } \
-  if (im2colByteSize <= kStaticBufferSize) \
-  { \
-    im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
-  } \
-  else \
-  { \
-    im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
-    if (im2colData == nullptr) \
-    { \
-      std::cout << "Conv size is too large, not enough memory" << std::endl; \
-      return false; \
-    } \
-    im2colGuard.reset(im2colData); \
+#define ANDROID_NN_CONV_PARAMETERS(Type) \
+  uint32_t height = getSizeOfDimension(_inputShape, 1); \
+  uint32_t width = getSizeOfDimension(_inputShape, 2); \
+  uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
+  uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
+  uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
+  uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
+  uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
+  \
+  uint32_t paddingHeight = (uint32_t)_paddingTop; \
+  uint32_t paddingWidth = (uint32_t)_paddingLeft; \
+  \
+  tflite::Dims<4> im2colDim; \
+  im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \
+  im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \
+  im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \
+  im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \
+  \
+  im2colDim.strides[0] = 1; \
+  for (int i = 1; i < 4; i++) \
+  { \
+    im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \
+  } \
+  \
+  Type *im2colData = nullptr; \
+  uint64_t im2colByteSize = sizeof(Type); \
+  std::unique_ptr<Type[]> im2colGuard; \
+  for (int i = 0; i < 4; i++) \
+  { \
+    im2colByteSize *= im2colDim.sizes[i]; \
+  } \
+  /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
+  if (im2colByteSize >= 0x7fffffff) \
+  { \
+    std::cout << "Conv size is too large, not enough memory" << std::endl; \
+    return false; \
+  } \
+  if (im2colByteSize <= kStaticBufferSize) \
+  { \
+    im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
+  } \
+  else \
+  { \
+    im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
+    if (im2colData == nullptr) \
+    { \
+      std::cout << "Conv size is too large, not enough memory" << std::endl; \
+      return false; \
+    } \
+    im2colGuard.reset(im2colData); \
   }
 
 ConvolutionLayer::ConvolutionLayer()
@@ -111,22 +117,12 @@ bool ConvolutionLayer::convFloat32()
   CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
 
   int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
-  ::tflite::ConvParams op_params;
-  op_params.padding_type = ::tflite::PaddingType::kSame;
-  op_params.padding_values.width = paddingWidth;
-  op_params.padding_values.height = paddingHeight;
-  op_params.stride_width = _strideWidth;
-  op_params.stride_height = _strideHeight;
-  op_params.dilation_width_factor = dilationWidthFactor;
-  op_params.dilation_height_factor = dilationHeightFactor;
-  op_params.float_activation_min = output_activation_min;
-  op_params.float_activation_max = output_activation_max;
-
-  ::tflite::optimized_ops::Conv(op_params, convertShapeToTFLiteShape(_inputShape), _inputData.f,
-                                convertShapeToTFLiteShape(_kernelShape), _kernelData.f,
-                                convertShapeToTFLiteShape(_biasShape), _biasData.f,
-                                convertShapeToTFLiteShape(_outputShape), _outputData.f,
-                                convertShapeToTFLiteShape(im2colShape), im2colDataToPass);
+  tflite::optimized_ops::Conv(
+      _inputData.f, convertShapeToDims(_inputShape), _kernelData.f,
+      convertShapeToDims(_kernelShape), _biasData.f, convertShapeToDims(_biasShape), _strideWidth,
+      _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, paddingHeight,
+      output_activation_min, output_activation_max, _outputData.f, convertShapeToDims(_outputShape),
+      im2colDataToPass, im2colDim);
 
   return true;
 }
@@ -142,6 +138,19 @@ bool ConvolutionLayer::convQuant8()
   int32_t output_shift = 0;
   int32_t output_activation_min = 0;
   int32_t output_activation_max = 0;
+
+  const ::tflite::Dims<4> &kernel_dim = convertShapeToDims(_kernelShape);
+  const int kernel_width = ArraySize(kernel_dim, 1);
+  const int kernel_height = ArraySize(kernel_dim, 2);
+  const bool need_im2col =
+      _strideWidth != 1 || _strideHeight != 1 || kernel_width != 1 || kernel_height != 1;
+
+  uint8_t *im2colDataToPass = nullptr;
+  if (need_im2col)
+  {
+    im2colDataToPass = im2colData;
+  }
+
   if (!GetQuantizedConvolutionMultipler(_inputShape, _kernelShape, _biasShape, _outputShape,
                                         &real_multiplier) ||
       !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift))
@@ -152,33 +161,19 @@ bool ConvolutionLayer::convQuant8()
                                 &output_activation_max);
 
   int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
-  ::tflite::ConvParams op_params;
-  op_params.padding_type = ::tflite::PaddingType::kSame;
-  op_params.padding_values.width = paddingWidth;
-  op_params.padding_values.height = paddingHeight;
-  op_params.stride_width = _strideWidth;
-  op_params.stride_height = _strideHeight;
-  op_params.dilation_width_factor = dilationWidthFactor;
-  op_params.dilation_height_factor = dilationHeightFactor;
-  op_params.input_offset = inputOffset;
-  op_params.weights_offset = kernelOffset;
-  op_params.output_offset = outputOffset;
-  op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
   static gemmlowp::GemmContext gemm_context;
   // Prevent concurrent executions that may access the scratch buffer and
   // gemm_context.
   std::unique_lock<std::mutex> lock(executionMutex);
   // Alow gemmlowp automatically decide how many threads to use.
   gemm_context.set_max_num_threads(0);
-  ::tflite::optimized_ops::Conv(op_params, convertShapeToTFLiteShape(_inputShape), _inputData.u8,
-                                convertShapeToTFLiteShape(_kernelShape), _kernelData.u8,
-                                convertShapeToTFLiteShape(_biasShape), _biasData.i32,
-                                convertShapeToTFLiteShape(_outputShape), _outputData.u8,
-                                convertShapeToTFLiteShape(im2colShape), im2colData, &gemm_context);
+  tflite::optimized_ops::Conv(
+      _inputData.u8, convertShapeToDims(_inputShape), inputOffset, _kernelData.u8,
+      convertShapeToDims(_kernelShape), kernelOffset, _biasData.i32, convertShapeToDims(_biasShape),
+      _strideWidth, _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth,
+      paddingHeight, outputOffset, output_multiplier, output_shift, output_activation_min,
+      output_activation_max, _outputData.u8, convertShapeToDims(_outputShape), im2colDataToPass,
+      im2colDim, &gemm_context);
 
   return true;
 }
@@ -216,8 +211,7 @@ void ConvolutionLayer::run()
   }
   else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
   {
-    throw std::runtime_error{"ConvolutionLayer : Not tested for TENSOR_QUANT8_ASYMM"};
-    // convQuant8();
+    convQuant8();
   }
 }
 
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.neurun.cpu b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.neurun.cpu
index e229902..a6ce26c 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.neurun.cpu
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.neurun.cpu
@@ -22,14 +22,6 @@ ValidationTestExecution.StartCompute
 ValidationTestExecution.EventWait
 GeneratedTests.add*
 GeneratedTests.argmax*
-GeneratedTests.conv_quant8_channels
-GeneratedTests.conv_quant8_channels_weights_as_inputs
-GeneratedTests.conv_quant8_large
-GeneratedTests.conv_quant8_large_weights_as_inputs
-GeneratedTests.conv_quant8
-GeneratedTests.conv_quant8_overflow
-GeneratedTests.conv_quant8_overflow_weights_as_inputs
-GeneratedTests.conv_quant8_weights_as_inputs
 GeneratedTests.depth_to_space*
 GeneratedTests.depthwise_conv2d*
 GeneratedTests.depthwise_conv
@@ -82,7 +74,6 @@ GeneratedTests.tensorflowmax_ex*
 GeneratedTests.reduce_sum_ex*
 GeneratedTests.topk_v2*
 # Unexpected result
-GeneratedTests.conv_quant8_2
 GeneratedTests.split*
 GeneratedTests.transpose_conv*
 GeneratedTests.pack*
-- 
2.7.4
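
For reference, the standalone sketch below (not part of the patch) mirrors how the
ANDROID_NN_CONV_PARAMETERS macro above sizes the im2col buffer: the byte size is the product
of the four im2colDim sizes, anything at or above 2^31 - 1 bytes is rejected because
tflite::optimized_ops::Conv uses int offsets, and small buffers reuse the static scratch
buffer instead of the heap. The helper name allocateIm2col and the example dimensions are
invented for illustration; only kStaticBufferSize and the overall flow come from the code
above.

#include <cstdint>
#include <cstdio>
#include <memory>
#include <new>

static constexpr int kStaticBufferSize = 1605632; // same constant as the cpu backend
static char static_scratch_buffer[kStaticBufferSize];

// Returns a pointer to im2col storage for a float conv, or nullptr if the request cannot
// be satisfied. `guard` owns the buffer only when it is heap-allocated, like im2colGuard
// in the macro.
float *allocateIm2col(const int sizes[4], std::unique_ptr<float[]> &guard)
{
  uint64_t byteSize = sizeof(float);
  for (int i = 0; i < 4; i++)
    byteSize *= static_cast<uint64_t>(sizes[i]);

  // tflite::optimized_ops::Conv indexes with int, so oversized buffers are rejected.
  if (byteSize >= 0x7fffffff)
    return nullptr;

  if (byteSize <= kStaticBufferSize)
    return reinterpret_cast<float *>(static_scratch_buffer);

  float *data = new (std::nothrow) float[byteSize / sizeof(float)];
  guard.reset(data); // stays nullptr if the allocation failed
  return data;
}

int main()
{
  // sizes[0..3] = {inDepth * kernelH * kernelW, outW, outH, batch}, as in the macro.
  const int sizes[4] = {3 * 3 * 3, 112, 112, 1};
  std::unique_ptr<float[]> guard;
  float *im2col = allocateIm2col(sizes, guard);
  std::printf("im2col buffer %s\n", im2col ? "allocated" : "unavailable");
  return 0;
}

With these example dimensions the buffer needs 27 * 112 * 112 * 4 = 1,354,752 bytes, so it
fits in the 1,605,632-byte static scratch buffer and no heap allocation is made.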
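
convQuant8() keeps relying on GetQuantizedConvolutionMultipler and
QuantizeMultiplierSmallerThanOne from OperationUtils before setting up the legacy Conv call.
The sketch below shows the general idea only and is not the exact OperationUtils
implementation: the real multiplier combines the input, filter, and output scales (roughly
input_scale * filter_scale / output_scale), and the legacy quant8 Conv expects it re-encoded
as a 31-bit fixed-point output_multiplier plus an output_shift so the kernel can stay in
integer arithmetic. The scale values in main() are hypothetical.

#include <cmath>
#include <cstdint>
#include <cstdio>

// Re-encode a multiplier in (0, 1) as a Q31 fixed-point value and a right shift,
// in the spirit of QuantizeMultiplierSmallerThanOne (simplified sketch).
bool quantizeMultiplierSmallerThanOne(double real_multiplier, int32_t *quantized_multiplier,
                                      int32_t *right_shift)
{
  if (real_multiplier <= 0.0 || real_multiplier >= 1.0)
    return false;

  int exponent = 0;
  const double q = std::frexp(real_multiplier, &exponent); // real = q * 2^exponent, q in [0.5, 1)
  *right_shift = -exponent;

  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) // rounding can push q up to exactly 1.0
  {
    q_fixed /= 2;
    --*right_shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
  return *right_shift >= 0;
}

int main()
{
  // Hypothetical scales: real_multiplier = input_scale * filter_scale / output_scale.
  const double real_multiplier = (0.5 * 0.25) / 0.75;
  int32_t multiplier = 0, shift = 0;
  if (quantizeMultiplierSmallerThanOne(real_multiplier, &multiplier, &shift))
    std::printf("output_multiplier=%d output_shift=%d\n", multiplier, shift);
  return 0;
}

If the combined scale falls outside (0, 1) the helper reports failure, which is why
convQuant8() checks both helpers before calling the legacy quant8 Conv.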