From: Сергей Баранников/AI Tools Lab /SRR/Engineer/삼성전자 Date: Tue, 29 Jan 2019 12:33:25 +0000 (+0300) Subject: [nnc] Change the ModelIR Conv2D kernel format from HWIO to OHWI (#2941) X-Git-Tag: nncc_backup~907 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a3943b7fc44ed9d465be72adf60b30d3ff0fd3c8;p=platform%2Fcore%2Fml%2Fnnfw.git [nnc] Change the ModelIR Conv2D kernel format from HWIO to OHWI (#2941) * Change the ModelIR Conv2D kernel format from HWIO to OHWI to match TensorFlow Lite format. * Adjust importers and backends. Signed-off-by: Sergei Barannikov --- diff --git a/contrib/nnc/core/modelIR/operations/Conv2DOp.cpp b/contrib/nnc/core/modelIR/operations/Conv2DOp.cpp index e2966a5..6cce0e9 100644 --- a/contrib/nnc/core/modelIR/operations/Conv2DOp.cpp +++ b/contrib/nnc/core/modelIR/operations/Conv2DOp.cpp @@ -21,11 +21,14 @@ namespace mir { namespace ops { void Conv2DOp::inferOutputShapes() { + // Input shape: [N, Hi, Wi, Ci]. + // Kernel shape: [Co, Hk, Wk, Ci]. const auto& input_shape = getInputShape(0); const auto& kernel_shape = getInputShape(1); assert(input_shape.rank() == 4); assert(kernel_shape.rank() == 4); + assert(kernel_shape.dim(3) == input_shape.dim(3)); assert(_strides.rank() == 2); assert(_paddingBefore.size() == 2); assert(_paddingAfter.size() == 2); @@ -34,7 +37,7 @@ void Conv2DOp::inferOutputShapes() { // Batch size and number of channels. output_shape.dim(0) = input_shape.dim(0); - output_shape.dim(3) = kernel_shape.dim(3); + output_shape.dim(3) = kernel_shape.dim(0); // Height and width. for (int i = 0; i < 2; i++) { @@ -42,7 +45,7 @@ void Conv2DOp::inferOutputShapes() { // out_size = ceil((in_size - kernel_size + 1) / stride) = // (in_size - kernel_size + 1 + stride - 1) / stride = // (in_size - kernel_size) / stride + 1 - output_shape.dim(1 + i) = (padded_input - kernel_shape.dim(i)) / _strides.dim(i) + 1; + output_shape.dim(1 + i) = (padded_input - kernel_shape.dim(1 + i)) / _strides.dim(i) + 1; } setOutputShape(0, output_shape); diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp index ff32854..12501cd 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp +++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp @@ -210,17 +210,9 @@ void AclCppOpGenerator::visit(ops::SoftmaxOp& op) { } } -/** - * @brief Generate DOM for PadStrideInfo object - * @tparam Oper Class of operation with pad and stride properties - * @param op Operation entity to generate variable for - * @param prefix First part of generated variable name - * @param block Code block where insert variable declaration - * @return generated variable - */ -template -static shared_ptr - genPadStrideInfo(const Oper& op, const string& prefix, ArtifactBlock* block) { +template +shared_ptr +AclCppOpGenerator::genPadStrideInfo(const Op& op, const string& prefix, ArtifactBlock* block) { using AF = ArtifactFactory; const Shape& strides = transposeShape<1, 0>(op.getStrides()); @@ -741,15 +733,22 @@ void AclCppOpGenerator::visit(ops::PadOp& op) { template void AclCppOpGenerator::genConvolution(Op& op, const string& acl_func_name, const string& suffix) { - IODescriptor ir_input = op.getPrevNodes()[0]; - IODescriptor ir_weights = op.getPrevNodes()[1]; + IODescriptor ir_input = op.getInput(0); + IODescriptor ir_weights = op.getInput(1); IODescriptor ir_output = op.getOutput(0); auto ir_weights_op = dynamic_cast(ir_weights.op); if (ir_weights_op == nullptr) throw AclCppException("Unsupported operation type"); - auto ir_weights_tensor = transposeTensor<3, 2, 0, 1>(ir_weights_op->getValue()); + auto ir_weights_tensor = ir_weights_op->getValue(); + if (op.getType() == Operation::Type::conv2D) { + // [Co, Hk, Wk, Ci] -> [Co, Ci, Hk, Wk]. + ir_weights_tensor = transposeTensor<0, 3, 1, 2>(ir_weights_tensor); + } else { + ir_weights_tensor = transposeTensor<3, 2, 0, 1>(ir_weights_tensor); + } + const Shape& ir_weights_shape = ir_weights_tensor.getShape(); // get output tensor name that is used as base for other names diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h index c4de819..45f824b 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h +++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h @@ -103,6 +103,19 @@ private: std::shared_ptr genTransposeACLtoMIR(const std::string& name, const mir::Shape& input_shape, const std::shared_ptr& input); + + /** + * @brief Generate DOM for PadStrideInfo object + * @tparam Oper Class of operation with pad and stride properties + * @param op Operation entity to generate variable for + * @param prefix First part of generated variable name + * @param block Code block where insert variable declaration + * @return generated variable + */ + template + std::shared_ptr + genPadStrideInfo(const Op& op, const std::string& prefix, ArtifactBlock* block); + /** * @brief The common part of the convolution and the depthwise convolution. */ diff --git a/contrib/nnc/passes/caffe2_frontend/caffe2_op_creator.cpp b/contrib/nnc/passes/caffe2_frontend/caffe2_op_creator.cpp index e9da1e2..0f9b9bb 100644 --- a/contrib/nnc/passes/caffe2_frontend/caffe2_op_creator.cpp +++ b/contrib/nnc/passes/caffe2_frontend/caffe2_op_creator.cpp @@ -295,7 +295,7 @@ std::vector Caffe2OpCreator::convertConv(const std::vector(kernel_tensor); auto kernel = createOp("Constant", kernel_tensor)->getOutput(0); result = createOp("Conv2D", convertCaffeToMIR(inputs[0]), kernel, stride_shape, pad_before, pad_after); diff --git a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp index aef1a05..93cb2fa 100644 --- a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp +++ b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp @@ -258,6 +258,7 @@ CaffeOpCreator::convertConvolution(const caffe::LayerParameter& layer, // first we need to convert kernel of grouped convolution to appropriate ordinary kernel kernel_weights = fixGroupedKernel(params.group(), kernel_weights); } + kernel_weights = transposeTensor<3, 0, 1, 2>(kernel_weights); auto kernel = createOp("", kernel_weights)->getOutput(0); result = createOp(layer.name(), convertCaffeToMIR(inputs[0]), kernel, strides, padding, padding); diff --git a/contrib/nnc/passes/interpreter/ops/Conv2D.cpp b/contrib/nnc/passes/interpreter/ops/Conv2D.cpp index 176587c..a481f86 100644 --- a/contrib/nnc/passes/interpreter/ops/Conv2D.cpp +++ b/contrib/nnc/passes/interpreter/ops/Conv2D.cpp @@ -16,6 +16,7 @@ #include "Conv2D.h" #include "core/modelIR/ShapeRange.h" +#include "core/modelIR/TensorUtil.h" namespace nnc { @@ -26,8 +27,8 @@ using namespace mir; // Kernel is in [filter_height, filter_width, in_channels, out_channels] // Refer to https://www.tensorflow.org/api_docs/python/tf/nn/conv2d for info std::vector Conv2D::operator()() { - const Shape& in_shape = _op.getInputShape(0); - const Shape& kernel_shape = _op.getInputShape(1); + const Shape& in_shape = _input.getShape(); + const Shape& kernel_shape = _kernel.getShape(); const Shape& out_shape = _op.getOutputShape(0); const Shape& strides = _op.getStrides(); const std::vector& pads = _op.getPaddingBefore(); @@ -41,6 +42,7 @@ std::vector Conv2D::operator()() { int32_t num_kernels = kernel_shape.dim(3); + Tensor kernel(_kernel); auto res = allocate_tensor(_op.getOutputShape(0)); Tensor res_accessor(res); @@ -61,7 +63,7 @@ std::vector Conv2D::operator()() { in_index.at(3) = kernel_index.at(2); if (in_range.contains(in_index)) { - auto kernel_region = _kernel.getRegion(kernel_index); + auto kernel_region = kernel.getRegion(kernel_index); assert(kernel_region.size() == num_kernels); float in_val = _input.at(in_index); for (int32_t kernel_i = 0; kernel_i < num_kernels; ++kernel_i) { @@ -77,7 +79,7 @@ std::vector Conv2D::operator()() { Conv2D::Conv2D(const TensorVariant& input, const TensorVariant& kernel, const ops::Conv2DOp& op) - : _input(input), _kernel(kernel), _op(op) { + : _input(input), _kernel(transposeTensor<1, 2, 3, 0>(kernel)), _op(op) { } } // namespace nnc diff --git a/contrib/nnc/passes/interpreter/ops/Conv2D.h b/contrib/nnc/passes/interpreter/ops/Conv2D.h index 01d0cea..4c45889 100644 --- a/contrib/nnc/passes/interpreter/ops/Conv2D.h +++ b/contrib/nnc/passes/interpreter/ops/Conv2D.h @@ -32,7 +32,7 @@ public: private: const mir::Tensor _input; - mir::Tensor _kernel; + mir::TensorVariant _kernel; const mir::ops::Conv2DOp& _op; }; diff --git a/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp b/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp index 10684cb..584cc3d 100644 --- a/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp +++ b/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp @@ -163,6 +163,7 @@ ONNXOpCreator::convertConv2D(const std::vector& inputs, // first we need to convert kernel of grouped convolution to appropriate ordinary kernel if (num_groups != 1) kernel_tensor = fixGroupedKernel(num_groups, kernel_tensor); + kernel_tensor = transposeTensor<3, 0, 1, 2>(kernel_tensor); auto kernel = createOp(kernel_tensor)->getOutput(0); result = createOp(transposed_input, kernel, cdata.strides_shape, cdata.padding_before, cdata.padding_after); diff --git a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp index be4231a..b2561d4 100644 --- a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp +++ b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp @@ -296,7 +296,7 @@ void ModelAnalyzer::visit(ops::ConcatOp& op) { void ModelAnalyzer::visit(ops::Conv2DOp& op) { const auto& kernel_shape = op.getInputShape(1); const auto& out_shape = op.getOutputShape(0); - const int32_t tmp_size = kernel_shape.dim(0) * kernel_shape.dim(1) * kernel_shape.dim(2) + const int32_t tmp_size = kernel_shape.dim(1) * kernel_shape.dim(2) * kernel_shape.dim(3) * out_shape.dim(0) * out_shape.dim(1) * out_shape.dim(2); updateMaxTemporarySize(static_cast(tmp_size)); appendOperationToInference(&op, "conv2d", {_temp_tensor_id}); diff --git a/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def b/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def index 5c4649f..050f4b8 100644 --- a/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def +++ b/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def @@ -199,38 +199,21 @@ void conv2d(Tensor& out, const char* params, const Tensor& input, const Tensor& const auto pad_h = static_cast(pads[0]); const auto pad_w = static_cast(pads[1]); - // Transpose the kernel from HWIO to OHWI format. - const Shape kernel_shape = kernel.getShape(); - const RuntimeShape kernel_rt_shape = {static_cast(kernel_shape[3]), - static_cast(kernel_shape[0]), - static_cast(kernel_shape[1]), - static_cast(kernel_shape[2])}; - - const RuntimeShape out_rt_shape = shapeToRuntimeShape(out_shape); - const RuntimeShape im2col_shape{out_rt_shape.Dims(0), //batch - out_rt_shape.Dims(1), //height - out_rt_shape.Dims(2), //width - static_cast(kernel_shape[2] * - kernel_shape[0] * - kernel_shape[1])}; + const Shape& kernel_shape = kernel.getShape(); + const Shape im2col_shape{out_shape[0], out_shape[1], out_shape[2], + kernel_shape[1] * kernel_shape[2] * kernel_shape[3]}; float* im2col_data = nullptr; - if (stride_w != 1 || stride_h != 1 || kernel_shape[0] != 1 || kernel_shape[1] != 1) { + if (stride_w != 1 || stride_h != 1 || kernel_shape[1] != 1 || kernel_shape[2] != 1) { im2col_data = temporary.getData(); } - const ConvParams conv_params{{pad_w, pad_h}, stride_w, stride_h}; - - unique_ptr kernel_data(new float[kernel_rt_shape.FlatSize()]); - TransposeParams transpose_params{4, {3, 0, 1, 2}}; - Transpose(transpose_params, - shapeToRuntimeShape(kernel_shape), kernel.getData(), - kernel_rt_shape, kernel_data.get()); + const ConvParams conv_params{{pad_w, pad_h}, stride_w, stride_h}; Conv(conv_params, shapeToRuntimeShape(input.getShape()), input.getData(), - kernel_rt_shape, kernel_data.get(), - out_rt_shape, out.getData(), - im2col_shape, im2col_data); + shapeToRuntimeShape(kernel_shape), kernel.getData(), + shapeToRuntimeShape(out_shape), out.getData(), + shapeToRuntimeShape(im2col_shape), im2col_data); } void convTransposed2d(Tensor& out, const char* params, const Tensor& input, const Tensor& kernel, diff --git a/contrib/nnc/passes/tflite_frontend/tflite_op_creator.cpp b/contrib/nnc/passes/tflite_frontend/tflite_op_creator.cpp index 34150ac..6c6e746 100644 --- a/contrib/nnc/passes/tflite_frontend/tflite_op_creator.cpp +++ b/contrib/nnc/passes/tflite_frontend/tflite_op_creator.cpp @@ -67,7 +67,7 @@ static void calculatePadding(tflite::Padding padding, ? std::max(0, window_shape.dim(i) - strides.dim(i)) : std::max(0, window_shape.dim(i) - input_shape.dim(1 + i) % strides.dim(i)); padding_before[i] = padding / 2; - padding_after[i] = (padding + 1) / 2; + padding_after[i] = padding - padding_before[i]; } break; case tflite::Padding_VALID: @@ -108,10 +108,7 @@ TFLiteOpCreator::convertConv2D(const Conv2DOptions* opts, auto kernel = inputs.at(1); auto bias = inputs.at(2); - // OHWI -> HWIO - // TODO Insert TransposeOp instead when ACL backend is ready for that. - const auto& kernel_tensor = mir::transposeTensor<1, 2, 3, 0>(extractTensor(kernel)); - kernel = createOp(kernel_tensor)->getOutput(0); + kernel = createOp(extractTensor(kernel))->getOutput(0); Shape strides{opts->stride_h(), opts->stride_w()}; std::vector padding_before(2); @@ -119,7 +116,8 @@ TFLiteOpCreator::convertConv2D(const Conv2DOptions* opts, const auto& input_shape = input.getShape(); const auto& kernel_shape = kernel.getShape(); - calculatePadding(opts->padding(), input_shape, kernel_shape, + Shape window_shape{kernel_shape.dim(1), kernel_shape.dim(2)}; + calculatePadding(opts->padding(), input_shape, window_shape, strides, padding_before, padding_after); auto result = createOp(input, kernel, strides, padding_before, padding_after); @@ -150,7 +148,8 @@ TFLiteOpCreator::convertDepthwiseConv2D(const DepthwiseConv2DOptions* opts, const auto& input_shape = input.getShape(); const auto& kernel_shape = kernel.getShape(); - calculatePadding(opts->padding(), input_shape, kernel_shape, + Shape window_shape{kernel_shape.dim(0), kernel_shape.dim(1)}; + calculatePadding(opts->padding(), input_shape, window_shape, strides, padding_before, padding_after); auto result = createOp(input, kernel, diff --git a/contrib/nnc/unittests/acl_backend/MIRToDOM.cpp b/contrib/nnc/unittests/acl_backend/MIRToDOM.cpp index 70333bd..c2360d6 100644 --- a/contrib/nnc/unittests/acl_backend/MIRToDOM.cpp +++ b/contrib/nnc/unittests/acl_backend/MIRToDOM.cpp @@ -311,7 +311,7 @@ TEST(acl_backend_mir_to_dom, DISABLED_conv_transposed2d) { TEST(acl_backend_mir_to_dom, conv2d) { const int32_t channels = 3; - mir::Shape kernel_shape{3, 3, channels, 1}; // Height, Width, input Channels, output Channel + mir::Shape kernel_shape{1, 3, 3, channels}; // output Channels, Height, Width, input Channels mir::Shape strides{1, 1}; mir::TensorVariant kernel_tensor = createTensorVariant(kernel_shape); diff --git a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp index b88b830..0dd0276 100644 --- a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp +++ b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp @@ -610,7 +610,7 @@ TEST(cpp_operations_test, conv2d) { for (iT stride_h = 1; stride_h <= 3; ++stride_h) for (iT stride_w = 1; stride_w <= 3; ++stride_w) { vector input_shape_data{3, 5, 7, static_cast(input_c)}; // NHWC - vector kernel_shape_data{kernel_h, kernel_w, input_c, output_c}; // HWCN + vector kernel_shape_data{output_c, kernel_h, kernel_w, input_c}; // OHWI mir::Shape strides{stride_h, stride_w}; vector> input_ntensors(2); Tensor input_atensor0;