From: 장지섭/On-Device Lab(SR)/Engineer/삼성전자
Date: Wed, 6 Nov 2019 07:03:23 +0000 (+0900)
Subject: Make ncnn backend to support TransposeConv in row major (#8806)
X-Git-Tag: submit/tizen/20191205.083104~373
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=aa672be76b981983ecf035f2b00d7f4db0e5849b;p=platform%2Fcore%2Fml%2Fnnfw.git

Make ncnn backend to support TransposeConv in row major (#8806)

This commit makes the ncnn (srcn) backend support TransposeConv in row-major
(NCHW) layout.

Signed-off-by: jiseob.jang
---
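Before this patch, the srcn backend hard-coded a single OHWI-to-HWOI kernel permutation; it now derives the permutation from the (frontend, backend) filter-layout pair, so NHWC images keep the TensorFlow-style OHWI -> HWOI mapping while NCHW images get OIHW -> IOHW. The standalone sketch below illustrates how such a permutation can be derived from two layout strings; Axes and permutation_between are illustrative names only (in the patch, kernel::getFilterPermutation fills this role).

  // Standalone illustration; Axes and permutation_between are made-up names.
  #include <array>
  #include <cstddef>
  #include <iostream>
  #include <string>

  // A filter layout is described by the order of its O/H/W/I axes.
  using Axes = std::string; // e.g. "OHWI"

  // perm[d] = position in `from` of the axis that destination dim d takes.
  std::array<std::size_t, 4> permutation_between(const Axes &from, const Axes &to)
  {
    std::array<std::size_t, 4> perm{};
    for (std::size_t d = 0; d < 4; ++d)
      perm[d] = from.find(to[d]);
    return perm;
  }

  int main()
  {
    // NHWC images: kernels go OHWI -> HWOI (TensorFlow-style).
    for (auto p : permutation_between("OHWI", "HWOI"))
      std::cout << p << ' '; // prints: 1 2 0 3
    std::cout << '\n';
    // NCHW images: kernels go OIHW -> IOHW.
    for (auto p : permutation_between("OIHW", "IOHW"))
      std::cout << p << ' '; // prints: 1 0 2 3
    std::cout << '\n';
  }

Applying such a permutation to both the dimensions and the coordinates of the source filter is all the rewritten PermuteKernel below needs.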
diff --git a/runtime/neurun/backend/srcn/ConstantInitializer.cc b/runtime/neurun/backend/srcn/ConstantInitializer.cc
index 6da8759..d01b4bc 100644
--- a/runtime/neurun/backend/srcn/ConstantInitializer.cc
+++ b/runtime/neurun/backend/srcn/ConstantInitializer.cc
@@ -22,46 +22,50 @@ namespace
 {
 
 template <typename T>
-static void
-PermuteKernel(const neurun::model::Operand &model_obj, neurun::backend::operand::IObject &obj,
-              const neurun::model::Layout frontend_layout = neurun::model::Layout::UNKNOWN)
+static void PermuteKernel(const neurun::model::Operand &model_obj,
+                          neurun::backend::operand::IObject &obj,
+                          const std::vector<size_t> &permutation)
 {
   const auto shape = model_obj.shape();
   auto base = reinterpret_cast<const T *>(model_obj.data().base());
 
   assert(shape.rank() == 4);
-
-  // TODO Support frontend layout
-  UNUSED_RELEASE(frontend_layout);
+  assert(permutation.size() == 4);
+  assert(permutation[0] != permutation[1] && permutation[0] != permutation[2] &&
+         permutation[0] != permutation[3]);
+  assert(permutation[1] != permutation[2] && permutation[1] != permutation[3]);
+  assert(permutation[2] != permutation[3]);
+  assert(permutation[0] < 4 && permutation[1] < 4 && permutation[2] < 4 && permutation[3] < 4);
 
   obj.access([&](::neurun::backend::operand::ITensor &tensor) {
-    // NOTE The srcn takes a HWOI layout as kernel filter even though image layout is NHWC.
-    // This policy is the same with the tensorflow policy.
-    // So using srcn library, we need to change kernel layout to HWOI from OHWI.
-    const int32_t outch = shape.dim(0);
-    const int32_t height = shape.dim(1);
-    const int32_t width = shape.dim(2);
-    const int32_t inch = shape.dim(3);
-    const auto to_dim = ::neurun::backend::srcn::kernel::convertCoordinates(
-        {outch, height, width, inch}, ::neurun::backend::srcn::kernel::FilterLayout::OHWI,
-        ::neurun::backend::srcn::kernel::FilterLayout::HWOI);
-    for (auto i = 0; i < outch; ++i)
+    if (permutation[0] == 0 && permutation[1] == 1 && permutation[2] == 2 && permutation[3] == 3)
+    {
+      memcpy(tensor.buffer(), base, shape.num_elements() * sizeof(T));
+    }
+    else
     {
-      for (auto j = 0; j < height; ++j)
+      const int32_t dim0 = shape.dim(0);
+      const int32_t dim1 = shape.dim(1);
+      const int32_t dim2 = shape.dim(2);
+      const int32_t dim3 = shape.dim(3);
+      for (auto i = 0; i < dim0; ++i)
       {
-        for (auto k = 0; k < width; ++k)
+        for (auto j = 0; j < dim1; ++j)
         {
-          for (auto l = 0; l < inch; ++l)
+          for (auto k = 0; k < dim2; ++k)
           {
-            const auto coords = ::neurun::backend::srcn::kernel::convertCoordinates(
-                {i, j, k, l}, ::neurun::backend::srcn::kernel::FilterLayout::OHWI,
-                ::neurun::backend::srcn::kernel::FilterLayout::HWOI);
-            const size_t offset = coords[0] * to_dim[1] * to_dim[2] * to_dim[3] +
-                                  coords[1] * to_dim[2] * to_dim[3] + coords[2] * to_dim[3] +
-                                  coords[3];
-            T *into = reinterpret_cast<T *>(tensor.buffer() + offset * sizeof(T));
-            T value = *(base + i * height * width * inch + j * width * inch + k * inch + l);
-            *into = value;
+            for (auto l = 0; l < dim3; ++l)
+            {
+              Coordinates frontend_coords{i, j, k, l};
+              Coordinates coords = frontend_coords;
+              coords.set(0, frontend_coords[permutation[0]]);
+              coords.set(1, frontend_coords[permutation[1]]);
+              coords.set(2, frontend_coords[permutation[2]]);
+              coords.set(3, frontend_coords[permutation[3]]);
+              T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset(coords));
+              T value = *(base + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3 + l);
+              *into = value;
+            }
           }
         }
       }
@@ -100,7 +104,8 @@ void ConstantInitializer::run()
 }
 
 void ConstantInitializer::registerPermuteKernelInitializer(const model::OperandIndex &index,
-                                                           const model::Operand &obj)
+                                                           const model::Operand &obj,
+                                                           const std::vector<size_t> &permutation)
 {
   // For only CONSTANTS
   if (!obj.isConstant())
@@ -115,17 +120,17 @@ void ConstantInitializer::registerPermuteKernelInitializer(const model::OperandI
   switch (type)
   {
     case DataType::FLOAT32:
-      _init_map[index] = std::bind(PermuteKernel<float>, _1, _2, _current_subg_layout);
+      _init_map[index] = std::bind(PermuteKernel<float>, _1, _2, permutation);
       break;
    case DataType::INT32:
-      _init_map[index] = std::bind(PermuteKernel<int32_t>, _1, _2, _current_subg_layout);
+      _init_map[index] = std::bind(PermuteKernel<int32_t>, _1, _2, permutation);
      break;
    case DataType::UINT32:
-      _init_map[index] = std::bind(PermuteKernel<uint32_t>, _1, _2, _current_subg_layout);
+      _init_map[index] = std::bind(PermuteKernel<uint32_t>, _1, _2, permutation);
      break;
    case DataType::BOOL8:
    case DataType::QUANT8_ASYMM:
-      _init_map[index] = std::bind(PermuteKernel<uint8_t>, _1, _2, _current_subg_layout);
+      _init_map[index] = std::bind(PermuteKernel<uint8_t>, _1, _2, permutation);
      break;
    default:
      throw std::runtime_error("Not supported, yet");
@@ -135,9 +140,30 @@ void ConstantInitializer::registerPermuteKernelInitializer(const model::OperandI
 
 void ConstantInitializer::visit(const model::operation::TransposeConv &node)
 {
+  // NOTE The srcn deconvolution layer takes an HWOI layout as kernel filter even though the
+  // image layout is NHWC.
+  // This policy is the same as the TensorFlow policy.
+  // So, to use the srcn library, we need to change the kernel layout to HWOI from OHWI or OIHW
+  // in this case.
+  // Also, the srcn deconvolution layer takes an OIHW layout as kernel filter if the image
+  // layout is NCHW.
   const auto &kernel_index = node.getInputs().at(model::operation::TransposeConv::KERNEL);
   const auto &kernel_obj = _operands.at(kernel_index);
-  registerPermuteKernelInitializer(kernel_index, kernel_obj);
+  const auto frontend_layout = _current_subg_layout;
+  const auto backend_layout = _tensor_builder->wrapTensor(kernel_index)->ptr()->layout();
+  assert(frontend_layout == neurun::model::Layout::NHWC ||
+         frontend_layout == neurun::model::Layout::NCHW);
+  assert(backend_layout == neurun::model::Layout::NHWC ||
+         backend_layout == neurun::model::Layout::NCHW);
+  const auto frontend_filter_layout = frontend_layout == neurun::model::Layout::NHWC
+                                          ? kernel::FilterLayout::OHWI
+                                          : kernel::FilterLayout::OIHW;
+  const auto backend_filter_layout = backend_layout == neurun::model::Layout::NHWC
+                                         ? kernel::FilterLayout::HWOI
+                                         : kernel::FilterLayout::IOHW;
+  registerPermuteKernelInitializer(
+      kernel_index, kernel_obj,
+      kernel::getFilterPermutation(frontend_filter_layout, backend_filter_layout));
 }
 
 } // namespace srcn
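PermuteKernel above takes a fast path through a single memcpy when the permutation is the identity, and otherwise scatters elements one at a time through tensor.calcOffset(). A minimal self-contained sketch of the same copy strategy follows; it assumes a dense row-major destination in place of calcOffset(), which is my simplification, not the patch's.

  #include <array>
  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  // Copy src (extents `dim`) into dst laid out as dim[perm[0]] x dim[perm[1]] x
  // dim[perm[2]] x dim[perm[3]], so that destination coordinate d equals source
  // coordinate perm[d].
  template <typename T>
  void permute4d(const T *src, T *dst, const std::array<int32_t, 4> &dim,
                 const std::array<std::size_t, 4> &perm)
  {
    if (perm == std::array<std::size_t, 4>{0, 1, 2, 3})
    {
      // Identity permutation: the layouts already agree, one bulk copy suffices.
      std::memcpy(dst, src, sizeof(T) * dim[0] * dim[1] * dim[2] * dim[3]);
      return;
    }
    const std::array<int32_t, 4> odim{dim[perm[0]], dim[perm[1]], dim[perm[2]], dim[perm[3]]};
    for (int32_t i = 0; i < dim[0]; ++i)
      for (int32_t j = 0; j < dim[1]; ++j)
        for (int32_t k = 0; k < dim[2]; ++k)
          for (int32_t l = 0; l < dim[3]; ++l)
          {
            const std::array<int32_t, 4> s{i, j, k, l};
            // Destination coordinates are the source coordinates, reordered.
            const std::array<int32_t, 4> d{s[perm[0]], s[perm[1]], s[perm[2]], s[perm[3]]};
            dst[((d[0] * odim[1] + d[1]) * odim[2] + d[2]) * odim[3] + d[3]] =
                src[((i * dim[1] + j) * dim[2] + k) * dim[3] + l];
          }
  }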
diff --git a/runtime/neurun/backend/srcn/ConstantInitializer.h b/runtime/neurun/backend/srcn/ConstantInitializer.h
index 7eac7bd..559aaa6 100644
--- a/runtime/neurun/backend/srcn/ConstantInitializer.h
+++ b/runtime/neurun/backend/srcn/ConstantInitializer.h
@@ -20,6 +20,7 @@
 #include <backend/IConstantInitializer.h>
 #include <model/Operands.h>
 #include "TensorBuilder.h"
+#include <vector>
 
 namespace neurun
 {
@@ -38,8 +39,8 @@ public:
   void run() override;
 
 public:
-  void registerPermuteKernelInitializer(const model::OperandIndex &index,
-                                        const model::Operand &obj);
+  void registerPermuteKernelInitializer(const model::OperandIndex &index, const model::Operand &obj,
+                                        const std::vector<size_t> &permutation);
 
 public:
   void visit(const model::operation::TransposeConv &) override;
diff --git a/runtime/neurun/backend/srcn/KernelGenerator.cc b/runtime/neurun/backend/srcn/KernelGenerator.cc
index 6a305db..d1e56bc 100644
--- a/runtime/neurun/backend/srcn/KernelGenerator.cc
+++ b/runtime/neurun/backend/srcn/KernelGenerator.cc
@@ -107,10 +107,11 @@ void KernelGenerator::visit(const model::operation::TransposeConv &node)
 
   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
-  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
-  const auto &ker_shape = _ctx.at(ker_index).shape();
-  const auto ker_height = ker_shape.dim(1);
-  const auto ker_width = ker_shape.dim(2);
+  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in] if NHWC and [depth_out,
+  // depth_in, kernel_height, kernel_width] if NCHW.
+  const auto &ker_shape = _ctx.at(ker_index).shape().asFeature(_current_subg_layout);
+  const auto ker_height = ker_shape.H;
+  const auto ker_width = ker_shape.W;
   const auto stride = node.param().stride;
   const int padding_type = (node.param().padding.type == model::PaddingType::SAME);
   const auto padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape,
@@ -133,7 +134,8 @@ void KernelGenerator::visit(const model::operation::TransposeConv &node)
 
   fn->configure(ifm_alloc->buffer(), ifm_backend_descr, ker_alloc->buffer(), ker_backend_descr,
                 padding_type, padding.left, padding.right, padding.top, padding.bottom,
-                stride.horizontal, stride.vertical, ofm_alloc->buffer(), ofm_backend_descr);
+                stride.horizontal, stride.vertical, ofm_alloc->buffer(), ofm_backend_descr,
+                backend_layout);
 
   _execution_builder->append(std::move(fn));
 }
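The KernelGenerator change above stops assuming NHWC kernel positions (dim(1)/dim(2)) and reads height and width through a layout-aware feature view instead. A sketch of that mapping, with a plain struct standing in for the real Shape::asFeature() result:

  #include <array>
  #include <cstdint>
  #include <stdexcept>

  enum class Layout { NHWC, NCHW };

  // Stand-in for the named N/H/W/C view that Shape::asFeature() returns.
  struct FeatureShape
  {
    int32_t N, H, W, C;
  };

  FeatureShape as_feature(const std::array<int32_t, 4> &dim, Layout layout)
  {
    switch (layout)
    {
      case Layout::NHWC:
        return {dim[0], dim[1], dim[2], dim[3]};
      case Layout::NCHW:
        return {dim[0], dim[2], dim[3], dim[1]};
    }
    throw std::invalid_argument("unknown layout");
  }

  // Usage mirroring the hunk above:
  //   const auto ker = as_feature(ker_dims, current_layout);
  //   const auto ker_height = ker.H, ker_width = ker.W;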
diff --git a/runtime/neurun/backend/srcn/ShapeFixer.cc b/runtime/neurun/backend/srcn/ShapeFixer.cc
index 3c7bd15..62067fa 100644
--- a/runtime/neurun/backend/srcn/ShapeFixer.cc
+++ b/runtime/neurun/backend/srcn/ShapeFixer.cc
@@ -87,7 +87,7 @@ void ShapeFixer::visit(const model::operation::TransposeConv &node)
   assert(backend_layout == model::Layout::NCHW || backend_layout == model::Layout::NHWC);
   const auto backend_filter_layout = backend_layout == model::Layout::NHWC
                                          ? kernel::FilterLayout::HWOI
-                                         : kernel::FilterLayout::OIHW;
+                                         : kernel::FilterLayout::IOHW;
 
   model::OperandInfo backend_info{
       asKernelShape(kernel_obj.shape(), frontend_filter_layout, backend_filter_layout),
diff --git a/runtime/neurun/backend/srcn/kernel/OperationUtils.cc b/runtime/neurun/backend/srcn/kernel/OperationUtils.cc
index 56ac525..aeb5515 100644
--- a/runtime/neurun/backend/srcn/kernel/OperationUtils.cc
+++ b/runtime/neurun/backend/srcn/kernel/OperationUtils.cc
@@ -80,6 +80,23 @@ Coordinates convertCoordinates(const Coordinates &coordinates, FilterLayout from
                      coordinates[permutation[2]], coordinates[permutation[3]]};
 }
 
+nnfw::srcn::convType_t convertLayout(model::Layout layout)
+{
+  assert(layout == model::Layout::NHWC || layout == model::Layout::NCHW);
+  if (layout == model::Layout::NHWC)
+  {
+    return nnfw::srcn::col_major;
+  }
+  else if (layout == model::Layout::NCHW)
+  {
+    return nnfw::srcn::row_major;
+  }
+  else
+  {
+    throw std::runtime_error("Not supported layout");
+  }
+}
+
 TensorDescriptor getTensorDescriptor(const ::neurun::model::Operand &o,
                                      ::neurun::model::Layout frontend_layout,
                                      ::neurun::model::Layout backend_layout)
diff --git a/runtime/neurun/backend/srcn/kernel/OperationUtils.h b/runtime/neurun/backend/srcn/kernel/OperationUtils.h
index c10be46..75b081f 100644
--- a/runtime/neurun/backend/srcn/kernel/OperationUtils.h
+++ b/runtime/neurun/backend/srcn/kernel/OperationUtils.h
@@ -24,6 +24,7 @@
 #include "model/Operand.h"
 #include "model/DataType.h"
 #include <util/Coordinates.h>
+#include <ncnn/srcn/conv_type.h>
 
 using OperandType = neurun::model::DataType;
 using neurun::util::Coordinates;
@@ -70,6 +71,8 @@ std::vector<size_t> getFilterPermutation(FilterLayout from_layout, FilterLayout
 Coordinates convertCoordinates(const Coordinates &from_coordinates, FilterLayout from_layout,
                                FilterLayout to_layout);
 
+nnfw::srcn::convType_t convertLayout(model::Layout layout);
+
 TensorDescriptor getTensorDescriptor(const ::neurun::model::Operand &o,
                                      ::neurun::model::Layout frontend_layout,
                                      ::neurun::model::Layout backend_layout);
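The new convertLayout() helper translates the model layout into srcn's vocabulary, in which NHWC is called col_major and NCHW row_major (ShapeFixer's one-line change, OIHW to IOHW, supplies the matching kernel layout for NCHW backends). A stand-in sketch of the same mapping; the enums here replace model::Layout and nnfw::srcn::convType_t, which are not reproduced:

  #include <stdexcept>

  enum class Layout { NHWC, NCHW, UNKNOWN };    // stand-in for model::Layout
  enum class ConvType { col_major, row_major }; // stand-in for nnfw::srcn::convType_t

  ConvType convert_layout(Layout layout)
  {
    switch (layout)
    {
      case Layout::NHWC:
        return ConvType::col_major;
      case Layout::NCHW:
        return ConvType::row_major;
      default:
        throw std::runtime_error("Not supported layout");
    }
  }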
diff --git a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc
index 4a391cf..2f619e1 100644
--- a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc
+++ b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc
@@ -31,7 +31,8 @@ namespace kernel
 TransposeConvLayer::TransposeConvLayer()
     : _inputData(), _kernelData(), _outputData(), _inputDescr(), _kernelDescr(), _outputDescr(),
       _paddingType(0), _paddingLeft(0), _paddingTop(0), _paddingRight(0), _paddingBottom(0),
-      _strideWidth(0), _strideHeight(0), _inputType(OperandType::FLOAT32)
+      _strideWidth(0), _strideHeight(0), _inputType(OperandType::FLOAT32),
+      _layout(nnfw::srcn::col_major)
 {
   // DO NOTHING
 }
@@ -41,31 +42,39 @@ void TransposeConvLayer::convFloat32()
   nnfw::srcn::convMat_t in_mat, out_mat, kernel_mat;
   nnfw::srcn::convParams_t in_param;
 
+  assert(_layout == nnfw::srcn::col_major || _layout == nnfw::srcn::row_major);
+  size_t height_index = _layout == nnfw::srcn::col_major ? 1 : 2;
+  size_t width_index = _layout == nnfw::srcn::col_major ? 2 : 3;
+  size_t depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;
+  size_t kernel_input_depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;
+  size_t kernel_output_depth_index = 0;
   const int batches = MatchingDim(_inputDescr, 0, _outputDescr, 0);
-  const int input_height = _inputDescr.dimensions[1];
-  const int input_width = _inputDescr.dimensions[2];
-  const int input_depth = MatchingDim(_inputDescr, 3, _kernelDescr, 3);
+  const int input_height = _inputDescr.dimensions[height_index];
+  const int input_width = _inputDescr.dimensions[width_index];
+  const int input_depth =
+      MatchingDim(_inputDescr, depth_index, _kernelDescr, kernel_input_depth_index);
   in_mat.c = input_depth;
   in_mat.w = input_width;
   in_mat.h = input_height;
   in_mat.n = batches;
   in_mat.data = _inputData.f;
 
-  const int output_height = _outputDescr.dimensions[1];
-  const int output_width = _outputDescr.dimensions[2];
-  const int output_depth = MatchingDim(_kernelDescr, 0, _outputDescr, 3);
+  const int output_height = _outputDescr.dimensions[height_index];
+  const int output_width = _outputDescr.dimensions[width_index];
+  const int output_depth =
+      MatchingDim(_kernelDescr, kernel_output_depth_index, _outputDescr, depth_index);
   out_mat.c = output_depth;
   out_mat.w = output_width;
   out_mat.h = output_height;
   out_mat.n = batches;
   out_mat.data = _outputData.f;
 
-  const int ker_height = _kernelDescr.dimensions[1];
-  const int ker_width = _kernelDescr.dimensions[2];
-  kernel_mat.c = output_depth;
+  const int ker_height = _kernelDescr.dimensions[height_index];
+  const int ker_width = _kernelDescr.dimensions[width_index];
+  kernel_mat.c = input_depth;
   kernel_mat.w = ker_width;
   kernel_mat.h = ker_height;
-  kernel_mat.n = input_depth;
+  kernel_mat.n = output_depth;
   kernel_mat.data = _kernelData.f;
 
   in_param.kernel_w = ker_width;
@@ -78,7 +87,7 @@ void TransposeConvLayer::convFloat32()
   in_param.dilation_w = 1;
   in_param.dilation_h = 1;
 
-  nnfw::srcn::srcn_deconvolution2D(in_mat, kernel_mat, out_mat, in_param, 4, nnfw::srcn::col_major);
+  nnfw::srcn::srcn_deconvolution2D(in_mat, kernel_mat, out_mat, in_param, 4, _layout);
 }
 
 void TransposeConvLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
@@ -87,8 +96,9 @@ void TransposeConvLayer::configure(uint8_t *inputData, const TensorDescriptor in
                                    const uint32_t paddingRight, const uint32_t paddingTop,
                                    const uint32_t paddingBottom, const uint32_t strideWidth,
                                    const uint32_t strideHeight, uint8_t *outputData,
-                                   const TensorDescriptor outputDescr)
+                                   const TensorDescriptor outputDescr, model::Layout layout)
 {
+  _layout = convertLayout(layout);
   _inputData.u8 = inputData;
   _inputDescr = inputDescr;
   _inputType = inputDescr.type;
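Rather than duplicating convFloat32() per layout, the patch picks the dimension indices once and reuses one body for both layouts. A sketch of that index table, with a stand-in enum for nnfw::srcn::convType_t:

  #include <cstddef>

  enum class ConvType { col_major, row_major }; // col_major = NHWC, row_major = NCHW

  struct DimIndices
  {
    std::size_t h, w, c;
  };

  DimIndices feature_indices(ConvType layout)
  {
    return layout == ConvType::col_major ? DimIndices{1, 2, 3}  // N H W C
                                         : DimIndices{2, 3, 1}; // N C H W
  }

  // e.g. input_height = descr.dimensions[feature_indices(layout).h];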
diff --git a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h
index 9c68f35..6eac9b4 100644
--- a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h
+++ b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h
@@ -18,6 +18,7 @@
 #define __NEURUN_BACKEND_SRCN_KERNEL_TRANSPOSECONV_LAYER_H__
 
 #include <exec/IFunction.h>
+#include <ncnn/srcn/conv_type.h>
 
 #include "OperationUtils.h"
 
@@ -41,7 +42,8 @@ public:
                  const TensorDescriptor kernelDescr, const uint32_t paddingType,
                  const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
                  const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
-                 uint8_t *outputData, const TensorDescriptor outputDescr);
+                 uint8_t *outputData, const TensorDescriptor outputDescr,
+                 model::Layout backend_layout);
 
   void run();
   void runSync()
@@ -70,6 +72,7 @@
   uint32_t _strideHeight;
 
   OperandType _inputType;
+  nnfw::srcn::convType_t _layout;
 };
 
 } // namespace kernel
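Putting the pieces together, here is a toy NCHW example of how the layer now sizes its matrices, including the corrected kernel-matrix orientation (n carries the output depth and c the input depth; the old code had the two swapped). All types and shapes below are illustrative stand-ins, not the real nnfw::srcn::convMat_t:

  #include <array>
  #include <cstddef>
  #include <cstdint>
  #include <iostream>

  // Stand-in for the shape fields of nnfw::srcn::convMat_t.
  struct ConvMat
  {
    int32_t n, h, w, c;
  };

  int main()
  {
    const bool row_major = true; // NCHW image layout
    const std::size_t h_idx = row_major ? 2 : 1;
    const std::size_t w_idx = row_major ? 3 : 2;
    const std::size_t c_idx = row_major ? 1 : 3;
    const std::size_t ker_o_idx = 0;                 // output depth is always dim 0
    const std::size_t ker_i_idx = row_major ? 1 : 3; // input depth position

    const std::array<int32_t, 4> in{1, 8, 5, 5};     // N C H W
    const std::array<int32_t, 4> out{1, 16, 10, 10}; // N C H W (stride-2 deconv)
    const std::array<int32_t, 4> ker{16, 8, 3, 3};   // O I H W

    const ConvMat in_mat{in[0], in[h_idx], in[w_idx], in[c_idx]};
    const ConvMat out_mat{out[0], out[h_idx], out[w_idx], out[c_idx]};
    // Kernel matrix: n = output depth, c = input depth.
    const ConvMat kernel_mat{ker[ker_o_idx], ker[h_idx], ker[w_idx], ker[ker_i_idx]};

    std::cout << "kernel " << kernel_mat.n << 'x' << kernel_mat.h << 'x' << kernel_mat.w << 'x'
              << kernel_mat.c << '\n';                                 // kernel 16x3x3x8
    std::cout << "in " << in_mat.c << " -> out " << out_mat.c << '\n'; // in 8 -> out 16
  }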