From a969c300bc4f1546d93d66c8337038d73154f64e Mon Sep 17 00:00:00 2001
From: "Efimov Alexander/AI Tools Lab/./Samsung Electronics"
Date: Mon, 3 Sep 2018 22:34:14 +0300
Subject: [PATCH] Support for grouped convolution for caffe (#1209)

Adds support for grouped convolution in caffe:
fix up the kernel tensor during shape inference.

Signed-off-by: Efimov Alexander
---
 .../plugin/caffe_frontend/caffe_model_visitor.cpp  |  2 +
 .../nnc/plugin/caffe_frontend/caffe_op_creator.cpp | 71 +++++++++++++++++++++-
 2 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/contrib/nnc/plugin/caffe_frontend/caffe_model_visitor.cpp b/contrib/nnc/plugin/caffe_frontend/caffe_model_visitor.cpp
index 2e4907b..90be10d 100644
--- a/contrib/nnc/plugin/caffe_frontend/caffe_model_visitor.cpp
+++ b/contrib/nnc/plugin/caffe_frontend/caffe_model_visitor.cpp
@@ -214,6 +214,8 @@ std::vector<std::shared_ptr<IrTensor>> ModelVisitor::createOpParams(const LayerP
 
   if (lp.has_convolution_param() && blob.shape().dim_size() == 4)
   {
+    // TODO: support non-default channel axis
+    assert(lp.convolution_param().axis() == 1 && "assuming channel axis number set to default");
     params.emplace_back(transposeTensor<2, 3, 1, 0>(tensor));
   }
   else if (lp.has_inner_product_param() && blob.shape().dim_size() == 2)
diff --git a/contrib/nnc/plugin/caffe_frontend/caffe_op_creator.cpp b/contrib/nnc/plugin/caffe_frontend/caffe_op_creator.cpp
index 7c92b39..355585a 100644
--- a/contrib/nnc/plugin/caffe_frontend/caffe_op_creator.cpp
+++ b/contrib/nnc/plugin/caffe_frontend/caffe_op_creator.cpp
@@ -9,6 +9,9 @@
 #include "core/modelIR/operations/reshape_op.h"
 #include "core/modelIR/operations/fully_connected_op.h"
 
+#include "core/modelIR/Index.h"
+#include "core/modelIR/ShapeRange.h"
+
 #include "plugin/common_frontend/shape_helper.h"
 
 #include "caffe_op_creator.h"
@@ -258,6 +261,66 @@ __attribute__ ((unused)) static int getAxisValue(const OptsType& opts)
   return axis;
 }
 
+/** Convert a kernel for grouped 2D convolution into a kernel for ordinary 2D convolution.
+ *
+ * Grouped convolution splits input and kernel channels into the selected number of groups and applies convolution within every group of channels independently.
+ * This technique reduces the kernel size (channels from different groups are not merged, so there is no need to store redundant zero weights).
+ * The compiler does not support grouped convolution yet, so this function unfolds the compact kernel into the classic "every input channel affects every output channel" form,
+ * inserting zero coefficients where needed.
+ *
+ * @param groups number of groups in the grouped convolution
+ * @param foldedKernel original grouped kernel
+ * @return unfolded kernel, compatible with an ordinary conv2D operation
+ */
+static std::shared_ptr<IrTensor> fixGroupedKernel(int groups, std::shared_ptr<IrTensor> foldedKernel)
+{
+  const int kernelInChanNum = 2;
+  const int kernelOutChanNum = 3;
+
+  const Shape &kernelShape = foldedKernel->getShape();
+  auto kernelInChannels = kernelShape.dim(kernelInChanNum);
+  auto kernelOutChannels = kernelShape.dim(kernelOutChanNum);
+  auto inChannels = kernelInChannels * groups;
+
+  // Original kernel has shape [H, W, inputChannels/groups, outputChannels];
+  // here we create an unfolded kernel with shape [H, W, inputChannels, outputChannels]
+  Shape unfoldKernelShape(kernelShape);
+  unfoldKernelShape.dim(kernelInChanNum) = inChannels;
+  auto bufferSize = num_elements(unfoldKernelShape) * foldedKernel->getElementSize();
+  std::shared_ptr<char> buffer(new char[bufferSize], std::default_delete<char[]>());
+  size_t dataSize = foldedKernel->getElementSize();
+  std::shared_ptr<IrTensor> unfoldKernel =
+      std::make_shared<IrTensor>(unfoldKernelShape, buffer, foldedKernel->getDataType(), dataSize);
+
+  int inGroupSize = kernelInChannels;
+  int outGroupSize = kernelOutChannels / groups;
+  assert(kernelOutChannels % groups == 0);
+
+  // Iterate over the "unfolded" kernel shape and insert the appropriate values into the result kernel
+  for (const core::data::Index &idx: core::data::ShapeRange(unfoldKernelShape))
+  {
+    auto inGroupNo = idx.at(kernelInChanNum) / inGroupSize;
+    auto outGroupNo = idx.at(kernelOutChanNum) / outGroupSize;
+    // Copy weights only if the input channel group matches the output channel group
+    if (inGroupNo == outGroupNo)
+    {
+      // Compute the index in the original kernel that corresponds to the output index
+      core::data::Index foldedIdx(idx);
+      foldedIdx.at(kernelInChanNum) %= inGroupSize;
+
+      std::copy(foldedKernel->at(foldedIdx), foldedKernel->at(foldedIdx) + dataSize, unfoldKernel->at(idx));
+    }
+    else
+    {
+      // Fill this element of the output kernel with zero
+      assert(foldedKernel->getDataType() == IrTensor::DTYPE::FLOAT && "unsupported data type, add appropriate zero element creation");
+      float *elem = reinterpret_cast<float *>(unfoldKernel->at(idx));
+      *elem = 0.0f;
+    }
+  }
+  return unfoldKernel;
+}
+
 } // namespace util
 
 std::vector OpCreator::createConv2D(InputOps inputs, InputParams params,
@@ -269,7 +332,13 @@ std::vector OpCreator::createConv2D(InputOps inputs, InputParams par
   ops::PaddingType padType = util::getConvPadType(opts);
   Shape strideShape = util::getConvStride(opts);
 
-  auto outputs = createOp(inputs, std::move(*params[0]),
+  std::shared_ptr<IrTensor> unfoldedTensor = params[0];
+  if (opts.group() != 1)
+  {
+    // First we need to convert the grouped convolution kernel into an equivalent ordinary kernel
+    unfoldedTensor = util::fixGroupedKernel(opts.group(), params[0]);
+  }
+  auto outputs = createOp(inputs, std::move(*unfoldedTensor),
                           strideShape, padType);
 
   // bias_term is optional (so might not be present) and defaults to true
-- 
2.7.4
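
Editor's note on the kernel layout assumed by the patch (not part of the patch itself): with the default channel axis, Caffe stores a convolution weight blob as [outputChannels, inputChannels/groups, kernelH, kernelW], and the visitor's transposeTensor<2, 3, 1, 0> call permutes it into the [H, W, inputChannels/groups, outputChannels] layout that fixGroupedKernel expects. Below is a rough standalone sketch of that permutation over a dense row-major float array; the function name and signature are illustrative only and not nnc API.

#include <cstddef>
#include <vector>

// Sketch only: mirrors the effect of transposeTensor<2, 3, 1, 0> on a Caffe weight blob.
// Source layout: [outChannels, inPerGroup, kH, kW]; target layout: [kH, kW, inPerGroup, outChannels].
std::vector<float> transposeCaffeKernel(const std::vector<float>& blob,
                                        int outChannels, int inPerGroup, int kH, int kW)
{
  std::vector<float> result(blob.size());
  for (int oc = 0; oc < outChannels; ++oc)
    for (int ic = 0; ic < inPerGroup; ++ic)
      for (int y = 0; y < kH; ++y)
        for (int x = 0; x < kW; ++x)
        {
          std::size_t src = ((static_cast<std::size_t>(oc) * inPerGroup + ic) * kH + y) * kW + x;
          std::size_t dst = ((static_cast<std::size_t>(y) * kW + x) * inPerGroup + ic) * outChannels + oc;
          result[dst] = blob[src];
        }
  return result;
}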
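
For readers who want the unfolding step in isolation, here is a minimal sketch of the same idea as fixGroupedKernel, independent of the nnc IrTensor/Index/ShapeRange classes. It assumes the kernel is a dense row-major float array in [H, W, inputChannels/groups, outputChannels] layout; the function name and signature are illustrative only.

#include <cassert>
#include <cstddef>
#include <vector>

// Expands a grouped kernel so that every input channel "sees" every output channel;
// positions where the input and output channels belong to different groups stay zero.
std::vector<float> unfoldGroupedKernel(const std::vector<float>& folded,
                                       int h, int w, int inPerGroup, int outChannels, int groups)
{
  assert(outChannels % groups == 0);
  const int inChannels = inPerGroup * groups;
  const int outPerGroup = outChannels / groups;

  std::vector<float> unfolded(static_cast<std::size_t>(h) * w * inChannels * outChannels, 0.0f);

  for (int y = 0; y < h; ++y)
    for (int x = 0; x < w; ++x)
      for (int ic = 0; ic < inChannels; ++ic)
        for (int oc = 0; oc < outChannels; ++oc)
        {
          // Copy a weight only when the input and output channels are in the same group
          if (ic / inPerGroup != oc / outPerGroup)
            continue;
          std::size_t src = ((static_cast<std::size_t>(y) * w + x) * inPerGroup + ic % inPerGroup) * outChannels + oc;
          std::size_t dst = ((static_cast<std::size_t>(y) * w + x) * inChannels + ic) * outChannels + oc;
          unfolded[dst] = folded[src];
        }
  return unfolded;
}

For example, with groups = 2, 4 input channels and 6 output channels, the folded kernel has shape [H, W, 2, 6] and the unfolded kernel has shape [H, W, 4, 6]: output channels 0-2 get non-zero weights only from input channels 0-1, and output channels 3-5 only from input channels 2-3, so an ordinary conv2D over the unfolded kernel computes the same result as the original grouped convolution.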