From: Efimov Alexander/AI Tools Lab/./Samsung Electronics
Date: Mon, 3 Sep 2018 19:34:14 +0000 (+0300)
Subject: Support for grouped convolution for caffe (#1209)
X-Git-Tag: nncc_backup~1969
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a969c300bc4f1546d93d66c8337038d73154f64e;p=platform%2Fcore%2Fml%2Fnnfw.git

Support for grouped convolution for caffe (#1209)

Adds support for grouped convolution in caffe:
fix kernel tensor during shape inference

Signed-off-by: Efimov Alexander
---

diff --git a/contrib/nnc/plugin/caffe_frontend/caffe_model_visitor.cpp b/contrib/nnc/plugin/caffe_frontend/caffe_model_visitor.cpp
index 2e4907b..90be10d 100644
--- a/contrib/nnc/plugin/caffe_frontend/caffe_model_visitor.cpp
+++ b/contrib/nnc/plugin/caffe_frontend/caffe_model_visitor.cpp
@@ -214,6 +214,8 @@ std::vector<std::shared_ptr<IrTensor>> ModelVisitor::createOpParams(const LayerP
   if (lp.has_convolution_param() && blob.shape().dim_size() == 4)
   {
+    // TODO: support non-default channel axis
+    assert(lp.convolution_param().axis() == 1 && "assuming channel axis number set to default");
     params.emplace_back(transposeTensor<2, 3, 1, 0>(tensor));
   }
   else if (lp.has_inner_product_param() && blob.shape().dim_size() == 2)
diff --git a/contrib/nnc/plugin/caffe_frontend/caffe_op_creator.cpp b/contrib/nnc/plugin/caffe_frontend/caffe_op_creator.cpp
index 7c92b39..355585a 100644
--- a/contrib/nnc/plugin/caffe_frontend/caffe_op_creator.cpp
+++ b/contrib/nnc/plugin/caffe_frontend/caffe_op_creator.cpp
@@ -9,6 +9,9 @@
 #include "core/modelIR/operations/reshape_op.h"
 #include "core/modelIR/operations/fully_connected_op.h"
 
+#include "core/modelIR/Index.h"
+#include "core/modelIR/ShapeRange.h"
+
 #include "plugin/common_frontend/shape_helper.h"
 
 #include "caffe_op_creator.h"
@@ -258,6 +261,66 @@ __attribute__ ((unused)) static int getAxisValue(const OptsType& opts)
   return axis;
 }
 
+/** Converts a kernel for grouped 2d convolution into a kernel for ordinary 2d convolution
+ *
+ * Grouped convolution splits the input and kernel channels into a selected number of groups and applies convolution within every group of channels independently.
+ * This technique reduces kernel size (channels from different groups are not merged, so there is no need to store redundant zero weights).
+ * The compiler does not support this for now, so this function unfolds the compact kernel into the classic "every input channel affects every output channel" form
+ * by inserting zero coefficients where needed.
+ *
+ * @param groups number of groups in the grouped convolution
+ * @param foldedKernel original grouped kernel
+ * @return unfolded kernel, compatible with the ordinary conv2D operation
+ */
+static std::shared_ptr<IrTensor> fixGroupedKernel(int groups, std::shared_ptr<IrTensor> foldedKernel)
+{
+  const int kernelInChanNum = 2;
+  const int kernelOutChanNum = 3;
+
+  const Shape &kernelShape = foldedKernel->getShape();
+  auto kernelInChannels = kernelShape.dim(kernelInChanNum);
+  auto kernelOutChannels = kernelShape.dim(kernelOutChanNum);
+  auto inChannels = kernelInChannels * groups;
+
+  // Original kernel has shape [H, W, inputChannels/groups, outputChannels];
+  // here we create an unfolded kernel with shape [H, W, inputChannels, outputChannels]
+  Shape unfoldKernelShape(kernelShape);
+  unfoldKernelShape.dim(kernelInChanNum) = inChannels;
+  auto bufferSize = num_elements(unfoldKernelShape) * foldedKernel->getElementSize();
+  std::shared_ptr<char> buffer(new char[bufferSize], std::default_delete<char[]>());
+  size_t dataSize = foldedKernel->getElementSize();
+  std::shared_ptr<IrTensor> unfoldKernel =
+      std::make_shared<IrTensor>(unfoldKernelShape, buffer, foldedKernel->getDataType(), dataSize);
+
+  int inGroupSize = kernelInChannels;
+  int outGroupSize = kernelOutChannels / groups;
+  assert(kernelOutChannels % groups == 0);
+
+  // Iterate over the "unfolded" kernel shape and insert appropriate values into the result kernel
+  for (const core::data::Index &idx: core::data::ShapeRange(unfoldKernelShape))
+  {
+    auto inGroupNo = idx.at(kernelInChanNum) / inGroupSize;
+    auto outGroupNo = idx.at(kernelOutChanNum) / outGroupSize;
+    // check whether the input channel group matches the output channel group
+    if (inGroupNo == outGroupNo)
+    {
+      // compute the index in the original kernel that corresponds to the output index
+      core::data::Index foldedIdx(idx);
+      foldedIdx.at(kernelInChanNum) %= inGroupSize;
+
+      std::copy(foldedKernel->at(foldedIdx), foldedKernel->at(foldedIdx) + dataSize, unfoldKernel->at(idx));
+    }
+    else
+    {
+      // fill this element of the output kernel with zero
+      assert(foldedKernel->getDataType() == IrTensor::DTYPE::FLOAT && "unsupported data type, add appropriate zero element creation");
+      float *elem = reinterpret_cast<float *>(unfoldKernel->at(idx));
+      *elem = 0.0f;
+    }
+  }
+  return unfoldKernel;
+}
+
 } // namespace util
 
 std::vector OpCreator::createConv2D(InputOps inputs, InputParams params,
@@ -269,7 +332,13 @@ std::vector OpCreator::createConv2D(InputOps inputs, InputParams par
   ops::PaddingType padType = util::getConvPadType(opts);
   Shape strideShape = util::getConvStride(opts);
 
-  auto outputs = createOp(inputs, std::move(*params[0]),
+  std::shared_ptr<IrTensor> unfoldedTensor = params[0];
+  if (opts.group() != 1)
+  {
+    // first we need to convert the kernel of the grouped convolution into an ordinary kernel
+    unfoldedTensor = util::fixGroupedKernel(opts.group(), params[0]);
+  }
+  auto outputs = createOp(inputs, std::move(*unfoldedTensor),
                                          strideShape, padType);
 
   // bias_term is optional (so might not be present) and defaults to true
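
Below is a minimal standalone sketch of the unfolding that fixGroupedKernel performs, using plain std::vector<float> buffers instead of IrTensor; the helper name unfoldGroupedKernel and the explicit [H, W, Cin/groups, Cout] index arithmetic are illustrative assumptions, not code from this patch.

// Minimal sketch of grouped-kernel unfolding on flat float buffers.
// Folded kernel layout: [H, W, Cin/groups, Cout]; result: [H, W, Cin, Cout].
#include <cassert>
#include <cstddef>
#include <vector>

static std::vector<float> unfoldGroupedKernel(const std::vector<float> &folded,
                                              int H, int W, int inPerGroup,
                                              int Cout, int groups)
{
  assert(Cout % groups == 0);
  const int Cin = inPerGroup * groups;
  const int outPerGroup = Cout / groups;
  // zero-initialized, so cross-group weights stay 0
  std::vector<float> unfolded(static_cast<std::size_t>(H) * W * Cin * Cout, 0.0f);

  for (int h = 0; h < H; ++h)
    for (int w = 0; w < W; ++w)
      for (int ci = 0; ci < Cin; ++ci)
        for (int co = 0; co < Cout; ++co)
        {
          // only channels belonging to the same group carry real weights
          if (ci / inPerGroup != co / outPerGroup)
            continue;
          const std::size_t src = ((static_cast<std::size_t>(h) * W + w) * inPerGroup + ci % inPerGroup) * Cout + co;
          const std::size_t dst = ((static_cast<std::size_t>(h) * W + w) * Cin + ci) * Cout + co;
          unfolded[dst] = folded[src];
        }
  return unfolded;
}

For example, with groups = 2, H = W = 1, inPerGroup = 1 and Cout = 2, a [1, 1, 1, 2] folded kernel becomes a block-diagonal [1, 1, 2, 2] kernel with zeros in the cross-group positions.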