From: 장지섭/On-Device Lab(SR)/Engineer/삼성전자
Date: Wed, 6 Nov 2019 07:03:23 +0000 (+0900)
Subject: Make ncnn backend to support TransposeConv in row major (#8806)
X-Git-Tag: submit/tizen/20191205.083104~373
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=aa672be76b981983ecf035f2b00d7f4db0e5849b;p=platform%2Fcore%2Fml%2Fnnfw.git

Make ncnn backend to support TransposeConv in row major (#8806)

This commit makes the ncnn (srcn) backend support TransposeConv in row-major
(NCHW) layout.

Signed-off-by: jiseob.jang
---
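Before this patch, the srcn backend hard-coded a single OHWI-to-HWOI kernel permutation; it now derives the permutation from the (frontend, backend) filter-layout pair, so NHWC images keep the TensorFlow-style OHWI -> HWOI mapping while NCHW images get OIHW -> IOHW. The standalone sketch below illustrates how such a permutation can be derived from two layout strings; Axes and permutation_between are illustrative names only (in the patch, kernel::getFilterPermutation fills this role).

  // Standalone illustration; Axes and permutation_between are made-up names.
  #include <array>
  #include <cstddef>
  #include <iostream>
  #include <string>

  // A filter layout is described by the order of its O/H/W/I axes.
  using Axes = std::string; // e.g. "OHWI"

  // perm[d] = position in `from` of the axis that destination dim d takes.
  std::array<std::size_t, 4> permutation_between(const Axes &from, const Axes &to)
  {
    std::array<std::size_t, 4> perm{};
    for (std::size_t d = 0; d < 4; ++d)
      perm[d] = from.find(to[d]);
    return perm;
  }

  int main()
  {
    // NHWC images: kernels go OHWI -> HWOI (TensorFlow-style).
    for (auto p : permutation_between("OHWI", "HWOI"))
      std::cout << p << ' '; // prints: 1 2 0 3
    std::cout << '\n';
    // NCHW images: kernels go OIHW -> IOHW.
    for (auto p : permutation_between("OIHW", "IOHW"))
      std::cout << p << ' '; // prints: 1 0 2 3
    std::cout << '\n';
  }

Applying such a permutation to both the dimensions and the coordinates of the source filter is all the rewritten PermuteKernel below needs.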
diff --git a/runtime/neurun/backend/srcn/ConstantInitializer.cc b/runtime/neurun/backend/srcn/ConstantInitializer.cc
index 6da8759..d01b4bc 100644
--- a/runtime/neurun/backend/srcn/ConstantInitializer.cc
+++ b/runtime/neurun/backend/srcn/ConstantInitializer.cc
@@ -22,46 +22,50 @@ namespace
 {
 
 template <typename T>
-static void
-PermuteKernel(const neurun::model::Operand &model_obj, neurun::backend::operand::IObject &obj,
-              const neurun::model::Layout frontend_layout = neurun::model::Layout::UNKNOWN)
+static void PermuteKernel(const neurun::model::Operand &model_obj,
+                          neurun::backend::operand::IObject &obj,
+                          const std::vector<size_t> &permutation)
 {
   const auto shape = model_obj.shape();
   auto base = reinterpret_cast<const T *>(model_obj.data().base());
 
   assert(shape.rank() == 4);
-
-  // TODO Support frontend layout
-  UNUSED_RELEASE(frontend_layout);
+  assert(permutation.size() == 4);
+  assert(permutation[0] != permutation[1] && permutation[0] != permutation[2] &&
+         permutation[0] != permutation[3]);
+  assert(permutation[1] != permutation[2] && permutation[1] != permutation[3]);
+  assert(permutation[2] != permutation[3]);
+  assert(permutation[0] < 4 && permutation[1] < 4 && permutation[2] < 4 && permutation[3] < 4);
 
   obj.access([&](::neurun::backend::operand::ITensor &tensor) {
-    // NOTE The srcn takes a HWOI layout as kernel filter even though image layout is NHWC.
-    // This policy is the same with the tensorflow policy.
-    // So using srcn library, we need to change kernel layout to HWOI from OHWI.
-    const int32_t outch = shape.dim(0);
-    const int32_t height = shape.dim(1);
-    const int32_t width = shape.dim(2);
-    const int32_t inch = shape.dim(3);
-    const auto to_dim = ::neurun::backend::srcn::kernel::convertCoordinates(
-        {outch, height, width, inch}, ::neurun::backend::srcn::kernel::FilterLayout::OHWI,
-        ::neurun::backend::srcn::kernel::FilterLayout::HWOI);
-    for (auto i = 0; i < outch; ++i)
+    if (permutation[0] == 0 && permutation[1] == 1 && permutation[2] == 2 && permutation[3] == 3)
+    {
+      memcpy(tensor.buffer(), base, shape.num_elements() * sizeof(T));
+    }
+    else
     {
-      for (auto j = 0; j < height; ++j)
+      const int32_t dim0 = shape.dim(0);
+      const int32_t dim1 = shape.dim(1);
+      const int32_t dim2 = shape.dim(2);
+      const int32_t dim3 = shape.dim(3);
+      for (auto i = 0; i < dim0; ++i)
       {
-        for (auto k = 0; k < width; ++k)
+        for (auto j = 0; j < dim1; ++j)
         {
-          for (auto l = 0; l < inch; ++l)
+          for (auto k = 0; k < dim2; ++k)
           {
-            const auto coords = ::neurun::backend::srcn::kernel::convertCoordinates(
-                {i, j, k, l}, ::neurun::backend::srcn::kernel::FilterLayout::OHWI,
-                ::neurun::backend::srcn::kernel::FilterLayout::HWOI);
-            const size_t offset = coords[0] * to_dim[1] * to_dim[2] * to_dim[3] +
-                                  coords[1] * to_dim[2] * to_dim[3] + coords[2] * to_dim[3] +
-                                  coords[3];
-            T *into = reinterpret_cast<T *>(tensor.buffer() + offset * sizeof(T));
-            T value = *(base + i * height * width * inch + j * width * inch + k * inch + l);
-            *into = value;
+            for (auto l = 0; l < dim3; ++l)
+            {
+              Coordinates frontend_coords{i, j, k, l};
+              Coordinates coords = frontend_coords;
+              coords.set(0, frontend_coords[permutation[0]]);
+              coords.set(1, frontend_coords[permutation[1]]);
+              coords.set(2, frontend_coords[permutation[2]]);
+              coords.set(3, frontend_coords[permutation[3]]);
+              T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset(coords));
+              T value = *(base + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3 + l);
+              *into = value;
+            }
           }
         }
       }
@@ -100,7 +104,8 @@ void ConstantInitializer::run()
 }
 
 void ConstantInitializer::registerPermuteKernelInitializer(const model::OperandIndex &index,
-                                                           const model::Operand &obj)
+                                                           const model::Operand &obj,
+                                                           const std::vector<size_t> &permutation)
 {
   // For only CONSTANTS
   if (!obj.isConstant())
@@ -115,17 +120,17 @@ void ConstantInitializer::registerPermuteKernelInitializer(const model::OperandI
   switch (type)
   {
     case DataType::FLOAT32:
-      _init_map[index] = std::bind(PermuteKernel<float>, _1, _2, _current_subg_layout);
+      _init_map[index] = std::bind(PermuteKernel<float>, _1, _2, permutation);
       break;
    case DataType::INT32:
-      _init_map[index] = std::bind(PermuteKernel<int32_t>, _1, _2, _current_subg_layout);
+      _init_map[index] = std::bind(PermuteKernel<int32_t>, _1, _2, permutation);
      break;
    case DataType::UINT32:
-      _init_map[index] = std::bind(PermuteKernel<uint32_t>, _1, _2, _current_subg_layout);
+      _init_map[index] = std::bind(PermuteKernel<uint32_t>, _1, _2, permutation);
      break;
    case DataType::BOOL8:
    case DataType::QUANT8_ASYMM:
-      _init_map[index] = std::bind(PermuteKernel<uint8_t>, _1, _2, _current_subg_layout);
+      _init_map[index] = std::bind(PermuteKernel<uint8_t>, _1, _2, permutation);
      break;
    default:
      throw std::runtime_error("Not supported, yet");
@@ -135,9 +140,30 @@ void ConstantInitializer::registerPermuteKernelInitializer(const model::OperandI
 
 void ConstantInitializer::visit(const model::operation::TransposeConv &node)
 {
+  // NOTE The srcn deconvolution layer takes an HWOI layout as kernel filter even though the
+  // image layout is NHWC.
+  // This policy is the same as the TensorFlow policy.
+  // So, to use the srcn library, we need to change the kernel layout to HWOI from OHWI or OIHW
+  // in this case.
+  // Also, the srcn deconvolution layer takes an OIHW layout as kernel filter if the image
+  // layout is NCHW.
   const auto &kernel_index = node.getInputs().at(model::operation::TransposeConv::KERNEL);
   const auto &kernel_obj = _operands.at(kernel_index);
-  registerPermuteKernelInitializer(kernel_index, kernel_obj);
+  const auto frontend_layout = _current_subg_layout;
+  const auto backend_layout = _tensor_builder->wrapTensor(kernel_index)->ptr()->layout();
+  assert(frontend_layout == neurun::model::Layout::NHWC ||
+         frontend_layout == neurun::model::Layout::NCHW);
+  assert(backend_layout == neurun::model::Layout::NHWC ||
+         backend_layout == neurun::model::Layout::NCHW);
+  const auto frontend_filter_layout = frontend_layout == neurun::model::Layout::NHWC
+                                          ? kernel::FilterLayout::OHWI
+                                          : kernel::FilterLayout::OIHW;
+  const auto backend_filter_layout = backend_layout == neurun::model::Layout::NHWC
+                                         ? kernel::FilterLayout::HWOI
+                                         : kernel::FilterLayout::IOHW;
+  registerPermuteKernelInitializer(
+      kernel_index, kernel_obj,
+      kernel::getFilterPermutation(frontend_filter_layout, backend_filter_layout));
 }
 
 } // namespace srcn
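PermuteKernel above takes a fast path through a single memcpy when the permutation is the identity, and otherwise scatters elements one at a time through tensor.calcOffset(). A minimal self-contained sketch of the same copy strategy follows; it assumes a dense row-major destination in place of calcOffset(), which is my simplification, not the patch's.

  #include <array>
  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  // Copy src (extents `dim`) into dst laid out as dim[perm[0]] x dim[perm[1]] x
  // dim[perm[2]] x dim[perm[3]], so that destination coordinate d equals source
  // coordinate perm[d].
  template <typename T>
  void permute4d(const T *src, T *dst, const std::array<int32_t, 4> &dim,
                 const std::array<std::size_t, 4> &perm)
  {
    if (perm == std::array<std::size_t, 4>{0, 1, 2, 3})
    {
      // Identity permutation: the layouts already agree, one bulk copy suffices.
      std::memcpy(dst, src, sizeof(T) * dim[0] * dim[1] * dim[2] * dim[3]);
      return;
    }
    const std::array<int32_t, 4> odim{dim[perm[0]], dim[perm[1]], dim[perm[2]], dim[perm[3]]};
    for (int32_t i = 0; i < dim[0]; ++i)
      for (int32_t j = 0; j < dim[1]; ++j)
        for (int32_t k = 0; k < dim[2]; ++k)
          for (int32_t l = 0; l < dim[3]; ++l)
          {
            const std::array<int32_t, 4> s{i, j, k, l};
            // Destination coordinates are the source coordinates, reordered.
            const std::array<int32_t, 4> d{s[perm[0]], s[perm[1]], s[perm[2]], s[perm[3]]};
            dst[((d[0] * odim[1] + d[1]) * odim[2] + d[2]) * odim[3] + d[3]] =
                src[((i * dim[1] + j) * dim[2] + k) * dim[3] + l];
          }
  }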
diff --git a/runtime/neurun/backend/srcn/ConstantInitializer.h b/runtime/neurun/backend/srcn/ConstantInitializer.h
index 7eac7bd..559aaa6 100644
--- a/runtime/neurun/backend/srcn/ConstantInitializer.h
+++ b/runtime/neurun/backend/srcn/ConstantInitializer.h
@@ -20,6 +20,7 @@
 #include <backend/IConstantInitializer.h>
 #include <model/Operands.h>
 #include "TensorBuilder.h"
+#include <vector>
 
 namespace neurun
 {
@@ -38,8 +39,8 @@ public:
   void run() override;
 
 public:
-  void registerPermuteKernelInitializer(const model::OperandIndex &index,
-                                        const model::Operand &obj);
+  void registerPermuteKernelInitializer(const model::OperandIndex &index, const model::Operand &obj,
+                                        const std::vector<size_t> &permutation);
 
 public:
   void visit(const model::operation::TransposeConv &) override;
diff --git a/runtime/neurun/backend/srcn/KernelGenerator.cc b/runtime/neurun/backend/srcn/KernelGenerator.cc
index 6a305db..d1e56bc 100644
--- a/runtime/neurun/backend/srcn/KernelGenerator.cc
+++ b/runtime/neurun/backend/srcn/KernelGenerator.cc
@@ -107,10 +107,11 @@ void KernelGenerator::visit(const model::operation::TransposeConv &node)
 
   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
-  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
-  const auto &ker_shape = _ctx.at(ker_index).shape();
-  const auto ker_height = ker_shape.dim(1);
-  const auto ker_width = ker_shape.dim(2);
+  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in] if NHWC and [depth_out,
+  // depth_in, kernel_height, kernel_width] if NCHW.
+  const auto &ker_shape = _ctx.at(ker_index).shape().asFeature(_current_subg_layout);
+  const auto ker_height = ker_shape.H;
+  const auto ker_width = ker_shape.W;
   const auto stride = node.param().stride;
   const int padding_type = (node.param().padding.type == model::PaddingType::SAME);
   const auto padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape,
@@ -133,7 +134,8 @@ void KernelGenerator::visit(const model::operation::TransposeConv &node)
 
   fn->configure(ifm_alloc->buffer(), ifm_backend_descr, ker_alloc->buffer(), ker_backend_descr,
                 padding_type, padding.left, padding.right, padding.top, padding.bottom,
-                stride.horizontal, stride.vertical, ofm_alloc->buffer(), ofm_backend_descr);
+                stride.horizontal, stride.vertical, ofm_alloc->buffer(), ofm_backend_descr,
+                backend_layout);
 
   _execution_builder->append(std::move(fn));
 }
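The KernelGenerator change above stops assuming NHWC kernel positions (dim(1)/dim(2)) and reads height and width through a layout-aware feature view instead. A sketch of that mapping, with a plain struct standing in for the real Shape::asFeature() result:

  #include <array>
  #include <cstdint>
  #include <stdexcept>

  enum class Layout { NHWC, NCHW };

  // Stand-in for the named N/H/W/C view that Shape::asFeature() returns.
  struct FeatureShape
  {
    int32_t N, H, W, C;
  };

  FeatureShape as_feature(const std::array<int32_t, 4> &dim, Layout layout)
  {
    switch (layout)
    {
      case Layout::NHWC:
        return {dim[0], dim[1], dim[2], dim[3]};
      case Layout::NCHW:
        return {dim[0], dim[2], dim[3], dim[1]};
    }
    throw std::invalid_argument("unknown layout");
  }

  // Usage mirroring the hunk above:
  //   const auto ker = as_feature(ker_dims, current_layout);
  //   const auto ker_height = ker.H, ker_width = ker.W;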
diff --git a/runtime/neurun/backend/srcn/ShapeFixer.cc b/runtime/neurun/backend/srcn/ShapeFixer.cc
index 3c7bd15..62067fa 100644
--- a/runtime/neurun/backend/srcn/ShapeFixer.cc
+++ b/runtime/neurun/backend/srcn/ShapeFixer.cc
@@ -87,7 +87,7 @@ void ShapeFixer::visit(const model::operation::TransposeConv &node)
   assert(backend_layout == model::Layout::NCHW || backend_layout == model::Layout::NHWC);
   const auto backend_filter_layout = backend_layout == model::Layout::NHWC
                                          ? kernel::FilterLayout::HWOI
-                                         : kernel::FilterLayout::OIHW;
+                                         : kernel::FilterLayout::IOHW;
 
   model::OperandInfo backend_info{
       asKernelShape(kernel_obj.shape(), frontend_filter_layout, backend_filter_layout),
diff --git a/runtime/neurun/backend/srcn/kernel/OperationUtils.cc b/runtime/neurun/backend/srcn/kernel/OperationUtils.cc
index 56ac525..aeb5515 100644
--- a/runtime/neurun/backend/srcn/kernel/OperationUtils.cc
+++ b/runtime/neurun/backend/srcn/kernel/OperationUtils.cc
@@ -80,6 +80,23 @@ Coordinates convertCoordinates(const Coordinates &coordinates, FilterLayout from
                      coordinates[permutation[2]], coordinates[permutation[3]]};
 }
 
+nnfw::srcn::convType_t convertLayout(model::Layout layout)
+{
+  assert(layout == model::Layout::NHWC || layout == model::Layout::NCHW);
+  if (layout == model::Layout::NHWC)
+  {
+    return nnfw::srcn::col_major;
+  }
+  else if (layout == model::Layout::NCHW)
+  {
+    return nnfw::srcn::row_major;
+  }
+  else
+  {
+    throw std::runtime_error("Not supported layout");
+  }
+}
+
 TensorDescriptor getTensorDescriptor(const ::neurun::model::Operand &o,
                                      ::neurun::model::Layout frontend_layout,
                                      ::neurun::model::Layout backend_layout)
diff --git a/runtime/neurun/backend/srcn/kernel/OperationUtils.h b/runtime/neurun/backend/srcn/kernel/OperationUtils.h
index c10be46..75b081f 100644
--- a/runtime/neurun/backend/srcn/kernel/OperationUtils.h
+++ b/runtime/neurun/backend/srcn/kernel/OperationUtils.h
@@ -24,6 +24,7 @@
 #include "model/Operand.h"
 #include "model/DataType.h"
 #include <util/Coordinates.h>
+#include <ncnn/srcn/conv_type.h>
 
 using OperandType = neurun::model::DataType;
 using neurun::util::Coordinates;
@@ -70,6 +71,8 @@ std::vector<size_t> getFilterPermutation(FilterLayout from_layout, FilterLayout
 Coordinates convertCoordinates(const Coordinates &from_coordinates, FilterLayout from_layout,
                                FilterLayout to_layout);
 
+nnfw::srcn::convType_t convertLayout(model::Layout layout);
+
 TensorDescriptor getTensorDescriptor(const ::neurun::model::Operand &o,
                                      ::neurun::model::Layout frontend_layout,
                                      ::neurun::model::Layout backend_layout);
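The new convertLayout() helper translates the model layout into srcn's vocabulary, in which NHWC is called col_major and NCHW row_major (ShapeFixer's one-line change, OIHW to IOHW, supplies the matching kernel layout for NCHW backends). A stand-in sketch of the same mapping; the enums here replace model::Layout and nnfw::srcn::convType_t, which are not reproduced:

  #include <stdexcept>

  enum class Layout { NHWC, NCHW, UNKNOWN };    // stand-in for model::Layout
  enum class ConvType { col_major, row_major }; // stand-in for nnfw::srcn::convType_t

  ConvType convert_layout(Layout layout)
  {
    switch (layout)
    {
      case Layout::NHWC:
        return ConvType::col_major;
      case Layout::NCHW:
        return ConvType::row_major;
      default:
        throw std::runtime_error("Not supported layout");
    }
  }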
diff --git a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc
index 4a391cf..2f619e1 100644
--- a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc
+++ b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc
@@ -31,7 +31,8 @@ namespace kernel
 TransposeConvLayer::TransposeConvLayer()
     : _inputData(), _kernelData(), _outputData(), _inputDescr(), _kernelDescr(), _outputDescr(),
       _paddingType(0), _paddingLeft(0), _paddingTop(0), _paddingRight(0), _paddingBottom(0),
-      _strideWidth(0), _strideHeight(0), _inputType(OperandType::FLOAT32)
+      _strideWidth(0), _strideHeight(0), _inputType(OperandType::FLOAT32),
+      _layout(nnfw::srcn::col_major)
 {
   // DO NOTHING
 }
@@ -41,31 +42,39 @@ void TransposeConvLayer::convFloat32()
   nnfw::srcn::convMat_t in_mat, out_mat, kernel_mat;
   nnfw::srcn::convParams_t in_param;
 
+  assert(_layout == nnfw::srcn::col_major || _layout == nnfw::srcn::row_major);
+  size_t height_index = _layout == nnfw::srcn::col_major ? 1 : 2;
+  size_t width_index = _layout == nnfw::srcn::col_major ? 2 : 3;
+  size_t depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;
+  size_t kernel_input_depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;
+  size_t kernel_output_depth_index = 0;
   const int batches = MatchingDim(_inputDescr, 0, _outputDescr, 0);
-  const int input_height = _inputDescr.dimensions[1];
-  const int input_width = _inputDescr.dimensions[2];
-  const int input_depth = MatchingDim(_inputDescr, 3, _kernelDescr, 3);
+  const int input_height = _inputDescr.dimensions[height_index];
+  const int input_width = _inputDescr.dimensions[width_index];
+  const int input_depth =
+      MatchingDim(_inputDescr, depth_index, _kernelDescr, kernel_input_depth_index);
   in_mat.c = input_depth;
   in_mat.w = input_width;
   in_mat.h = input_height;
   in_mat.n = batches;
   in_mat.data = _inputData.f;
 
-  const int output_height = _outputDescr.dimensions[1];
-  const int output_width = _outputDescr.dimensions[2];
-  const int output_depth = MatchingDim(_kernelDescr, 0, _outputDescr, 3);
+  const int output_height = _outputDescr.dimensions[height_index];
+  const int output_width = _outputDescr.dimensions[width_index];
+  const int output_depth =
+      MatchingDim(_kernelDescr, kernel_output_depth_index, _outputDescr, depth_index);
   out_mat.c = output_depth;
   out_mat.w = output_width;
   out_mat.h = output_height;
   out_mat.n = batches;
   out_mat.data = _outputData.f;
 
-  const int ker_height = _kernelDescr.dimensions[1];
-  const int ker_width = _kernelDescr.dimensions[2];
-  kernel_mat.c = output_depth;
+  const int ker_height = _kernelDescr.dimensions[height_index];
+  const int ker_width = _kernelDescr.dimensions[width_index];
+  kernel_mat.c = input_depth;
   kernel_mat.w = ker_width;
   kernel_mat.h = ker_height;
-  kernel_mat.n = input_depth;
+  kernel_mat.n = output_depth;
   kernel_mat.data = _kernelData.f;
 
   in_param.kernel_w = ker_width;
@@ -78,7 +87,7 @@ void TransposeConvLayer::convFloat32()
   in_param.dilation_w = 1;
   in_param.dilation_h = 1;
 
-  nnfw::srcn::srcn_deconvolution2D(in_mat, kernel_mat, out_mat, in_param, 4, nnfw::srcn::col_major);
+  nnfw::srcn::srcn_deconvolution2D(in_mat, kernel_mat, out_mat, in_param, 4, _layout);
 }
 
 void TransposeConvLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
@@ -87,8 +96,9 @@ void TransposeConvLayer::configure(uint8_t *inputData, const TensorDescriptor in
                                    const uint32_t paddingRight, const uint32_t paddingTop,
                                    const uint32_t paddingBottom, const uint32_t strideWidth,
                                    const uint32_t strideHeight, uint8_t *outputData,
-                                   const TensorDescriptor outputDescr)
+                                   const TensorDescriptor outputDescr, model::Layout layout)
 {
+  _layout = convertLayout(layout);
   _inputData.u8 = inputData;
   _inputDescr = inputDescr;
   _inputType = inputDescr.type;
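Rather than duplicating convFloat32() per layout, the patch picks the dimension indices once and reuses one body for both layouts. A sketch of that index table, with a stand-in enum for nnfw::srcn::convType_t:

  #include <cstddef>

  enum class ConvType { col_major, row_major }; // col_major = NHWC, row_major = NCHW

  struct DimIndices
  {
    std::size_t h, w, c;
  };

  DimIndices feature_indices(ConvType layout)
  {
    return layout == ConvType::col_major ? DimIndices{1, 2, 3}  // N H W C
                                         : DimIndices{2, 3, 1}; // N C H W
  }

  // e.g. input_height = descr.dimensions[feature_indices(layout).h];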
diff --git a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h
index 9c68f35..6eac9b4 100644
--- a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h
+++ b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h
@@ -18,6 +18,7 @@
 #define __NEURUN_BACKEND_SRCN_KERNEL_TRANSPOSECONV_LAYER_H__
 
 #include <exec/IFunction.h>
+#include <ncnn/srcn/conv_type.h>
 
 #include "OperationUtils.h"
 
@@ -41,7 +42,8 @@ public:
                  const TensorDescriptor kernelDescr, const uint32_t paddingType,
                  const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
                  const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
-                 uint8_t *outputData, const TensorDescriptor outputDescr);
+                 uint8_t *outputData, const TensorDescriptor outputDescr,
+                 model::Layout backend_layout);
 
   void run();
   void runSync()
@@ -70,6 +72,7 @@
   uint32_t _strideHeight;
 
   OperandType _inputType;
+  nnfw::srcn::convType_t _layout;
 };
 
 } // namespace kernel
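Putting the pieces together, here is a toy NCHW example of how the layer now sizes its matrices, including the corrected kernel-matrix orientation (n carries the output depth and c the input depth; the old code had the two swapped). All types and shapes below are illustrative stand-ins, not the real nnfw::srcn::convMat_t:

  #include <array>
  #include <cstddef>
  #include <cstdint>
  #include <iostream>

  // Stand-in for the shape fields of nnfw::srcn::convMat_t.
  struct ConvMat
  {
    int32_t n, h, w, c;
  };

  int main()
  {
    const bool row_major = true; // NCHW image layout
    const std::size_t h_idx = row_major ? 2 : 1;
    const std::size_t w_idx = row_major ? 3 : 2;
    const std::size_t c_idx = row_major ? 1 : 3;
    const std::size_t ker_o_idx = 0;                 // output depth is always dim 0
    const std::size_t ker_i_idx = row_major ? 1 : 3; // input depth position

    const std::array<int32_t, 4> in{1, 8, 5, 5};     // N C H W
    const std::array<int32_t, 4> out{1, 16, 10, 10}; // N C H W (stride-2 deconv)
    const std::array<int32_t, 4> ker{16, 8, 3, 3};   // O I H W

    const ConvMat in_mat{in[0], in[h_idx], in[w_idx], in[c_idx]};
    const ConvMat out_mat{out[0], out[h_idx], out[w_idx], out[c_idx]};
    // Kernel matrix: n = output depth, c = input depth.
    const ConvMat kernel_mat{ker[ker_o_idx], ker[h_idx], ker[w_idx], ker[ker_i_idx]};

    std::cout << "kernel " << kernel_mat.n << 'x' << kernel_mat.h << 'x' << kernel_mat.w << 'x'
              << kernel_mat.c << '\n';                                 // kernel 16x3x3x8
    std::cout << "in " << in_mat.c << " -> out " << out_mat.c << '\n'; // in 8 -> out 16
  }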