From 85708532881069c6c7b2d26cd1537bf8685a81a7 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=D0=90=D0=BD=D0=B4=D1=80=D0=B5=D0=B9=20=D0=A8=D0=B5=D0=B4?= =?utf8?q?=D1=8C=D0=BA=D0=BE/AI=20Tools=20Lab=20/SRR/Assistant=20Engineer/?= =?utf8?q?=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Mon, 3 Dec 2018 20:03:56 +0300 Subject: [PATCH] [nnc] Support non-unit batch in deconv (#2471) Added support of Batch dimension in Deconv operation * shape inference * interpreter * c++ softBackend Signed-off-by: Andrei Shedko --- contrib/nnc/core/modelIR/operations/DeConv2DOp.cpp | 26 +++++++------- contrib/nnc/passes/interpreter/ops/DeConv2D.cpp | 41 ++++++++++++---------- .../soft_backend/code_snippets/cpp_operations.def | 17 +++++---- .../nnc/unittests/soft_backend/CPPOperations.cpp | 7 ++-- 4 files changed, 51 insertions(+), 40 deletions(-) diff --git a/contrib/nnc/core/modelIR/operations/DeConv2DOp.cpp b/contrib/nnc/core/modelIR/operations/DeConv2DOp.cpp index 2ce2739..dda82d9 100644 --- a/contrib/nnc/core/modelIR/operations/DeConv2DOp.cpp +++ b/contrib/nnc/core/modelIR/operations/DeConv2DOp.cpp @@ -29,34 +29,36 @@ void DeConv2DOp::inferOutputShapes() { auto& strides = getStrides(); auto input_rank = input_shape.rank(); - assert(input_rank == 3); + assert(input_rank == 4); assert(kernel_shape.rank() == 4); - assert(kernel_shape.dim(3) == input_shape.dim(2)); + assert(kernel_shape.dim(3) == input_shape.dim(3)); Shape output_shape; output_shape.resize(input_rank); + // Assumes no batch strides. 
switch (_paddingType) { case ops::PaddingType::Same: - for (int32_t d = 0; d < input_rank; ++d) - output_shape.dim(d) = input_shape.dim(d) * strides.dim(d) - strides.dim(d) + 1; + for (int d = 1; d < 3; d++) + output_shape.dim(d) = input_shape.dim(d) * strides.dim(d - 1) - strides.dim(d - 1) + 1; break; case ops::PaddingType::Valid: - for (int32_t d = 0; d < input_rank; ++d) - output_shape.dim(d) = - input_shape.dim(d) * strides.dim(d) + kernel_shape.dim(d) - strides.dim(d); + for (int d = 1; d < 3; d++) { + output_shape.dim(d) = input_shape.dim(d) * strides.dim(d - 1) + + kernel_shape.dim(d - 1) - strides.dim(d - 1); + } break; case ops::PaddingType::Custom: - for (int32_t d = 0; d < input_rank - 1; ++d) - output_shape.dim(d) = - input_shape.dim(d) * strides.dim(d) + kernel_shape.dim(d) - strides.dim(d) - - 2 * getPadding(d); + for (int d = 1; d < 3; d++) { + output_shape.dim(d) = input_shape.dim(d) * strides.dim(d - 1) + + kernel_shape.dim(d - 1) - strides.dim(d - 1) - 2 * getPadding(d - 1); + } break; default: { assert(false && "invalid padding type"); } } - + output_shape.dim(0) = input_shape.dim(0); output_shape.dim(-1) = kernel_shape.dim(-2); setOutputShape(0, output_shape); } diff --git a/contrib/nnc/passes/interpreter/ops/DeConv2D.cpp b/contrib/nnc/passes/interpreter/ops/DeConv2D.cpp index c98429d..ff23d62 100644 --- a/contrib/nnc/passes/interpreter/ops/DeConv2D.cpp +++ b/contrib/nnc/passes/interpreter/ops/DeConv2D.cpp @@ -29,17 +29,16 @@ using namespace mir; using namespace mir::ops; std::vector nnc::DeConv2D::operator()() { - auto res = allocate_tensor(_out_shape); + Shape out_shape = _out_shape; + auto res = allocate_tensor(out_shape); Tensor res_accesor(res); Index pads({_op.getPadding(0), _op.getPadding(1), 0}); - Shape out_shape = res_accesor.getShape(); - out_shape.dim(2) = 1; + out_shape.dim(3) = 1; ShapeRange out_range(out_shape); - const Shape& in_shape = _input.getShape(); - ShapeRange in_range(_input.getShape()); - + Shape in_shape = 
_input.getShape(); + ShapeRange in_range(in_shape); std::shared_ptr tr_kernel; const std::shared_ptr kernel_ptr( @@ -62,19 +61,23 @@ std::vector nnc::DeConv2D::operator()() { // flag that keeps info on whether the current input element is from input // or is from dilation by stride bool is_from_input = true; - for (int32_t d = 0; d < input_idx.rank()-1; ++d) { - const auto num = (out_idx.at(d) - kernel.getShape().dim(d) + pads.at(d) +1 + kernel_idx.at(d) ); - const auto div_res = num / _strides.dim(d); - const auto rem = num % _strides.dim(d); + for (int32_t d = 1; d < input_idx.rank() - 1; ++d) { + const auto num = (out_idx.at(d) - kernel.getShape().dim(d - 1) + pads.at(d - 1) + 1 + + kernel_idx.at(d - 1)); + const auto div_res = num / _strides.dim(d - 1); + const auto rem = num % _strides.dim(d - 1); is_from_input = is_from_input && rem == 0; if (rem != 0) break; input_idx.at(d) = div_res; } - input_idx.at(2) = kernel_idx.at(2); + // batch is same as output's + input_idx.at(0) = out_idx.at(0); + // channel index - same as kernel's + input_idx.at(3) = kernel_idx.at(2); // rotate kernel 180 deg around last axis // by index transform - for (int32_t d = 0; d < input_idx.rank()-1; ++d) { + for (int32_t d = 0; d < 2; ++d) { kernel_idx.at(d) = kernel.getShape().dim(d) - kernel_idx.at(d) -1; } @@ -99,15 +102,15 @@ DeConv2D::DeConv2D(const TensorVariant &input, const DeConv2DOp &op) : _input(input), _kernel(op.getKernel()), _strides(op.getStrides()), _padding(op.getPaddingType()), _out_shape(op.getOutputShape(0)), _op(op) { - assert(_op.getInputShape(0).rank() == 3); - assert(input.getShape().rank() == 3); - assert(_kernel.getShape().rank() == 4); - const auto& ks = _kernel.getShape(); - const auto& is = input.getShape(); + assert(_op.getInputShape(0).rank() == 4); + const auto& kernel_shape = _kernel.getShape(); + const auto& inp_shape = input.getShape(); + assert(inp_shape.rank() == 4); + assert(kernel_shape.rank() == 4); assert(_strides.dim(2) == 1); 
assert(_op.getPadding(2) == 0); - // kernel shape is [hw"oc""ic"]; input's - [hw"ic"] + assert(ks.dim(3) == is.dim(2)); + // kernel shape is [hw"oc""ic"]; input's - ["batch"hw"ic"] + assert(kernel_shape.dim(3) == inp_shape.dim(3)); } } // namespace nnc diff --git a/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def b/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def index c9d7d5a..b04cc11 100644 --- a/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def +++ b/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def @@ -275,7 +275,7 @@ void conv2d(Tensor &out, const char *params, const Tensor &in) void convTransposed2d(Tensor &out, const char *params, const Tensor &in) { const float *input = in.getData(); - RuntimeShape input_shape = shapeToRuntimeShapePad4(in.getShape()); + RuntimeShape input_shape = shapeToRuntimeShape(in.getShape()); KernelRT kernel = deserializeKernelRT(params); Shape strides = deserializeShape(params); // pads type. 
unused for now @@ -286,7 +286,7 @@ void convTransposed2d(Tensor &out, const char *params, const Tensor &in) { out.reShape(out_s); - RuntimeShape out_shape = shapeToRuntimeShapePad4(out_s); + RuntimeShape out_shape = shapeToRuntimeShape(out_s); const short stride_w = strides[1]; const short stride_h = strides[0]; @@ -294,11 +294,16 @@ void convTransposed2d(Tensor &out, const char *params, const Tensor &in) { const short pad_w = pads[1]; const short pad_h = pads[0]; - const int kw = kernel.shape.Dims(2); - const int kh = kernel.shape.Dims(1); + const int ker_width = kernel.shape.Dims(2); + const int ker_height = kernel.shape.Dims(1); - RuntimeShape im2col_shape = RuntimeShape({1,1,(int) (out_s[0]*out_s[1]), - input_shape.Dims(3)*kw*kh}); + RuntimeShape im2col_shape = RuntimeShape({ + (int)out_s[0], + (int)out_s[1], + (int)out_s[2], + // in depth + input_shape.Dims(3) * ker_width * ker_height + }); const auto convPara = ConvParams({PaddingType::kSame, PaddingValues({pad_w,pad_h}), stride_w, stride_h}); diff --git a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp index f7109c0..75a7891 100644 --- a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp +++ b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp @@ -458,7 +458,7 @@ TEST(cpp_operations_test, convTransposed2d) for (iT outputC = 1; outputC <= 3; ++outputC) for (iT strideH = 1; strideH <= 3; ++strideH) for (iT strideW = 1; strideW <= 3; ++strideW) { - vector inputShapeData{9, 3, static_cast(inputC)}; // HWC + vector inputShapeData{3, 9, 3, static_cast(inputC)}; // NHWC mir::Shape kernelShape{kernelH, kernelW, outputC, inputC}; mir::Shape strides{strideH, strideW, 1}; vector> inputNTensors(1); @@ -466,8 +466,9 @@ TEST(cpp_operations_test, convTransposed2d) fillTensors(inputNTensors[0], aInputTensor, inputShapeData, 1.0f); auto padT = mir::ops::PaddingType::Same; mir::TensorVariant kernel = createNTensor(kernelShape, 1.0f); - auto opGenerator = [kernel, 
strides, padT](mir::Graph &g, - const std::vector& inputs) { + auto opGenerator = [kernel, strides, padT]( + mir::Graph& g, const std::vector& inputs) { + return g.create("y", inputs[0], kernel, strides, padT); }; -- 2.7.4