From da2223f90d51a1d1b98aa2eeec613efce30086f5 Mon Sep 17 00:00:00 2001
From: =?utf8?q?=EC=9E=A5=EC=A7=80=EC=84=AD/On-Device=20Lab=28SR=29/Enginee?=
 =?utf8?q?r/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?= <jiseob.jang@samsung.com>
Date: Thu, 17 Jan 2019 12:53:19 +0900
Subject: [PATCH] [PACL] Fix reshape bug. (#4254)

This commit fixes a reshape bug in PACL: reshape did not support a non-4-D input with a 4-D output.
  - Fix GenericReshapeLayer bug
  - Set apply_dim_correction to `false` for the input shape of GenericFullyConnectedLayer

Signed-off-by: jiseob.jang <jiseob.jang@samsung.com>
---
 .../runtime/misc/functions/GenericReshapeLayer.cpp | 91 +++++++++++++++++-----
 runtimes/pure_arm_compute/src/compilation.cc       |  6 +-
 2 files changed, 74 insertions(+), 23 deletions(-)

diff --git a/libs/ARMComputeEx/src/runtime/misc/functions/GenericReshapeLayer.cpp b/libs/ARMComputeEx/src/runtime/misc/functions/GenericReshapeLayer.cpp
index 251ff5d..43c1831 100644
--- a/libs/ARMComputeEx/src/runtime/misc/functions/GenericReshapeLayer.cpp
+++ b/libs/ARMComputeEx/src/runtime/misc/functions/GenericReshapeLayer.cpp
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "arm_compute/core/ITensorInfo.h"
 #include "arm_compute/runtime/misc/functions/GenericReshapeLayer.h"
 
 namespace arm_compute
@@ -21,37 +22,81 @@ namespace arm_compute
 namespace misc
 {
 
+namespace
+{
+
+bool shouldPermute(arm_compute::ITensorInfo *input, arm_compute::ITensorInfo *output)
+{
+  return (input->num_dimensions() == 4 || output->num_dimensions() == 4) &&
+         (input->num_dimensions() != output->num_dimensions());
+}
+
+} // namespace
+
 void GenericReshapeLayer::configure(arm_compute::ITensor *input, arm_compute::ITensor *output)
 {
   _input = input;
   _output = output;
 
-  // NOTE This vector comes from CLPermuteKernel implementation
-  //
-  // This implementation permutes a tensor of shape W / H / C into another tensor of shape C / W / H
-  //
-  //     Original | Permuted
-  // 0 | W        | C (from 2)
-  // 1 | H        | W (from 0)
-  // 2 | C        | H (from 1)
-  //
-  const arm_compute::PermutationVector pv{2, 0, 1};
+  arm_compute::PermutationVector pv;
+  if (input->info()->num_dimensions() == 4 && output->info()->num_dimensions() != 4)
+  {
+    // NOTE This vector comes from CLPermuteKernel implementation
+    //
+    // This implementation permutes a tensor of shape W / H / C into another tensor of shape
+    // C / W / H
+    //
+    //     Original | Permuted
+    // 0 | W        | C (from 2)
+    // 1 | H        | W (from 0)
+    // 2 | C        | H (from 1)
+    //
+    pv = arm_compute::PermutationVector{2, 0, 1};
+  }
+  else if (input->info()->num_dimensions() != 4 && output->info()->num_dimensions() == 4)
+  {
+    // NOTE This vector comes from CLPermuteKernel implementation
+    //
+    // This implementation permutes a tensor of shape C / W / H into another tensor of shape
+    // W / H / C
+    //
+    //     Original | Permuted
+    // 0 | C        | W (from 1)
+    // 1 | W        | H (from 2)
+    // 2 | H        | C (from 0)
+    //
+    pv = arm_compute::PermutationVector{1, 2, 0};
+  }
 
   if (utils::isGpuMode())
   {
-    _cl_permute.configure(CAST_CL(input), &_cl_permuted, pv);
-    _cl_reshape.configure(&_cl_permuted, CAST_CL(output));
+    if (shouldPermute(input->info(), output->info()))
+    {
+      _cl_permute.configure(CAST_CL(input), &_cl_permuted, pv);
+      _cl_reshape.configure(&_cl_permuted, CAST_CL(output));
 
-    // NOTE _permuted is inaccessible from outside, and thus it is safe to invoke allocate here.
-    _cl_permuted.allocator()->allocate();
+      // NOTE _permuted is inaccessible from outside, and thus it is safe to invoke allocate here.
+      _cl_permuted.allocator()->allocate();
+    }
+    else
+    {
+      _cl_reshape.configure(CAST_CL(input), CAST_CL(output));
+    }
   }
   else
   {
-    _neon_permute.configure(input, &_neon_permuted, pv);
-    _neon_reshape.configure(&_neon_permuted, output);
+    if (shouldPermute(input->info(), output->info()))
+    {
+      _neon_permute.configure(input, &_neon_permuted, pv);
+      _neon_reshape.configure(&_neon_permuted, output);
 
-    // NOTE _permuted is inaccessible from outside, and thus it is safe to invoke allocate here.
-    _neon_permuted.allocator()->allocate();
+      // NOTE _permuted is inaccessible from outside, and thus it is safe to invoke allocate here.
+      _neon_permuted.allocator()->allocate();
+    }
+    else
+    {
+      _neon_reshape.configure(input, output);
+    }
   }
 }
 
@@ -59,12 +104,18 @@ void GenericReshapeLayer::run(void)
 {
   if (utils::isGpuMode())
   {
-    _cl_permute.run();
+    if (shouldPermute(_input->info(), _output->info()))
+    {
+      _cl_permute.run();
+    }
     _cl_reshape.run();
   }
   else
   {
-    _neon_permute.run();
+    if (shouldPermute(_input->info(), _output->info()))
+    {
+      _neon_permute.run();
+    }
     _neon_reshape.run();
   }
 }
diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index b0da882..8f45816 100644
--- a/runtimes/pure_arm_compute/src/compilation.cc
+++ b/runtimes/pure_arm_compute/src/compilation.cc
@@ -2141,7 +2141,7 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node)
     assert(feature_size == batch_size * input_size);
 
     _builder.addShapeConstr(input_index,
-                            asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+                            asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false),
                                          _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
                                          _ctx.at(input_index).zeroPoint()));
 
@@ -2279,8 +2279,8 @@ void Planner::visit(const ::internal::tflite::op::Reshape::Node &node)
   const ::internal::tflite::operand::Index output_index{node.param().output_index};
   const ::internal::tflite::operand::Index input_index{node.param().input_index};
 
-  auto input_shape = asTensorShape(_ctx.at(input_index).shape());
-  auto output_shape = asTensorShape(_ctx.at(output_index).shape());
+  auto input_shape = asTensorShape(_ctx.at(input_index).shape(), false);
+  auto output_shape = asTensorShape(_ctx.at(output_index).shape(), false);
 
   assert(input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3] ==
          output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3]);
-- 
2.7.4