Make frontend and backend layouts the same for Gather, Reshape and FC (#7711)
author Jiseob Jang / On-Device Lab (SR) / Engineer / Samsung Electronics <jiseob.jang@samsung.com>
Wed, 25 Sep 2019 21:51:40 +0000 (06:51 +0900)
committer Hanjoung Lee / On-Device Lab (SR) / Engineer / Samsung Electronics <hanjoung.lee@samsung.com>
Wed, 25 Sep 2019 21:51:40 +0000 (06:51 +0900)
This commit makes the frontend and backend layouts the same for Gather, Reshape and FC.
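
The layout difference that matters here is the NHWC/NCHW dimension ordering. A rough
sketch of why these operations must see a single layout follows; nhwcToNchw is a
hypothetical helper used only for illustration and is not part of this change:

    #include <array>

    // Hypothetical helper: the NHWC -> NCHW shape permutation that keeping the
    // frontend layout on the backend tensors avoids for Gather, Reshape and FC.
    std::array<int, 4> nhwcToNchw(const std::array<int, 4> &nhwc)
    {
      // {N, H, W, C} -> {N, C, H, W}
      return {nhwc[0], nhwc[3], nhwc[1], nhwc[2]};
    }

    // e.g. an NHWC shape {1, 2, 3, 4} becomes {1, 4, 2, 3} in NCHW, so a Gather axis
    // or the element order seen by a Reshape computed against the NHWC shape would
    // not match a permuted backend tensor.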

Signed-off-by: jiseob.jang <jiseob.jang@samsung.com>
12 files changed:
runtimes/neurun/backend/acl_cl/KernelGenerator.cc
runtimes/neurun/backend/acl_cl/ShapeFixer.cc
runtimes/neurun/backend/acl_neon/KernelGenerator.cc
runtimes/neurun/core/include/graph/Graph.h
runtimes/neurun/core/include/model/Subgraph.h
runtimes/neurun/core/include/model/Subgraphs.h
runtimes/neurun/core/src/graph/Graph.cc
runtimes/neurun/core/src/graph/pass/PermutationOperationPass.cc
runtimes/neurun/core/src/graph/pass/PermutationOperationPass.h
runtimes/neurun/core/src/model/Subgraph.cc
runtimes/neurun/core/src/model/Subgraphs.cc
tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt

runtimes/neurun/backend/acl_cl/KernelGenerator.cc
index bca0c5b..efefc32 100644
@@ -18,8 +18,6 @@
 
 #include <arm_compute/runtime/CL/CLFunctions.h>   // Include all ARM Compute CL functions
 #include <arm_compute/runtime/CL/CLFunctionsEx.h> // Include all ARM Compute EX CL functions
-#include <arm_compute/runtime/misc/functions/GenericGather.h>
-#include <arm_compute/runtime/misc/functions/GenericReshapeLayer.h>
 
 #include <AclFunction.h>
 #include <Convert.h>
@@ -459,10 +457,11 @@ void KernelGenerator::visit(const model::operation::FullyConnectedNode &node)
   const auto activation = node.param().activation;
 
   auto output_alloc = _tensor_builder->at(output_index).get();
-  auto input_alloc = _tensor_builder->at(input_index).get();
-  auto weight_alloc = _tensor_builder->at(weight_index).get();
-  auto bias_alloc = _tensor_builder->at(bias_index).get();
-  auto acl_layout = output_alloc->handle()->info()->data_layout();
+  const auto input_alloc = _tensor_builder->at(input_index).get();
+  const auto weight_alloc = _tensor_builder->at(weight_index).get();
+  const auto bias_alloc = _tensor_builder->at(bias_index).get();
+  const auto frontend_layout = _current_subg_layout;
+  const auto acl_layout = output_alloc->handle()->info()->data_layout();
 
   auto fn = nnfw::cpp14::make_unique<arm_compute::CLFullyConnectedReshapingLayer>(
       _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
@@ -470,10 +469,8 @@ void KernelGenerator::visit(const model::operation::FullyConnectedNode &node)
   fn->configure(
       input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(),
       needs_reshape,
-      ::neurun::backend::acl_common::asTensorShape(/* TODO Support NCHW frontend */
-                                                   reshape, model::Layout::NHWC,
-                                                   ::neurun::backend::acl_common::asRuntimeLayout(
-                                                       acl_layout)));
+      ::neurun::backend::acl_common::asTensorShape(
+          reshape, frontend_layout, ::neurun::backend::acl_common::asRuntimeLayout(acl_layout)));
 
   auto acl_fn = asAclFunction(std::move(fn));
 
@@ -554,9 +551,15 @@ void KernelGenerator::visit(const model::operation::ReshapeNode &node)
   auto input_alloc = _tensor_builder->at(input_index).get();
 
   // NOTE The layout of this operation must not be changed from frontend to backend
-  //      However, this runtime can be change the layout of this operation from NHWC to NCHW now
-  // TODO Change the layout of frontend and backend to be the same and layer to CLReshapeLayer
-  auto fn = nnfw::cpp14::make_unique<::arm_compute::misc::GenericReshapeLayer>();
+  //      So, PermutationOperationPass makes the frontend and backend layouts the same.
+  const auto frontend_layout = _current_subg_layout;
+  const auto backend_layout = output_alloc->layout();
+  assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
+         frontend_layout == backend_layout);
+  UNUSED_RELEASE(frontend_layout);
+  UNUSED_RELEASE(backend_layout);
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReshapeLayer>();
 
   fn->configure(input_alloc->handle(), output_alloc->handle());
 
@@ -1647,15 +1650,12 @@ void KernelGenerator::visit(const model::operation::GatherNode &node)
   const auto ifm_shape = _ctx.at(ifm_index).shape();
 
   const auto axis_value = static_cast<int>(_ctx.at(axis_index).asScalar<int32_t>());
-  // Converting in reverse order
   const int axis =
       ::neurun::backend::acl_common::ToARMComputeAxis(ifm_shape.rank(), axis_value).value();
 
   auto ofm_alloc = _tensor_builder->at(ofm_index).get();
   auto ifm_alloc = _tensor_builder->at(ifm_index).get();
   auto indices_alloc = _tensor_builder->at(indices_index).get();
-  auto acl_layout = ofm_alloc->handle()->info()->data_layout();
-  UNUSED_RELEASE(acl_layout);
 
   // NOTE The frontend layout and backend layout must be the same for this operation.
   //      If not the same, we have to add a stage(?) to perform permutation of output tensor. It
@@ -1665,17 +1665,13 @@ void KernelGenerator::visit(const model::operation::GatherNode &node)
   //      a model. For example, if a model in NHWC has this operation as output rank == 4, indices
   //      rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
   //      and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
-  // TODO Remove this workaround
-  //      It is a workaround how to set the layout of these backend tensors to the layout of the
-  //      frontend when creating them
-  // TODO Supports front-end in NCHW
-  // TODO Change the layout of frontend and backend to be the same
-  // assert(::arm_compute::DataLayout::NHWC == acl_layout);
-  assert(acl_layout == ifm_alloc->handle()->info()->data_layout());
-  assert(acl_layout == indices_alloc->handle()->info()->data_layout());
-
-  // TODO Change to CLGather
-  auto fn = nnfw::cpp14::make_unique<::arm_compute::misc::GenericGather>();
+  const auto backend_layout = ofm_alloc->layout();
+  UNUSED_RELEASE(backend_layout);
+  assert(backend_layout == ifm_alloc->layout());
+  assert(backend_layout == indices_alloc->layout());
+  assert(ifm_shape.rank() < 4 || _current_subg_layout == backend_layout);
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::CLGatherEx>();
 
   fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis);
 
runtimes/neurun/backend/acl_cl/ShapeFixer.cc
index 4f597dc..8f13e37 100644
@@ -18,8 +18,6 @@
 
 #include <arm_compute/runtime/CL/CLFunctions.h>   // Include all ARM Compute CL functions
 #include <arm_compute/runtime/CL/CLFunctionsEx.h> // Include all ARM Compute EX CL functions
-#include <arm_compute/runtime/misc/functions/GenericGather.h>
-#include <arm_compute/runtime/misc/functions/GenericReshapeLayer.h>
 
 #include <AclFunction.h>
 #include <Convert.h>
runtimes/neurun/backend/acl_neon/KernelGenerator.cc
index 56eaba2..001a58d 100644
@@ -648,10 +648,11 @@ void KernelGenerator::visit(const model::operation::FullyConnectedNode &node)
   const auto activation = node.param().activation;
 
   auto output_alloc = _tensor_builder->at(output_index).get();
-  auto input_alloc = _tensor_builder->at(input_index).get();
-  auto weight_alloc = _tensor_builder->at(weight_index).get();
-  auto bias_alloc = _tensor_builder->at(bias_index).get();
-  auto acl_layout = output_alloc->handle()->info()->data_layout();
+  const auto input_alloc = _tensor_builder->at(input_index).get();
+  const auto weight_alloc = _tensor_builder->at(weight_index).get();
+  const auto bias_alloc = _tensor_builder->at(bias_index).get();
+  const auto frontend_layout = _current_subg_layout;
+  const auto acl_layout = output_alloc->handle()->info()->data_layout();
 
   auto fn = nnfw::cpp14::make_unique<arm_compute::NEFullyConnectedReshapingLayer>(
       _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
@@ -659,10 +660,8 @@ void KernelGenerator::visit(const model::operation::FullyConnectedNode &node)
   fn->configure(
       input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(),
       needs_reshape,
-      ::neurun::backend::acl_common::asTensorShape(/* TODO Support NCHW frontend */
-                                                   reshape, model::Layout::NHWC,
-                                                   ::neurun::backend::acl_common::asRuntimeLayout(
-                                                       acl_layout)));
+      ::neurun::backend::acl_common::asTensorShape(
+          reshape, frontend_layout, ::neurun::backend::acl_common::asRuntimeLayout(acl_layout)));
 
   auto acl_fn = asAclFunction(std::move(fn));
 
@@ -1438,6 +1437,15 @@ void KernelGenerator::visit(const model::operation::ReshapeNode &node)
   auto output_alloc = _tensor_builder->at(output_index).get();
   auto input_alloc = _tensor_builder->at(input_index).get();
 
+  // NOTE The layout of this operation must not be changed from frontend to backend
+  //      So, PermutationOperationPass makes the frontend and backend layouts the same.
+  const auto frontend_layout = _current_subg_layout;
+  const auto backend_layout = output_alloc->layout();
+  assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
+         frontend_layout == backend_layout);
+  UNUSED_RELEASE(frontend_layout);
+  UNUSED_RELEASE(backend_layout);
+
   auto fn = nnfw::cpp14::make_unique<arm_compute::NEReshapeLayer>();
 
   fn->configure(input_alloc->handle(), output_alloc->handle());
runtimes/neurun/core/include/graph/Graph.h
index cb1ffb5..2248388 100644
@@ -179,6 +179,7 @@ public:
   const operation::LowerInfo *getLowerInfo(const model::SubgraphIndex &subg_index) const;
   void setLowerInfo(const model::SubgraphIndex &subg_index,
                     std::unique_ptr<operation::LowerInfo> &&lower_info);
+  void removeLowerInfo(const model::SubgraphIndex &subg_index);
   const operand::LowerInfo *getLowerInfo(const model::OperandIndex &index) const;
   operand::LowerInfo *getLowerInfo(const model::OperandIndex &index);
   void setLowerInfo(const model::OperandIndex &index,
runtimes/neurun/core/include/model/Subgraph.h
index 70abf6a..2fbf7b3 100644
@@ -71,8 +71,18 @@ public:
   std::string getStr(void) const;
 
 public:
+  void remove(const OperationIndex &index);
+
+public:
   Layout getLayout() const { return _layout; }
 
+public:
+  std::vector<Element>::const_iterator begin() const { return _operations.begin(); }
+  std::vector<Element>::const_iterator end() const { return _operations.end(); }
+
+private:
+  bool exist(const neurun::model::OperationIndex &index) const;
+
 private:
   std::vector<Element> _operations;
 
runtimes/neurun/core/include/model/Subgraphs.h
index 13bc549..6946649 100644
@@ -70,6 +70,12 @@ public:
    * @param msg Message that will be displayed
    */
   void dump(const std::string &msg) const;
+  /**
+   * @brief Remove an operation from Subgraph
+   *
+   * @param operation_index Operation index to be removed
+   */
+  void removeFromSubgraph(const OperationIndex &operation_index);
 
 private:
   SubgraphIndex findOperation(const OperationIndex &operation_index) const;
runtimes/neurun/core/src/graph/Graph.cc
index 4264b1a..76f6073 100644
@@ -31,6 +31,7 @@
 #include "backend/IConfig.h"
 #include "pass/PermutationInsertionPass.h"
 #include "pass/PermutationEliminationPass.h"
+#include "pass/PermutationOperationPass.h"
 
 namespace neurun
 {
@@ -417,8 +418,11 @@ void Graph::lower(void)
     });
   }
 
-  // Run PermutationInsertionPass
+  // Run Permutation Passes
   {
+    pass::PermutationOperationPass po_pass(*this);
+    po_pass.run();
+
     pass::PermutationInsertionPass pi_pass(*this);
     pi_pass.run();
     // Implemented code no longer works.
@@ -482,6 +486,20 @@ void Graph::setLowerInfo(const model::SubgraphIndex &subg_index,
   _lower_info_map->operation.insert(std::make_pair(subg_index, std::move(lower_info)));
 }
 
+void Graph::removeLowerInfo(const model::SubgraphIndex &subg_index)
+{
+  auto &subg_lower_info = _lower_info_map->operation;
+  assert(subg_lower_info.find(subg_index) != subg_lower_info.end());
+  for (auto it = subg_lower_info.begin(); it != subg_lower_info.end(); ++it)
+  {
+    if (it->first == subg_index)
+    {
+      subg_lower_info.erase(it);
+      break;
+    }
+  }
+}
+
 const operand::LowerInfo *Graph::getLowerInfo(const model::OperandIndex &index) const
 {
   if (!_lower_info_map)
runtimes/neurun/core/src/graph/pass/PermutationOperationPass.cc
index 2050f57..b4cdf62 100644
 
 #include "PermutationOperationPass.h"
 
+#include "backend/Backend.h"
+#include "backend/IConfig.h"
+#include "graph/Graph.h"
+
 namespace neurun
 {
 namespace graph
@@ -23,19 +27,197 @@ namespace graph
 namespace pass
 {
 
-void PermutationOperationPass::visit(const model::operation::FullyConnectedNode &)
+void PermutationOperationPass::changeToKeepLayout(const model::Operation &node)
 {
-  // TODO Implement
+  const auto &output_ind = node.getOutputs().at(0);
+  const auto &output_obj = _graph.operands().at(output_ind);
+
+  assert(output_obj.getDef().size() == 1);
+  const auto &node_index = output_obj.getDef().list().front();
+  const auto &subg_index = _graph.subgraphs().getOperation(node_index);
+
+  const auto frontend_layout = _graph.subgraphs().at(subg_index).getLayout();
+  const auto backend_layout = _graph.getLowerInfo(subg_index)->layout();
+
+  if (frontend_layout == backend_layout)
+  {
+    return;
+  }
+
+  // CPU supports only NHWC now
+  if (_graph.getLowerInfo(subg_index)->backend()->config()->id() != "cpu")
+  {
+    // TODO Change backend of this node
+    assert(frontend_layout == model::Layout::NHWC || backend_layout == model::Layout::UNKNOWN);
+  }
+
+  // Divide subgraph based on target operation
+  {
+    auto &above_subg = _graph.subgraphs().at(subg_index);
+
+    // If the target node is not at the end of the subgraph, create a new subgraph and
+    // move the operations that follow the target node into it
+    auto it = above_subg.begin();
+    // Find iterator of target node in subgraph
+    while ((it++)->index != node_index)
+      ;
+    if (it != above_subg.end())
+    {
+      const auto &below_subg_index =
+          _graph.subgraphs().emplace(it->index, *it->node, above_subg.getLayout());
+      auto &below_subg = _graph.subgraphs().at(below_subg_index);
+      below_subg.setInputs(it->node->getInputs());
+      below_subg.setOutputs(it->node->getOutputs());
+
+      std::vector<model::OperationIndex> remove_list;
+      remove_list.emplace_back(it->index);
+      while (++it != above_subg.end())
+      {
+        below_subg.appendOperation(it->index, *it->node);
+        below_subg.setOutputs(it->node->getOutputs());
+        remove_list.emplace_back(it->index);
+      }
+
+      above_subg.setOutputs(node.getOutputs());
+      for (const auto &index : remove_list)
+      {
+        above_subg.remove(index);
+      }
+
+      const auto subg_li = _graph.getLowerInfo(subg_index);
+      _graph.setLowerInfo(below_subg_index, nnfw::cpp14::make_unique<graph::operation::LowerInfo>(
+                                                subg_li->backend(), subg_li->layout()));
+    }
+  }
+
+  // Remove target operation from subgraph and insert the target operation to new subgraph
+  {
+    const auto backend = _graph.getLowerInfo(subg_index)->backend();
+
+    // Remove target operation from subgraph
+    _graph.subgraphs().removeFromSubgraph(node_index);
+
+    if (!_graph.subgraphs().exist(subg_index))
+    {
+      // Remove lowerinfo for subgraph of target operation if the subgraph does not exist
+      _graph.removeLowerInfo(subg_index);
+    }
+    else
+    {
+      // Update subgraph of target operation if the subgraph exists
+      auto &above_subg = _graph.subgraphs().at(subg_index);
+      const auto last_node = (--above_subg.end())->node;
+      above_subg.setOutputs(last_node->getOutputs());
+    }
+
+    // Create new subgraph and set information to the subgraph
+    auto new_subg_index = _graph.subgraphs().emplace(node_index, node, frontend_layout);
+    auto &new_subg = _graph.subgraphs().at(new_subg_index);
+    new_subg.setInputs(node.getInputs());
+    new_subg.setOutputs(node.getOutputs());
+    _graph.setLowerInfo(new_subg_index, nnfw::cpp14::make_unique<graph::operation::LowerInfo>(
+                                            backend, frontend_layout));
+  }
+
+  // Change PermuteFactors of operands of target node
+  {
+    const auto &subg_index = _graph.subgraphs().getOperation(node_index);
+    const auto subg_li = _graph.getLowerInfo(subg_index);
+    const auto backend = subg_li->backend();
+    const operand::PermuteFactor removed_factor{backend, backend_layout};
+    const operand::PermuteFactor new_factor{backend, frontend_layout};
+    for (const auto &input : node.getInputs())
+    {
+      bool canRemove = true;
+      for (const auto &use : _graph.operands().at(input).getUses().list())
+      {
+        if (use != node_index)
+        {
+          const auto &use_subg_index = _graph.subgraphs().getOperation(use);
+          auto use_subg_li = _graph.getLowerInfo(use_subg_index);
+          if (use_subg_li->backend() == backend && use_subg_li->layout() == backend_layout)
+          {
+            canRemove = false;
+            break;
+          }
+        }
+      }
+
+      auto lower_info = _graph.getLowerInfo(input);
+      if (canRemove)
+      {
+        lower_info->removeUsePermuteFactor(removed_factor);
+      }
+      lower_info->addUsePermuteFactor(new_factor);
+
+      // Check whether the node's input is a model input or a constant
+      if (_graph.operands().at(input).getDef().size() == 0)
+      {
+        assert(_graph.getInputs().contains(input) || _graph.operands().at(input).isConstant());
+        lower_info->removeDefPermuteFactor(removed_factor);
+        lower_info->addDefPermuteFactor(new_factor);
+      }
+    }
+
+    for (const auto &output : node.getOutputs())
+    {
+      auto lower_info = _graph.getLowerInfo(output);
+      lower_info->removeDefPermuteFactor(removed_factor);
+      lower_info->addDefPermuteFactor(new_factor);
+
+      // Check whether the node's output is a model output
+      if (_graph.operands().at(output).getUses().size() == 0)
+      {
+        assert(_graph.getOutputs().contains(output));
+        lower_info->removeUsePermuteFactor(removed_factor);
+        lower_info->addUsePermuteFactor(new_factor);
+      }
+    }
+  }
 }
 
-void PermutationOperationPass::visit(const model::operation::GatherNode &)
+void PermutationOperationPass::visit(const model::operation::FullyConnectedNode &node)
 {
-  // TODO Implement
+  const auto &input_ind = node.getInputs().at(model::operation::FullyConnectedNode::Input::INPUT);
+  const auto &input_obj = _graph.operands().at(input_ind);
+  const auto &input_shape = input_obj.shape();
+
+  if (input_shape.rank() == 4)
+  {
+    changeToKeepLayout(node);
+  }
 }
 
-void PermutationOperationPass::visit(const model::operation::ReshapeNode &)
+void PermutationOperationPass::visit(const model::operation::GatherNode &node)
 {
-  // TODO Implement
+  const auto &input_ind = node.getInputs().at(model::operation::GatherNode::Input::INPUT);
+  const auto &input_obj = _graph.operands().at(input_ind);
+  const auto &input_shape = input_obj.shape();
+
+  const auto &output_ind = node.getOutputs().at(0);
+  const auto &output_obj = _graph.operands().at(output_ind);
+  const auto &output_shape = output_obj.shape();
+
+  if (input_shape.rank() >= 4 || output_shape.rank() >= 4)
+  {
+    changeToKeepLayout(node);
+  }
+}
+
+void PermutationOperationPass::visit(const model::operation::ReshapeNode &node)
+{
+  const auto &input_ind = node.getInputs().at(model::operation::ReshapeNode::Input::INPUT);
+  const auto &input_obj = _graph.operands().at(input_ind);
+  const auto &input_shape = input_obj.shape();
+
+  const auto &output_ind = node.getOutputs().at(0);
+  const auto &output_obj = _graph.operands().at(output_ind);
+  const auto &output_shape = output_obj.shape();
+
+  if (input_shape.rank() >= 4 || output_shape.rank() >= 4)
+  {
+    changeToKeepLayout(node);
+  }
 }
 
 } // namespace pass
runtimes/neurun/core/src/graph/pass/PermutationOperationPass.h
index 6fc0133..c5b4ba7 100644
@@ -38,6 +38,9 @@ public:
   void visit(const model::operation::FullyConnectedNode &) final;
   void visit(const model::operation::GatherNode &) final;
   void visit(const model::operation::ReshapeNode &) final;
+
+private:
+  void changeToKeepLayout(const model::Operation &);
 };
 
 } // namespace pass
runtimes/neurun/core/src/model/Subgraph.cc
index 4b84027..f283279 100644
@@ -54,5 +54,30 @@ std::string Subgraph::getStr() const
   return ss.str();
 }
 
+void Subgraph::remove(const OperationIndex &index)
+{
+  assert(exist(index));
+  for (auto it = _operations.cbegin(); it != _operations.cend(); ++it)
+  {
+    if (it->index == index)
+    {
+      _operations.erase(it);
+      break;
+    }
+  }
+}
+
+bool Subgraph::exist(const neurun::model::OperationIndex &index) const
+{
+  for (const auto &element : _operations)
+  {
+    if (element.index == index)
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
 } // namespace model
 } // namespace neurun
runtimes/neurun/core/src/model/Subgraphs.cc
index 64d806d..c53eb97 100644
@@ -56,6 +56,17 @@ void Subgraphs::dump(const std::string &msg) const
   });
 }
 
+void Subgraphs::removeFromSubgraph(const OperationIndex &operation_index)
+{
+  const auto subg_index = findOperation(operation_index);
+  auto &subg = at(subg_index);
+  subg.remove(operation_index);
+  if (subg.size() == 0)
+  {
+    remove(subg_index);
+  }
+}
+
 SubgraphIndex Subgraphs::findOperation(const OperationIndex &operation_index) const
 {
   SubgraphIndex ret;
tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt
index 3500a9f..842f78c 100644
@@ -6,7 +6,7 @@ depthwise_conv_2d
 div
 embedding_lookup
 floor
-fullyconnected/fc1
+fullyconnected
 hashtable_lookup
 l2_normalization
 l2_pool_2d