Make acl_neon KernelGenerator support NCHW frontend layout (#6551)
author Jiseob Jang/On-Device Lab(SR)/Engineer/Samsung Electronics <jiseob.jang@samsung.com>
Wed, 14 Aug 2019 00:41:41 +0000 (09:41 +0900)
committer Hyeongseok Oh/On-Device Lab(SR)/Staff Engineer/Samsung Electronics <hseok82.oh@samsung.com>
Wed, 14 Aug 2019 00:41:40 +0000 (09:41 +0900)
This commit makes the acl_neon KernelGenerator support the NCHW frontend layout: asFeature() and ToARMComputeAxis() now receive the current subgraph layout instead of assuming NHWC, and the hard-coded CWHN -> WHCN axis permutation in the Mean kernel is removed.
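
As a rough sketch of the axis translation that the new ToARMComputeAxis
arguments enable (the Layout enum and to_acl_axis helper below are
illustrative stand-ins, not the actual acl_common API):

    #include <cassert>
    #include <cstdint>

    enum class Layout { NHWC, NCHW };

    // ARM Compute stores dimensions in reverse order, so with matching
    // layouts frontend axis i maps to ACL axis (rank - 1 - i). When a 4D
    // frontend layout differs from the backend layout, the axis is first
    // translated between the two layouts. Illustrative only, not neurun code.
    uint32_t to_acl_axis(uint32_t rank, uint32_t axis, Layout frontend, Layout backend)
    {
      assert(axis < rank);
      if (rank == 4 && frontend != backend)
      {
        static const uint32_t nchw_to_nhwc[4] = {0, 3, 1, 2}; // N,C,H,W -> N,H,W,C
        static const uint32_t nhwc_to_nchw[4] = {0, 2, 3, 1}; // N,H,W,C -> N,C,H,W
        axis = (frontend == Layout::NCHW) ? nchw_to_nhwc[axis] : nhwc_to_nchw[axis];
      }
      return rank - 1 - axis;
    }

With matching frontend and backend layouts this reduces to the plain
reversed index, i.e. the previous behavior.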

Signed-off-by: jiseob.jang <jiseob.jang@samsung.com>
runtimes/neurun/backend/acl_neon/KernelGenerator.cc

index 044366c..2bd41a6 100644
@@ -208,8 +208,8 @@ void KernelGenerator::visit(const model::operation::Conv2DNode &node)
   const auto ker_index{node.getInputs().at(Conv2DNode::Input::KERNEL)};
   const auto bias_index{node.getInputs().at(Conv2DNode::Input::BIAS)};
 
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
   // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
   const auto &ker_shape = _ctx.at(ker_index).shape();
   const auto ker_height = ker_shape.dim(1);
@@ -246,8 +246,8 @@ void KernelGenerator::visit(const model::operation::DepthwiseConv2DNode &node)
   const auto ker_index{node.getInputs().at(DepthwiseConv2DNode::Input::KERNEL)};
   const auto bias_index{node.getInputs().at(DepthwiseConv2DNode::Input::BIAS)};
 
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
   // Kernel format is [1, kernel_height, kernel_width, depth_out].
   const auto &ker_shape = _ctx.at(ker_index).shape();
   const auto ker_height = ker_shape.dim(1);
@@ -292,8 +292,8 @@ void KernelGenerator::visit(const model::operation::MaxPool2DNode &node)
   const auto ofm_index{node.getOutputs().at(0)};
   const auto ifm_index{node.getInputs().at(model::operation::MaxPool2DNode::Input::INPUT)};
 
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
 
   const auto kh = node.param().kh;
   const auto kw = node.param().kw;
@@ -343,9 +343,13 @@ void KernelGenerator::visit(const model::operation::MeanNode &node)
 
   const auto ifm_shape = _ctx.at(ifm_index).shape();
 
-  std::set<uint32_t> axis;
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  std::set<uint32_t> axes;
   {
     const auto ifm_rank = ifm_shape.rank();
+    const auto frontend_layout = _current_subg_layout;
+    const auto backend_layout = ifm_alloc->layout();
     const auto axis_shape = _ctx.at(axis_index).shape();
     switch (axis_shape.rank())
     {
@@ -356,7 +360,9 @@ void KernelGenerator::visit(const model::operation::MeanNode &node)
         {
           axis_value += ifm_rank;
         }
-        axis.insert(::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value());
+        axes.insert(::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value,
+                                                                    frontend_layout, backend_layout)
+                        .value());
         break;
       }
       case 1: // vector
@@ -374,8 +380,9 @@ void KernelGenerator::visit(const model::operation::MeanNode &node)
           {
             axis_value += ifm_rank;
           }
-          axis.insert(
-              ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value());
+          axes.insert(::neurun::backend::acl_common::ToARMComputeAxis(
+                          ifm_rank, axis_value, frontend_layout, backend_layout)
+                          .value());
         }
         break;
       }
@@ -384,29 +391,14 @@ void KernelGenerator::visit(const model::operation::MeanNode &node)
     }
   }
 
-  const auto ifm_rank = ifm_shape.rank();
-
-  bool keep_dims = _ctx.at(keep_dims_index).asScalar<int32_t>() != 0;
-  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
-  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
   arm_compute::Coordinates fixed_axis;
-  // TODO Support NCHW frontend
-  // TODO Change the layout of frontend and backend to be the same
-  auto acl_layout = ifm_alloc->handle()->info()->data_layout();
-  // CWHN -> WHCN
-  uint32_t permutation[4] = {2, 0, 1, 3};
-  for (auto a : axis)
+  for (auto a : axes)
   {
-    if (acl_layout == ::arm_compute::DataLayout::NCHW && ifm_rank == 4)
-    {
-      fixed_axis.set(fixed_axis.num_dimensions(), permutation[a]);
-    }
-    else
-    {
-      fixed_axis.set(fixed_axis.num_dimensions(), a);
-    }
+    fixed_axis.set(fixed_axis.num_dimensions(), a);
   }
 
+  bool keep_dims = _ctx.at(keep_dims_index).asScalar<int32_t>() != 0;
+
   std::unique_ptr<::arm_compute::IFunction> fn;
 
   // NOTE NEReduceMean has a bug that does not support NHWC layout
@@ -427,8 +419,8 @@ void KernelGenerator::visit(const model::operation::AvgPool2DNode &node)
   const auto ofm_index{node.getOutputs().at(0)};
   const auto ifm_index{node.getInputs().at(model::operation::AvgPool2DNode::Input::INPUT)};
 
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
 
   const auto kh = node.param().kh;
   const auto kw = node.param().kw;
@@ -536,7 +528,8 @@ void KernelGenerator::visit(const model::operation::FullyConnectedNode &node)
   neurun::model::Shape reshape(2);
   if (input_rank == 4)
   {
-    model::FeatureShape ifm_shape_feature = _ctx.at(input_index).shape().asFeature();
+    model::FeatureShape ifm_shape_feature =
+        _ctx.at(input_index).shape().asFeature(_current_subg_layout);
     auto feature_size =
         ifm_shape_feature.N * ifm_shape_feature.C * ifm_shape_feature.H * ifm_shape_feature.W;
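
For reference, a minimal sketch of what asFeature(layout) is expected to do
for a 4D shape, using the FeatureShape fields (N, C, H, W) seen in the
FullyConnected hunk above; the standalone as_feature helper and its dims
vector are illustrative assumptions, not neurun's actual implementation:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    enum class Layout { NHWC, NCHW };

    struct FeatureShape { int32_t N, C, H, W; };

    // Reads a 4D dimension vector according to the given frontend layout,
    // so NCHW models no longer have their dims misread as NHWC.
    FeatureShape as_feature(const std::vector<int32_t> &dims, Layout layout)
    {
      assert(dims.size() == 4);
      if (layout == Layout::NCHW)
        return FeatureShape{dims[0], dims[1], dims[2], dims[3]};
      return FeatureShape{dims[0], dims[3], dims[1], dims[2]}; // NHWC
    }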