From: 장지섭/On-Device Lab(SR)/Engineer/삼성전자 <jiseob.jang@samsung.com>
Date: Tue, 17 Sep 2019 08:23:58 +0000 (+0900)
Subject: Make to support ArgMax op for acl neon (#7515)
X-Git-Tag: accepted/tizen/unified/20190918.102349~19
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4d4f85e1561eaf7dfd9f4126e24b85af24168d55;p=platform%2Fcore%2Fml%2Fnnfw.git

Make to support ArgMax op for acl neon (#7515)

This commit adds support for the ArgMax op on the acl neon backend, except for the int32 type.
  - Introduce NEArgMinMax layer
  - Apply NEArgMinMax layer for neurun

Signed-off-by: jiseob.jang <jiseob.jang@samsung.com>
---

diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
index 6eb0830..fb5323d 100644
--- a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
@@ -16,6 +16,7 @@
 #ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__
 #define __ARM_COMPUTE_NEFUNCTIONSEX_H__
 
+#include <arm_compute/runtime/NEON/functions/NEArgMinMax.h>
 #include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
 #include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
 #include <arm_compute/runtime/NEON/functions/NEPReLU.h>
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h
new file mode 100644
index 0000000..604cd93
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
+#define __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run an arg min/max reduction (chosen by @p op) along a single axis */
+template <ReductionOperation op> class NEArgMinMaxStatic : public IFunction
+{
+public:
+  /** Constructor taking the (optional) memory manager handed to the internal memory group */
+  NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+  /** Configure the reduction along @p axis and the reshape that drops the reduced dimension
+   *
+   * @note Supported tensor rank: up to 4
+   * @param[in]  input          Source tensor. Data type supported: QASYMM8/F16/F32
+   * @param[in]  axis           Reduction axis. A negative value is wrapped around the input rank.
+   * @param[out] output         Destination tensor: @p input's shape with @p axis removed.
+   *                            NOTE(review): the acl_neon backend sets its type to U32 - confirm.
+   */
+  void configure(ITensor *input, int axis, ITensor *output);
+
+  /** Static function to check if given info leads to a valid @ref NEArgMinMaxStatic configuration
+   *
+   * @param[in] input          Source tensor info. Data type supported: QASYMM8/F16/F32
+   * @param[in] axis           Reduction axis. A negative value is wrapped around the input rank.
+   * @param[in] output         Destination tensor info. Only its shape is checked: it must equal
+   *                           @p input's shape with @p axis removed.
+   * @return A status
+   */
+  static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output);
+
+  // Inherited methods overridden:
+  void run() override;
+
+private:
+  MemoryGroup _memory_group;          ///< Manages the intermediate tensor _reduced_out
+  NEArgMinMaxLayer _reduction_kernel; ///< Arg min/max reduction along the selected axis
+  Tensor _reduced_out;                ///< Reduction result; the reduced axis is kept with extent 1
+  NEReshapeLayer _reshape;            ///< Reshapes _reduced_out into the output tensor
+};
+
+/** Basic function to run arg max. */
+using NEArgMax = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
+/** Basic function to run arg min. */
+using NEArgMin = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp
new file mode 100644
index 0000000..5ba465b
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEArgMinMax.h"
+
+#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+namespace arm_compute
+{
+// Hands the optional memory manager to the memory group; the other members start default-built.
+template <ReductionOperation OP>
+NEArgMinMaxStatic<OP>::NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager)
+    : _memory_group{std::move(memory_manager)}, _reduction_kernel{}, _reduced_out{}, _reshape{}
+{
+}
+
+template <ReductionOperation OP>
+Status NEArgMinMaxStatic<OP>::validate(const ITensorInfo *input, int axis,
+                                       const ITensorInfo *output)
+{
+  // Both infos are dereferenced below (output is cloned), so neither of them may be null.
+  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+  ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
+                                                       DataType::F32);
+
+  // wrap_around() takes a remainder by the rank, so reject a rank-0 input before calling it.
+  const int input_dims = input->num_dimensions();
+  ARM_COMPUTE_RETURN_ERROR_ON(input_dims < 1);
+  // Convert negative axis
+  const int axis_local = wrap_around(axis, input_dims);
+  ARM_COMPUTE_RETURN_ERROR_ON(axis_local > 3);
+  ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local) > input->num_dimensions() - 1);
+
+  // Expected output shape: the input shape with the reduced axis removed.
+  TensorShape out_shape = input->tensor_shape();
+  out_shape.remove_dimension(axis_local);
+  const TensorInfo out_info = output->clone()->set_tensor_shape(out_shape);
+  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
+  return Status{};
+}
+
+template <ReductionOperation OP>
+void NEArgMinMaxStatic<OP>::configure(ITensor *input, int axis, ITensor *output)
+{
+  // output->info() is read when the intermediate tensor is set up, so output must be valid too.
+  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+  // Convert negative axis
+  const int input_dims = input->info()->num_dimensions();
+  const int axis_local = wrap_around(axis, input_dims);
+
+  // Perform reduction for axis: the intermediate tensor keeps the input rank with the reduced
+  // axis set to extent 1, and takes channels, data type and quantization from the output.
+  TensorShape intermediate_shape = input->info()->tensor_shape();
+  intermediate_shape.set(axis_local, 1);
+  _reduced_out.allocator()->init(TensorInfo(intermediate_shape, output->info()->num_channels(),
+                                            output->info()->data_type(),
+                                            output->info()->quantization_info()));
+  _memory_group.manage(&_reduced_out);
+  _reduction_kernel.configure(input, axis_local, &_reduced_out, OP);
+
+  // Allocate intermediate tensor
+  _reduced_out.allocator()->allocate();
+
+  // Configure reshape layer to drop the reduced dimension: the output shape is the input shape
+  // with axis_local removed.
+  TensorShape out_shape = input->info()->tensor_shape();
+  out_shape.remove_dimension(axis_local);
+  // auto_init_if_empty() only fills in an output info that is still empty.
+  auto_init_if_empty(*output->info(), output->info()->clone()->set_tensor_shape(out_shape));
+  _reshape.configure(&_reduced_out, output);
+}
+
+template <ReductionOperation OP> void NEArgMinMaxStatic<OP>::run()
+{
+  // Keep the memory-group scope alive while the reduction and then the reshape execute.
+  MemoryGroupResourceScope memory_scope(_memory_group);
+  _reduction_kernel.run();
+  _reshape.run();
+}
+
+// Supported Specializations
+template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
+template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
+} // namespace arm_compute
diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
index 32222d1..0293b83 100644
--- a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
+++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
@@ -181,6 +181,51 @@ void KernelGenerator::visit(const model::operation::AbsNode &node)
   _execution_builder->append(std::move(acl_fn));
 }
 
+void KernelGenerator::visit(const model::operation::ArgMaxNode &node)
+{
+  // Builds the ACL NEON ArgMax function for this node and appends it to the execution builder.
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(model::operation::ArgMaxNode::Input::INPUT)};
+  const auto axis_index{node.param().axis_index};
+
+  // The axis value is read right here, while generating the kernel, so it must be constant.
+  assert(_ctx.at(axis_index).isConstant());
+  // Axis rank is always 1.
+  assert(_ctx.at(axis_index).shape().rank() == 1);
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
+  const auto frontend_layout = _current_subg_layout;
+  const auto backend_layout = ifm_alloc->layout();
+
+  // Normalise a negative axis into [0, ifm_rank).
+  int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
+  if (axis_value < 0)
+  {
+    axis_value += ifm_rank;
+  }
+  assert(axis_value >= 0 && axis_value < ifm_rank);
+
+  // Map the frontend axis onto the backend tensor's axis numbering.
+  const auto fixed_axis =
+      acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMax>();
+
+  // NOTE The output tensor info is forced to U32 before the function is configured, whatever
+  //      data type the model declared for it. With this change the int32 ArgMax tests
+  //      (argmax_ex_int32, argmax_ex_neg_axis_int32) stay on the acl_neon skip list.
+  // TODO Handle an S32 output explicitly instead of overriding its data type.
+  ofm_alloc->info()->set_data_type(arm_compute::DataType::U32);
+
+  fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle());
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
 void KernelGenerator::visit(const model::operation::Conv2DNode &node)
 {
   using model::operation::Conv2DNode;
diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.h b/runtimes/neurun/backend/acl_neon/KernelGenerator.h
index 7b93c4f..28ef565 100644
--- a/runtimes/neurun/backend/acl_neon/KernelGenerator.h
+++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.h
@@ -37,6 +37,7 @@ public:
 
   void visit(const model::Subgraph &) override;
   void visit(const model::operation::AbsNode &) override;
+  void visit(const model::operation::ArgMaxNode &) override;
   void visit(const model::operation::Conv2DNode &) override;
   void visit(const model::operation::DepthwiseConv2DNode &) override;
   void visit(const model::operation::MaxPool2DNode &) override;
diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
index 1fdb5db..f78b566 100644
--- a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
+++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
@@ -61,6 +61,8 @@ ShapeFixer::ShapeFixer(const neurun::model::Operands &ctx,
 
 void ShapeFixer::visit(const model::operation::AbsNode &) { /* DO NOTHING */}
 
+void ShapeFixer::visit(const model::operation::ArgMaxNode &) { /* DO NOTHING */}
+
 void ShapeFixer::visit(const model::operation::Conv2DNode &) { /* DO NOTHING */}
 
 void ShapeFixer::visit(const model::operation::DepthwiseConv2DNode &) { /* DO NOTHING */}
diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.h b/runtimes/neurun/backend/acl_neon/ShapeFixer.h
index f5c6721..796ea39 100644
--- a/runtimes/neurun/backend/acl_neon/ShapeFixer.h
+++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.h
@@ -38,6 +38,7 @@ public:
   std::shared_ptr<ITensorBuilder> tensor_builder() override { return _tensor_builder; }
 
   void visit(const model::operation::AbsNode &) override;
+  void visit(const model::operation::ArgMaxNode &) override;
   void visit(const model::operation::Conv2DNode &) override;
   void visit(const model::operation::DepthwiseConv2DNode &) override;
   void visit(const model::operation::MaxPool2DNode &) override;
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
index cc93f97..255ef0b 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
@@ -3,7 +3,6 @@
 #
 # Not support operations
 TrivialTest.BroadcastMulTwo
-GeneratedTests.argmax*
 GeneratedTests.depth_to_space*
 GeneratedTests.dequantize
 GeneratedTests.embedding_lookup*
@@ -28,3 +27,6 @@ GeneratedTests.exp_ex_1D_float
 GeneratedTests.exp_ex_2D_float
 # Unsupported optional input that has shape
 GeneratedTests.lstm2*
+# Unsupported data type
+GeneratedTests.argmax_ex_int32
+GeneratedTests.argmax_ex_neg_axis_int32