From 623dc9580587572577f4d1119213666a03496bcd Mon Sep 17 00:00:00 2001
From: =?utf8?q?Prasanna=20R/SNAP=20/SRI-Bangalore/Engineer/=EC=82=BC?=
 =?utf8?q?=EC=84=B1=EC=A0=84=EC=9E=90?=
Date: Tue, 4 Dec 2018 09:02:17 +0530
Subject: [PATCH] Add CL kernel calls for ReduceMin from PACL. (#3772)

This patch adds CL kernel calls for ReduceMin from PACL.

Signed-off-by: prasannar
---
 runtimes/pure_arm_compute/src/compilation.cc | 122 ++++++++++++++++++++++++++-
 1 file changed, 121 insertions(+), 1 deletion(-)

diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index 44a4aa0..8b0457d 100644
--- a/runtimes/pure_arm_compute/src/compilation.cc
+++ b/runtimes/pure_arm_compute/src/compilation.cc
@@ -2559,7 +2559,127 @@ void Planner::visit(const ::internal::tflite::op::ReduceMin::Node &node)
 {
   VERBOSE(ReduceMin) << "Configure REDUCEMIN operation" << std::endl;
 
-  throw std::runtime_error("Not supported, yet");
+  const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+  const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+  const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+
+  auto ifm_shape = _ctx.at(ifm_index).shape();
+  auto ofm_shape = _ctx.at(ofm_index).shape();
+  auto axis_shape = _ctx.at(axis_index).shape();
+  assert(ifm_shape.rank() <= 4);
+  assert(ofm_shape.rank() <= ifm_shape.rank());
+  assert(_ctx.at(axis_index).hasData());
+  assert(axis_shape.rank() == 0 || axis_shape.rank() == 1);
+
+  // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
+  // supports cases reducing height and width or reducing depth.
+  // TODO We have to support all cases of dimensions up to 4.
+  // For correct permuting, we have to set output's shape to be equal in dimension position of the
+  // input. But the positions of the same dimensions in the input and output may be set differently.
+  // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
+  // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
+  // extend it in 4 dimensions, it should be {1,1,3,5}.
+  // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
+  // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
+  // next operation is not desired.
+  if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
+  {
+    if (ofm_shape.rank() == 2)
+    {
+      // Reducing HW
+      assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
+    }
+    else if (ofm_shape.rank() == 3)
+    {
+      // Reducing C or
+      // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1)
+      assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
+                 ifm_shape.dim(2) == ofm_shape.dim(2) ||
+             (ifm_shape.dim(0) == ofm_shape.dim(0) &&
+              (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
+              ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
+    }
+  }
+
+  _builder.addShapeConstr(
+      ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+                              _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+  _builder.addShapeConstr(
+      ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+                              _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+  std::set<uint32_t> axis;
+  {
+    const auto ifm_rank = ifm_shape.rank();
+    switch (axis_shape.rank())
+    {
+      case 0: // scalar
+      {
+        int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
+        if (axis_value < 0)
+        {
+          axis_value += ifm_rank;
+        }
+        axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+        break;
+      }
+      case 1: // vector
+      {
+        const auto axis_base = _ctx.at(axis_index).data().base();
+        const auto axis_size = _ctx.at(axis_index).shape().asVector();
+
+        // If axis's data does not exist as constant values and can be gotten as input data, we have
+        // to find a way to infer output shape when sinking output.
+        assert(axis_base != nullptr);
+        for (uint32_t n = 0; n < axis_size; ++n)
+        {
+          int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
+          if (axis_value < 0)
+          {
+            axis_value += ifm_rank;
+          }
+          axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+        }
+        break;
+      }
+      default:
+        throw std::runtime_error("Not supported");
+        break;
+    }
+  }
+
+  // Construct operation parameters
+  struct Param
+  {
+    int ofm_index;
+    int ifm_index;
+    std::set<uint32_t> axis;
+  };
+
+  Param param;
+
+  param.ofm_index = ofm_index.asInt();
+  param.ifm_index = ifm_index.asInt();
+  param.axis = axis;
+
+  auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+    auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+    auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+    if (::internal::arm_compute::isGpuMode())
+    {
+      auto fn = nnfw::make_unique<::arm_compute::CLReduceOperation>();
+
+      fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
+                    ::arm_compute::ReduceOperation::MIN);
+
+      builder.append("ReduceMin", std::move(fn));
+    }
+    else
+      throw std::runtime_error("Not supported, yet");
+  };
+
+  _builder.addStage(stage);
 }
 
 void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node)
-- 
2.7.4