From 9e4e486d709ac6679e2049bd82623063005250b6 Mon Sep 17 00:00:00 2001
From: =?utf8?q?=EB=82=A8=EA=B6=81=EC=84=9D/On-Device=20Lab=28SR=29/Enginee?=
 =?utf8?q?r/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?= <sk.namkoong@samsung.com>
Date: Fri, 20 Sep 2019 16:43:15 +0900
Subject: [PATCH] [locomotiv] Introduce TensorReduce Operation (#7643)

* [locomotiv] Introduce TensorReduce Operation

This commit introduces the `TensorReduce` operation in `locomotiv`.
For now, only `ReduceFunc::Mean` is supported.

Signed-off-by: Seok NamKoong <sk.namkoong@samsung.com>

* restructure
---
 compiler/locomotiv/src/Node.lst                   |   1 +
 compiler/locomotiv/src/Node/TensorReduce.cpp      | 153 ++++++++++++++++++++++
 compiler/locomotiv/src/Node/TensorReduce.test.cpp | 104 +++++++++++++++
 3 files changed, 258 insertions(+)
 create mode 100644 compiler/locomotiv/src/Node/TensorReduce.cpp
 create mode 100644 compiler/locomotiv/src/Node/TensorReduce.test.cpp
diff --git a/compiler/locomotiv/src/Node.lst b/compiler/locomotiv/src/Node.lst
index 3427a70..35aef1c 100644
--- a/compiler/locomotiv/src/Node.lst
+++ b/compiler/locomotiv/src/Node.lst
@@ -33,5 +33,6 @@ NODE(Reshape<loco::ReshapeType::Fixed>)
 NODE(Tanh)
 NODE(TensorBroadcast)
 NODE(TensorConcat)
+NODE(TensorReduce)
 NODE(TensorSoftmax)
 NODE(TransposedConv2D)
diff --git a/compiler/locomotiv/src/Node/TensorReduce.cpp b/compiler/locomotiv/src/Node/TensorReduce.cpp
new file mode 100644
index 0000000..fae7a75
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorReduce.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Buffer;
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+// Maps an input index to its output slot: positions on reduced axes collapse to 0
+Index reduced_index(const Index &index, const loco::TensorAxisSet &axes)
+{
+  const uint32_t rank = index.rank();
+  Index collapsed;
+  collapsed.resize(rank);
+  for (uint32_t axis = 0; axis < rank; ++axis)
+    collapsed.at(axis) = axes.defined(axis) ? 0 : index.at(axis);
+  return collapsed;
+}
+
+// Output shape of the reduction: reduced axes shrink to 1, rank is preserved
+Shape reduced_shape(const Shape &shape, const loco::TensorAxisSet &axes)
+{
+  const uint32_t rank = shape.rank();
+  Shape collapsed;
+  collapsed.resize(rank);
+  for (uint32_t axis = 0; axis < rank; ++axis)
+    collapsed.dim(axis) = axes.defined(axis) ? 1 : shape.dim(axis);
+  return collapsed;
+}
+
+} // namespace
+
+namespace
+{
+
+template <typename T, loco::ReduceFunc F> struct ReduceFunction // fallback for unspecialized F
+{
+  static void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorAxisSet &axes)
+  {
+    throw std::runtime_error("Not supported ReduceFunc type"); // always throws; arguments unused
+  }
+};
+
+// Mean: sum every input element into its collapsed output slot, then divide by the group size
+template <typename T> struct ReduceFunction<T, loco::ReduceFunc::Mean>
+{
+  static void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorAxisSet &axes)
+  {
+    const auto &in_shape = rhs.shape();
+
+    // Accumulate on top of whatever lhs already holds
+    for (IndexEnumerator src{in_shape}; src.valid(); src.advance())
+      lhs.at(reduced_index(src.current(), axes)) += rhs.at(src.current());
+
+    // Number of input elements folded into each output element
+    uint32_t group_size = 1;
+    for (uint32_t axis = 0; axis < in_shape.rank(); ++axis)
+    {
+      if (axes.defined(axis))
+        group_size *= in_shape.dim(axis);
+    }
+
+    const T divisor = static_cast<T>(group_size);
+    for (IndexEnumerator dst{lhs.shape()}; dst.valid(); dst.advance())
+      lhs.at(dst.current()) /= divisor;
+  }
+};
+
+// Reduces rhs into lhs using node's func/axes; throws std::runtime_error if func is unsupported
+template <typename T>
+void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorReduce &node)
+{
+  switch (node.func())
+  {
+    case loco::ReduceFunc::Mean:
+      ReduceFunction<T, loco::ReduceFunc::Mean>::apply(lhs, rhs, *node.axes());
+      break;
+    // TODO Support more ReduceFunc type; until then fail loudly rather than leave lhs untouched
+    default:
+      throw std::runtime_error("Not supported ReduceFunc type");
+  }
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+// Executes TensorReduce: reduces the input's annotated FLOAT32 data and annotates node with it
+void NodeExecution::execute(loco::TensorReduce *node)
+{
+  auto input_data = annot_data(node->input());
+
+  // Validate before the first dereference of input_data
+  validate(input_data, "Input not ready");
+  validate(annot_domain(node->input()) == loco::Domain::Tensor,
+           "Input domain of TensorReduce is not Tensor");
+
+  std::unique_ptr<NodeData> reduce_data = nullptr;
+  Shape r_shape = reduced_shape(*input_data->shape(), *node->axes());
+  switch (input_data->dtype())
+  {
+    case loco::DataType::FLOAT32:
+    {
+      auto input_bufptr = input_data->as_f32_bufptr();
+      auto reduce_buf = make_buffer<float, LexicalLayout>(r_shape);
+      apply(reduce_buf, *input_bufptr, *node);
+      reduce_data = make_data(reduce_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(reduce_data != nullptr);
+  // The result stays in the input's domain
+  annot_data(node, std::move(reduce_data));
+  annot_domain(node, annot_domain(node->input()));
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorReduce.test.cpp b/compiler/locomotiv/src/Node/TensorReduce.test.cpp
new file mode 100644
index 0000000..68398ca
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorReduce.test.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_Fixed_Reduce_Mean, f32_0)
+{
+  // Build a Pull -> TensorReduce(Mean) graph that reduces axes 0 and 1 of a 1x2x2 float tensor
+  auto g = loco::make_graph();
+  auto pull_input = g->nodes()->create<loco::Pull>();
+  pull_input->dtype(loco::DataType::FLOAT32);
+  pull_input->shape({1, 2, 2});
+  auto reduce_node = g->nodes()->create<loco::TensorReduce>();
+  reduce_node->input(pull_input);
+  reduce_node->axes()->insert(0);
+  reduce_node->axes()->insert(1);
+  reduce_node->func(loco::ReduceFunc::Mean);
+
+  // Fill the input buffer and annotate the pull node with it (data and Tensor domain)
+  auto pull_input_buf = make_buffer<float, LexicalLayout>({1, 2, 2});
+  pull_input_buf.at(Index{0, 0, 0}) = 1.1f;
+  pull_input_buf.at(Index{0, 0, 1}) = 2.2f;
+  pull_input_buf.at(Index{0, 1, 0}) = 5.5f;
+  pull_input_buf.at(Index{0, 1, 1}) = 6.6f;
+  auto pull_input_data = locomotiv::make_data(pull_input_buf);
+  locomotiv::annot_data(pull_input, std::move(pull_input_data));
+  locomotiv::annot_domain(pull_input, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(reduce_node);
+
+  auto kShape = Shape{1, 1, 2}; // reduced axes 0 and 1 are expected to have size 1
+  auto reduce_data = locomotiv::annot_data(reduce_node);
+  ASSERT_NE(reduce_data, nullptr);
+  ASSERT_EQ(reduce_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(reduce_data->shape()), kShape);
+  ASSERT_FLOAT_EQ(reduce_data->as_f32_bufptr()->at(Index{0, 0, 0}), 3.3f); // (1.1 + 5.5) / 2
+  ASSERT_FLOAT_EQ(reduce_data->as_f32_bufptr()->at(Index{0, 0, 1}), 4.4f); // (2.2 + 6.6) / 2
+
+  ASSERT_EQ(locomotiv::annot_domain(reduce_node), loco::Domain::Tensor);
+}
+
+TEST(NodeExecution_Fixed_Reduce_Mean, f32_1)
+{
+  // Build a Pull -> TensorReduce(Mean) graph that reduces axes 1 and 2 of a 1x2x2 float tensor
+  auto g = loco::make_graph();
+  auto pull_input = g->nodes()->create<loco::Pull>();
+  pull_input->dtype(loco::DataType::FLOAT32);
+  pull_input->shape({1, 2, 2});
+  auto reduce_node = g->nodes()->create<loco::TensorReduce>();
+  reduce_node->input(pull_input);
+  reduce_node->axes()->insert(1);
+  reduce_node->axes()->insert(2);
+  reduce_node->func(loco::ReduceFunc::Mean);
+
+  // Fill the input buffer and annotate the pull node with it (data and Tensor domain)
+  auto pull_input_buf = make_buffer<float, LexicalLayout>({1, 2, 2});
+  pull_input_buf.at(Index{0, 0, 0}) = 1.1f;
+  pull_input_buf.at(Index{0, 0, 1}) = 2.2f;
+  pull_input_buf.at(Index{0, 1, 0}) = 5.5f;
+  pull_input_buf.at(Index{0, 1, 1}) = 6.6f;
+  auto pull_input_data = locomotiv::make_data(pull_input_buf);
+  locomotiv::annot_data(pull_input, std::move(pull_input_data));
+  locomotiv::annot_domain(pull_input, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(reduce_node);
+
+  auto kShape = Shape{1, 1, 1}; // single element: the mean of all four inputs (15.4 / 4)
+  auto reduce_data = locomotiv::annot_data(reduce_node);
+  ASSERT_NE(reduce_data, nullptr);
+  ASSERT_EQ(reduce_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(reduce_data->shape()), kShape);
+  ASSERT_FLOAT_EQ(reduce_data->as_f32_bufptr()->at(Index{0, 0, 0}), 3.85f);
+
+  ASSERT_EQ(locomotiv::annot_domain(reduce_node), loco::Domain::Tensor);
+}
-- 
2.7.4