From 8047298daf221d13baef1d2a63a2896f5c7f5c6a Mon Sep 17 00:00:00 2001
From: SungJin Choi / Motion Control Lab (SR) / Principal Engineer / Samsung Electronics
Date: Thu, 26 Jul 2018 10:59:25 +0900
Subject: [PATCH] Introduce ADD broadcast for inputs without initialized data
 (#1780)

This commit introduces ADD broadcast for inputs without initialized data.

- Handles the case where the input data is not initialized, so the broadcast
  shape has to be applied when the input is set at execution time.
- Related to #1765 (inputs with initialized data).

Signed-off-by: SungJin Choi
---
 runtimes/pure_arm_compute/src/compilation.cc |  8 +++++++
 runtimes/pure_arm_compute/src/compilation.h  | 14 +++++++++++
 runtimes/pure_arm_compute/src/execution.cc   | 36 ++++++++++++++++++++--------
 runtimes/pure_arm_compute/src/execution.h    | 12 ++++++++++
 4 files changed, 60 insertions(+), 10 deletions(-)

diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index d697757..4bd219f 100644
--- a/runtimes/pure_arm_compute/src/compilation.cc
+++ b/runtimes/pure_arm_compute/src/compilation.cc
@@ -2691,6 +2691,12 @@ public:
 public:
   void finalize(void) const;
 
+public:
+  std::map<int, ::internal::tflite::operand::Shape> &shapeForBroadcast(void)
+  {
+    return _broadcasting_tensor_shape;
+  }
+
 private:
   ::internal::arm_compute::Plan &_plan;
 
@@ -3092,6 +3098,8 @@ int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation)
 
   plan_builder.finalize();
 
+  compilation->setShapeForBroadcast(plan_builder.shapeForBroadcast());
+
   return ANEURALNETWORKS_NO_ERROR;
 }
 
diff --git a/runtimes/pure_arm_compute/src/compilation.h b/runtimes/pure_arm_compute/src/compilation.h
index bcbaa59..c523a7d 100644
--- a/runtimes/pure_arm_compute/src/compilation.h
+++ b/runtimes/pure_arm_compute/src/compilation.h
@@ -18,9 +18,23 @@ public:
 public:
   void publish(std::shared_ptr<const internal::arm_compute::Plan> &plan) { plan = _plan; }
+  void publish(std::shared_ptr<std::map<int, ::internal::tflite::operand::Shape>>
+                   &broadcasting_tensor_shape)
+  {
+    broadcasting_tensor_shape = _broadcasting_tensor_shape;
+  }
+  void
+  setShapeForBroadcast(std::map<int, ::internal::tflite::operand::Shape> &broadcasting_tensor_shape)
+  {
+    _broadcasting_tensor_shape =
+        std::make_shared<std::map<int, ::internal::tflite::operand::Shape>>(
+            broadcasting_tensor_shape);
+  }
 
 private:
   std::shared_ptr<const internal::arm_compute::Plan> _plan;
+  std::shared_ptr<std::map<int, ::internal::tflite::operand::Shape>>
+      _broadcasting_tensor_shape;
 };
 
 #endif
diff --git a/runtimes/pure_arm_compute/src/execution.cc b/runtimes/pure_arm_compute/src/execution.cc
index a297da5..fabf700 100644
--- a/runtimes/pure_arm_compute/src/execution.cc
+++ b/runtimes/pure_arm_compute/src/execution.cc
@@ -287,11 +287,14 @@ int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
                                     ANeuralNetworksExecution **execution)
 {
   std::shared_ptr<const internal::arm_compute::Plan> plan;
-
   compilation->publish(plan);
-
   *execution = new ANeuralNetworksExecution{plan};
 
+  std::shared_ptr<std::map<int, ::internal::tflite::operand::Shape>>
+      broadcasting_tensor_shape;
+  compilation->publish(broadcasting_tensor_shape);
+  (*execution)->setShapeForBroadcast(broadcasting_tensor_shape);
+
   return ANEURALNETWORKS_NO_ERROR;
 }
 
@@ -314,27 +317,40 @@ int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32_t index,
     input_type = type->type;
   }
 
-  if (operands.at(operand_index).shape().rank() == 1)
+  auto shape = operands.at(operand_index).shape();
+  auto rank = shape.rank();
+
+  if (execution->shapeForBroadcast() != nullptr)
+  {
+    auto it = execution->shapeForBroadcast()->find(operand_index.asInt());
+    if (it != execution->shapeForBroadcast()->end())
+    {
+      rank = 4;
+      shape = it->second;
+    }
+  }
+
+  if (rank == 1)
   {
-    const auto len = operands.at(operand_index).shape().dim(0);
+    const auto len = shape.dim(0);
 
     asVectorSource(execution, input_type, index, len, buffer, length);
   }
-  else if (operands.at(operand_index).shape().rank() == 2)
+  else if (rank == 2)
   {
-    const auto &operand_shape = operands.at(operand_index).shape().asMatrix();
+    const auto &operand_shape = shape.asMatrix();
 
     asMatrixSource(execution, input_type, index, operand_shape, buffer, length);
   }
-  else if (operands.at(operand_index).shape().rank() == 3)
+  else if (rank == 3)
   {
-    const auto &operand_shape = operands.at(operand_index).shape().asTensor();
+    const auto &operand_shape = shape.asTensor();
 
     asTensorSource(execution, input_type, index, operand_shape, buffer, length);
   }
-  else if (operands.at(operand_index).shape().rank() == 4)
+  else if (rank == 4)
   {
-    const auto &operand_shape = operands.at(operand_index).shape().asFeature();
+    const auto &operand_shape = shape.asFeature();
 
     asFeatureSource(execution, input_type, index, operand_shape, buffer, length);
   }
diff --git a/runtimes/pure_arm_compute/src/execution.h b/runtimes/pure_arm_compute/src/execution.h
index 4a43378..015b3cd 100644
--- a/runtimes/pure_arm_compute/src/execution.h
+++ b/runtimes/pure_arm_compute/src/execution.h
@@ -17,9 +17,21 @@ public:
 public:
   const internal::arm_compute::Plan &plan(void) const { return *_plan; }
+  std::shared_ptr<std::map<int, ::internal::tflite::operand::Shape>> shapeForBroadcast(void)
+  {
+    return _broadcasting_tensor_shape;
+  }
+  void setShapeForBroadcast(
+      const std::shared_ptr<std::map<int, ::internal::tflite::operand::Shape>>
+          &broadcasting_tensor_shape)
+  {
+    _broadcasting_tensor_shape = broadcasting_tensor_shape;
+  }
 
 private:
   std::shared_ptr<const internal::arm_compute::Plan> _plan;
+  std::shared_ptr<std::map<int, ::internal::tflite::operand::Shape>>
+      _broadcasting_tensor_shape = nullptr;
 
 public:
   // TODO Use InputIndex instead of int
-- 
2.7.4
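
Note (not part of the patch): the sketch below is a minimal NNAPI client flow for the case this change targets, an ADD whose second input is a small runtime-supplied tensor with no initialized data that has to be broadcast against a 4-D operand. It is written against the public NNAPI C API that pure_arm_compute implements; the include path, operand shapes, and data values are assumptions for illustration, error handling is omitted, and whether a given model actually takes the broadcast path depends on the plan builder logic introduced in #1765.

// Illustrative client sketch: ADD(tensor[1,2,2,3], tensor[3]) where both
// operands are model inputs, so operand 1 has no initialized data at
// compilation time and is only provided via setInput at execution time.
#include <NeuralNetworks.h>

#include <vector>

int main()
{
  ANeuralNetworksModel *model = nullptr;
  ANeuralNetworksModel_create(&model);

  const uint32_t lhs_dims[4] = {1, 2, 2, 3};
  const uint32_t rhs_dims[1] = {3};
  ANeuralNetworksOperandType lhs{ANEURALNETWORKS_TENSOR_FLOAT32, 4, lhs_dims, 0.0f, 0};
  ANeuralNetworksOperandType rhs{ANEURALNETWORKS_TENSOR_FLOAT32, 1, rhs_dims, 0.0f, 0};
  ANeuralNetworksOperandType act{ANEURALNETWORKS_INT32, 0, nullptr, 0.0f, 0};
  ANeuralNetworksOperandType out{ANEURALNETWORKS_TENSOR_FLOAT32, 4, lhs_dims, 0.0f, 0};

  ANeuralNetworksModel_addOperand(model, &lhs); // operand 0: 4-D runtime input
  ANeuralNetworksModel_addOperand(model, &rhs); // operand 1: 1-D runtime input to broadcast
  ANeuralNetworksModel_addOperand(model, &act); // operand 2: fused activation (constant)
  ANeuralNetworksModel_addOperand(model, &out); // operand 3: 4-D output

  const int32_t fuse_none = ANEURALNETWORKS_FUSED_NONE;
  ANeuralNetworksModel_setOperandValue(model, 2, &fuse_none, sizeof(fuse_none));

  const uint32_t add_inputs[3] = {0, 1, 2};
  const uint32_t add_outputs[1] = {3};
  ANeuralNetworksModel_addOperation(model, ANEURALNETWORKS_ADD, 3, add_inputs, 1, add_outputs);

  const uint32_t model_inputs[2] = {0, 1}; // operand 1 stays uninitialized until setInput
  const uint32_t model_outputs[1] = {3};
  ANeuralNetworksModel_identifyInputsAndOutputs(model, 2, model_inputs, 1, model_outputs);
  ANeuralNetworksModel_finish(model);

  ANeuralNetworksCompilation *compilation = nullptr;
  ANeuralNetworksCompilation_create(model, &compilation);
  // With this patch, finish() also hands the plan builder's broadcast shape map
  // to the compilation object.
  ANeuralNetworksCompilation_finish(compilation);

  ANeuralNetworksExecution *execution = nullptr;
  // With this patch, create() publishes that map into the execution object.
  ANeuralNetworksExecution_create(compilation, &execution);

  std::vector<float> lhs_data(1 * 2 * 2 * 3, 1.0f);
  std::vector<float> rhs_data{0.1f, 0.2f, 0.3f};
  std::vector<float> out_data(1 * 2 * 2 * 3, 0.0f);

  ANeuralNetworksExecution_setInput(execution, 0, nullptr, lhs_data.data(),
                                    lhs_data.size() * sizeof(float));
  // For input index 1 the execution finds the operand in its broadcast map,
  // overrides the rank to 4 and uses the recorded shape, so the buffer is
  // wrapped as a 4-D feature source instead of a plain vector source.
  ANeuralNetworksExecution_setInput(execution, 1, nullptr, rhs_data.data(),
                                    rhs_data.size() * sizeof(float));
  ANeuralNetworksExecution_setOutput(execution, 0, nullptr, out_data.data(),
                                     out_data.size() * sizeof(float));

  // ANeuralNetworksExecution_startCompute and the event wait would follow here.
  return 0;
}

The design keeps the broadcast decision at compilation time (the plan builder records which operands need a broadcast shape), while execution only consults that map when the input buffer finally arrives in setInput.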