From fa696ca05e5232871e80951cc065ea9cd08c1014 Mon Sep 17 00:00:00 2001
From: =?utf8?q?=EC=9D=B4=EC=83=81=EA=B7=9C/=EB=8F=99=EC=9E=91=EC=A0=9C?=
 =?utf8?q?=EC=96=B4Lab=28SR=29/Principal=20Engineer/=EC=82=BC=EC=84=B1?=
 =?utf8?q?=EC=A0=84=EC=9E=90?= <sg5.lee@samsung.com>
Date: Wed, 22 Aug 2018 19:03:51 +0900
Subject: [PATCH] pureACL profiler keeps track of operator's index (#2412)

Some operators in the model are expanded into multiple Steps.
To analyze the profile data, the profiler needs to keep track of the
operator index that relates each Step to the interpreter's ops.

Now we can tell which operator an ACL kernel was generated for by
looking at the Step's _op_idx member.

Related Issue: #2376

Signed-off-by: Sanggyu Lee <sg5.lee@samsung.com>
---
 runtimes/pure_arm_compute/src/compilation.cc         | 20 +++++++++++++++++++-
 runtimes/pure_arm_compute/src/execution.cc           |  2 +-
 runtimes/pure_arm_compute/src/internal/arm_compute.h |  7 +++++++
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index f4c8413..789011f 100644
--- a/runtimes/pure_arm_compute/src/compilation.cc
+++ b/runtimes/pure_arm_compute/src/compilation.cc
@@ -3271,6 +3271,16 @@ public:
     _plan.operations().at(_plan.operations().size() - 1).name() = name;
   }
 
+#ifdef TFLITE_PROFILING_ENABLED
+public:
+  int plan_op_size() const { return _plan.operations().size(); }
+  void addOpIndexToSteps(int from, int to, int op_idx)
+  {
+    for (int i = from; i < to; ++i)
+      _plan.operations().at(i).op_idx() = op_idx;
+  }
+#endif
+
 private:
   ::internal::arm_compute::Plan &_plan;
 };
@@ -3538,9 +3548,17 @@ void PlanBuilder::finalize(void) const
   AllocationContext allocation_context{_plan};
   ExecutionBuilder execution_builder{_plan};
 
-  for (const auto &stage : _stages)
+  for (int idx = 0; idx < _stages.size(); idx++)
   {
+    const auto &stage = _stages[idx];
+#ifdef TFLITE_PROFILING_ENABLED
+    int from = execution_builder.plan_op_size();
+#endif
     stage(allocation_context, execution_builder);
+#ifdef TFLITE_PROFILING_ENABLED
+    int to = execution_builder.plan_op_size();
+    execution_builder.addOpIndexToSteps(from, to, idx);
+#endif
   }
 
   // Allocate Tensor Memory
diff --git a/runtimes/pure_arm_compute/src/execution.cc b/runtimes/pure_arm_compute/src/execution.cc
index a64b54a..90a93bc 100644
--- a/runtimes/pure_arm_compute/src/execution.cc
+++ b/runtimes/pure_arm_compute/src/execution.cc
@@ -493,7 +493,7 @@ int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution,
   for (uint32_t n = 0; n < operations.size(); ++n)
   {
     auto prof = profiling::Context::get().getProfiler();
-    SCOPED_OPERATOR_PROFILE(prof, n);
+    SCOPED_OPERATOR_PROFILE(prof, operations.at(n).op_idx());
     operations.at(n).run();
 
     if (sync)
diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute.h b/runtimes/pure_arm_compute/src/internal/arm_compute.h
index cacdfce..6fe8c80 100644
--- a/runtimes/pure_arm_compute/src/internal/arm_compute.h
+++ b/runtimes/pure_arm_compute/src/internal/arm_compute.h
@@ -103,6 +103,13 @@ public:
 private:
   std::string _name;
   std::unique_ptr<::arm_compute::IFunction> _func;
+#ifdef TFLITE_PROFILING_ENABLED
+public:
+  int op_idx() const { return _op_idx; }
+  int &op_idx() { return _op_idx; }
+private:
+  int _op_idx;
+#endif
 };
 
 } // namespace op
-- 
2.7.4