From fa696ca05e5232871e80951cc065ea9cd08c1014 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EC=9D=B4=EC=83=81=EA=B7=9C/=EB=8F=99=EC=9E=91=EC=A0=9C?= =?utf8?q?=EC=96=B4Lab=28SR=29/Principal=20Engineer/=EC=82=BC=EC=84=B1?= =?utf8?q?=EC=A0=84=EC=9E=90?= Date: Wed, 22 Aug 2018 19:03:51 +0900 Subject: [PATCH] pureACL profiler keeps track of operator's index (#2412) Some operators in the model are expanded into multiple Steps. To analyze the profile data, the profiler needs to keep track of the operator index that corresponds to the interpreter's ops. Now we can tell where an ACL kernel was generated from by looking at the Step's _op_idx member. Related Issue: #2376 Signed-off-by: Sanggyu Lee --- runtimes/pure_arm_compute/src/compilation.cc | 20 +++++++++++++++++++- runtimes/pure_arm_compute/src/execution.cc | 2 +- runtimes/pure_arm_compute/src/internal/arm_compute.h | 7 +++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc index f4c8413..789011f 100644 --- a/runtimes/pure_arm_compute/src/compilation.cc +++ b/runtimes/pure_arm_compute/src/compilation.cc @@ -3271,6 +3271,16 @@ public: _plan.operations().at(_plan.operations().size() - 1).name() = name; } +#ifdef TFLITE_PROFILING_ENABLED +public: + int plan_op_size() const { return _plan.operations().size(); } + void addOpIndexToSteps(int from, int to, int op_idx) + { + for (int i = from; i < to; ++i) + _plan.operations().at(i).op_idx() = op_idx; + } +#endif + private: ::internal::arm_compute::Plan &_plan; }; @@ -3538,9 +3548,17 @@ void PlanBuilder::finalize(void) const AllocationContext allocation_context{_plan}; ExecutionBuilder execution_builder{_plan}; - for (const auto &stage : _stages) + for (int idx = 0; idx < _stages.size(); idx++) { + const auto &stage = _stages[idx]; +#ifdef TFLITE_PROFILING_ENABLED + int from = execution_builder.plan_op_size(); +#endif stage(allocation_context, execution_builder); +#ifdef TFLITE_PROFILING_ENABLED + int to = 
execution_builder.plan_op_size(); + execution_builder.addOpIndexToSteps(from, to, idx); +#endif } // Allocate Tensor Memory diff --git a/runtimes/pure_arm_compute/src/execution.cc b/runtimes/pure_arm_compute/src/execution.cc index a64b54a..90a93bc 100644 --- a/runtimes/pure_arm_compute/src/execution.cc +++ b/runtimes/pure_arm_compute/src/execution.cc @@ -493,7 +493,7 @@ int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution, for (uint32_t n = 0; n < operations.size(); ++n) { auto prof = profiling::Context::get().getProfiler(); - SCOPED_OPERATOR_PROFILE(prof, n); + SCOPED_OPERATOR_PROFILE(prof, operations.at(n).op_idx()); operations.at(n).run(); if (sync) diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute.h b/runtimes/pure_arm_compute/src/internal/arm_compute.h index cacdfce..6fe8c80 100644 --- a/runtimes/pure_arm_compute/src/internal/arm_compute.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute.h @@ -103,6 +103,13 @@ public: private: std::string _name; std::unique_ptr<::arm_compute::IFunction> _func; +#ifdef TFLITE_PROFILING_ENABLED +public: + int op_idx() const { return _op_idx; } + int &op_idx() { return _op_idx; } +private: + int _op_idx; +#endif }; } // namespace op -- 2.7.4