From e4ba53a85c559d4fe574305276ac815cf7995762 Mon Sep 17 00:00:00 2001 From: Derek Lamberti Date: Mon, 1 Oct 2018 09:28:57 +0100 Subject: [PATCH] IVGCVSW-1824 Fix slow profiling of neon. (~50% reduced end-to-end time) Change-Id: I58295c298934317a2b365887bd9f9f6705cd0a21 --- src/armnn/NeonInterceptorScheduler.cpp | 31 ++++++++++--------------------- src/armnn/NeonInterceptorScheduler.hpp | 7 ++++--- src/armnn/NeonTimer.cpp | 11 +++++++++-- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/armnn/NeonInterceptorScheduler.cpp b/src/armnn/NeonInterceptorScheduler.cpp index 8363def..a5ca315 100644 --- a/src/armnn/NeonInterceptorScheduler.cpp +++ b/src/armnn/NeonInterceptorScheduler.cpp @@ -9,9 +9,8 @@ namespace armnn{ -NeonInterceptorScheduler::NeonInterceptorScheduler(NeonTimer::KernelMeasurements& kernels, - arm_compute::IScheduler &realScheduler) - : m_Kernels(kernels), m_RealScheduler(realScheduler) +NeonInterceptorScheduler::NeonInterceptorScheduler(arm_compute::IScheduler &realScheduler) + : m_RealScheduler(realScheduler) { } @@ -27,32 +26,22 @@ unsigned int NeonInterceptorScheduler::num_threads() const void NeonInterceptorScheduler::schedule(arm_compute::ICPPKernel* kernel, const Hints& hints) { - m_Timer.Start(); + WallClockTimer::clock::time_point startTime = WallClockTimer::clock::now(); m_RealScheduler.schedule(kernel, hints.split_dimension()); - m_Timer.Stop(); + WallClockTimer::clock::time_point stopTime = WallClockTimer::clock::now(); - std::vector measurements = m_Timer.GetMeasurements(); - BOOST_ASSERT(!measurements.empty()); - - Measurement measurement(measurements.front()); // NOTE: 1st measurement is delta - measurement.m_Name = kernel->name(); - m_Kernels.push_back(std::move(measurement)); + const auto delta = std::chrono::duration(stopTime - startTime); + m_Kernels->emplace_back(kernel->name(), delta.count(), Measurement::Unit::TIME_US); } void NeonInterceptorScheduler::run_workloads(std::vector & workloads) { - m_Timer.Start(); - // NOTE: we should think about utilising the tag to make profiling more understandable + WallClockTimer::clock::time_point startTime = WallClockTimer::clock::now(); m_RealScheduler.run_tagged_workloads(workloads, nullptr); - m_Timer.Stop(); - - std::vector measurements = m_Timer.GetMeasurements(); - BOOST_ASSERT_MSG(measurements.size() == 3, "WallClockTimer does not have correct amount of measurements."); + WallClockTimer::clock::time_point stopTime = WallClockTimer::clock::now(); - // WallClockTimer has 3 measurements, duration always being the first. - Measurement measurement(measurements.front()); - measurement.m_Name = "Workload"; - m_Kernels.push_back(std::move(measurement)); + const auto delta = std::chrono::duration(stopTime - startTime); + m_Kernels->emplace_back(std::string("Workload"), delta.count(), Measurement::Unit::TIME_US); } } // namespace armnn \ No newline at end of file diff --git a/src/armnn/NeonInterceptorScheduler.hpp b/src/armnn/NeonInterceptorScheduler.hpp index 37966b8..f33b79a 100644 --- a/src/armnn/NeonInterceptorScheduler.hpp +++ b/src/armnn/NeonInterceptorScheduler.hpp @@ -17,7 +17,7 @@ namespace armnn class NeonInterceptorScheduler : public arm_compute::IScheduler { public: - NeonInterceptorScheduler(NeonTimer::KernelMeasurements &kernels, arm_compute::IScheduler &realScheduler); + NeonInterceptorScheduler(arm_compute::IScheduler &realScheduler); ~NeonInterceptorScheduler() = default; void set_num_threads(unsigned int numThreads) override; @@ -28,10 +28,11 @@ public: void run_workloads(std::vector &workloads) override; + void SetKernels(NeonTimer::KernelMeasurements* kernels) { m_Kernels = kernels; } + NeonTimer::KernelMeasurements* GetKernels() { return m_Kernels; } private: - NeonTimer::KernelMeasurements& m_Kernels; + NeonTimer::KernelMeasurements* m_Kernels; arm_compute::IScheduler& m_RealScheduler; - WallClockTimer m_Timer; }; } // namespace armnn diff --git a/src/armnn/NeonTimer.cpp b/src/armnn/NeonTimer.cpp index 1ee0c64..219edc9 100644 --- a/src/armnn/NeonTimer.cpp +++ b/src/armnn/NeonTimer.cpp @@ -13,24 +13,31 @@ namespace armnn { +namespace +{ +static thread_local auto g_Interceptor = std::make_shared(arm_compute::Scheduler::get()); +} void NeonTimer::Start() { m_Kernels.clear(); + BOOST_ASSERT(g_Interceptor->GetKernels() == nullptr); + g_Interceptor->SetKernels(&m_Kernels); + m_RealSchedulerType = arm_compute::Scheduler::get_type(); //Note: We can't currently replace a custom scheduler if(m_RealSchedulerType != arm_compute::Scheduler::Type::CUSTOM) { // Keep the real schedule and add NeonInterceptorScheduler as an interceptor m_RealScheduler = &arm_compute::Scheduler::get(); - auto interceptor = std::make_shared(m_Kernels, *m_RealScheduler); - arm_compute::Scheduler::set(std::static_pointer_cast(interceptor)); + arm_compute::Scheduler::set(std::static_pointer_cast(g_Interceptor)); } } void NeonTimer::Stop() { // Restore real scheduler + g_Interceptor->SetKernels(nullptr); arm_compute::Scheduler::set(m_RealSchedulerType); m_RealScheduler = nullptr; } -- 2.7.4