[trace][intel pt] Measure the time it takes to decode a thread in per-cpu mode
authorPeicong Wu <wupeicong@fb.com>
Wed, 29 Jun 2022 18:42:14 +0000 (11:42 -0700)
committerWalter Erquinigo <wallace@fb.com>
Wed, 13 Jul 2022 18:08:14 +0000 (11:08 -0700)
This metric was missing. We were only measuring in per-thread mode, and
this completes the work.

For a sample trace I have, the `dump info` command shows

```
Timing for this thread:
    Decoding instructions: 0.12s

```

I also improved a bit the TaskTime function so that callers don't need to
specify the template argument

Differential Revision: https://reviews.llvm.org/D129249

lldb/source/Plugins/Trace/intel-pt/TaskTimer.h
lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp
lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp

index 92b56325743707e11eaee239cbd7bc81adfc21d7..2b85ed30334f388ab4b87299d9829f54817f5371 100644 (file)
@@ -35,9 +35,9 @@ public:
   ///
   /// \return
   ///     The return value of the task.
-  template <class R> R TimeTask(llvm::StringRef name, std::function<R()> task) {
+  template <typename C> auto TimeTask(llvm::StringRef name, C task) {
     auto start = std::chrono::steady_clock::now();
-    R result = task();
+    auto result = task();
     auto end = std::chrono::steady_clock::now();
     std::chrono::milliseconds duration =
         std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
index 8b90afb219afea79e0f5fbc98c0f07fa93609023..d3ac61f7e658266385641c5c2ba843f7daeacd25 100644 (file)
@@ -35,9 +35,8 @@ Expected<DecodedThreadSP> ThreadDecoder::Decode() {
 }
 
 llvm::Expected<DecodedThreadSP> ThreadDecoder::DoDecode() {
-  return m_trace.GetTimer()
-      .ForThread(m_thread_sp->GetID())
-      .TimeTask<Expected<DecodedThreadSP>>(
+  return m_trace.GetThreadTimer(m_thread_sp->GetID())
+      .TimeTask(
           "Decoding instructions", [&]() -> Expected<DecodedThreadSP> {
             DecodedThreadSP decoded_thread_sp =
                 std::make_shared<DecodedThread>(m_thread_sp);
index a5faae419780769f08e593f2f070ac66f412118d..55053805462d27d236ade04bb4570b536d736216 100644 (file)
@@ -199,10 +199,10 @@ void TraceIntelPT::DumpTraceInfo(Thread &thread, Stream &s, bool verbose) {
                               std::chrono::milliseconds duration) {
       s.Format("    {0}: {1:2}s\n", name, duration.count() / 1000.0);
     };
-    GetTimer().ForThread(tid).ForEachTimedTask(print_duration);
+    GetThreadTimer(tid).ForEachTimedTask(print_duration);
 
     s << "\n  Timing for global tasks:\n";
-    GetTimer().ForGlobal().ForEachTimedTask(print_duration);
+    GetGlobalTimer().ForEachTimedTask(print_duration);
   }
 
   // Instruction events stats
@@ -507,3 +507,11 @@ Error TraceIntelPT::OnThreadBufferRead(lldb::tid_t tid,
 }
 
 TaskTimer &TraceIntelPT::GetTimer() { return GetUpdatedStorage().task_timer; }
+
+ScopedTaskTimer &TraceIntelPT::GetThreadTimer(lldb::tid_t tid) {
+  return GetTimer().ForThread(tid);
+}
+
+ScopedTaskTimer &TraceIntelPT::GetGlobalTimer() {
+  return GetTimer().ForGlobal();
+}
index 09ecbe7da61ae491f970f049865ea785f1004895..af68539a072ea5b206dfc01445786de699c19582 100644 (file)
@@ -157,6 +157,14 @@ public:
   ///     The timer object for this trace.
   TaskTimer &GetTimer();
 
+  /// \return
+  ///     The ScopedTaskTimer object for the given thread in this trace.
+  ScopedTaskTimer &GetThreadTimer(lldb::tid_t tid);
+
+  /// \return
+  ///     The global copedTaskTimer object for this trace.
+  ScopedTaskTimer &GetGlobalTimer();
+
   TraceIntelPTSP GetSharedPtr();
 
 private:
index d2dbc049672c83185e816327b3feec28742b9d80..12fbe934a31b9134c004a1c833d314753bd0d00e 100644 (file)
@@ -39,30 +39,35 @@ Expected<DecodedThreadSP> TraceIntelPTMultiCpuDecoder::Decode(Thread &thread) {
   if (Error err = CorrelateContextSwitchesAndIntelPtTraces())
     return std::move(err);
 
-  auto it = m_decoded_threads.find(thread.GetID());
-  if (it != m_decoded_threads.end())
-    return it->second;
-
-  DecodedThreadSP decoded_thread_sp =
-      std::make_shared<DecodedThread>(thread.shared_from_this());
-
   TraceIntelPTSP trace_sp = GetTrace();
 
-  Error err = trace_sp->OnAllCpusBinaryDataRead(
-      IntelPTDataKinds::kIptTrace,
-      [&](const DenseMap<cpu_id_t, ArrayRef<uint8_t>> &buffers) -> Error {
-        auto it = m_continuous_executions_per_thread->find(thread.GetID());
-        if (it != m_continuous_executions_per_thread->end())
-          return DecodeSystemWideTraceForThread(*decoded_thread_sp, *trace_sp,
-                                                buffers, it->second);
-
-        return Error::success();
+  return trace_sp
+      ->GetThreadTimer(thread.GetID())
+      .TimeTask("Decoding instructions", [&]() -> Expected<DecodedThreadSP> {
+        auto it = m_decoded_threads.find(thread.GetID());
+        if (it != m_decoded_threads.end())
+          return it->second;
+
+        DecodedThreadSP decoded_thread_sp =
+            std::make_shared<DecodedThread>(thread.shared_from_this());
+
+        Error err = trace_sp->OnAllCpusBinaryDataRead(
+            IntelPTDataKinds::kIptTrace,
+            [&](const DenseMap<cpu_id_t, ArrayRef<uint8_t>> &buffers) -> Error {
+              auto it =
+                  m_continuous_executions_per_thread->find(thread.GetID());
+              if (it != m_continuous_executions_per_thread->end())
+                return DecodeSystemWideTraceForThread(
+                    *decoded_thread_sp, *trace_sp, buffers, it->second);
+
+              return Error::success();
+            });
+        if (err)
+          return std::move(err);
+
+        m_decoded_threads.try_emplace(thread.GetID(), decoded_thread_sp);
+        return decoded_thread_sp;
       });
-  if (err)
-    return std::move(err);
-
-  m_decoded_threads.try_emplace(thread.GetID(), decoded_thread_sp);
-  return decoded_thread_sp;
 }
 
 static Expected<std::vector<IntelPTThreadSubtrace>>
@@ -153,7 +158,7 @@ Error TraceIntelPTMultiCpuDecoder::CorrelateContextSwitchesAndIntelPtTraces() {
   if (m_continuous_executions_per_thread)
     return Error::success();
 
-  Error err = GetTrace()->GetTimer().ForGlobal().TimeTask<Error>(
+  Error err = GetTrace()->GetGlobalTimer().TimeTask(
       "Context switch and Intel PT traces correlation", [&]() -> Error {
         if (auto correlation = DoCorrelateContextSwitchesAndIntelPtTraces()) {
           m_continuous_executions_per_thread.emplace(std::move(*correlation));