GetLiveProcessBinaryData(llvm::StringRef kind);
/// Get the size of the data returned by \a GetLiveThreadBinaryData
- llvm::Optional<size_t> GetLiveThreadBinaryDataSize(lldb::tid_t tid,
- llvm::StringRef kind);
+ llvm::Optional<uint64_t> GetLiveThreadBinaryDataSize(lldb::tid_t tid,
+ llvm::StringRef kind);
/// Get the size of the data returned by \a GetLiveCoreBinaryData
- llvm::Optional<size_t> GetLiveCoreBinaryDataSize(lldb::core_id_t core_id,
- llvm::StringRef kind);
+ llvm::Optional<uint64_t> GetLiveCoreBinaryDataSize(lldb::core_id_t core_id,
+ llvm::StringRef kind);
/// Get the size of the data returned by \a GetLiveProcessBinaryData
- llvm::Optional<size_t> GetLiveProcessBinaryDataSize(llvm::StringRef kind);
+ llvm::Optional<uint64_t> GetLiveProcessBinaryDataSize(llvm::StringRef kind);
/// Constructor for post mortem processes
Trace(llvm::ArrayRef<lldb::ProcessSP> postmortem_processes,
/// \{
/// tid -> data kind -> size
- llvm::DenseMap<lldb::tid_t, std::unordered_map<std::string, size_t>>
+ llvm::DenseMap<lldb::tid_t, std::unordered_map<std::string, uint64_t>>
m_live_thread_data;
/// core id -> data kind -> size
- llvm::DenseMap<lldb::core_id_t, std::unordered_map<std::string, size_t>>
+ llvm::DenseMap<lldb::core_id_t, std::unordered_map<std::string, uint64_t>>
m_live_core_data;
/// data kind -> size
- std::unordered_map<std::string, size_t> m_live_process_data;
+ std::unordered_map<std::string, uint64_t> m_live_process_data;
/// \}
/// The list of cores being traced. Might be \b None depending on the plug-in.
/// Optional tid if the data is related to a thread.
llvm::Optional<lldb::tid_t> tid;
/// Optional core id if the data is related to a cpu core.
- llvm::Optional<lldb::tid_t> core_id;
+ llvm::Optional<lldb::core_id_t> core_id;
/// Offset in bytes from where to start reading the data.
uint64_t offset;
/// Number of bytes to read.
///
/// \return
/// Nanosecond wall time.
- std::chrono::nanoseconds ToNanos(uint64_t tsc);
+ std::chrono::nanoseconds ToNanos(uint64_t tsc) const;
+
+ uint64_t ToTSC(std::chrono::nanoseconds nanos) const;
uint32_t time_mult;
uint16_t time_shift;
"buffer but only {1} are available",
size, output.size()));
- return data;
+ return output;
}
Expected<std::vector<uint8_t>>
"buffer but only {1} are available",
size, output.size()));
- return data;
+ return output;
}
Error PerfEvent::DisableWithIoctl() {
size_t PerfEvent::GetEffectiveDataBufferSize() const {
perf_event_mmap_page &mmap_metadata = GetMetadataPage();
- if (mmap_metadata.data_head <= mmap_metadata.data_size)
+ if (mmap_metadata.data_head < mmap_metadata.data_size)
return mmap_metadata.data_head;
else
return mmap_metadata.data_size; // The buffer has wrapped.
packet.Printf("%" PRIx64, tid);
StringExtractorGDBRemote response;
- if (SendPacketAndWaitForResponse(packet.GetString(), response)
- == PacketResult::Success) {
+ if (SendPacketAndWaitForResponse(packet.GetString(), response) ==
+ PacketResult::Success) {
if (response.IsOKResponse())
return {{pid, tid}};
GDBRemoteCommunication::PacketResult::Success) {
if (response.IsErrorResponse())
return response.GetStatus().ToError();
- if (response.IsUnsupportedResponse())
- return llvm::createStringError(llvm::inconvertibleErrorCode(),
- "jLLDBTraceGetBinaryData is unsupported");
std::string data;
response.GetEscapedBinaryData(data);
return std::vector<uint8_t>(data.begin(), data.end());
TraceCursorIntelPT.cpp
TraceIntelPT.cpp
TraceIntelPTJSONStructs.cpp
+ TraceIntelPTMultiCoreDecoder.cpp
TraceIntelPTSessionFileParser.cpp
TraceIntelPTSessionSaver.cpp
llvm::ArrayRef<OptionDefinition> GetDefinitions() override;
- size_t m_trace_buffer_size;
+ uint64_t m_trace_buffer_size;
bool m_enable_tsc;
- llvm::Optional<size_t> m_psb_period;
+ llvm::Optional<uint64_t> m_psb_period;
};
CommandObjectThreadTraceStartIntelPT(TraceIntelPT &trace,
llvm::ArrayRef<OptionDefinition> GetDefinitions() override;
- size_t m_trace_buffer_size;
- size_t m_process_buffer_size_limit;
+ uint64_t m_trace_buffer_size;
+ uint64_t m_process_buffer_size_limit;
bool m_enable_tsc;
- llvm::Optional<size_t> m_psb_period;
+ llvm::Optional<uint64_t> m_psb_period;
bool m_per_core_tracing;
};
using namespace lldb_private::trace_intel_pt;
using namespace llvm;
-void ThreadTaskTimer::ForEachTimedTask(
+void ScopedTaskTimer::ForEachTimedTask(
std::function<void(const std::string &event,
std::chrono::milliseconds duration)>
callback) {
}
}
-ThreadTaskTimer &TaskTimer::ForThread(lldb::tid_t tid) {
+ScopedTaskTimer &TaskTimer::ForThread(lldb::tid_t tid) {
auto it = m_thread_timers.find(tid);
if (it == m_thread_timers.end())
- it = m_thread_timers.try_emplace(tid, ThreadTaskTimer{}).first;
+ it = m_thread_timers.try_emplace(tid, ScopedTaskTimer{}).first;
return it->second;
}
+
+ScopedTaskTimer &TaskTimer::ForGlobal() { return m_global_timer; }
namespace trace_intel_pt {
/// Class used to track the duration of long running tasks related to a single
-/// thread for reporting.
-class ThreadTaskTimer {
+/// scope for reporting.
+class ScopedTaskTimer {
public:
/// Execute the given \p task and record its duration.
///
public:
/// \return
/// The timer object for the given thread.
- ThreadTaskTimer &ForThread(lldb::tid_t tid);
+ ScopedTaskTimer &ForThread(lldb::tid_t tid);
+
+ /// \return
+ /// The timer object for global tasks.
+ ScopedTaskTimer &ForGlobal();
private:
- llvm::DenseMap<lldb::tid_t, ThreadTaskTimer> m_thread_timers;
+ llvm::DenseMap<lldb::tid_t, ScopedTaskTimer> m_thread_timers;
+ ScopedTaskTimer m_global_timer;
};
} // namespace trace_intel_pt
}
}
if (session.cores) {
+ std::vector<core_id_t> cores;
+
for (const JSONCore &core : *session.cores) {
SetPostMortemCoreDataFile(core.core_id, IntelPTDataKinds::kTraceBuffer,
FileSpec(core.trace_buffer));
SetPostMortemCoreDataFile(core.core_id,
IntelPTDataKinds::kPerfContextSwitchTrace,
FileSpec(core.context_switch_trace));
+ cores.push_back(core.core_id);
}
+
+ std::vector<tid_t> tids;
+ for (const JSONProcess &process : session.processes)
+ for (const JSONThread &thread : process.threads)
+ tids.push_back(thread.tid);
+
+ m_multicore_decoder.emplace(*this, cores, tids,
+ *session.tsc_perf_zero_conversion);
}
}
thread.shared_from_this(),
createStringError(inconvertibleErrorCode(), error));
+ if (m_multicore_decoder)
+ return m_multicore_decoder->Decode(thread);
+
auto it = m_thread_decoders.find(thread.GetID());
if (it == m_thread_decoders.end())
return std::make_shared<DecodedThread>(
}
s << "\n";
- Expected<size_t> raw_size = GetRawTraceSize(thread);
- if (!raw_size) {
- s.Format(" {0}\n", toString(raw_size.takeError()));
+ Expected<Optional<uint64_t>> raw_size_or_error = GetRawTraceSize(thread);
+ if (!raw_size_or_error) {
+ s.Format(" {0}\n", toString(raw_size_or_error.takeError()));
return;
}
+ Optional<uint64_t> raw_size = *raw_size_or_error;
DecodedThreadSP decoded_trace_sp = Decode(thread);
- size_t insn_len = decoded_trace_sp->GetInstructionsCount();
- size_t mem_used = decoded_trace_sp->CalculateApproximateMemoryUsage();
-
- s.Format(" Total number of instructions: {0}\n", insn_len);
-
- s << "\n Memory usage:\n";
- s.Format(" Raw trace size: {0} KiB\n", *raw_size / 1024);
- s.Format(
- " Total approximate memory usage (excluding raw trace): {0:2} KiB\n",
- (double)mem_used / 1024);
- if (insn_len != 0)
- s.Format(" Average memory usage per instruction (excluding raw trace): "
- "{0:2} bytes\n",
- (double)mem_used / insn_len);
-
- s << "\n Timing:\n";
- GetTimer().ForThread(tid).ForEachTimedTask(
- [&](const std::string &name, std::chrono::milliseconds duration) {
- s.Format(" {0}: {1:2}s\n", name, duration.count() / 1000.0);
- });
-
- const DecodedThread::EventsStats &events_stats =
- decoded_trace_sp->GetEventsStats();
- s << "\n Events:\n";
- s.Format(" Number of instructions with events: {0}\n",
- events_stats.total_instructions_with_events);
- s.Format(" Number of individual events: {0}\n", events_stats.total_count);
- for (const auto &event_to_count : events_stats.events_counts) {
- s.Format(" {0}: {1}\n",
- trace_event_utils::EventToDisplayString(event_to_count.first),
- event_to_count.second);
+
+ /// Instruction stats
+ {
+ uint64_t insn_len = decoded_trace_sp->GetInstructionsCount();
+ uint64_t mem_used = decoded_trace_sp->CalculateApproximateMemoryUsage();
+
+ s.Format(" Total number of instructions: {0}\n", insn_len);
+
+ s << "\n Memory usage:\n";
+ if (raw_size)
+ s.Format(" Raw trace size: {0} KiB\n", *raw_size / 1024);
+
+ s.Format(
+ " Total approximate memory usage (excluding raw trace): {0:2} KiB\n",
+ (double)mem_used / 1024);
+ if (insn_len != 0)
+ s.Format(
+ " Average memory usage per instruction (excluding raw trace): "
+ "{0:2} bytes\n",
+ (double)mem_used / insn_len);
+ }
+
+ // Timing
+ {
+ s << "\n Timing for this thread:\n";
+ auto print_duration = [&](const std::string &name,
+ std::chrono::milliseconds duration) {
+ s.Format(" {0}: {1:2}s\n", name, duration.count() / 1000.0);
+ };
+ GetTimer().ForThread(tid).ForEachTimedTask(print_duration);
+
+ s << "\n Timing for global tasks:\n";
+ GetTimer().ForGlobal().ForEachTimedTask(print_duration);
+ }
+
+ // Instruction events stats
+ {
+ const DecodedThread::EventsStats &events_stats =
+ decoded_trace_sp->GetEventsStats();
+ s << "\n Events:\n";
+ s.Format(" Number of instructions with events: {0}\n",
+ events_stats.total_instructions_with_events);
+ s.Format(" Number of individual events: {0}\n",
+ events_stats.total_count);
+ for (const auto &event_to_count : events_stats.events_counts) {
+ s.Format(" {0}: {1}\n",
+ trace_event_utils::EventToDisplayString(event_to_count.first),
+ event_to_count.second);
+ }
}
- s << "\n Errors:\n";
- const DecodedThread::LibiptErrorsStats &tsc_errors_stats =
- decoded_trace_sp->GetTscErrorsStats();
- s.Format(" Number of TSC decoding errors: {0}\n",
- tsc_errors_stats.total_count);
- for (const auto &error_message_to_count :
- tsc_errors_stats.libipt_errors_counts) {
- s.Format(" {0}: {1}\n", error_message_to_count.first,
- error_message_to_count.second);
+ // Multicode decoding stats
+ if (m_multicore_decoder) {
+ s << "\n Multi-core decoding:\n";
+ s.Format(" Total number of continuous executions found: {0}\n",
+ m_multicore_decoder->GetTotalContinuousExecutionsCount());
+ s.Format(" Number of continuous executions for this thread: {0}\n",
+ m_multicore_decoder->GetNumContinuousExecutionsForThread(tid));
+ }
+
+ // Errors
+ {
+ s << "\n Errors:\n";
+ const DecodedThread::LibiptErrorsStats &tsc_errors_stats =
+ decoded_trace_sp->GetTscErrorsStats();
+ s.Format(" Number of TSC decoding errors: {0}\n",
+ tsc_errors_stats.total_count);
+ for (const auto &error_message_to_count :
+ tsc_errors_stats.libipt_errors_counts) {
+ s.Format(" {0}: {1}\n", error_message_to_count.first,
+ error_message_to_count.second);
+ }
}
}
-llvm::Expected<size_t> TraceIntelPT::GetRawTraceSize(Thread &thread) {
- size_t size;
+llvm::Expected<Optional<uint64_t>>
+TraceIntelPT::GetRawTraceSize(Thread &thread) {
+ if (m_multicore_decoder)
+ return None; // TODO: calculate the amount of intel pt raw trace associated
+ // with the given thread.
+ if (GetLiveProcess())
+ return GetLiveThreadBinaryDataSize(thread.GetID(),
+ IntelPTDataKinds::kTraceBuffer);
+ uint64_t size;
auto callback = [&](llvm::ArrayRef<uint8_t> data) {
size = data.size();
return Error::success();
Error TraceIntelPT::DoRefreshLiveProcessState(TraceGetStateResponse state,
StringRef json_response) {
m_thread_decoders.clear();
-
- for (const TraceThreadState &thread_state : state.traced_threads) {
- ThreadSP thread_sp =
- GetLiveProcess()->GetThreadList().FindThreadByID(thread_state.tid);
- m_thread_decoders.emplace(
- thread_state.tid, std::make_unique<ThreadDecoder>(thread_sp, *this));
- }
+ m_tsc_conversion.reset();
+ m_multicore_decoder.reset();
Expected<TraceIntelPTGetStateResponse> intelpt_state =
json::parse<TraceIntelPTGetStateResponse>(json_response,
if (!intelpt_state)
return intelpt_state.takeError();
+ if (!intelpt_state->cores) {
+ for (const TraceThreadState &thread_state : state.traced_threads) {
+ ThreadSP thread_sp =
+ GetLiveProcess()->GetThreadList().FindThreadByID(thread_state.tid);
+ m_thread_decoders.emplace(
+ thread_state.tid, std::make_unique<ThreadDecoder>(thread_sp, *this));
+ }
+ } else {
+ std::vector<core_id_t> cores;
+ for (const TraceCoreState &core : *intelpt_state->cores)
+ cores.push_back(core.core_id);
+
+ std::vector<tid_t> tids;
+ for (const TraceThreadState &thread : intelpt_state->traced_threads)
+ tids.push_back(thread.tid);
+
+ if (!intelpt_state->tsc_perf_zero_conversion)
+ return createStringError(inconvertibleErrorCode(),
+ "Missing perf time_zero conversion values");
+ m_multicore_decoder.emplace(*this, cores, tids,
+ *intelpt_state->tsc_perf_zero_conversion);
+ }
+
m_tsc_conversion = intelpt_state->tsc_perf_zero_conversion;
if (m_tsc_conversion) {
Log *log = GetLog(LLDBLog::Target);
bool TraceIntelPT::IsTraced(lldb::tid_t tid) {
RefreshLiveProcessState();
+ if (m_multicore_decoder)
+ return m_multicore_decoder->TracesThread(tid);
return m_thread_decoders.count(tid);
}
return message->c_str();
}
-Error TraceIntelPT::Start(size_t trace_buffer_size,
- size_t total_buffer_size_limit, bool enable_tsc,
- Optional<size_t> psb_period, bool per_core_tracing) {
+Error TraceIntelPT::Start(uint64_t trace_buffer_size,
+ uint64_t total_buffer_size_limit, bool enable_tsc,
+ Optional<uint64_t> psb_period,
+ bool per_core_tracing) {
TraceIntelPTStartRequest request;
request.trace_buffer_size = trace_buffer_size;
request.process_buffer_size_limit = total_buffer_size_limit;
request.enable_tsc = enable_tsc;
- request.psb_period =
- psb_period.map([](size_t val) { return static_cast<uint64_t>(val); });
+ request.psb_period = psb_period;
request.type = GetPluginName().str();
request.per_core_tracing = per_core_tracing;
return Trace::Start(toJSON(request));
}
Error TraceIntelPT::Start(StructuredData::ObjectSP configuration) {
- size_t trace_buffer_size = kDefaultTraceBufferSize;
- size_t process_buffer_size_limit = kDefaultProcessBufferSizeLimit;
+ uint64_t trace_buffer_size = kDefaultTraceBufferSize;
+ uint64_t process_buffer_size_limit = kDefaultProcessBufferSizeLimit;
bool enable_tsc = kDefaultEnableTscValue;
- Optional<size_t> psb_period = kDefaultPsbPeriod;
+ Optional<uint64_t> psb_period = kDefaultPsbPeriod;
bool per_core_tracing = kDefaultPerCoreTracing;
if (configuration) {
}
llvm::Error TraceIntelPT::Start(llvm::ArrayRef<lldb::tid_t> tids,
- size_t trace_buffer_size, bool enable_tsc,
- Optional<size_t> psb_period) {
+ uint64_t trace_buffer_size, bool enable_tsc,
+ Optional<uint64_t> psb_period) {
TraceIntelPTStartRequest request;
request.trace_buffer_size = trace_buffer_size;
request.enable_tsc = enable_tsc;
- request.psb_period =
- psb_period.map([](size_t val) { return static_cast<uint64_t>(val); });
+ request.psb_period = psb_period;
request.type = GetPluginName().str();
request.tids.emplace();
for (lldb::tid_t tid : tids)
#include "TaskTimer.h"
#include "ThreadDecoder.h"
+#include "TraceIntelPTMultiCoreDecoder.h"
#include "TraceIntelPTSessionFileParser.h"
+
#include "lldb/Utility/FileSpec.h"
#include "lldb/lldb-types.h"
#include "llvm/Support/raw_ostream.h"
void DumpTraceInfo(Thread &thread, Stream &s, bool verbose) override;
- llvm::Expected<size_t> GetRawTraceSize(Thread &thread);
+ llvm::Expected<llvm::Optional<uint64_t>> GetRawTraceSize(Thread &thread);
llvm::Error DoRefreshLiveProcessState(TraceGetStateResponse state,
llvm::StringRef json_response) override;
/// \return
/// \a llvm::Error::success if the operation was successful, or
/// \a llvm::Error otherwise.
- llvm::Error Start(size_t trace_buffer_size, size_t total_buffer_size_limit,
- bool enable_tsc, llvm::Optional<size_t> psb_period,
+ llvm::Error Start(uint64_t trace_buffer_size,
+ uint64_t total_buffer_size_limit, bool enable_tsc,
+ llvm::Optional<uint64_t> psb_period,
bool m_per_core_tracing);
/// \copydoc Trace::Start
/// \return
/// \a llvm::Error::success if the operation was successful, or
/// \a llvm::Error otherwise.
- llvm::Error Start(llvm::ArrayRef<lldb::tid_t> tids, size_t trace_buffer_size,
- bool enable_tsc, llvm::Optional<size_t> psb_period);
+ llvm::Error Start(llvm::ArrayRef<lldb::tid_t> tids,
+ uint64_t trace_buffer_size, bool enable_tsc,
+ llvm::Optional<uint64_t> psb_period);
/// \copydoc Trace::Start
llvm::Error Start(llvm::ArrayRef<lldb::tid_t> tids,
/// It is provided by either a session file or a live process' "cpuInfo"
/// binary data.
llvm::Optional<pt_cpu> m_cpu_info;
+ llvm::Optional<TraceIntelPTMultiCoreDecoder> m_multicore_decoder;
+ /// These decoders are used for the non-per-core case
std::map<lldb::tid_t, std::unique_ptr<ThreadDecoder>> m_thread_decoders;
/// Helper variable used to track long running operations for telemetry.
TaskTimer m_task_timer;
!o.map("type", session.type) || !o.map("cores", session.cores) ||
!o.map("tscPerfZeroConversion", session.tsc_perf_zero_conversion))
return false;
+ if (session.cores && !session.tsc_perf_zero_conversion) {
+ path.report(
+ "\"tscPerfZeroConversion\" is required when \"cores\" is provided");
+ return false;
+ }
// We have to do this because the compiler fails at doing it automatically
// because pt_cpu is not in a namespace
if (!fromJSON(*value.getAsObject()->get("cpuInfo"), session.cpu_info,
--- /dev/null
+//===-- TraceIntelPTMultiCoreDecoder.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TraceIntelPTMultiCoreDecoder.h"
+
+#include "TraceIntelPT.h"
+
+#include "llvm/Support/Error.h"
+
+#include <linux/perf_event.h>
+
+using namespace lldb;
+using namespace lldb_private;
+using namespace lldb_private::trace_intel_pt;
+using namespace llvm;
+
+struct PerfContextSwitchRecord {
+ struct perf_event_header header;
+ uint32_t next_prev_pid;
+ uint32_t next_prev_tid;
+ uint32_t pid, tid;
+ uint64_t time_in_nanos;
+
+ bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; }
+};
+
+struct ContextSwitchRecord {
+ uint64_t tsc;
+ bool is_out;
+ /// A pid of 0 indicates an execution in the kernel
+ lldb::pid_t pid;
+ lldb::tid_t tid;
+
+ bool IsOut() const { return is_out; }
+
+ bool IsIn() const { return !is_out; }
+};
+
+uint64_t ThreadContinuousExecution::GetErrorFreeTSC() const {
+ switch (variant) {
+ case Variant::Complete:
+ return tscs.complete.start; // end would also work
+ case Variant::HintedStart:
+ return tscs.hinted_start.end;
+ case Variant::HintedEnd:
+ return tscs.hinted_end.start;
+ case Variant::OnlyEnd:
+ return tscs.only_end.end;
+ case Variant::OnlyStart:
+ return tscs.only_start.start;
+ }
+}
+
+ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution(
+ lldb::core_id_t core_id, lldb::tid_t tid, uint64_t start, uint64_t end) {
+ ThreadContinuousExecution o(core_id, tid);
+ o.variant = Variant::Complete;
+ o.tscs.complete.start = start;
+ o.tscs.complete.end = end;
+ return o;
+}
+
+ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution(
+ lldb::core_id_t core_id, lldb::tid_t tid, uint64_t hinted_start,
+ uint64_t end) {
+ ThreadContinuousExecution o(core_id, tid);
+ o.variant = Variant::HintedStart;
+ o.tscs.hinted_start.hinted_start = hinted_start;
+ o.tscs.hinted_start.end = end;
+ return o;
+}
+
+ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution(
+ lldb::core_id_t core_id, lldb::tid_t tid, uint64_t start,
+ uint64_t hinted_end) {
+ ThreadContinuousExecution o(core_id, tid);
+ o.variant = Variant::HintedEnd;
+ o.tscs.hinted_end.start = start;
+ o.tscs.hinted_end.hinted_end = hinted_end;
+ return o;
+}
+
+ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution(
+ lldb::core_id_t core_id, lldb::tid_t tid, uint64_t end) {
+ ThreadContinuousExecution o(core_id, tid);
+ o.variant = Variant::OnlyEnd;
+ o.tscs.only_end.end = end;
+ return o;
+}
+
+ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution(
+ lldb::core_id_t core_id, lldb::tid_t tid, uint64_t start) {
+ ThreadContinuousExecution o(core_id, tid);
+ o.variant = Variant::OnlyStart;
+ o.tscs.only_start.start = start;
+ return o;
+}
+
+bool ThreadContinuousExecution::operator<(
+ const ThreadContinuousExecution &o) const {
+ // We can compare by GetErrorFreeTSC because context switches across CPUs can
+ // be sorted by any of its TSC.
+ return GetErrorFreeTSC() < o.GetErrorFreeTSC();
+}
+
+/// Tries to recover a continuous execution by analyzing two consecutive context
+/// switch records.
+static Error
+HandleContextSwitch(core_id_t core_id,
+ const LinuxPerfZeroTscConversion &tsc_conversion,
+ const ContextSwitchRecord &record,
+ const Optional<ContextSwitchRecord> &prev_record,
+ std::function<void(ThreadContinuousExecution &&execution)>
+ on_new_thread_execution) {
+ if (!prev_record) {
+ if (record.IsOut())
+ on_new_thread_execution(ThreadContinuousExecution::CreateOnlyEndExecution(
+ core_id, record.tid, record.tsc));
+ // The 'in' case will be handled later when we try to look for its end
+ return Error::success();
+ }
+
+ const ContextSwitchRecord &prev = *prev_record;
+ if (prev.tsc > record.tsc)
+ return createStringError(
+ inconvertibleErrorCode(),
+ formatv("A context switch record out doesn't happen after the previous "
+ "record. Previous TSC= {0}, current TSC = {1}.",
+ prev.tsc, record.tsc));
+
+ if (record.IsIn() && prev.IsIn()) {
+ // We found two consecutive ins, which means that we didn't capture
+ // the end of the previous execution.
+ on_new_thread_execution(ThreadContinuousExecution::CreateHintedEndExecution(
+ core_id, prev.tid, prev.tsc, record.tsc - 1));
+ } else if (record.IsOut() && prev.IsOut()) {
+ // We found two consecutive outs, that means that we didn't capture
+ // the beginning of the current execution.
+ on_new_thread_execution(
+ ThreadContinuousExecution::CreateHintedStartExecution(
+ core_id, record.tid, prev.tsc + 1, record.tsc));
+ } else if (record.IsOut() && prev.IsIn()) {
+ if (record.pid == prev.pid && record.tid == prev.tid) {
+ /// A complete execution
+ on_new_thread_execution(
+ ThreadContinuousExecution::CreateCompleteExecution(
+ core_id, record.tid, prev.tsc, record.tsc));
+ } else {
+ // An out after the in of a different thread. The first one doesn't
+ // have an end, and the second one doesn't have a start.
+ on_new_thread_execution(
+ ThreadContinuousExecution::CreateHintedEndExecution(
+ core_id, prev.tid, prev.tsc, record.tsc - 1));
+ on_new_thread_execution(
+ ThreadContinuousExecution::CreateHintedStartExecution(
+ core_id, record.tid, prev.tsc + 1, record.tsc));
+ }
+ }
+ return Error::success();
+}
+
+/// Decodes a context switch trace gotten with perf_event_open.
+///
+/// \param[in] data
+/// The context switch trace in binary format.
+///
+/// \param[i] core_id
+/// The core_id where the trace were gotten from.
+///
+/// \param[in] tsc_conversion
+/// The conversion values used to confert nanoseconds to TSC.
+///
+/// \param[in] on_new_thread_execution
+/// Callback to be invoked whenever a continuous execution is recovered from
+/// the trace.
+static Error DecodePerfContextSwitchTrace(
+ ArrayRef<uint8_t> data, core_id_t core_id,
+ const LinuxPerfZeroTscConversion &tsc_conversion,
+ std::function<void(ThreadContinuousExecution &&execution)>
+ on_new_thread_execution) {
+ auto CreateError = [&](size_t offset, auto error) -> Error {
+ return createStringError(inconvertibleErrorCode(),
+ formatv("Malformed perf context switch trace for "
+ "cpu {0} at offset {1}. {2}",
+ core_id, offset, error));
+ };
+
+ Optional<ContextSwitchRecord> prev_record;
+ for (size_t offset = 0; offset < data.size();) {
+ const PerfContextSwitchRecord &perf_record =
+ *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() +
+ offset);
+ // A record of 1000 uint64_t's or more should mean that the data is wrong
+ if (perf_record.header.size == 0 ||
+ perf_record.header.size > sizeof(uint64_t) * 1000)
+ return CreateError(offset, formatv("A record of {0} bytes was found.",
+ perf_record.header.size));
+
+ // We add + 100 to this record because some systems might have custom
+ // records. In any case, we are looking only for abnormal data.
+ if (perf_record.header.type >= PERF_RECORD_MAX + 100)
+ return CreateError(offset, formatv("Invalid record type {0} was found.",
+ perf_record.header.type));
+
+ if (perf_record.header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
+ ContextSwitchRecord record{tsc_conversion.ToTSC(std::chrono::nanoseconds(
+ perf_record.time_in_nanos)),
+ perf_record.IsOut(),
+ static_cast<lldb::pid_t>(perf_record.pid),
+ static_cast<lldb::tid_t>(perf_record.tid)};
+
+ if (Error err = HandleContextSwitch(core_id, tsc_conversion, record,
+ prev_record, on_new_thread_execution))
+ return CreateError(offset, toString(std::move(err)));
+
+ prev_record = record;
+ }
+ offset += perf_record.header.size;
+ }
+
+ // We might have an incomplete last record
+ if (prev_record && prev_record->IsIn())
+ on_new_thread_execution(ThreadContinuousExecution::CreateOnlyStartExecution(
+ core_id, prev_record->tid, prev_record->tsc));
+
+ return Error::success();
+}
+
+TraceIntelPTMultiCoreDecoder::TraceIntelPTMultiCoreDecoder(
+ TraceIntelPT &trace, ArrayRef<core_id_t> core_ids, ArrayRef<tid_t> tids,
+ const LinuxPerfZeroTscConversion &tsc_conversion)
+ : m_trace(trace), m_cores(core_ids.begin(), core_ids.end()),
+ m_tids(tids.begin(), tids.end()), m_tsc_conversion(tsc_conversion) {}
+
+bool TraceIntelPTMultiCoreDecoder::TracesThread(lldb::tid_t tid) const {
+ return m_tids.count(tid);
+}
+
+DecodedThreadSP TraceIntelPTMultiCoreDecoder::Decode(Thread &thread) {
+ if (Error err = DecodeContextSwitchTraces())
+ return std::make_shared<DecodedThread>(thread.shared_from_this(),
+ std::move(err));
+
+ return std::make_shared<DecodedThread>(
+ thread.shared_from_this(),
+ createStringError(inconvertibleErrorCode(), "unimplemented"));
+}
+
+Error TraceIntelPTMultiCoreDecoder::DecodeContextSwitchTraces() {
+ if (m_setup_error)
+ return createStringError(inconvertibleErrorCode(), m_setup_error->c_str());
+
+ if (m_continuous_executions_per_thread)
+ return Error::success();
+
+ m_continuous_executions_per_thread.emplace();
+
+ auto do_decode = [&]() -> Error {
+ // We'll decode all context switch traces, identify continuous executions
+ // and group them by thread.
+ for (core_id_t core_id : m_cores) {
+ Error err = m_trace.OnCoreBinaryDataRead(
+ core_id, IntelPTDataKinds::kPerfContextSwitchTrace,
+ [&](ArrayRef<uint8_t> data) -> Error {
+ return DecodePerfContextSwitchTrace(
+ data, core_id, m_tsc_conversion,
+ [&](const ThreadContinuousExecution &execution) {
+ (*m_continuous_executions_per_thread)[execution.tid]
+ .push_back(execution);
+ });
+ });
+ if (err) {
+ m_setup_error = toString(std::move(err));
+ return createStringError(inconvertibleErrorCode(),
+ m_setup_error->c_str());
+ }
+ }
+ // We now sort the executions of each to have them ready for instruction
+ // decoding
+ for (auto &tid_executions : *m_continuous_executions_per_thread)
+ std::sort(tid_executions.second.begin(), tid_executions.second.end());
+
+ return Error::success();
+ };
+
+ return m_trace.GetTimer().ForGlobal().TimeTask<Error>(
+ "Context switch trace decoding", do_decode);
+}
+
+size_t TraceIntelPTMultiCoreDecoder::GetNumContinuousExecutionsForThread(
+ lldb::tid_t tid) const {
+ if (!m_continuous_executions_per_thread)
+ return 0;
+ auto it = m_continuous_executions_per_thread->find(tid);
+ if (it == m_continuous_executions_per_thread->end())
+ return 0;
+ return it->second.size();
+}
+
+size_t TraceIntelPTMultiCoreDecoder::GetTotalContinuousExecutionsCount() const {
+ if (!m_continuous_executions_per_thread)
+ return 0;
+ size_t count = 0;
+ for (const auto &kv : *m_continuous_executions_per_thread)
+ count += kv.second.size();
+ return count;
+}
--- /dev/null
+//===-- TraceIntelPTMultiCoreDecoder.h --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTMULTICOREDECODER_H
+#define LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTMULTICOREDECODER_H
+
+#include "ThreadDecoder.h"
+
+namespace lldb_private {
+namespace trace_intel_pt {
+
+/// This class indicates the time interval in which a thread was running
+/// continuously on a cpu core.
+///
+/// In most cases both endpoints of the intervals can be accurately recovered
+/// from a context switch trace, but in some cases one of these endpoints might
+/// be guessed or not known at all, due to contention problems in the trace or
+/// because tracing was interrupted.
+///
+/// Note: we use the terms CPU and cores interchangeably.
+struct ThreadContinuousExecution {
+ enum class Variant {
+ /// Both endpoints are known
+ Complete,
+ /// The end is known and we have a guess for the start
+ HintedStart,
+ /// The start is known and we have a guess for the end
+ HintedEnd,
+ /// We only know the start. This might be the last entry of a core trace.
+ OnlyStart,
+ /// We only know the end. This might be the first entry or a core trace.
+ OnlyEnd,
+ } variant;
+
+ union {
+ struct {
+ uint64_t start;
+ uint64_t end;
+ } complete;
+ struct {
+ uint64_t start;
+ } only_start;
+ struct {
+ uint64_t end;
+ } only_end;
+ /// The following 'hinted' structures are useful when there are contention
+ /// problems in the trace
+ struct {
+ uint64_t hinted_start;
+ uint64_t end;
+ } hinted_start;
+ struct {
+ uint64_t start;
+ uint64_t hinted_end;
+ } hinted_end;
+ } tscs;
+
+ lldb::core_id_t core_id;
+ lldb::tid_t tid;
+
+ /// \return
+ /// A tsc that we are certain of, either the start or the end.
+ uint64_t GetErrorFreeTSC() const;
+
+ /// Constructors for the different variants of this object
+ ///
+ /// \{
+ static ThreadContinuousExecution
+ CreateCompleteExecution(lldb::core_id_t core_id, lldb::tid_t tid,
+ uint64_t start, uint64_t end);
+
+ static ThreadContinuousExecution
+ CreateHintedStartExecution(lldb::core_id_t core_id, lldb::tid_t tid,
+ uint64_t hinted_start, uint64_t end);
+
+ static ThreadContinuousExecution
+ CreateHintedEndExecution(lldb::core_id_t core_id, lldb::tid_t tid,
+ uint64_t start, uint64_t hinted_end);
+
+ static ThreadContinuousExecution
+ CreateOnlyEndExecution(lldb::core_id_t core_id, lldb::tid_t tid,
+ uint64_t end);
+
+ static ThreadContinuousExecution
+ CreateOnlyStartExecution(lldb::core_id_t core_id, lldb::tid_t tid,
+ uint64_t start);
+ /// \}
+
+ /// Comparator by TSCs
+ bool operator<(const ThreadContinuousExecution &o) const;
+
+private:
+ ThreadContinuousExecution(lldb::core_id_t core_id, lldb::tid_t tid)
+ : core_id(core_id), tid(tid) {}
+};
+
+/// Class used to decode a multi-core Intel PT trace. It assumes that each
+/// thread could have potentially been executed on different cores. It uses a
+/// context switch trace per CPU with timestamps to identify which thread owns
+/// each Intel PT decoded instruction and in which order. It also assumes that
+/// the Intel PT data and context switches might have gaps in their traces due
+/// to contention or race conditions.
+class TraceIntelPTMultiCoreDecoder {
+public:
+ /// \param[in] core_ids
+ /// The list of cores where the traced programs were running on.
+ ///
+ /// \param[in] tid
+ /// The full list of tids that were traced.
+ ///
+ /// \param[in] tsc_conversion
+ /// The conversion values for converting between nanoseconds and TSCs.
+ TraceIntelPTMultiCoreDecoder(
+ TraceIntelPT &trace, llvm::ArrayRef<lldb::core_id_t> core_ids,
+ llvm::ArrayRef<lldb::tid_t> tids,
+ const LinuxPerfZeroTscConversion &tsc_conversion);
+
+ /// \return
+ /// A \a DecodedThread for the \p thread by decoding its instructions on all
+ /// CPUs, sorted by TSCs.
+ DecodedThreadSP Decode(Thread &thread);
+
+ /// \return
+ /// \b true if the given \p tid is managed by this decoder, regardless of
+ /// whether there's tracing data associated to it or not.
+ bool TracesThread(lldb::tid_t tid) const;
+
+ /// \return
+ /// The number of continuous executions found for the given \p tid.
+ size_t GetNumContinuousExecutionsForThread(lldb::tid_t tid) const;
+
+ /// \return
+ /// The total number of continuous executions found across CPUs.
+ size_t GetTotalContinuousExecutionsCount() const;
+
+private:
+ /// Traverse the context switch traces and recover the continuous executions
+ /// by thread.
+ llvm::Error DecodeContextSwitchTraces();
+
+ TraceIntelPT &m_trace;
+ std::set<lldb::core_id_t> m_cores;
+ std::set<lldb::tid_t> m_tids;
+ llvm::Optional<
+ llvm::DenseMap<lldb::tid_t, std::vector<ThreadContinuousExecution>>>
+ m_continuous_executions_per_thread;
+ LinuxPerfZeroTscConversion m_tsc_conversion;
+ /// This variable will be non-None if a severe error happened during the setup
+ /// of the decoder.
+ llvm::Optional<std::string> m_setup_error;
+};
+
+} // namespace trace_intel_pt
+} // namespace lldb_private
+
+#endif // LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTMULTICOREDECODER_H
else
return HandleError(parsed_process.takeError());
}
+
return parsed_processes;
}
// Path to the raw Intel PT buffer for this core.
"contextSwitchTrace": string,
// Path to the raw perf_event_open context switch trace file for this core.
+ // The perf_event must have been configured with PERF_SAMPLE_TID and
+ // PERF_SAMPLE_TIME, as well as sample_id_all = 1.
}
],
"tscPerfZeroConversion"?: {
ArrayRef<uint8_t> data) {
std::basic_fstream<char> out_fs = std::fstream(
output_file.GetPath().c_str(), std::ios::out | std::ios::binary);
- out_fs.write(reinterpret_cast<const char *>(&data[0]),
- data.size() * sizeof(uint8_t));
+ if (!data.empty())
+ out_fs.write(reinterpret_cast<const char *>(&data[0]), data.size());
+
out_fs.close();
if (!out_fs)
return createStringError(inconvertibleErrorCode(),
return m_live_process->TraceGetState(GetPluginName());
}
-Optional<size_t> Trace::GetLiveThreadBinaryDataSize(lldb::tid_t tid,
- llvm::StringRef kind) {
+Optional<uint64_t> Trace::GetLiveThreadBinaryDataSize(lldb::tid_t tid,
+ llvm::StringRef kind) {
auto it = m_live_thread_data.find(tid);
if (it == m_live_thread_data.end())
return None;
- std::unordered_map<std::string, size_t> &single_thread_data = it->second;
+ std::unordered_map<std::string, uint64_t> &single_thread_data = it->second;
auto single_thread_data_it = single_thread_data.find(kind.str());
if (single_thread_data_it == single_thread_data.end())
return None;
return single_thread_data_it->second;
}
-Optional<size_t> Trace::GetLiveCoreBinaryDataSize(lldb::core_id_t core_id,
- llvm::StringRef kind) {
+Optional<uint64_t> Trace::GetLiveCoreBinaryDataSize(lldb::core_id_t core_id,
+ llvm::StringRef kind) {
auto it = m_live_core_data.find(core_id);
if (it == m_live_core_data.end())
return None;
- std::unordered_map<std::string, size_t> &single_core_data = it->second;
+ std::unordered_map<std::string, uint64_t> &single_core_data = it->second;
auto single_thread_data_it = single_core_data.find(kind.str());
if (single_thread_data_it == single_core_data.end())
return None;
return single_thread_data_it->second;
}
-Optional<size_t> Trace::GetLiveProcessBinaryDataSize(llvm::StringRef kind) {
+Optional<uint64_t> Trace::GetLiveProcessBinaryDataSize(llvm::StringRef kind) {
auto data_it = m_live_process_data.find(kind.str());
if (data_it == m_live_process_data.end())
return None;
if (!m_live_process)
return createStringError(inconvertibleErrorCode(),
"Tracing requires a live process.");
- llvm::Optional<size_t> size = GetLiveThreadBinaryDataSize(tid, kind);
+ llvm::Optional<uint64_t> size = GetLiveThreadBinaryDataSize(tid, kind);
if (!size)
return createStringError(
inconvertibleErrorCode(),
if (!m_live_process)
return createStringError(inconvertibleErrorCode(),
"Tracing requires a live process.");
- llvm::Optional<size_t> size = GetLiveCoreBinaryDataSize(core_id, kind);
+ llvm::Optional<uint64_t> size = GetLiveCoreBinaryDataSize(core_id, kind);
if (!size)
return createStringError(
inconvertibleErrorCode(),
if (!m_live_process)
return createStringError(inconvertibleErrorCode(),
"Tracing requires a live process.");
- llvm::Optional<size_t> size = GetLiveProcessBinaryDataSize(kind);
+ llvm::Optional<uint64_t> size = GetLiveProcessBinaryDataSize(kind);
if (!size)
return createStringError(
inconvertibleErrorCode(),
bool fromJSON(const json::Value &value, TraceCoreState &packet,
json::Path path) {
ObjectMapper o(value, path);
- int64_t core_id;
+ uint64_t core_id;
if (!o || !o.map("coreId", core_id) ||
!o.map("binaryData", packet.binary_data))
return false;
bool fromJSON(const json::Value &value, TraceGetBinaryDataRequest &packet,
Path path) {
ObjectMapper o(value, path);
- return o && o.map("type", packet.type) && o.map("kind", packet.kind) &&
- o.map("tid", packet.tid) && o.map("offset", packet.offset) &&
- o.map("size", packet.size) && o.map("coreId", packet.core_id);
+ Optional<uint64_t> core_id;
+ if (!o || !o.map("type", packet.type) || !o.map("kind", packet.kind) ||
+ !o.map("tid", packet.tid) || !o.map("offset", packet.offset) ||
+ !o.map("size", packet.size) || !o.map("coreId", core_id))
+ return false;
+
+ if (core_id)
+ packet.core_id = static_cast<lldb::core_id_t>(*core_id);
+ return true;
}
/// \}
return base;
}
-std::chrono::nanoseconds LinuxPerfZeroTscConversion::ToNanos(uint64_t tsc) {
+std::chrono::nanoseconds
+LinuxPerfZeroTscConversion::ToNanos(uint64_t tsc) const {
uint64_t quot = tsc >> time_shift;
uint64_t rem_flag = (((uint64_t)1 << time_shift) - 1);
uint64_t rem = tsc & rem_flag;
((rem * time_mult) >> time_shift)};
}
+uint64_t
+LinuxPerfZeroTscConversion::ToTSC(std::chrono::nanoseconds nanos) const {
+ uint64_t time = nanos.count() - time_zero;
+ uint64_t quot = time / time_mult;
+ uint64_t rem = time % time_mult;
+ return (quot << time_shift) + (rem << time_shift) / time_mult;
+}
+
json::Value toJSON(const LinuxPerfZeroTscConversion &packet) {
return json::Value(json::Object{
{"timeMult", packet.time_mult},
Total approximate memory usage (excluding raw trace): 1.27 KiB
Average memory usage per instruction (excluding raw trace): 61.76 bytes
- Timing:
- Decoding instructions: ''', '''s
+ Timing for this thread:
+ Decoding instructions: ''', '''
Events:
Number of instructions with events: 1
Total approximate memory usage (excluding raw trace): 1.27 KiB
Average memory usage per instruction (excluding raw trace): 61.76 bytes
- Timing:
- Decoding instructions: ''', '''s
+ Timing for this thread:
+ Decoding instructions: ''', '''
Events:
Number of instructions with events: 1
# We must have captured the context switch of when the target resumed
self.assertTrue(found_non_empty_context_switch)
+ self.expect("thread trace dump instructions", substrs=['unimplemented'])
self.traceStopProcess()