From c49d14aca5c7b06a4f896e9923bdab8a33f2866d Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Sun, 23 Oct 2022 10:04:46 -0700 Subject: [PATCH] [trace][intel pt] Simple detection of infinite decoding loops The low-level decoder might fall into an infinite decoding loop for various reasons, the simplest being an infinite direct loop reached due to wrong handling of self-modified code in the kernel, e.g. it might reach ``` 0x0A: pause 0x0C: jump to 0x0A ``` In this case, all the code is sequential and requires no packets to be decoded. The low-level decoder would produce an output like the following ``` 0x0A: pause 0x0C: jump to 0x0A 0x0A: pause 0x0C: jump to 0x0A 0x0A: pause 0x0C: jump to 0x0A ... infinite amount of times ``` These cases require stopping the decoder to avoid infinite work and signal this at least as a trace error. - Add a check that breaks decoding of a single PSB once 500k instructions have been decoded since the last packet was processed. - Add a check that looks for infinite loops after certain amount of instructions have been decoded since the last packet was processed. - Add some `settings` properties for tweaking the thresholds of the checks above. This is also nice because it does the basic work needed for future settings. - Add an AnomalyDetector class that inspects the DecodedThread and the libipt decoder in search for anomalies. These anomalies are then signaled as fatal errors in the trace. - Add an ErrorStats class that keeps track of all the errors in a DecodedThread, with a special counter for fatal errors. - Add an entry for decoded thread errors in the `dump info` command. Some notes are added in the code and in the documention of the settings, so please read them. Besides that, I haven't been unable to create a test case in LLVM style, but I've found an anomaly in the thread #12 of the trace 72533820-3eb8-4465-b8e4-4e6bf0ccca99 at Meta. We have to figure out how to artificially create traces with this kind of anomalies in LLVM style. With this change, that anomalous thread now shows: ``` (lldb)thread trace dump instructions 12 -e -i 23101 thread #12: tid = 8 ...missing instructions 23101: (error) anomalous trace: possible infinite loop detected of size 2 vmlinux-5.12.0-0_fbk8_clang_6656_gc85768aa64da`panic_smp_self_stop + 5 [inlined] rep_nop at processor.h:13:2 23100: 0xffffffff81342785 pause vmlinux-5.12.0-0_fbk8_clang_6656_gc85768aa64da`panic_smp_self_stop + 7 at panic.c:87:2 23099: 0xffffffff81342787 jmp 0xffffffff81342785 ; <+5> [inlined] rep_nop at processor.h:13:2 vmlinux-5.12.0-0_fbk8_clang_6656_gc85768aa64da`panic_smp_self_stop + 5 [inlined] rep_nop at processor.h:13:2 23098: 0xffffffff81342785 pause vmlinux-5.12.0-0_fbk8_clang_6656_gc85768aa64da`panic_smp_self_stop + 7 at panic.c:87:2 23097: 0xffffffff81342787 jmp 0xffffffff81342785 ; <+5> [inlined] rep_nop at processor.h:13:2 vmlinux-5.12.0-0_fbk8_clang_6656_gc85768aa64da`panic_smp_self_stop + 5 [inlined] rep_nop at processor.h:13:2 23096: 0xffffffff81342785 pause vmlinux-5.12.0-0_fbk8_clang_6656_gc85768aa64da`panic_smp_self_stop + 7 at panic.c:87:2 23095: 0xffffffff81342787 jmp 0xffffffff81342785 ; <+5> [inlined] rep_nop at processor.h:13:2 ``` It used to be in an infinite loop where the decoder never stopped. Besides that, the dump info command shows ``` (lldb) thread trace dump info 12 Errors: Number of individual errors: 32 Number of fatal errors: 1 Number of other errors: 31 ``` and in json format ``` (lldb) thread trace dump info 12 -j "errors": { "totalCount": 32, "libiptErrors": {}, "fatalErrors": 1, "otherErrors": 31 } ``` Differential Revision: https://reviews.llvm.org/D136557 --- lldb/include/lldb/Core/PluginManager.h | 7 +- lldb/source/Core/PluginManager.cpp | 18 +- lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt | 13 +- .../Plugins/Trace/intel-pt/DecodedThread.cpp | 37 +++- lldb/source/Plugins/Trace/intel-pt/DecodedThread.h | 62 ++++-- .../Plugins/Trace/intel-pt/LibiptDecoder.cpp | 211 ++++++++++++++++++++- .../source/Plugins/Trace/intel-pt/TraceIntelPT.cpp | 82 +++++++- lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h | 19 ++ .../Trace/intel-pt/TraceIntelPTProperties.td | 24 +++ lldb/test/API/commands/trace/TestTraceDumpInfo.py | 6 + lldb/test/API/commands/trace/TestTraceLoad.py | 12 ++ 11 files changed, 453 insertions(+), 38 deletions(-) create mode 100644 lldb/source/Plugins/Trace/intel-pt/TraceIntelPTProperties.td diff --git a/lldb/include/lldb/Core/PluginManager.h b/lldb/include/lldb/Core/PluginManager.h index ed8917f..4e036e0 100644 --- a/lldb/include/lldb/Core/PluginManager.h +++ b/lldb/include/lldb/Core/PluginManager.h @@ -342,7 +342,8 @@ public: llvm::StringRef name, llvm::StringRef description, TraceCreateInstanceFromBundle create_callback_from_bundle, TraceCreateInstanceForLiveProcess create_callback_for_live_process, - llvm::StringRef schema); + llvm::StringRef schema, + DebuggerInitializeCallback debugger_init_callback); static bool UnregisterPlugin(TraceCreateInstanceFromBundle create_callback); @@ -487,6 +488,10 @@ public: Debugger &debugger, const lldb::OptionValuePropertiesSP &properties_sp, ConstString description, bool is_global_property); + static bool CreateSettingForTracePlugin( + Debugger &debugger, const lldb::OptionValuePropertiesSP &properties_sp, + ConstString description, bool is_global_property); + static lldb::OptionValuePropertiesSP GetSettingForObjectFilePlugin(Debugger &debugger, ConstString setting_name); diff --git a/lldb/source/Core/PluginManager.cpp b/lldb/source/Core/PluginManager.cpp index ff0c1dc..05ca0ff 100644 --- a/lldb/source/Core/PluginManager.cpp +++ b/lldb/source/Core/PluginManager.cpp @@ -1051,9 +1051,10 @@ struct TraceInstance llvm::StringRef name, llvm::StringRef description, CallbackType create_callback_from_bundle, TraceCreateInstanceForLiveProcess create_callback_for_live_process, - llvm::StringRef schema) + llvm::StringRef schema, DebuggerInitializeCallback debugger_init_callback) : PluginInstance( - name, description, create_callback_from_bundle), + name, description, create_callback_from_bundle, + debugger_init_callback), schema(schema), create_callback_for_live_process(create_callback_for_live_process) {} @@ -1072,10 +1073,10 @@ bool PluginManager::RegisterPlugin( llvm::StringRef name, llvm::StringRef description, TraceCreateInstanceFromBundle create_callback_from_bundle, TraceCreateInstanceForLiveProcess create_callback_for_live_process, - llvm::StringRef schema) { + llvm::StringRef schema, DebuggerInitializeCallback debugger_init_callback) { return GetTracePluginInstances().RegisterPlugin( name, description, create_callback_from_bundle, - create_callback_for_live_process, schema); + create_callback_for_live_process, schema, debugger_init_callback); } bool PluginManager::UnregisterPlugin( @@ -1506,6 +1507,7 @@ CreateSettingForPlugin(Debugger &debugger, ConstString plugin_type_name, static const char *kDynamicLoaderPluginName("dynamic-loader"); static const char *kPlatformPluginName("platform"); static const char *kProcessPluginName("process"); +static const char *kTracePluginName("trace"); static const char *kObjectFilePluginName("object-file"); static const char *kSymbolFilePluginName("symbol-file"); static const char *kJITLoaderPluginName("jit-loader"); @@ -1559,6 +1561,14 @@ bool PluginManager::CreateSettingForProcessPlugin( properties_sp, description, is_global_property); } +bool PluginManager::CreateSettingForTracePlugin( + Debugger &debugger, const lldb::OptionValuePropertiesSP &properties_sp, + ConstString description, bool is_global_property) { + return CreateSettingForPlugin(debugger, ConstString(kTracePluginName), + ConstString("Settings for trace plug-ins"), + properties_sp, description, is_global_property); +} + lldb::OptionValuePropertiesSP PluginManager::GetSettingForObjectFilePlugin(Debugger &debugger, ConstString setting_name) { diff --git a/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt b/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt index 4148a02..90f36ed 100644 --- a/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt +++ b/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt @@ -13,6 +13,14 @@ lldb_tablegen(TraceIntelPTCommandOptions.inc -gen-lldb-option-defs SOURCE TraceIntelPTOptions.td TARGET TraceIntelPTOptionsGen) +lldb_tablegen(TraceIntelPTProperties.inc -gen-lldb-property-defs + SOURCE TraceIntelPTProperties.td + TARGET TraceIntelPTPropertiesGen) + +lldb_tablegen(TraceIntelPTPropertiesEnum.inc -gen-lldb-property-enum-defs + SOURCE TraceIntelPTProperties.td + TARGET TraceIntelPTPropertiesEnumGen) + add_lldb_library(lldbPluginTraceIntelPT PLUGIN CommandObjectTraceStartIntelPT.cpp DecodedThread.cpp @@ -38,4 +46,7 @@ add_lldb_library(lldbPluginTraceIntelPT PLUGIN ) -add_dependencies(lldbPluginTraceIntelPT TraceIntelPTOptionsGen) +add_dependencies(lldbPluginTraceIntelPT + TraceIntelPTOptionsGen + TraceIntelPTPropertiesGen + TraceIntelPTPropertiesEnumGen) diff --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp index 38a54ff..0a84906 100644 --- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp @@ -170,6 +170,10 @@ DecodedThread::GetNanosecondsRangeByIndex(uint64_t item_index) { return prev(next_it)->second; } +uint64_t DecodedThread::GetTotalInstructionCount() const { + return m_insn_count; +} + void DecodedThread::AppendEvent(lldb::TraceEvent event) { CreateNewTraceItem(lldb::eTraceItemKindEvent).event = event; m_events_stats.RecordEvent(event); @@ -177,27 +181,25 @@ void DecodedThread::AppendEvent(lldb::TraceEvent event) { void DecodedThread::AppendInstruction(const pt_insn &insn) { CreateNewTraceItem(lldb::eTraceItemKindInstruction).load_address = insn.ip; + m_insn_count++; } void DecodedThread::AppendError(const IntelPTError &error) { CreateNewTraceItem(lldb::eTraceItemKindError).error = ConstString(error.message()).AsCString(); + m_error_stats.RecordError(/*fatal=*/false); } -void DecodedThread::AppendCustomError(StringRef err) { +void DecodedThread::AppendCustomError(StringRef err, bool fatal) { CreateNewTraceItem(lldb::eTraceItemKindError).error = ConstString(err).AsCString(); + m_error_stats.RecordError(fatal); } lldb::TraceEvent DecodedThread::GetEventByIndex(int item_index) const { return m_item_data[item_index].event; } -void DecodedThread::LibiptErrorsStats::RecordError(int libipt_error_code) { - libipt_errors_counts[pt_errstr(pt_errcode(libipt_error_code))]++; - total_count++; -} - const DecodedThread::EventsStats &DecodedThread::GetEventsStats() const { return m_events_stats; } @@ -207,6 +209,29 @@ void DecodedThread::EventsStats::RecordEvent(lldb::TraceEvent event) { total_count++; } +uint64_t DecodedThread::ErrorStats::GetTotalCount() const { + uint64_t total = 0; + for (const auto &[kind, count] : libipt_errors) + total += count; + + return total + other_errors + fatal_errors; +} + +void DecodedThread::ErrorStats::RecordError(bool fatal) { + if (fatal) + fatal_errors++; + else + other_errors++; +} + +void DecodedThread::ErrorStats::RecordError(int libipt_error_code) { + libipt_errors[pt_errstr(pt_errcode(libipt_error_code))]++; +} + +const DecodedThread::ErrorStats &DecodedThread::GetErrorStats() const { + return m_error_stats; +} + lldb::TraceItemKind DecodedThread::GetItemKindByIndex(uint64_t item_index) const { return static_cast(m_item_kinds[item_index]); diff --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h index 4e84540..bbe0fb9 100644 --- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h +++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h @@ -61,15 +61,6 @@ class DecodedThread : public std::enable_shared_from_this { public: using TSC = uint64_t; - // Struct holding counts for libipts errors; - struct LibiptErrorsStats { - // libipt error -> count - llvm::DenseMap libipt_errors_counts; - size_t total_count = 0; - - void RecordError(int libipt_error_code); - }; - /// A structure that represents a maximal range of trace items associated to /// the same TSC value. struct TSCRange { @@ -125,16 +116,38 @@ public: bool InRange(uint64_t item_index) const; }; - // Struct holding counts for events; + // Struct holding counts for events struct EventsStats { /// A count for each individual event kind. We use an unordered map instead /// of a DenseMap because DenseMap can't understand enums. - std::unordered_map events_counts; - size_t total_count = 0; + /// + /// Note: We can't use DenseMap because lldb::TraceEvent is not + /// automatically handled correctly by DenseMap. We'd need to implement a + /// custom DenseMapInfo struct for TraceEvent and that's a bit too much for + /// such a simple structure. + std::unordered_map events_counts; + uint64_t total_count = 0; void RecordEvent(lldb::TraceEvent event); }; + // Struct holding counts for errors + struct ErrorStats { + /// The following counters are mutually exclusive + /// \{ + uint64_t other_errors = 0; + uint64_t fatal_errors = 0; + // libipt error -> count + llvm::DenseMap libipt_errors; + /// \} + + uint64_t GetTotalCount() const; + + void RecordError(int libipt_error_code); + + void RecordError(bool fatal); + }; + DecodedThread( lldb::ThreadSP thread_sp, const llvm::Optional &tsc_conversion); @@ -194,12 +207,22 @@ public: /// The load address of the instruction at the given index. lldb::addr_t GetInstructionLoadAddress(uint64_t item_index) const; + /// \return + /// The number of instructions in this trace (not trace items). + uint64_t GetTotalInstructionCount() const; + /// Return an object with statistics of the trace events that happened. /// /// \return /// The stats object of all the events. const EventsStats &GetEventsStats() const; + /// Return an object with statistics of the trace errors that happened. + /// + /// \return + /// The stats object of all the events. + const ErrorStats &GetErrorStats() const; + /// The approximate size in bytes used by this instance, /// including all the already decoded instructions. size_t CalculateApproximateMemoryUsage() const; @@ -221,7 +244,14 @@ public: void AppendError(const IntelPTError &error); /// Append a custom decoding. - void AppendCustomError(llvm::StringRef error); + /// + /// \param[in] error + /// The error message. + /// + /// \param[in] fatal + /// If \b true, then the whole decoded thread should be discarded because a + /// fatal anomaly has been found. + void AppendCustomError(llvm::StringRef error, bool fatal = false); /// Append an event. void AppendEvent(lldb::TraceEvent); @@ -289,10 +319,16 @@ private: /// TSC -> nanos conversion utility. llvm::Optional m_tsc_conversion; + /// Statistics of all tracing errors. + ErrorStats m_error_stats; + /// Statistics of all tracing events. EventsStats m_events_stats; /// Total amount of time spent decoding. std::chrono::milliseconds m_total_decoding_time{0}; + + /// Total number of instructions in the trace. + uint64_t m_insn_count = 0; }; using DecodedThreadSP = std::shared_ptr; diff --git a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp index 9526ad8..10f0bbe 100644 --- a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp @@ -128,6 +128,182 @@ CreateQueryDecoder(TraceIntelPT &trace_intel_pt, ArrayRef buffer) { return PtQueryDecoderUP(decoder_ptr, QueryDecoderDeleter); } +/// Class used to identify anomalies in traces, which should often indicate a +/// fatal error in the trace. +class PSBBlockAnomalyDetector { +public: + PSBBlockAnomalyDetector(pt_insn_decoder &decoder, + TraceIntelPT &trace_intel_pt, + DecodedThread &decoded_thread) + : m_decoder(decoder), m_decoded_thread(decoded_thread) { + m_infinite_decoding_loop_threshold = + trace_intel_pt.GetGlobalProperties() + .GetInfiniteDecodingLoopVerificationThreshold(); + m_extremely_large_decoding_threshold = + trace_intel_pt.GetGlobalProperties() + .GetExtremelyLargeDecodingThreshold(); + m_next_infinite_decoding_loop_threshold = + m_infinite_decoding_loop_threshold; + } + + /// \return + /// An \a llvm::Error if an anomaly that includes the last instruction item + /// in the trace, or \a llvm::Error::success otherwise. + Error DetectAnomaly() { + RefreshPacketOffset(); + uint64_t insn_added_since_last_packet_offset = + m_decoded_thread.GetTotalInstructionCount() - + m_insn_count_at_last_packet_offset; + + // We want to check if we might have fallen in an infinite loop. As this + // check is not a no-op, we want to do it when we have a strong suggestion + // that things went wrong. First, we check how many instructions we have + // decoded since we processed an Intel PT packet for the last time. This + // number should be low, because at some point we should see branches, jumps + // or interrupts that require a new packet to be processed. Once we reach + // certain threshold we start analyzing the trace. + // + // We use the number of decoded instructions since the last Intel PT packet + // as a proxy because, in fact, we don't expect a single packet to give, + // say, 100k instructions. That would mean that there are 100k sequential + // instructions without any single branch, which is highly unlikely, or that + // we found an infinite loop using direct jumps, e.g. + // + // 0x0A: nop or pause + // 0x0C: jump to 0x0A + // + // which is indeed code that is found in the kernel. I presume we reach + // this kind of code in the decoder because we don't handle self-modified + // code in post-mortem kernel traces. + // + // We are right now only signaling the anomaly as a trace error, but it + // would be more conservative to also discard all the trace items found in + // this PSB. I prefer not to do that for the time being to give more + // exposure to this kind of anomalies and help debugging. Discarding the + // trace items would just make investigation harded. + // + // Finally, if the user wants to see if a specific thread has an anomaly, + // it's enough to run the `thread trace dump info` command and look for the + // count of this kind of errors. + + if (insn_added_since_last_packet_offset >= + m_extremely_large_decoding_threshold) { + // In this case, we have decoded a massive amount of sequential + // instructions that don't loop. Honestly I wonder if this will ever + // happen, but better safe than sorry. + return createStringError( + inconvertibleErrorCode(), + "anomalous trace: possible infinite trace detected"); + } + if (insn_added_since_last_packet_offset == + m_next_infinite_decoding_loop_threshold) { + if (Optional loop_size = TryIdentifyInfiniteLoop()) { + return createStringError( + inconvertibleErrorCode(), + "anomalous trace: possible infinite loop detected of size %" PRIu64, + *loop_size); + } + m_next_infinite_decoding_loop_threshold *= 2; + } + return Error::success(); + } + +private: + Optional TryIdentifyInfiniteLoop() { + // The infinite decoding loops we'll encounter are due to sequential + // instructions that repeat themselves due to direct jumps, therefore in a + // cycle each individual address will only appear once. We use this + // information to detect cycles by finding the last 2 ocurrences of the last + // instruction added to the trace. Then we traverse the trace making sure + // that these two instructions where the ends of a repeating loop. + + // This is a utility that returns the most recent instruction index given a + // position in the trace. If the given position is an instruction, that + // position is returned. It skips non-instruction items. + auto most_recent_insn_index = + [&](uint64_t item_index) -> Optional { + while (true) { + if (m_decoded_thread.GetItemKindByIndex(item_index) == + lldb::eTraceItemKindInstruction) { + return item_index; + } + if (item_index == 0) + return None; + item_index--; + } + return None; + }; + // Similar to most_recent_insn_index but skips the starting position. + auto prev_insn_index = [&](uint64_t item_index) -> Optional { + if (item_index == 0) + return None; + return most_recent_insn_index(item_index - 1); + }; + + // We first find the most recent instruction. + Optional last_insn_index_opt = + *prev_insn_index(m_decoded_thread.GetItemsCount()); + if (!last_insn_index_opt) + return None; + uint64_t last_insn_index = *last_insn_index_opt; + + // We then find the most recent previous occurrence of that last + // instruction. + Optional last_insn_copy_index = prev_insn_index(last_insn_index); + uint64_t loop_size = 1; + while (last_insn_copy_index && + m_decoded_thread.GetInstructionLoadAddress(*last_insn_copy_index) != + m_decoded_thread.GetInstructionLoadAddress(last_insn_index)) { + last_insn_copy_index = prev_insn_index(*last_insn_copy_index); + loop_size++; + } + if (!last_insn_copy_index) + return None; + + // Now we check if the segment between these last positions of the last + // instruction address is in fact a repeating loop. + uint64_t loop_elements_visited = 1; + uint64_t insn_index_a = last_insn_index, + insn_index_b = *last_insn_copy_index; + while (loop_elements_visited < loop_size) { + if (Optional prev = prev_insn_index(insn_index_a)) + insn_index_a = *prev; + else + return None; + if (Optional prev = prev_insn_index(insn_index_b)) + insn_index_b = *prev; + else + return None; + if (m_decoded_thread.GetInstructionLoadAddress(insn_index_a) != + m_decoded_thread.GetInstructionLoadAddress(insn_index_b)) + return None; + loop_elements_visited++; + } + return loop_size; + } + + // Refresh the internal counters if a new packet offset has been visited + void RefreshPacketOffset() { + lldb::addr_t new_packet_offset; + if (!IsLibiptError(pt_insn_get_offset(&m_decoder, &new_packet_offset)) && + new_packet_offset != m_last_packet_offset) { + m_last_packet_offset = new_packet_offset; + m_next_infinite_decoding_loop_threshold = + m_infinite_decoding_loop_threshold; + m_insn_count_at_last_packet_offset = + m_decoded_thread.GetTotalInstructionCount(); + } + } + + pt_insn_decoder &m_decoder; + DecodedThread &m_decoded_thread; + lldb::addr_t m_last_packet_offset = LLDB_INVALID_ADDRESS; + uint64_t m_insn_count_at_last_packet_offset = 0; + uint64_t m_infinite_decoding_loop_threshold; + uint64_t m_next_infinite_decoding_loop_threshold; + uint64_t m_extremely_large_decoding_threshold; +}; + /// Class that decodes a raw buffer for a single PSB block using the low level /// libipt library. It assumes that kernel and user mode instructions are not /// mixed in the same PSB block. @@ -155,9 +331,10 @@ public: /// appended to. It might have already some instructions. PSBBlockDecoder(PtInsnDecoderUP &&decoder_up, const PSBBlock &psb_block, Optional next_block_ip, - DecodedThread &decoded_thread) + DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt) : m_decoder_up(std::move(decoder_up)), m_psb_block(psb_block), - m_next_block_ip(next_block_ip), m_decoded_thread(decoded_thread) {} + m_next_block_ip(next_block_ip), m_decoded_thread(decoded_thread), + m_anomaly_detector(*m_decoder_up, trace_intel_pt, decoded_thread) {} /// \param[in] trace_intel_pt /// The main Trace object that own the PSB block. @@ -192,7 +369,7 @@ public: return decoder_up.takeError(); return PSBBlockDecoder(std::move(*decoder_up), psb_block, next_block_ip, - decoded_thread); + decoded_thread, trace_intel_pt); } void DecodePSBBlock() { @@ -213,12 +390,24 @@ public: } private: - /// Decode all the instructions and events of the given PSB block. - /// - /// \param[in] status - /// The status that was result of synchronizing to the most recent PSB. + /// Append an instruction and return \b false if and only if a serious anomaly + /// has been detected. + bool AppendInstructionAndDetectAnomalies(const pt_insn &insn) { + m_decoded_thread.AppendInstruction(insn); + + if (Error err = m_anomaly_detector.DetectAnomaly()) { + m_decoded_thread.AppendCustomError(toString(std::move(err)), + /*fatal=*/true); + return false; + } + return true; + } + /// Decode all the instructions and events of the given PSB block. The + /// decoding loop might stop abruptly if an infinite decoding loop is + /// detected. void DecodeInstructionsAndEvents(int status) { pt_insn insn; + while (true) { status = ProcessPTEvents(status); @@ -238,7 +427,9 @@ private: } else if (IsEndOfStream(status)) { break; } - m_decoded_thread.AppendInstruction(insn); + + if (!AppendInstructionAndDetectAnomalies(insn)) + return; } // We need to keep querying non-branching instructions until we hit the @@ -247,7 +438,8 @@ private: // https://github.com/intel/libipt/blob/master/doc/howto_libipt.md#parallel-decode if (m_next_block_ip && insn.ip != 0) { while (insn.ip != *m_next_block_ip) { - m_decoded_thread.AppendInstruction(insn); + if (!AppendInstructionAndDetectAnomalies(insn)) + return; status = pt_insn_next(m_decoder_up.get(), &insn, sizeof(insn)); @@ -313,6 +505,7 @@ private: PSBBlock m_psb_block; Optional m_next_block_ip; DecodedThread &m_decoded_thread; + PSBBlockAnomalyDetector m_anomaly_detector; }; Error lldb_private::trace_intel_pt::DecodeSingleTraceForThread( diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp index a732d4d..1782675 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp @@ -16,6 +16,7 @@ #include "TraceIntelPTBundleSaver.h" #include "TraceIntelPTConstants.h" #include "lldb/Core/PluginManager.h" +#include "lldb/Interpreter/OptionValueProperties.h" #include "lldb/Target/Process.h" #include "lldb/Target/Target.h" #include "llvm/ADT/None.h" @@ -39,11 +40,57 @@ TraceIntelPT::GetThreadTraceStartCommand(CommandInterpreter &interpreter) { new CommandObjectThreadTraceStartIntelPT(*this, interpreter)); } +#define LLDB_PROPERTIES_traceintelpt +#include "TraceIntelPTProperties.inc" + +enum { +#define LLDB_PROPERTIES_traceintelpt +#include "TraceIntelPTPropertiesEnum.inc" +}; + +ConstString TraceIntelPT::PluginProperties::GetSettingName() { + return ConstString(TraceIntelPT::GetPluginNameStatic()); +} + +TraceIntelPT::PluginProperties::PluginProperties() : Properties() { + m_collection_sp = std::make_shared(GetSettingName()); + m_collection_sp->Initialize(g_traceintelpt_properties); +} + +uint64_t +TraceIntelPT::PluginProperties::GetInfiniteDecodingLoopVerificationThreshold() { + const uint32_t idx = ePropertyInfiniteDecodingLoopVerificationThreshold; + return m_collection_sp->GetPropertyAtIndexAsUInt64( + nullptr, idx, g_traceintelpt_properties[idx].default_uint_value); +} + +uint64_t TraceIntelPT::PluginProperties::GetExtremelyLargeDecodingThreshold() { + const uint32_t idx = ePropertyExtremelyLargeDecodingThreshold; + return m_collection_sp->GetPropertyAtIndexAsUInt64( + nullptr, idx, g_traceintelpt_properties[idx].default_uint_value); +} + +TraceIntelPT::PluginProperties &TraceIntelPT::GetGlobalProperties() { + static TraceIntelPT::PluginProperties g_settings; + return g_settings; +} + void TraceIntelPT::Initialize() { - PluginManager::RegisterPlugin(GetPluginNameStatic(), "Intel Processor Trace", - CreateInstanceForTraceBundle, - CreateInstanceForLiveProcess, - TraceIntelPTBundleLoader::GetSchema()); + PluginManager::RegisterPlugin( + GetPluginNameStatic(), "Intel Processor Trace", + CreateInstanceForTraceBundle, CreateInstanceForLiveProcess, + TraceIntelPTBundleLoader::GetSchema(), DebuggerInitialize); +} + +void TraceIntelPT::DebuggerInitialize(Debugger &debugger) { + if (!PluginManager::GetSettingForProcessPlugin( + debugger, PluginProperties::GetSettingName())) { + const bool is_global_setting = true; + PluginManager::CreateSettingForTracePlugin( + debugger, GetGlobalProperties().GetValueProperties(), + ConstString("Properties for the intel-pt trace plug-in."), + is_global_setting); + } } void TraceIntelPT::Terminate() { @@ -273,6 +320,20 @@ void TraceIntelPT::DumpTraceInfo(Thread &thread, Stream &s, bool verbose, event_to_count.second); } } + // Trace error stats + { + const DecodedThread::ErrorStats &error_stats = + decoded_thread_sp->GetErrorStats(); + s << "\n Errors:\n"; + s.Format(" Number of individual errors: {0}\n", + error_stats.GetTotalCount()); + s.Format(" Number of fatal errors: {0}\n", error_stats.fatal_errors); + for (const auto &[kind, count] : error_stats.libipt_errors) { + s.Format(" Number of libipt errors of kind [{0}]: {1}\n", kind, + count); + } + s.Format(" Number of other errors: {0}\n", error_stats.other_errors); + } if (storage.multicpu_decoder) { s << "\n Multi-cpu decoding:\n"; @@ -353,6 +414,19 @@ void TraceIntelPT::DumpTraceInfoAsJson(Thread &thread, Stream &s, } }); }); + // Trace error stats + const DecodedThread::ErrorStats &error_stats = + decoded_thread_sp->GetErrorStats(); + json_str.attributeObject("errors", [&] { + json_str.attribute("totalCount", error_stats.GetTotalCount()); + json_str.attributeObject("libiptErrors", [&] { + for (const auto &[kind, count] : error_stats.libipt_errors) { + json_str.attribute(kind, count); + } + }); + json_str.attribute("fatalErrors", error_stats.fatal_errors); + json_str.attribute("otherErrors", error_stats.other_errors); + }); if (storage.multicpu_decoder) { json_str.attribute( diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h index 542cad0..f671183 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h @@ -22,6 +22,23 @@ namespace trace_intel_pt { class TraceIntelPT : public Trace { public: + /// Properties to be used with the `settings` command. + class PluginProperties : public Properties { + public: + static ConstString GetSettingName(); + + PluginProperties(); + + ~PluginProperties() override = default; + + uint64_t GetInfiniteDecodingLoopVerificationThreshold(); + + uint64_t GetExtremelyLargeDecodingThreshold(); + }; + + /// Return the global properties for this trace plug-in. + static PluginProperties &GetGlobalProperties(); + void Dump(Stream *s) const override; llvm::Expected SaveToDisk(FileSpec directory, @@ -59,6 +76,8 @@ public: CreateInstanceForLiveProcess(Process &process); static llvm::StringRef GetPluginNameStatic() { return "intel-pt"; } + + static void DebuggerInitialize(Debugger &debugger); /// \} lldb::CommandObjectSP diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTProperties.td b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTProperties.td new file mode 100644 index 0000000..d338df1 --- /dev/null +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTProperties.td @@ -0,0 +1,24 @@ +include "../../../../include/lldb/Core/PropertiesBase.td" + +let Definition = "traceintelpt" in { + def InfiniteDecodingLoopVerificationThreshold: + Property<"infinite-decoding-loop-verification-threshold", "UInt64">, + Global, + DefaultUnsignedValue<10000>, + Desc<"Specify how many instructions following an individual Intel PT " + "packet must have been decoded before triggering the verification of " + "infinite decoding loops. If no decoding loop has been found after this " + "threshold T, another attempt will be done after 2T instructions, then " + "4T, 8T and so on, which guarantees a total linear time spent checking " + "this anomaly. If a loop is found, then decoding of the corresponding " + "PSB block is stopped. An error is hence emitted in the trace and " + "decoding is resumed in the next PSB block.">; + def ExtremelyLargeDecodingThreshold: + Property<"extremely-large-decoding-threshold", "UInt64">, + Global, + DefaultUnsignedValue<500000>, + Desc<"Specify how many instructions following an individual Intel PT " + "packet must have been decoded before stopping the decoding of the " + "corresponding PSB block. An error is hence emitted in the trace and " + "decoding is resumed in the next PSB block.">; +} diff --git a/lldb/test/API/commands/trace/TestTraceDumpInfo.py b/lldb/test/API/commands/trace/TestTraceDumpInfo.py index 1407cb3..c6a62c7 100644 --- a/lldb/test/API/commands/trace/TestTraceDumpInfo.py +++ b/lldb/test/API/commands/trace/TestTraceDumpInfo.py @@ -78,6 +78,12 @@ class TestTraceDumpInfo(TraceIntelPTTestCaseBase): "software disabled tracing": 2, "trace synchronization point": 1 } + }, + "errors": { + "totalCount": 0, + "libiptErrors": {}, + "fatalErrors": 0, + "otherErrors": 0 } }, "globalStats": { diff --git a/lldb/test/API/commands/trace/TestTraceLoad.py b/lldb/test/API/commands/trace/TestTraceLoad.py index e55d680..67a8f56 100644 --- a/lldb/test/API/commands/trace/TestTraceLoad.py +++ b/lldb/test/API/commands/trace/TestTraceLoad.py @@ -37,6 +37,12 @@ class TestTraceLoad(TraceIntelPTTestCaseBase): "totalCount": 0, "individualCounts": {} }, + "errors": { + "totalCount": 0, + "libiptErrors": {}, + "fatalErrors": 0, + "otherErrors": 0 + }, "continuousExecutions": 0, "PSBBlocks": 0 }, @@ -72,6 +78,12 @@ class TestTraceLoad(TraceIntelPTTestCaseBase): "HW clock tick": 8 } }, + "errors": { + "totalCount": 1, + "libiptErrors": {}, + "fatalErrors": 0, + "otherErrors": 1 + }, "continuousExecutions": 1, "PSBBlocks": 1 }, -- 2.7.4