[lldb][trace] Add a basic function call dump [2] - Implement the reconstruction algorithm

author Walter Erquinigo <wallace@fb.com>

Mon, 10 Oct 2022 19:57:13 +0000 (12:57 -0700)

committer Walter Erquinigo <wallace@fb.com>

Tue, 18 Oct 2022 20:57:53 +0000 (13:57 -0700)
author Walter Erquinigo <wallace@fb.com>
Mon, 10 Oct 2022 19:57:13 +0000 (12:57 -0700)
committer Walter Erquinigo <wallace@fb.com>
Tue, 18 Oct 2022 20:57:53 +0000 (13:57 -0700)
diff --git a/lldb/include/lldb/Target/TraceDumper.h b/lldb/include/lldb/Target/TraceDumper.h

index 2936cdc..bcbe8e5 100644 (file)
--- a/lldb/include/lldb/Target/TraceDumper.h
+++ b/lldb/include/lldb/Target/TraceDumper.h
@@ -6,9 +6,9 @@
  //
  //===----------------------------------------------------------------------===//
  
-#include "lldb/Target/TraceCursor.h"
-
  #include "lldb/Symbol/SymbolContext.h"
+#include "lldb/Target/TraceCursor.h"
+#include <optional>
  
  #ifndef LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H
  #define LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H
@@ -73,6 +73,304 @@ public:
      llvm::Optional<lldb::cpu_id_t> cpu_id;
    };
  
+  /// An object representing a traced function call.
+  ///
+  /// A function call is represented using segments and subcalls.
+  ///
+  /// TracedSegment:
+  ///   A traced segment is a maximal list of consecutive traced instructions
+  ///   that belong to the same function call. A traced segment will end in
+  ///   three possible ways:
+  ///     - With a call to a function deeper in the callstack. In this case,
+  ///     most of the times this nested call will return
+  ///       and resume with the next segment of this segment's owning function
+  ///       call. More on this later.
+  ///     - Abruptly due to end of trace. In this case, we weren't able to trace
+  ///     the end of this function call.
+  ///     - Simply a return higher in the callstack.
+  ///
+  ///   In terms of implementation details, as segment can be represented with
+  ///   the beginning and ending instruction IDs from the instruction trace.
+  ///
+  ///  UntracedSegment:
+  ///   It might happen that we didn't trace the beginning of a function and we
+  ///   saw it for the first time as part of a return. As a way to signal these
+  ///   cases, we have a placeholder UntracedSegment class that completes the
+  ///   callgraph.
+  ///
+  ///  Example:
+  ///   We might have this piece of execution:
+  ///
+  ///     main() [offset 0x00 to 0x20] [traced instruction ids 1 to 4]
+  ///       foo()  [offset 0x00 to 0x80] [traced instruction ids 5 to 20] # main
+  ///       invoked foo
+  ///     main() [offset 0x24 to 0x40] [traced instruction ids 21 to 30]
+  ///
+  ///   In this case, our function main invokes foo. We have 3 segments: main
+  ///   [offset 0x00 to 0x20], foo() [offset 0x00 to 0x80], and main() [offset
+  ///   0x24 to 0x40]. We also have the instruction ids from the corresponding
+  ///   linear instruction trace for each segment.
+  ///
+  ///   But what if we started tracing since the middle of foo? Then we'd have
+  ///   an incomplete trace
+  ///
+  ///       foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10]
+  ///     main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20]
+  ///
+  ///   Notice that we changed the instruction ids because this is a new trace.
+  ///   Here, in order to have a somewhat complete tree with good traversal
+  ///   capabilities, we can create an UntracedSegment to signal the portion of
+  ///   main() that we didn't trace. We don't know if this segment was in fact
+  ///   multiple segments with many function calls. We'll never know. The
+  ///   resulting tree looks like the following:
+  ///
+  ///     main() [untraced]
+  ///       foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10]
+  ///     main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20]
+  ///
+  ///   And in pseudo-code:
+  ///
+  ///     FunctionCall [
+  ///       UntracedSegment {
+  ///         symbol: main()
+  ///         nestedCall: FunctionCall [ # this untraced segment has a nested
+  ///         call
+  ///           TracedSegment {
+  ///             symbol: foo()
+  ///             fromInstructionId: 1
+  ///             toInstructionId: 10
+  ///             nestedCall: none # this doesn't have a nested call
+  ///           }
+  ///         }
+  ///       ],
+  ///       TracedSegment {
+  ///         symbol: main()
+  ///         fromInstructionId: 11
+  ///         toInstructionId: 20
+  ///         nestedCall: none # this also doesn't have a nested call
+  ///       }
+  ///   ]
+  ///
+  ///   We can see the nested structure and how instructions are represented as
+  ///   segments.
+  ///
+  ///
+  ///   Returns:
+  ///     Code doesn't always behave intuitively. Some interesting functions
+  ///     might modify the stack and thus change the behavior of common
+  ///     instructions like CALL and RET. We try to identify these cases, and
+  ///     the result is that the return edge from a segment might connect with a
+  ///     function call very high the stack. For example, you might have
+  ///
+  ///     main()
+  ///       foo()
+  ///         bar()
+  ///         # here bar modifies the stack and pops foo() from it. Then it
+  ///         finished the a RET (return)
+  ///     main() # we came back directly to main()
+  ///
+  ///     I have observed some trampolines doing this, as well as some std
+  ///     functions (like ostream functions). So consumers should be aware of
+  ///     this.
+  ///
+  ///     There are all sorts of "abnormal" behaviors you can see in code, and
+  ///     whenever we fail at identifying what's going on, we prefer to create a
+  ///     new tree.
+  ///
+  ///   Function call forest:
+  ///     A single tree would suffice if a trace didn't contain errors nor
+  ///     abnormal behaviors that made our algorithms fail. Sadly these
+  ///     anomalies exist and we prefer not to use too many heuristics and
+  ///     probably end up lying to the user. So we create a new tree from the
+  ///     point we can't continue using the previous tree. This results in
+  ///     having a forest instead of a single tree. This is probably the best we
+  ///     can do if we consumers want to use this data to perform performance
+  ///     analysis or reverse debugging.
+  ///
+  ///   Non-functions:
+  ///     Not everything in a program is a function. There are blocks of
+  ///     instructions that are simply labeled or even regions without symbol
+  ///     information that we don't what they are. We treat all of them as
+  ///     functions for simplicity.
+  ///
+  ///   Errors:
+  ///     Whenever an error is found, a new tree with a single segment is
+  ///     created. All consecutive errors after the original one are then
+  ///     appended to this segment. As a note, something that GDB does is to use
+  ///     some heuristics to merge trees that were interrupted by errors. We are
+  ///     leaving that out of scope until a feature like that one is really
+  ///     needed.
+
+  /// Forward declaration
+  class FunctionCall;
+  using FunctionCallUP = std::unique_ptr<FunctionCall>;
+
+  class FunctionCall {
+  public:
+    class TracedSegment {
+    public:
+      /// \param[in] cursor_sp
+      ///   A cursor pointing to the beginning of the segment.
+      ///
+      /// \param[in] symbol_info
+      ///   The symbol information of the first instruction of the segment.
+      ///
+      /// \param[in] call
+      ///   The FunctionCall object that owns this segment.
+      TracedSegment(const lldb::TraceCursorSP &cursor_sp,
+                    const SymbolInfo &symbol_info, FunctionCall &owning_call)
+          : m_first_insn_id(cursor_sp->GetId()),
+            m_last_insn_id(cursor_sp->GetId()),
+            m_first_symbol_info(symbol_info), m_last_symbol_info(symbol_info),
+            m_owning_call(owning_call) {}
+
+      /// \return
+      ///   The chronologically first instruction ID in this segment.
+      lldb::user_id_t GetFirstInstructionID() const;
+      /// \return
+      ///   The chronologically last instruction ID in this segment.
+      lldb::user_id_t GetLastInstructionID() const;
+
+      /// \return
+      ///   The symbol information of the chronologically first instruction ID
+      ///   in this segment.
+      const SymbolInfo &GetFirstInstructionSymbolInfo() const;
+
+      /// \return
+      ///   The symbol information of the chronologically last instruction ID in
+      ///   this segment.
+      const SymbolInfo &GetLastInstructionSymbolInfo() const;
+
+      /// \return
+      ///   Get the call that owns this segment.
+      const FunctionCall &GetOwningCall() const;
+
+      /// Append a new instruction to this segment.
+      ///
+      /// \param[in] cursor_sp
+      ///   A cursor pointing to the new instruction.
+      ///
+      /// \param[in] symbol_info
+      ///   The symbol information of the new instruction.
+      void AppendInsn(const lldb::TraceCursorSP &cursor_sp,
+                      const SymbolInfo &symbol_info);
+
+      /// Create a nested call at the end of this segment.
+      ///
+      /// \param[in] cursor_sp
+      ///   A cursor pointing to the first instruction of the nested call.
+      ///
+      /// \param[in] symbol_info
+      ///   The symbol information of the first instruction of the nested call.
+      FunctionCall &CreateNestedCall(const lldb::TraceCursorSP &cursor_sp,
+                                     const SymbolInfo &symbol_info);
+
+      /// Executed the given callback if there's a nested call at the end of
+      /// this segment.
+      void IfNestedCall(std::function<void(const FunctionCall &function_call)>
+                            callback) const;
+
+    private:
+      TracedSegment(const TracedSegment &) = delete;
+      TracedSegment &operator=(TracedSegment const &);
+
+      /// Delimiting instruction IDs taken chronologically.
+      /// \{
+      lldb::user_id_t m_first_insn_id;
+      lldb::user_id_t m_last_insn_id;
+      /// \}
+      /// An optional nested call starting at the end of this segment.
+      FunctionCallUP m_nested_call;
+      /// The symbol information of the delimiting instructions
+      /// \{
+      SymbolInfo m_first_symbol_info;
+      SymbolInfo m_last_symbol_info;
+      /// \}
+      FunctionCall &m_owning_call;
+    };
+
+    class UntracedSegment {
+    public:
+      /// Note: Untraced segments can only exist if have also seen a traced
+      /// segment of the same function call. Thus, we can use those traced
+      /// segments if we want symbol information and such.
+
+      UntracedSegment(FunctionCallUP &&nested_call)
+          : m_nested_call(std::move(nested_call)) {}
+
+      const FunctionCall &GetNestedCall() const;
+
+    private:
+      UntracedSegment(const UntracedSegment &) = delete;
+      UntracedSegment &operator=(UntracedSegment const &);
+      FunctionCallUP m_nested_call;
+    };
+
+    /// Create a new function call given an instruction. This will also create a
+    /// segment for that instruction.
+    ///
+    /// \param[in] cursor_sp
+    ///   A cursor pointing to the first instruction of that function call.
+    ///
+    /// \param[in] symbol_info
+    ///   The symbol information of that first instruction.
+    FunctionCall(const lldb::TraceCursorSP &cursor_sp,
+                 const SymbolInfo &symbol_info);
+
+    /// Append a new traced segment to this funciton call.
+    ///
+    /// \param[in] cursor_sp
+    ///   A cursor pointing to the first instruction of the new segment.
+    ///
+    /// \param[in] symbol_info
+    ///   The symbol information of that first instruction.
+    void AppendSegment(const lldb::TraceCursorSP &cursor_sp,
+                       const SymbolInfo &symbol_info);
+
+    /// \return
+    ///   The symbol info of some traced instruction of this call.
+    const SymbolInfo &GetSymbolInfo() const;
+
+    /// \return
+    ///   \b true if and only if the instructions in this function call are
+    ///   trace errors, in which case this function call is a fake one.
+    bool IsError() const;
+
+    /// \return
+    ///   The list of traced segments of this call.
+    const std::deque<TracedSegment> &GetTracedSegments() const;
+
+    /// \return
+    ///   A non-const reference to the most-recent traced segment.
+    TracedSegment &GetLastTracedSegment();
+
+    /// Create an untraced segment for this call that jumps to the provided
+    /// nested call.
+    void SetUntracedSegment(FunctionCallUP &&nested_call);
+
+    /// \return
+    ///   A optional to the untraced segment of this call.
+    const llvm::Optional<UntracedSegment> &GetUntracedSegment() const;
+
+    /// \return
+    ///   A pointer to the parent call. It may be \b nullptr.
+    FunctionCall *GetParentCall() const;
+
+    void SetParentCall(FunctionCall &parent_call);
+
+  private:
+    /// An optional untraced segment that precedes all the traced segments.
+    llvm::Optional<UntracedSegment> m_untraced_segment;
+    /// The traced segments in order. We used a deque to prevent moving these
+    /// objects when appending to the list, which would happen with vector.
+    std::deque<TracedSegment> m_traced_segments;
+    /// The parent call, which might be null. Useful for reconstructing
+    /// callstacks.
+    FunctionCall *m_parent_call = nullptr;
+    /// Whether this call represents a list of consecutive errors.
+    bool m_is_error;
+  };
+
    /// Interface used to abstract away the format in which the instruction
    /// information will be dumped.
    class OutputWriter {
@@ -84,6 +382,10 @@ public:
  
      /// Dump a trace item (instruction, error or event).
      virtual void TraceItem(const TraceItem &item) = 0;
+
+    /// Dump a function call forest.
+    virtual void FunctionCallForest(const std::vector<FunctionCallUP> &forest) {
+    }
    };
  
    /// Create a instruction dumper for the cursor.
@@ -113,6 +415,9 @@ public:
    ///     if no instructions were visited.
    llvm::Optional<lldb::user_id_t> DumpInstructions(size_t count);
  
+  /// Dump all function calls forwards chronologically and hierarchically
+  void DumpFunctionCalls();
+
  private:
    /// Create a trace item for the current position without symbol information.
    TraceItem CreatRawTraceItem();
diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp

index ae2a8ed..f7a571d 100644 (file)
--- a/lldb/source/Commands/CommandObjectThread.cpp
+++ b/lldb/source/Commands/CommandObjectThread.cpp
@@ -2128,11 +2128,11 @@ public:
  
        switch (short_option) {
        case 'j': {
-        m_json = true;
+        m_dumper_options.json = true;
          break;
        }
        case 'J': {
-        m_pretty_json = true;
+        m_dumper_options.pretty_print_json = true;
          break;
        }
        case 'F': {
@@ -2146,8 +2146,7 @@ public:
      }
  
      void OptionParsingStarting(ExecutionContext *execution_context) override {
-      m_json = false;
-      m_pretty_json = false;
+      m_dumper_options = {};
        m_output_file = llvm::None;
      }
  
@@ -2158,8 +2157,7 @@ public:
      static const size_t kDefaultCount = 20;
  
      // Instance variables to hold the values for command options.
-    bool m_json;
-    bool m_pretty_json;
+    TraceDumperOptions m_dumper_options;
      llvm::Optional<FileSpec> m_output_file;
    };
  
@@ -2187,10 +2185,30 @@ protected:
        result.AppendError("invalid thread\n");
        return false;
      }
-    result.AppendMessageWithFormatv(
-        "json = {0}, pretty_json = {1}, file = {2}, thread = {3}",
-        m_options.m_json, m_options.m_pretty_json, !!m_options.m_output_file,
-        thread_sp->GetID());
+
+    llvm::Expected<TraceCursorSP> cursor_or_error =
+        m_exe_ctx.GetTargetSP()->GetTrace()->CreateNewCursor(*thread_sp);
+
+    if (!cursor_or_error) {
+      result.AppendError(llvm::toString(cursor_or_error.takeError()));
+      return false;
+    }
+    TraceCursorSP &cursor_sp = *cursor_or_error;
+
+    llvm::Optional<StreamFile> out_file;
+    if (m_options.m_output_file) {
+      out_file.emplace(m_options.m_output_file->GetPath().c_str(),
+                       File::eOpenOptionWriteOnly | File::eOpenOptionCanCreate,
+                       lldb::eFilePermissionsFileDefault);
+    }
+
+    m_options.m_dumper_options.forwards = true;
+
+    TraceDumper dumper(std::move(cursor_sp),
+                       out_file ? *out_file : result.GetOutputStream(),
+                       m_options.m_dumper_options);
+
+    dumper.DumpFunctionCalls();
      return true;
    }
  
diff --git a/lldb/source/Target/TraceDumper.cpp b/lldb/source/Target/TraceDumper.cpp

index 152feb6..2dbcb49 100644 (file)
--- a/lldb/source/Target/TraceDumper.cpp
+++ b/lldb/source/Target/TraceDumper.cpp
@@ -7,7 +7,6 @@
  //===----------------------------------------------------------------------===//
  
  #include "lldb/Target/TraceDumper.h"
-
  #include "lldb/Core/Module.h"
  #include "lldb/Symbol/CompileUnit.h"
  #include "lldb/Symbol/Function.h"
@@ -26,15 +25,20 @@ static Optional<const char *> ToOptionalString(const char *s) {
      return None;
    return s;
  }
+
+static const char *GetModuleName(const SymbolContext &sc) {
+  if (!sc.module_sp)
+    return nullptr;
+  return sc.module_sp->GetFileSpec().GetFilename().AsCString();
+}
+
  /// \return
  ///   The module name (basename if the module is a file, or the actual name if
  ///   it's a virtual module), or \b nullptr if no name nor module was found.
  static const char *GetModuleName(const TraceDumper::TraceItem &item) {
-  if (!item.symbol_info || !item.symbol_info->sc.module_sp)
+  if (!item.symbol_info)
      return nullptr;
-  return item.symbol_info->sc.module_sp->GetFileSpec()
-      .GetFilename()
-      .AsCString();
+  return GetModuleName(item.symbol_info->sc);
  }
  
  // This custom LineEntry validator is neded because some line_entries have
@@ -64,10 +68,12 @@ static bool FileLineAndColumnMatches(const LineEntry &a, const LineEntry &b) {
  ///       - module
  ///       - symbol
  ///       - function
-///       - line
+///       - inlined function
+///       - source line info
  static bool
  IsSameInstructionSymbolContext(const TraceDumper::SymbolInfo &prev_insn,
-                               const TraceDumper::SymbolInfo &insn) {
+                               const TraceDumper::SymbolInfo &insn,
+                               bool check_source_line_info = true) {
    // module checks
    if (insn.sc.module_sp != prev_insn.sc.module_sp)
      return false;
@@ -78,11 +84,23 @@ IsSameInstructionSymbolContext(const TraceDumper::SymbolInfo &prev_insn,
  
    // function checks
    if (!insn.sc.function && !prev_insn.sc.function)
-    return true;
+    return true; // This means two dangling instruction in the same module. We
+                 // can assume they are part of the same unnamed symbol
    else if (insn.sc.function != prev_insn.sc.function)
      return false;
  
+  Block *inline_block_a =
+      insn.sc.block ? insn.sc.block->GetContainingInlinedBlock() : nullptr;
+  Block *inline_block_b = prev_insn.sc.block
+                              ? prev_insn.sc.block->GetContainingInlinedBlock()
+                              : nullptr;
+  if (inline_block_a != inline_block_b)
+    return false;
+
    // line entry checks
+  if (!check_source_line_info)
+    return true;
+
    const bool curr_line_valid = IsLineEntryValid(insn.sc.line_entry);
    const bool prev_line_valid = IsLineEntryValid(prev_insn.sc.line_entry);
    if (curr_line_valid && prev_line_valid)
@@ -100,6 +118,14 @@ public:
  
    void NoMoreData() override { m_s << "    no more data\n"; }
  
+  void FunctionCallForest(
+      const std::vector<TraceDumper::FunctionCallUP> &forest) override {
+    for (size_t i = 0; i < forest.size(); i++) {
+      m_s.Format("\n[call tree #{0}]\n", i);
+      DumpFunctionCallTree(*forest[i]);
+    }
+  }
+
    void TraceItem(const TraceDumper::TraceItem &item) override {
      if (item.symbol_info) {
        if (!item.prev_symbol_info ||
@@ -174,6 +200,78 @@ public:
    }
  
  private:
+  void
+  DumpSegmentContext(const TraceDumper::FunctionCall::TracedSegment &segment) {
+    if (segment.GetOwningCall().IsError()) {
+      m_s << "<tracing errors>";
+      return;
+    }
+
+    const SymbolContext &first_sc = segment.GetFirstInstructionSymbolInfo().sc;
+    first_sc.DumpStopContext(
+        &m_s, segment.GetFirstInstructionSymbolInfo().exe_ctx.GetTargetPtr(),
+        segment.GetFirstInstructionSymbolInfo().address,
+        /*show_fullpaths=*/false,
+        /*show_module=*/true, /*show_inlined_frames=*/false,
+        /*show_function_arguments=*/true,
+        /*show_function_name=*/true);
+    m_s << " to ";
+    const SymbolContext &last_sc = segment.GetLastInstructionSymbolInfo().sc;
+    if (IsLineEntryValid(first_sc.line_entry) &&
+        IsLineEntryValid(last_sc.line_entry)) {
+      m_s.Format("{0}:{1}", last_sc.line_entry.line, last_sc.line_entry.column);
+    } else {
+      last_sc.DumpStopContext(
+          &m_s, segment.GetFirstInstructionSymbolInfo().exe_ctx.GetTargetPtr(),
+          segment.GetLastInstructionSymbolInfo().address,
+          /*show_fullpaths=*/false,
+          /*show_module=*/false, /*show_inlined_frames=*/false,
+          /*show_function_arguments=*/false,
+          /*show_function_name=*/false);
+    }
+  }
+
+  void DumpUntracedContext(const TraceDumper::FunctionCall &function_call) {
+    if (function_call.IsError()) {
+      m_s << "tracing error";
+    }
+    const SymbolContext &sc = function_call.GetSymbolInfo().sc;
+
+    const char *module_name = GetModuleName(sc);
+    if (!module_name)
+      m_s << "(none)";
+    else if (!sc.function && !sc.symbol)
+      m_s << module_name << "`(none)";
+    else
+      m_s << module_name << "`" << sc.GetFunctionName().AsCString();
+  }
+
+  void DumpFunctionCallTree(const TraceDumper::FunctionCall &function_call) {
+    if (function_call.GetUntracedSegment()) {
+      m_s.Indent();
+      DumpUntracedContext(function_call);
+      m_s << "\n";
+
+      m_s.IndentMore();
+      DumpFunctionCallTree(function_call.GetUntracedSegment()->GetNestedCall());
+      m_s.IndentLess();
+    }
+
+    for (const TraceDumper::FunctionCall::TracedSegment &segment :
+         function_call.GetTracedSegments()) {
+      m_s.Indent();
+      DumpSegmentContext(segment);
+      m_s.Format("  [{0}, {1}]\n", segment.GetFirstInstructionID(),
+                 segment.GetLastInstructionID());
+
+      segment.IfNestedCall([&](const TraceDumper::FunctionCall &nested_call) {
+        m_s.IndentMore();
+        DumpFunctionCallTree(nested_call);
+        m_s.IndentLess();
+      });
+    }
+  }
+
    Stream &m_s;
    TraceDumperOptions m_options;
    bool m_was_prev_instruction_an_error = false;
@@ -335,13 +433,12 @@ TraceDumper::TraceItem TraceDumper::CreatRawTraceItem() {
  /// instruction's symbol context when possible.
  static SymbolContext
  CalculateSymbolContext(const Address &address,
-                       const TraceDumper::SymbolInfo &prev_symbol_info) {
+                       const SymbolContext &prev_symbol_context) {
    AddressRange range;
-  if (prev_symbol_info.sc.GetAddressRange(eSymbolContextEverything, 0,
-                                          /*inline_block_range*/ false,
-                                          range) &&
+  if (prev_symbol_context.GetAddressRange(eSymbolContextEverything, 0,
+                                          /*inline_block_range*/ true, range) &&
        range.Contains(address))
-    return prev_symbol_info.sc;
+    return prev_symbol_context;
  
    SymbolContext sc;
    address.CalculateSymbolContext(&sc, eSymbolContextEverything);
@@ -384,6 +481,19 @@ CalculateDisass(const TraceDumper::SymbolInfo &symbol_info,
                     : InstructionSP());
  }
  
+static TraceDumper::SymbolInfo
+CalculateSymbolInfo(const ExecutionContext &exe_ctx, lldb::addr_t load_address,
+                    const TraceDumper::SymbolInfo &prev_symbol_info) {
+  TraceDumper::SymbolInfo symbol_info;
+  symbol_info.exe_ctx = exe_ctx;
+  symbol_info.address.SetLoadAddress(load_address, exe_ctx.GetTargetPtr());
+  symbol_info.sc =
+      CalculateSymbolContext(symbol_info.address, prev_symbol_info.sc);
+  std::tie(symbol_info.disassembler, symbol_info.instruction) =
+      CalculateDisass(symbol_info, prev_symbol_info, exe_ctx);
+  return symbol_info;
+}
+
  Optional<lldb::user_id_t> TraceDumper::DumpInstructions(size_t count) {
    ThreadSP thread_sp = m_cursor_sp->GetExecutionContextRef().GetThreadSP();
  
@@ -424,14 +534,8 @@ Optional<lldb::user_id_t> TraceDumper::DumpInstructions(size_t count) {
        item.load_address = m_cursor_sp->GetLoadAddress();
  
        if (!m_options.raw) {
-        SymbolInfo symbol_info;
-        symbol_info.exe_ctx = exe_ctx;
-        symbol_info.address.SetLoadAddress(item.load_address,
-                                           exe_ctx.GetTargetPtr());
-        symbol_info.sc =
-            CalculateSymbolContext(symbol_info.address, prev_symbol_info);
-        std::tie(symbol_info.disassembler, symbol_info.instruction) =
-            CalculateDisass(symbol_info, prev_symbol_info, exe_ctx);
+        SymbolInfo symbol_info =
+            CalculateSymbolInfo(exe_ctx, item.load_address, prev_symbol_info);
          item.prev_symbol_info = prev_symbol_info;
          item.symbol_info = symbol_info;
          prev_symbol_info = symbol_info;
@@ -443,3 +547,324 @@ Optional<lldb::user_id_t> TraceDumper::DumpInstructions(size_t count) {
      m_writer_up->NoMoreData();
    return last_id;
  }
+
+void TraceDumper::FunctionCall::TracedSegment::AppendInsn(
+    const TraceCursorSP &cursor_sp,
+    const TraceDumper::SymbolInfo &symbol_info) {
+  m_last_insn_id = cursor_sp->GetId();
+  m_last_symbol_info = symbol_info;
+}
+
+lldb::user_id_t
+TraceDumper::FunctionCall::TracedSegment::GetFirstInstructionID() const {
+  return m_first_insn_id;
+}
+
+lldb::user_id_t
+TraceDumper::FunctionCall::TracedSegment::GetLastInstructionID() const {
+  return m_last_insn_id;
+}
+
+void TraceDumper::FunctionCall::TracedSegment::IfNestedCall(
+    std::function<void(const FunctionCall &function_call)> callback) const {
+  if (m_nested_call)
+    callback(*m_nested_call);
+}
+
+const TraceDumper::FunctionCall &
+TraceDumper::FunctionCall::TracedSegment::GetOwningCall() const {
+  return m_owning_call;
+}
+
+TraceDumper::FunctionCall &
+TraceDumper::FunctionCall::TracedSegment::CreateNestedCall(
+    const TraceCursorSP &cursor_sp,
+    const TraceDumper::SymbolInfo &symbol_info) {
+  m_nested_call = std::make_unique<FunctionCall>(cursor_sp, symbol_info);
+  m_nested_call->SetParentCall(m_owning_call);
+  return *m_nested_call;
+}
+
+const TraceDumper::SymbolInfo &
+TraceDumper::FunctionCall::TracedSegment::GetFirstInstructionSymbolInfo()
+    const {
+  return m_first_symbol_info;
+}
+
+const TraceDumper::SymbolInfo &
+TraceDumper::FunctionCall::TracedSegment::GetLastInstructionSymbolInfo() const {
+  return m_last_symbol_info;
+}
+
+const TraceDumper::FunctionCall &
+TraceDumper::FunctionCall::UntracedSegment::GetNestedCall() const {
+  return *m_nested_call;
+}
+
+TraceDumper::FunctionCall::FunctionCall(
+    const TraceCursorSP &cursor_sp,
+    const TraceDumper::SymbolInfo &symbol_info) {
+  m_is_error = cursor_sp->IsError();
+  AppendSegment(cursor_sp, symbol_info);
+}
+
+void TraceDumper::FunctionCall::AppendSegment(
+    const TraceCursorSP &cursor_sp,
+    const TraceDumper::SymbolInfo &symbol_info) {
+  m_traced_segments.emplace_back(cursor_sp, symbol_info, *this);
+}
+
+const TraceDumper::SymbolInfo &
+TraceDumper::FunctionCall::GetSymbolInfo() const {
+  return m_traced_segments.back().GetLastInstructionSymbolInfo();
+}
+
+bool TraceDumper::FunctionCall::IsError() const { return m_is_error; }
+
+const std::deque<TraceDumper::FunctionCall::TracedSegment> &
+TraceDumper::FunctionCall::GetTracedSegments() const {
+  return m_traced_segments;
+}
+
+TraceDumper::FunctionCall::TracedSegment &
+TraceDumper::FunctionCall::GetLastTracedSegment() {
+  return m_traced_segments.back();
+}
+
+const Optional<TraceDumper::FunctionCall::UntracedSegment> &
+TraceDumper::FunctionCall::GetUntracedSegment() const {
+  return m_untraced_segment;
+}
+
+void TraceDumper::FunctionCall::SetUntracedSegment(
+    TraceDumper::FunctionCallUP &&nested_call) {
+  m_untraced_segment.emplace(std::move(nested_call));
+}
+
+TraceDumper::FunctionCall *TraceDumper::FunctionCall::GetParentCall() const {
+  return m_parent_call;
+}
+
+void TraceDumper::FunctionCall::SetParentCall(
+    TraceDumper::FunctionCall &parent_call) {
+  m_parent_call = &parent_call;
+}
+
+/// Given an instruction that happens after a return, find the ancestor function
+/// call that owns it. If this ancestor doesn't exist, create a new ancestor and
+/// make it the root of the tree.
+///
+/// \param[in] last_function_call
+///   The function call that performs the return.
+///
+/// \param[in] symbol_info
+///   The symbol information of the instruction after the return.
+///
+/// \param[in] cursor_sp
+///   The cursor pointing to the instruction after the return.
+///
+/// \param[in,out] roots
+///   The object owning the roots. It might be modified if a new root needs to
+///   be created.
+///
+/// \return
+///   A reference to the function call that owns the new instruction
+static TraceDumper::FunctionCall &AppendReturnedInstructionToFunctionCallForest(
+    TraceDumper::FunctionCall &last_function_call,
+    const TraceDumper::SymbolInfo &symbol_info, const TraceCursorSP &cursor_sp,
+    std::vector<TraceDumper::FunctionCallUP> &roots) {
+
+  // We omit the current node because we can't return to itself.
+  TraceDumper::FunctionCall *ancestor = last_function_call.GetParentCall();
+
+  for (; ancestor; ancestor = ancestor->GetParentCall()) {
+    // This loop traverses the tree until it finds a call that we can return to.
+    if (IsSameInstructionSymbolContext(ancestor->GetSymbolInfo(), symbol_info,
+                                       /*check_source_line_info=*/false)) {
+      // We returned to this symbol, so we are assuming we are returning there
+      // Note: If this is not robust enough, we should actually check if we
+      // returning to the instruction that follows the last instruction from
+      // that call, as that's the behavior of CALL instructions.
+      ancestor->AppendSegment(cursor_sp, symbol_info);
+      return *ancestor;
+    }
+  }
+
+  // We didn't find the call we were looking for, so we now create a synthetic
+  // one that will contain the new instruction in its first traced segment.
+  TraceDumper::FunctionCallUP new_root =
+      std::make_unique<TraceDumper::FunctionCall>(cursor_sp, symbol_info);
+  // This new root will own the previous root through an untraced segment.
+  new_root->SetUntracedSegment(std::move(roots.back()));
+  roots.pop_back();
+  // We update the roots container to point to the new root
+  roots.emplace_back(std::move(new_root));
+  return *roots.back();
+}
+
+/// Append an instruction to a function call forest. The new instruction might
+/// be appended to the current segment, to a new nest call, or return to an
+/// ancestor call.
+///
+/// \param[in] exe_ctx
+///   The exeuction context of the traced thread.
+///
+/// \param[in] last_function_call
+///   The chronologically most recent function call before the new instruction.
+///
+/// \param[in] prev_symbol_info
+///   The symbol information of the previous instruction in the trace.
+///
+/// \param[in] symbol_info
+///   The symbol information of the new instruction.
+///
+/// \param[in] cursor_sp
+///   The cursor pointing to the new instruction.
+///
+/// \param[in,out] roots
+///   The object owning the roots. It might be modified if a new root needs to
+///   be created.
+///
+/// \return
+///   A reference to the function call that owns the new instruction.
+static TraceDumper::FunctionCall &AppendInstructionToFunctionCallForest(
+    const ExecutionContext &exe_ctx,
+    TraceDumper::FunctionCall *last_function_call,
+    const TraceDumper::SymbolInfo &prev_symbol_info,
+    const TraceDumper::SymbolInfo &symbol_info, const TraceCursorSP &cursor_sp,
+    std::vector<TraceDumper::FunctionCallUP> &roots) {
+  if (!last_function_call || last_function_call->IsError()) {
+    // We create a brand new root
+    roots.emplace_back(
+        std::make_unique<TraceDumper::FunctionCall>(cursor_sp, symbol_info));
+    return *roots.back();
+  }
+
+  AddressRange range;
+  if (symbol_info.sc.GetAddressRange(
+          eSymbolContextBlock | eSymbolContextFunction | eSymbolContextSymbol,
+          0, /*inline_block_range*/ true, range)) {
+    if (range.GetBaseAddress() == symbol_info.address) {
+      // Our instruction is the first instruction of a function. This has
+      // to be a call. This should also identify if a trampoline or the linker
+      // is making a call using a non-CALL instruction.
+      return last_function_call->GetLastTracedSegment().CreateNestedCall(
+          cursor_sp, symbol_info);
+    }
+  }
+  if (IsSameInstructionSymbolContext(prev_symbol_info, symbol_info,
+                                     /*check_source_line_info=*/false)) {
+    // We are still in the same function. This can't be a call because otherwise
+    // we would be in the first instruction of the symbol.
+    last_function_call->GetLastTracedSegment().AppendInsn(cursor_sp,
+                                                          symbol_info);
+    return *last_function_call;
+  }
+  // Now we are in a different symbol. Let's see if this is a return or a
+  // call
+  switch (last_function_call->GetLastTracedSegment()
+              .GetLastInstructionSymbolInfo()
+              .instruction->GetControlFlowKind(&exe_ctx)) {
+  case lldb::eInstructionControlFlowKindCall:
+  case lldb::eInstructionControlFlowKindFarCall: {
+    // This is a regular call
+    return last_function_call->GetLastTracedSegment().CreateNestedCall(
+        cursor_sp, symbol_info);
+  }
+  case lldb::eInstructionControlFlowKindFarReturn:
+  case lldb::eInstructionControlFlowKindReturn: {
+    // We should have caught most trampolines and linker functions earlier, so
+    // let's assume this is a regular return.
+    return AppendReturnedInstructionToFunctionCallForest(
+        *last_function_call, symbol_info, cursor_sp, roots);
+  }
+  default:
+    // we changed symbols not using a call or return and we are not in the
+    // beginning of a symbol, so this should be something very artificial
+    // or maybe a jump to some label in the middle of it section.
+
+    // We first check if it's a return from an inline method
+    if (prev_symbol_info.sc.block &&
+        prev_symbol_info.sc.block->GetContainingInlinedBlock()) {
+      return AppendReturnedInstructionToFunctionCallForest(
+          *last_function_call, symbol_info, cursor_sp, roots);
+    }
+    // Now We assume it's a call. We should revisit this in the future.
+    // Ideally we should be able to decide whether to create a new tree,
+    // or go deeper or higher in the stack.
+    return last_function_call->GetLastTracedSegment().CreateNestedCall(
+        cursor_sp, symbol_info);
+  }
+}
+
+/// Append an error to a function call forest. The new error might be appended
+/// to the current segment if it contains errors or will create a new root.
+///
+/// \param[in] last_function_call
+///   The chronologically most recent function call before the new error.
+///
+/// \param[in] cursor_sp
+///   The cursor pointing to the new error.
+///
+/// \param[in,out] roots
+///   The object owning the roots. It might be modified if a new root needs to
+///   be created.
+///
+/// \return
+///   A reference to the function call that owns the new error.
+TraceDumper::FunctionCall &AppendErrorToFunctionCallForest(
+    TraceDumper::FunctionCall *last_function_call, TraceCursorSP &cursor_sp,
+    std::vector<TraceDumper::FunctionCallUP> &roots) {
+  if (last_function_call && last_function_call->IsError()) {
+    last_function_call->GetLastTracedSegment().AppendInsn(
+        cursor_sp, TraceDumper::SymbolInfo{});
+    return *last_function_call;
+  } else {
+    roots.emplace_back(std::make_unique<TraceDumper::FunctionCall>(
+        cursor_sp, TraceDumper::SymbolInfo{}));
+    return *roots.back();
+  }
+}
+
+static std::vector<TraceDumper::FunctionCallUP>
+CreateFunctionCallForest(TraceCursorSP &cursor_sp,
+                         const ExecutionContext &exe_ctx) {
+
+  std::vector<TraceDumper::FunctionCallUP> roots;
+  TraceDumper::SymbolInfo prev_symbol_info;
+
+  TraceDumper::FunctionCall *last_function_call = nullptr;
+
+  for (; cursor_sp->HasValue(); cursor_sp->Next()) {
+    if (cursor_sp->IsError()) {
+      last_function_call = &AppendErrorToFunctionCallForest(last_function_call,
+                                                            cursor_sp, roots);
+      prev_symbol_info = {};
+    } else if (cursor_sp->IsInstruction()) {
+      TraceDumper::SymbolInfo symbol_info = CalculateSymbolInfo(
+          exe_ctx, cursor_sp->GetLoadAddress(), prev_symbol_info);
+
+      last_function_call = &AppendInstructionToFunctionCallForest(
+          exe_ctx, last_function_call, prev_symbol_info, symbol_info, cursor_sp,
+          roots);
+      prev_symbol_info = symbol_info;
+    } else if (cursor_sp->GetEventType() == eTraceEventCPUChanged) {
+      // TODO: In case of a CPU change, we create a new root because we haven't
+      // investigated yet if a call tree can safely continue or if interrupts
+      // could have polluted the original call tree.
+      last_function_call = nullptr;
+      prev_symbol_info = {};
+    }
+  }
+
+  return roots;
+}
+
+void TraceDumper::DumpFunctionCalls() {
+  ThreadSP thread_sp = m_cursor_sp->GetExecutionContextRef().GetThreadSP();
+  ExecutionContext exe_ctx;
+  thread_sp->GetProcess()->GetTarget().CalculateExecutionContext(exe_ctx);
+
+  m_writer_up->FunctionCallForest(
+      CreateFunctionCallForest(m_cursor_sp, exe_ctx));
+}
diff --git a/lldb/test/API/commands/trace/TestTraceDumpFunctionCalls.py b/lldb/test/API/commands/trace/TestTraceDumpFunctionCalls.py

index 147a5c9..70f8edb 100644 (file)
--- a/lldb/test/API/commands/trace/TestTraceDumpFunctionCalls.py
+++ b/lldb/test/API/commands/trace/TestTraceDumpFunctionCalls.py
@@ -3,15 +3,107 @@ from lldbsuite.test.lldbtest import *
  from lldbsuite.test.decorators import *
  
  class TestTraceDumpInfo(TraceIntelPTTestCaseBase):
-    def testDumpFunctionCalls(self):
+    def testDumpSimpleFunctionCalls(self):
        self.expect("trace load -v " +
          os.path.join(self.getSourceDir(), "intelpt-trace", "trace.json"))
  
        self.expect("thread trace dump function-calls 2",
          error=True, substrs=['error: no thread with index: "2"'])
  
-      self.expect("thread trace dump function-calls 1 -j",
-        substrs=['json = true, pretty_json = false, file = false, thread = 3842849'])
+      # We don't support yet JSON
+      self.expect("thread trace dump function-calls 1 -j", substrs=['[]'])
  
-      self.expect("thread trace dump function-calls 1 -F /tmp -J",
-        substrs=['false, pretty_json = true, file = true, thread = 3842849'])
+      # We test first some code without function call
+      self.expect("thread trace dump function-calls 1",
+        substrs=['''thread #1: tid = 3842849
+
+[call tree #0]
+a.out`main + 4 at main.cpp:2 to 4:0  [1, 22]'''])
+
+    def testFunctionCallsWithErrors(self):
+      self.expect("trace load -v " +
+        os.path.join(self.getSourceDir(), "intelpt-multi-core-trace", "trace.json"))
+
+      # We expect that tracing errors appear as a different tree
+      self.expect("thread trace dump function-calls 2",
+        substrs=['''thread #2: tid = 3497496
+
+[call tree #0]
+m.out`foo() + 65 at multi_thread.cpp:12:21 to 12:21  [4, 19524]
+
+[call tree #1]
+<tracing errors>  [19532, 19532]'''])
+
+      self.expect("thread trace dump function-calls 3",
+        substrs=['''thread #3: tid = 3497497
+
+[call tree #0]
+m.out`bar() + 30 at multi_thread.cpp:19:3 to 20:6  [5, 61831]
+
+[call tree #1]
+<tracing errors>  [61834, 61834]'''])
+
+    def testInlineFunctionCalls(self):
+      self.expect("file " + os.path.join(self.getSourceDir(), "inline-function", "a.out"))
+      self.expect("b main") # we'll trace from the beginning of main
+      self.expect("b 17")
+      self.expect("r")
+      self.expect("thread trace start")
+      self.expect("c")
+      self.expect("thread trace dump function-calls",
+        substrs=['''[call tree #0]
+a.out`main + 8 at inline.cpp:15:7 to 16:14  [1, 5]
+  a.out`foo(int) at inline.cpp:8:16 to 9:15  [6, 13]
+    a.out`foo(int) + 22 [inlined] mult(int, int) at inline.cpp:2:7 to 5:10  [14, 21]
+  a.out`foo(int) + 49 at inline.cpp:9:15 to 12:1  [22, 26]
+a.out`main + 25 at inline.cpp:16:14 to 16:14  [27, 27]'''])
+
+    def testIncompleteInlineFunctionCalls(self):
+      self.expect("file " + os.path.join(self.getSourceDir(), "inline-function", "a.out"))
+      self.expect("b 4") # we'll trace from the middle of the inline method
+      self.expect("b 17")
+      self.expect("r")
+      self.expect("thread trace start")
+      self.expect("c")
+      self.expect("thread trace dump function-calls",
+        substrs=['''[call tree #0]
+a.out`main
+  a.out`foo(int)
+    a.out`foo(int) + 36 [inlined] mult(int, int) + 14 at inline.cpp:4:5 to 5:10  [1, 5]
+  a.out`foo(int) + 49 at inline.cpp:9:15 to 12:1  [6, 10]
+a.out`main + 25 at inline.cpp:16:14 to 16:14  [11, 11]'''])
+
+    def testMultifileFunctionCalls(self):
+      # This test is extremely important because it first calls the method foo() which requires going through the dynamic linking.
+      # You'll see the entry "a.out`symbol stub for: foo()" which will invoke the ld linker, which will in turn find the actual foo
+      # function and eventually invoke it.  However, we don't have the image of the linker in the trace bundle, so we'll see errors
+      # because the decoder couldn't find the linker binary! After those failures, the linker will resume right where we return to
+      # main after foo() finished.
+      # Then, we call foo() again, but because it has already been loaded by the linker, we don't invoke the linker anymore! And
+      # we'll see a nice tree without errors in this second invocation. Something interesting happens here. We still have an
+      # invocation to the symbol stub for foo(), but it modifies the stack so that when we return from foo() we don't stop again
+      # at the symbol stub, but instead we return directly to main. This is an example of returning several levels up in the
+      # call stack.
+      # Not only that, we also have an inline method in between.
+      self.expect("trace load " + os.path.join(self.getSourceDir(), "intelpt-trace-multi-file", "multi-file-no-ld.json"))
+      self.expect("thread trace dump function-calls",
+        substrs=['''thread #1: tid = 815455
+
+[call tree #0]
+a.out`main + 15 at main.cpp:10 to 10:0  [1, 1]
+  a.out`symbol stub for: foo() to <+11>  [3, 5]
+    a.out`a.out[0x0000000000400510] to a.out[0x0000000000400516]  [6, 7]
+
+[call tree #1]
+<tracing errors>  [8, 8]
+
+[call tree #2]
+a.out`main + 20 at main.cpp:10 to 12:0  [10, 14]
+  a.out`main + 34 [inlined] inline_function() at main.cpp:4 to 6:0  [16, 20]
+a.out`main + 55 at main.cpp:14 to 16:0  [21, 25]
+  a.out`symbol stub for: foo() to <+0>  [26, 26]
+    libfoo.so`foo() at foo.cpp:3 to 4:0  [27, 30]
+      libfoo.so`symbol stub for: bar() to <+0>  [31, 31]
+        libbar.so`bar() at bar.cpp:1 to 4:0  [32, 40]
+    libfoo.so`foo() + 13 at foo.cpp:4 to 6:0  [41, 48]
+a.out`main + 68 at main.cpp:16 to 16:0  [49, 51]'''])
diff --git a/lldb/test/API/commands/trace/TestTraceLoad.py b/lldb/test/API/commands/trace/TestTraceLoad.py

index a793dd6..e55d680 100644 (file)
--- a/lldb/test/API/commands/trace/TestTraceLoad.py
+++ b/lldb/test/API/commands/trace/TestTraceLoad.py
@@ -68,8 +68,8 @@ class TestTraceLoad(TraceIntelPTTestCaseBase):
        "individualCounts": {
          "software disabled tracing": 1,
          "trace synchronization point": 1,
-        "HW clock tick": 8,
-        "CPU core changed": 1
+        "CPU core changed": 1,
+        "HW clock tick": 8
        }
      },
      "continuousExecutions": 1,
diff --git a/lldb/test/API/commands/trace/inline-function/a.out b/lldb/test/API/commands/trace/inline-function/a.out

new file mode 100755 (executable)

index 0000000..101fb12

Binary files /dev/null and b/lldb/test/API/commands/trace/inline-function/a.out differ
diff --git a/lldb/test/API/commands/trace/inline-function/inline.cpp b/lldb/test/API/commands/trace/inline-function/inline.cpp

new file mode 100644 (file)

index 0000000..6d880b6
--- /dev/null
+++ b/lldb/test/API/commands/trace/inline-function/inline.cpp
@@ -0,0 +1,18 @@
+__attribute__((always_inline)) inline int mult(int x, int y) {
+  int f = x * y;
+  f++;
+  f *= f;
+  return f;
+}
+
+int foo(int x) {
+  int z = mult(x, x - 1);
+  z++;
+  return z;
+}
+
+int main() {
+  int x = 12;
+  int z = foo(x);
+  return z + x;
+}
author	Walter Erquinigo <wallace@fb.com>
	Mon, 10 Oct 2022 19:57:13 +0000 (12:57 -0700)
committer	Walter Erquinigo <wallace@fb.com>
	Tue, 18 Oct 2022 20:57:53 +0000 (13:57 -0700)
lldb/include/lldb/Target/TraceDumper.h		patch \| blob \| history
lldb/source/Commands/CommandObjectThread.cpp		patch \| blob \| history
lldb/source/Target/TraceDumper.cpp		patch \| blob \| history
lldb/test/API/commands/trace/TestTraceDumpFunctionCalls.py		patch \| blob \| history
lldb/test/API/commands/trace/TestTraceLoad.py		patch \| blob \| history
lldb/test/API/commands/trace/inline-function/a.out	[new file with mode: 0755]	patch \| blob
lldb/test/API/commands/trace/inline-function/inline.cpp	[new file with mode: 0644]	patch \| blob