[Symbolizer] Handle {{{bt}}} symbolizer markup element.
authorDaniel Thornburgh <dthorn@google.com>
Fri, 5 Aug 2022 21:58:44 +0000 (14:58 -0700)
committerDaniel Thornburgh <dthorn@google.com>
Wed, 31 Aug 2022 16:49:32 +0000 (09:49 -0700)
This adds support for backtrace generation to the llvm-symbolizer markup
filter, which is likely the largest use case.

Reviewed By: peter.smith

Differential Revision: https://reviews.llvm.org/D132706

llvm/docs/CommandGuide/llvm-symbolizer.rst
llvm/docs/SymbolizerMarkupFormat.rst
llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
llvm/test/DebugInfo/symbolize-filter-markup-bt.test [new file with mode: 0644]

index 5870e0e..abb174c 100644 (file)
@@ -254,9 +254,21 @@ OPTIONS
   and prints the results to standard output. The following markup elements are
   not yet supported:
 
-  * ``{{bt}}``
-  * ``{{hexdict}}``
-  * ``{{dumpfile}}``
+  * ``{{{hexdict}}}``
+  * ``{{{dumpfile}}}``
+
+  The ``{{{bt}}}`` backtrace element reports frames using the following syntax:
+
+  ``#<number>[.<inline>] <address> <function> <file>:<line>:<col> (<module>+<relative address>)``
+
+  ``<inline>`` provides frame numbers for calls inlined into the caller
+  corresponding to ``<number>``. The inlined call numbers start at 1 and increase
+  from callee to caller.
+
+  ``<address>`` is an address inside the call instruction to the function.  The
+  address may not be the start of the instruction.  ``<relative address>`` is
+  the corresponding virtual offset in the ``<module>`` loaded at that address.
+
 
 .. _llvm-symbolizer-opt-f:
 
index b06cc20..169e57a 100644 (file)
@@ -205,7 +205,7 @@ human-readable symbolic form.
     {{{data:0x12345678}}}
     {{{data:0xffffffff9abcdef0}}}
 
-``{{{bt:%u:%p}}}``, ``{{{bt:%u:%p:ra}}}``, ``{{{bt:%u:%p:pc}}}`` [#not_yet_implemented]_
+``{{{bt:%u:%p}}}``, ``{{{bt:%u:%p:ra}}}``, ``{{{bt:%u:%p:pc}}}``
 
   This represents one frame in a backtrace. It usually appears on a line by
   itself (surrounded only by whitespace), in a sequence of such lines with
index b597e9b..a54f8f5 100644 (file)
@@ -97,6 +97,7 @@ private:
   bool tryPresentation(const MarkupNode &Node);
   bool trySymbol(const MarkupNode &Node);
   bool tryPC(const MarkupNode &Node);
+  bool tryBackTrace(const MarkupNode &Node);
   bool tryData(const MarkupNode &Node);
 
   bool trySGR(const MarkupNode &Node);
@@ -118,6 +119,7 @@ private:
   Optional<SmallVector<uint8_t>> parseBuildID(StringRef Str) const;
   Optional<std::string> parseMode(StringRef Str) const;
   Optional<PCType> parsePCType(StringRef Str) const;
+  Optional<uint64_t> parseFrameNumber(StringRef Str) const;
 
   bool checkTag(const MarkupNode &Node) const;
   bool checkNumFields(const MarkupNode &Element, size_t Size) const;
index 70fb49d..d96c0c8 100644 (file)
@@ -27,6 +27,7 @@
 #include "llvm/Demangle/Demangle.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
@@ -216,6 +217,8 @@ bool MarkupFilter::tryPresentation(const MarkupNode &Node) {
     return true;
   if (tryPC(Node))
     return true;
+  if (tryBackTrace(Node))
+    return true;
   return tryData(Node);
 }
 
@@ -269,8 +272,7 @@ bool MarkupFilter::tryPC(const MarkupNode &Node) {
     printRawElement(Node);
     return true;
   }
-  if (LI->FileName == DILineInfo::BadString &&
-      LI->FunctionName == DILineInfo::BadString && LI->Line == 0) {
+  if (!*LI) {
     printRawElement(Node);
     return true;
   }
@@ -286,6 +288,87 @@ bool MarkupFilter::tryPC(const MarkupNode &Node) {
   return true;
 }
 
+bool MarkupFilter::tryBackTrace(const MarkupNode &Node) {
+  if (Node.Tag != "bt")
+    return false;
+  if (!checkNumFieldsAtLeast(Node, 2))
+    return true;
+  if (!checkNumFieldsAtMost(Node, 3))
+    return true;
+
+  Optional<uint64_t> FrameNumber = parseFrameNumber(Node.Fields[0]);
+  if (!FrameNumber)
+    return true;
+
+  Optional<uint64_t> Addr = parseAddr(Node.Fields[1]);
+  if (!Addr)
+    return true;
+
+  // Backtrace addresses are assumed to be return addresses by default.
+  PCType Type = PCType::ReturnAddress;
+  if (Node.Fields.size() == 3) {
+    Optional<PCType> ParsedType = parsePCType(Node.Fields[2]);
+    if (!ParsedType)
+      return true;
+    Type = *ParsedType;
+  }
+  *Addr = adjustAddr(*Addr, Type);
+
+  const MMap *MMap = getContainingMMap(*Addr);
+  if (!MMap) {
+    WithColor::error() << "no mmap covers address\n";
+    reportLocation(Node.Fields[0].begin());
+    printRawElement(Node);
+    return true;
+  }
+  uint64_t MRA = MMap->getModuleRelativeAddr(*Addr);
+
+  Expected<DIInliningInfo> II =
+      Symbolizer.symbolizeInlinedCode(MMap->Mod->BuildID, {MRA});
+  if (!II) {
+    WithColor::defaultErrorHandler(II.takeError());
+    printRawElement(Node);
+    return true;
+  }
+
+  highlight();
+  for (unsigned I = 0, E = II->getNumberOfFrames(); I != E; ++I) {
+    auto Header = formatv("{0, +6}", formatv("#{0}", FrameNumber)).sstr<16>();
+    // Don't highlight the # sign as a value.
+    size_t NumberIdx = Header.find("#") + 1;
+    OS << Header.substr(0, NumberIdx);
+    printValue(Header.substr(NumberIdx));
+    if (I == E - 1) {
+      OS << "   ";
+    } else {
+      OS << '.';
+      printValue(formatv("{0, -2}", I + 1));
+    }
+    printValue(formatv(" {0:x16} ", *Addr));
+
+    DILineInfo LI = II->getFrame(I);
+    if (LI) {
+      printValue(LI.FunctionName);
+      OS << ' ';
+      printValue(LI.FileName);
+      OS << ':';
+      printValue(Twine(LI.Line));
+      OS << ':';
+      printValue(Twine(LI.Column));
+      OS << ' ';
+    }
+    OS << '(';
+    printValue(MMap->Mod->Name);
+    OS << "+";
+    printValue(formatv("{0:x}", MRA));
+    OS << ')';
+    if (I != E - 1)
+      OS << lineEnding();
+  }
+  restoreColor();
+  return true;
+}
+
 bool MarkupFilter::tryData(const MarkupNode &Node) {
   if (Node.Tag != "data")
     return false;
@@ -502,6 +585,16 @@ Optional<uint64_t> MarkupFilter::parseSize(StringRef Str) const {
   return ID;
 }
 
+// Parse a frame number (%u in the spec).
+Optional<uint64_t> MarkupFilter::parseFrameNumber(StringRef Str) const {
+  uint64_t ID;
+  if (Str.getAsInteger(10, ID)) {
+    reportTypeError(Str, "frame number");
+    return None;
+  }
+  return ID;
+}
+
 // Parse a build ID (%x in the spec).
 Optional<SmallVector<uint8_t>> MarkupFilter::parseBuildID(StringRef Str) const {
   std::string Bytes;
diff --git a/llvm/test/DebugInfo/symbolize-filter-markup-bt.test b/llvm/test/DebugInfo/symbolize-filter-markup-bt.test
new file mode 100644 (file)
index 0000000..a170113
--- /dev/null
@@ -0,0 +1,213 @@
+REQUIRES: x86-registered-target
+RUN: split-file %s %t
+RUN: mkdir -p %t/.build-id/ab
+RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t/asm.s \
+RUN:   -o %t/.build-id/ab/cdef.debug
+RUN: llvm-symbolizer --debug-file-directory=%t --filter-markup < %t/input \
+RUN:   > %t.output 2> %t.err
+RUN: FileCheck %s --input-file=%t.output --match-full-lines \
+RUN:   --implicit-check-not {{.}}
+RUN: FileCheck %s --check-prefix=ERR --input-file=%t.err --match-full-lines
+
+CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=abcdef [0x10-0x10f](r)[[END:\]{3}]]
+CHECK:    #0.1  0x0000000000000018 second /tmp[[SEP:[/\\]]]tmp.c:8:3 (a.o+0x8)
+CHECK:    #0    0x0000000000000018 first /tmp[[SEP]]tmp.c:4:3 (a.o+0x8)
+CHECK:    #1.1  0x0000000000000018 second /tmp[[SEP]]tmp.c:8:3 (a.o+0x8)
+CHECK:    #1    0x0000000000000018 first /tmp[[SEP]]tmp.c:4:3 (a.o+0x8)
+CHECK:    #0.1  0x0000000000000018 second /tmp[[SEP]]tmp.c:8:3 (a.o+0x8)
+CHECK:    #0    0x0000000000000018 first /tmp[[SEP]]tmp.c:4:3 (a.o+0x8)
+CHECK:    #0    0x0000000000000019 first /tmp[[SEP]]tmp.c:5:1 (a.o+0x9)
+CHECK:    #0    0x00000000000000fe (a.o+0xee)
+CHECK: [[BEGIN]]bt:0:0x111[[END]]
+
+ERR: error: expected at least 2 field(s); found 0
+ERR: error: no mmap covers address
+ERR: error: expected PC type; found ''
+ERR: error: expected at most 3 field(s); found 4
+
+;--- input
+{{{module:0:a.o:elf:abcdef}}}
+{{{mmap:0x10:256:load:0:r:0}}}
+{{{bt:0:0x19}}}
+{{{bt:1:0x19}}}
+{{{bt:0:0x19:ra}}}
+{{{bt:0:0x19:pc}}}
+{{{bt:0:0xff}}}
+
+{{{bt}}}
+{{{bt:0:0x111}}}
+{{{bt:0:0:}}}
+{{{bt:0:0:pc:}}}
+;--- asm.s
+# Generated by running "clang -finline -g -S tmp.c" in the following tmp.c on
+# Linux x86_64:
+#
+# static void second(void);
+# void first(void) {
+#   second();
+# }
+# void second(void) {}
+       .text
+       .file   "tmp.c"
+       .globl  first                           # -- Begin function first
+       .p2align        4, 0x90
+       .type   first,@function
+first:                                  # @first
+.Lfunc_begin0:
+       .file   1 "/tmp" "tmp.c"
+       .loc    1 3 0                           # tmp.c:3:0
+       .cfi_startproc
+# %bb.0:
+       pushq   %rbp
+       .cfi_def_cfa_offset 16
+       .cfi_offset %rbp, -16
+       movq    %rsp, %rbp
+       .cfi_def_cfa_register %rbp
+.Ltmp0:
+       .loc    1 8 3 prologue_end              # tmp.c:8:3
+       callq   first
+.Ltmp1:
+       .loc    1 5 1                           # tmp.c:5:1
+       popq    %rbp
+       .cfi_def_cfa %rsp, 8
+       retq
+.Ltmp2:
+.Lfunc_end0:
+       .size   first, .Lfunc_end0-first
+       .cfi_endproc
+                                        # -- End function
+       .section        .debug_abbrev,"",@progbits
+       .byte   1                               # Abbreviation Code
+       .byte   17                              # DW_TAG_compile_unit
+       .byte   1                               # DW_CHILDREN_yes
+       .byte   37                              # DW_AT_producer
+       .byte   14                              # DW_FORM_strp
+       .byte   19                              # DW_AT_language
+       .byte   5                               # DW_FORM_data2
+       .byte   3                               # DW_AT_name
+       .byte   14                              # DW_FORM_strp
+       .byte   16                              # DW_AT_stmt_list
+       .byte   23                              # DW_FORM_sec_offset
+       .byte   27                              # DW_AT_comp_dir
+       .byte   14                              # DW_FORM_strp
+       .byte   17                              # DW_AT_low_pc
+       .byte   1                               # DW_FORM_addr
+       .byte   18                              # DW_AT_high_pc
+       .byte   6                               # DW_FORM_data4
+       .byte   0                               # EOM(1)
+       .byte   0                               # EOM(2)
+       .byte   2                               # Abbreviation Code
+       .byte   46                              # DW_TAG_subprogram
+       .byte   0                               # DW_CHILDREN_no
+       .byte   3                               # DW_AT_name
+       .byte   14                              # DW_FORM_strp
+       .byte   58                              # DW_AT_decl_file
+       .byte   11                              # DW_FORM_data1
+       .byte   59                              # DW_AT_decl_line
+       .byte   11                              # DW_FORM_data1
+       .byte   39                              # DW_AT_prototyped
+       .byte   25                              # DW_FORM_flag_present
+       .byte   63                              # DW_AT_external
+       .byte   25                              # DW_FORM_flag_present
+       .byte   32                              # DW_AT_inline
+       .byte   11                              # DW_FORM_data1
+       .byte   0                               # EOM(1)
+       .byte   0                               # EOM(2)
+       .byte   3                               # Abbreviation Code
+       .byte   46                              # DW_TAG_subprogram
+       .byte   1                               # DW_CHILDREN_yes
+       .byte   17                              # DW_AT_low_pc
+       .byte   1                               # DW_FORM_addr
+       .byte   18                              # DW_AT_high_pc
+       .byte   6                               # DW_FORM_data4
+       .byte   64                              # DW_AT_frame_base
+       .byte   24                              # DW_FORM_exprloc
+       .byte   3                               # DW_AT_name
+       .byte   14                              # DW_FORM_strp
+       .byte   58                              # DW_AT_decl_file
+       .byte   11                              # DW_FORM_data1
+       .byte   59                              # DW_AT_decl_line
+       .byte   11                              # DW_FORM_data1
+       .byte   39                              # DW_AT_prototyped
+       .byte   25                              # DW_FORM_flag_present
+       .byte   63                              # DW_AT_external
+       .byte   25                              # DW_FORM_flag_present
+       .byte   0                               # EOM(1)
+       .byte   0                               # EOM(2)
+       .byte   4                               # Abbreviation Code
+       .byte   29                              # DW_TAG_inlined_subroutine
+       .byte   0                               # DW_CHILDREN_no
+       .byte   49                              # DW_AT_abstract_origin
+       .byte   19                              # DW_FORM_ref4
+       .byte   17                              # DW_AT_low_pc
+       .byte   1                               # DW_FORM_addr
+       .byte   18                              # DW_AT_high_pc
+       .byte   6                               # DW_FORM_data4
+       .byte   88                              # DW_AT_call_file
+       .byte   11                              # DW_FORM_data1
+       .byte   89                              # DW_AT_call_line
+       .byte   11                              # DW_FORM_data1
+       .byte   87                              # DW_AT_call_column
+       .byte   11                              # DW_FORM_data1
+       .byte   0                               # EOM(1)
+       .byte   0                               # EOM(2)
+       .byte   0                               # EOM(3)
+       .section        .debug_info,"",@progbits
+.Lcu_begin0:
+       .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+       .short  4                               # DWARF version number
+       .long   .debug_abbrev                   # Offset Into Abbrev. Section
+       .byte   8                               # Address Size (in bytes)
+       .byte   1                               # Abbrev [1] 0xb:0x52 DW_TAG_compile_unit
+       .long   .Linfo_string0                  # DW_AT_producer
+       .short  12                              # DW_AT_language
+       .long   .Linfo_string1                  # DW_AT_name
+       .long   .Lline_table_start0             # DW_AT_stmt_list
+       .long   .Linfo_string2                  # DW_AT_comp_dir
+       .quad   .Lfunc_begin0                   # DW_AT_low_pc
+       .long   .Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+       .byte   2                               # Abbrev [2] 0x2a:0x8 DW_TAG_subprogram
+       .long   .Linfo_string3                  # DW_AT_name
+       .byte   1                               # DW_AT_decl_file
+       .byte   7                               # DW_AT_decl_line
+                                        # DW_AT_prototyped
+                                        # DW_AT_external
+       .byte   1                               # DW_AT_inline
+       .byte   3                               # Abbrev [3] 0x32:0x2a DW_TAG_subprogram
+       .quad   .Lfunc_begin0                   # DW_AT_low_pc
+       .long   .Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+       .byte   1                               # DW_AT_frame_base
+       .byte   86
+       .long   .Linfo_string4                  # DW_AT_name
+       .byte   1                               # DW_AT_decl_file
+       .byte   3                               # DW_AT_decl_line
+                                        # DW_AT_prototyped
+                                        # DW_AT_external
+       .byte   4                               # Abbrev [4] 0x47:0x14 DW_TAG_inlined_subroutine
+       .long   42                              # DW_AT_abstract_origin
+       .quad   .Ltmp0                          # DW_AT_low_pc
+       .long   .Ltmp1-.Ltmp0                   # DW_AT_high_pc
+       .byte   1                               # DW_AT_call_file
+       .byte   4                               # DW_AT_call_line
+       .byte   3                               # DW_AT_call_column
+       .byte   0                               # End Of Children Mark
+       .byte   0                               # End Of Children Mark
+.Ldebug_info_end0:
+       .section        .debug_str,"MS",@progbits,1
+.Linfo_string0:
+       .asciz  "Debian clang version 13.0.1-6" # string offset=0
+.Linfo_string1:
+       .asciz  "tmp.c"                         # string offset=30
+.Linfo_string2:
+       .asciz  "/tmp" # string offset=36
+.Linfo_string3:
+       .asciz  "second"                        # string offset=85
+.Linfo_string4:
+       .asciz  "first"                         # string offset=92
+       .ident  "Debian clang version 13.0.1-6"
+       .section        ".note.GNU-stack","",@progbits
+       .addrsig
+       .addrsig_sym first
+       .section        .debug_line,"",@progbits
+.Lline_table_start0: