From: Daniel Thornburgh Date: Fri, 5 Aug 2022 21:58:44 +0000 (-0700) Subject: [Symbolizer] Handle {{{bt}}} symbolizer markup element. X-Git-Tag: upstream/17.0.6~34836 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ea99225521cba6dec1ad4ca70a8665829e772fa9;p=platform%2Fupstream%2Fllvm.git [Symbolizer] Handle {{{bt}}} symbolizer markup element. This adds support for backtrace generation to the llvm-symbolizer markup filter, which is likely the largest use case. Reviewed By: peter.smith Differential Revision: https://reviews.llvm.org/D132706 --- diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst index 5870e0e..abb174c 100644 --- a/llvm/docs/CommandGuide/llvm-symbolizer.rst +++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst @@ -254,9 +254,21 @@ OPTIONS and prints the results to standard output. The following markup elements are not yet supported: - * ``{{bt}}`` - * ``{{hexdict}}`` - * ``{{dumpfile}}`` + * ``{{{hexdict}}}`` + * ``{{{dumpfile}}}`` + + The ``{{{bt}}}`` backtrace element reports frames using the following syntax: + + ``#<number>[.<inline>]
<address> <function> <file>:<line>:<col> (<module>+<relative address>)`` + + ``<inline>`` provides frame numbers for calls inlined into the caller + corresponding to ``<number>``. The inlined call numbers start at 1 and increase + from callee to caller. + + ``<address>
`` is an address inside the call instruction to the function. The + address may not be the start of the instruction. ``<relative address>`` is + the corresponding virtual offset in the ``<module>`` loaded at that address. + .. _llvm-symbolizer-opt-f: diff --git a/llvm/docs/SymbolizerMarkupFormat.rst b/llvm/docs/SymbolizerMarkupFormat.rst index b06cc20..169e57a 100644 --- a/llvm/docs/SymbolizerMarkupFormat.rst +++ b/llvm/docs/SymbolizerMarkupFormat.rst @@ -205,7 +205,7 @@ human-readable symbolic form. {{{data:0x12345678}}} {{{data:0xffffffff9abcdef0}}} -``{{{bt:%u:%p}}}``, ``{{{bt:%u:%p:ra}}}``, ``{{{bt:%u:%p:pc}}}`` [#not_yet_implemented]_ +``{{{bt:%u:%p}}}``, ``{{{bt:%u:%p:ra}}}``, ``{{{bt:%u:%p:pc}}}`` This represents one frame in a backtrace. It usually appears on a line by itself (surrounded only by whitespace), in a sequence of such lines with diff --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h index b597e9b..a54f8f5 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h @@ -97,6 +97,7 @@ private: bool tryPresentation(const MarkupNode &Node); bool trySymbol(const MarkupNode &Node); bool tryPC(const MarkupNode &Node); + bool tryBackTrace(const MarkupNode &Node); bool tryData(const MarkupNode &Node); bool trySGR(const MarkupNode &Node); @@ -118,6 +119,7 @@ private: Optional> parseBuildID(StringRef Str) const; Optional parseMode(StringRef Str) const; Optional parsePCType(StringRef Str) const; + Optional parseFrameNumber(StringRef Str) const; bool checkTag(const MarkupNode &Node) const; bool checkNumFields(const MarkupNode &Element, size_t Size) const; diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp index 70fb49d..d96c0c8 100644 --- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp @@ -27,6 +27,7 @@ #include "llvm/Demangle/Demangle.h" #include 
"llvm/Object/ObjectFile.h" #include "llvm/Support/Error.h" +#include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -216,6 +217,8 @@ bool MarkupFilter::tryPresentation(const MarkupNode &Node) { return true; if (tryPC(Node)) return true; + if (tryBackTrace(Node)) + return true; return tryData(Node); } @@ -269,8 +272,7 @@ bool MarkupFilter::tryPC(const MarkupNode &Node) { printRawElement(Node); return true; } - if (LI->FileName == DILineInfo::BadString && - LI->FunctionName == DILineInfo::BadString && LI->Line == 0) { + if (!*LI) { printRawElement(Node); return true; } @@ -286,6 +288,87 @@ bool MarkupFilter::tryPC(const MarkupNode &Node) { return true; } +bool MarkupFilter::tryBackTrace(const MarkupNode &Node) { + if (Node.Tag != "bt") + return false; + if (!checkNumFieldsAtLeast(Node, 2)) + return true; + if (!checkNumFieldsAtMost(Node, 3)) + return true; + + Optional FrameNumber = parseFrameNumber(Node.Fields[0]); + if (!FrameNumber) + return true; + + Optional Addr = parseAddr(Node.Fields[1]); + if (!Addr) + return true; + + // Backtrace addresses are assumed to be return addresses by default. 
+ PCType Type = PCType::ReturnAddress; + if (Node.Fields.size() == 3) { + Optional ParsedType = parsePCType(Node.Fields[2]); + if (!ParsedType) + return true; + Type = *ParsedType; + } + *Addr = adjustAddr(*Addr, Type); + + const MMap *MMap = getContainingMMap(*Addr); + if (!MMap) { + WithColor::error() << "no mmap covers address\n"; + reportLocation(Node.Fields[0].begin()); + printRawElement(Node); + return true; + } + uint64_t MRA = MMap->getModuleRelativeAddr(*Addr); + + Expected II = + Symbolizer.symbolizeInlinedCode(MMap->Mod->BuildID, {MRA}); + if (!II) { + WithColor::defaultErrorHandler(II.takeError()); + printRawElement(Node); + return true; + } + + highlight(); + for (unsigned I = 0, E = II->getNumberOfFrames(); I != E; ++I) { + auto Header = formatv("{0, +6}", formatv("#{0}", FrameNumber)).sstr<16>(); + // Don't highlight the # sign as a value. + size_t NumberIdx = Header.find("#") + 1; + OS << Header.substr(0, NumberIdx); + printValue(Header.substr(NumberIdx)); + if (I == E - 1) { + OS << " "; + } else { + OS << '.'; + printValue(formatv("{0, -2}", I + 1)); + } + printValue(formatv(" {0:x16} ", *Addr)); + + DILineInfo LI = II->getFrame(I); + if (LI) { + printValue(LI.FunctionName); + OS << ' '; + printValue(LI.FileName); + OS << ':'; + printValue(Twine(LI.Line)); + OS << ':'; + printValue(Twine(LI.Column)); + OS << ' '; + } + OS << '('; + printValue(MMap->Mod->Name); + OS << "+"; + printValue(formatv("{0:x}", MRA)); + OS << ')'; + if (I != E - 1) + OS << lineEnding(); + } + restoreColor(); + return true; +} + bool MarkupFilter::tryData(const MarkupNode &Node) { if (Node.Tag != "data") return false; @@ -502,6 +585,16 @@ Optional MarkupFilter::parseSize(StringRef Str) const { return ID; } +// Parse a frame number (%i in the spec). +Optional MarkupFilter::parseFrameNumber(StringRef Str) const { + uint64_t ID; + if (Str.getAsInteger(10, ID)) { + reportTypeError(Str, "frame number"); + return None; + } + return ID; +} + // Parse a build ID (%x in the spec). 
Optional> MarkupFilter::parseBuildID(StringRef Str) const { std::string Bytes; diff --git a/llvm/test/DebugInfo/symbolize-filter-markup-bt.test b/llvm/test/DebugInfo/symbolize-filter-markup-bt.test new file mode 100644 index 0000000..a170113 --- /dev/null +++ b/llvm/test/DebugInfo/symbolize-filter-markup-bt.test @@ -0,0 +1,213 @@ +REQUIRES: x86-registered-target +RUN: split-file %s %t +RUN: mkdir -p %t/.build-id/ab +RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t/asm.s \ +RUN: -o %t/.build-id/ab/cdef.debug +RUN: llvm-symbolizer --debug-file-directory=%t --filter-markup < %t/input \ +RUN: > %t.output 2> %t.err +RUN: FileCheck %s --input-file=%t.output --match-full-lines \ +RUN: --implicit-check-not {{.}} +RUN: FileCheck %s --check-prefix=ERR --input-file=%t.err --match-full-lines + +CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=abcdef [0x10-0x10f](r)[[END:\]{3}]] +CHECK: #0.1 0x0000000000000018 second /tmp[[SEP:[/\\]]]tmp.c:8:3 (a.o+0x8) +CHECK: #0 0x0000000000000018 first /tmp[[SEP]]tmp.c:4:3 (a.o+0x8) +CHECK: #1.1 0x0000000000000018 second /tmp[[SEP]]tmp.c:8:3 (a.o+0x8) +CHECK: #1 0x0000000000000018 first /tmp[[SEP]]tmp.c:4:3 (a.o+0x8) +CHECK: #0.1 0x0000000000000018 second /tmp[[SEP]]tmp.c:8:3 (a.o+0x8) +CHECK: #0 0x0000000000000018 first /tmp[[SEP]]tmp.c:4:3 (a.o+0x8) +CHECK: #0 0x0000000000000019 first /tmp[[SEP]]tmp.c:5:1 (a.o+0x9) +CHECK: #0 0x00000000000000fe (a.o+0xee) +CHECK: [[BEGIN]]bt:0:0x111[[END]] + +ERR: error: expected at least 2 field(s); found 0 +ERR: error: no mmap covers address +ERR: error: expected PC type; found '' +ERR: error: expected at most 3 field(s); found 4 + +;--- input +{{{module:0:a.o:elf:abcdef}}} +{{{mmap:0x10:256:load:0:r:0}}} +{{{bt:0:0x19}}} +{{{bt:1:0x19}}} +{{{bt:0:0x19:ra}}} +{{{bt:0:0x19:pc}}} +{{{bt:0:0xff}}} + +{{{bt}}} +{{{bt:0:0x111}}} +{{{bt:0:0:}}} +{{{bt:0:0:pc:}}} +;--- asm.s +# Generated by running "clang -finline -g -S tmp.c" in the following tmp.c on +# Linux x86_64: +# +# static void second(void); 
+# void first(void) { +# second(); +# } +# void second(void) {} + .text + .file "tmp.c" + .globl first # -- Begin function first + .p2align 4, 0x90 + .type first,@function +first: # @first +.Lfunc_begin0: + .file 1 "/tmp" "tmp.c" + .loc 1 3 0 # tmp.c:3:0 + .cfi_startproc +# %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp0: + .loc 1 8 3 prologue_end # tmp.c:8:3 + callq first +.Ltmp1: + .loc 1 5 1 # tmp.c:5:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp2: +.Lfunc_end0: + .size first, .Lfunc_end0-first + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 39 # DW_AT_prototyped + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 32 # DW_AT_inline + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # 
DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 39 # DW_AT_prototyped + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 29 # DW_TAG_inlined_subroutine + .byte 0 # DW_CHILDREN_no + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 88 # DW_AT_call_file + .byte 11 # DW_FORM_data1 + .byte 89 # DW_AT_call_line + .byte 11 # DW_FORM_data1 + .byte 87 # DW_AT_call_column + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x52 DW_TAG_compile_unit + .long .Linfo_string0 # DW_AT_producer + .short 12 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 2 # Abbrev [2] 0x2a:0x8 DW_TAG_subprogram + .long .Linfo_string3 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + # DW_AT_prototyped + # DW_AT_external + .byte 1 # DW_AT_inline + .byte 3 # Abbrev [3] 0x32:0x2a DW_TAG_subprogram + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string4 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 3 # DW_AT_decl_line + # DW_AT_prototyped + # DW_AT_external + .byte 4 # Abbrev [4] 0x47:0x14 DW_TAG_inlined_subroutine + .long 42 # DW_AT_abstract_origin + .quad .Ltmp0 # DW_AT_low_pc + .long .Ltmp1-.Ltmp0 # DW_AT_high_pc + .byte 1 # DW_AT_call_file + .byte 4 
# DW_AT_call_line + .byte 3 # DW_AT_call_column + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "Debian clang version 13.0.1-6" # string offset=0 +.Linfo_string1: + .asciz "tmp.c" # string offset=30 +.Linfo_string2: + .asciz "/tmp" # string offset=36 +.Linfo_string3: + .asciz "second" # string offset=85 +.Linfo_string4: + .asciz "first" # string offset=92 + .ident "Debian clang version 13.0.1-6" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym first + .section .debug_line,"",@progbits +.Lline_table_start0: