From 82f0c42dadd751a5d457b53a9c1adceb3c9eabbb Mon Sep 17 00:00:00 2001 From: George Rimar Date: Wed, 1 Nov 2017 07:42:38 +0000 Subject: [PATCH] [ELF] - Teach LLD to report line numbers for data symbols. This is PR34826. Currently LLD is unable to report line number when reporting duplicate declaration of some variable. That happens because for extracting line information we always use .debug_line section content which describes mapping from machine instructions to source file locations, what does not help for variables as does not describe them. In this patch I am taking the approproate information about variables locations from the .debug_info section. Differential revision: https://reviews.llvm.org/D38721 llvm-svn: 317080 --- lld/ELF/InputFiles.cpp | 67 +++++++++++++++- lld/ELF/InputFiles.h | 4 +- lld/ELF/InputSection.cpp | 52 +++++++----- lld/ELF/InputSection.h | 2 +- lld/ELF/Relocations.cpp | 4 +- lld/ELF/SymbolTable.cpp | 4 +- lld/test/ELF/conflict-debug-variable.s | 141 +++++++++++++++++++++++++++++++++ 7 files changed, 246 insertions(+), 28 deletions(-) create mode 100644 lld/test/ELF/conflict-debug-variable.s diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index e1cc2fe..98329cd 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -67,7 +67,7 @@ Optional elf::readFile(StringRef Path) { return MBRef; } -template void ObjFile::initializeDwarfLine() { +template void ObjFile::initializeDwarf() { DWARFContext Dwarf(make_unique>(this)); const DWARFObject &Obj = Dwarf.getDWARFObj(); DwarfLine.reset(new DWARFDebugLine); @@ -77,7 +77,68 @@ template void ObjFile::initializeDwarfLine() { // The second parameter is offset in .debug_line section // for compilation unit (CU) of interest. We have only one // CU (object file), so offset is always 0. - DwarfLine->getOrParseLineTable(LineData, 0); + const DWARFDebugLine::LineTable *LT = + DwarfLine->getOrParseLineTable(LineData, 0); + + // Return if there is no debug information about CU available. + if (!Dwarf.getNumCompileUnits()) + return; + + // Loop over variable records and insert them to VariableLoc. + DWARFCompileUnit *CU = Dwarf.getCompileUnitAtIndex(0); + for (const auto &Entry : CU->dies()) { + DWARFDie Die(CU, &Entry); + // Skip all tags that are not variables. + if (Die.getTag() != dwarf::DW_TAG_variable) + continue; + + // Skip if a local variable because we don't need them for generating error + // messages. In general, only non-local symbols can fail to be linked. + if (!dwarf::toUnsigned(Die.find(dwarf::DW_AT_external), 0)) + continue; + + // Get the source filename index for the variable. + unsigned File = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_file), 0); + if (!LT->hasFileAtIndex(File)) + continue; + + // Get the line number on which the variable is declared. + unsigned Line = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_line), 0); + + // Get the name of the variable and add the collected information to + // VariableLoc. Usually Name is non-empty, but it can be empty if the input + // object file lacks some debug info. + StringRef Name = dwarf::toString(Die.find(dwarf::DW_AT_name), ""); + if (!Name.empty()) + VariableLoc.insert({Name, {File, Line}}); + } +} + +// Returns the pair of file name and line number describing location of data +// object (variable, array, etc) definition. +template +Optional> +ObjFile::getVariableLoc(StringRef Name) { + llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); }); + + // There is always only one CU so it's offset is 0. + const DWARFDebugLine::LineTable *LT = DwarfLine->getLineTable(0); + if (!LT) + return None; + + // Return if we have no debug information about data object. + auto It = VariableLoc.find(Name); + if (It == VariableLoc.end()) + return None; + + // Take file name string from line table. + std::string FileName; + if (!LT->getFileNameByIndex( + It->second.first /* File */, nullptr, + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FileName)) + return None; + + return std::make_pair(FileName, It->second.second /*Line*/); } // Returns source line information for a given offset @@ -85,7 +146,7 @@ template void ObjFile::initializeDwarfLine() { template Optional ObjFile::getDILineInfo(InputSectionBase *S, uint64_t Offset) { - llvm::call_once(InitDwarfLine, [this]() { initializeDwarfLine(); }); + llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); }); // The offset to CU is 0. const DWARFDebugLine::LineTable *Tbl = DwarfLine->getLineTable(0); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 5641dbb..e1eaa5d 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -185,6 +185,7 @@ public: // If no information is available, returns "". std::string getLineInfo(InputSectionBase *S, uint64_t Offset); llvm::Optional getDILineInfo(InputSectionBase *, uint64_t); + llvm::Optional> getVariableLoc(StringRef Name); // MIPS GP0 value defined by this file. This value represents the gp value // used to create the relocatable object and required to support @@ -200,7 +201,7 @@ private: void initializeSections(llvm::DenseSet &ComdatGroups); void initializeSymbols(); - void initializeDwarfLine(); + void initializeDwarf(); InputSectionBase *getRelocTarget(const Elf_Shdr &Sec); InputSectionBase *createInputSection(const Elf_Shdr &Sec); StringRef getSectionName(const Elf_Shdr &Sec); @@ -216,6 +217,7 @@ private: // single object file, so we cache debugging information in order to // parse it only once for each object file we link. std::unique_ptr DwarfLine; + llvm::DenseMap> VariableLoc; llvm::once_flag InitDwarfLine; }; diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 131b4d5..2e99e3c 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -261,31 +261,41 @@ std::string InputSectionBase::getLocation(uint64_t Offset) { return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str(); } -// Returns a source location string. This function is intended to be -// used for constructing an error message. The returned message looks -// like this: +// Concatenates arguments to construct a string representing an error location. +static std::string createFileLineMsg(StringRef Path, unsigned Line) { + std::string Filename = path::filename(Path); + std::string Lineno = ":" + std::to_string(Line); + if (Filename == Path) + return Filename + Lineno; + return Filename + Lineno + " (" + Path.str() + Lineno + ")"; +} + +// This function is intended to be used for constructing an error message. +// The returned message looks like this: // // foo.c:42 (/home/alice/possibly/very/long/path/foo.c:42) // -// Returns an empty string if there's no way to get line info. -template std::string InputSectionBase::getSrcMsg(uint64_t Offset) { +// Returns an empty string if there's no way to get line info. +template +std::string InputSectionBase::getSrcMsg(const SymbolBody &Sym, + uint64_t Offset) { // Synthetic sections don't have input files. ObjFile *File = getFile(); if (!File) return ""; - Optional Info = File->getDILineInfo(this, Offset); + // In DWARF, functions and variables are stored to different places. + // First, lookup a function for a given offset. + if (Optional Info = File->getDILineInfo(this, Offset)) + return createFileLineMsg(Info->FileName, Info->Line); - // File->SourceFile contains STT_FILE symbol, and that is a last resort. - if (!Info) - return File->SourceFile; + // If it failed, lookup again as a variable. + if (Optional> FileLine = + File->getVariableLoc(Sym.getName())) + return createFileLineMsg(FileLine->first, FileLine->second); - std::string Path = Info->FileName; - std::string Filename = path::filename(Path); - std::string Lineno = ":" + std::to_string(Info->Line); - if (Filename == Path) - return Filename + Lineno; - return Filename + Lineno + " (" + Path + Lineno + ")"; + // File->SourceFile contains STT_FILE symbol, and that is a last resort. + return File->SourceFile; } // Returns a filename string along with an optional section name. This @@ -1004,10 +1014,14 @@ template std::string InputSectionBase::getLocation(uint64_t); template std::string InputSectionBase::getLocation(uint64_t); template std::string InputSectionBase::getLocation(uint64_t); -template std::string InputSectionBase::getSrcMsg(uint64_t); -template std::string InputSectionBase::getSrcMsg(uint64_t); -template std::string InputSectionBase::getSrcMsg(uint64_t); -template std::string InputSectionBase::getSrcMsg(uint64_t); +template std::string InputSectionBase::getSrcMsg(const SymbolBody &, + uint64_t); +template std::string InputSectionBase::getSrcMsg(const SymbolBody &, + uint64_t); +template std::string InputSectionBase::getSrcMsg(const SymbolBody &, + uint64_t); +template std::string InputSectionBase::getSrcMsg(const SymbolBody &, + uint64_t); template void InputSection::writeTo(uint8_t *); template void InputSection::writeTo(uint8_t *); diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index c42eca4..f299e40 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -177,7 +177,7 @@ public: // Returns a source location string. Used to construct an error message. template std::string getLocation(uint64_t Offset); - template std::string getSrcMsg(uint64_t Offset); + template std::string getSrcMsg(const SymbolBody &Sym, uint64_t Offset); std::string getObjMsg(uint64_t Offset); // Each section knows how to relocate itself. These functions apply diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 67a42ed..7089247 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -74,7 +74,7 @@ static std::string getLocation(InputSectionBase &S, const SymbolBody &Sym, uint64_t Off) { std::string Msg = "\n>>> defined in " + toString(Sym.getFile()) + "\n>>> referenced by "; - std::string Src = S.getSrcMsg(Off); + std::string Src = S.getSrcMsg(Sym, Off); if (!Src.empty()) Msg += Src + "\n>>> "; return Msg + S.getObjMsg(Off); @@ -728,7 +728,7 @@ static bool maybeReportUndefined(SymbolBody &Sym, InputSectionBase &Sec, std::string Msg = "undefined symbol: " + toString(Sym) + "\n>>> referenced by "; - std::string Src = Sec.getSrcMsg(Offset); + std::string Src = Sec.getSrcMsg(Sym, Offset); if (!Src.empty()) Msg += Src + "\n>>> "; Msg += Sec.getObjMsg(Offset); diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 83fd2de..457387b 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -456,9 +456,9 @@ static void reportDuplicate(SymbolBody *Sym, InputSectionBase *ErrSec, // >>> defined at baz.c:563 // >>> baz.o in archive libbaz.a auto *Sec1 = cast(D->Section); - std::string Src1 = Sec1->getSrcMsg(D->Value); + std::string Src1 = Sec1->getSrcMsg(*Sym, D->Value); std::string Obj1 = Sec1->getObjMsg(D->Value); - std::string Src2 = ErrSec->getSrcMsg(ErrOffset); + std::string Src2 = ErrSec->getSrcMsg(*Sym, ErrOffset); std::string Obj2 = ErrSec->getObjMsg(ErrOffset); std::string Msg = "duplicate symbol: " + toString(*Sym) + "\n>>> defined at "; diff --git a/lld/test/ELF/conflict-debug-variable.s b/lld/test/ELF/conflict-debug-variable.s new file mode 100644 index 0000000..69e8665 --- /dev/null +++ b/lld/test/ELF/conflict-debug-variable.s @@ -0,0 +1,141 @@ +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o +# RUN: llvm-dwarfdump %t.o | FileCheck -check-prefix=INPUT %s +# RUN: not ld.lld %t.o %t.o -o %t 2>&1 | FileCheck %s + +# INPUT: .debug_info contents: +# INPUT: DW_TAG_variable +# INPUT-NEXT: DW_AT_name ("foo") +# INPUT-NEXT: DW_AT_decl_file ("1.c") +# INPUT-NEXT: DW_AT_decl_line (1) +# INPUT-NEXT: DW_AT_type (cu + 0x0032 "int") +# INPUT-NEXT: DW_AT_external (true) +# INPUT-NEXT: DW_AT_location (DW_OP_addr 0x0) +# INPUT: DW_TAG_variable +# INPUT-NEXT: DW_AT_name ("bar") +# INPUT-NEXT: DW_AT_decl_file ("1.c") +# INPUT-NEXT: DW_AT_decl_line (2) +# INPUT-NEXT: DW_AT_type (cu + 0x0032 "int") +# INPUT-NEXT: DW_AT_external (true) +# INPUT-NEXT: DW_AT_location (DW_OP_addr 0x0) + +## Check we use information from .debug_info in messages. +# CHECK: duplicate symbol: bar +# CHECK-NEXT: >>> defined at 1.c:2 +# CHECK-NEXT: >>> {{.*}}:(bar) +# CHECK-NEXT: >>> defined at 1.c:2 +# CHECK-NEXT: >>> {{.*}}:(.data+0x0) +# CHECK: duplicate symbol: foo +# CHECK-NEXT: >>> defined at 1.c:1 +# CHECK-NEXT: >>> {{.*}}:(foo) +# CHECK-NEXT: >>> defined at 1.c:1 +# CHECK-NEXT: >>> {{.*}}:(.bss+0x0) + +# Used reduced output from following code and gcc 7.1.0 +# to produce this input file: +# Source (1.c): +# int foo = 0; +# int bar = 1; +# Invocation: g++ -g -S 1.c + +.bss +.globl foo +.type foo, @object +.size foo, 4 +foo: + +.data +.globl bar +.type bar, @object +.size bar, 4 +bar: + +.text +.file 1 "1.c" + +.section .debug_info,"",@progbits + .long 0x4b # Compile Unit: length = 0x0000004b) + .value 0x4 # version = 0x0004 + .long 0 # abbr_offset = 0x0 + .byte 0x8 # addr_size = 0x08 + + .uleb128 0x1 # DW_TAG_compile_unit [1] * + .long 0 # DW_AT_producer [DW_FORM_strp] ( .debug_str[0x00000000] = ) + .byte 0x4 # DW_AT_language [DW_FORM_data1] (DW_LANG_C_plus_plus) + .string "1.c" # DW_AT_name [DW_FORM_string] ("1.c") + .long 0 # DW_AT_comp_dir [DW_FORM_strp] ( .debug_str[0x00000000] = ) + .long 0 # DW_AT_stmt_list [DW_FORM_sec_offset] (0x00000000) + + .uleb128 0x2 # DW_TAG_variable [2] + .string "foo" # DW_AT_name [DW_FORM_string] ("foo") + .byte 0x1 # DW_AT_decl_file [DW_FORM_data1] ("1.c") + .byte 0x1 # DW_AT_decl_line [DW_FORM_data1] (1) + .long 0x32 # DW_AT_type [DW_FORM_ref4] (cu + 0x0032 => {0x00000032}) + .uleb128 0x9 # DW_AT_external [DW_FORM_flag_present] (true) + .byte 0x3 + .quad foo # DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0) + + .uleb128 0x3 # DW_TAG_base_type [3] + .byte 0x4 # DW_AT_byte_size [DW_FORM_data1] (0x04) + .byte 0x5 # DW_AT_encoding [DW_FORM_data1] (DW_ATE_signed) + .string "int" # DW_AT_name [DW_FORM_string] ("int") + + .uleb128 0x2 # DW_TAG_variable [2] + .string "bar" # DW_AT_name [DW_FORM_string] ("bar") + .byte 0x1 # DW_AT_decl_file [DW_FORM_data1] ("1.c") + .byte 0x2 # DW_AT_decl_line [DW_FORM_data1] (2) + .long 0x32 # DW_AT_type [DW_FORM_ref4] (cu + 0x0032 => {0x00000032}) + .uleb128 0x9 # DW_AT_external [DW_FORM_flag_present] (true) + .byte 0x3 + .quad bar # DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0) + .byte 0 # END + + +.section .debug_abbrev,"",@progbits + .uleb128 0x1 # Abbreviation code. + .uleb128 0x11 # DW_TAG_compile_unit + + .byte 0x1 # ID + .uleb128 0x25 # DW_AT_producer, DW_FORM_strp + .uleb128 0xe + .uleb128 0x13 # DW_AT_language, DW_FORM_data1 + .uleb128 0xb + .uleb128 0x3 # DW_AT_name, DW_FORM_string + .uleb128 0x8 + .uleb128 0x1b # DW_AT_comp_dir, DW_FORM_strp + .uleb128 0xe + .uleb128 0x10 # DW_AT_stmt_list, DW_FORM_sec_offset + .uleb128 0x17 + .byte 0 + .byte 0 + + .uleb128 0x2 # ID + .uleb128 0x34 # DW_TAG_variable, DW_CHILDREN_no + .byte 0 + .uleb128 0x3 # DW_AT_name, DW_FORM_string + .uleb128 0x8 + .uleb128 0x3a # DW_AT_decl_file, DW_FORM_data1 + .uleb128 0xb + .uleb128 0x3b # DW_AT_decl_line, DW_FORM_data1 + .uleb128 0xb + .uleb128 0x49 # DW_AT_type, DW_FORM_ref4 + .uleb128 0x13 + .uleb128 0x3f # DW_AT_external, DW_FORM_flag_present + .uleb128 0x19 + .uleb128 0x2 # DW_AT_location, DW_FORM_exprloc + .uleb128 0x18 + .byte 0 + .byte 0 + + .uleb128 0x3 # ID + .uleb128 0x24 # DW_TAG_base_type, DW_CHILDREN_no + .byte 0 + .uleb128 0xb # DW_AT_byte_size, DW_FORM_data1 + .uleb128 0xb + .uleb128 0x3e # DW_AT_encoding, DW_FORM_data1 + .uleb128 0xb + .uleb128 0x3 # DW_AT_name, DW_FORM_string + .uleb128 0x8 + .byte 0 + .byte 0 + .byte 0 + -- 2.7.4