From: wlei Date: Tue, 22 Feb 2022 20:09:01 +0000 (-0800) Subject: [llvm-profgen] Support symbol loading for debug fission X-Git-Tag: upstream/15.0.7~15519 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b3a778fb5eca0e6032f3d62c03a673c2431be8a0;p=platform%2Fupstream%2Fllvm.git [llvm-profgen] Support symbol loading for debug fission Support loading debug info from DWARF split files, such as .dwo and .dwp files. Leverage the `getNonSkeletonUnitDIE(false)` API to achieve this. Add a test case to make sure all the ranges are correctly retrieved by the loader. Reviewed By: ayermolo, hoy, wenlei Differential Revision: https://reviews.llvm.org/D115973 --- diff --git a/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.exe b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.exe new file mode 100755 index 0000000..86a7775 Binary files /dev/null and b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.exe differ diff --git a/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.o.yaml b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.o.yaml new file mode 100644 index 0000000..a3ace75 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.o.yaml @@ -0,0 +1,92 @@ +# Source code: + +# int foo() { +# return 1; +# } +# +# int main() { +# foo(); +# return 0; +# } + +# Build instructions (clang version 15.0.0): +# split-dwarf-single.o: clang -gsplit-dwarf=single -fdebug-compilation-dir=. 
test.c -fdebug-info-for-profiling -O0 -g -o split-dwarf-single.o -c +# split-dwarf-single.o.yaml: obj2yaml split-dwarf-single.o > split-dwarf-single.o.yaml + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + SectionHeaderStringTable: .strtab +Sections: + - Name: .debug_abbrev + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 014A00101772171B25B442197625111B12067317000000 + - Name: .debug_info + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 2400000005000408000000000428758115ED87CF0100000000000000000001002C00000000000000 + - Name: .debug_str_offsets + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 0C000000050000000000000000000000 + - Name: .debug_str_offsets.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: 1C000000050000000000000004000000080000000D000000760000007D000000 + - Name: .debug_str.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ] + AddressAlign: 0x1 + EntSize: 0x1 + Content: 666F6F00696E74006D61696E00636C616E672076657273696F6E2031352E302E30202868747470733A2F2F6769746875622E636F6D2F6C6C766D2F6C6C766D2D70726F6A6563742E67697420313565336538396239626162323838363862303930656539663336326161386630333233303934372900746573742E630073706C69742D64776172662D73696E676C652E6F00 + - Name: .debug_info.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: 3900000005000508000000000428758115ED87CF01030C00040502000B00000001560000013800000002011C0000000156020005380000000301050400 + - Name: .debug_abbrev.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: 01110125251305032576250000022E00111B1206401803253A0B3B0B49133F19000003240003253E0B0B0B000000 + - Name: .debug_line + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 5E0000000500080037000000010101FB0E0D00010101010000000100000101011F010000000003011F020F051E010000000000EF173AFD4B2F5E20815DE19BD24360F4040000090200000000000000000105030A4B0500BD05030AE5590208000101 
+ - Name: .debug_line_str + Type: SHT_PROGBITS + Flags: [ SHF_MERGE, SHF_STRINGS ] + AddressAlign: 0x1 + EntSize: 0x1 + Content: 2E00746573742E6300 + - Type: SectionHeaderTable + Sections: + - Name: .strtab + - Name: .debug_abbrev + - Name: .debug_info + - Name: .debug_str_offsets + - Name: .debug_str + - Name: .debug_str_offsets.dwo + - Name: .debug_str.dwo + - Name: .debug_info.dwo + - Name: .debug_abbrev.dwo + - Name: .debug_addr + - Name: .debug_line + - Name: .debug_line_str +DWARF: + debug_str: + - . + - split-dwarf-single.o + debug_addr: + - Length: 0x14 + Version: 0x5 + AddressSize: 0x8 + Entries: + - {} + - Address: 0x10 +... diff --git a/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.dwo.yaml b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.dwo.yaml new file mode 100644 index 0000000..688011a --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.dwo.yaml @@ -0,0 +1,52 @@ +# Source code: + +# int foo() { +# return 1; +# } +# +# int main() { +# foo(); +# return 0; +# } + +# Build instructions (clang version 15.0.0): +# split-dwarf-split.dwo: clang -gsplit-dwarf=split -fdebug-compilation-dir=. 
test.c -fdebug-info-for-profiling -O0 -g -o split-dwarf-split.o -c +# split-dwarf-split.dwo.yaml: obj2yaml split-dwarf-split.dwo > split-dwarf-split.dwo.yaml + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + SectionHeaderStringTable: .strtab +Sections: + - Name: .debug_str_offsets.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: 1C000000050000000000000004000000080000000D000000760000007D000000 + - Name: .debug_str.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ] + AddressAlign: 0x1 + EntSize: 0x1 + Content: 666F6F00696E74006D61696E00636C616E672076657273696F6E2031352E302E30202868747470733A2F2F6769746875622E636F6D2F6C6C766D2F6C6C766D2D70726F6A6563742E67697420313565336538396239626162323838363862303930656539663336326161386630333233303934372900746573742E630073706C69742D64776172662D73706C69742E64776F00 + - Name: .debug_info.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: 3900000005000508000000000428758115ED87CF01030C00040502000B00000001560000013800000002011C0000000156020005380000000301050400 + - Name: .debug_abbrev.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: 01110125251305032576250000022E00111B1206401803253A0B3B0B49133F19000003240003253E0B0B0B000000 + - Type: SectionHeaderTable + Sections: + - Name: .strtab + - Name: .debug_str_offsets.dwo + - Name: .debug_str.dwo + - Name: .debug_info.dwo + - Name: .debug_abbrev.dwo +... 
diff --git a/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.exe b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.exe new file mode 100755 index 0000000..4d0ff57 Binary files /dev/null and b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.exe differ diff --git a/llvm/test/tools/llvm-profgen/split-dwarf.test b/llvm/test/tools/llvm-profgen/split-dwarf.test new file mode 100644 index 0000000..09daea7 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/split-dwarf.test @@ -0,0 +1,50 @@ +; RUN: rm -rf %t +; RUN: mkdir -p %t +; RUN: cd %t + +; RUN: echo -e "0\n0" > %t.prof + +; Test -gsplit-dwarf=single +; RUN: cp %S/Inputs/split-dwarf-single.exe %t/split-dwarf-single.exe +; RUN: yaml2obj %S/Inputs/split-dwarf-single.o.yaml -o %t/split-dwarf-single.o +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/split-dwarf-single.exe --output=%t1 --fill-zero-for-all-funcs +; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-SPLIT-DWARF + +; Test -gsplit-dwarf=split +; RUN: cp %S/Inputs/split-dwarf-split.exe %t/split-dwarf-split.exe +; RUN: yaml2obj %S/Inputs/split-dwarf-split.dwo.yaml -o %t/split-dwarf-split.dwo +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/split-dwarf-split.exe --output=%t2 --fill-zero-for-all-funcs +; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-SPLIT-DWARF +; Test --dwp +; RUN: llvm-dwp %t/split-dwarf-split.dwo -o %t/split-dwarf-split.dwp +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --dwp=%t/split-dwarf-split.dwp --binary=%t/split-dwarf-split.exe --output=%t3 --fill-zero-for-all-funcs +; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-SPLIT-DWARF + +; Make sure that all function ranges are well retrieved and have been populated with zero. 
+ +; CHECK-SPLIT-DWARF: foo:0:0 +; CHECK-SPLIT-DWARF-NEXT: 0: 0 +; CHECK-SPLIT-DWARF-NEXT: 1: 0 +; CHECK-SPLIT-DWARF-NEXT: main:0:0 +; CHECK-SPLIT-DWARF-NEXT: 0: 0 +; CHECK-SPLIT-DWARF-NEXT: 1: 0 +; CHECK-SPLIT-DWARF-NEXT: 2: 0 + + +; Build instructions: +; split-dwarf-single.o: clang -gsplit-dwarf=single -fdebug-compilation-dir=. test.c -fdebug-info-for-profiling -O0 -g -o split-dwarf-single.o -c +; split-dwarf-single.exe: clang -fdebug-compilation-dir=. split-dwarf-single.o -o split-dwarf-single.exe -fdebug-info-for-profiling -O0 -g + +; split-dwarf-split.dwo: clang -gsplit-dwarf=split -fdebug-compilation-dir=. test.c -fdebug-info-for-profiling -O0 -g -o split-dwarf-split.o -c +; split-dwarf-split.exe: clang -fdebug-compilation-dir=. split-dwarf-split.o -o split-dwarf-split.exe -fdebug-info-for-profiling -O0 -g + +; Source code: + +int foo() { + return 1; +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 3430b03..dca6906 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -43,6 +43,11 @@ static cl::opt UseDwarfCorrelation( cl::desc("Use dwarf for profile correlation even when binary contains " "pseudo probe.")); +static cl::opt + DWPPath("dwp", cl::init(""), cl::ZeroOrMore, + cl::desc("Path of .dwp file. When not specified, it will be " + ".dwp in the same directory as the main binary.")); + static cl::list DisassembleFunctions( "disassemble-functions", cl::CommaSeparated, cl::desc("List of functions to print disassembly for. 
Accept demangled " @@ -610,69 +615,94 @@ void ProfiledBinary::checkUseFSDiscriminator( } } -void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) { - auto DebugContext = llvm::DWARFContext::create(Obj); - if (!DebugContext) - exitWithError("Misssing debug info.", Path); +void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { + for (const auto &DieInfo : CompilationUnit.dies()) { + llvm::DWARFDie Die(&CompilationUnit, &DieInfo); - for (const auto &CompilationUnit : DebugContext->compile_units()) { - for (const auto &DieInfo : CompilationUnit->dies()) { - llvm::DWARFDie Die(CompilationUnit.get(), &DieInfo); + if (!Die.isSubprogramDIE()) + continue; + auto Name = Die.getName(llvm::DINameKind::LinkageName); + if (!Name) + Name = Die.getName(llvm::DINameKind::ShortName); + if (!Name) + continue; - if (!Die.isSubprogramDIE()) - continue; - auto Name = Die.getName(llvm::DINameKind::LinkageName); - if (!Name) - Name = Die.getName(llvm::DINameKind::ShortName); - if (!Name) - continue; + auto RangesOrError = Die.getAddressRanges(); + if (!RangesOrError) + continue; + const DWARFAddressRangesVector &Ranges = RangesOrError.get(); - auto RangesOrError = Die.getAddressRanges(); - if (!RangesOrError) - continue; - const DWARFAddressRangesVector &Ranges = RangesOrError.get(); + if (Ranges.empty()) + continue; + + // Different DWARF symbols can have same function name, search or create + // BinaryFunction indexed by the name. + auto Ret = BinaryFunctions.emplace(Name, BinaryFunction()); + auto &Func = Ret.first->second; + if (Ret.second) + Func.FuncName = Ret.first->first; - if (Ranges.empty()) + for (const auto &Range : Ranges) { + uint64_t FuncStart = Range.LowPC; + uint64_t FuncSize = Range.HighPC - FuncStart; + + if (FuncSize == 0 || FuncStart < getPreferredBaseAddress()) continue; - // Different DWARF symbols can have same function name, search or create - // BinaryFunction indexed by the name. 
- auto Ret = BinaryFunctions.emplace(Name, BinaryFunction()); - auto &Func = Ret.first->second; - if (Ret.second) - Func.FuncName = Ret.first->first; - - for (const auto &Range : Ranges) { - uint64_t FuncStart = Range.LowPC; - uint64_t FuncSize = Range.HighPC - FuncStart; - - if (FuncSize == 0 || FuncStart < getPreferredBaseAddress()) - continue; - - uint64_t StartOffset = FuncStart - getPreferredBaseAddress(); - uint64_t EndOffset = Range.HighPC - getPreferredBaseAddress(); - - // We may want to know all ranges for one function. Here group the - // ranges and store them into BinaryFunction. - Func.Ranges.emplace_back(StartOffset, EndOffset); - - auto R = StartOffset2FuncRangeMap.emplace(StartOffset, FuncRange()); - if (R.second) { - FuncRange &FRange = R.first->second; - FRange.Func = &Func; - FRange.StartOffset = StartOffset; - FRange.EndOffset = EndOffset; - } else { - WithColor::warning() - << "Duplicated symbol start address at " - << format("%8" PRIx64, StartOffset + getPreferredBaseAddress()) - << " " << R.first->second.getFuncName() << " and " << Name - << "\n"; - } + uint64_t StartOffset = FuncStart - getPreferredBaseAddress(); + uint64_t EndOffset = Range.HighPC - getPreferredBaseAddress(); + + // We may want to know all ranges for one function. Here group the + // ranges and store them into BinaryFunction. 
+ Func.Ranges.emplace_back(StartOffset, EndOffset); + + auto R = StartOffset2FuncRangeMap.emplace(StartOffset, FuncRange()); + if (R.second) { + FuncRange &FRange = R.first->second; + FRange.Func = &Func; + FRange.StartOffset = StartOffset; + FRange.EndOffset = EndOffset; + } else { + WithColor::warning() + << "Duplicated symbol start address at " + << format("%8" PRIx64, StartOffset + getPreferredBaseAddress()) + << " " << R.first->second.getFuncName() << " and " << Name << "\n"; } } } - assert(!StartOffset2FuncRangeMap.empty() && "Misssing debug info."); +} + +void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) { + auto DebugContext = llvm::DWARFContext::create( + Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, DWPPath); + if (!DebugContext) + exitWithError("Error creating the debug info context", Path); + + for (const auto &CompilationUnit : DebugContext->compile_units()) + loadSymbolsFromDWARFUnit(*CompilationUnit.get()); + + // Handles DWO sections that can either be in .o, .dwo or .dwp files. + for (const auto &CompilationUnit : DebugContext->compile_units()) { + DWARFUnit *const DwarfUnit = CompilationUnit.get(); + if (llvm::Optional DWOId = DwarfUnit->getDWOId()) { + DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); + if (!DWOCU->isDWOUnit()) { + std::string DWOName = dwarf::toString( + DwarfUnit->getUnitDIE().find( + {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), + ""); + WithColor::warning() + << "DWO debug information for " << DWOName + << " was not loaded. 
Please check the .o, .dwo or .dwp path.\n"; + continue; + } + loadSymbolsFromDWARFUnit(*DWOCU); + } + } + + if (BinaryFunctions.empty()) + WithColor::warning() << "Loading of DWARF info completed, but no binary " + "functions have been retrieved.\n"; } void ProfiledBinary::populateSymbolListFromDWARF( @@ -689,6 +719,7 @@ void ProfiledBinary::setupSymbolizer() { SymbolizerOpts.DefaultArch = TheTriple.getArchName().str(); SymbolizerOpts.UseSymbolTable = false; SymbolizerOpts.RelativeAddresses = false; + SymbolizerOpts.DWPName = DWPPath; Symbolizer = std::make_unique(SymbolizerOpts); } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index 33b0b81..d359f79 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -290,6 +290,9 @@ class ProfiledBinary { // Load debug info of subprograms from DWARF section. void loadSymbolsFromDWARF(ObjectFile &Obj); + // Load debug info from DWARF unit. + void loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit); + // A function may be spilt into multiple non-continuous address ranges. We use // this to set whether start offset of a function is the real entry of the // function and also set false to the non-function label.