From: Jez Ng Date: Tue, 8 Nov 2022 21:33:22 +0000 (-0500) Subject: [lld-macho] Overhaul map file code X-Git-Tag: upstream/17.0.6~28178 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=213dbdbef0bad835abca0753f9e59b17dc2bcde2;p=platform%2Fupstream%2Fllvm.git [lld-macho] Overhaul map file code The previous map file code was modeled after LLD-ELF's implementation. However, ld64's map file differs quite a bit from LLD-ELF's. I've revamped our map file implementation so it is better able to emit ld64-style map files. Notable differences: * ld64 doesn't demangle symbols in map files, regardless of whether `-demangle` is passed. So we don't have to bother with `getSymbolStrings()`. * ld64 doesn't emit symbols in cstring sections; it emits just the literal values. Moreover, it emits these literal values regardless of whether they are labeled with a symbol. * ld64 emits map file entries for things that are not strictly symbols, such as unwind info, GOT entries, etc. That isn't handled in this diff, but this redesign makes them easy to implement. Additionally, the previous implementation sorted the symbols so as to emit them in address order. This was slow and unnecessary -- the symbols can already be traversed in address order by walking the list of OutputSections. This brings significant speedups. Here are the numbers from the chromium_framework_less_dwarf benchmark on my Mac Pro, with the `-map` argument added to the response file: base diff difference (95% CI) sys_time 2.922 ± 0.059 2.950 ± 0.085 [ -0.7% .. +2.5%] user_time 11.464 ± 0.191 8.290 ± 0.123 [ -28.7% .. -26.7%] wall_time 11.235 ± 0.175 9.184 ± 0.169 [ -19.3% .. -17.2%] samples 16 23 (It's worth noting that map files are written in parallel with the output binary, but they often took longer to write than the binary itself.) Finally, I did further cleanups to the map-file.s test -- there was no real need to have a custom-named section. 
There were also alt_entry symbol declarations that had no corresponding definition. Either way, neither custom-named sections nor alt_entry symbols trigger special code paths in our map file implementation. Reviewed By: #lld-macho, Roger Differential Revision: https://reviews.llvm.org/D137368 --- diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp index 8f1b6a1..5d6c87b 100644 --- a/lld/MachO/MapFile.cpp +++ b/lld/MachO/MapFile.cpp @@ -6,9 +6,10 @@ // //===----------------------------------------------------------------------===// // -// This file implements the -map option. It shows lists in order and -// hierarchically the outputFile, arch, input files, output sections and -// symbols: +// This file implements the -map option, which maps address ranges to their +// respective contents, plus the input file these contents were originally from. +// The contents (typically symbols) are listed in address order. Dead-stripped +// contents are included as well. // // # Path: test // # Arch: x86_84 @@ -28,15 +29,16 @@ //===----------------------------------------------------------------------===// #include "MapFile.h" +#include "ConcatOutputSection.h" #include "Config.h" #include "InputFiles.h" #include "InputSection.h" -#include "OutputSection.h" #include "OutputSegment.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/TimeProfiler.h" @@ -45,69 +47,75 @@ using namespace llvm::sys; using namespace lld; using namespace lld::macho; +struct CStringInfo { + uint32_t fileIndex; + StringRef str; +}; + struct MapInfo { SmallVector files; - SmallVector liveSymbols; SmallVector deadSymbols; + DenseMap>> + liveCStringsForSection; + SmallVector deadCStrings; }; static MapInfo gatherMapInfo() { MapInfo info; for (InputFile *file : inputFiles) if (isa(file) || isa(file)) { - bool hasEmittedSymbol = false; + uint32_t fileIndex = 
info.files.size() + 1; + bool isReferencedFile = false; + + // Gather the dead symbols. We don't have to bother with the live ones + // because we will pick them up as we iterate over the OutputSections + // later. for (Symbol *sym : file->symbols) { if (auto *d = dyn_cast_or_null(sym)) - if (d->isec && d->getFile() == file) { - if (d->isLive()) { - assert(!shouldOmitFromOutput(d->isec)); - info.liveSymbols.push_back(d); - } else { + // Only emit the prevailing definition of a symbol. Also, don't emit + // the symbol if it is part of a cstring section (we use the literal + // value instead, similar to ld64) + if (d->isec && d->getFile() == file && + !isa(d->isec)) { + isReferencedFile = true; + if (!d->isLive()) info.deadSymbols.push_back(d); + } + } + + // Gather all the cstrings (both live and dead). A CString(Output)Section + // doesn't provide us a way of figuring out which InputSections its + // cstring contents came from, so we need to build up that mapping here. + for (const Section *sec : file->sections) { + for (const Subsection &subsec : sec->subsections) { + if (auto isec = dyn_cast(subsec.isec)) { + auto &liveCStrings = info.liveCStringsForSection[isec->parent]; + for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { + if (piece.live) + liveCStrings.push_back({isec->parent->addr + piece.outSecOff, + {fileIndex, isec->getStringRef(i)}}); + else + info.deadCStrings.push_back({fileIndex, isec->getStringRef(i)}); + isReferencedFile = true; } - hasEmittedSymbol = true; + } else { + break; } + } } - if (hasEmittedSymbol) - info.files.push_back(file); - } - parallelSort(info.liveSymbols.begin(), info.liveSymbols.end(), - [](Defined *a, Defined *b) { return a->getVA() < b->getVA(); }); - return info; -} -// Construct a map from symbols to their stringified representations. -// Demangling symbols (which is what toString() does) is slow, so -// we do that in batch using parallel-for. 
-static DenseMap -getSymbolStrings(ArrayRef syms) { - std::vector str(syms.size()); - parallelFor(0, syms.size(), [&](size_t i) { - raw_string_ostream os(str[i]); - Defined *sym = syms[i]; - - switch (sym->isec->kind()) { - case InputSection::CStringLiteralKind: { - // Output "literal string: " - const auto *isec = cast(sym->isec); - const StringPiece &piece = isec->getStringPiece(sym->value); - assert( - sym->value == piece.inSecOff && - "We expect symbols to always point to the start of a StringPiece."); - StringRef str = isec->getStringRef(&piece - &(*isec->pieces.begin())); - (os << "literal string: ").write_escaped(str); - break; - } - case InputSection::ConcatKind: - case InputSection::WordLiteralKind: - os << toString(*sym); + if (isReferencedFile) + info.files.push_back(file); } - }); - DenseMap ret; - for (size_t i = 0, e = syms.size(); i < e; ++i) - ret[syms[i]] = std::move(str[i]); - return ret; + // cstrings are not stored in sorted order in their OutputSections, so we sort + // them here. + for (auto &liveCStrings : info.liveCStringsForSection) + parallelSort(liveCStrings.second, [](const auto &p1, const auto &p2) { + return p1.first < p2.first; + }); + return info; } void macho::writeMapFile() { @@ -124,16 +132,12 @@ void macho::writeMapFile() { return; } - // Dump output path. os << format("# Path: %s\n", config->outputFile.str().c_str()); - - // Dump output architecture. os << format("# Arch: %s\n", getArchitectureName(config->arch()).str().c_str()); MapInfo info = gatherMapInfo(); - // Dump table of object files. 
os << "# Object files:\n"; os << format("[%3u] %s\n", 0, (const char *)"linker synthesized"); uint32_t fileIndex = 1; @@ -143,7 +147,6 @@ void macho::writeMapFile() { readerToFileOrdinal[file] = fileIndex++; } - // Dump table of sections os << "# Sections:\n"; os << "# Address\tSize \tSegment\tSection\n"; for (OutputSegment *seg : outputSegments) @@ -155,28 +158,48 @@ void macho::writeMapFile() { seg->name.str().c_str(), osec->name.str().c_str()); } - // Dump table of symbols - DenseMap liveSymbolStrings = - getSymbolStrings(info.liveSymbols); os << "# Symbols:\n"; os << "# Address\tSize \tFile Name\n"; - for (Defined *sym : info.liveSymbols) { - assert(sym->isLive()); - os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), sym->size, - readerToFileOrdinal[sym->getFile()], - liveSymbolStrings[sym].c_str()); + for (const OutputSegment *seg : outputSegments) { + for (const OutputSection *osec : seg->getSections()) { + if (auto *concatOsec = dyn_cast(osec)) { + for (const InputSection *isec : concatOsec->inputs) { + for (Defined *sym : isec->symbols) + os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), + sym->size, readerToFileOrdinal[sym->getFile()], + sym->getName().str().data()); + } + } else if (osec == in.cStringSection || osec == in.objcMethnameSection) { + const auto &liveCStrings = info.liveCStringsForSection.lookup(osec); + uint64_t lastAddr = 0; // strings will never start at address 0, so this + // is a sentinel value + for (const auto &[addr, info] : liveCStrings) { + uint64_t size = 0; + if (addr != lastAddr) + size = info.str.size() + 1; // include null terminator + lastAddr = addr; + os << format("0x%08llX\t0x%08llX\t[%3u] literal string: ", addr, size, + info.fileIndex); + os.write_escaped(info.str) << "\n"; + } + } + // TODO print other synthetic sections + } } if (config->deadStrip) { - DenseMap deadSymbolStrings = - getSymbolStrings(info.deadSymbols); os << "# Dead Stripped Symbols:\n"; os << "# \tSize \tFile Name\n"; for (Defined 
*sym : info.deadSymbols) { assert(!sym->isLive()); os << format("<>\t0x%08llX\t[%3u] %s\n", sym->size, readerToFileOrdinal[sym->getFile()], - deadSymbolStrings[sym].c_str()); + sym->getName().str().data()); + } + for (CStringInfo &cstrInfo : info.deadCStrings) { + os << format("<>\t0x%08llX\t[%3u] literal string: ", + cstrInfo.str.size() + 1, cstrInfo.fileIndex); + os.write_escaped(cstrInfo.str) << "\n"; } } } diff --git a/lld/test/MachO/map-file.s b/lld/test/MachO/map-file.s index ac5ae9d..fe1ef88 100644 --- a/lld/test/MachO/map-file.s +++ b/lld/test/MachO/map-file.s @@ -4,23 +4,24 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/c-string-literal.s -o %t/c-string-literal.o -# RUN: %lld -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o --time-trace -o %t/test +# RUN: %lld -demangle -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o \ +# RUN: --time-trace -o %t/test # RUN: llvm-objdump --syms --section-headers %t/test > %t/objdump -# RUN: cat %t/objdump %t/map > %t/out -# RUN: FileCheck %s < %t/out +## Check that symbols in cstring sections aren't emitted +# RUN: cat %t/objdump %t/map | FileCheck %s --implicit-check-not _hello_world # RUN: FileCheck %s --check-prefix=MAPFILE < %t/test.time-trace # CHECK: Sections: -# CHECK-NEXT: Idx Name Size VMA Type -# CHECK-NEXT: 0 __text {{[0-9a-f]+}} [[#%x,TEXT:]] TEXT -# CHECK-NEXT: 1 obj {{[0-9a-f]+}} [[#%x,DATA:]] TEXT -# CHECK-NEXT: 2 __cstring {{[0-9a-f]+}} [[#%x,CSTR:]] DATA -# CHECK-NEXT: 3 __common {{[0-9a-f]+}} [[#%x,BSS:]] BSS +# CHECK-NEXT: Idx Name Size VMA Type +# CHECK-NEXT: 0 __text {{[0-9a-f]+}} [[#%x,TEXT:]] TEXT +# CHECK-NEXT: 1 __cstring {{[0-9a-f]+}} [[#%x,CSTR:]] DATA +# CHECK-NEXT: 2 __common {{[0-9a-f]+}} [[#%x,BSS:]] BSS # CHECK: SYMBOL TABLE: # CHECK-DAG: [[#%x,MAIN:]] g F __TEXT,__text _main # CHECK-DAG: [[#%x,NUMBER:]] g O __DATA,__common _number -# CHECK-DAG: [[#%x,FOO:]] g F __TEXT,obj _foo +# 
CHECK-DAG: [[#%x,BAR:]] g F __TEXT,__text _bar +# CHECK-DAG: [[#%x,FOO:]] g F __TEXT,__text __ZTIN3foo3bar4MethE # CHECK-DAG: [[#%x,HIWORLD:]] g O __TEXT,__cstring _hello_world # CHECK-DAG: [[#%x,HIITSME:]] g O __TEXT,__cstring _hello_its_me @@ -35,43 +36,50 @@ # CHECK-NEXT: # Sections: # CHECK-NEXT: # Address Size Segment Section # CHECK-NEXT: 0x[[#%X,TEXT]] 0x{{[0-9A-F]+}} __TEXT __text -# CHECK-NEXT: 0x[[#%X,DATA]] 0x{{[0-9A-F]+}} __TEXT obj # CHECK-NEXT: 0x[[#%X,CSTR]] 0x{{[0-9A-F]+}} __TEXT __cstring # CHECK-NEXT: 0x[[#%X,BSS]] 0x{{[0-9A-F]+}} __DATA __common # CHECK-NEXT: # Symbols: -# CHECK-NEXT: # Address Size File Name -# CHECK-DAG: 0x[[#%X,MAIN]] 0x00000001 [ 1] _main -# CHECK-DAG: 0x[[#%X,FOO]] 0x00000001 [ 2] _foo -# CHECK-DAG: 0x[[#%X,HIWORLD]] 0x0000000E [ 3] literal string: Hello world!\n -# CHECK-DAG: 0x[[#%X,HIITSME]] 0x0000000F [ 3] literal string: Hello, it's me -# CHECK-DAG: 0x[[#%X,NUMBER]] 0x00000001 [ 1] _number +# CHECK-NEXT: # Address Size File Name +# CHECK-DAG: 0x[[#%X,MAIN]] 0x00000001 [ 1] _main +# CHECK-DAG: 0x[[#%X,BAR]] 0x00000001 [ 1] _bar +# CHECK-DAG: 0x[[#%X,FOO]] 0x00000001 [ 2] __ZTIN3foo3bar4MethE +# CHECK-DAG: 0x[[#%X,HIWORLD]] 0x0000000E [ 3] literal string: Hello world!\n +# CHECK-DAG: 0x[[#%X,HIITSME]] 0x0000000F [ 3] literal string: Hello, it's me +# CHECK-DAG: 0x[[#%X,HIITSME + 0xf]] 0x0000000E [ 3] literal string: Hello world!\n +# CHECK-DAG: 0x[[#%X,NUMBER]] 0x00000001 [ 1] _number # MAPFILE: "name":"Total Write map file" -# RUN: %lld -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped +# RUN: %lld -demangle -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped # RUN: FileCheck --check-prefix=STRIPPED %s < %t/stripped-map ## C-string literals should be printed as "literal string: " # STRIPPED-LABEL: Dead Stripped Symbols: -# STRIPPED-DAG: <> 0x00000001 [ 2] _foo -# STRIPPED-DAG: <> 0x0000000E [ 3] literal string: Hello world!\n -# STRIPPED-DAG: <> 
0x0000000F [ 3] literal string: Hello, it's me -# STRIPPED-DAG: <> 0x00000001 [ 1] _number +# STRIPPED-DAG: <> 0x00000001 [ 1] _bar +# STRIPPED-DAG: <> 0x00000001 [ 1] _number +# STRIPPED-DAG: <> 0x00000001 [ 2] __ZTIN3foo3bar4MethE +# STRIPPED-DAG: <> 0x0000000E [ 3] literal string: Hello world!\n +# STRIPPED-DAG: <> 0x0000000F [ 3] literal string: Hello, it's me +# STRIPPED-DAG: <> 0x0000000E [ 3] literal string: Hello world!\n # RUN: %lld --icf=all -map %t/icf-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/icf # RUN: FileCheck --check-prefix=ICF %s < %t/icf-map +## Verify that folded symbols and cstrings have size zero. Note that ld64 prints +## folded symbols but not folded cstrings; we print both. + # ICF: Symbols: -# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 2] _foo -# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 1] _bar +# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 2] __ZTIN3foo3bar4MethE +# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 1] _bar +# ICF-DAG: 0x[[#%X,HIWORLD:]] 0x0000000E [ 3] literal string: Hello world!\n +# ICF-DAG: 0x[[#%X,HIWORLD]] 0x00000000 [ 3] literal string: Hello world!\n #--- foo.s -## ICF will only fold sections marked as pure_instructions -.section __TEXT,obj,regular,pure_instructions -.globl _foo -.alt_entry _alt_foo -_foo: +.globl __ZTIN3foo3bar4MethE +## This C++ symbol makes it clear that we do not print the demangled name in +## the map file, even if `-demangle` is passed. +__ZTIN3foo3bar4MethE: nop .subsections_via_symbols @@ -79,12 +87,10 @@ _foo: #--- test.s .comm _number, 1 .globl _main, _bar -.alt_entry _alt_bar _main: ret -.section __TEXT,obj,regular,pure_instructions _bar: nop @@ -101,4 +107,6 @@ _hello_world: _hello_its_me: .asciz "Hello, it's me" +.asciz "Hello world!\n" + .subsections_via_symbols