From 3a1b3c9afe74ef09a16b2ceefeaad854acf3fe7d Mon Sep 17 00:00:00 2001 From: Greg McGary Date: Thu, 4 Nov 2021 20:55:31 -0700 Subject: [PATCH] [lld-macho][nfc] rename parsed-section types & variables This is an NFC diff that prepares for pruning & relocating `__eh_frame`. Along the way, I made the following changes to ... * clarify usage of `section` vs. `subsection` * remove `map` & `vec` from type names * disambiguate class `Section` from template parameter `SectionHeader`. Differential Revision: https://reviews.llvm.org/D113241 --- lld/MachO/Driver.cpp | 12 ++-- lld/MachO/InputFiles.cpp | 150 +++++++++++++++++++++------------------- lld/MachO/InputFiles.h | 24 ++++--- lld/MachO/ObjC.cpp | 15 ++-- lld/MachO/SyntheticSections.cpp | 15 ++-- lld/MachO/Writer.cpp | 6 +- 6 files changed, 120 insertions(+), 102 deletions(-) diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index ddd84bc..37955c9 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1021,10 +1021,10 @@ static void gatherInputSections() { TimeTraceScope timeScope("Gathering input sections"); int inputOrder = 0; for (const InputFile *file : inputFiles) { - for (const SubsectionMap &map : file->subsections) { + for (const Section &section : file->sections) { ConcatOutputSection *osec = nullptr; - for (const SubsectionEntry &entry : map) { - if (auto *isec = dyn_cast(entry.isec)) { + for (const Subsection &subsection : section.subsections) { + if (auto *isec = dyn_cast(subsection.isec)) { if (isec->isCoalescedWeak()) continue; if (isec->getSegName() == segment_names::ld) { @@ -1036,11 +1036,13 @@ static void gatherInputSections() { osec = ConcatOutputSection::getOrCreateForInput(isec); isec->parent = osec; inputSections.push_back(isec); - } else if (auto *isec = dyn_cast(entry.isec)) { + } else if (auto *isec = + dyn_cast(subsection.isec)) { if (in.cStringSection->inputOrder == UnspecifiedInputOrder) in.cStringSection->inputOrder = inputOrder++; in.cStringSection->addInput(isec); - } else if 
(auto *isec = dyn_cast(entry.isec)) { + } else if (auto *isec = + dyn_cast(subsection.isec)) { if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder) in.wordLiteralSection->inputOrder = inputOrder++; in.wordLiteralSection->addInput(isec); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 769fddd..893bf26 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -257,12 +257,14 @@ static Optional getRecordSize(StringRef segname, StringRef name) { return {}; } -template -void ObjFile::parseSections(ArrayRef
sections) { - subsections.reserve(sections.size()); +// Parse the sequence of sections within a single LC_SEGMENT(_64). +// Split each section into subsections. +template +void ObjFile::parseSections(ArrayRef sectionHeaders) { + sections.reserve(sectionHeaders.size()); auto *buf = reinterpret_cast(mb.getBufferStart()); - for (const Section &sec : sections) { + for (const SectionHeader &sec : sectionHeaders) { StringRef name = StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname))); StringRef segname = @@ -273,27 +275,27 @@ void ObjFile::parseSections(ArrayRef
sections) { if (sec.align >= 32) { error("alignment " + std::to_string(sec.align) + " of section " + name + " is too large"); - subsections.push_back({}); + sections.push_back({}); continue; } uint32_t align = 1 << sec.align; uint32_t flags = sec.flags; auto splitRecords = [&](int recordSize) -> void { - subsections.push_back({}); + sections.push_back({}); if (data.empty()) return; - SubsectionMap &subsecMap = subsections.back(); - subsecMap.reserve(data.size() / recordSize); + Subsections &subsections = sections.back().subsections; + subsections.reserve(data.size() / recordSize); auto *isec = make( segname, name, this, data.slice(0, recordSize), align, flags); - subsecMap.push_back({0, isec}); + subsections.push_back({0, isec}); for (uint64_t off = recordSize; off < data.size(); off += recordSize) { // Copying requires less memory than constructing a fresh InputSection. auto *copy = make(*isec); copy->data = data.slice(off, recordSize); - subsecMap.push_back({off, copy}); + subsections.push_back({off, copy}); } }; @@ -315,7 +317,8 @@ void ObjFile::parseSections(ArrayRef
sections) { isec = make(segname, name, this, data, align, flags); } - subsections.push_back({{0, isec}}); + sections.push_back({}); + sections.back().subsections.push_back({0, isec}); } else if (auto recordSize = getRecordSize(segname, name)) { splitRecords(*recordSize); } else if (segname == segment_names::llvm) { @@ -323,9 +326,9 @@ void ObjFile::parseSections(ArrayRef
sections) { // segment. Symbols within those sections point to bitcode metadata // instead of actual symbols. Global symbols within those sections could // have the same name without causing duplicate symbol errors. Push an - // empty map to ensure indices line up for the remaining sections. + // empty entry to ensure indices line up for the remaining sections. // TODO: Evaluate whether the bitcode metadata is needed. - subsections.push_back({}); + sections.push_back({}); } else { auto *isec = make(segname, name, this, data, align, flags); @@ -334,11 +337,12 @@ void ObjFile::parseSections(ArrayRef
sections) { // Instead of emitting DWARF sections, we emit STABS symbols to the // object files that contain them. We filter them out early to avoid // parsing their relocations unnecessarily. But we must still push an - // empty map to ensure the indices line up for the remaining sections. - subsections.push_back({}); + // empty entry to ensure the indices line up for the remaining sections. + sections.push_back({}); debugSections.push_back(isec); } else { - subsections.push_back({{0, isec}}); + sections.push_back({}); + sections.back().subsections.push_back({0, isec}); } } } @@ -351,18 +355,17 @@ void ObjFile::parseSections(ArrayRef
sections) { // any subsection splitting has occurred). It will be updated to represent the // same location as an offset relative to the start of the containing // subsection. -static InputSection *findContainingSubsection(SubsectionMap &map, +static InputSection *findContainingSubsection(Subsections &subsections, uint64_t *offset) { auto it = std::prev(llvm::upper_bound( - map, *offset, [](uint64_t value, SubsectionEntry subsecEntry) { - return value < subsecEntry.offset; - })); + subsections, *offset, + [](uint64_t value, Subsection subsec) { return value < subsec.offset; })); *offset -= it->offset; return it->isec; } -template -static bool validateRelocationInfo(InputFile *file, const Section &sec, +template +static bool validateRelocationInfo(InputFile *file, const SectionHeader &sec, relocation_info rel) { const RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type); bool valid = true; @@ -393,14 +396,15 @@ static bool validateRelocationInfo(InputFile *file, const Section &sec, return valid; } -template -void ObjFile::parseRelocations(ArrayRef
sectionHeaders, - const Section &sec, SubsectionMap &subsecMap) { +template +void ObjFile::parseRelocations(ArrayRef sectionHeaders, + const SectionHeader &sec, + Subsections &subsections) { auto *buf = reinterpret_cast(mb.getBufferStart()); ArrayRef relInfos( reinterpret_cast(buf + sec.reloff), sec.nreloc); - auto subsecIt = subsecMap.rbegin(); + auto subsecIt = subsections.rbegin(); for (size_t i = 0; i < relInfos.size(); i++) { // Paired relocations serve as Mach-O's method for attaching a // supplemental datum to a primary relocation record. ELF does not @@ -454,7 +458,8 @@ void ObjFile::parseRelocations(ArrayRef
sectionHeaders, r.addend = isSubtrahend ? 0 : totalAddend; } else { assert(!isSubtrahend); - const Section &referentSec = sectionHeaders[relInfo.r_symbolnum - 1]; + const SectionHeader &referentSecHead = + sectionHeaders[relInfo.r_symbolnum - 1]; uint64_t referentOffset; if (relInfo.r_pcrel) { // The implicit addend for pcrel section relocations is the pcrel offset @@ -464,14 +469,16 @@ void ObjFile::parseRelocations(ArrayRef
sectionHeaders, // have pcrel section relocations. We may want to factor this out into // the arch-specific .cpp file. assert(target->hasAttr(r.type, RelocAttrBits::BYTE4)); - referentOffset = - sec.addr + relInfo.r_address + 4 + totalAddend - referentSec.addr; + referentOffset = sec.addr + relInfo.r_address + 4 + totalAddend - + referentSecHead.addr; } else { // The addend for a non-pcrel relocation is its absolute address. - referentOffset = totalAddend - referentSec.addr; + referentOffset = totalAddend - referentSecHead.addr; } - SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1]; - r.referent = findContainingSubsection(referentSubsecMap, &referentOffset); + Subsections &referentSubsections = + sections[relInfo.r_symbolnum - 1].subsections; + r.referent = + findContainingSubsection(referentSubsections, &referentOffset); r.addend = referentOffset; } @@ -481,14 +488,14 @@ void ObjFile::parseRelocations(ArrayRef
sectionHeaders, // unsorted relocations (in `-r` mode), so we have a fallback for that // uncommon case. InputSection *subsec; - while (subsecIt != subsecMap.rend() && subsecIt->offset > r.offset) + while (subsecIt != subsections.rend() && subsecIt->offset > r.offset) ++subsecIt; - if (subsecIt == subsecMap.rend() || + if (subsecIt == subsections.rend() || subsecIt->offset + subsecIt->isec->getSize() <= r.offset) { - subsec = findContainingSubsection(subsecMap, &r.offset); + subsec = findContainingSubsection(subsections, &r.offset); // Now that we know the relocs are unsorted, avoid trying the 'fast path' // for the other relocations. - subsecIt = subsecMap.rend(); + subsecIt = subsections.rend(); } else { subsec = subsecIt->isec; r.offset -= subsecIt->offset; @@ -509,10 +516,10 @@ void ObjFile::parseRelocations(ArrayRef
sectionHeaders, } else { uint64_t referentOffset = totalAddend - sectionHeaders[minuendInfo.r_symbolnum - 1].addr; - SubsectionMap &referentSubsecMap = - subsections[minuendInfo.r_symbolnum - 1]; + Subsections &referentSubsectVec = + sections[minuendInfo.r_symbolnum - 1].subsections; p.referent = - findContainingSubsection(referentSubsecMap, &referentOffset); + findContainingSubsection(referentSubsectVec, &referentOffset); p.addend = referentOffset; } subsec->relocs.push_back(p); @@ -649,7 +656,7 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, using NList = typename LP::nlist; // Groups indices of the symbols by the sections that contain them. - std::vector> symbolsBySection(subsections.size()); + std::vector> symbolsBySection(sections.size()); symbols.resize(nList.size()); SmallVector undefineds; for (uint32_t i = 0; i < nList.size(); ++i) { @@ -662,9 +669,9 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, StringRef name = strtab + sym.n_strx; if ((sym.n_type & N_TYPE) == N_SECT) { - SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; + Subsections &subsections = sections[sym.n_sect - 1].subsections; // parseSections() may have chosen not to parse this section. 
- if (subsecMap.empty()) + if (subsections.empty()) continue; symbolsBySection[sym.n_sect - 1].push_back(i); } else if (isUndef(sym)) { @@ -674,16 +681,16 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, } } - for (size_t i = 0; i < subsections.size(); ++i) { - SubsectionMap &subsecMap = subsections[i]; - if (subsecMap.empty()) + for (size_t i = 0; i < sections.size(); ++i) { + Subsections &subsections = sections[i].subsections; + if (subsections.empty()) continue; std::vector &symbolIndices = symbolsBySection[i]; uint64_t sectionAddr = sectionHeaders[i].addr; uint32_t sectionAlign = 1u << sectionHeaders[i].align; - InputSection *lastIsec = subsecMap.back().isec; + InputSection *lastIsec = subsections.back().isec; // Record-based sections have already been split into subsections during // parseSections(), so we simply need to match Symbols to the corresponding // subsection here. @@ -693,7 +700,8 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, const NList &sym = nList[symIndex]; StringRef name = strtab + sym.n_strx; uint64_t symbolOffset = sym.n_value - sectionAddr; - InputSection *isec = findContainingSubsection(subsecMap, &symbolOffset); + InputSection *isec = + findContainingSubsection(subsections, &symbolOffset); if (symbolOffset != 0) { error(toString(lastIsec) + ": symbol " + name + " at misaligned offset"); @@ -706,19 +714,19 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, // Calculate symbol sizes and create subsections by splitting the sections // along symbol boundaries. - // We populate subsecMap by repeatedly splitting the last (highest address) - // subsection. + // We populate subsections by repeatedly splitting the last (highest + // address) subsection. 
llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) { return nList[lhs].n_value < nList[rhs].n_value; }); - SubsectionEntry subsecEntry = subsecMap.back(); + Subsection subsec = subsections.back(); for (size_t j = 0; j < symbolIndices.size(); ++j) { uint32_t symIndex = symbolIndices[j]; const NList &sym = nList[symIndex]; StringRef name = strtab + sym.n_strx; - InputSection *isec = subsecEntry.isec; + InputSection *isec = subsec.isec; - uint64_t subsecAddr = sectionAddr + subsecEntry.offset; + uint64_t subsecAddr = sectionAddr + subsec.offset; size_t symbolOffset = sym.n_value - subsecAddr; uint64_t symbolSize = j + 1 < symbolIndices.size() @@ -758,8 +766,8 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, // subsection's offset from the last aligned address. We should consider // emulating that behavior. nextIsec->align = MinAlign(sectionAlign, sym.n_value); - subsecMap.push_back({sym.n_value - sectionAddr, nextIsec}); - subsecEntry = subsecMap.back(); + subsections.push_back({sym.n_value - sectionAddr, nextIsec}); + subsec = subsections.back(); } } @@ -785,7 +793,8 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName, make(segName.take_front(16), sectName.take_front(16), /*file=*/this, data); isec->live = true; - subsections.push_back({{0, isec}}); + sections.push_back({}); + sections.back().subsections.push_back({0, isec}); } ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName) @@ -800,7 +809,7 @@ ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName) template void ObjFile::parse() { using Header = typename LP::mach_header; using SegmentCommand = typename LP::segment_command; - using Section = typename LP::section; + using SectionHeader = typename LP::section; using NList = typename LP::nlist; auto *buf = reinterpret_cast(mb.getBufferStart()); @@ -826,11 +835,11 @@ template void ObjFile::parse() { parseLCLinkerOption(this, cmd->count, data); } - ArrayRef
sectionHeaders; + ArrayRef sectionHeaders; if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) { auto *c = reinterpret_cast(cmd); - sectionHeaders = - ArrayRef
{reinterpret_cast(c + 1), c->nsects}; + sectionHeaders = ArrayRef{ + reinterpret_cast(c + 1), c->nsects}; parseSections(sectionHeaders); } @@ -846,9 +855,10 @@ template void ObjFile::parse() { // The relocations may refer to the symbols, so we parse them after we have // parsed all the symbols. - for (size_t i = 0, n = subsections.size(); i < n; ++i) - if (!subsections[i].empty()) - parseRelocations(sectionHeaders, sectionHeaders[i], subsections[i]); + for (size_t i = 0, n = sections.size(); i < n; ++i) + if (!sections[i].subsections.empty()) + parseRelocations(sectionHeaders, sectionHeaders[i], + sections[i].subsections); parseDebugInfo(); if (config->emitDataInCodeInfo) @@ -897,20 +907,20 @@ void ObjFile::parseDataInCode() { // Create pointers from symbols to their associated compact unwind entries. void ObjFile::registerCompactUnwind() { // First, locate the __compact_unwind section. - SubsectionMap *cuSubsecMap = nullptr; - for (SubsectionMap &map : subsections) { - if (map.empty()) + Section *cuSection = nullptr; + for (Section &section : sections) { + if (section.subsections.empty()) continue; - if (map[0].isec->getSegName() != segment_names::ld) + if (section.subsections[0].isec->getSegName() != segment_names::ld) continue; - cuSubsecMap = &map; + cuSection = &section; break; } - if (!cuSubsecMap) + if (!cuSection) return; - for (SubsectionEntry &entry : *cuSubsecMap) { - ConcatInputSection *isec = cast(entry.isec); + for (Subsection &subsection : cuSection->subsections) { + ConcatInputSection *isec = cast(subsection.isec); // Hack!! 
Since each CUE contains a different function address, if ICF // operated naively and compared the entire contents of each CUE, entries // with identical unwind info but belonging to different functions would diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index e5f12ad..2c870b7 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -51,11 +51,17 @@ extern std::unique_ptr tar; // If .subsections_via_symbols is set, each InputSection will be split along // symbol boundaries. The field offset represents the offset of the subsection // from the start of the original pre-split InputSection. -struct SubsectionEntry { - uint64_t offset; - InputSection *isec; +struct Subsection { + uint64_t offset = 0; + InputSection *isec = nullptr; +}; + +using Subsections = std::vector; + +struct Section { + // uint64_t address = 0; // TODO(gkm): this will debut with __eh_frame handler + Subsections subsections; }; -using SubsectionMap = std::vector; class InputFile { public: @@ -75,7 +81,7 @@ public: MemoryBufferRef mb; std::vector symbols; - std::vector subsections; + std::vector
sections; // Provides an easy way to sort InputFiles deterministically. const int id; @@ -109,16 +115,16 @@ public: private: template void parse(); - template void parseSections(ArrayRef
); + template void parseSections(ArrayRef); template void parseSymbols(ArrayRef sectionHeaders, ArrayRef nList, const char *strtab, bool subsectionsViaSymbols); template Symbol *parseNonSectionSymbol(const NList &sym, StringRef name); - template - void parseRelocations(ArrayRef
sectionHeaders, const Section &, - SubsectionMap &); + template + void parseRelocations(ArrayRef sectionHeaders, + const SectionHeader &, Subsections &); void parseDebugInfo(); void parseDataInCode(); void registerCompactUnwind(); diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp index ab9aaec..3c40c5d 100644 --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ -21,7 +21,7 @@ using namespace lld; using namespace lld::macho; template static bool objectHasObjCSection(MemoryBufferRef mb) { - using Section = typename LP::section; + using SectionHeader = typename LP::section; auto *hdr = reinterpret_cast(mb.getBufferStart()); @@ -30,12 +30,13 @@ template static bool objectHasObjCSection(MemoryBufferRef mb) { if (const auto *c = findCommand(hdr, LP::segmentLCType)) { - auto sectionHeaders = - ArrayRef
{reinterpret_cast(c + 1), c->nsects}; - for (const Section &sec : sectionHeaders) { - StringRef sectname(sec.sectname, - strnlen(sec.sectname, sizeof(sec.sectname))); - StringRef segname(sec.segname, strnlen(sec.segname, sizeof(sec.segname))); + auto sectionHeaders = ArrayRef{ + reinterpret_cast(c + 1), c->nsects}; + for (const SectionHeader &secHead : sectionHeaders) { + StringRef sectname(secHead.sectname, + strnlen(secHead.sectname, sizeof(secHead.sectname))); + StringRef segname(secHead.segname, + strnlen(secHead.segname, sizeof(secHead.segname))); if ((segname == segment_names::data && sectname == section_names::objcCatList) || (segname == segment_names::text && diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 8a3a4aa..2527389 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -734,7 +734,7 @@ DataInCodeSection::DataInCodeSection() template static std::vector collectDataInCodeEntries() { using SegmentCommand = typename LP::segment_command; - using Section = typename LP::section; + using SectionHeader = typename LP::section; std::vector dataInCodeEntries; for (const InputFile *inputFile : inputFiles) { @@ -745,8 +745,8 @@ static std::vector collectDataInCodeEntries() { findCommand(objFile->mb.getBufferStart(), LP::segmentLCType)); if (!c) continue; - ArrayRef
sections{reinterpret_cast(c + 1), - c->nsects}; + ArrayRef sectionHeaders{ + reinterpret_cast(c + 1), c->nsects}; ArrayRef entries = objFile->dataInCodeEntries; if (entries.empty()) @@ -754,15 +754,14 @@ static std::vector collectDataInCodeEntries() { // For each code subsection find 'data in code' entries residing in it. // Compute the new offset values as // + - <__TEXT address>. - for (size_t i = 0, n = sections.size(); i < n; ++i) { - const SubsectionMap &subsecMap = objFile->subsections[i]; - for (const SubsectionEntry &subsecEntry : subsecMap) { - const InputSection *isec = subsecEntry.isec; + for (size_t i = 0, n = sectionHeaders.size(); i < n; ++i) { + for (const Subsection &subsec : objFile->sections[i].subsections) { + const InputSection *isec = subsec.isec; if (!isCodeSection(isec)) continue; if (cast(isec)->shouldOmitFromOutput()) continue; - const uint64_t beginAddr = sections[i].addr + subsecEntry.offset; + const uint64_t beginAddr = sectionHeaders[i].addr + subsec.offset; auto it = llvm::lower_bound( entries, beginAddr, [](const MachO::data_in_code_entry &entry, uint64_t addr) { diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 203a04f..093a380 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -227,7 +227,7 @@ public: void writeTo(uint8_t *buf) const override { using SegmentCommand = typename LP::segment_command; - using Section = typename LP::section; + using SectionHeader = typename LP::section; auto *c = reinterpret_cast(buf); buf += sizeof(SegmentCommand); @@ -248,8 +248,8 @@ public: if (osec->isHidden()) continue; - auto *sectHdr = reinterpret_cast
(buf); - buf += sizeof(Section); + auto *sectHdr = reinterpret_cast(buf); + buf += sizeof(SectionHeader); memcpy(sectHdr->sectname, osec->name.data(), osec->name.size()); memcpy(sectHdr->segname, name.data(), name.size()); -- 2.7.4