From d9b6f7e312c11ff69779939bb2be0bd53936a333 Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Fri, 12 Nov 2021 16:01:25 -0500 Subject: [PATCH] [lld-macho] Teach ICF to dedup functions with identical unwind info MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Dedup'ing unwind info is tricky because each CUE contains a different function address, if ICF operated naively and compared the entire contents of each CUE, entries with identical unwind info but belonging to different functions would never be considered identical. To work around this problem, we slice away the function address before performing ICF. We rely on `relocateCompactUnwind()` to correctly handle these truncated input sections. Here are the numbers before and after D109944, D109945, and this diff were applied, as tested on my 3.2 GHz 16-Core Intel Xeon W: Without any optimizations: base diff difference (95% CI) sys_time 0.849 ± 0.015 0.896 ± 0.012 [ +4.8% .. +6.2%] user_time 3.357 ± 0.030 3.512 ± 0.023 [ +4.3% .. +5.0%] wall_time 3.944 ± 0.039 4.032 ± 0.031 [ +1.8% .. +2.6%] samples 40 38 With `-dead_strip`: base diff difference (95% CI) sys_time 0.847 ± 0.010 0.896 ± 0.012 [ +5.2% .. +6.5%] user_time 3.377 ± 0.014 3.532 ± 0.015 [ +4.4% .. +4.8%] wall_time 3.962 ± 0.024 4.060 ± 0.030 [ +2.1% .. +2.8%] samples 47 30 With `-dead_strip` and `--icf=all`: base diff difference (95% CI) sys_time 0.935 ± 0.013 0.957 ± 0.018 [ +1.5% .. +3.2%] user_time 3.472 ± 0.022 6.531 ± 0.046 [ +87.6% .. +88.7%] wall_time 4.080 ± 0.040 5.329 ± 0.060 [ +30.0% .. +31.2%] samples 37 30 Unsurprisingly, ICF is now a lot slower, likely due to the much larger number of input sections it needs to process. But the rest of the linker only suffers a mild slowdown. Note that the compact-unwind-bad-reloc.s test was expanded because we now handle the relocation for CUE's function address in a separate code path from the rest of the CUE relocations. The extended test covers both code paths. Reviewed By: #lld-macho, oontvoo Differential Revision: https://reviews.llvm.org/D109946 --- lld/MachO/ICF.cpp | 43 ++++++++++++++++++++++++--------- lld/MachO/InputFiles.cpp | 34 +++++++++++++++++++++----- lld/MachO/UnwindInfoSection.cpp | 8 +++--- lld/test/MachO/icf.s | 19 ++++++++++++--- 4 files changed, 78 insertions(+), 26 deletions(-) diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp index 57e1da3f9a7f..6c3a565e74a9 100644 --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -180,8 +180,31 @@ static bool equalsVariable(const ConcatInputSection *ia, } return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2]; }; - return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), - f); + if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f)) + return false; + + // If there are symbols with associated unwind info, check that the unwind + // info matches. For simplicity, we only handle the case where there are only + // symbols at offset zero within the section (which is typically the case with + // .subsections_via_symbols.) + auto hasCU = [](Defined *d) { return d->compactUnwind; }; + auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU); + auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU); + if (itA == ia->symbols.end()) + return itB == ib->symbols.end(); + if (itB == ib->symbols.end()) + return false; + const Defined *da = *itA; + const Defined *db = *itB; + if (da->compactUnwind->icfEqClass[icfPass % 2] != + db->compactUnwind->icfEqClass[icfPass % 2] || + da->value != 0 || db->value != 0) + return false; + auto isZero = [](Defined *d) { return d->value == 0; }; + return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) == + ia->symbols.end() && + std::find_if_not(std::next(itB), ib->symbols.end(), isZero) == + ib->symbols.end(); } // Find the first InputSection after BEGIN whose equivalence class differs @@ -337,18 +360,14 @@ void macho::foldIdenticalSections() { // FIXME: consider non-code __text sections as hashable? bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) && !isec->shouldOmitFromOutput() && isec->isHashableForICF(); - // ICF can't fold functions with unwind info - if (isHashable) - for (Defined *d : isec->symbols) - if (d->compactUnwind) { - isHashable = false; - break; - } - - if (isHashable) + if (isHashable) { hashable.push_back(isec); - else + for (Defined *d : isec->symbols) + if (d->compactUnwind) + hashable.push_back(d->compactUnwind); + } else { isec->icfEqClass[0] = ++icfUniqueID; + } } parallelForEach(hashable, [](ConcatInputSection *isec) { isec->hashForICF(); }); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 6e6a61e0d6d4..1b233920ffb2 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -904,16 +904,31 @@ void ObjFile::registerCompactUnwind() { for (SubsectionEntry &entry : *cuSubsecMap) { ConcatInputSection *isec = cast(entry.isec); + // Hack!! Since each CUE contains a different function address, if ICF + // operated naively and compared the entire contents of each CUE, entries + // with identical unwind info but belonging to different functions would + // never be considered equivalent. To work around this problem, we slice + // away the function address here. (Note that we do not adjust the offsets + // of the corresponding relocations.) We rely on `relocateCompactUnwind()` + // to correctly handle these truncated input sections. + isec->data = isec->data.slice(target->wordSize); + ConcatInputSection *referentIsec; - for (const Reloc &r : isec->relocs) { - if (r.offset != 0) + for (auto it = isec->relocs.begin(); it != isec->relocs.end();) { + Reloc &r = *it; + // We only wish to handle the relocation for CUE::functionAddress. + if (r.offset != 0) { + ++it; continue; + } uint64_t add = r.addend; if (auto *sym = cast_or_null(r.referent.dyn_cast())) { // Check whether the symbol defined in this file is the prevailing one. // Skip if it is e.g. a weak def that didn't prevail. - if (sym->getFile() != this) + if (sym->getFile() != this) { + ++it; continue; + } add += sym->value; referentIsec = cast(sym->isec); } else { @@ -926,16 +941,23 @@ void ObjFile::registerCompactUnwind() { // The functionAddress relocations are typically section relocations. // However, unwind info operates on a per-symbol basis, so we search for // the function symbol here. - auto it = llvm::lower_bound( + auto symIt = llvm::lower_bound( referentIsec->symbols, add, [](Defined *d, uint64_t add) { return d->value < add; }); // The relocation should point at the exact address of a symbol (with no // addend). - if (it == referentIsec->symbols.end() || (*it)->value != add) { + if (symIt == referentIsec->symbols.end() || (*symIt)->value != add) { assert(referentIsec->wasCoalesced); + ++it; continue; } - (*it)->compactUnwind = isec; + (*symIt)->compactUnwind = isec; + // Since we've sliced away the functionAddress, we should remove the + // corresponding relocation too. Given that clang emits relocations in + // reverse order of address, this relocation should be at the end of the + // vector for most of our input object files, so this is typically an O(1) + // operation. + it = isec->relocs.erase(it); } } } diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index c499826e1059..06f8d449feb5 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -295,7 +295,8 @@ void UnwindInfoSectionImpl::relocateCompactUnwind( return; // Write the rest of the CUE. - memcpy(buf, d->compactUnwind->data.data(), d->compactUnwind->data.size()); + memcpy(buf + sizeof(Ptr), d->compactUnwind->data.data(), + d->compactUnwind->data.size()); for (const Reloc &r : d->compactUnwind->relocs) { uint64_t referentVA = 0; if (auto *referentSym = r.referent.dyn_cast()) { @@ -309,9 +310,8 @@ void UnwindInfoSectionImpl::relocateCompactUnwind( } } else { auto *referentIsec = r.referent.get(); - ConcatInputSection *concatIsec = checkTextSegment(referentIsec); - if (!concatIsec->shouldOmitFromOutput()) - referentVA = referentIsec->getVA(r.addend); + checkTextSegment(referentIsec); + referentVA = referentIsec->getVA(r.addend); } writeAddress(buf + r.offset, referentVA, r.length); } diff --git a/lld/test/MachO/icf.s b/lld/test/MachO/icf.s index 2fc24052e9f0..5e61e941bffa 100644 --- a/lld/test/MachO/icf.s +++ b/lld/test/MachO/icf.s @@ -32,8 +32,9 @@ # CHECK: [[#%x,CALL_RECURSIVE_2:]] l F __TEXT,__text _call_recursive_2 # CHECK: [[#%x,CHECK_LENGTH_1:]] l F __TEXT,__text _check_length_1 # CHECK: [[#%x,CHECK_LENGTH_2:]] l F __TEXT,__text _check_length_2 -# CHECK: [[#%x,HAS_UNWIND_1:]] l F __TEXT,__text _has_unwind_1 +# CHECK: [[#%x,HAS_UNWIND_2:]] l F __TEXT,__text _has_unwind_1 # CHECK: [[#%x,HAS_UNWIND_2:]] l F __TEXT,__text _has_unwind_2 +# CHECK: [[#%x,HAS_UNWIND_3:]] l F __TEXT,__text _has_unwind_3 # CHECK: [[#%x,MUTALLY_RECURSIVE_2:]] l F __TEXT,__text _mutually_recursive_1 # CHECK: [[#%x,MUTALLY_RECURSIVE_2:]] l F __TEXT,__text _mutually_recursive_2 # CHECK: [[#%x,INIT_2:]] l F __TEXT,__text _init_1 @@ -64,8 +65,9 @@ # CHECK: callq 0x[[#%x,CALL_RECURSIVE_2:]] <_call_recursive_2> # CHECK: callq 0x[[#%x,CHECK_LENGTH_1:]] <_check_length_1> # CHECK: callq 0x[[#%x,CHECK_LENGTH_2:]] <_check_length_2> -# CHECK: callq 0x[[#%x,HAS_UNWIND_1:]] <_has_unwind_1> # CHECK: callq 0x[[#%x,HAS_UNWIND_2:]] <_has_unwind_2> +# CHECK: callq 0x[[#%x,HAS_UNWIND_2:]] <_has_unwind_2> +# CHECK: callq 0x[[#%x,HAS_UNWIND_3:]] <_has_unwind_3> # CHECK: callq 0x[[#%x,MUTALLY_RECURSIVE_2:]] <_mutually_recursive_2> # CHECK: callq 0x[[#%x,MUTALLY_RECURSIVE_2:]] <_mutually_recursive_2> ## FIXME: Mutually-recursive functions with identical bodies (see below) @@ -170,8 +172,7 @@ _check_length_2: _my_personality: mov $1345, %rax -## No fold: functions have unwind info. -## FIXME: Fold functions with identical unwind info. +## Functions with identical unwind info should be folded. _has_unwind_1: .cfi_startproc .cfi_personality 155, _my_personality @@ -186,6 +187,15 @@ _has_unwind_2: ret .cfi_endproc +## This function has different unwind info from the preceding two, and therefore +## should not be folded. +_has_unwind_3: + .cfi_startproc + .cfi_personality 155, _my_personality + .cfi_def_cfa_offset 8 + ret + .cfi_endproc + ## Fold: Mutually-recursive functions with symmetric bodies _mutually_recursive_1: callq _mutually_recursive_1 # call myself @@ -253,6 +263,7 @@ _main: callq _check_length_2 callq _has_unwind_1 callq _has_unwind_2 + callq _has_unwind_3 callq _mutually_recursive_1 callq _mutually_recursive_2 callq _asymmetric_recursive_1 -- 2.34.1