From 9dedfb1fa8cf6d54d35fd1ec1e01c44429325076 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Wed, 30 Nov 2016 01:50:03 +0000 Subject: [PATCH] Change how we manage groups in ICF. Previously, on each iteration in ICF, we scan the entire vector of input sections to find boundaries of groups having the same ID. This patch changes the algorithm so that we now have a vector of ranges. Each range contains a starting index and an ending index of the group. So we no longer have to search boundaries on each iteration. Performance-wise, this seems neutral. Instead of searching boundaries, we now have to maintain ranges. But I think this is more readable than the previous implementation. Moreover, this makes easy to parallelize the main loop of ICF, which I'll do in a follow-up patch. llvm-svn: 288228 --- lld/ELF/ICF.cpp | 196 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 114 insertions(+), 82 deletions(-) diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 03c88d6..4968fd1a 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -71,21 +71,34 @@ using namespace llvm::ELF; using namespace llvm::object; namespace { +struct Range { + size_t Begin; + size_t End; +}; + template class ICF { public: void run(); private: - uint64_t NextId = 1; + void segregate(Range *R, bool Constant); + + template + bool constantEq(ArrayRef RelsA, ArrayRef RelsB); + + template + bool variableEq(const InputSection *A, ArrayRef RelsA, + const InputSection *B, ArrayRef RelsB); - using Comparator = std::function *, - const InputSection *)>; + bool equalsConstant(const InputSection *A, const InputSection *B); + bool equalsVariable(const InputSection *A, const InputSection *B); - void segregate(MutableArrayRef *> Arr, Comparator Eq); + std::vector *> Sections; + std::vector Ranges; - void - forEachGroup(std::vector *> &V, - std::function *>)> Fn); + // The main loop is repeated until we get a convergence. + bool Repeat = false; // If Repeat is true, we need to repeat. + int Cnt = 0; // Counter for the main loop. }; } @@ -104,56 +117,51 @@ template static bool isEligible(InputSection *S) { S->Name != ".init" && S->Name != ".fini"; } -template static std::vector *> getSections() { - std::vector *> V; - for (InputSectionBase *Sec : Symtab::X->Sections) - if (auto *S = dyn_cast>(Sec)) - if (isEligible(S)) - V.push_back(S); - return V; -} - -// Before calling this function, all sections in Arr must have the -// same group ID. This function compare sections in Arr using Eq and -// assign new group IDs for new groups. -template -void ICF::segregate(MutableArrayRef *> Arr, - Comparator Eq) { - // This loop rearranges Arr so that all sections that are equal in - // terms of Eq are contiguous. The algorithm is quadratic in the - // worst case, but that is not an issue in practice because the - // number of distinct sections in Arr is usually very small. - InputSection **I = Arr.begin(); - for (;;) { - InputSection *Head = *I; +// Before calling this function, all sections in range R must have the +// same group ID. +template void ICF::segregate(Range *R, bool Constant) { + // This loop rearranges sections in range R so that all sections + // that are equal in terms of equals{Constant,Variable} are contiguous + // in Sections vector. + // + // The algorithm is quadratic in the worst case, but that is not an + // issue in practice because the number of the distinct sections in + // [R.Begin, R.End] is usually very small. + while (R->End - R->Begin > 1) { + // Divide range R into two. Let Mid be the start index of the + // second group. auto Bound = std::stable_partition( - I + 1, Arr.end(), [&](InputSection *S) { return Eq(Head, S); }); - if (Bound == Arr.end()) + Sections.begin() + R->Begin + 1, Sections.begin() + R->End, + [&](InputSection *S) { + if (Constant) + return equalsConstant(Sections[R->Begin], S); + return equalsVariable(Sections[R->Begin], S); + }); + size_t Mid = Bound - Sections.begin(); + + if (Mid == R->End) return; - uint64_t Id = NextId++; - for (; I != Bound; ++I) - (*I)->GroupId = Id; - } -} -// Call Fn for each section group having the same group ID. -template -void ICF::forEachGroup( - std::vector *> &V, - std::function *>)> Fn) { - for (InputSection **I = V.data(), **E = I + V.size(); I != E;) { - InputSection *Head = *I; - auto Bound = std::find_if(I + 1, E, [&](InputSection *S) { - return S->GroupId != Head->GroupId; - }); - Fn({I, Bound}); - I = Bound; + // Now we split [R.Begin, R.End) into [R.Begin, Mid) and [Mid, R.End). + if (Mid - R->Begin > 1) + Ranges.push_back({R->Begin, Mid}); + R->Begin = Mid; + + // Update GroupIds for the new group members. We use the index of + // the group first member as a group ID because that is unique. + for (size_t I = Mid; I < R->End; ++I) + Sections[I]->GroupId = Mid; + + // Since we have split a group, we need to repeat the main loop + // later to obtain a convergence. Remember that. + Repeat = true; } } // Compare two lists of relocations. -template -static bool relocationEq(ArrayRef RelsA, ArrayRef RelsB) { +template +template +bool ICF::constantEq(ArrayRef RelsA, ArrayRef RelsB) { auto Eq = [](const RelTy &A, const RelTy &B) { return A.r_offset == B.r_offset && A.getType(Config->Mips64EL) == B.getType(Config->Mips64EL) && @@ -167,22 +175,23 @@ static bool relocationEq(ArrayRef RelsA, ArrayRef RelsB) { // Compare "non-moving" part of two InputSections, namely everything // except relocation targets. template -static bool equalsConstant(const InputSection *A, - const InputSection *B) { +bool ICF::equalsConstant(const InputSection *A, + const InputSection *B) { if (A->NumRelocations != B->NumRelocations || A->Flags != B->Flags || A->getSize() != B->getSize() || A->Data != B->Data) return false; if (A->AreRelocsRela) - return relocationEq(A->relas(), B->relas()); - return relocationEq(A->rels(), B->rels()); + return constantEq(A->relas(), B->relas()); + return constantEq(A->rels(), B->rels()); } // Compare two lists of relocations. Returns true if all pairs of // relocations point to the same section in terms of ICF. -template -static bool variableEq(const InputSection *A, ArrayRef RelsA, - const InputSection *B, ArrayRef RelsB) { +template +template +bool ICF::variableEq(const InputSection *A, ArrayRef RelsA, + const InputSection *B, ArrayRef RelsB) { auto Eq = [&](const RelTy &RA, const RelTy &RB) { SymbolBody &SA = A->getFile()->getRelocTargetSym(RA); SymbolBody &SB = B->getFile()->getRelocTargetSym(RB); @@ -197,9 +206,12 @@ static bool variableEq(const InputSection *A, ArrayRef RelsA, return false; if (DA->Value != DB->Value) return false; + auto *X = dyn_cast>(DA->Section); auto *Y = dyn_cast>(DB->Section); - return X && Y && X->GroupId && X->GroupId == Y->GroupId; + if (!X || !Y) + return false; + return X->GroupId != 0 && X->GroupId == Y->GroupId; }; return std::equal(RelsA.begin(), RelsA.end(), RelsB.begin(), Eq); @@ -207,8 +219,8 @@ static bool variableEq(const InputSection *A, ArrayRef RelsA, // Compare "moving" part of two InputSections, namely relocation targets. template -static bool equalsVariable(const InputSection *A, - const InputSection *B) { +bool ICF::equalsVariable(const InputSection *A, + const InputSection *B) { if (A->AreRelocsRela) return variableEq(A, A->relas(), B, B->relas()); return variableEq(A, A->rels(), B, B->rels()); @@ -216,12 +228,17 @@ static bool equalsVariable(const InputSection *A, // The main function of ICF. template void ICF::run() { + // Collect sections to merge. + for (InputSectionBase *Sec : Symtab::X->Sections) + if (auto *S = dyn_cast>(Sec)) + if (isEligible(S)) + Sections.push_back(S); + // Initially, we use hash values as section group IDs. Therefore, // if two sections have the same ID, they are likely (but not // guaranteed) to have the same static contents in terms of ICF. - std::vector *> Sections = getSections(); for (InputSection *S : Sections) - // Set MSB on to avoid collisions with serial group IDs + // Set MSB to 1 to avoid collisions with non-hash IDs. S->GroupId = getHash(S) | (uint64_t(1) << 63); // From now on, sections in Sections are ordered so that sections in @@ -235,32 +252,47 @@ template void ICF::run() { return B->Alignment < A->Alignment; }); + // Split sections into groups by ID. And then we are going to + // split groups into more and more smaller groups. + // Note that we do not add single element groups because they + // are already the smallest. + Ranges.reserve(Sections.size()); + for (size_t I = 0, E = Sections.size(); I < E - 1;) { + // Let J be the first index whose element has a different ID. + size_t J = I + 1; + while (J < E && Sections[I]->GroupId == Sections[J]->GroupId) + ++J; + if (J - I > 1) + Ranges.push_back({I, J}); + I = J; + } + // Compare static contents and assign unique IDs for each static content. - forEachGroup(Sections, [&](MutableArrayRef *> V) { - segregate(V, equalsConstant); - }); + std::for_each(Ranges.begin(), Ranges.end(), + [&](Range &R) { segregate(&R, true); }); + ++Cnt; - // Split groups by comparing relocations until we get a convergence. - int Cnt = 1; - for (;;) { + // Split groups by comparing relocations until convergence is obtained. + do { + Repeat = false; + std::for_each(Ranges.begin(), Ranges.end(), + [&](Range &R) { segregate(&R, false); }); ++Cnt; - uint64_t Id = NextId; - forEachGroup(Sections, [&](MutableArrayRef *> V) { - segregate(V, equalsVariable); - }); - if (Id == NextId) - break; - } - log("ICF needed " + Twine(Cnt) + " iterations."); + } while (Repeat); + + log("ICF needed " + Twine(Cnt) + " iterations"); // Merge sections in the same group. - forEachGroup(Sections, [](MutableArrayRef *> V) { - log("selected " + V[0]->Name); - for (InputSection *S : V.slice(1)) { - log(" removed " + S->Name); - V[0]->replace(S); + for (Range R : Ranges) { + if (R.End - R.Begin == 1) + continue; + + log("selected " + Sections[R.Begin]->Name); + for (size_t I = R.Begin + 1; I < R.End; ++I) { + log(" removed " + Sections[I]->Name); + Sections[R.Begin]->replace(Sections[I]); } - }); + } } // ICF entry point function. -- 2.7.4