From 6cd7af51e1a478db7835096b85aac9cc3f464636 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 3 Apr 2018 21:38:18 +0000 Subject: [PATCH] Inline initOffsetMap. In the lld perf builder, r328686 had a negative impact on stalled-cycles-frontend. Somehow that stat is not showing on my machine, but the attached patch shows an improvement on cache-misses, which is probably a reasonable proxy. My working theory is that given a large input the pieces vector is out of cache by the time initOffsetMap runs. Both finalizeContents implementations have a convenient location for initializing the OffsetMap, so this seems the best solution. llvm-svn: 329117 --- lld/ELF/InputSection.cpp | 6 ------ lld/ELF/InputSection.h | 4 +--- lld/ELF/SyntheticSections.cpp | 29 +++++++++++++++++------------ 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 006a0de..73f5d66 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -985,12 +985,6 @@ uint64_t MergeInputSection::getOffset(uint64_t Offset) const { return Piece.OutputOff + Addend; } -void MergeInputSection::initOffsetMap() { - OffsetMap.reserve(Pieces.size()); - for (size_t I = 0; I < Pieces.size(); ++I) - OffsetMap[Pieces[I].InputOff] = I; -} - template InputSection::InputSection(ObjFile &, const ELF32LE::Shdr &, StringRef); template InputSection::InputSection(ObjFile &, const ELF32BE::Shdr &, diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 56b5412..d0f83798 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -236,6 +236,7 @@ public: // Splittable sections are handled as a sequence of data // rather than a single large blob of data. std::vector Pieces; + llvm::DenseMap OffsetMap; // Returns I'th piece's data. This function is very hot when // string merging is enabled, so we want to inline. 
@@ -254,14 +255,11 @@ public: } SyntheticSection *getParent() const; - void initOffsetMap(); private: void splitStrings(ArrayRef A, size_t Size); void splitNonStrings(ArrayRef A, size_t Size); - llvm::DenseMap OffsetMap; - llvm::DenseSet LiveOffsets; }; diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 5c214d0..4c6cbff9 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2438,10 +2438,15 @@ void MergeTailSection::finalizeContents() { // finalize() fixed tail-optimized strings, so we can now get // offsets of strings. Get an offset for each string and save it // to a corresponding StringPiece for easy access. - for (MergeInputSection *Sec : Sections) - for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) - if (Sec->Pieces[I].Live) - Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I)); + for (MergeInputSection *Sec : Sections) { + Sec->OffsetMap.reserve(Sec->Pieces.size()); + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) { + SectionPiece &P = Sec->Pieces[I]; + Sec->OffsetMap[P.InputOff] = I; + if (P.Live) + P.OutputOff = Builder.getOffset(Sec->getData(I)); + } + } } void MergeNoTailSection::writeTo(uint8_t *Buf) { @@ -2494,10 +2499,13 @@ void MergeNoTailSection::finalizeContents() { // So far, section pieces have offsets from beginning of shards, but // we want offsets from beginning of the whole section. Fix them. 
parallelForEach(Sections, [&](MergeInputSection *Sec) { - for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) - if (Sec->Pieces[I].Live) - Sec->Pieces[I].OutputOff += - ShardOffsets[getShardId(Sec->Pieces[I].Hash)]; + Sec->OffsetMap.reserve(Sec->Pieces.size()); + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) { + SectionPiece &P = Sec->Pieces[I]; + Sec->OffsetMap[P.InputOff] = I; + if (P.Live) + P.OutputOff += ShardOffsets[getShardId(P.Hash)]; + } }); } @@ -2573,11 +2581,8 @@ void elf::mergeSections() { } (*I)->addSection(MS); } - for (auto *MS : MergeSections) { + for (auto *MS : MergeSections) MS->finalizeContents(); - parallelForEach(MS->Sections, - [](MergeInputSection *Sec) { Sec->initOffsetMap(); }); - } std::vector &V = InputSections; V.erase(std::remove(V.begin(), V.end(), nullptr), V.end()); -- 2.7.4