From 59013c387e8d91c6a4718bcdba328746a8123bcf Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 29 Jun 2015 21:12:49 +0000 Subject: [PATCH] [opt] Replace the recursive walk for GC with a worklist algorithm. This flattens the entire liveness walk from a recursive mark approach to a worklist approach. It also sinks the worklist management completely out of the SectionChunk and into the Writer by exposing the ability to iterato over children of a chunk and over the symbol bodies of relocated symbols. I'm not 100% happy with the API names, so suggestions welcome there. This allows us to use a single worklist for the entire recursive walk and would also be a natural place to take advantage of parallelism at some future point. With this, we completely inline away the GC walk into the Writer::markLive function and it makes it very easy to profile what is slow. Currently, time is being wasted checking whether a Chunk isa SectionChunk (it essentially always is), finding (or skipping) a replacement for a symbol, and chasing pointers between symbols and their chunks. There are a bunch of things we can do to fix this, and its easier to do them after this change IMO. This change alone saves 1-2% of the time for my self-link of lld.exe (which I'm running and benchmarking on Linux ironically). Perhaps more notably, we'll no longer blow out the stack for large links. =] Just as an FYI, at this point, I/O is starting to really dominate the profile. Well over 10% of the time appears to be inside the kernel doing page table silliness. I think a decent chunk of this can be nuked as well, but it's a little odd as cross-linking in this way isn't really the primary goal here. Differential Revision: http://reviews.llvm.org/D10790 llvm-svn: 240995 --- lld/COFF/Chunks.cpp | 17 ----------------- lld/COFF/Chunks.h | 36 ++++++++++++++++++++++++++++++++++-- lld/COFF/Symbols.h | 2 +- lld/COFF/Writer.cpp | 35 +++++++++++++++++++++++++++++++++-- 4 files changed, 68 insertions(+), 22 deletions(-) diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index bd7b1ae..b26c43a 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -80,23 +80,6 @@ void SectionChunk::writeTo(uint8_t *Buf) { } } -void SectionChunk::mark() { - assert(!Live); - Live = true; - - // Mark all symbols listed in the relocation table for this section. - for (const coff_relocation &Rel : Relocs) { - SymbolBody *B = File->getSymbolBody(Rel.SymbolTableIndex)->getReplacement(); - if (auto *D = dyn_cast(B)) - D->markLive(); - } - - // Mark associative sections if any. - for (Chunk *C : AssocChildren) - if (auto *SC = dyn_cast(C)) - SC->markLive(); -} - void SectionChunk::addAssociative(SectionChunk *Child) { AssocChildren.push_back(Child); // Associative sections are live if their parent COMDATs are live, diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h index 3b6bf5f..40e1b25 100644 --- a/lld/COFF/Chunks.h +++ b/lld/COFF/Chunks.h @@ -10,8 +10,10 @@ #ifndef LLD_COFF_CHUNKS_H #define LLD_COFF_CHUNKS_H +#include "InputFiles.h" #include "lld/Core/LLVM.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Object/COFF.h" #include @@ -32,6 +34,7 @@ class DefinedRegular; class DefinedImportData; class ObjectFile; class OutputSection; +class SymbolBody; // A Chunk represents a chunk of data that will occupy space in the // output (if the resolver chose that). It may or may not be backed by @@ -106,6 +109,24 @@ protected: // A chunk corresponding a section of an input file. class SectionChunk : public Chunk { public: + class symbol_iterator : public llvm::iterator_adaptor_base< + symbol_iterator, const coff_relocation *, + std::random_access_iterator_tag, SymbolBody *> { + friend SectionChunk; + + ObjectFile *File; + + symbol_iterator(ObjectFile *File, const coff_relocation *I) + : symbol_iterator::iterator_adaptor_base(I), File(File) {} + + public: + symbol_iterator() = default; + + SymbolBody *operator*() const { + return File->getSymbolBody(I->SymbolTableIndex); + } + }; + SectionChunk(ObjectFile *File, const coff_section *Header); static bool classof(const Chunk *C) { return C->kind() == SectionKind; } size_t getSize() const override { return Header->SizeOfRawData; } @@ -130,7 +151,19 @@ public: // Used by the garbage collector. bool isRoot() { return Root; } bool isLive() { return Live; } - void markLive() { if (!Live) mark(); } + void markLive() { + assert(!Live && "Cannot mark an already live section!"); + Live = true; + } + + // Allow iteration over the bodies of this chunk's relocated symbols. + llvm::iterator_range symbols() const { + return llvm::make_range(symbol_iterator(File, Relocs.begin()), + symbol_iterator(File, Relocs.end())); + } + + // Allow iteration over the associated child chunks for this section. + ArrayRef children() const { return AssocChildren; } // Used for ICF (Identical COMDAT Folding) void replaceWith(SectionChunk *Other); @@ -156,7 +189,6 @@ private: size_t NumRelocs; // Used by the garbage collector. - void mark(); bool Live = false; bool Root; diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index f87b8f1..4825bd7 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -136,7 +136,7 @@ public: bool isCOMDAT() { return IsCOMDAT; } bool isLive() const { return (*Data)->isLive(); } void markLive() { (*Data)->markLive(); } - Chunk *getChunk() { return *Data; } + SectionChunk *getChunk() { return *Data; } uint64_t getValue() { return Sym.getValue(); } private: diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index c0a7f45..7d85e534 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -111,13 +111,44 @@ void OutputSection::writeHeaderTo(uint8_t *Buf) { void Writer::markLive() { if (!Config->DoGC) return; + + // We build up a worklist of sections which have been marked as live. We only + // push into the worklist when we discover an unmarked section, and we mark + // as we push, so sections never appear twice in the list. + SmallVector Worklist; + for (StringRef Name : Config->GCRoots) if (auto *D = dyn_cast(Symtab->find(Name))) - D->markLive(); + if (!D->isLive()) { + D->markLive(); + Worklist.push_back(D->getChunk()); + } for (Chunk *C : Symtab->getChunks()) if (auto *SC = dyn_cast(C)) - if (SC->isRoot()) + if (SC->isRoot() && !SC->isLive()) { SC->markLive(); + Worklist.push_back(SC); + } + + while (!Worklist.empty()) { + SectionChunk *SC = Worklist.pop_back_val(); + assert(SC->isLive() && "We mark as live when pushing onto the worklist!"); + + // Mark all symbols listed in the relocation table for this section. + for (SymbolBody *S : SC->symbols()) + if (auto *D = dyn_cast(S->getReplacement())) + if (!D->isLive()) { + D->markLive(); + Worklist.push_back(D->getChunk()); + } + + // Mark associative sections if any. + for (SectionChunk *ChildSC : SC->children()) + if (!ChildSC->isLive()) { + ChildSC->markLive(); + Worklist.push_back(ChildSC); + } + } } // Merge identical COMDAT sections. -- 2.7.4