From d9717aa0e420e931d79d85867e06afe3c3e625b0 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne
Date: Fri, 31 Mar 2017 02:28:30 +0000
Subject: [PATCH] LTO: Reduce memory consumption by creating an in-memory symbol table for InputFiles. NFCI.

Introduce symbol table data structures that can be potentially written to
disk, have the LTO library build those data structures using temporarily
constructed modules and redirect the LTO library implementation to go through
those data structures. This allows us to remove the LLVMContext and Modules
owned by InputFile.

With this change I measured a peak memory consumption decrease from 5.4GB to
2.8GB in a no-op incremental ThinLTO link of Chromium on Linux. The impact on
memory consumption is larger in COFF linkers where we are currently forced
to materialize all metadata in order to read linker options. Peak memory
consumption linking a large piece of Chromium for Windows with full LTO and
debug info decreases from >64GB (OOM) to 15GB.

Part of PR27551.

Differential Revision: https://reviews.llvm.org/D31364 llvm-svn: 299168 --- lld/COFF/InputFiles.cpp | 5 +- lld/ELF/InputFiles.cpp | 8 +- llvm/include/llvm/LTO/LTO.h | 207 ++++++------------------- llvm/include/llvm/Object/IRSymtab.h | 298 ++++++++++++++++++++++++++++++++++++ llvm/lib/LTO/LTO.cpp | 212 ++++++++++--------------- llvm/lib/Object/CMakeLists.txt | 1 + llvm/lib/Object/IRSymtab.cpp | 228 +++++++++++++++++++++++++++ llvm/tools/gold/gold-plugin.cpp | 4 +- 8 files changed, 664 insertions(+), 299 deletions(-) create mode 100644 llvm/include/llvm/Object/IRSymtab.h create mode 100644 llvm/lib/Object/IRSymtab.cpp diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index bd3294e..f9befa3 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -355,13 +355,12 @@ void BitcodeFile::parse() { SymbolBody *Alias = Symtab->addUndefined(Saver.save(Fallback)); checkAndSetWeakAlias(Symtab, this, Sym->body(), Alias); } else { - Expected ComdatIndex = ObjSym.getComdatIndex(); - bool IsCOMDAT = ComdatIndex && *ComdatIndex != -1; + bool IsCOMDAT = ObjSym.getComdatIndex() != -1; Sym = Symtab->addRegular(this, SymName, IsCOMDAT); } SymbolBodies.push_back(Sym->body()); } - Directives = check(Obj->getLinkerOpts()); + Directives = Obj->getCOFFLinkerOpts(); } MachineTypes BitcodeFile::getMachineType() { diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 5835caa..8a25d8e 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -818,7 +818,7 @@ static Symbol *createBitcodeSymbol(const std::vector &KeptComdats, uint8_t Visibility = mapVisibility(ObjSym.getVisibility()); bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable(); - int C = check(ObjSym.getComdatIndex(), F->LogName); + int C = ObjSym.getComdatIndex(); if (C != -1 && !KeptComdats[C]) return Symtab::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, Visibility, Type, CanOmitFromDynSym, @@ -855,10 +855,8 @@ void BitcodeFile::parse(DenseSet &ComdatGroups) { Obj = 
check(lto::InputFile::create(MBRef), this->LogName); std::vector KeptComdats; - for (StringRef S : Obj->getComdatTable()) { - StringRef N = Saver.save(S); - KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(N)).second); - } + for (StringRef S : Obj->getComdatTable()) + KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(S)).second); for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) Symbols.push_back(createBitcodeSymbol(KeptComdats, ObjSym, this)); diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index 2020d61..b58c03a 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -24,7 +24,7 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/LTO/Config.h" #include "llvm/Linker/IRMover.h" -#include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Object/IRSymtab.h" #include "llvm/Support/Error.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/thread.h" @@ -79,21 +79,26 @@ class LTO; struct SymbolResolution; class ThinBackendProc; -/// An input file. This is a wrapper for ModuleSymbolTable that exposes only the +/// An input file. This is a symbol table wrapper that only exposes the /// information that an LTO client should need in order to do symbol resolution. class InputFile { +public: + class Symbol; + +private: // FIXME: Remove LTO class friendship once we have bitcode symbol tables. friend LTO; InputFile() = default; - // FIXME: Remove the LLVMContext once we have bitcode symbol tables. - LLVMContext Ctx; - struct InputModule; - std::vector Mods; - ModuleSymbolTable SymTab; + std::vector Mods; + SmallVector Strtab; + std::vector Symbols; + + // [begin, end) for each module + std::vector> ModuleSymIndices; - std::vector Comdats; - DenseMap ComdatMap; + StringRef SourceFileName, COFFLinkerOpts; + std::vector ComdatTable; public: ~InputFile(); @@ -101,170 +106,48 @@ public: /// Create an InputFile. 
static Expected> create(MemoryBufferRef Object); - class symbol_iterator; - - /// This is a wrapper for ArrayRef::iterator that - /// exposes only the information that an LTO client should need in order to do - /// symbol resolution. - /// - /// This object is ephemeral; it is only valid as long as an iterator obtained - /// from symbols() refers to it. - class Symbol { - friend symbol_iterator; + /// The purpose of this class is to only expose the symbol information that an + /// LTO client should need in order to do symbol resolution. + class Symbol : irsymtab::Symbol { friend LTO; - ArrayRef::iterator I; - const ModuleSymbolTable &SymTab; - const InputFile *File; - uint32_t Flags; - SmallString<64> Name; - - bool shouldSkip() { - return !(Flags & object::BasicSymbolRef::SF_Global) || - (Flags & object::BasicSymbolRef::SF_FormatSpecific); - } - - void skip() { - ArrayRef::iterator E = SymTab.symbols().end(); - while (I != E) { - Flags = SymTab.getSymbolFlags(*I); - if (!shouldSkip()) - break; - ++I; - } - if (I == E) - return; - - Name.clear(); - { - raw_svector_ostream OS(Name); - SymTab.printSymbolName(OS, *I); - } - } - - bool isGV() const { return I->is(); } - GlobalValue *getGV() const { return I->get(); } - public: - Symbol(ArrayRef::iterator I, - const ModuleSymbolTable &SymTab, const InputFile *File) - : I(I), SymTab(SymTab), File(File) { - skip(); - } - - bool isUndefined() const { - return Flags & object::BasicSymbolRef::SF_Undefined; - } - bool isCommon() const { return Flags & object::BasicSymbolRef::SF_Common; } - bool isWeak() const { return Flags & object::BasicSymbolRef::SF_Weak; } - bool isIndirect() const { - return Flags & object::BasicSymbolRef::SF_Indirect; - } - - /// For COFF weak externals, returns the name of the symbol that is used - /// as a fallback if the weak external remains undefined. 
- std::string getCOFFWeakExternalFallback() const { - assert((Flags & object::BasicSymbolRef::SF_Weak) && - (Flags & object::BasicSymbolRef::SF_Indirect) && - "symbol is not a weak external"); - std::string Name; - raw_string_ostream OS(Name); - SymTab.printSymbolName( - OS, - cast( - cast(getGV())->getAliasee()->stripPointerCasts())); - OS.flush(); - return Name; - } - - /// Returns the mangled name of the global. - StringRef getName() const { return Name; } - - GlobalValue::VisibilityTypes getVisibility() const { - if (isGV()) - return getGV()->getVisibility(); - return GlobalValue::DefaultVisibility; - } - bool canBeOmittedFromSymbolTable() const { - return isGV() && llvm::canBeOmittedFromSymbolTable(getGV()); - } - bool isTLS() const { - // FIXME: Expose a thread-local flag for module asm symbols. - return isGV() && getGV()->isThreadLocal(); - } - - // Returns the index of the comdat this symbol is in or -1 if the symbol - // is not in a comdat. - // FIXME: We have to return Expected because aliases point to an - // arbitrary ConstantExpr and that might not actually be a constant. That - // means we might not be able to find what an alias is aliased to and - // so find its comdat. 
- Expected getComdatIndex() const; - - uint64_t getCommonSize() const { - assert(Flags & object::BasicSymbolRef::SF_Common); - if (!isGV()) - return 0; - return getGV()->getParent()->getDataLayout().getTypeAllocSize( - getGV()->getType()->getElementType()); - } - unsigned getCommonAlignment() const { - assert(Flags & object::BasicSymbolRef::SF_Common); - if (!isGV()) - return 0; - return getGV()->getAlignment(); - } - }; - - class symbol_iterator { - Symbol Sym; - - public: - symbol_iterator(ArrayRef::iterator I, - const ModuleSymbolTable &SymTab, const InputFile *File) - : Sym(I, SymTab, File) {} - - symbol_iterator &operator++() { - ++Sym.I; - Sym.skip(); - return *this; - } - - symbol_iterator operator++(int) { - symbol_iterator I = *this; - ++*this; - return I; - } - - const Symbol &operator*() const { return Sym; } - const Symbol *operator->() const { return &Sym; } - - bool operator!=(const symbol_iterator &Other) const { - return Sym.I != Other.Sym.I; - } + Symbol(const irsymtab::Symbol &S) : irsymtab::Symbol(S) {} + + using irsymtab::Symbol::isUndefined; + using irsymtab::Symbol::isCommon; + using irsymtab::Symbol::isWeak; + using irsymtab::Symbol::isIndirect; + using irsymtab::Symbol::getName; + using irsymtab::Symbol::getVisibility; + using irsymtab::Symbol::canBeOmittedFromSymbolTable; + using irsymtab::Symbol::isTLS; + using irsymtab::Symbol::getComdatIndex; + using irsymtab::Symbol::getCommonSize; + using irsymtab::Symbol::getCommonAlignment; + using irsymtab::Symbol::getCOFFWeakExternalFallback; }; /// A range over the symbols in this InputFile. - iterator_range symbols() { - return llvm::make_range( - symbol_iterator(SymTab.symbols().begin(), SymTab, this), - symbol_iterator(SymTab.symbols().end(), SymTab, this)); - } + ArrayRef symbols() const { return Symbols; } /// Returns linker options specified in the input file. - Expected getLinkerOpts(); + StringRef getCOFFLinkerOpts() const { return COFFLinkerOpts; } /// Returns the path to the InputFile. 
StringRef getName() const; /// Returns the source file path specified at compile time. - StringRef getSourceFileName() const; + StringRef getSourceFileName() const { return SourceFileName; } // Returns a table with all the comdats used by this file. - ArrayRef getComdatTable() const { return Comdats; } + ArrayRef getComdatTable() const { return ComdatTable; } private: - iterator_range module_symbols(InputModule &IM); + ArrayRef module_symbols(unsigned I) const { + const auto &Indices = ModuleSymIndices[I]; + return {Symbols.data() + Indices.first, Symbols.data() + Indices.second}; + } }; /// This class wraps an output stream for a native object. Most clients should @@ -452,20 +335,20 @@ private: // Global mapping from mangled symbol names to resolutions. StringMap GlobalResolutions; - void addSymbolToGlobalRes(SmallPtrSet &Used, - const InputFile::Symbol &Sym, SymbolResolution Res, + void addSymbolToGlobalRes(const InputFile::Symbol &Sym, SymbolResolution Res, unsigned Partition); // These functions take a range of symbol resolutions [ResI, ResE) and consume // the resolutions used by a single input module by incrementing ResI. After // these functions return, [ResI, ResE) will refer to the resolution range for // the remaining modules in the InputFile. 
- Error addModule(InputFile &Input, InputFile::InputModule &IM, + Error addModule(InputFile &Input, unsigned ModI, const SymbolResolution *&ResI, const SymbolResolution *ResE); - Error addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI, + Error addRegularLTO(BitcodeModule BM, + ArrayRef Syms, + const SymbolResolution *&ResI, const SymbolResolution *ResE); - Error addThinLTO(BitcodeModule BM, Module &M, - iterator_range Syms, + Error addThinLTO(BitcodeModule BM, ArrayRef Syms, const SymbolResolution *&ResI, const SymbolResolution *ResE); Error runRegularLTO(AddStreamFn AddStream); diff --git a/llvm/include/llvm/Object/IRSymtab.h b/llvm/include/llvm/Object/IRSymtab.h new file mode 100644 index 0000000..d3129b2 --- /dev/null +++ b/llvm/include/llvm/Object/IRSymtab.h @@ -0,0 +1,298 @@ +//===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains data definitions and a reader and builder for a symbol +// table for LLVM IR. Its purpose is to allow linkers and other consumers of +// bitcode files to efficiently read the symbol table for symbol resolution +// purposes without needing to construct a module in memory. +// +// As with most object files the symbol table has two parts: the symbol table +// itself and a string table which is referenced by the symbol table. +// +// A symbol table corresponds to a single bitcode file, which may consist of +// multiple modules, so symbol tables may likewise contain symbols for multiple +// modules. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_IRSYMTAB_H +#define LLVM_OBJECT_IRSYMTAB_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Endian.h" + +namespace llvm { +namespace irsymtab { +namespace storage { + +// The data structures in this namespace define the low-level serialization +// format. Clients that just want to read a symbol table should use the +// irsymtab::Reader class. + +typedef support::ulittle32_t Word; + +/// A reference to a string in the string table. +struct Str { + Word Offset; + StringRef get(StringRef Strtab) const { + return Strtab.data() + Offset; + } +}; + +/// A reference to a range of objects in the symbol table. +template struct Range { + Word Offset, Size; + ArrayRef get(StringRef Symtab) const { + return {reinterpret_cast(Symtab.data() + Offset), Size}; + } +}; + +/// Describes the range of a particular module's symbols within the symbol +/// table. +struct Module { + Word Begin, End; +}; + +/// This is equivalent to an IR comdat. +struct Comdat { + Str Name; +}; + +/// Contains the information needed by linkers for symbol resolution, as well as +/// by the LTO implementation itself. +struct Symbol { + /// The mangled symbol name. + Str Name; + + /// The unmangled symbol name, or the empty string if this is not an IR + /// symbol. + Str IRName; + + /// The index into Header::Comdats, or -1 if not a comdat member. + Word ComdatIndex; + + Word Flags; + enum FlagBits { + FB_visibility, // 2 bits + FB_undefined = FB_visibility + 2, + FB_weak, + FB_common, + FB_indirect, + FB_used, + FB_tls, + FB_may_omit, + FB_global, + FB_format_specific, + FB_unnamed_addr, + }; + + /// The index into the Uncommon table, or -1 if this symbol does not have an + /// Uncommon. + Word UncommonIndex; +}; + +/// This data structure contains rarely used symbol fields and is optionally +/// referenced by a Symbol. 
+struct Uncommon { + Word CommonSize, CommonAlign; + + /// COFF-specific: the name of the symbol that a weak external resolves to + /// if not defined. + Str COFFWeakExternFallbackName; +}; + +struct Header { + Range Modules; + Range Comdats; + Range Symbols; + Range Uncommons; + + Str SourceFileName; + + /// COFF-specific: linker directives. + Str COFFLinkerOpts; +}; + +} + +/// Fills in Symtab and Strtab with a valid symbol and string table for Mods. +Error build(ArrayRef Mods, SmallVector &Symtab, + SmallVector &Strtab); + +/// This represents a symbol that has been read from a storage::Symbol and +/// possibly a storage::Uncommon. +struct Symbol { + // Copied from storage::Symbol. + StringRef Name, IRName; + int ComdatIndex; + uint32_t Flags; + + // Copied from storage::Uncommon. + uint32_t CommonSize, CommonAlign; + StringRef COFFWeakExternFallbackName; + + /// Returns the mangled symbol name. + StringRef getName() const { return Name; } + + /// Returns the unmangled symbol name, or the empty string if this is not an + /// IR symbol. + StringRef getIRName() const { return IRName; } + + /// Returns the index into the comdat table (see Reader::getComdatTable()), or + /// -1 if not a comdat member. 
+ int getComdatIndex() const { return ComdatIndex; } + + using S = storage::Symbol; + GlobalValue::VisibilityTypes getVisibility() const { + return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3); + } + bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; } + bool isWeak() const { return (Flags >> S::FB_weak) & 1; } + bool isCommon() const { return (Flags >> S::FB_common) & 1; } + bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; } + bool isUsed() const { return (Flags >> S::FB_used) & 1; } + bool isTLS() const { return (Flags >> S::FB_tls) & 1; } + bool canBeOmittedFromSymbolTable() const { + return (Flags >> S::FB_may_omit) & 1; + } + bool isGlobal() const { return (Flags >> S::FB_global) & 1; } + bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; } + bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; } + + size_t getCommonSize() const { + assert(isCommon()); + return CommonSize; + } + uint32_t getCommonAlignment() const { + assert(isCommon()); + return CommonAlign; + } + + /// COFF-specific: for weak externals, returns the name of the symbol that is + /// used as a fallback if the weak external remains undefined. + StringRef getCOFFWeakExternalFallback() const { + assert(isWeak() && isIndirect()); + return COFFWeakExternFallbackName; + } +}; + +/// This class can be used to read a Symtab and Strtab produced by +/// irsymtab::build. 
+class Reader { + StringRef Symtab, Strtab; + + ArrayRef Modules; + ArrayRef Comdats; + ArrayRef Symbols; + ArrayRef Uncommons; + + StringRef str(storage::Str S) const { return S.get(Strtab); } + template ArrayRef range(storage::Range R) const { + return R.get(Symtab); + } + const storage::Header &header() const { + return *reinterpret_cast(Symtab.data()); + } + +public: + class SymbolRef; + + Reader() = default; + Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) { + Modules = range(header().Modules); + Comdats = range(header().Comdats); + Symbols = range(header().Symbols); + Uncommons = range(header().Uncommons); + } + + typedef iterator_range> symbol_range; + + /// Returns the symbol table for the entire bitcode file. + /// The symbols enumerated by this method are ephemeral, but they can be + /// copied into an irsymtab::Symbol object. + symbol_range symbols() const; + + /// Returns a slice of the symbol table for the I'th module in the file. + /// The symbols enumerated by this method are ephemeral, but they can be + /// copied into an irsymtab::Symbol object. + symbol_range module_symbols(unsigned I) const; + + /// Returns the source file path specified at compile time. + StringRef getSourceFileName() const { return str(header().SourceFileName); } + + /// Returns a table with all the comdats used by this file. + std::vector getComdatTable() const { + std::vector ComdatTable; + ComdatTable.reserve(Comdats.size()); + for (auto C : Comdats) + ComdatTable.push_back(str(C.Name)); + return ComdatTable; + } + + /// COFF-specific: returns linker options specified in the input file. + StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); } +}; + +/// Ephemeral symbols produced by Reader::symbols() and +/// Reader::module_symbols(). 
+class Reader::SymbolRef : public Symbol { + const storage::Symbol *SymI, *SymE; + const Reader *R; + +public: + SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE, + const Reader *R) + : SymI(SymI), SymE(SymE), R(R) { + read(); + } + + void read() { + if (SymI == SymE) + return; + + Name = R->str(SymI->Name); + IRName = R->str(SymI->IRName); + ComdatIndex = SymI->ComdatIndex; + Flags = SymI->Flags; + + uint32_t UncI = SymI->UncommonIndex; + if (UncI != -1u) { + const storage::Uncommon &Unc = R->Uncommons[UncI]; + CommonSize = Unc.CommonSize; + CommonAlign = Unc.CommonAlign; + COFFWeakExternFallbackName = R->str(Unc.COFFWeakExternFallbackName); + } + } + void moveNext() { + ++SymI; + read(); + } + + bool operator==(const SymbolRef &Other) const { return SymI == Other.SymI; } +}; + +inline Reader::symbol_range Reader::symbols() const { + return {SymbolRef(Symbols.begin(), Symbols.end(), this), + SymbolRef(Symbols.end(), Symbols.end(), this)}; +} + +inline Reader::symbol_range Reader::module_symbols(unsigned I) const { + const storage::Module &M = Modules[I]; + const storage::Symbol *MBegin = Symbols.begin() + M.Begin, + *MEnd = Symbols.begin() + M.End; + return {SymbolRef(MBegin, MEnd, this), SymbolRef(MEnd, MEnd, this)}; +} + +} + +} + +#endif diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index f3d258e..4885f65 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -305,14 +305,6 @@ void llvm::thinLTOInternalizeAndPromoteInIndex( thinLTOInternalizeAndPromoteGUID(I.second, I.first, isExported); } -struct InputFile::InputModule { - BitcodeModule BM; - std::unique_ptr Mod; - - // The range of ModuleSymbolTable entries for this input module. - size_t SymBegin, SymEnd; -}; - // Requires a destructor for std::vector. 
InputFile::~InputFile() = default; @@ -333,87 +325,51 @@ Expected> InputFile::create(MemoryBufferRef Object) { return make_error("Bitcode file does not contain any modules", inconvertibleErrorCode()); - // Create an InputModule for each module in the InputFile, and add it to the - // ModuleSymbolTable. + File->Mods = *BMsOrErr; + + LLVMContext Ctx; + std::vector Mods; + std::vector> OwnedMods; for (auto BM : *BMsOrErr) { Expected> MOrErr = - BM.getLazyModule(File->Ctx, /*ShouldLazyLoadMetadata*/ true, + BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true, /*IsImporting*/ false); if (!MOrErr) return MOrErr.takeError(); - size_t SymBegin = File->SymTab.symbols().size(); - File->SymTab.addModule(MOrErr->get()); - size_t SymEnd = File->SymTab.symbols().size(); - - for (const auto &C : (*MOrErr)->getComdatSymbolTable()) { - auto P = File->ComdatMap.insert( - std::make_pair(&C.second, File->Comdats.size())); - assert(P.second); - (void)P; - File->Comdats.push_back(C.first()); - } - - File->Mods.push_back({BM, std::move(*MOrErr), SymBegin, SymEnd}); - } - - return std::move(File); -} + if ((*MOrErr)->getDataLayoutStr().empty()) + return make_error("input module has no datalayout", + inconvertibleErrorCode()); -Expected InputFile::Symbol::getComdatIndex() const { - if (!isGV()) - return -1; - const GlobalObject *GO = getGV()->getBaseObject(); - if (!GO) - return make_error("Unable to determine comdat of alias!", - inconvertibleErrorCode()); - if (const Comdat *C = GO->getComdat()) { - auto I = File->ComdatMap.find(C); - assert(I != File->ComdatMap.end()); - return I->second; + Mods.push_back(MOrErr->get()); + OwnedMods.push_back(std::move(*MOrErr)); } - return -1; -} -Expected InputFile::getLinkerOpts() { - std::string LinkerOpts; - raw_string_ostream LOS(LinkerOpts); - // Extract linker options from module metadata. 
- for (InputModule &Mod : Mods) { - std::unique_ptr &M = Mod.Mod; - if (auto E = M->materializeMetadata()) - return std::move(E); - if (Metadata *Val = M->getModuleFlag("Linker Options")) { - MDNode *LinkerOptions = cast(Val); - for (const MDOperand &MDOptions : LinkerOptions->operands()) - for (const MDOperand &MDOption : cast(MDOptions)->operands()) - LOS << " " << cast(MDOption)->getString(); - } + SmallVector Symtab; + if (Error E = irsymtab::build(Mods, Symtab, File->Strtab)) + return std::move(E); + + irsymtab::Reader R({Symtab.data(), Symtab.size()}, + {File->Strtab.data(), File->Strtab.size()}); + File->SourceFileName = R.getSourceFileName(); + File->COFFLinkerOpts = R.getCOFFLinkerOpts(); + File->ComdatTable = R.getComdatTable(); + + for (unsigned I = 0; I != Mods.size(); ++I) { + size_t Begin = File->Symbols.size(); + for (const irsymtab::Reader::SymbolRef &Sym : R.module_symbols(I)) + // Skip symbols that are irrelevant to LTO. Note that this condition needs + // to match the one in Skip() in LTO::addRegularLTO(). + if (Sym.isGlobal() && !Sym.isFormatSpecific()) + File->Symbols.push_back(Sym); + File->ModuleSymIndices.push_back({Begin, File->Symbols.size()}); } - // Synthesize export flags for symbols with dllexport storage. 
- const Triple TT(Mods[0].Mod->getTargetTriple()); - Mangler M; - for (const ModuleSymbolTable::Symbol &Sym : SymTab.symbols()) - if (auto *GV = Sym.dyn_cast()) - emitLinkerFlagsForGlobalCOFF(LOS, GV, TT, M); - LOS.flush(); - return LinkerOpts; + return std::move(File); } StringRef InputFile::getName() const { - return Mods[0].BM.getModuleIdentifier(); -} - -StringRef InputFile::getSourceFileName() const { - return Mods[0].Mod->getSourceFileName(); -} - -iterator_range -InputFile::module_symbols(InputModule &IM) { - return llvm::make_range( - symbol_iterator(SymTab.symbols().data() + IM.SymBegin, SymTab, this), - symbol_iterator(SymTab.symbols().data() + IM.SymEnd, SymTab, this)); + return Mods[0].getModuleIdentifier(); } LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel, @@ -437,21 +393,17 @@ LTO::LTO(Config Conf, ThinBackend Backend, LTO::~LTO() = default; // Add the given symbol to the GlobalResolutions map, and resolve its partition. -void LTO::addSymbolToGlobalRes(SmallPtrSet &Used, - const InputFile::Symbol &Sym, +void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym, SymbolResolution Res, unsigned Partition) { - GlobalValue *GV = Sym.isGV() ? Sym.getGV() : nullptr; - auto &GlobalRes = GlobalResolutions[Sym.getName()]; - if (GV) { - GlobalRes.UnnamedAddr &= GV->hasGlobalUnnamedAddr(); - if (Res.Prevailing) - GlobalRes.IRName = GV->getName(); - } + GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr(); + if (Res.Prevailing) + GlobalRes.IRName = Sym.getIRName(); + // Set the partition to external if we know it is used elsewhere, e.g. // it is visible to a regular object, is referenced from llvm.compiler_used, // or was already recorded as being referenced from a different partition. 
- if (Res.VisibleToRegularObj || (GV && Used.count(GV)) || + if (Res.VisibleToRegularObj || Sym.isUsed() || (GlobalRes.Partition != GlobalResolution::Unknown && GlobalRes.Partition != Partition)) { GlobalRes.Partition = GlobalResolution::External; @@ -495,41 +447,32 @@ Error LTO::add(std::unique_ptr Input, writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res); const SymbolResolution *ResI = Res.begin(); - for (InputFile::InputModule &IM : Input->Mods) - if (Error Err = addModule(*Input, IM, ResI, Res.end())) + for (unsigned I = 0; I != Input->Mods.size(); ++I) + if (Error Err = addModule(*Input, I, ResI, Res.end())) return Err; assert(ResI == Res.end()); return Error::success(); } -Error LTO::addModule(InputFile &Input, InputFile::InputModule &IM, +Error LTO::addModule(InputFile &Input, unsigned ModI, const SymbolResolution *&ResI, const SymbolResolution *ResE) { - // FIXME: move to backend - Module &M = *IM.Mod; - - if (M.getDataLayoutStr().empty()) - return make_error("input module has no datalayout", - inconvertibleErrorCode()); - - if (!Conf.OverrideTriple.empty()) - M.setTargetTriple(Conf.OverrideTriple); - else if (M.getTargetTriple().empty()) - M.setTargetTriple(Conf.DefaultTriple); - - Expected HasThinLTOSummary = IM.BM.hasSummary(); + Expected HasThinLTOSummary = Input.Mods[ModI].hasSummary(); if (!HasThinLTOSummary) return HasThinLTOSummary.takeError(); + auto ModSyms = Input.module_symbols(ModI); if (*HasThinLTOSummary) - return addThinLTO(IM.BM, M, Input.module_symbols(IM), ResI, ResE); + return addThinLTO(Input.Mods[ModI], ModSyms, ResI, ResE); else - return addRegularLTO(IM.BM, ResI, ResE); + return addRegularLTO(Input.Mods[ModI], ModSyms, ResI, ResE); } // Add a regular LTO object to the link. 
-Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI, +Error LTO::addRegularLTO(BitcodeModule BM, + ArrayRef Syms, + const SymbolResolution *&ResI, const SymbolResolution *ResE) { if (!RegularLTO.CombinedModule) { RegularLTO.CombinedModule = @@ -550,9 +493,6 @@ Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI, ModuleSymbolTable SymTab; SymTab.addModule(&M); - SmallPtrSet Used; - collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false); - std::vector Keep; for (GlobalVariable &GV : M.globals()) @@ -564,17 +504,35 @@ Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI, if (GlobalObject *GO = GA.getBaseObject()) AliasedGlobals.insert(GO); - for (const InputFile::Symbol &Sym : - make_range(InputFile::symbol_iterator(SymTab.symbols().begin(), SymTab, - nullptr), - InputFile::symbol_iterator(SymTab.symbols().end(), SymTab, - nullptr))) { + // In this function we need IR GlobalValues matching the symbols in Syms + // (which is not backed by a module), so we need to enumerate them in the same + // order. The symbol enumeration order of a ModuleSymbolTable intentionally + // matches the order of an irsymtab, but when we read the irsymtab in + // InputFile::create we omit some symbols that are irrelevant to LTO. The + // Skip() function skips the same symbols from the module as InputFile does + // from the symbol table. 
+ auto MsymI = SymTab.symbols().begin(), MsymE = SymTab.symbols().end(); + auto Skip = [&]() { + while (MsymI != MsymE) { + auto Flags = SymTab.getSymbolFlags(*MsymI); + if ((Flags & object::BasicSymbolRef::SF_Global) && + !(Flags & object::BasicSymbolRef::SF_FormatSpecific)) + return; + ++MsymI; + } + }; + Skip(); + + for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); SymbolResolution Res = *ResI++; - addSymbolToGlobalRes(Used, Sym, Res, 0); + addSymbolToGlobalRes(Sym, Res, 0); - if (Sym.isGV()) { - GlobalValue *GV = Sym.getGV(); + assert(MsymI != MsymE); + ModuleSymbolTable::Symbol Msym = *MsymI++; + Skip(); + + if (GlobalValue *GV = Msym.dyn_cast()) { if (Res.Prevailing) { if (Sym.isUndefined()) continue; @@ -612,7 +570,7 @@ Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI, if (Sym.isCommon()) { // FIXME: We should figure out what to do about commons defined by asm. // For now they aren't reported correctly by ModuleSymbolTable. - auto &CommonRes = RegularLTO.Commons[Sym.getGV()->getName()]; + auto &CommonRes = RegularLTO.Commons[Sym.getIRName()]; CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize()); CommonRes.Align = std::max(CommonRes.Align, Sym.getCommonAlignment()); CommonRes.Prevailing |= Res.Prevailing; @@ -620,6 +578,7 @@ Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI, // FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit. } + assert(MsymI == MsymE); return RegularLTO.Mover->move(std::move(*MOrErr), Keep, [](GlobalValue &, IRMover::ValueAdder) {}, @@ -627,15 +586,10 @@ Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI, } // Add a ThinLTO object to the link. -// FIXME: This function should not need to take as many parameters once we have -// a bitcode symbol table. 
-Error LTO::addThinLTO(BitcodeModule BM, Module &M, - iterator_range Syms, +Error LTO::addThinLTO(BitcodeModule BM, + ArrayRef Syms, const SymbolResolution *&ResI, const SymbolResolution *ResE) { - SmallPtrSet Used; - collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false); - Expected> SummaryOrErr = BM.getSummary(); if (!SummaryOrErr) return SummaryOrErr.takeError(); @@ -645,11 +599,15 @@ Error LTO::addThinLTO(BitcodeModule BM, Module &M, for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); SymbolResolution Res = *ResI++; - addSymbolToGlobalRes(Used, Sym, Res, ThinLTO.ModuleMap.size() + 1); + addSymbolToGlobalRes(Sym, Res, ThinLTO.ModuleMap.size() + 1); - if (Res.Prevailing && Sym.isGV()) - ThinLTO.PrevailingModuleForGUID[Sym.getGV()->getGUID()] = - BM.getModuleIdentifier(); + if (Res.Prevailing) { + if (!Sym.getIRName().empty()) { + auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier( + Sym.getIRName(), GlobalValue::ExternalLinkage, "")); + ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier(); + } + } } if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second) diff --git a/llvm/lib/Object/CMakeLists.txt b/llvm/lib/Object/CMakeLists.txt index b895c3f..2007f56 100644 --- a/llvm/lib/Object/CMakeLists.txt +++ b/llvm/lib/Object/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMObject ELFObjectFile.cpp Error.cpp IRObjectFile.cpp + IRSymtab.cpp MachOObjectFile.cpp MachOUniversal.cpp ModuleSummaryIndexObjectFile.cpp diff --git a/llvm/lib/Object/IRSymtab.cpp b/llvm/lib/Object/IRSymtab.cpp new file mode 100644 index 0000000..c69d7f9 --- /dev/null +++ b/llvm/lib/Object/IRSymtab.cpp @@ -0,0 +1,228 @@ +//===- IRSymtab.cpp - implementation of IR symbol tables --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/IRSymtab.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/StringSaver.h" + +using namespace llvm; +using namespace irsymtab; + +namespace { + +/// Stores the temporary state that is required to build an IR symbol table. +struct Builder { + SmallVector &Symtab; + SmallVector &Strtab; + Builder(SmallVector &Symtab, SmallVector &Strtab) + : Symtab(Symtab), Strtab(Strtab) {} + + StringTableBuilder StrtabBuilder{StringTableBuilder::ELF}; + + BumpPtrAllocator Alloc; + StringSaver Saver{Alloc}; + + DenseMap ComdatMap; + ModuleSymbolTable Msymtab; + SmallPtrSet Used; + Mangler Mang; + Triple TT; + + std::vector Comdats; + std::vector Mods; + std::vector Syms; + std::vector Uncommons; + + std::string COFFLinkerOpts; + raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts}; + + void setStr(storage::Str &S, StringRef Value) { + S.Offset = StrtabBuilder.add(Value); + } + template + void writeRange(storage::Range &R, const std::vector &Objs) { + R.Offset = Symtab.size(); + R.Size = Objs.size(); + Symtab.insert(Symtab.end(), reinterpret_cast(Objs.data()), + reinterpret_cast(Objs.data() + Objs.size())); + } + + Error addModule(Module *M); + Error addSymbol(ModuleSymbolTable::Symbol Sym); + + Error build(ArrayRef Mods); +}; + +Error Builder::addModule(Module *M) { + collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false); + + storage::Module Mod; + Mod.Begin = Msymtab.symbols().size(); + Msymtab.addModule(M); + Mod.End = Msymtab.symbols().size(); + Mods.push_back(Mod); + + if (TT.isOSBinFormatCOFF()) { + if (auto E = M->materializeMetadata()) + return E; + if (Metadata *Val = M->getModuleFlag("Linker Options")) { + MDNode *LinkerOptions = cast(Val); + 
for (const MDOperand &MDOptions : LinkerOptions->operands()) + for (const MDOperand &MDOption : cast(MDOptions)->operands()) + COFFLinkerOptsOS << " " << cast(MDOption)->getString(); + } + } + + return Error::success(); +} + +Error Builder::addSymbol(ModuleSymbolTable::Symbol Msym) { + Syms.emplace_back(); + storage::Symbol &Sym = Syms.back(); + Sym = {}; + + Sym.UncommonIndex = -1; + storage::Uncommon *Unc = nullptr; + auto Uncommon = [&]() -> storage::Uncommon & { + if (Unc) + return *Unc; + Sym.UncommonIndex = Uncommons.size(); + Uncommons.emplace_back(); + Unc = &Uncommons.back(); + *Unc = {}; + setStr(Unc->COFFWeakExternFallbackName, ""); + return *Unc; + }; + + SmallString<64> Name; + { + raw_svector_ostream OS(Name); + Msymtab.printSymbolName(OS, Msym); + } + setStr(Sym.Name, Saver.save(StringRef(Name))); + + auto Flags = Msymtab.getSymbolFlags(Msym); + if (Flags & object::BasicSymbolRef::SF_Undefined) + Sym.Flags |= 1 << storage::Symbol::FB_undefined; + if (Flags & object::BasicSymbolRef::SF_Weak) + Sym.Flags |= 1 << storage::Symbol::FB_weak; + if (Flags & object::BasicSymbolRef::SF_Common) + Sym.Flags |= 1 << storage::Symbol::FB_common; + if (Flags & object::BasicSymbolRef::SF_Indirect) + Sym.Flags |= 1 << storage::Symbol::FB_indirect; + if (Flags & object::BasicSymbolRef::SF_Global) + Sym.Flags |= 1 << storage::Symbol::FB_global; + if (Flags & object::BasicSymbolRef::SF_FormatSpecific) + Sym.Flags |= 1 << storage::Symbol::FB_format_specific; + + Sym.ComdatIndex = -1; + auto *GV = Msym.dyn_cast(); + if (!GV) { + setStr(Sym.IRName, ""); + return Error::success(); + } + + setStr(Sym.IRName, GV->getName()); + + if (Used.count(GV)) + Sym.Flags |= 1 << storage::Symbol::FB_used; + if (GV->isThreadLocal()) + Sym.Flags |= 1 << storage::Symbol::FB_tls; + if (GV->hasGlobalUnnamedAddr()) + Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr; + if (canBeOmittedFromSymbolTable(GV)) + Sym.Flags |= 1 << storage::Symbol::FB_may_omit; + Sym.Flags |= 
unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility; + + if (Flags & object::BasicSymbolRef::SF_Common) { + Uncommon().CommonSize = GV->getParent()->getDataLayout().getTypeAllocSize( + GV->getType()->getElementType()); + Uncommon().CommonAlign = GV->getAlignment(); + } + + const GlobalObject *Base = GV->getBaseObject(); + if (!Base) + return make_error("Unable to determine comdat of alias!", + inconvertibleErrorCode()); + if (const Comdat *C = Base->getComdat()) { + auto P = ComdatMap.insert(std::make_pair(C, Comdats.size())); + Sym.ComdatIndex = P.first->second; + + if (P.second) { + storage::Comdat Comdat; + setStr(Comdat.Name, C->getName()); + Comdats.push_back(Comdat); + } + } + + if (TT.isOSBinFormatCOFF()) { + emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang); + + if ((Flags & object::BasicSymbolRef::SF_Weak) && + (Flags & object::BasicSymbolRef::SF_Indirect)) { + std::string FallbackName; + raw_string_ostream OS(FallbackName); + Msymtab.printSymbolName( + OS, cast( + cast(GV)->getAliasee()->stripPointerCasts())); + OS.flush(); + setStr(Uncommon().COFFWeakExternFallbackName, Saver.save(FallbackName)); + } + } + + return Error::success(); +} + +Error Builder::build(ArrayRef IRMods) { + storage::Header Hdr; + + assert(!IRMods.empty()); + setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName()); + TT = Triple(IRMods[0]->getTargetTriple()); + + // This adds the symbols for each module to Msymtab. + for (auto *M : IRMods) + if (Error Err = addModule(M)) + return Err; + + for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols()) + if (Error Err = addSymbol(Msym)) + return Err; + + COFFLinkerOptsOS.flush(); + setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts); + + // We are about to fill in the header's range fields, so reserve space for it + // and copy it in afterwards. 
+ Symtab.resize(sizeof(storage::Header)); + writeRange(Hdr.Modules, Mods); + writeRange(Hdr.Comdats, Comdats); + writeRange(Hdr.Symbols, Syms); + writeRange(Hdr.Uncommons, Uncommons); + + *reinterpret_cast(Symtab.data()) = Hdr; + + raw_svector_ostream OS(Strtab); + StrtabBuilder.finalizeInOrder(); + StrtabBuilder.write(OS); + + return Error::success(); +} + +} // anonymous namespace + +Error irsymtab::build(ArrayRef Mods, SmallVector &Symtab, + SmallVector &Strtab) { + return Builder(Symtab, Strtab).build(Mods); +} diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp index 8c3fa42..9b783d1 100644 --- a/llvm/tools/gold/gold-plugin.cpp +++ b/llvm/tools/gold/gold-plugin.cpp @@ -465,7 +465,7 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, EC == object::object_error::bitcode_section_not_found) *claimed = 0; else - message(LDPL_ERROR, + message(LDPL_FATAL, "LLVM gold plugin has failed to create LTO module: %s", EI.message().c_str()); }); @@ -536,7 +536,7 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, sym.size = 0; sym.comdat_key = nullptr; - int CI = check(Sym.getComdatIndex()); + int CI = Sym.getComdatIndex(); if (CI != -1) { StringRef C = Obj->getComdatTable()[CI]; sym.comdat_key = strdup(C.str().c_str()); -- 2.7.4