From a13efc2a73106ed35e6477e23d867e5d53909387 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Tue, 29 Nov 2016 18:05:04 +0000 Subject: [PATCH] Introduce StringRefZ class to represent null-terminated strings. StringRefZ is a class to represent a null-terminated string. String length is computed lazily, so it's more efficient than StringRef to represent strings in a string table. The motivation of defining this new class is to merge functions that only differ in string types; we have many constructors that take `const char *` or `StringRef`. With StringRefZ, we can merge them. Differential Revision: https://reviews.llvm.org/D27037 llvm-svn: 288172 --- lld/ELF/InputFiles.cpp | 30 ++++++++++++++++++------------ lld/ELF/LTO.cpp | 4 ++-- lld/ELF/Strings.cpp | 1 + lld/ELF/Strings.h | 30 ++++++++++++++++++++++++++++++ lld/ELF/SymbolTable.cpp | 18 ++++++++++-------- lld/ELF/SymbolTable.h | 5 +++-- lld/ELF/Symbols.cpp | 48 ++++++++++++++++-------------------------------- lld/ELF/Symbols.h | 43 +++++++++++++++---------------------------- 8 files changed, 95 insertions(+), 84 deletions(-) diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 22dca37..c8bcd73 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -438,6 +438,11 @@ SymbolBody *elf::ObjectFile::createSymbolBody(const Elf_Sym *Sym) { int Binding = Sym->getBinding(); InputSectionBase *Sec = getSection(*Sym); + uint8_t StOther = Sym->st_other; + uint8_t Type = Sym->getType(); + uintX_t Value = Sym->st_value; + uintX_t Size = Sym->st_size; + if (Binding == STB_LOCAL) { if (Sym->getType() == STT_FILE) SourceFile = check(Sym->getName(this->StringTable)); @@ -447,20 +452,19 @@ SymbolBody *elf::ObjectFile::createSymbolBody(const Elf_Sym *Sym) { const char *Name = this->StringTable.data() + Sym->st_name; if (Sym->st_shndx == SHN_UNDEF) - return new (BAlloc) Undefined(Name, Sym->st_other, Sym->getType(), this); - return new (BAlloc) DefinedRegular(Name, *Sym, Sec); + return new (BAlloc) + 
Undefined(Name, /*IsLocal=*/true, StOther, Type, this); + + return new (BAlloc) DefinedRegular(Name, /*IsLocal=*/true, StOther, + Type, Value, Size, Sec, this); } StringRef Name = check(Sym->getName(this->StringTable)); - uint8_t StOther = Sym->st_other; - uint8_t Type = Sym->getType(); - uintX_t Value = Sym->st_value; - uintX_t Size = Sym->st_size; switch (Sym->st_shndx) { case SHN_UNDEF: return elf::Symtab::X - ->addUndefined(Name, Binding, StOther, Type, + ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, /*CanOmitFromDynSym=*/false, this) ->body(); case SHN_COMMON: @@ -480,7 +484,7 @@ SymbolBody *elf::ObjectFile::createSymbolBody(const Elf_Sym *Sym) { case STB_GNU_UNIQUE: if (Sec == &InputSection::Discarded) return elf::Symtab::X - ->addUndefined(Name, Binding, StOther, Type, + ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, /*CanOmitFromDynSym=*/false, this) ->body(); return elf::Symtab::X @@ -723,12 +727,14 @@ static Symbol *createBitcodeSymbol(const std::vector &KeptComdats, int C = check(ObjSym.getComdatIndex()); if (C != -1 && !KeptComdats[C]) - return Symtab::X->addUndefined(NameRef, Binding, Visibility, Type, - CanOmitFromDynSym, F); + return Symtab::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, + Visibility, Type, CanOmitFromDynSym, + F); if (Flags & BasicSymbolRef::SF_Undefined) - return Symtab::X->addUndefined(NameRef, Binding, Visibility, Type, - CanOmitFromDynSym, F); + return Symtab::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, + Visibility, Type, CanOmitFromDynSym, + F); if (Flags & BasicSymbolRef::SF_Common) return Symtab::X->addCommon(NameRef, ObjSym.getCommonSize(), diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 8d4ad91..a3d6a14 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -97,8 +97,8 @@ BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {} BitcodeCompiler::~BitcodeCompiler() = default; static void undefine(Symbol *S) { - replaceBody(S, S->body()->getName(), STV_DEFAULT, 
S->body()->Type, - nullptr); + replaceBody(S, S->body()->getName(), /*IsLocal=*/false, + STV_DEFAULT, S->body()->Type, nullptr); } void BitcodeCompiler::add(BitcodeFile &F) { diff --git a/lld/ELF/Strings.cpp b/lld/ELF/Strings.cpp index 6e504fb..28e50ff 100644 --- a/lld/ELF/Strings.cpp +++ b/lld/ELF/Strings.cpp @@ -16,6 +16,7 @@ #include "llvm/Config/config.h" #include "llvm/Demangle/Demangle.h" #include +#include using namespace llvm; using namespace lld; diff --git a/lld/ELF/Strings.h b/lld/ELF/Strings.h index 0a0cccf..e3c13c9 100644 --- a/lld/ELF/Strings.h +++ b/lld/ELF/Strings.h @@ -26,6 +26,36 @@ std::vector parseHex(StringRef S); bool isValidCIdentifier(StringRef S); StringRef unquote(StringRef S); +// This is a lazy version of StringRef. String size is computed lazily +// when it is needed. It is more efficient than StringRef to instantiate +// if you have a string whose size is unknown. +// +// ELF string tables contain a lot of null-terminated strings. +// Most of them are not necessary for the linker because they are names +// of local symbols and the linker doesn't use local symbol names for +// name resolution. So, we use this class to represent strings read +// from string tables. +class StringRefZ { +public: + StringRefZ() : Start(nullptr), Size(0) {} + StringRefZ(const char *S, size_t Size) : Start(S), Size(Size) {} + + /*implicit*/ StringRefZ(const char *S) : Start(S), Size(-1) {} + + /*implicit*/ StringRefZ(llvm::StringRef S) + : Start(S.data()), Size(S.size()) {} + + operator llvm::StringRef() const { + if (Size == (size_t)-1) + Size = strlen(Start); + return {Start, Size}; + } + +private: + const char *Start; + mutable size_t Size; +}; + // This class represents a glob pattern. Supported metacharacters // are "*", "?", "[]" and "[^]". 
class GlobPattern { diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 6b96676..c7fe4ce 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -236,14 +236,15 @@ SymbolTable::insert(StringRef Name, uint8_t Type, uint8_t Visibility, } template Symbol *SymbolTable::addUndefined(StringRef Name) { - return addUndefined(Name, STB_GLOBAL, STV_DEFAULT, /*Type*/ 0, + return addUndefined(Name, /*IsLocal=*/false, STB_GLOBAL, STV_DEFAULT, + /*Type*/ 0, /*CanOmitFromDynSym*/ false, /*File*/ nullptr); } template -Symbol *SymbolTable::addUndefined(StringRef Name, uint8_t Binding, - uint8_t StOther, uint8_t Type, - bool CanOmitFromDynSym, +Symbol *SymbolTable::addUndefined(StringRef Name, bool IsLocal, + uint8_t Binding, uint8_t StOther, + uint8_t Type, bool CanOmitFromDynSym, InputFile *File) { Symbol *S; bool WasInserted; @@ -251,7 +252,7 @@ Symbol *SymbolTable::addUndefined(StringRef Name, uint8_t Binding, insert(Name, Type, StOther & 3, CanOmitFromDynSym, File); if (WasInserted) { S->Binding = Binding; - replaceBody(S, Name, StOther, Type, File); + replaceBody(S, Name, IsLocal, StOther, Type, File); return S; } if (Binding != STB_WEAK) { @@ -378,8 +379,8 @@ Symbol *SymbolTable::addRegular(StringRef Name, uint8_t StOther, /*CanOmitFromDynSym*/ false, File); int Cmp = compareDefinedNonCommon(S, WasInserted, Binding); if (Cmp > 0) - replaceBody>(S, Name, StOther, Type, Value, Size, - Section, File); + replaceBody>(S, Name, /*IsLocal=*/false, StOther, Type, + Value, Size, Section, File); else if (Cmp == 0) reportDuplicate(S->body(), Section, Value); return S; @@ -432,7 +433,8 @@ Symbol *SymbolTable::addBitcode(StringRef Name, uint8_t Binding, insert(Name, Type, StOther & 3, CanOmitFromDynSym, F); int Cmp = compareDefinedNonCommon(S, WasInserted, Binding); if (Cmp > 0) - replaceBody>(S, Name, StOther, Type, 0, 0, nullptr, F); + replaceBody>(S, Name, /*IsLocal=*/false, StOther, Type, + 0, 0, nullptr, F); else if (Cmp == 0) reportDuplicate(S->body(), 
F); return S; diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index f37cce1..7ffab38 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -55,8 +55,9 @@ public: uint8_t Visibility = llvm::ELF::STV_HIDDEN); Symbol *addUndefined(StringRef Name); - Symbol *addUndefined(StringRef Name, uint8_t Binding, uint8_t StOther, - uint8_t Type, bool CanOmitFromDynSym, InputFile *File); + Symbol *addUndefined(StringRef Name, bool IsLocal, uint8_t Binding, + uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym, + InputFile *File); Symbol *addRegular(StringRef Name, uint8_t StOther, uint8_t Type, uintX_t Value, uintX_t Size, uint8_t Binding, diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index e618f52..0b5ee3f 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -92,22 +92,12 @@ static typename ELFT::uint getSymVA(const SymbolBody &Body, llvm_unreachable("invalid symbol kind"); } -SymbolBody::SymbolBody(Kind K, const char *Name, uint8_t StOther, uint8_t Type) - : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(true), +SymbolBody::SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, + uint8_t Type) + : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(IsLocal), IsInGlobalMipsGot(false), Is32BitMipsGot(false), Type(Type), StOther(StOther), Name(Name) {} -SymbolBody::SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type) - : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(false), - IsInGlobalMipsGot(false), Is32BitMipsGot(false), Type(Type), - StOther(StOther), NameLen(Name.size()), Name(Name.data()) {} - -StringRef SymbolBody::getName() const { - if (NameLen == (uint32_t)-1) - NameLen = strlen(Name); - return StringRef(Name, NameLen); -} - // Returns true if a symbol can be replaced at load-time by a symbol // with the same name defined in other ELF executable or DSO. 
bool SymbolBody::isPreemptible() const { @@ -203,7 +193,7 @@ void SymbolBody::parseSymbolVersion() { return; // Truncate the symbol name so that it doesn't include the version string. - NameLen = Pos; + Name = {S.data(), Pos}; // '@@' in a symbol name means the default version. // It is usually the most recent one. @@ -226,11 +216,9 @@ void SymbolBody::parseSymbolVersion() { error("symbol " + S + " has undefined version " + Verstr); } -Defined::Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type) - : SymbolBody(K, Name, StOther, Type) {} - -Defined::Defined(Kind K, const char *Name, uint8_t StOther, uint8_t Type) - : SymbolBody(K, Name, StOther, Type) {} +Defined::Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, + uint8_t Type) + : SymbolBody(K, Name, IsLocal, StOther, Type) {} template bool DefinedRegular::isMipsPIC() const { if (!Section || !isFunc()) @@ -239,27 +227,23 @@ template bool DefinedRegular::isMipsPIC() const { (Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC); } -Undefined::Undefined(StringRef Name, uint8_t StOther, uint8_t Type, - InputFile *File) - : SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) { - this->File = File; -} - -Undefined::Undefined(const char *Name, uint8_t StOther, uint8_t Type, - InputFile *File) - : SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) { +Undefined::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, + uint8_t Type, InputFile *File) + : SymbolBody(SymbolBody::UndefinedKind, Name, IsLocal, StOther, Type) { this->File = File; } template -DefinedSynthetic::DefinedSynthetic(StringRef N, uintX_t Value, +DefinedSynthetic::DefinedSynthetic(StringRef Name, uintX_t Value, const OutputSectionBase *Section) - : Defined(SymbolBody::DefinedSyntheticKind, N, STV_HIDDEN, 0 /* Type */), + : Defined(SymbolBody::DefinedSyntheticKind, Name, /*IsLocal=*/false, + STV_HIDDEN, 0 /* Type */), Value(Value), Section(Section) {} -DefinedCommon::DefinedCommon(StringRef N, uint64_t 
Size, uint64_t Alignment, +DefinedCommon::DefinedCommon(StringRef Name, uint64_t Size, uint64_t Alignment, uint8_t StOther, uint8_t Type, InputFile *File) - : Defined(SymbolBody::DefinedCommonKind, N, StOther, Type), + : Defined(SymbolBody::DefinedCommonKind, Name, /*IsLocal=*/false, StOther, + Type), Alignment(Alignment), Size(Size) { this->File = File; } diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index c69a8c0..3f4b682 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -16,6 +16,7 @@ #define LLD_ELF_SYMBOLS_H #include "InputSection.h" +#include "Strings.h" #include "lld/Core/LLVM.h" #include "llvm/Object/Archive.h" @@ -28,7 +29,6 @@ class ArchiveFile; class BitcodeFile; class InputFile; class LazyObjectFile; -class SymbolBody; template class ObjectFile; template class OutputSection; class OutputSectionBase; @@ -69,7 +69,7 @@ public: bool isShared() const { return SymbolKind == SharedKind; } bool isLocal() const { return IsLocal; } bool isPreemptible() const; - StringRef getName() const; + StringRef getName() const { return Name; } uint8_t getVisibility() const { return StOther & 0x3; } void parseSymbolVersion(); @@ -98,8 +98,8 @@ public: uint32_t GlobalDynIndex = -1; protected: - SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type); - SymbolBody(Kind K, const char *Name, uint8_t StOther, uint8_t Type); + SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, + uint8_t Type); const unsigned SymbolKind : 8; @@ -136,17 +136,13 @@ public: bool isFile() const { return Type == llvm::ELF::STT_FILE; } protected: - // Local symbols are not inserted to the symbol table, so we usually - // don't need their names at all. We read symbol names lazily if possible. - mutable uint32_t NameLen = (uint32_t)-1; - const char *Name; + StringRefZ Name; }; // The base class for any defined symbols. 
class Defined : public SymbolBody { public: - Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type); - Defined(Kind K, const char *Name, uint8_t StOther, uint8_t Type); + Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type); static bool classof(const SymbolBody *S) { return S->isDefined(); } }; @@ -175,25 +171,15 @@ template class DefinedRegular : public Defined { typedef typename ELFT::uint uintX_t; public: - DefinedRegular(StringRef Name, uint8_t StOther, uint8_t Type, uintX_t Value, - uintX_t Size, InputSectionBase *Section, InputFile *File) - : Defined(SymbolBody::DefinedRegularKind, Name, StOther, Type), + DefinedRegular(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type, + uintX_t Value, uintX_t Size, InputSectionBase *Section, + InputFile *File) + : Defined(SymbolBody::DefinedRegularKind, Name, IsLocal, StOther, Type), Value(Value), Size(Size), Section(Section ? Section->Repl : NullInputSection) { this->File = File; } - DefinedRegular(const char *Name, const Elf_Sym &Sym, - InputSectionBase *Section) - : Defined(SymbolBody::DefinedRegularKind, Name, Sym.st_other, - Sym.getType()), - Value(Sym.st_value), Size(Sym.st_size), - Section(Section ? Section->Repl : NullInputSection) { - assert(isLocal()); - if (Section) - this->File = Section->getFile(); - } - // Return true if the symbol is a PIC function. 
bool isMipsPIC() const; @@ -248,8 +234,8 @@ public: class Undefined : public SymbolBody { public: - Undefined(StringRef Name, uint8_t StOther, uint8_t Type, InputFile *F); - Undefined(const char *Name, uint8_t StOther, uint8_t Type, InputFile *F); + Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type, + InputFile *F); static bool classof(const SymbolBody *S) { return S->kind() == UndefinedKind; @@ -270,7 +256,8 @@ public: SharedSymbol(SharedFile *F, StringRef Name, const Elf_Sym &Sym, const Elf_Verdef *Verdef) - : Defined(SymbolBody::SharedKind, Name, Sym.st_other, Sym.getType()), + : Defined(SymbolBody::SharedKind, Name, /*IsLocal=*/false, Sym.st_other, + Sym.getType()), Sym(Sym), Verdef(Verdef) { // IFuncs defined in DSOs are treated as functions by the static linker. if (isGnuIFunc()) @@ -309,7 +296,7 @@ public: protected: Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type) - : SymbolBody(K, Name, llvm::ELF::STV_DEFAULT, Type) {} + : SymbolBody(K, Name, /*IsLocal=*/false, llvm::ELF::STV_DEFAULT, Type) {} }; // LazyArchive symbols represents symbols in archive files. -- 2.7.4