From 4fcbf3842007569880fa916831efefda6b1bd032 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Thu, 23 Mar 2023 13:20:15 +0200 Subject: [PATCH] [llvm-lib] Use COFF archive format in llvm-lib (other archive tools don't use this format). We currently just use GNU format for llvm-lib. This mostly works, but ARM64EC needs an additional section that does not really fit GNU format. This patch implements writing in COFF format (as in, it's what archive reader considers as K_COFF). This mostly requires symbol emitting symbol map. Note that, just like in case of MSVC, symbols are de-duplicated in both usual symbol table and the new symbol map. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D143540 --- llvm/lib/Object/ArchiveWriter.cpp | 135 +++++++++++++++++++++++----- llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp | 6 +- llvm/test/tools/llvm-lib/duplicate.test | 9 ++ 3 files changed, 126 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp index cd0429a..2d0f92e 100644 --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -17,6 +17,7 @@ #include "llvm/BinaryFormat/Magic.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Archive.h" +#include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/MachO.h" @@ -43,6 +44,10 @@ using namespace llvm; +struct SymMap { + std::map Map; +}; + NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), MemberName(BufRef.getBufferIdentifier()) {} @@ -169,18 +174,21 @@ static bool isAIXBigArchive(object::Archive::Kind Kind) { return Kind == object::Archive::K_AIXBIG; } +static bool isCOFFArchive(object::Archive::Kind Kind) { + return Kind == object::Archive::K_COFF; +} + static bool isBSDLike(object::Archive::Kind Kind) { switch (Kind) { case object::Archive::K_GNU: case object::Archive::K_GNU64: case object::Archive::K_AIXBIG: + case object::Archive::K_COFF: return false; case object::Archive::K_BSD: case object::Archive::K_DARWIN: case object::Archive::K_DARWIN64: return true; - case object::Archive::K_COFF: - break; } llvm_unreachable("not supported for writting"); } @@ -191,6 +199,10 @@ static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { isBSDLike(Kind) ? support::little : support::big); } +template static void printLE(raw_ostream &Out, T Val) { + support::endian::write(Out, Val, support::little); +} + static void printRestOfMemberHeader( raw_ostream &Out, const sys::TimePoint &ModTime, unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { @@ -295,7 +307,11 @@ printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable, auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)}); if (Insertion.second) { Insertion.first->second = StringTable.tell(); - StringTable << M.MemberName << "/\n"; + StringTable << M.MemberName; + if (isCOFFArchive(Kind)) + StringTable << '\0'; + else + StringTable << "/\n"; } NamePos = Insertion.first->second; } @@ -376,6 +392,22 @@ static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, uint32_t Pad = isAIXBigArchive(Kind) ? 0 : offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2)); + + Size += Pad; + if (Padding) + *Padding = Pad; + return Size; +} + +static uint64_t computeSymbolMapSize(uint64_t NumObj, SymMap &SymMap, + uint32_t *Padding = nullptr) { + uint64_t Size = sizeof(uint32_t) * 2; // Number of symbols and objects entries + Size += NumObj * sizeof(uint32_t); // Offset table + + for (auto S : SymMap.Map) + Size += sizeof(uint16_t) + S.first.length() + 1; + + uint32_t Pad = offsetToAlignment(Size, Align(2)); Size += Pad; if (Padding) *Padding = Pad; @@ -399,8 +431,9 @@ static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind, } static uint64_t computeHeadersSize(object::Archive::Kind Kind, + uint64_t NumMembers, uint64_t StringMemberSize, uint64_t NumSyms, - uint64_t SymNamesSize) { + uint64_t SymNamesSize, SymMap *SymMap) { uint32_t OffsetSize = is64BitKind(Kind) ? 8 : 4; uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, OffsetSize, SymNamesSize); @@ -410,8 +443,13 @@ static uint64_t computeHeadersSize(object::Archive::Kind Kind, writeSymbolTableHeader(Tmp, Kind, true, SymtabSize); return TmpBuf.size(); }; + uint32_t HeaderSize = computeSymbolTableHeaderSize(); + uint64_t Size = strlen("!\n") + HeaderSize + SymtabSize; + + if (SymMap) + Size += HeaderSize + computeSymbolMapSize(NumMembers, *SymMap); - return strlen("!\n") + computeSymbolTableHeaderSize() + SymtabSize + StringMemberSize; + return Size + StringMemberSize; } static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, @@ -420,7 +458,7 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, uint64_t PrevMemberOffset = 0) { // We don't write a symbol table on an archive with no members -- except on // Darwin, where the linker will abort unless the archive has a symbol table. - if (StringTable.empty() && !isDarwin(Kind)) + if (StringTable.empty() && !isDarwin(Kind) && !isCOFFArchive(Kind)) return; unsigned NumSyms = 0; @@ -457,8 +495,35 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, Out.write(uint8_t(0)); } +static void writeSymbolMap(raw_ostream &Out, object::Archive::Kind Kind, + bool Deterministic, ArrayRef Members, + SymMap &SymMap, uint64_t MembersOffset) { + uint32_t Pad; + uint64_t Size = computeSymbolMapSize(Members.size(), SymMap, &Pad); + writeSymbolTableHeader(Out, Kind, Deterministic, Size, 0); + + uint32_t Pos = MembersOffset; + + printLE(Out, Members.size()); + for (const MemberData &M : Members) { + printLE(Out, Pos); // member offset + Pos += M.Header.size() + M.Data.size() + M.Padding.size(); + } + + printLE(Out, SymMap.Map.size()); + + for (auto S : SymMap.Map) + printLE(Out, S.second); + for (auto S : SymMap.Map) + Out << S.first << '\0'; + + while (Pad--) + Out.write(uint8_t(0)); +} + static Expected> -getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { +getSymbols(MemoryBufferRef Buf, uint16_t Index, raw_ostream &SymNames, + SymMap *SymMap, bool &HasObject) { std::vector Ret; // In the scenario when LLVMContext is populated SymbolicFile will contain a @@ -487,10 +552,22 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { for (const object::BasicSymbolRef &S : Obj->symbols()) { if (!isArchiveSymbol(S)) continue; - Ret.push_back(SymNames.tell()); - if (Error E = S.printName(SymNames)) - return std::move(E); - SymNames << '\0'; + if (SymMap) { + std::string Name; + raw_string_ostream NameStream(Name); + if (Error E = S.printName(NameStream)) + return std::move(E); + if (SymMap->Map.find(Name) != SymMap->Map.end()) + continue; // ignore duplicated symbol + SymMap->Map[Name] = Index; + Ret.push_back(SymNames.tell()); + SymNames << Name << '\0'; + } else { + Ret.push_back(SymNames.tell()); + if (Error E = S.printName(SymNames)) + return std::move(E); + SymNames << '\0'; + } } return Ret; } @@ -498,7 +575,8 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { static Expected> computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, object::Archive::Kind Kind, bool Thin, bool Deterministic, - bool NeedSymbols, ArrayRef NewMembers) { + bool NeedSymbols, SymMap *SymMap, + ArrayRef NewMembers) { static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; uint64_t Pos = @@ -564,7 +642,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, // The big archive format needs to know the offset of the previous member // header. - unsigned PrevOffset = 0; + unsigned PrevOffset = 0, Index = 0; for (const NewArchiveMember &M : NewMembers) { std::string Header; raw_string_ostream Out(Header); @@ -572,6 +650,8 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, MemoryBufferRef Buf = M.Buf->getMemBufferRef(); StringRef Data = Thin ? "" : Buf.getBuffer(); + Index++; + // ld64 expects the members to be 8-byte aligned for 64-bit content and at // least 4-byte aligned for 32-bit content. Opt for the larger encoding // uniformly. This matches the behaviour with cctools and ensures that ld64 @@ -612,7 +692,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, std::vector Symbols; if (NeedSymbols) { Expected> SymbolsOrErr = - getSymbols(Buf, SymNames, HasObject); + getSymbols(Buf, Index, SymNames, SymMap, HasObject); if (!SymbolsOrErr) return createFileError(M.MemberName, SymbolsOrErr.takeError()); Symbols = std::move(*SymbolsOrErr); @@ -624,7 +704,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, // If there are no symbols, emit an empty symbol table, to satisfy Solaris // tools, older versions of which expect a symbol table in a non-empty // archive, regardless of whether there are any symbols in it. - if (HasObject && SymNames.tell() == 0) + if (HasObject && SymNames.tell() == 0 && !isCOFFArchive(Kind)) SymNames << '\0' << '\0' << '\0'; return Ret; } @@ -682,10 +762,16 @@ static Error writeArchiveToStream(raw_ostream &Out, raw_svector_ostream SymNames(SymNamesBuf); SmallString<0> StringTableBuf; raw_svector_ostream StringTable(StringTableBuf); + SymMap SymMap; - Expected> DataOrErr = - computeMemberData(StringTable, SymNames, Kind, Thin, Deterministic, - WriteSymtab, NewMembers); + // COFF symbol map uses 16-bit indexes, so we can't use it if there are too + // many members. + if (isCOFFArchive(Kind) && NewMembers.size() > 0xfffe) + Kind = object::Archive::K_GNU; + + Expected> DataOrErr = computeMemberData( + StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab, + isCOFFArchive(Kind) ? &SymMap : nullptr, NewMembers); if (Error E = DataOrErr.takeError()) return E; std::vector &Data = *DataOrErr; @@ -717,8 +803,9 @@ static Error writeArchiveToStream(raw_ostream &Out, // table is at the start of the archive file for other archive formats. if (WriteSymtab && !is64BitKind(Kind)) { // We assume 32-bit offsets to see if 32-bit symbols are possible or not. - HeadersSize = - computeHeadersSize(Kind, StringTableSize, NumSyms, SymNamesBuf.size()); + HeadersSize = computeHeadersSize(Kind, Data.size(), StringTableSize, + NumSyms, SymNamesBuf.size(), + isCOFFArchive(Kind) ? &SymMap : nullptr); // The SYM64 format is used when an archive's member offsets are larger than // 32-bits can hold. The need for this shift in format is detected by @@ -754,10 +841,14 @@ static Error writeArchiveToStream(raw_ostream &Out, if (!isAIXBigArchive(Kind)) { if (WriteSymtab) { if (!HeadersSize) - HeadersSize = computeHeadersSize(Kind, StringTableSize, NumSyms, - SymNamesBuf.size()); + HeadersSize = computeHeadersSize( + Kind, Data.size(), StringTableSize, NumSyms, SymNamesBuf.size(), + isCOFFArchive(Kind) ? &SymMap : nullptr); writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf, *HeadersSize); + + if (isCOFFArchive(Kind)) + writeSymbolMap(Out, Kind, Deterministic, Data, SymMap, *HeadersSize); } if (StringTableSize) diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp index 3a609ee..9ca63be 100644 --- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -455,10 +455,12 @@ int llvm::libDriverMain(ArrayRef ArgsArr) { // For compatibility with MSVC, reverse member vector after de-duplication. std::reverse(Members.begin(), Members.end()); + bool Thin = Args.hasArg(OPT_llvmlibthin); if (Error E = writeArchive(OutputPath, Members, - /*WriteSymtab=*/true, object::Archive::K_GNU, - /*Deterministic*/ true, Args.hasArg(OPT_llvmlibthin))) { + /*WriteSymtab=*/true, + Thin ? object::Archive::K_GNU : object::Archive::K_COFF, + /*Deterministic*/ true, Thin)) { handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { llvm::errs() << OutputPath << ": " << EI.message() << "\n"; }); diff --git a/llvm/test/tools/llvm-lib/duplicate.test b/llvm/test/tools/llvm-lib/duplicate.test index 098858d..87dae66 100644 --- a/llvm/test/tools/llvm-lib/duplicate.test +++ b/llvm/test/tools/llvm-lib/duplicate.test @@ -14,3 +14,12 @@ CHECK: bar.o CHECK-NEXT: abc.o CHECK-NEXT: foo.o CHECK-NOT: foo.o + +# Check that symbol map contains sorted, de-duplicated symbols. +RUN: cd %t && llvm-lib -out:foo.lib foo.o foo.o abc.o bar.o foo.o foo.o +RUN: llvm-nm --print-armap %t/foo.lib | FileCheck %s --check-prefix=DUP +# DUP: Archive map +# DUP-NEXT: a in abc.o +# DUP-NEXT: b in bar.o +# DUP-NEXT: c in abc.o +# DUP-EMPTY -- 2.7.4