[llvm-lib] Add support for ARM64EC libraries.
authorJacek Caban <jacek@codeweavers.com>
Fri, 21 Apr 2023 12:31:21 +0000 (15:31 +0300)
committerMartin Storsjö <martin@martin.st>
Fri, 21 Apr 2023 12:46:19 +0000 (15:46 +0300)
ARM64EC allows having both pure ARM64 objects and ARM64EC in the
same archive. This allows using single static library for linking
pure ARM64, pure ARM64EC or mixed modules (what MS calls ARM64X:
a single module that may be used in both modes). To achieve that,
such static libraries need two separated symbol maps. The usual map
contains only pure ARM64 symbols, while a new /<ECSYMBOLS>/ section
contains EC symbols. EC symbols map has very similar format to the
usual map, except it doesn't contain object offsets and uses offsets
from regular map instead. This is true even for pure ARM64EC static
library: it will simply have 0 symbols in the symbol map.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D143541

llvm/include/llvm/Object/ArchiveWriter.h
llvm/lib/Object/ArchiveWriter.cpp
llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
llvm/test/tools/llvm-lib/ecsymbols.test
llvm/test/tools/llvm-lib/machine-mismatch.test

index 6acab45215da1c01e3baf522ed2f17a09a22b5e1..c89246f1d5694ef0c2e9f10f7f37e0e7f237bc3b 100644 (file)
@@ -43,7 +43,8 @@ Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To);
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
                    bool WriteSymtab, object::Archive::Kind Kind,
                    bool Deterministic, bool Thin,
-                   std::unique_ptr<MemoryBuffer> OldArchiveBuf = nullptr);
+                   std::unique_ptr<MemoryBuffer> OldArchiveBuf = nullptr,
+                   bool IsEC = false);
 
 // writeArchiveToBuffer is similar to writeArchive but returns the Archive in a
 // buffer instead of writing it out to a file.
index 2d0f92e43a34467260e5d17f8d2f884b1f121d80..e9b88115195f47ddd1cc0f07e1d4c2eccab7d5b8 100644 (file)
@@ -45,7 +45,9 @@
 using namespace llvm;
 
 struct SymMap {
+  bool UseECMap;
   std::map<std::string, uint16_t> Map;
+  std::map<std::string, uint16_t> ECMap;
 };
 
 NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef)
@@ -414,6 +416,20 @@ static uint64_t computeSymbolMapSize(uint64_t NumObj, SymMap &SymMap,
   return Size;
 }
 
+static uint64_t computeECSymbolsSize(SymMap &SymMap,
+                                     uint32_t *Padding = nullptr) {
+  uint64_t Size = sizeof(uint32_t); // Number of symbols
+
+  for (auto S : SymMap.ECMap)
+    Size += sizeof(uint16_t) + S.first.length() + 1;
+
+  uint32_t Pad = offsetToAlignment(Size, Align(2));
+  Size += Pad;
+  if (Padding)
+    *Padding = Pad;
+  return Size;
+}
+
 static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind,
                                    bool Deterministic, uint64_t Size,
                                    uint64_t PrevMemberOffset = 0) {
@@ -446,8 +462,11 @@ static uint64_t computeHeadersSize(object::Archive::Kind Kind,
   uint32_t HeaderSize = computeSymbolTableHeaderSize();
   uint64_t Size = strlen("!<arch>\n") + HeaderSize + SymtabSize;
 
-  if (SymMap)
+  if (SymMap) {
     Size += HeaderSize + computeSymbolMapSize(NumMembers, *SymMap);
+    if (SymMap->ECMap.size())
+      Size += HeaderSize + computeECSymbolsSize(*SymMap);
+  }
 
   return Size + StringMemberSize;
 }
@@ -521,6 +540,41 @@ static void writeSymbolMap(raw_ostream &Out, object::Archive::Kind Kind,
     Out.write(uint8_t(0));
 }
 
+static void writeECSymbols(raw_ostream &Out, object::Archive::Kind Kind,
+                           bool Deterministic, ArrayRef<MemberData> Members,
+                           SymMap &SymMap) {
+  uint32_t Pad;
+  uint64_t Size = computeECSymbolsSize(SymMap, &Pad);
+  printGNUSmallMemberHeader(Out, "/<ECSYMBOLS>", now(Deterministic), 0, 0, 0,
+                            Size);
+
+  printLE<uint32_t>(Out, SymMap.ECMap.size());
+
+  for (auto S : SymMap.ECMap)
+    printLE(Out, S.second);
+  for (auto S : SymMap.ECMap)
+    Out << S.first << '\0';
+  while (Pad--)
+    Out.write(uint8_t(0));
+}
+
+static bool isECObject(object::SymbolicFile &Obj) {
+  if (Obj.isCOFF())
+    return cast<llvm::object::COFFObjectFile>(&Obj)->getMachine() !=
+           COFF::IMAGE_FILE_MACHINE_ARM64;
+
+  if (Obj.isIR()) {
+    Expected<std::string> TripleStr =
+        getBitcodeTargetTriple(Obj.getMemoryBufferRef());
+    if (!TripleStr)
+      return false;
+    Triple T(*TripleStr);
+    return T.isWindowsArm64EC() || T.getArch() == Triple::x86_64;
+  }
+
+  return false;
+}
+
 static Expected<std::vector<unsigned>>
 getSymbols(MemoryBufferRef Buf, uint16_t Index, raw_ostream &SymNames,
            SymMap *SymMap, bool &HasObject) {
@@ -548,20 +602,25 @@ getSymbols(MemoryBufferRef Buf, uint16_t Index, raw_ostream &SymNames,
     Obj = std::move(*ObjOrErr);
   }
 
+  std::map<std::string, uint16_t> *Map = nullptr;
+  if (SymMap)
+    Map = SymMap->UseECMap && isECObject(*Obj) ? &SymMap->ECMap : &SymMap->Map;
   HasObject = true;
   for (const object::BasicSymbolRef &S : Obj->symbols()) {
     if (!isArchiveSymbol(S))
       continue;
-    if (SymMap) {
+    if (Map) {
       std::string Name;
       raw_string_ostream NameStream(Name);
       if (Error E = S.printName(NameStream))
         return std::move(E);
-      if (SymMap->Map.find(Name) != SymMap->Map.end())
+      if (Map->find(Name) != Map->end())
         continue; // ignore duplicated symbol
-      SymMap->Map[Name] = Index;
-      Ret.push_back(SymNames.tell());
-      SymNames << Name << '\0';
+      (*Map)[Name] = Index;
+      if (Map == &SymMap->Map) {
+        Ret.push_back(SymNames.tell());
+        SymNames << Name << '\0';
+      }
     } else {
       Ret.push_back(SymNames.tell());
       if (Error E = S.printName(SymNames))
@@ -755,7 +814,7 @@ Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) {
 static Error writeArchiveToStream(raw_ostream &Out,
                                   ArrayRef<NewArchiveMember> NewMembers,
                                   bool WriteSymtab, object::Archive::Kind Kind,
-                                  bool Deterministic, bool Thin) {
+                                  bool Deterministic, bool Thin, bool IsEC) {
   assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
 
   SmallString<0> SymNamesBuf;
@@ -769,6 +828,7 @@ static Error writeArchiveToStream(raw_ostream &Out,
   if (isCOFFArchive(Kind) && NewMembers.size() > 0xfffe)
     Kind = object::Archive::K_GNU;
 
+  SymMap.UseECMap = IsEC;
   Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
       StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab,
       isCOFFArchive(Kind) ? &SymMap : nullptr, NewMembers);
@@ -855,6 +915,9 @@ static Error writeArchiveToStream(raw_ostream &Out,
       Out << StringTableMember.Header << StringTableMember.Data
           << StringTableMember.Padding;
 
+    if (WriteSymtab && SymMap.ECMap.size())
+      writeECSymbols(Out, Kind, Deterministic, Data, SymMap);
+
     for (const MemberData &M : Data)
       Out << M.Header << M.Data << M.Padding;
   } else {
@@ -944,7 +1007,7 @@ static Error writeArchiveToStream(raw_ostream &Out,
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
                    bool WriteSymtab, object::Archive::Kind Kind,
                    bool Deterministic, bool Thin,
-                   std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
+                   std::unique_ptr<MemoryBuffer> OldArchiveBuf, bool IsEC) {
   Expected<sys::fs::TempFile> Temp =
       sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a");
   if (!Temp)
@@ -952,7 +1015,7 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
   raw_fd_ostream Out(Temp->FD, false);
 
   if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind,
-                                     Deterministic, Thin)) {
+                                     Deterministic, Thin, IsEC)) {
     if (Error DiscardError = Temp->discard())
       return joinErrors(std::move(E), std::move(DiscardError));
     return E;
@@ -981,7 +1044,7 @@ writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers, bool WriteSymtab,
   raw_svector_ostream ArchiveStream(ArchiveBufferVector);
 
   if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab,
-                                     Kind, Deterministic, Thin))
+                                     Kind, Deterministic, Thin, false))
     return std::move(E);
 
   return std::make_unique<SmallVectorMemoryBuffer>(
index 9ca63bead9bc7fb87acc14e4f50a7b0842e9155e..9bde40d78fbd32e25bcde4c872da94d409e4b937 100644 (file)
@@ -168,7 +168,8 @@ static Expected<COFF::MachineTypes> getCOFFFileMachine(MemoryBufferRef MB) {
   if (Machine != COFF::IMAGE_FILE_MACHINE_I386 &&
       Machine != COFF::IMAGE_FILE_MACHINE_AMD64 &&
       Machine != COFF::IMAGE_FILE_MACHINE_ARMNT &&
-      Machine != COFF::IMAGE_FILE_MACHINE_ARM64) {
+      Machine != COFF::IMAGE_FILE_MACHINE_ARM64 &&
+      Machine != COFF::IMAGE_FILE_MACHINE_ARM64EC) {
     return createStringError(inconvertibleErrorCode(),
                              "unknown machine: " + std::to_string(Machine));
   }
@@ -181,7 +182,8 @@ static Expected<COFF::MachineTypes> getBitcodeFileMachine(MemoryBufferRef MB) {
   if (!TripleStr)
     return TripleStr.takeError();
 
-  switch (Triple(*TripleStr).getArch()) {
+  Triple T(*TripleStr);
+  switch (T.getArch()) {
   case Triple::x86:
     return COFF::IMAGE_FILE_MACHINE_I386;
   case Triple::x86_64:
@@ -189,13 +191,25 @@ static Expected<COFF::MachineTypes> getBitcodeFileMachine(MemoryBufferRef MB) {
   case Triple::arm:
     return COFF::IMAGE_FILE_MACHINE_ARMNT;
   case Triple::aarch64:
-    return COFF::IMAGE_FILE_MACHINE_ARM64;
+    return T.isWindowsArm64EC() ? COFF::IMAGE_FILE_MACHINE_ARM64EC
+                                : COFF::IMAGE_FILE_MACHINE_ARM64;
   default:
     return createStringError(inconvertibleErrorCode(),
                              "unknown arch in target triple: " + *TripleStr);
   }
 }
 
+static bool machineMatches(COFF::MachineTypes LibMachine,
+                           COFF::MachineTypes FileMachine) {
+  if (LibMachine == FileMachine)
+    return true;
+  // ARM64EC mode allows both pure ARM64, ARM64EC and X64 objects to be mixed in
+  // the archive.
+  return LibMachine == COFF::IMAGE_FILE_MACHINE_ARM64EC &&
+         (FileMachine == COFF::IMAGE_FILE_MACHINE_ARM64 ||
+          FileMachine == COFF::IMAGE_FILE_MACHINE_AMD64);
+}
+
 static void appendFile(std::vector<NewArchiveMember> &Members,
                        COFF::MachineTypes &LibMachine,
                        std::string &LibMachineSource, MemoryBufferRef MB) {
@@ -263,11 +277,18 @@ static void appendFile(std::vector<NewArchiveMember> &Members,
     // this check. See PR42180.
     if (FileMachine != COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
       if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
+        if (FileMachine == COFF::IMAGE_FILE_MACHINE_ARM64EC) {
+            llvm::errs() << MB.getBufferIdentifier() << ": file machine type "
+                         << machineToStr(FileMachine)
+                         << " conflicts with inferred library machine type,"
+                         << " use /machine:arm64ec or /machine:arm64x\n";
+            exit(1);
+        }
         LibMachine = FileMachine;
         LibMachineSource =
             (" (inferred from earlier file '" + MB.getBufferIdentifier() + "')")
                 .str();
-      } else if (LibMachine != FileMachine) {
+      } else if (!machineMatches(LibMachine, FileMachine)) {
         llvm::errs() << MB.getBufferIdentifier() << ": file machine type "
                      << machineToStr(FileMachine)
                      << " conflicts with library machine type "
@@ -460,7 +481,8 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
           writeArchive(OutputPath, Members,
                        /*WriteSymtab=*/true,
                        Thin ? object::Archive::K_GNU : object::Archive::K_COFF,
-                       /*Deterministic*/ true, Thin)) {
+                       /*Deterministic*/ true, Thin, nullptr,
+                       LibMachine == COFF::IMAGE_FILE_MACHINE_ARM64EC)) {
     handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
       llvm::errs() << OutputPath << ": " << EI.message() << "\n";
     });
index 28c527b05ffe97b6d6989158ddd71ba6bcbedf71..e38545ab22d6d0a40a8fc090f218114af5ac418b 100644 (file)
@@ -15,3 +15,11 @@ Members:
   - Name:            '/<ECSYMBOLS>/'
     Size:            '0'
 ...
+
+# Check that llvm-lib produces /<ECSYMBOLS>/ members for ARM64EC libraries.
+# RUN: rm -rf %t && mkdir -p %t && cd %t
+# RUN: llvm-mc -triple=arm64ec-pc-windows-msvc -filetype=obj -o arm64ec-foo.o %S/Inputs/a.s
+# RUN: llvm-mc -triple=aarch64-pc-windows-msvc -filetype=obj -o arm64-foo.o %S/Inputs/a.s
+# RUN: llvm-mc -triple=x86_64-pc-windows-msvc -filetype=obj -o x64-foo.o %S/Inputs/b.s
+# RUN: llvm-lib -machine:arm64ec -out:foo.lib arm64-foo.o arm64ec-foo.o x64-foo.o
+# RUN: grep -q '/<ECSYMBOLS>/' foo.lib
index 43a0f7ae75f689b4a1c59173d0244fcf05cc3580..0a6aac624bc28ab74cd94ed72c5286e5558e1014 100644 (file)
@@ -3,6 +3,8 @@ Prepare inputs:
 RUN: rm -rf %t && mkdir -p %t
 RUN: llvm-mc -triple=i386-pc-windows-msvc -filetype=obj -o %t/i386.obj %S/Inputs/a.s
 RUN: llvm-mc -triple=x86_64-pc-windows-msvc -filetype=obj -o %t/x86_64.obj %S/Inputs/a.s
+RUN: llvm-mc -triple=aarch64-pc-windows-msvc -filetype=obj -o %t/arm64.obj %S/Inputs/a.s
+RUN: llvm-mc -triple=arm64ec-pc-windows-msvc -filetype=obj -o %t/arm64ec.obj %S/Inputs/a.s
 RUN: llvm-as -o %t/i386.bc %S/Inputs/i386.ll
 RUN: llvm-as -o %t/x86_64.bc %S/Inputs/x86_64.ll
 RUN: llvm-as -o %t/arm64.bc %S/Inputs/arm64.ll
@@ -43,3 +45,19 @@ If /machine: is passed, its value is authoritative.
 RUN: not llvm-lib /machine:X86 %t/x86_64.obj %t/i386.obj 2>&1 | \
 RUN:     FileCheck --check-prefix=OBJ64 %s
 OBJ64: x86_64.obj: file machine type x64 conflicts with library machine type x86 (from '/machine:X86' flag)
+
+
+Mixing arm64 and x86_64 is possible using arm64ec:
+
+RUN: llvm-lib -machine:arm64ec %t/arm64.bc %t/x86_64.bc %t/arm64.obj %t/x86_64.obj %t/arm64ec.obj
+
+RUN: not llvm-lib %t/arm64ec.obj 2>&1 | FileCheck --check-prefix=NOEC %s
+NOEC: arm64ec.obj: file machine type arm64ec conflicts with inferred library machine type, use /machine:arm64ec or /machine:arm64x
+
+RUN: not llvm-lib -machine:arm64ec %t/arm64ec.obj %t/i386.obj 2>&1 | \
+RUN:     FileCheck --check-prefix=OBJEC %s
+OBJEC: i386.obj: file machine type x86 conflicts with library machine type arm64ec (from '/machine:arm64ec' flag)
+
+RUN: not llvm-lib -machine:arm64ec %t/arm64.bc %t/x86_64.bc %t/i386.bc 2>&1 | \
+RUN:     FileCheck --check-prefix=BCEC %s
+BCEC: i386.bc: file machine type x86 conflicts with library machine type arm64ec (from '/machine:arm64ec' flag)