[llvm-objcopy] [COFF] Fix handling of aux symbols for big objects

author Martin Storsjo <martin@martin.st>

Wed, 23 Jan 2019 11:54:51 +0000 (11:54 +0000)

committer Martin Storsjo <martin@martin.st>

Wed, 23 Jan 2019 11:54:51 +0000 (11:54 +0000)
author Martin Storsjo <martin@martin.st>
Wed, 23 Jan 2019 11:54:51 +0000 (11:54 +0000)
committer Martin Storsjo <martin@martin.st>
Wed, 23 Jan 2019 11:54:51 +0000 (11:54 +0000)
diff --git a/llvm/test/tools/llvm-objcopy/COFF/Inputs/bigobj.o.gz b/llvm/test/tools/llvm-objcopy/COFF/Inputs/bigobj.o.gz

new file mode 100644 (file)

index 0000000..6435f47

Binary files /dev/null and b/llvm/test/tools/llvm-objcopy/COFF/Inputs/bigobj.o.gz differ
diff --git a/llvm/test/tools/llvm-objcopy/COFF/bigobj.test b/llvm/test/tools/llvm-objcopy/COFF/bigobj.test

new file mode 100644 (file)

index 0000000..17968f1
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/COFF/bigobj.test
@@ -0,0 +1,35 @@
+RUN: %python %p/../Inputs/ungzip.py %p/Inputs/bigobj.o.gz > %t.in.o
+
+RUN: llvm-objdump -t %t.in.o | FileCheck %s --check-prefixes=SYMBOLS,SYMBOLS-BIG,SYMBOLS-ORIG
+
+# Do a plain copy, to check that section numbers in symbols referring
+# to sections outside of the small object format are handled correctly.
+RUN: llvm-objcopy -R '.text$4' %t.in.o %t.small.o
+RUN: llvm-objdump -t %t.in.o | FileCheck %s --check-prefixes=SYMBOLS,SYMBOLS-BIG,SYMBOLS-ORIG
+
+# Remove a section, making the section count fit into a small object.
+RUN: llvm-objcopy -R '.text$4' %t.in.o %t.small.o
+RUN: llvm-objdump -t %t.small.o | FileCheck %s --check-prefixes=SYMBOLS,SYMBOLS-SMALL,SYMBOLS-REMOVED-SMALL
+
+# Add a .gnu_debuglink section, forcing the object back to big format.
+RUN: llvm-objcopy --add-gnu-debuglink=%t.in.o %t.small.o %t.big.o
+ llvm-objdump -t %t.big.o | FileCheck %s --check-prefixes=SYMBOLS,SYMBOLS-BIG,SYMBOLS-REMOVED-BIG
+
+# In big object format, the .file symbol occupies one symbol table entry for
+# the auxillary data, but needs two entries in the small format, forcing the
+# raw symbol indices of later symbols to change.
+SYMBOLS:            SYMBOL TABLE:
+SYMBOLS-NEXT:       [ 0]{{.*}} (nx 1) {{.*}} .text
+SYMBOLS-NEXT:       AUX scnlen
+SYMBOLS-SMALL-NEXT: [ 2]{{.*}} (nx 2) {{.*}} .file
+SYMBOLS-BIG-NEXT:   [ 2]{{.*}} (nx 1) {{.*}} .file
+SYMBOLS-NEXT:       AUX abcdefghijklmnopqrs
+SYMBOLS-SMALL-NEXT: [ 5]{{.*}} (nx 0) {{.*}} foo
+SYMBOLS-BIG-NEXT:   [ 4]{{.*}} (nx 0) {{.*}} foo
+
+# Check that the section numbers outside of signed 16 bit int range
+# are represented properly. After removing one section, the section
+# numbers decrease.
+SYMBOLS-ORIG:          [ 5](sec 65280){{.*}} symbol65280
+SYMBOLS-REMOVED-SMALL: [ 6](sec 65279){{.*}} symbol65280
+SYMBOLS-REMOVED-BIG:   [ 5](sec 65279){{.*}} symbol65280
diff --git a/llvm/test/tools/llvm-objcopy/ELF/Inputs/ungzip.py b/llvm/test/tools/llvm-objcopy/ELF/Inputs/ungzip.py

deleted file mode 100644 (file)

index c7b1de9..0000000
--- a/llvm/test/tools/llvm-objcopy/ELF/Inputs/ungzip.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import gzip
-import sys
-
-with gzip.open(sys.argv[1], 'rb') as f:
-  writer = getattr(sys.stdout, 'buffer', None)
-  if writer is None:
-    writer = sys.stdout
-    if sys.platform == "win32":
-      import os, msvcrt
-      msvcrt.setmode(sys.stdout.fileno(),os.O_BINARY)
-
-  writer.write(f.read())
-  sys.stdout.flush()
diff --git a/llvm/test/tools/llvm-objcopy/ELF/auto-remove-shndx.test b/llvm/test/tools/llvm-objcopy/ELF/auto-remove-shndx.test

index 5a23493fa94181af8aa60799169fd01bc923671c..8e6c788bf4817da989d7b6df4271478c6007e8ad 100644 (file)
--- a/llvm/test/tools/llvm-objcopy/ELF/auto-remove-shndx.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/auto-remove-shndx.test
@@ -1,4 +1,4 @@
-# RUN: %python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t
+# RUN: %python %p/../Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t
  # RUN: llvm-objcopy -R .text -R s0 -R s1 -R s2 -R s3 -R s4 -R s5 -R s6 %t %t2
  # RUN: llvm-readobj --sections %t2 | FileCheck --check-prefix=SECS %s
  
diff --git a/llvm/test/tools/llvm-objcopy/ELF/many-sections.test b/llvm/test/tools/llvm-objcopy/ELF/many-sections.test

index 57239f32e4ad90b8a8ce198c9e69830542c4e0c4..1dd41cfb10c3744b15f1b2ba490cd3dceaf09154 100644 (file)
--- a/llvm/test/tools/llvm-objcopy/ELF/many-sections.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/many-sections.test
@@ -1,4 +1,4 @@
-RUN: %python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t
+RUN: %python %p/../Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t
  RUN: llvm-objcopy %t %t2
  RUN: llvm-readobj --file-headers %t2 | FileCheck --check-prefix=EHDR %s
  RUN: llvm-readobj --sections %t2 | FileCheck --check-prefix=SECS %s
diff --git a/llvm/test/tools/llvm-objcopy/ELF/remove-shndx.test b/llvm/test/tools/llvm-objcopy/ELF/remove-shndx.test

index 6cc3a1a291fba7a9da53eeedb889e4c81054a3a5..53ca8e7f220193095fdf6f7b5c74b8a481be6008 100644 (file)
--- a/llvm/test/tools/llvm-objcopy/ELF/remove-shndx.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/remove-shndx.test
@@ -1,6 +1,6 @@
  # This test checks to see that a .symtab_shndx section is added to any binary
  # that needs it, even if the original was removed.
-RUN: %python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t
+RUN: %python %p/../Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t
  RUN: llvm-objcopy -R .symtab_shndx %t %t2
  RUN: llvm-readobj --sections %t2 | FileCheck %s
  
diff --git a/llvm/test/tools/llvm-objcopy/ELF/strict-no-add.test b/llvm/test/tools/llvm-objcopy/ELF/strict-no-add.test

index 4f24df31bf97fb3649931636d9eac0e7ad566049..348ab7c4fbd7fef200ab77349fb503bfa5dc4236 100644 (file)
--- a/llvm/test/tools/llvm-objcopy/ELF/strict-no-add.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/strict-no-add.test
@@ -1,7 +1,7 @@
  # This test makes sure that sections added at the end that don't have symbols
  # defined in them don't trigger the creation of a large index table.
  
-RUN: %python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t.0
+RUN: %python %p/../Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t.0
  RUN: cat %p/Inputs/alloc-symtab.o > %t
  RUN: llvm-objcopy -R .text -R s0 -R s1 -R s2 -R s3 -R s4 -R s5 -R s6 %t.0 %t2
  RUN: llvm-objcopy --add-section=.s0=%t --add-section=.s1=%t --add-section=.s2=%t %t2 %t2
diff --git a/llvm/test/tools/llvm-objcopy/Inputs/ungzip.py b/llvm/test/tools/llvm-objcopy/Inputs/ungzip.py

new file mode 100644 (file)

index 0000000..c7b1de9
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/Inputs/ungzip.py
@@ -0,0 +1,13 @@
+import gzip
+import sys
+
+with gzip.open(sys.argv[1], 'rb') as f:
+  writer = getattr(sys.stdout, 'buffer', None)
+  if writer is None:
+    writer = sys.stdout
+    if sys.platform == "win32":
+      import os, msvcrt
+      msvcrt.setmode(sys.stdout.fileno(),os.O_BINARY)
+
+  writer.write(f.read())
+  sys.stdout.flush()
diff --git a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp

index 20adbe11e7a8d1131c7cd74928ce1c032b9407e5..64b4e79a4e02a327e485afe37e9c9a47ed615010 100644 (file)
--- a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
@@ -37,7 +37,7 @@ static uint64_t getNextRVA(const Object &Obj) {
      return 0;
    const Section &Last = Obj.getSections().back();
    return alignTo(Last.Header.VirtualAddress + Last.Header.VirtualSize,
-                 Obj.PeHeader.SectionAlignment);
+                 Obj.IsPE ? Obj.PeHeader.SectionAlignment : 1);
  }
  
  static uint32_t getCRC32(StringRef Data) {
@@ -74,8 +74,8 @@ static void addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) {
    Sec.Name = ".gnu_debuglink";
    Sec.Header.VirtualSize = Sec.getContents().size();
    Sec.Header.VirtualAddress = StartRVA;
-  Sec.Header.SizeOfRawData =
-      alignTo(Sec.Header.VirtualSize, Obj.PeHeader.FileAlignment);
+  Sec.Header.SizeOfRawData = alignTo(Sec.Header.VirtualSize,
+                                     Obj.IsPE ? Obj.PeHeader.FileAlignment : 1);
    // Sec.Header.PointerToRawData is filled in by the writer.
    Sec.Header.PointerToRelocations = 0;
    Sec.Header.PointerToLinenumbers = 0;
diff --git a/llvm/tools/llvm-objcopy/COFF/Object.cpp b/llvm/tools/llvm-objcopy/COFF/Object.cpp

index 8c382c1faef7efd27edf4449e3df1704456e8072..0ad5a05a14441f3aea0beb089648c25fca1830e1 100644 (file)
--- a/llvm/tools/llvm-objcopy/COFF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/Object.cpp
@@ -26,12 +26,8 @@ void Object::addSymbols(ArrayRef<Symbol> NewSymbols) {
  
  void Object::updateSymbols() {
    SymbolMap = DenseMap<size_t, Symbol *>(Symbols.size());
-  size_t RawSymIndex = 0;
-  for (Symbol &Sym : Symbols) {
+  for (Symbol &Sym : Symbols)
      SymbolMap[Sym.UniqueId] = &Sym;
-    Sym.RawIndex = RawSymIndex;
-    RawSymIndex += 1 + Sym.Sym.NumberOfAuxSymbols;
-  }
  }
  
  const Symbol *Object::findSymbol(size_t UniqueId) const {
diff --git a/llvm/tools/llvm-objcopy/COFF/Object.h b/llvm/tools/llvm-objcopy/COFF/Object.h

index afa272286ef7609c6d651386cf605ae5b9de4075..21475b068629c24705d6a3150b13e1b41bec57f4 100644 (file)
--- a/llvm/tools/llvm-objcopy/COFF/Object.h
+++ b/llvm/tools/llvm-objcopy/COFF/Object.h
@@ -66,10 +66,24 @@ private:
    std::vector<uint8_t> OwnedContents;
  };
  
+struct AuxSymbol {
+  AuxSymbol(ArrayRef<uint8_t> In) {
+    assert(In.size() == sizeof(Opaque));
+    std::copy(In.begin(), In.end(), Opaque);
+  }
+
+  ArrayRef<uint8_t> getRef() const {
+    return ArrayRef<uint8_t>(Opaque, sizeof(Opaque));
+  }
+
+  uint8_t Opaque[sizeof(object::coff_symbol16)];
+};
+
  struct Symbol {
    object::coff_symbol32 Sym;
    StringRef Name;
-  std::vector<uint8_t> AuxData;
+  std::vector<AuxSymbol> AuxData;
+  StringRef AuxFile;
    ssize_t TargetSectionId;
    ssize_t AssociativeComdatTargetSectionId = 0;
    Optional<size_t> WeakTargetSymbolId;
@@ -132,7 +146,7 @@ private:
  
    ssize_t NextSectionUniqueId = 1; // Allow a UniqueId 0 to mean undefined.
  
-  // Update SymbolMap and RawIndex in each Symbol.
+  // Update SymbolMap.
    void updateSymbols();
  
    // Update SectionMap and Index in each Section.
diff --git a/llvm/tools/llvm-objcopy/COFF/Reader.cpp b/llvm/tools/llvm-objcopy/COFF/Reader.cpp

index 87dd60a43cf3c06ded3930228f07b7c2562a298e..7270bbf94de6f9d58707a13fae222d6859b82382 100644 (file)
--- a/llvm/tools/llvm-objcopy/COFF/Reader.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/Reader.cpp
@@ -107,9 +107,24 @@ Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const {
                   *reinterpret_cast<const coff_symbol16 *>(SymRef.getRawPtr()));
      if (auto EC = COFFObj.getSymbolName(SymRef, Sym.Name))
        return errorCodeToError(EC);
-    Sym.AuxData = COFFObj.getSymbolAuxData(SymRef);
-    assert((Sym.AuxData.size() %
-            (IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16))) == 0);
+
+    ArrayRef<uint8_t> AuxData = COFFObj.getSymbolAuxData(SymRef);
+    size_t SymSize = IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16);
+    assert(AuxData.size() == SymSize * SymRef.getNumberOfAuxSymbols());
+    // The auxillary symbols are structs of sizeof(coff_symbol16) each.
+    // In the big object format (where symbols are coff_symbol32), each
+    // auxillary symbol is padded with 2 bytes at the end. Copy each
+    // auxillary symbol to the Sym.AuxData vector. For file symbols,
+    // the whole range of aux symbols are interpreted as one null padded
+    // string instead.
+    if (SymRef.isFileRecord())
+      Sym.AuxFile = StringRef(reinterpret_cast<const char *>(AuxData.data()),
+                              AuxData.size())
+                        .rtrim('\0');
+    else
+      for (size_t I = 0; I < SymRef.getNumberOfAuxSymbols(); I++)
+        Sym.AuxData.push_back(AuxData.slice(I * SymSize, sizeof(AuxSymbol)));
+
      // Find the unique id of the section
      if (SymRef.getSectionNumber() <=
          0) // Special symbol (undefined/absolute/debug)
diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/llvm/tools/llvm-objcopy/COFF/Writer.cpp

index db897e2ff334ccbc19b3102c6953f8ea939d5344..6e69c597217970215fe40b046fed3bbc18b93bdd 100644 (file)
--- a/llvm/tools/llvm-objcopy/COFF/Writer.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/Writer.cpp
@@ -55,7 +55,8 @@ Error COFFWriter::finalizeSymbolContents() {
        if (Sym.Sym.NumberOfAuxSymbols == 1 &&
            Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC) {
          coff_aux_section_definition *SD =
-            reinterpret_cast<coff_aux_section_definition *>(Sym.AuxData.data());
+            reinterpret_cast<coff_aux_section_definition *>(
+                Sym.AuxData[0].Opaque);
          uint32_t SDSectionNumber;
          if (Sym.AssociativeComdatTargetSectionId == 0) {
            // Not a comdat associative section; just set the Number field to
@@ -79,7 +80,7 @@ Error COFFWriter::finalizeSymbolContents() {
      // we want to set. Only >= 1 would be required, but only == 1 makes sense.
      if (Sym.WeakTargetSymbolId && Sym.Sym.NumberOfAuxSymbols == 1) {
        coff_aux_weak_external *WE =
-          reinterpret_cast<coff_aux_weak_external *>(Sym.AuxData.data());
+          reinterpret_cast<coff_aux_weak_external *>(Sym.AuxData[0].Opaque);
        const Symbol *Target = Obj.findSymbol(*Sym.WeakTargetSymbolId);
        if (Target == nullptr)
          return createStringError(object_error::invalid_symbol_index,
@@ -141,13 +142,26 @@ size_t COFFWriter::finalizeStringTable() {
  
  template <class SymbolTy>
  std::pair<size_t, size_t> COFFWriter::finalizeSymbolTable() {
-  size_t SymTabSize = Obj.getSymbols().size() * sizeof(SymbolTy);
-  for (const auto &S : Obj.getSymbols())
-    SymTabSize += S.AuxData.size();
-  return std::make_pair(SymTabSize, sizeof(SymbolTy));
+  size_t RawSymIndex = 0;
+  for (auto &S : Obj.getMutableSymbols()) {
+    // Symbols normally have NumberOfAuxSymbols set correctly all the time.
+    // For file symbols, we need to know the output file's symbol size to be
+    // able to calculate the number of slots it occupies.
+    if (!S.AuxFile.empty())
+      S.Sym.NumberOfAuxSymbols =
+          alignTo(S.AuxFile.size(), sizeof(SymbolTy)) / sizeof(SymbolTy);
+    S.RawIndex = RawSymIndex;
+    RawSymIndex += 1 + S.Sym.NumberOfAuxSymbols;
+  }
+  return std::make_pair(RawSymIndex * sizeof(SymbolTy), sizeof(SymbolTy));
  }
  
  Error COFFWriter::finalize(bool IsBigObj) {
+  size_t SymTabSize, SymbolSize;
+  std::tie(SymTabSize, SymbolSize) = IsBigObj
+                                         ? finalizeSymbolTable<coff_symbol32>()
+                                         : finalizeSymbolTable<coff_symbol16>();
+
    if (Error E = finalizeRelocTargets())
      return E;
    if (Error E = finalizeSymbolContents())
@@ -199,10 +213,6 @@ Error COFFWriter::finalize(bool IsBigObj) {
    }
  
    size_t StrTabSize = finalizeStringTable();
-  size_t SymTabSize, SymbolSize;
-  std::tie(SymTabSize, SymbolSize) = IsBigObj
-                                         ? finalizeSymbolTable<coff_symbol32>()
-                                         : finalizeSymbolTable<coff_symbol16>();
  
    size_t PointerToSymbolTable = FileSize;
    // StrTabSize <= 4 is the size of an empty string table, only consisting
@@ -312,8 +322,23 @@ template <class SymbolTy> void COFFWriter::writeSymbolStringTables() {
      copySymbol<SymbolTy, coff_symbol32>(*reinterpret_cast<SymbolTy *>(Ptr),
                                          S.Sym);
      Ptr += sizeof(SymbolTy);
-    std::copy(S.AuxData.begin(), S.AuxData.end(), Ptr);
-    Ptr += S.AuxData.size();
+    if (!S.AuxFile.empty()) {
+      // For file symbols, just write the string into the aux symbol slots,
+      // assuming that the unwritten parts are initialized to zero in the memory
+      // mapped file.
+      std::copy(S.AuxFile.begin(), S.AuxFile.end(), Ptr);
+      Ptr += S.Sym.NumberOfAuxSymbols * sizeof(SymbolTy);
+    } else {
+      // For other auxillary symbols, write their opaque payload into one symbol
+      // table slot each. For big object files, the symbols are larger than the
+      // opaque auxillary symbol struct and we leave padding at the end of each
+      // entry.
+      for (const AuxSymbol &AuxSym : S.AuxData) {
+        ArrayRef<uint8_t> Ref = AuxSym.getRef();
+        std::copy(Ref.begin(), Ref.end(), Ptr);
+        Ptr += sizeof(SymbolTy);
+      }
+    }
    }
    if (StrTabBuilder.getSize() > 4 || !Obj.IsPE) {
      // Always write a string table in object files, even an empty one.
diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.h b/llvm/tools/llvm-objcopy/COFF/Writer.h

index 9b1cfa91d00e8437c8f2ad17117af2c3f1163028..681a8d5e4a66cb7a2774a368504c5bed11502364 100644 (file)
--- a/llvm/tools/llvm-objcopy/COFF/Writer.h
+++ b/llvm/tools/llvm-objcopy/COFF/Writer.h
@@ -30,11 +30,11 @@ class COFFWriter {
    size_t SizeOfInitializedData;
    StringTableBuilder StrTabBuilder;
  
+  template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
    Error finalizeRelocTargets();
    Error finalizeSymbolContents();
    void layoutSections();
    size_t finalizeStringTable();
-  template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
  
    Error finalize(bool IsBigObj);
author	Martin Storsjo <martin@martin.st>
	Wed, 23 Jan 2019 11:54:51 +0000 (11:54 +0000)
committer	Martin Storsjo <martin@martin.st>
	Wed, 23 Jan 2019 11:54:51 +0000 (11:54 +0000)
llvm/test/tools/llvm-objcopy/COFF/Inputs/bigobj.o.gz	[new file with mode: 0644]	patch \| blob
llvm/test/tools/llvm-objcopy/COFF/bigobj.test	[new file with mode: 0644]	patch \| blob
llvm/test/tools/llvm-objcopy/ELF/Inputs/ungzip.py	[deleted file]	patch \| blob \| history
llvm/test/tools/llvm-objcopy/ELF/auto-remove-shndx.test		patch \| blob \| history
llvm/test/tools/llvm-objcopy/ELF/many-sections.test		patch \| blob \| history
llvm/test/tools/llvm-objcopy/ELF/remove-shndx.test		patch \| blob \| history
llvm/test/tools/llvm-objcopy/ELF/strict-no-add.test		patch \| blob \| history
llvm/test/tools/llvm-objcopy/Inputs/ungzip.py	[new file with mode: 0644]	patch \| blob
llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp		patch \| blob \| history
llvm/tools/llvm-objcopy/COFF/Object.cpp		patch \| blob \| history
llvm/tools/llvm-objcopy/COFF/Object.h		patch \| blob \| history
llvm/tools/llvm-objcopy/COFF/Reader.cpp		patch \| blob \| history
llvm/tools/llvm-objcopy/COFF/Writer.cpp		patch \| blob \| history
llvm/tools/llvm-objcopy/COFF/Writer.h		patch \| blob \| history