[llvm-objcopy][MachO] Support indirect symbol table
author Seiya Nuta <nuta@seiya.me>
Wed, 30 Oct 2019 06:12:17 +0000 (15:12 +0900)
committer Seiya Nuta <nuta@seiya.me>
Wed, 30 Oct 2019 06:12:22 +0000 (15:12 +0900)
Summary:
Parse the indirect symbol table in the reader and, in the writer,
re-emit each entry with the current index of the symbol it refers
to, in case symbol indexes have changed.

Reviewers: alexshap, rupprecht, jhenderson

Reviewed By: alexshap, rupprecht

Subscribers: jakehehrlich, abrachet, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D66280

llvm/test/tools/llvm-objcopy/MachO/indirect-symbol-table-copy.s [new file with mode: 0644]
llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
llvm/tools/llvm-objcopy/MachO/Object.h

diff --git a/llvm/test/tools/llvm-objcopy/MachO/indirect-symbol-table-copy.s b/llvm/test/tools/llvm-objcopy/MachO/indirect-symbol-table-copy.s
new file mode 100644 (file)
index 0000000..e1a0b18
--- /dev/null
@@ -0,0 +1,64 @@
+## Show that llvm-objcopy copies the indirect symbol table properly.
+# RUN: llvm-mc -assemble -triple x86_64-apple-darwin9 -filetype=obj %s -o %t
+# RUN: llvm-objcopy %t %t.copy
+# RUN: llvm-readobj --symbols --macho-indirect-symbols %t.copy \
+# RUN:   | FileCheck %s
+
+# __DATA,__nl_symbol_ptr
+.non_lazy_symbol_pointer
+bar:
+        .long 0
+baz:
+        .long 0
+
+.indirect_symbol bar
+
+# __DATA,__la_symbol_ptr
+.lazy_symbol_pointer
+foo:
+        .long 0
+
+.indirect_symbol foo
+
+# CHECK:      Symbols [
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: bar (5)
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __nl_symbol_ptr (0x2)
+# CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value: 0x0
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: baz (1)
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __nl_symbol_ptr (0x2)
+# CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value: 0x4
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: foo (9)
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __la_symbol_ptr (0x3)
+# CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value: 0x8
+# CHECK-NEXT:   }
+# CHECK-NEXT: ]
+# CHECK-NEXT: Indirect Symbols {
+# CHECK-NEXT:   Number: 2
+# CHECK-NEXT:   Symbols [
+# CHECK-NEXT:     Entry {
+# CHECK-NEXT:       Entry Index: 0
+# CHECK-NEXT:       Symbol Index: 0x80000000
+# CHECK-NEXT:     }
+# CHECK-NEXT:     Entry {
+# CHECK-NEXT:       Entry Index: 1
+# CHECK-NEXT:       Symbol Index: 0x2
+# CHECK-NEXT:     }
+# CHECK-NEXT:   ]
+# CHECK-NEXT: }
index ed2b3eb..7e2ebdc 100644 (file)
@@ -256,9 +256,16 @@ void MachOReader::readFunctionStartsData(Object &O) const {
 
 void MachOReader::readIndirectSymbolTable(Object &O) const {
   MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
-  for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i)
-    O.IndirectSymTable.Symbols.push_back(
-        MachOObj.getIndirectSymbolTableEntry(DySymTab, i));
+  constexpr uint32_t AbsOrLocalMask =
+      MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;
+  for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {
+    uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i);
+    if ((Index & AbsOrLocalMask) != 0)
+      O.IndirectSymTable.Symbols.emplace_back(Index, None);
+    else
+      O.IndirectSymTable.Symbols.emplace_back(
+          Index, O.SymTable.getSymbolByIndex(Index));
+  }
 }
 
 std::unique_ptr<Object> MachOReader::create() const {
index 4ec91cc..59d57f7 100644 (file)
@@ -369,11 +369,14 @@ void MachOWriter::writeIndirectSymbolTable() {
       O.LoadCommands[*O.DySymTabCommandIndex]
           .MachOLoadCommand.dysymtab_command_data;
 
-  char *Out = (char *)B.getBufferStart() + DySymTabCommand.indirectsymoff;
-  assert((DySymTabCommand.nindirectsyms == O.IndirectSymTable.Symbols.size()) &&
-         "Incorrect indirect symbol table size");
-  memcpy(Out, O.IndirectSymTable.Symbols.data(),
-         sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
+  uint32_t *Out =
+      (uint32_t *)(B.getBufferStart() + DySymTabCommand.indirectsymoff);
+  for (const IndirectSymbolEntry &Sym : O.IndirectSymTable.Symbols) {
+    uint32_t Entry = (Sym.Symbol) ? (*Sym.Symbol)->Index : Sym.OriginalIndex;
+    if (IsLittleEndian != sys::IsLittleEndianHost)
+      sys::swapByteOrder(Entry);
+    *Out++ = Entry;
+  }
 }
 
 void MachOWriter::writeDataInCodeData() {
index 36b0f7e..bf80253 100644 (file)
@@ -112,8 +112,21 @@ struct SymbolTable {
   const SymbolEntry *getSymbolByIndex(uint32_t Index) const;
 };
 
+struct IndirectSymbolEntry {
+  /// The original value read from the indirect symbol table. Higher bits
+  /// encode extra information (INDIRECT_SYMBOL_LOCAL and INDIRECT_SYMBOL_ABS).
+  uint32_t OriginalIndex;
+  /// The symbol referenced by this entry. It's None if the original value has
+  /// INDIRECT_SYMBOL_LOCAL or INDIRECT_SYMBOL_ABS set.
+  Optional<const SymbolEntry *> Symbol;
+
+  IndirectSymbolEntry(uint32_t OriginalIndex,
+                      Optional<const SymbolEntry *> Symbol)
+      : OriginalIndex(OriginalIndex), Symbol(Symbol) {}
+};
+
 struct IndirectSymbolTable {
-  std::vector<uint32_t> Symbols;
+  std::vector<IndirectSymbolEntry> Symbols;
 };
 
 /// The location of the string table inside the binary is described by LC_SYMTAB