[lld-macho] Ensure reads from nlist_64 structs are aligned when necessary
authorJez Ng <jezng@fb.com>
Thu, 21 May 2020 22:26:35 +0000 (15:26 -0700)
committerJez Ng <jezng@fb.com>
Tue, 2 Jun 2020 20:19:38 +0000 (13:19 -0700)
My test refactoring in D80217 seems to have caused yaml2obj to emit
unaligned nlist_64 structs, causing ASAN'd lld to be unhappy. I don't
think this is an issue with yaml2obj though -- llvm-mc also seems to
emit unaligned nlist_64s. This diff makes lld able to safely do aligned
reads under ASAN builds while hopefully creating no overhead for regular
builds on architectures that support unaligned reads.

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D80414

lld/MachO/InputFiles.cpp
lld/MachO/InputFiles.h
lld/MachO/MachOStructs.h [new file with mode: 0644]
lld/MachO/SyntheticSections.cpp
lld/MachO/Writer.cpp

index 50ec84f..321e1ca 100644 (file)
@@ -45,6 +45,7 @@
 #include "Config.h"
 #include "ExportTrie.h"
 #include "InputSection.h"
+#include "MachOStructs.h"
 #include "OutputSection.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
@@ -211,14 +212,14 @@ void InputFile::parseRelocations(const section_64 &sec,
   }
 }
 
-void InputFile::parseSymbols(ArrayRef<nlist_64> nList, const char *strtab,
-                             bool subsectionsViaSymbols) {
+void InputFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
+                             const char *strtab, bool subsectionsViaSymbols) {
   // resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols
   // out-of-sequence.
   symbols.resize(nList.size());
   std::vector<size_t> altEntrySymIdxs;
 
-  auto createDefined = [&](const nlist_64 &sym, InputSection *isec,
+  auto createDefined = [&](const structs::nlist_64 &sym, InputSection *isec,
                            uint32_t value) -> Symbol * {
     StringRef name = strtab + sym.n_strx;
     if (sym.n_type & N_EXT)
@@ -230,7 +231,7 @@ void InputFile::parseSymbols(ArrayRef<nlist_64> nList, const char *strtab,
   };
 
   for (size_t i = 0, n = nList.size(); i < n; ++i) {
-    const nlist_64 &sym = nList[i];
+    const structs::nlist_64 &sym = nList[i];
 
     // Undefined symbol
     if (!sym.n_sect) {
@@ -289,7 +290,7 @@ void InputFile::parseSymbols(ArrayRef<nlist_64> nList, const char *strtab,
   }
 
   for (size_t idx : altEntrySymIdxs) {
-    const nlist_64 &sym = nList[idx];
+    const structs::nlist_64 &sym = nList[idx];
     SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
     uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr;
     InputSection *subsec = findContainingSubsection(subsecMap, &off);
@@ -311,8 +312,8 @@ ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) {
   // TODO: Error on missing LC_SYMTAB?
   if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
     auto *c = reinterpret_cast<const symtab_command *>(cmd);
-    ArrayRef<nlist_64> nList(
-        reinterpret_cast<const nlist_64 *>(buf + c->symoff), c->nsyms);
+    ArrayRef<structs::nlist_64> nList(
+        reinterpret_cast<const structs::nlist_64 *>(buf + c->symoff), c->nsyms);
     const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
     bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
     parseSymbols(nList, strtab, subsectionsViaSymbols);
index 200ec85..ffe0c2e 100644 (file)
@@ -9,6 +9,8 @@
 #ifndef LLD_MACHO_INPUT_FILES_H
 #define LLD_MACHO_INPUT_FILES_H
 
+#include "MachOStructs.h"
+
 #include "lld/Common/LLVM.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/BinaryFormat/MachO.h"
@@ -52,7 +54,7 @@ protected:
 
   void parseSections(ArrayRef<llvm::MachO::section_64>);
 
-  void parseSymbols(ArrayRef<llvm::MachO::nlist_64> nList, const char *strtab,
+  void parseSymbols(ArrayRef<lld::structs::nlist_64> nList, const char *strtab,
                     bool subsectionsViaSymbols);
 
   void parseRelocations(const llvm::MachO::section_64 &, SubsectionMap &);
diff --git a/lld/MachO/MachOStructs.h b/lld/MachO/MachOStructs.h
new file mode 100644 (file)
index 0000000..69b50ec
--- /dev/null
@@ -0,0 +1,36 @@
+//===- MachOStructs.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines structures used in the MachO object file format. Note that
+// unlike llvm/BinaryFormat/MachO.h, the structs here are defined in terms of
+// endian- and alignment-compatibility wrappers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_MACHO_STRUCTS_H
+#define LLD_MACHO_MACHO_STRUCTS_H
+
+#include "llvm/Support/Endian.h"
+
+namespace lld {
+
+namespace structs {
+
+struct nlist_64 {
+  llvm::support::ulittle32_t n_strx;
+  uint8_t n_type;
+  uint8_t n_sect;
+  llvm::support::ulittle16_t n_desc;
+  llvm::support::ulittle64_t n_value;
+};
+
+} // namespace structs
+
+} // namespace lld
+
+#endif
index 26fa192..af1c181 100644 (file)
@@ -10,6 +10,7 @@
 #include "Config.h"
 #include "ExportTrie.h"
 #include "InputFiles.h"
+#include "MachOStructs.h"
 #include "OutputSegment.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
@@ -281,7 +282,7 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
 }
 
 size_t SymtabSection::getSize() const {
-  return symbols.size() * sizeof(nlist_64);
+  return symbols.size() * sizeof(structs::nlist_64);
 }
 
 void SymtabSection::finalizeContents() {
@@ -292,7 +293,7 @@ void SymtabSection::finalizeContents() {
 }
 
 void SymtabSection::writeTo(uint8_t *buf) const {
-  auto *nList = reinterpret_cast<nlist_64 *>(buf);
+  auto *nList = reinterpret_cast<structs::nlist_64 *>(buf);
   for (const SymtabEntry &entry : symbols) {
     nList->n_strx = entry.strx;
     // TODO support other symbol types
index e93cb80..7689661 100644 (file)
@@ -414,8 +414,6 @@ void Writer::assignAddresses(OutputSegment *seg) {
   for (auto &p : seg->getSections()) {
     OutputSection *section = p.second;
     addr = alignTo(addr, section->align);
-    // We must align the file offsets too to avoid misaligned writes of
-    // structs.
     fileOff = alignTo(fileOff, section->align);
     section->addr = addr;
     section->fileOff = fileOff;