Extend obj2yaml to optionally preserve raw __LINKEDIT/__DATA segments.
author Adrian Prantl <aprantl@apple.com>
Mon, 8 Nov 2021 19:29:04 +0000 (11:29 -0800)
committer Adrian Prantl <aprantl@apple.com>
Mon, 8 Nov 2021 19:30:12 +0000 (11:30 -0800)
I am planning to upstream MachOObjectFile code to support Darwin
chained fixups. In order to test the new parser features we need a way
to produce correct (and incorrect) chained fixups. Right now the only
tool that can produce them is the Darwin linker. To avoid having to
check in binary files, this patch allows obj2yaml to print a hexdump
of the raw __LINKEDIT and __DATA segments, which both allows us to
bootstrap the parser and enables us to easily create malformed inputs
to test error handling in the parser.

This patch adds a new option to obj2yaml, which may be given once per
segment to dump:

  --raw-segment=data
  --raw-segment=linkedit

Differential Revision: https://reviews.llvm.org/D113234

llvm/include/llvm/Object/MachO.h
llvm/include/llvm/ObjectYAML/MachOYAML.h
llvm/lib/Object/MachOObjectFile.cpp
llvm/lib/ObjectYAML/MachOEmitter.cpp
llvm/lib/ObjectYAML/MachOYAML.cpp
llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml [new file with mode: 0644]
llvm/tools/obj2yaml/macho2yaml.cpp
llvm/tools/obj2yaml/obj2yaml.cpp
llvm/tools/obj2yaml/obj2yaml.h

index d2ad12e..ca5d63e 100644 (file)
@@ -311,6 +311,9 @@ public:
   bool isSectionBitcode(DataRefImpl Sec) const override;
   bool isDebugSection(DataRefImpl Sec) const override;
 
+  /// Return the raw contents of an entire segment.
+  ArrayRef<uint8_t> getSegmentContents(StringRef SegmentName) const;
+
   /// When dsymutil generates the companion file, it strips all unnecessary
   /// sections (e.g. everything in the _TEXT segment) by omitting their body
   /// and setting the offset in their corresponding load command to zero.
index 5d1d3ee..ee89f4e 100644 (file)
@@ -131,6 +131,7 @@ struct Object {
   std::vector<LoadCommand> LoadCommands;
   std::vector<Section> Sections;
   LinkEditData LinkEdit;
+  Optional<llvm::yaml::BinaryRef> RawLinkEditSegment;
   DWARFYAML::Data DWARF;
 };
 
index e84defb..7501661 100644 (file)
@@ -2048,6 +2048,46 @@ bool MachOObjectFile::isDebugSection(DataRefImpl Sec) const {
          SectionName == "__swift_ast";
 }
 
+namespace {
+template <typename LoadCommandType> // MachO::segment_command or _64 variant
+ArrayRef<uint8_t> getSegmentContents(const MachOObjectFile &Obj,
+                                     MachOObjectFile::LoadCommandInfo LoadCmd,
+                                     StringRef SegmentName) {
+  auto SegmentOrErr = getStructOrErr<LoadCommandType>(Obj, LoadCmd.Ptr);
+  if (!SegmentOrErr) {
+    consumeError(SegmentOrErr.takeError()); // unreadable command == no match
+    return {};
+  }
+  auto &Segment = SegmentOrErr.get();
+  if (StringRef(Segment.segname, 16).startswith(SegmentName)) // prefix match
+    return arrayRefFromStringRef(Obj.getData().slice( // slice(start, end)
+        Segment.fileoff, Segment.fileoff + Segment.filesize));
+  return {}; // segment name does not match
+}
+} // namespace
+
+ArrayRef<uint8_t>
+MachOObjectFile::getSegmentContents(StringRef SegmentName) const {
+  for (auto LoadCmd : load_commands()) { // scan load commands in order
+    ArrayRef<uint8_t> Contents;
+    switch (LoadCmd.C.cmd) {
+    case MachO::LC_SEGMENT:
+      // `::` picks the namespace-scope template helper, not this member.
+      Contents = ::getSegmentContents<MachO::segment_command>(*this, LoadCmd,
+                                                              SegmentName);
+      break;
+    case MachO::LC_SEGMENT_64:
+      Contents = ::getSegmentContents<MachO::segment_command_64>(*this, LoadCmd,
+                                                                 SegmentName);
+      break;
+    default:
+      continue; // not a segment load command
+    }
+    if (!Contents.empty()) // first non-empty match wins; empty ones keep going
+      return Contents;
+  }
+  return {}; // no segment matched, or every match had empty contents
+}
+
 unsigned MachOObjectFile::getSectionID(SectionRef Sec) const {
   return Sec.getRawDataRefImpl().d.a;
 }
index 63179ae..c653c29 100644 (file)
@@ -288,6 +288,7 @@ void MachOWriter::writeLoadCommands(raw_ostream &OS) {
 }
 
 Error MachOWriter::writeSectionData(raw_ostream &OS) {
+  uint64_t LinkEditOff = 0;
   for (auto &LC : Obj.LoadCommands) {
     switch (LC.Data.load_command_data.cmd) {
     case MachO::LC_SEGMENT:
@@ -297,6 +298,9 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
       if (0 ==
           strncmp(&LC.Data.segment_command_data.segname[0], "__LINKEDIT", 16)) {
         FoundLinkEditSeg = true;
+        LinkEditOff = segOff;
+        if (Obj.RawLinkEditSegment)
+          continue;
         writeLinkEditData(OS);
       }
       for (auto &Sec : LC.Sections) {
@@ -344,6 +348,13 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
     }
   }
 
+  if (Obj.RawLinkEditSegment) {
+    ZeroToOffset(OS, LinkEditOff);
+    if (OS.tell() - fileStart > LinkEditOff || !LinkEditOff)
+      return createStringError(errc::invalid_argument,
+                               "section offsets don't line up");
+    Obj.RawLinkEditSegment->writeAsBinary(OS);
+  }
   return Error::success();
 }
 
index 757e46c..c9562bd 100644 (file)
@@ -110,6 +110,9 @@ void MappingTraits<MachOYAML::Object>::mapping(IO &IO,
   Object.DWARF.Is64BitAddrSize = Object.Header.magic == MachO::MH_MAGIC_64 ||
                                  Object.Header.magic == MachO::MH_CIGAM_64;
   IO.mapOptional("LoadCommands", Object.LoadCommands);
+
+  if (Object.RawLinkEditSegment || !IO.outputting())
+    IO.mapOptional("__LINKEDIT", Object.RawLinkEditSegment);
   if(!Object.LinkEdit.isEmpty() || !IO.outputting())
     IO.mapOptional("LinkEditData", Object.LinkEdit);
 
diff --git a/llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml b/llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml
new file mode 100644 (file)
index 0000000..b9d0cae
--- /dev/null
@@ -0,0 +1,184 @@
+# Test that obj2yaml + yaml2obj can round-trip Mach-O binaries with raw
+# __DATA section contents and a raw __LINKEDIT segment.
+#
+# RUN: yaml2obj %s | obj2yaml --raw-segment=data --raw-segment=linkedit | FileCheck %s
+#
+# This file was produced using:
+#   echo "int ext;" > a.c
+#   xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o a.o a.c -c
+#   xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib a.o -o liba.dylib -install_name @executable_path/liba.dylib
+#   echo "extern int ext;" > b.c
+#   echo "int padding;" >> b.c
+#   echo "int *p = &ext + 4;" >> b.c
+#   xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o b.o b.c -c
+#   xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib b.o -o libfixups.dylib -install_name @executable_path/libfixups.dylib -L. -la
+#
+# CHECK:      - sectname:        __data
+# CHECK:        segname:         __DATA
+# CHECK:        content:         '0000001000000080'
+# CHECK: __LINKEDIT:      0000000020000000480000004C000000010000000100000000000000000000000300000000000000100000000000000018000000004006000040000000000000000000000100000001020000005F6578740000000000000000015F700006040080800100000000000000000000000000020000000F02000000400000000000000500000001000001000000000000000020005F70005F65787400000000000000
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x100000C
+  cpusubtype:      0x0
+  filetype:        0x6
+  ncmds:           16
+  sizeofcmds:      816
+  flags:           0x100085
+  reserved:        0x0
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         152
+    segname:         __TEXT
+    vmaddr:          0
+    vmsize:          16384
+    fileoff:         0
+    filesize:        16384
+    maxprot:         5
+    initprot:        5
+    nsects:          1
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x4000
+        size:            0
+        offset:          0x4000
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x80000400
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+        content:         ''
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         152
+    segname:         __DATA
+    vmaddr:          16384
+    vmsize:          16384
+    fileoff:         16384
+    filesize:        16384
+    maxprot:         3
+    initprot:        3
+    nsects:          1
+    flags:           0
+    Sections:
+      - sectname:        __data
+        segname:         __DATA
+        addr:            0x4000
+        size:            8
+        offset:          0x4000
+        align:           3
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+        content:         '0000001000000080'
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         72
+    segname:         __LINKEDIT
+    vmaddr:          32768
+    vmsize:          16384
+    fileoff:         32768
+    filesize:        160
+    maxprot:         1
+    initprot:        1
+    nsects:          0
+    flags:           0
+  - cmd:             LC_ID_DYLIB
+    cmdsize:         64
+    dylib:
+      name:            24
+      timestamp:       1
+      current_version: 0
+      compatibility_version: 0
+    Content:         '@executable_path/libfixups.dylib'
+    ZeroPadBytes:    8
+  - cmd:             LC_DYLD_CHAINED_FIXUPS
+    cmdsize:         16
+    dataoff:         32768
+    datasize:        88
+  - cmd:             LC_DYLD_EXPORTS_TRIE
+    cmdsize:         16
+    dataoff:         32856
+    datasize:        16
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          32880
+    nsyms:           2
+    stroff:          32912
+    strsize:         16
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       0
+    iextdefsym:      0
+    nextdefsym:      1
+    iundefsym:       1
+    nundefsym:       1
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  0
+    nindirectsyms:   0
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+  - cmd:             LC_UUID
+    cmdsize:         24
+    uuid:            56F7BCE0-C1A7-38E3-A90D-742D8E3D5FA9
+  - cmd:             LC_BUILD_VERSION
+    cmdsize:         32
+    platform:        2
+    minos:           983296
+    sdk:             983552
+    ntools:          1
+    Tools:
+      - tool:            3
+        version:         46596096
+  - cmd:             LC_SOURCE_VERSION
+    cmdsize:         16
+    version:         0
+  - cmd:             LC_ENCRYPTION_INFO_64
+    cmdsize:         24
+    cryptoff:        16384
+    cryptsize:       0
+    cryptid:         0
+    pad:             0
+  - cmd:             LC_LOAD_DYLIB
+    cmdsize:         56
+    dylib:
+      name:            24
+      timestamp:       2
+      current_version: 0
+      compatibility_version: 0
+    Content:         '@executable_path/liba.dylib'
+    ZeroPadBytes:    5
+  - cmd:             LC_LOAD_DYLIB
+    cmdsize:         56
+    dylib:
+      name:            24
+      timestamp:       2
+      current_version: 85917696
+      compatibility_version: 65536
+    Content:         '/usr/lib/libSystem.B.dylib'
+    ZeroPadBytes:    6
+  - cmd:             LC_FUNCTION_STARTS
+    cmdsize:         16
+    dataoff:         32872
+    datasize:        8
+  - cmd:             LC_DATA_IN_CODE
+    cmdsize:         16
+    dataoff:         32880
+    datasize:        0
+__LINKEDIT:      0000000020000000480000004C000000010000000100000000000000000000000300000000000000100000000000000018000000004006000040000000000000000000000100000001020000005F6578740000000000000000015F700006040080800100000000000000000000000000020000000F02000000400000000000000500000001000001000000000000000020005F70005F65787400000000000000
+...
index b7289bf..d3b4bf1 100644 (file)
@@ -29,6 +29,7 @@ class MachODumper {
 
   const object::MachOObjectFile &Obj;
   std::unique_ptr<DWARFContext> DWARFCtx;
+  unsigned RawSegments;
   void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
   Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
   void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
@@ -52,8 +53,8 @@ class MachODumper {
 
 public:
   MachODumper(const object::MachOObjectFile &O,
-              std::unique_ptr<DWARFContext> DCtx)
-      : Obj(O), DWARFCtx(std::move(DCtx)) {}
+              std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments)
+      : Obj(O), DWARFCtx(std::move(DCtx)), RawSegments(RawSegments) {}
   Expected<std::unique_ptr<MachOYAML::Object>> dump();
 };
 
@@ -176,6 +177,13 @@ Expected<const char *> MachODumper::extractSections(
     if (Expected<MachOYAML::Section> S =
             constructSection(Sec, Sections.size() + 1)) {
       StringRef SecName(S->sectname);
+
+      // Copy data sections if requested.
+      if ((RawSegments & RawSegments::data) &&
+          StringRef(S->segname).startswith("__DATA"))
+        S->content =
+            yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
+
       if (SecName.startswith("__debug_")) {
         // If the DWARF section cannot be successfully parsed, emit raw content
         // instead of an entry in the DWARF section of the YAML.
@@ -282,7 +290,11 @@ Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
   dumpHeader(Y);
   if (Error Err = dumpLoadCommands(Y))
     return std::move(Err);
-  dumpLinkEdit(Y);
+  if (RawSegments & RawSegments::linkedit)
+    Y->RawLinkEditSegment =
+        yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT"));
+  else
+    dumpLinkEdit(Y);
 
   return std::move(Y);
 }
@@ -587,9 +599,10 @@ void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
   }
 }
 
-Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) {
+Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj,
+                 unsigned RawSegments) {
   std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj);
-  MachODumper Dumper(Obj, std::move(DCtx));
+  MachODumper Dumper(Obj, std::move(DCtx), RawSegments);
   Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
   if (!YAML)
     return YAML.takeError();
@@ -602,7 +615,8 @@ Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) {
   return Error::success();
 }
 
-Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
+Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj,
+                 unsigned RawSegments) {
   yaml::YamlObjectFile YAMLFile;
   YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
   MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
@@ -624,7 +638,7 @@ Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
       return SliceObj.takeError();
 
     std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get());
-    MachODumper Dumper(*SliceObj.get(), std::move(DCtx));
+    MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments);
     Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
     if (!YAMLObj)
       return YAMLObj.takeError();
@@ -636,12 +650,13 @@ Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
   return Error::success();
 }
 
-Error macho2yaml(raw_ostream &Out, const object::Binary &Binary) {
+Error macho2yaml(raw_ostream &Out, const object::Binary &Binary,
+                 unsigned RawSegments) {
   if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary))
-    return macho2yaml(Out, *MachOObj);
+    return macho2yaml(Out, *MachOObj, RawSegments);
 
   if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary))
-    return macho2yaml(Out, *MachOObj);
+    return macho2yaml(Out, *MachOObj, RawSegments);
 
   llvm_unreachable("unexpected Mach-O file format");
 }
index e9e47d1..9c7a338 100644 (file)
@@ -1,4 +1,4 @@
-//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -------*- C++ -*-===//
+//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -----------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 using namespace llvm;
 using namespace llvm::object;
 
+static cl::opt<std::string>
+    InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+static cl::bits<RawSegments> RawSegment(
+    "raw-segment",
+    cl::desc("Mach-O: dump the raw contents of the listed segments instead of "
+             "parsing them:"),
+    cl::values(clEnumVal(data, "__DATA"), clEnumVal(linkedit, "__LINKEDIT")));
+
 static Error dumpObject(const ObjectFile &Obj) {
   if (Obj.isCOFF())
     return errorCodeToError(coff2yaml(outs(), cast<COFFObjectFile>(Obj)));
@@ -54,7 +62,7 @@ static Error dumpInput(StringRef File) {
   // Universal MachO is not a subclass of ObjectFile, so it needs to be handled
   // here with the other binary types.
   if (Binary.isMachO() || Binary.isMachOUniversalBinary())
-    return macho2yaml(outs(), Binary);
+    return macho2yaml(outs(), Binary, RawSegment.getBits());
   if (ObjectFile *Obj = dyn_cast<ObjectFile>(&Binary))
     return dumpObject(*Obj);
   if (MinidumpFile *Minidump = dyn_cast<MinidumpFile>(&Binary))
@@ -74,9 +82,6 @@ static void reportError(StringRef Input, Error Err) {
   errs().flush();
 }
 
-cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"),
-                                   cl::init("-"));
-
 int main(int argc, char *argv[]) {
   InitLLVM X(argc, argv);
   cl::ParseCommandLineOptions(argc, argv);
index fdd9b2a..c026482 100644 (file)
 #include "llvm/Support/MemoryBufferRef.h"
 #include <system_error>
 
+enum RawSegments : unsigned { none = 0, data = 1, linkedit = 1 << 1 }; // bit flags, one per segment to dump raw
 std::error_code coff2yaml(llvm::raw_ostream &Out,
                           const llvm::object::COFFObjectFile &Obj);
 llvm::Error elf2yaml(llvm::raw_ostream &Out,
                          const llvm::object::ObjectFile &Obj);
-llvm::Error macho2yaml(llvm::raw_ostream &Out,
-                           const llvm::object::Binary &Obj);
+llvm::Error macho2yaml(llvm::raw_ostream &Out, const llvm::object::Binary &Obj,
+                       unsigned RawSegments);
 llvm::Error minidump2yaml(llvm::raw_ostream &Out,
                           const llvm::object::MinidumpFile &Obj);
 llvm::Error xcoff2yaml(llvm::raw_ostream &Out,