[dsymutil] Upstream unobfuscation logic.
authorJonas Devlieghere <jonas@devlieghere.com>
Mon, 7 Jan 2019 23:27:25 +0000 (23:27 +0000)
committerJonas Devlieghere <jonas@devlieghere.com>
Mon, 7 Jan 2019 23:27:25 +0000 (23:27 +0000)
The unobfuscation support for BCSymbolMaps was the last piece of code
that hasn't been upstreamed yet. This patch contains a reworked version
of the existing code and relevant tests.

Differential revision: https://reviews.llvm.org/D56346

llvm-svn: 350580

21 files changed:
llvm/test/tools/dsymutil/ARM/obfuscated.test [new file with mode: 0644]
llvm/test/tools/dsymutil/Inputs/E828A486-8433-3A5E-B6DB-A6294D28133D.plist [new file with mode: 0644]
llvm/test/tools/dsymutil/Inputs/obfuscated.2.arm64 [new file with mode: 0644]
llvm/test/tools/dsymutil/Inputs/obfuscated.2.map [new file with mode: 0644]
llvm/test/tools/dsymutil/Inputs/obfuscated.arm64 [new file with mode: 0644]
llvm/test/tools/dsymutil/Inputs/obfuscated.map [new file with mode: 0644]
llvm/test/tools/dsymutil/cmdline.test
llvm/tools/dsymutil/CMakeLists.txt
llvm/tools/dsymutil/DebugMap.h
llvm/tools/dsymutil/DwarfLinker.cpp
llvm/tools/dsymutil/DwarfStreamer.cpp
llvm/tools/dsymutil/DwarfStreamer.h
llvm/tools/dsymutil/LinkUtils.h
llvm/tools/dsymutil/MachODebugMapParser.cpp
llvm/tools/dsymutil/MachOUtils.cpp
llvm/tools/dsymutil/MachOUtils.h
llvm/tools/dsymutil/NonRelocatableStringpool.cpp
llvm/tools/dsymutil/NonRelocatableStringpool.h
llvm/tools/dsymutil/SymbolMap.cpp [new file with mode: 0644]
llvm/tools/dsymutil/SymbolMap.h [new file with mode: 0644]
llvm/tools/dsymutil/dsymutil.cpp

diff --git a/llvm/test/tools/dsymutil/ARM/obfuscated.test b/llvm/test/tools/dsymutil/ARM/obfuscated.test
new file mode 100644 (file)
index 0000000..9ce684c
--- /dev/null
@@ -0,0 +1,166 @@
+REQUIRES: system-darwin
+
+RUN: dsymutil --symbol-map %p/../Inputs/obfuscated.map %p/../Inputs/obfuscated.arm64 -f -o - \
+RUN:     | llvm-dwarfdump -v - \
+RUN:     | FileCheck %s
+
+RUN: dsymutil --symbol-map %p/../Inputs/obfuscated.map %p/../Inputs/obfuscated.arm64 -f -o - \
+RUN:     | llvm-dwarfdump -v - \
+RUN:     | FileCheck --check-prefix=NOHIDDEN %s
+
+RUN: dsymutil --symbol-map %p/../Inputs/obfuscated.2.map %p/../Inputs/obfuscated.2.arm64 -f -o - \
+RUN:     | llvm-dwarfdump -v - \
+RUN:     | FileCheck --check-prefix=NOHIDDEN %s
+
+// Run with plist and make sure dsymutil finds it.
+RUN: mkdir -p %t.dSYM/Contents/Resources/DWARF/
+RUN: mkdir -p %t.mapdir
+RUN: cp %p/../Inputs/obfuscated.arm64 %t.dSYM/Contents/Resources/DWARF/
+RUN: cp %p/../Inputs/E828A486-8433-3A5E-B6DB-A6294D28133D.plist %t.dSYM/Contents/Resources/
+RUN: cp %p/../Inputs/obfuscated.map %t.mapdir/506AA50A-6B26-3B37-86D2-DC6EBD57B720.bcsymbolmap
+RUN: dsymutil --symbol-map %t.mapdir %t.dSYM 2>&1 | FileCheck --check-prefix=OBFUSCATING %s
+
+// Run without plist and make sure dsymutil doesn't crash.
+RUN: rm %t.dSYM/Contents/Resources/E828A486-8433-3A5E-B6DB-A6294D28133D.plist
+RUN: dsymutil --symbol-map %t.mapdir %t.dSYM 2>&1 | FileCheck --check-prefix=NOTOBFUSCATING %s
+
+OBFUSCATING-NOT: not unobfuscating
+
+NOTOBFUSCATING: not unobfuscating
+
+NOHIDDEN-NOT: __hidden#
+
+CHECK: .debug_info contents:
+
+CHECK: DW_TAG_compile_unit [1] *
+CHECK:    DW_AT_producer [DW_FORM_strp]    ( {{.*}} "Apple LLVM version 7.0.0 (clang-700.2.38.2)")
+CHECK:    DW_AT_name [DW_FORM_strp]        ( {{.*}} "main.c")
+CHECK:    DW_AT_comp_dir [DW_FORM_strp]    ( {{.*}} "/Users/steven/dev/alpena/tests/src")
+CHECK:    DW_TAG_subprogram [2]
+CHECK:      DW_AT_name [DW_FORM_strp]      ( {{.*}} "main")
+
+CHECK:  DW_TAG_compile_unit [1] *
+CHECK:    DW_AT_producer [DW_FORM_strp]    ( {{.*}} "Apple LLVM version 7.0.0 (clang-700.2.38.2)")
+CHECK:    DW_AT_name [DW_FORM_strp]        ( {{.*}} "one.c")
+CHECK:    DW_AT_comp_dir [DW_FORM_strp]    ( {{.*}} "/Users/steven/dev/alpena/tests/src")
+CHECK:    DW_TAG_subprogram [2]
+CHECK:      DW_AT_name [DW_FORM_strp]      ( {{.*}} "one")
+
+CHECK:  DW_TAG_compile_unit [1] *
+CHECK:    DW_AT_producer [DW_FORM_strp]    ( {{.*}} "Apple LLVM version 7.0.0 (clang-700.2.38.2)")
+CHECK:    DW_AT_name [DW_FORM_strp]        ( {{.*}} "two.c")
+CHECK:    DW_AT_comp_dir [DW_FORM_strp]    ( {{.*}} "/Users/steven/dev/alpena/tests/src")
+CHECK:    DW_TAG_subprogram [2]
+CHECK:      DW_AT_name [DW_FORM_strp]      ( {{.*}} "two")
+
+CHECK:  DW_TAG_compile_unit [1] *
+CHECK:    DW_AT_producer [DW_FORM_strp]    ( {{.*}} "Apple LLVM version 7.0.0 (clang-700.2.38.2)")
+CHECK:    DW_AT_name [DW_FORM_strp]        ( {{.*}} "three.c")
+CHECK:    DW_AT_comp_dir [DW_FORM_strp]    ( {{.*}} "/Users/steven/dev/alpena/tests/src")
+CHECK:    DW_TAG_subprogram [2]
+CHECK:      DW_AT_name [DW_FORM_strp]      ( {{.*}} "three")
+
+CHECK:  DW_TAG_compile_unit [1] *
+CHECK:    DW_AT_producer [DW_FORM_strp]    ( {{.*}} "Apple LLVM version 7.0.0 (clang-700.2.38.2)")
+CHECK:    DW_AT_name [DW_FORM_strp]        ( {{.*}} "four.c")
+CHECK:    DW_AT_stmt_list [DW_FORM_data4]  (0x0000011e)
+CHECK:    DW_AT_comp_dir [DW_FORM_strp]    ( {{.*}} "/Users/steven/dev/alpena/tests/src")
+CHECK:    DW_TAG_subprogram [2]
+CHECK:      DW_AT_name [DW_FORM_strp]      ( {{.*}} "four")
+
+CHECK:  DW_TAG_compile_unit [1] *
+CHECK:    DW_AT_producer [DW_FORM_strp]    ( {{.*}} "Apple LLVM version 7.0.0 (clang-700.2.38.2)")
+CHECK:    DW_AT_name [DW_FORM_strp]        ( {{.*}} "five.c")
+CHECK:    DW_AT_comp_dir [DW_FORM_strp]    ( {{.*}} "/Users/steven/dev/alpena/tests/src")
+CHECK:    DW_TAG_subprogram [2]
+CHECK:      DW_AT_name [DW_FORM_strp]      ( {{.*}} "five")
+
+CHECK:  DW_TAG_compile_unit [1] *
+CHECK:    DW_AT_producer [DW_FORM_strp]    ( {{.*}} "Apple LLVM version 7.0.0 (clang-700.2.38.2)")
+CHECK:    DW_AT_name [DW_FORM_strp]        ( {{.*}} "six.c")
+CHECK:    DW_AT_comp_dir [DW_FORM_strp]    ( {{.*}} "/Users/steven/dev/alpena/tests/src")
+CHECK:    DW_TAG_subprogram [2]
+CHECK:      DW_AT_name [DW_FORM_strp]      ( {{.*}} "six")
+
+CHECK: .debug_line contents:
+CHECK: file_names[  1]:
+CHECK:            name: "main.c"
+CHECK:       dir_index: 0
+CHECK:        mod_time: 0x00000000
+CHECK: file_names[  1]:
+CHECK:            name: "one.c"
+CHECK:       dir_index: 0
+CHECK:        mod_time: 0x00000000
+CHECK:          length: 0x00000000
+CHECK: file_names[  1]:
+CHECK:            name: "two.c"
+CHECK:       dir_index: 0
+CHECK:        mod_time: 0x00000000
+CHECK:          length: 0x00000000
+CHECK: file_names[  1]:
+CHECK:            name: "three.c"
+CHECK:       dir_index: 0
+CHECK:        mod_time: 0x00000000
+CHECK:          length: 0x00000000
+CHECK: file_names[  1]:
+CHECK:            name: "four.c"
+CHECK:       dir_index: 0
+CHECK:        mod_time: 0x00000000
+CHECK:          length: 0x00000000
+CHECK: file_names[  1]:
+CHECK:            name: "five.c"
+CHECK:       dir_index: 0
+CHECK:        mod_time: 0x00000000
+CHECK:          length: 0x00000000
+CHECK: file_names[  1]:
+CHECK:            name: "six.c"
+CHECK:       dir_index: 0
+CHECK:        mod_time: 0x00000000
+CHECK:          length: 0x00000000
+
+CHECK: .debug_pubnames contents:
+CHECK: length = 0x00000017 version = 0x0002 unit_offset = 0x00000000 unit_size = 0x00000044
+CHECK: 0x0000002e "main"
+CHECK: length = 0x00000016 version = 0x0002 unit_offset = 0x00000044 unit_size = 0x00000044
+CHECK: 0x0000002e "one"
+CHECK: length = 0x00000016 version = 0x0002 unit_offset = 0x00000088 unit_size = 0x00000044
+CHECK: 0x0000002e "two"
+CHECK: length = 0x00000018 version = 0x0002 unit_offset = 0x000000cc unit_size = 0x00000044
+CHECK: 0x0000002e "three"
+CHECK: length = 0x00000017 version = 0x0002 unit_offset = 0x00000110 unit_size = 0x00000044
+CHECK: 0x0000002e "four"
+CHECK: length = 0x00000017 version = 0x0002 unit_offset = 0x00000154 unit_size = 0x00000044
+CHECK: 0x0000002e "five"
+CHECK: length = 0x00000016 version = 0x0002 unit_offset = 0x00000198 unit_size = 0x00000044
+CHECK: 0x0000002e "six"
+
+CHECK: .apple_names contents:
+
+CHECK: String: 0x00000091 "five"
+CHECK-NEXT: Data 0 [
+CHECK-NEXT:   Atom[0]: 0x00000182
+CHECK-NEXT: ]
+CHECK: String: 0x0000009c "six"
+CHECK-NEXT: Data 0 [
+CHECK-NEXT:   Atom[0]: 0x000001c6
+CHECK-NEXT: ]
+CHECK: String: 0x00000078 "three"
+CHECK-NEXT: Data 0 [
+CHECK-NEXT:   Atom[0]: 0x000000fa
+CHECK-NEXT: ]
+CHECK: String: 0x0000006c "two"
+CHECK-NEXT: Data 0 [
+CHECK-NEXT:   Atom[0]: 0x000000b6
+CHECK-NEXT: ]
+CHECK: String: 0x00000057 "main"
+CHECK-NEXT: Data 0 [
+CHECK-NEXT:   Atom[0]: 0x0000002e
+CHECK-NEXT: ]
+CHECK: String: 0x00000085 "four"
+CHECK-NEXT: Data 0 [
+CHECK-NEXT:   Atom[0]: 0x0000013e
+CHECK-NEXT: ]
+CHECK: String: 0x00000062 "one"
+CHECK-NEXT: Data 0 [
+CHECK-NEXT:   Atom[0]: 0x00000072
+CHECK-NEXT: ]
diff --git a/llvm/test/tools/dsymutil/Inputs/E828A486-8433-3A5E-B6DB-A6294D28133D.plist b/llvm/test/tools/dsymutil/Inputs/E828A486-8433-3A5E-B6DB-A6294D28133D.plist
new file mode 100644 (file)
index 0000000..adf7dbf
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+   <key>DBGOriginalUUID</key>
+   <string>506AA50A-6B26-3B37-86D2-DC6EBD57B720</string>
+</dict>
+</plist>
\ No newline at end of file
diff --git a/llvm/test/tools/dsymutil/Inputs/obfuscated.2.arm64 b/llvm/test/tools/dsymutil/Inputs/obfuscated.2.arm64
new file mode 100644 (file)
index 0000000..b40e023
Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/obfuscated.2.arm64 differ
diff --git a/llvm/test/tools/dsymutil/Inputs/obfuscated.2.map b/llvm/test/tools/dsymutil/Inputs/obfuscated.2.map
new file mode 100644 (file)
index 0000000..6efca59
--- /dev/null
@@ -0,0 +1,22 @@
+BCSymbolMap Version: 2.0
+_two
+_three
+_four
+_five
+_six
+LLVM version 3.9.0 (ssh://git@stash.sd.apple.com/devtools/clang.git c74ae34bd917b77f9c848bd599dfde2813fb509f)
+main
+main.c
+/Volumes/Data/dev/BitcodeBuildTests/unit
+one
+one.c
+two
+two.c
+three
+three.c
+four
+four.c
+five
+five.c
+six
+six.c
diff --git a/llvm/test/tools/dsymutil/Inputs/obfuscated.arm64 b/llvm/test/tools/dsymutil/Inputs/obfuscated.arm64
new file mode 100644 (file)
index 0000000..8395798
Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/obfuscated.arm64 differ
diff --git a/llvm/test/tools/dsymutil/Inputs/obfuscated.map b/llvm/test/tools/dsymutil/Inputs/obfuscated.map
new file mode 100644 (file)
index 0000000..30fed8b
--- /dev/null
@@ -0,0 +1,17 @@
+one
+two
+three
+four
+five
+six
+.str
+Apple LLVM version 7.0.0 (clang-700.2.38.2)
+main
+main.c
+/Users/steven/dev/alpena/tests/src
+one.c
+two.c
+three.c
+four.c
+five.c
+six.c
index c2ddead..60a1a0a 100644 (file)
@@ -17,6 +17,7 @@ HELP: -num-threads=<n>
 HELP: -o=<filename>
 HELP: -oso-prepend-path=<path>
 HELP: -papertrail
+HELP: -symbol-map
 HELP: -symtab
 HELP: -toolchain
 HELP: -update
index f41a6fd..480f78f 100644 (file)
@@ -20,6 +20,7 @@ add_llvm_tool(dsymutil
   MachODebugMapParser.cpp
   MachOUtils.cpp
   NonRelocatableStringpool.cpp
+  SymbolMap.cpp
 
   DEPENDS
   intrinsics_gen
index c988377..d8de37e 100644 (file)
@@ -75,7 +75,7 @@ class DebugMapObject;
 class DebugMap {
   Triple BinaryTriple;
   std::string BinaryPath;
-
+  std::vector<uint8_t> BinaryUUID;
   using ObjectContainer = std::vector<std::unique_ptr<DebugMapObject>>;
 
   ObjectContainer Objects;
@@ -89,8 +89,10 @@ class DebugMap {
   ///@}
 
 public:
-  DebugMap(const Triple &BinaryTriple, StringRef BinaryPath)
-      : BinaryTriple(BinaryTriple), BinaryPath(BinaryPath) {}
+  DebugMap(const Triple &BinaryTriple, StringRef BinaryPath,
+           ArrayRef<uint8_t> BinaryUUID = ArrayRef<uint8_t>())
+      : BinaryTriple(BinaryTriple), BinaryPath(BinaryPath),
+        BinaryUUID(BinaryUUID.begin(), BinaryUUID.end()) {}
 
   using const_iterator = ObjectContainer::const_iterator;
 
@@ -113,6 +115,10 @@ public:
 
   const Triple &getTriple() const { return BinaryTriple; }
 
+  /// Return a non-owning view of the UUID bytes stored in this DebugMap. The
+  /// view is empty when no UUID was handed to the constructor. It refers to
+  /// the BinaryUUID member, so it must not be used after this object is gone.
+  ArrayRef<uint8_t> getUUID() const {
+    return ArrayRef<uint8_t>(BinaryUUID);
+  }
+
   StringRef getBinaryPath() const { return BinaryPath; }
 
   void print(raw_ostream &OS) const;
index 2862739..0743cfc 100644 (file)
@@ -1701,6 +1701,8 @@ void DwarfLinker::patchLineTableForUnit(CompileUnit &Unit,
   DWARFDataExtractor LineExtractor(
       OrigDwarf.getDWARFObj(), OrigDwarf.getDWARFObj().getLineSection(),
       OrigDwarf.isLittleEndian(), Unit.getOrigUnit().getAddressByteSize());
+  if (Options.Translator)
+    return Streamer->translateLineTable(LineExtractor, StmtOffset, Options);
 
   Error Err = LineTable.parse(LineExtractor, &StmtOffset, OrigDwarf,
                               &Unit.getOrigUnit(), DWARFContext::dumpWarning);
@@ -2245,17 +2247,16 @@ void DwarfLinker::DIECloner::cloneAllCompileUnits(
     if (Linker.Options.NoOutput)
       continue;
 
-    if (LLVM_LIKELY(!Linker.Options.Update)) {
-      // FIXME: for compatibility with the classic dsymutil, we emit an empty
-      // line table for the unit, even if the unit doesn't actually exist in
-      // the DIE tree.
+    // FIXME: for compatibility with the classic dsymutil, we emit
+    // an empty line table for the unit, even if the unit doesn't
+    // actually exist in the DIE tree.
+    if (LLVM_LIKELY(!Linker.Options.Update) || Linker.Options.Translator)
       Linker.patchLineTableForUnit(*CurrentUnit, DwarfContext, Ranges, DMO);
-      Linker.emitAcceleratorEntriesForUnit(*CurrentUnit);
-      Linker.patchRangesForUnit(*CurrentUnit, DwarfContext, DMO);
-      Linker.Streamer->emitLocationsForUnit(*CurrentUnit, DwarfContext);
-    } else {
-      Linker.emitAcceleratorEntriesForUnit(*CurrentUnit);
-    }
+    Linker.emitAcceleratorEntriesForUnit(*CurrentUnit);
+    if (Linker.Options.Update)
+      continue;
+    Linker.patchRangesForUnit(*CurrentUnit, DwarfContext, DMO);
+    Linker.Streamer->emitLocationsForUnit(*CurrentUnit, DwarfContext);
   }
 
   if (Linker.Options.NoOutput)
@@ -2380,7 +2381,7 @@ bool DwarfLinker::link(const DebugMap &Map) {
   // This Dwarf string pool which is used for emission. It must be used
   // serially as the order of calling getStringOffset matters for
   // reproducibility.
-  OffsetsStringPool OffsetsStringPool;
+  OffsetsStringPool OffsetsStringPool(Options.Translator);
 
   // ODR Contexts for the link.
   DeclContextTree ODRContexts;
@@ -2649,7 +2650,7 @@ bool DwarfLinker::link(const DebugMap &Map) {
     pool.wait();
   }
 
-  return Options.NoOutput ? true : Streamer->finish(Map);
+  return Options.NoOutput ? true : Streamer->finish(Map, Options.Translator);
 } // namespace dsymutil
 
 bool linkDwarf(raw_fd_ostream &OutFile, BinaryHolder &BinHolder,
index ef798be..28088ff 100644 (file)
@@ -124,11 +124,11 @@ bool DwarfStreamer::init(Triple TheTriple) {
   return true;
 }
 
-bool DwarfStreamer::finish(const DebugMap &DM) {
+bool DwarfStreamer::finish(const DebugMap &DM, SymbolMapTranslator &T) {
   bool Result = true;
   if (DM.getTriple().isOSDarwin() && !DM.getBinaryPath().empty() &&
       Options.FileType == OutputFileType::Object)
-    Result = MachOUtils::generateDsymCompanion(DM, *MS, OutFile);
+    Result = MachOUtils::generateDsymCompanion(DM, T, *MS, OutFile);
   else
     MS->Finish();
   return Result;
@@ -577,6 +577,89 @@ void DwarfStreamer::emitLineTableForUnit(MCDwarfLineTableParams Params,
   MS->EmitLabel(LineEndSym);
 }
 
+/// Copy the debug_line over to the updated binary while unobfuscating the file
+/// names and directories.
+///
+/// \param Data    extractor over the contents of the input line section.
+/// \param Offset  offset in \p Data of the line table to translate. It is
+///                taken by value, so the caller's offset is not advanced.
+/// \param Options link options; Options.Translator is applied to every
+///                directory and file name found in the header.
+///
+/// The unit length and header length are re-emitted as label differences, so
+/// they reflect the size of the translated header. LineSectionSize is bumped
+/// by the number of bytes emitted.
+void DwarfStreamer::translateLineTable(DataExtractor Data, uint32_t Offset,
+                                       LinkOptions &Options) {
+  MS->SwitchSection(MC->getObjectFileInfo()->getDwarfLineSection());
+  StringRef Contents = Data.getData();
+
+  // We have to deconstruct the line table header, because it contains two
+  // length fields that will need to be updated when we change the length of
+  // the files and directories in there.
+  unsigned UnitLength = Data.getU32(&Offset);
+  // End of this unit in the input, measured from just after the length field.
+  unsigned UnitEnd = Offset + UnitLength;
+  MCSymbol *BeginLabel = MC->createTempSymbol();
+  MCSymbol *EndLabel = MC->createTempSymbol();
+  unsigned Version = Data.getU16(&Offset);
+
+  // Bail out before anything is emitted: the section has been switched to, but
+  // no bytes of this table are written.
+  // NOTE(review): version 5 is accepted here, but the parsing below looks like
+  // it assumes the pre-v5 header layout (header length right after the
+  // version, NUL-terminated directory/file lists) - confirm against the DWARF
+  // v5 line table header before relying on this for v5 input.
+  if (Version > 5) {
+    warn("Unsupported line table version: dropping contents and not "
+         "unobfsucating line table.");
+    return;
+  }
+
+  // Unit length (4 bytes, computed from the labels) followed by the 2-byte
+  // version: 6 bytes in total.
+  Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+  Asm->OutStreamer->EmitLabel(BeginLabel);
+  Asm->emitInt16(Version);
+  LineSectionSize += 6;
+
+  // Header length (4 bytes), again computed from labels. The corresponding
+  // 4-byte field of the input is skipped rather than copied.
+  MCSymbol *HeaderBeginLabel = MC->createTempSymbol();
+  MCSymbol *HeaderEndLabel = MC->createTempSymbol();
+  Asm->EmitLabelDifference(HeaderEndLabel, HeaderBeginLabel, 4);
+  Asm->OutStreamer->EmitLabel(HeaderBeginLabel);
+  Offset += 4;
+  LineSectionSize += 4;
+
+  uint32_t AfterHeaderLengthOffset = Offset;
+  // Skip to the directories.
+  // One extra byte is skipped for version 4 and later (presumably the
+  // maximum_operations_per_instruction field - confirm against the DWARF
+  // spec). The byte read next is used as the opcode base, and OpcodeBase - 1
+  // further bytes are skipped after it.
+  Offset += (Version >= 4) ? 5 : 4;
+  unsigned OpcodeBase = Data.getU8(&Offset);
+  Offset += OpcodeBase - 1;
+  // Everything between the header length and the first directory is copied
+  // verbatim.
+  Asm->OutStreamer->EmitBytes(Contents.slice(AfterHeaderLengthOffset, Offset));
+  LineSectionSize += Offset - AfterHeaderLengthOffset;
+
+  // Offset points to the first directory.
+  // Directories are NUL-terminated strings; an empty string ends the list.
+  while (const char *Dir = Data.getCStr(&Offset)) {
+    if (Dir[0] == 0)
+      break;
+
+    StringRef Translated = Options.Translator(Dir);
+    Asm->OutStreamer->EmitBytes(Translated);
+    Asm->emitInt8(0);
+    LineSectionSize += Translated.size() + 1;
+  }
+  // Terminator of the directory list.
+  Asm->emitInt8(0);
+  LineSectionSize += 1;
+
+  // File entries: a NUL-terminated name followed by three ULEB128 values
+  // (presumably directory index, modification time and length - confirm). An
+  // empty name ends the list.
+  while (const char *File = Data.getCStr(&Offset)) {
+    if (File[0] == 0)
+      break;
+
+    StringRef Translated = Options.Translator(File);
+    Asm->OutStreamer->EmitBytes(Translated);
+    Asm->emitInt8(0);
+    LineSectionSize += Translated.size() + 1;
+
+    // The ULEB128 values are decoded and re-encoded; the size accounting uses
+    // the number of bytes consumed from the input.
+    uint32_t OffsetBeforeLEBs = Offset;
+    Asm->EmitULEB128(Data.getULEB128(&Offset));
+    Asm->EmitULEB128(Data.getULEB128(&Offset));
+    Asm->EmitULEB128(Data.getULEB128(&Offset));
+    LineSectionSize += Offset - OffsetBeforeLEBs;
+  }
+  // Terminator of the file list.
+  Asm->emitInt8(0);
+  LineSectionSize += 1;
+
+  Asm->OutStreamer->EmitLabel(HeaderEndLabel);
+
+  // Copy the actual line table program over.
+  Asm->OutStreamer->EmitBytes(Contents.slice(Offset, UnitEnd));
+  LineSectionSize += UnitEnd - Offset;
+
+  Asm->OutStreamer->EmitLabel(EndLabel);
+  // NOTE(review): Offset is a by-value parameter, so this assignment has no
+  // effect outside this function.
+  Offset = UnitEnd;
+}
+
 static void emitSectionContents(const object::ObjectFile &Obj,
                                 StringRef SecName, MCStreamer *MS) {
   StringRef Contents;
@@ -586,8 +669,10 @@ static void emitSectionContents(const object::ObjectFile &Obj,
 }
 
 void DwarfStreamer::copyInvariantDebugSection(const object::ObjectFile &Obj) {
-  MS->SwitchSection(MC->getObjectFileInfo()->getDwarfLineSection());
-  emitSectionContents(Obj, "debug_line", MS);
+  if (!Options.Translator) {
+    MS->SwitchSection(MC->getObjectFileInfo()->getDwarfLineSection());
+    emitSectionContents(Obj, "debug_line", MS);
+  }
 
   MS->SwitchSection(MC->getObjectFileInfo()->getDwarfLocSection());
   emitSectionContents(Obj, "debug_loc", MS);
index 679d124..abc8654 100644 (file)
@@ -50,7 +50,7 @@ public:
   bool init(Triple TheTriple);
 
   /// Dump the file to the disk.
-  bool finish(const DebugMap &);
+  bool finish(const DebugMap &, SymbolMapTranslator &T);
 
   AsmPrinter &getAsmPrinter() const { return *Asm; }
 
@@ -104,6 +104,11 @@ public:
                             std::vector<DWARFDebugLine::Row> &Rows,
                             unsigned AdddressSize);
 
+  /// Copy the debug_line over to the updated binary while unobfuscating the
+  /// file names and directories.
+  void translateLineTable(DataExtractor LineData, uint32_t Offset,
+                          LinkOptions &Options);
+
   /// Copy over the debug sections that are not modified when updating.
   void copyInvariantDebugSection(const object::ObjectFile &Obj);
 
index f0abd88..0769741 100644 (file)
 #ifndef LLVM_TOOLS_DSYMUTIL_LINKOPTIONS_H
 #define LLVM_TOOLS_DSYMUTIL_LINKOPTIONS_H
 
+#include "SymbolMap.h"
+
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/WithColor.h"
+
 #include <string>
 
 namespace llvm {
@@ -60,6 +63,9 @@ struct LinkOptions {
   /// -oso-prepend-path
   std::string PrependPath;
 
+  /// Symbol map translator.
+  SymbolMapTranslator Translator;
+
   LinkOptions() = default;
 };
 
index d696e1d..8ff7e22 100644 (file)
@@ -163,7 +163,8 @@ std::unique_ptr<DebugMap>
 MachODebugMapParser::parseOneBinary(const MachOObjectFile &MainBinary,
                                     StringRef BinaryPath) {
   loadMainBinarySymbols(MainBinary);
-  Result = make_unique<DebugMap>(MainBinary.getArchTriple(), BinaryPath);
+  ArrayRef<uint8_t> UUID = MainBinary.getUuid();
+  Result = make_unique<DebugMap>(MainBinary.getArchTriple(), BinaryPath, UUID);
   MainBinaryStrings = MainBinary.getStringTableData();
   for (const SymbolRef &Symbol : MainBinary.symbols()) {
     const DataRefImpl &DRI = Symbol.getRawDataRefImpl();
index cac4ad8..8c54563 100644 (file)
@@ -333,8 +333,8 @@ static unsigned segmentLoadCommandSize(bool Is64Bit, unsigned NumSections) {
 // Stream a dSYM companion binary file corresponding to the binary referenced
 // by \a DM to \a OutFile. The passed \a MS MCStreamer is setup to write to
 // \a OutFile and it must be using a MachObjectWriter object to do so.
-bool generateDsymCompanion(const DebugMap &DM, MCStreamer &MS,
-                           raw_fd_ostream &OutFile) {
+bool generateDsymCompanion(const DebugMap &DM, SymbolMapTranslator &Translator,
+                           MCStreamer &MS, raw_fd_ostream &OutFile) {
   auto &ObjectStreamer = static_cast<MCObjectStreamer &>(MS);
   MCAssembler &MCAsm = ObjectStreamer.getAssembler();
   auto &Writer = static_cast<MachObjectWriter &>(MCAsm.getWriter());
@@ -443,7 +443,7 @@ bool generateDsymCompanion(const DebugMap &DM, MCStreamer &MS,
   }
 
   SmallString<0> NewSymtab;
-  NonRelocatableStringpool NewStrings;
+  NonRelocatableStringpool NewStrings(Translator);
   unsigned NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
   unsigned NumSyms = 0;
   uint64_t NewStringsSize = 0;
index a8be89e..c24f963 100644 (file)
@@ -9,8 +9,11 @@
 #ifndef LLVM_TOOLS_DSYMUTIL_MACHOUTILS_H
 #define LLVM_TOOLS_DSYMUTIL_MACHOUTILS_H
 
+#include "SymbolMap.h"
+
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/FileSystem.h"
+
 #include <string>
 
 namespace llvm {
@@ -38,8 +41,8 @@ bool generateUniversalBinary(SmallVectorImpl<ArchAndFile> &ArchFiles,
                              StringRef OutputFileName, const LinkOptions &,
                              StringRef SDKPath);
 
-bool generateDsymCompanion(const DebugMap &DM, MCStreamer &MS,
-                           raw_fd_ostream &OutFile);
+bool generateDsymCompanion(const DebugMap &DM, SymbolMapTranslator &Translator,
+                           MCStreamer &MS, raw_fd_ostream &OutFile);
 
 std::string getArchName(StringRef Arch);
 } // namespace MachOUtils
index d82ff84..b8392a1 100644 (file)
@@ -16,6 +16,8 @@ DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) {
   if (S.empty() && !Strings.empty())
     return EmptyString;
 
+  if (Translator)
+    S = Translator(S);
   auto I = Strings.insert({S, DwarfStringPoolEntry()});
   auto &Entry = I.first->second;
   if (I.second || !Entry.isIndexed()) {
@@ -29,6 +31,10 @@ DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) {
 
 StringRef NonRelocatableStringpool::internString(StringRef S) {
   DwarfStringPoolEntry Entry{nullptr, 0, DwarfStringPoolEntry::NotIndexed};
+
+  if (Translator)
+    S = Translator(S);
+
   auto InsertResult = Strings.insert({S, Entry});
   return InsertResult.first->getKey();
 }
index e339e51..c398ff0 100644 (file)
@@ -10,6 +10,8 @@
 #ifndef LLVM_TOOLS_DSYMUTIL_NONRELOCATABLESTRINGPOOL_H
 #define LLVM_TOOLS_DSYMUTIL_NONRELOCATABLESTRINGPOOL_H
 
+#include "SymbolMap.h"
+
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
@@ -32,7 +34,9 @@ public:
   /// order.
   using MapTy = StringMap<DwarfStringPoolEntry, BumpPtrAllocator>;
 
-  NonRelocatableStringpool() {
+  NonRelocatableStringpool(
+      SymbolMapTranslator Translator = SymbolMapTranslator())
+      : Translator(Translator) {
     // Legacy dsymutil puts an empty string at the start of the line table.
     EmptyString = getEntry("");
   }
@@ -62,6 +66,7 @@ private:
   uint32_t CurrentEndOffset = 0;
   unsigned NumEntries = 0;
   DwarfStringPoolEntryRef EmptyString;
+  SymbolMapTranslator Translator;
 };
 
 /// Helper for making strong types.
diff --git a/llvm/tools/dsymutil/SymbolMap.cpp b/llvm/tools/dsymutil/SymbolMap.cpp
new file mode 100644 (file)
index 0000000..cab9374
--- /dev/null
@@ -0,0 +1,162 @@
+//===- tools/dsymutil/SymbolMap.cpp ---------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SymbolMap.h"
+#include "DebugMap.h"
+#include "MachOUtils.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/WithColor.h"
+
+#ifdef __APPLE__
+#include <CoreFoundation/CoreFoundation.h>
+#include <uuid/uuid.h>
+#endif
+
+namespace llvm {
+namespace dsymutil {
+
+/// Translate an obfuscated string back to its original spelling.
+///
+/// Strings that start with neither "__hidden#" nor "___hidden#" are returned
+/// unchanged. For the others, the decimal number following the prefix (up to
+/// the next '_') is used as an index into UnobfuscatedStrings. If the number
+/// cannot be parsed or is out of range, a warning is printed and \p Input is
+/// returned as is.
+///
+/// For the three-underscore spelling, and only when MangleNames is set, the
+/// translation is adjusted for the MachO symbol table: a leading \1 is
+/// stripped, otherwise a leading '_' is prepended.
+///
+/// \returns either \p Input itself or a StringRef into storage owned by this
+/// translator.
+StringRef SymbolMapTranslator::operator()(StringRef Input) {
+  if (!Input.startswith("__hidden#") && !Input.startswith("___hidden#"))
+    return Input;
+
+  bool MightNeedUnderscore = false;
+  StringRef Line = Input.drop_front(sizeof("__hidden#") - 1);
+  // With the three-underscore prefix the '#' is still at the front of Line.
+  // Test with startswith instead of indexing: Line is empty when Input is
+  // exactly "__hidden#", and Line[0] would then read out of bounds.
+  if (Line.startswith("#")) {
+    Line = Line.drop_front();
+    MightNeedUnderscore = true;
+  }
+
+  // getAsInteger leaves LineNumber untouched on failure, so a malformed
+  // number falls into the out-of-range warning below.
+  std::size_t LineNumber = std::numeric_limits<std::size_t>::max();
+  Line.split('_').first.getAsInteger(10, LineNumber);
+  if (LineNumber >= UnobfuscatedStrings.size()) {
+    WithColor::warning() << "reference to a unexisting unobfuscated string "
+                         << Input << ": symbol map mismatch?\n"
+                         << Line << '\n';
+    return Input;
+  }
+
+  const std::string &Translation = UnobfuscatedStrings[LineNumber];
+  if (!MightNeedUnderscore || !MangleNames)
+    return Translation;
+
+  // Objective-C symbols for the MachO symbol table start with a \1. Please see
+  // `CGObjCCommonMac::GetNameForMethod` in clang.
+  if (Translation[0] == 1)
+    return StringRef(Translation).drop_front();
+
+  // We need permanent storage for the string we are about to create. Just
+  // append it to the vector containing translations. This should only happen
+  // during MachO symbol table translation, thus there should be no risk on
+  // exponential growth.
+  // NOTE(review): growing the vector may relocate its strings, so StringRefs
+  // handed out by earlier calls may no longer be valid afterwards - callers
+  // should copy the result before calling again; confirm all callers do.
+  UnobfuscatedStrings.emplace_back("_" + Translation);
+  return UnobfuscatedStrings.back();
+}
+
+/// Locate and parse the BCSymbolMap to use for \p InputFile.
+///
+/// The path given at construction is used as follows:
+///  - empty: an empty translator is returned;
+///  - a directory (Apple hosts only, and only when \p Map carries a UUID): a
+///    "<UUID>.plist" two directory levels above \p InputFile is consulted and,
+///    if it has a DBGOriginalUUID string, "<that UUID>.bcsymbolmap" inside the
+///    directory is selected;
+///  - still a directory: "<input file name>-<arch>.bcsymbolmap" inside it;
+///  - otherwise the path is used as the symbol map file itself.
+///
+/// \returns a translator holding one string per line of the map, or an empty
+/// translator (after a warning) if the file cannot be read or has an
+/// unsupported version.
+SymbolMapTranslator SymbolMapLoader::Load(StringRef InputFile,
+                                          const DebugMap &Map) const {
+  if (SymbolMap.empty())
+    return {};
+
+  std::string SymbolMapPath = SymbolMap;
+
+#ifdef __APPLE__
+  // Look through the UUID Map.
+  if (sys::fs::is_directory(SymbolMapPath) && !Map.getUUID().empty()) {
+    uuid_string_t UUIDString;
+    uuid_unparse_upper((const uint8_t *)Map.getUUID().data(), UUIDString);
+
+    SmallString<256> PlistPath(
+        sys::path::parent_path(sys::path::parent_path(InputFile)));
+    sys::path::append(PlistPath, StringRef(UUIDString).str() + ".plist");
+
+    // Every Create call below may return NULL, and CFRelease must not be
+    // called with NULL, so each object is checked before use and release.
+    CFStringRef plistFile = CFStringCreateWithCString(
+        kCFAllocatorDefault, PlistPath.c_str(), kCFStringEncodingUTF8);
+    CFURLRef fileURL =
+        plistFile ? CFURLCreateWithFileSystemPath(kCFAllocatorDefault,
+                                                  plistFile,
+                                                  kCFURLPOSIXPathStyle, false)
+                  : nullptr;
+    CFReadStreamRef resourceData =
+        fileURL ? CFReadStreamCreateWithFile(kCFAllocatorDefault, fileURL)
+                : nullptr;
+    if (resourceData) {
+      CFPropertyListRef plist = nullptr;
+      if (CFReadStreamOpen(resourceData))
+        plist = CFPropertyListCreateWithStream(
+            kCFAllocatorDefault, resourceData, 0, kCFPropertyListImmutable,
+            nullptr, nullptr);
+
+      if (plist) {
+        // The root of a property list is not necessarily a dictionary, and the
+        // value is not necessarily a string: check before casting.
+        const void *OldUUID = nullptr;
+        if (CFGetTypeID(plist) == CFDictionaryGetTypeID())
+          OldUUID = CFDictionaryGetValue(static_cast<CFDictionaryRef>(plist),
+                                         CFSTR("DBGOriginalUUID"));
+
+        if (OldUUID && CFGetTypeID(OldUUID) == CFStringGetTypeID()) {
+          CFStringRef OldUUIDString = static_cast<CFStringRef>(OldUUID);
+
+          // CFStringGetCStringPtr is only a fast path and may return NULL;
+          // fall back to copying the characters out.
+          std::string UUID;
+          if (const char *Ptr = CFStringGetCStringPtr(OldUUIDString,
+                                                      kCFStringEncodingUTF8)) {
+            UUID = Ptr;
+          } else {
+            CFIndex Size = CFStringGetMaximumSizeForEncoding(
+                               CFStringGetLength(OldUUIDString),
+                               kCFStringEncodingUTF8) +
+                           1;
+            std::vector<char> Buffer(Size > 0 ? Size : 1);
+            if (CFStringGetCString(OldUUIDString, Buffer.data(), Buffer.size(),
+                                   kCFStringEncodingUTF8))
+              UUID = Buffer.data();
+          }
+
+          // Without a usable UUID keep the directory, so that the name based
+          // lookup below still gets a chance.
+          if (!UUID.empty()) {
+            SmallString<256> BCSymbolMapPath(SymbolMapPath);
+            sys::path::append(BCSymbolMapPath, UUID + ".bcsymbolmap");
+            SymbolMapPath = BCSymbolMapPath.str();
+          }
+        }
+        CFRelease(plist);
+      }
+      CFReadStreamClose(resourceData);
+      CFRelease(resourceData);
+    }
+    if (fileURL)
+      CFRelease(fileURL);
+    if (plistFile)
+      CFRelease(plistFile);
+  }
+#endif
+
+  // No file was selected through the plist: derive the name from the input
+  // file and the architecture.
+  if (sys::fs::is_directory(SymbolMapPath)) {
+    SymbolMapPath += (Twine("/") + sys::path::filename(InputFile) + "-" +
+                      MachOUtils::getArchName(Map.getTriple().getArchName()) +
+                      ".bcsymbolmap")
+                         .str();
+  }
+
+  auto ErrOrMemBuffer = MemoryBuffer::getFile(SymbolMapPath);
+  if (auto EC = ErrOrMemBuffer.getError()) {
+    WithColor::warning() << SymbolMapPath << ": " << EC.message()
+                         << ": not unobfuscating.\n";
+    return {};
+  }
+
+  std::vector<std::string> UnobfuscatedStrings;
+  auto &MemBuf = **ErrOrMemBuffer;
+  StringRef Data(MemBuf.getBufferStart(),
+                 MemBuf.getBufferEnd() - MemBuf.getBufferStart());
+  StringRef LHS;
+  std::tie(LHS, Data) = Data.split('\n');
+  bool MangleNames = false;
+
+  // Check version string first.
+  if (!LHS.startswith("BCSymbolMap Version:")) {
+    // Version string not present, warns but try to parse it.
+    // The first line is then already a translation and must be kept.
+    // NOTE(review): the message says "assuming 1.0", yet MangleNames stays
+    // false here while an explicit 1.0 header sets it to true - confirm this
+    // asymmetry is intended.
+    WithColor::warning() << SymbolMapPath
+                         << " is missing version string: assuming 1.0.\n";
+    UnobfuscatedStrings.emplace_back(LHS);
+  } else if (LHS.equals("BCSymbolMap Version: 1.0")) {
+    MangleNames = true;
+  } else if (LHS.equals("BCSymbolMap Version: 2.0")) {
+    MangleNames = false;
+  } else {
+    StringRef VersionNum;
+    std::tie(LHS, VersionNum) = LHS.split(':');
+    WithColor::warning() << SymbolMapPath
+                         << " has unsupported symbol map version" << VersionNum
+                         << ": not unobfuscating.\n";
+    return {};
+  }
+
+  // Every remaining line is one translation; its position in the vector is the
+  // number used in the obfuscated names.
+  while (!Data.empty()) {
+    std::tie(LHS, Data) = Data.split('\n');
+    UnobfuscatedStrings.emplace_back(LHS);
+  }
+
+  return SymbolMapTranslator(std::move(UnobfuscatedStrings), MangleNames);
+}
+
+} // namespace dsymutil
+} // namespace llvm
diff --git a/llvm/tools/dsymutil/SymbolMap.h b/llvm/tools/dsymutil/SymbolMap.h
new file mode 100644 (file)
index 0000000..e3fbdbb
--- /dev/null
@@ -0,0 +1,54 @@
+//=- tools/dsymutil/SymbolMap.h -----------------------------------*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_DSYMUTIL_SYMBOLMAP_H
+#define LLVM_TOOLS_DSYMUTIL_SYMBOLMAP_H
+
+#include "llvm/ADT/StringRef.h"
+
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace dsymutil {
+class DebugMap;
+
+/// Function object that turns obfuscated strings back into their original
+/// spelling, using the strings read from a BCSymbolMap.
+class SymbolMapTranslator {
+public:
+  /// Create a translator without any strings; it converts to false.
+  SymbolMapTranslator() = default;
+
+  /// Create a translator from the given strings.
+  /// \param UnobfuscatedStrings translations, looked up by position.
+  /// \param MangleNames stored as is; consulted by operator() when deciding
+  ///        whether to adjust a translation.
+  SymbolMapTranslator(std::vector<std::string> UnobfuscatedStrings,
+                      bool MangleNames)
+      : UnobfuscatedStrings(std::move(UnobfuscatedStrings)),
+        MangleNames(MangleNames) {}
+
+  /// Translate \p Input. Defined out of line.
+  StringRef operator()(StringRef Input);
+
+  /// True when at least one string is held.
+  operator bool() const { return !UnobfuscatedStrings.empty(); }
+
+private:
+  std::vector<std::string> UnobfuscatedStrings;
+  bool MangleNames = false;
+};
+
+/// Factory for SymbolMapTranslators. It remembers the symbol map path it was
+/// constructed with and resolves it for each input in Load.
+class SymbolMapLoader {
+public:
+  /// \param SymbolMap path to use for the symbol map lookups; stored as is.
+  SymbolMapLoader(std::string SymbolMap) : SymbolMap{std::move(SymbolMap)} {}
+
+  /// Build the translator for \p InputFile and \p Map. Defined out of line.
+  SymbolMapTranslator Load(StringRef InputFile, const DebugMap &Map) const;
+
+private:
+  /// Path handed to the constructor; never modified afterwards.
+  const std::string SymbolMap;
+};
+} // namespace dsymutil
+} // namespace llvm
+
+#endif // LLVM_TOOLS_DSYMUTIL_SYMBOLMAP_H
index 5fe4067..ec8d050 100644 (file)
@@ -59,6 +59,8 @@ static opt<std::string>
     OutputFileOpt("o",
                   desc("Specify the output file. default: <input file>.dwarf"),
                   value_desc("filename"), cat(DsymCategory));
+static alias OutputFileOptA("out", desc("Alias for -o"),
+                            aliasopt(OutputFileOpt));
 
 static opt<std::string> OsoPrependPath(
     "oso-prepend-path",
@@ -100,6 +102,11 @@ static opt<bool> Update(
     init(false), cat(DsymCategory));
 static alias UpdateA("u", desc("Alias for --update"), aliasopt(Update));
 
+static opt<std::string> SymbolMap(
+    "symbol-map",
+    desc("Updates the existing dSYMs inplace using symbol map specified."),
+    value_desc("bcsymbolmap"), cat(DsymCategory));
+
 static cl::opt<AccelTableKind> AcceleratorTable(
     "accelerator", cl::desc("Output accelerator tables."),
     cl::values(clEnumValN(AccelTableKind::Default, "Default",
@@ -273,8 +280,11 @@ static bool verify(llvm::StringRef OutputFile, llvm::StringRef Arch) {
 }
 
 static Expected<std::string> getOutputFileName(llvm::StringRef InputFile) {
+  if (OutputFileOpt == "-")
+    return OutputFileOpt;
+
   // When updating, do in place replacement.
-  if (OutputFileOpt.empty() && Update)
+  if (OutputFileOpt.empty() && (Update || !SymbolMap.empty()))
     return InputFile;
 
   // If a flat dSYM has been requested, things are pretty simple.
@@ -325,6 +335,9 @@ static Expected<LinkOptions> getOptions() {
   Options.PrependPath = OsoPrependPath;
   Options.TheAccelTableKind = AcceleratorTable;
 
+  if (!SymbolMap.empty())
+    Options.Update = true;
+
   if (Assembly)
     Options.FileType = OutputFileType::Assembly;
 
@@ -443,6 +456,13 @@ int main(int argc, char **argv) {
     return 1;
   }
 
+  if (InputFiles.size() > 1 && !SymbolMap.empty() &&
+      !llvm::sys::fs::is_directory(SymbolMap)) {
+    WithColor::error() << "when unobfuscating multiple files, --symbol-map "
+                       << "needs to point to a directory.\n";
+    return 1;
+  }
+
   if (getenv("RC_DEBUG_OPTIONS"))
     PaperTrailWarnings = true;
 
@@ -457,6 +477,8 @@ int main(int argc, char **argv) {
       return 1;
     }
 
+  SymbolMapLoader SymMapLoader(SymbolMap);
+
   for (auto &InputFile : *InputsOrErr) {
     // Dump the symbol table for each input file and requested arch
     if (DumpStab) {
@@ -511,6 +533,9 @@ int main(int argc, char **argv) {
       if (DumpDebugMap)
         continue;
 
+      if (!SymbolMap.empty())
+        OptionsOrErr->Translator = SymMapLoader.Load(InputFile, *Map);
+
       if (Map->begin() == Map->end())
         WithColor::warning()
             << "no debug symbols in executable (-arch "