[llvm-objdump] Fix jumptable detection when disassembling Mach-O binaries
authorKuba Mracek <mracek@apple.com>
Thu, 1 Aug 2019 15:51:14 +0000 (15:51 +0000)
committerKuba Mracek <mracek@apple.com>
Thu, 1 Aug 2019 15:51:14 +0000 (15:51 +0000)
- Add LC_SEGMENT_64 handling in getSectionsAndSymbols to be able to find the base segment address from 64-bit Mach-O binaries.
- Add "data in code" detection into the !symbolTableWorked case, extract it into a separate function.
- Fix uninitialized variable usage on BaseSegmentAddress (initialize to 0).
- Add test.

Differential Revision: https://reviews.llvm.org/D65491

llvm-svn: 367578

llvm/test/tools/llvm-objdump/macho-data-in-code.ll [new file with mode: 0644]
llvm/tools/llvm-objdump/MachODump.cpp

diff --git a/llvm/test/tools/llvm-objdump/macho-data-in-code.ll b/llvm/test/tools/llvm-objdump/macho-data-in-code.ll
new file mode 100644 (file)
index 0000000..57877da
--- /dev/null
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple x86_64-apple-darwin -filetype=obj -O0 %s -o %t.o
+; RUN: llvm-objdump -macho -disassemble -no-show-raw-insn %t.o | FileCheck %s
+
+; CHECK: .long {{[0-9]+}}      @ KIND_JUMP_TABLE32
+; CHECK: .long {{[0-9]+}}      @ KIND_JUMP_TABLE32
+; CHECK: .long {{[0-9]+}}      @ KIND_JUMP_TABLE32
+; CHECK: .long {{[0-9]+}}      @ KIND_JUMP_TABLE32
+; CHECK-NOT: invalid instruction encoding
+; CHECK-NOT: <unknown>
+
+; ModuleID = '-'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+; Function Attrs: noinline nounwind optnone ssp uwtable
+define void @switchfunc(i32 %i) {
+  switch i32 %i, label %out [
+    i32 0, label %case1
+    i32 1, label %case2
+    i32 2, label %case3
+    i32 3, label %case4
+  ]
+
+case1:
+  call void @foo()
+  br label %out
+
+case2:
+  call void @bar()
+  br label %out
+
+case3:
+  call void @foo()
+  br label %out
+
+case4:
+  call void @bar()
+  br label %out
+
+out:
+  ret void
+}
+
+declare void @foo()
+declare void @bar()
index 58ff7be..a3f3a9d 100644 (file)
@@ -393,10 +393,40 @@ static void getSectionsAndSymbols(MachOObjectFile *MachOObj,
         BaseSegmentAddressSet = true;
         BaseSegmentAddress = SLC.vmaddr;
       }
+    } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
+      MachO::segment_command_64 SLC = MachOObj->getSegment64LoadCommand(Command);
+      StringRef SegName = SLC.segname;
+      if (!BaseSegmentAddressSet && SegName != "__PAGEZERO") {
+        BaseSegmentAddressSet = true;
+        BaseSegmentAddress = SLC.vmaddr;
+      }
     }
   }
 }
 
+static bool DumpAndSkipDataInCode(uint64_t PC, const uint8_t *bytes,
+                                 DiceTable &Dices, uint64_t &InstSize) {
+  // Check the data in code table here to see if this is data not an
+  // instruction to be disassembled.
+  DiceTable Dice;
+  Dice.push_back(std::make_pair(PC, DiceRef()));
+  dice_table_iterator DTI =
+      std::search(Dices.begin(), Dices.end(), Dice.begin(), Dice.end(),
+                  compareDiceTableEntries);
+  if (DTI != Dices.end()) {
+    uint16_t Length;
+    DTI->second.getLength(Length);
+    uint16_t Kind;
+    DTI->second.getKind(Kind);
+    InstSize = DumpDataInCode(bytes, Length, Kind);
+    if ((Kind == MachO::DICE_KIND_JUMP_TABLE8) &&
+        (PC == (DTI->first + Length - 1)) && (Length & 1))
+      InstSize++;
+    return true;
+  }
+  return false;
+}
+
 static void printRelocationTargetName(const MachOObjectFile *O,
                                       const MachO::any_relocation_info &RE,
                                       raw_string_ostream &Fmt) {
@@ -7203,7 +7233,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
   std::vector<SectionRef> Sections;
   std::vector<SymbolRef> Symbols;
   SmallVector<uint64_t, 8> FoundFns;
-  uint64_t BaseSegmentAddress;
+  uint64_t BaseSegmentAddress = 0;
 
   getSectionsAndSymbols(MachOOF, Sections, Symbols, FoundFns,
                         BaseSegmentAddress);
@@ -7496,24 +7526,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
         if (!NoShowRawInsn || Arch == Triple::arm)
           outs() << "\t";
 
-        // Check the data in code table here to see if this is data not an
-        // instruction to be disassembled.
-        DiceTable Dice;
-        Dice.push_back(std::make_pair(PC, DiceRef()));
-        dice_table_iterator DTI =
-            std::search(Dices.begin(), Dices.end(), Dice.begin(), Dice.end(),
-                        compareDiceTableEntries);
-        if (DTI != Dices.end()) {
-          uint16_t Length;
-          DTI->second.getLength(Length);
-          uint16_t Kind;
-          DTI->second.getKind(Kind);
-          Size = DumpDataInCode(Bytes.data() + Index, Length, Kind);
-          if ((Kind == MachO::DICE_KIND_JUMP_TABLE8) &&
-              (PC == (DTI->first + Length - 1)) && (Length & 1))
-            Size++;
+        if (DumpAndSkipDataInCode(PC, Bytes.data() + Index, Dices, Size))
           continue;
-        }
 
         SmallVector<char, 64> AnnotationsBytes;
         raw_svector_ostream Annotations(AnnotationsBytes);
@@ -7588,6 +7602,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
         MCInst Inst;
 
         uint64_t PC = SectAddress + Index;
+
+        if (DumpAndSkipDataInCode(PC, Bytes.data() + Index, Dices, InstSize))
+          continue;
+
         SmallVector<char, 64> AnnotationsBytes;
         raw_svector_ostream Annotations(AnnotationsBytes);
         if (DisAsm->getInstruction(Inst, InstSize, Bytes.slice(Index), PC,