[llvm-objdump] Exclude __mh_*_header symbols during MachO disassembly
author: Greg McGary <gkm@fb.com>
Mon, 3 May 2021 02:08:02 +0000 (19:08 -0700)
committer: Greg McGary <gkm@fb.com>
Wed, 12 May 2021 13:39:14 +0000 (06:39 -0700)
`__mh_(execute|dylib|dylinker|bundle|preload|object)_header` are special symbols whose values hold the VMA of the Mach header to support introspection. They are attached to the first section in `__TEXT`, even though their addresses are outside `__TEXT`, and they do not refer to code.

This is normally harmless, but when the first section of `__TEXT` has no other symbols, `__mh_*_header` is considered by the disassembler when determining function boundaries. Since `__mh_*_header` refers to an address outside `__TEXT`, the boundary determination fails and disassembly quits.

Since `__TEXT,__text` normally has symbols, this bug is obscured. Experiments placing `__stubs` and `__stub_helper` first exposed the bug, since neither has symbols.

Differential Revision: https://reviews.llvm.org/D101786

llvm/test/tools/llvm-objdump/MachO/no-text-symbols-disassembly.test [new file with mode: 0644]
llvm/tools/llvm-objdump/llvm-objdump.cpp

diff --git a/llvm/test/tools/llvm-objdump/MachO/no-text-symbols-disassembly.test b/llvm/test/tools/llvm-objdump/MachO/no-text-symbols-disassembly.test
new file mode 100644 (file)
index 0000000..d01f860
--- /dev/null
@@ -0,0 +1,92 @@
+## Verify that we see disassembler output even when there are no
+## symbols in __TEXT,__text.
+
+# RUN: yaml2obj %s -o %t.o
+# RUN: llvm-objdump -d %t.o | FileCheck %s
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x01000007
+  cpusubtype:      0x00000003
+  filetype:        0x00000002
+  ncmds:           4
+  sizeofcmds:      544
+  flags:           0x00000001
+  reserved:        0x00000000
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         72
+    segname:         __PAGEZERO
+    vmaddr:          0
+    vmsize:          4294967296
+    fileoff:         0
+    filesize:        0
+    maxprot:         0
+    initprot:        0
+    nsects:          0
+    flags:           0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         152
+    segname:         __TEXT
+    vmaddr:          4294967296
+    vmsize:          4096
+    fileoff:         0
+    filesize:        4096
+    maxprot:         5
+    initprot:        5
+    nsects:          1
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0000000100000FF7
+        size:            9
+        offset:          0x00000FF7
+        align:           0
+        reloff:          0x00000000
+        nreloc:          0
+        flags:           0x80000400
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+        reserved3:       0x00000000
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         72
+    segname:         __LINKEDIT
+    vmaddr:          4294971392
+    vmsize:          4096
+    fileoff:         4096
+    filesize:        64
+    maxprot:         1
+    initprot:        1
+    nsects:          0
+    flags:           0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          4096
+    nsyms:           2
+    stroff:          4128
+    strsize:         32
+LinkEditData:
+  NameList:
+    - n_strx:          2
+      n_type:          0x03
+      n_sect:          1
+      n_desc:          16
+      n_value:         4294967296
+    - n_strx:          22
+      n_type:          0x01
+      n_sect:          0
+      n_desc:          0
+      n_value:         0
+  StringTable:
+    - ' '
+    - __mh_execute_header
+    - start
+    - ''
+    - ''
+    - ''
+    - ''
+...
+
+CHECK: Disassembly of section __TEXT,__text:
index c13ccfd..3689dd4 100644 (file)
@@ -1076,10 +1076,15 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
     if (Obj->isELF() && getElfSymbolType(Obj, Symbol) == ELF::STT_SECTION)
       continue;
 
-    // Don't ask a Mach-O STAB symbol for its section unless you know that
-    // STAB symbol's section field refers to a valid section index. Otherwise
-    // the symbol may error trying to load a section that does not exist.
     if (MachO) {
+      // __mh_(execute|dylib|dylinker|bundle|preload|object)_header are special
+      // symbols that support MachO header introspection. They do not bind to
+      // code locations and are irrelevant for disassembly.
+      if (NameOrErr->startswith("__mh_") && NameOrErr->endswith("_header"))
+        continue;
+      // Don't ask a Mach-O STAB symbol for its section unless you know that
+      // STAB symbol's section field refers to a valid section index. Otherwise
+      // the symbol may error trying to load a section that does not exist.
       DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
       uint8_t NType = (MachO->is64Bit() ?
                        MachO->getSymbol64TableEntry(SymDRI).n_type: