[llvm-objdump] Avoid using mapping symbols as branch target labels
author Kristina Bessonova <kbessonova@accesssoftek.com>
Fri, 18 Nov 2022 17:56:51 +0000 (19:56 +0200)
committer Kristina Bessonova <kbessonova@accesssoftek.com>
Tue, 6 Dec 2022 10:19:12 +0000 (12:19 +0200)
The main motivation for this change is to avoid ambiguity: mapping symbol
names may not be unique across a binary, so they cannot uniquely identify
a target address. As a result, mapping symbols used as branch target
labels make llvm-objdump output less readable.

Another point is that mapping symbols sometimes appear in
non-allocatable sections, such as debug info sections, which makes objdump
output even more confusing.

For example, a small AArch64 executable may contain plenty of `$d[.*]`
symbols and none of them would be useful as a label for resolving
a branch or a memory operand target address:

```
  0000000000000254 l       .note.ABI-tag 0000000000000000 $d
  00000000000008d4 l       .eh_frame            0000000000000000 $d
  0000000000000868 l       .rodata              0000000000000000 $d
  0000000000011028 l       .data                0000000000000000 $d
  0000000000010db8 l       .fini_array          0000000000000000 $d
  0000000000010db0 l       .init_array          0000000000000000 $d
  00000000000008e8 l       .eh_frame            0000000000000000 $d
  0000000000011034 l       .bss                 0000000000000000 $d
```

Note that GNU objdump doesn't use mapping symbols as branch target
labels for any of the targets that support such symbols (ARM, AArch64, CSKY).

Differential Revision: https://reviews.llvm.org/D139131

lld/test/ELF/aarch64-range-thunk-extension-plt32.s
llvm/test/CodeGen/AArch64/callbr-asm-obj-file.ll
llvm/test/MC/ARM/branch-disassemble.s
llvm/test/MC/Disassembler/ARM/mve-lol.txt
llvm/tools/llvm-objdump/llvm-objdump.cpp

index 20b497f..bac52c7 100644 (file)
@@ -13,7 +13,7 @@
 
 // The thunk redirects to the address of callee.
 // CHECK-LABEL: <__AArch64AbsLongThunk_callee>:
-// CHECK-NEXT:    10004:       ldr     x16, 0x1000c <$d>
+// CHECK-NEXT:    10004:       ldr     x16, 0x1000c <__AArch64AbsLongThunk_callee+0x8>
 // CHECK-NEXT:    10008:       br      x16
 
 // CHECK-LABEL: <$d>:
index 7125264..4bdcda7 100644 (file)
@@ -9,7 +9,7 @@
 ; CHECK-LABEL: <test1>:
 ; CHECK-LABEL: <$d.1>:
 ; CHECK-LABEL: <$x.2>:
-; CHECK-NEXT:    b 0x2c <$x.4>
+; CHECK-NEXT:    b 0x2c <test1+0x2c>
 ; CHECK-LABEL: <$x.4>:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ldr x30, [sp], #16
@@ -72,7 +72,7 @@ define hidden i32 @test2() local_unnamed_addr {
 ; CHECK-LABEL: <test3>:
 ; CHECK-LABEL: <$d.9>:
 ; CHECK-LABEL: <$x.10>:
-; CHECK-NEXT:    b {{.*}} <$x.12>
+; CHECK-NEXT:    b {{.*}} <test3+0x34>
 ; CHECK-LABEL: <$x.12>:
 ; CHECK-NEXT:    ldr x30, [sp], #16
 ; CHECK-NEXT:    ret
index 6a98953..93c80a1 100644 (file)
@@ -6,9 +6,10 @@
 @ RUN:   | llvm-objdump --no-print-imm-hex --mcpu=cortex-m3 --triple=thumbv7m-arm-none-eabi -d - \
 @ RUN:   | FileCheck %s -check-prefix CHECK-THUMB
 
+foo:
 b.w .Lbranch
-@ CHECK-ARM: b 0xc <$a.0+0xc> @ imm = #4
-@ CHECK-THUMB: b.w 0xc <$t.0+0xc> @ imm = #8
+@ CHECK-ARM: b 0xc <foo+0xc> @ imm = #4
+@ CHECK-THUMB: b.w 0xc <foo+0xc> @ imm = #8
 adds r0, r1, #42
 adds r1, r2, #42
 .Lbranch:
index 075ca0e..4274752 100644 (file)
@@ -2,48 +2,49 @@
 # RUN:   llvm-objdump --no-print-imm-hex -d -r --triple=thumbv8.1m.main --mattr=+mve - | FileCheck %s
 
 # Test to check that we can evaluate branches and their targets, i.e.
-# checking that we see branch targets annotations like <$t.0+0xc> in the
+# checking that we see branch targets annotations like <foo+0xc> in the
 # disassembly.
 
-# CHECK:  wls lr, r3, 0xc <$t.0+0xc>          @ imm = #8
+foo:
+# CHECK:  wls lr, r3, 0xc <foo+0xc>          @ imm = #8
 # CHECK:  vmov  q0, q1
-# CHECK:  le  lr, 0x4 <$t.0+0x4>              @ imm = #-8
+# CHECK:  le  lr, 0x4 <foo+0x4>              @ imm = #-8
 
           wls lr, r3, #8
           vmov  q0, q1
           le  lr, #-8
 
 
-# CHECK:  wlstp.8 lr, r3, 0x18 <$t.0+0x18>    @ imm = #8
+# CHECK:  wlstp.8 lr, r3, 0x18 <foo+0x18>    @ imm = #8
 # CHECK:  vmov  q0, q1
-# CHECK:  letp  lr, 0x10 <$t.0+0x10>          @ imm = #-8
+# CHECK:  letp  lr, 0x10 <foo+0x10>          @ imm = #-8
 
           wlstp.8  lr, r3, #8
           vmov  q0, q1
           letp  lr, #-8
 
 
-# CHECK:  wlstp.16  lr, r3, 0x24 <$t.0+0x24>  @ imm = #8
+# CHECK:  wlstp.16  lr, r3, 0x24 <foo+0x24>  @ imm = #8
 # CHECK:  vmov  q0, q1
-# CHECK:  letp  lr, 0x1c <$t.0+0x1c>          @ imm = #-8
+# CHECK:  letp  lr, 0x1c <foo+0x1c>          @ imm = #-8
 
           wlstp.16  lr, r3, #8
           vmov  q0, q1
           letp  lr, #-8
 
 
-# CHECK:  wlstp.32  lr, r3, 0x30 <$t.0+0x30>  @ imm = #8
+# CHECK:  wlstp.32  lr, r3, 0x30 <foo+0x30>  @ imm = #8
 # CHECK:  vmov  q0, q1
-# CHECK:  letp  lr, 0x28 <$t.0+0x28>          @ imm = #-8
+# CHECK:  letp  lr, 0x28 <foo+0x28>          @ imm = #-8
 
           wlstp.32  lr, r3, #8
           vmov  q0, q1
           letp  lr, #-8
 
 
-# CHECK:  wlstp.64  lr, r3, 0x3c <$t.0+0x3c>  @ imm = #8
+# CHECK:  wlstp.64  lr, r3, 0x3c <foo+0x3c>  @ imm = #8
 # CHECK:  vmov  q0, q1
-# CHECK:  letp  lr, 0x34 <$t.0+0x34>          @ imm = #-8
+# CHECK:  letp  lr, 0x34 <foo+0x34>          @ imm = #-8
 
           wlstp.64  lr, r3, #8
           vmov  q0, q1
index 422e6e5..ab66e1b 100644 (file)
@@ -463,6 +463,11 @@ static bool hasMappingSymbols(const ObjectFile &Obj) {
   return isArmElf(Obj) || isAArch64Elf(Obj) || isCSKYElf(Obj) ;
 }
 
+static bool isMappingSymbol(const SymbolInfoTy &Sym) {
+  return Sym.Name.startswith("$d") || Sym.Name.startswith("$x") ||
+         Sym.Name.startswith("$a") || Sym.Name.startswith("$t");
+}
+
 static void printRelocation(formatted_raw_ostream &OS, StringRef FileName,
                             const RelocationRef &Rel, uint64_t Address,
                             bool Is64Bits) {
@@ -1877,10 +1882,17 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
                 auto It = llvm::partition_point(
                     *TargetSymbols,
                     [=](const SymbolInfoTy &O) { return O.Addr <= Target; });
-                if (It != TargetSymbols->begin()) {
-                  TargetSym = &*(It - 1);
-                  break;
+                while (It != TargetSymbols->begin()) {
+                  --It;
+                  // Skip mapping symbols to avoid possible ambiguity as they
+                  // do not allow uniquely identifying the target address.
+                  if (!hasMappingSymbols(Obj) || !isMappingSymbol(*It)) {
+                    TargetSym = &*It;
+                    break;
+                  }
                 }
+                if (TargetSym)
+                  break;
               }
 
               // Print the labels corresponding to the target if there's any.