[LLD] [COFF] Fix mingw comdat associativity for leader symbols with a different name
author Martin Storsjö <martin@martin.st>
Sat, 25 Jul 2020 09:25:19 +0000 (12:25 +0300)
committer Martin Storsjö <martin@martin.st>
Mon, 27 Jul 2020 14:32:08 +0000 (17:32 +0300)
For a weak symbol func in a comdat, the actual leader symbol ends up
named like .weak.func.default*. Likewise, for stdcall on i386, the symbol
may be named _func@4, while the section suffix is only "func". The
previous implementation didn't handle either of these cases.

This fixes unwinding through weak functions when using
-ffunction-sections in mingw environments.

Differential Revision: https://reviews.llvm.org/D84607

lld/COFF/InputFiles.cpp
lld/test/COFF/associative-comdat-mingw-i386.s
lld/test/COFF/associative-comdat-mingw-weak.s [new file with mode: 0644]

index 0adc2b9..4346b3a 100644 (file)
@@ -348,13 +348,13 @@ void ObjFile::recordPrevailingSymbolForMingw(
   // of the section chunk we actually include instead of discarding it,
   // add the symbol to a map to allow using it for implicitly
   // associating .[px]data$<func> sections to it.
+  // Use the suffix from the .text$<func> instead of the leader symbol
+  // name, for cases where the names differ (i386 mangling/decorations,
+  // cases where the leader is a weak symbol named .weak.func.default*).
   int32_t sectionNumber = sym.getSectionNumber();
   SectionChunk *sc = sparseChunks[sectionNumber];
   if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
-    StringRef name;
-    name = check(coffObj->getSymbolName(sym));
-    if (getMachineType() == I386)
-      name.consume_front("_");
+    StringRef name = sc->getSectionName().split('$').second;
     prevailingSectionMap[name] = sectionNumber;
   }
 }
index 8d89478..3ba8c1c 100644 (file)
@@ -1,10 +1,14 @@
 # REQUIRES: x86
 
-# RUN: llvm-mc -triple=i686-windows-gnu %s -filetype=obj -o %t.obj
+# RUN: llvm-mc -triple=i686-windows-gnu %s -defsym stdcall=0 -filetype=obj -o %t.obj
 
 # RUN: lld-link -lldmingw -entry:main %t.obj -out:%t.exe
 # RUN: llvm-objdump -s %t.exe | FileCheck %s
 
+# RUN: llvm-mc -triple=i686-windows-gnu %s -defsym stdcall=1 -filetype=obj -o %t.stdcall.obj
+# RUN: lld-link -lldmingw -entry:main %t.stdcall.obj -out:%t.stdcall.exe
+# RUN: llvm-objdump -s %t.stdcall.exe | FileCheck %s
+
 # Check that the .eh_frame comdat was included, even if it had no symbols,
 # due to associativity with the symbol _foo.
 
         .globl          _main
         .p2align        4, 0x90
 _main:
+.if stdcall==0
         call            _foo
+.else
+        call            _foo@0
+.endif
         ret
 
         .section        .eh_frame$foo,"dr"
         .linkonce       discard
         .byte           0x42
 
+.if stdcall==0
         .def            _foo;
+.else
+        .def            _foo@0;
+.endif
         .scl            2;
         .type           32;
         .endef
+.if stdcall==0
         .section        .text$foo,"xr",discard,_foo
         .globl          _foo
         .p2align        4
 _foo:
+.else
+        .section        .text$foo,"xr",discard,_foo@0
+        .globl          _foo@0
+        .p2align        4
+_foo@0:
+.endif
         ret
diff --git a/lld/test/COFF/associative-comdat-mingw-weak.s b/lld/test/COFF/associative-comdat-mingw-weak.s
new file mode 100644 (file)
index 0000000..80c738b
--- /dev/null
@@ -0,0 +1,63 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -triple=x86_64-windows-gnu %s -filetype=obj -o %t.obj
+# RUN: llvm-readobj --symbols %t.obj | FileCheck %s --check-prefix=SYMBOL
+
+# RUN: lld-link -lldmingw -entry:main %t.obj -out:%t.exe -lldmap:%t.map -verbose
+# RUN: llvm-readobj --sections %t.exe | FileCheck %s
+
+# CHECK: Sections [
+# CHECK:   Section {
+# CHECK:     Number: 2
+# CHECK-LABEL:     Name: .rdata (2E 72 64 61 74 61 00 00)
+#             This is the critical check to show that .xdata$foo was
+#             retained, while .xdata$bar wasn't. This *must* be 0x24
+#             (0x4 for the .xdata section and 0x20 for the
+#             .ctors/.dtors headers/ends).
+# CHECK-NEXT:     VirtualSize: 0x24
+
+# Check that the weak symbols are still emitted as they were when the test was
+# written, to make sure the test still actually tests what was intended.
+
+# SYMBOL:       Symbol {
+# SYMBOL:         Name: foo
+# SYMBOL-NEXT:    Value: 0
+# SYMBOL-NEXT:    Section: IMAGE_SYM_UNDEFINED (0)
+# SYMBOL-NEXT:    BaseType: Null (0x0)
+# SYMBOL-NEXT:    ComplexType: Null (0x0)
+# SYMBOL-NEXT:    StorageClass: WeakExternal (0x69)
+# SYMBOL-NEXT:    AuxSymbolCount: 1
+# SYMBOL-NEXT:    AuxWeakExternal {
+# SYMBOL-NEXT:      Linked: .weak.foo.default.main (19)
+# SYMBOL-NEXT:      Search: Alias (0x3)
+# SYMBOL-NEXT:    }
+# SYMBOL-NEXT:  }
+
+        .text
+        .globl          main
+main:
+        call            foo
+        retq
+
+# See associative-comdat-mingw.s for the general setup. Here, the leader
+# symbols are weak, which causes the functions foo and bar to be undefined
+# weak externals, while the actual leader symbols are named like
+# .weak.foo.default.main.
+
+        .section        .xdata$foo,"dr"
+        .linkonce       discard
+        .long           42
+
+        .section        .xdata$bar,"dr"
+        .linkonce       discard
+        .long           43
+
+        .section        .text$foo,"xr",discard,foo
+        .weak           foo
+foo:
+        ret
+
+        .section        .text$bar,"xr",discard,bar
+        .weak           bar
+bar:
+        ret