X86: balance the frame prologue and epilogue on Win64
author: Saleem Abdulrasool <compnerd@compnerd.org>
Mon, 14 Jun 2021 00:39:19 +0000 (17:39 -0700)
committer: Saleem Abdulrasool <compnerd@compnerd.org>
Wed, 16 Jun 2021 03:13:52 +0000 (20:13 -0700)
This was broken in ba1509da7b89c850c89f0f98afbab375794cd3c8.  The Win64
frame would not perform the setup of the Swift async context parameter
but would tear down the setup in the epilogue resulting in crashes.
This ensures that we do the full setup when we do the tear down.
Although this is non-conforming to the Win64 calling convention, it
corrects the setup and exposes the actual issue that the change
introduced: incorrect frame setup.

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D104246

llvm/lib/Target/X86/X86FrameLowering.cpp
llvm/test/CodeGen/X86/swift-async-win64.ll [new file with mode: 0644]
llvm/test/CodeGen/X86/swifttail-async-win64.ll [new file with mode: 0644]

index 2ad2e1f..4cde797 100644 (file)
@@ -1482,20 +1482,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
           .setMIFlag(MachineInstr::FrameSetup);
     }
 
-    if (!IsWin64Prologue && !IsFunclet) {
-      // Update EBP with the new base value.
-      if (!X86FI->hasSwiftAsyncContext()) {
-        BuildMI(MBB, MBBI, DL,
-                TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
-                FramePtr)
-            .addReg(StackPtr)
-            .setMIFlag(MachineInstr::FrameSetup);
-      } else {
+    if (!IsFunclet) {
+      if (X86FI->hasSwiftAsyncContext()) {
+        const auto &Attrs = MF.getFunction().getAttributes();
+
         // Before we update the live frame pointer we have to ensure there's a
         // valid (or null) asynchronous context in its slot just before FP in
         // the frame record, so store it now.
-        const auto &Attrs = MF.getFunction().getAttributes();
-
         if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
           // We have an initial context in r14, store it just before the frame
           // pointer.
@@ -1510,6 +1503,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
               .addImm(0)
               .setMIFlag(MachineInstr::FrameSetup);
         }
+
+        if (NeedsWinCFI) {
+          HasWinCFI = true;
+          BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
+              .addImm(X86::R14)
+              .setMIFlag(MachineInstr::FrameSetup);
+        }
+
         BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
             .addUse(X86::RSP)
             .addImm(1)
@@ -1523,21 +1524,32 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
             .setMIFlag(MachineInstr::FrameSetup);
       }
 
-      if (NeedsDwarfCFI) {
-        // Mark effective beginning of when frame pointer becomes valid.
-        // Define the current CFA to use the EBP/RBP register.
-        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
-        BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister(
-                                    nullptr, DwarfFramePtr));
-      }
+      if (!IsWin64Prologue && !IsFunclet) {
+        // Update EBP with the new base value.
+        if (!X86FI->hasSwiftAsyncContext())
+          BuildMI(MBB, MBBI, DL,
+                  TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
+                  FramePtr)
+              .addReg(StackPtr)
+              .setMIFlag(MachineInstr::FrameSetup);
 
-      if (NeedsWinFPO) {
-        // .cv_fpo_setframe $FramePtr
-        HasWinCFI = true;
-        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
-            .addImm(FramePtr)
-            .addImm(0)
-            .setMIFlag(MachineInstr::FrameSetup);
+        if (NeedsDwarfCFI) {
+          // Mark effective beginning of when frame pointer becomes valid.
+          // Define the current CFA to use the EBP/RBP register.
+          unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+          BuildCFI(
+              MBB, MBBI, DL,
+              MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
+        }
+
+        if (NeedsWinFPO) {
+          // .cv_fpo_setframe $FramePtr
+          HasWinCFI = true;
+          BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
+              .addImm(FramePtr)
+              .addImm(0)
+              .setMIFlag(MachineInstr::FrameSetup);
+        }
       }
     }
   } else {
diff --git a/llvm/test/CodeGen/X86/swift-async-win64.ll b/llvm/test/CodeGen/X86/swift-async-win64.ll
new file mode 100644 (file)
index 0000000..823b417
--- /dev/null
@@ -0,0 +1,108 @@
+; RUN: llc -mtriple x86_64-unknown-windows-msvc %s -o - | FileCheck %s -check-prefix CHECK64
+; RUN: llc -mtriple i686-windows-msvc %s -o - | FileCheck %s -check-prefix CHECK32
+
+define void @simple(i8* swiftasync %context) "frame-pointer"="all" {
+  ret void
+}
+
+; CHECK64-LABEL: simple:
+; CHECK64: btsq    $60, %rbp
+; CHECK64: pushq   %rbp
+; CHECK64: pushq   %r14
+; CHECK64: leaq    8(%rsp), %rbp
+; [...]
+; CHECK64: addq    $16, %rsp
+; CHECK64: popq    %rbp
+; CHECK64: btrq    $60, %rbp
+; CHECK64: retq
+
+; CHECK32-LABEL: simple:
+; CHECK32: movl    8(%ebp), [[TMP:%.*]]
+; CHECK32: movl    [[TMP]], {{.*}}(%ebp)
+
+define void @more_csrs(i8* swiftasync %context) "frame-pointer"="all" {
+  call void asm sideeffect "", "~{r15}"()
+  ret void
+}
+
+; CHECK64-LABEL: more_csrs:
+; CHECK64: btsq    $60, %rbp
+; CHECK64: pushq   %rbp
+; CHECK64: .seh_pushreg %rbp
+; CHECK64: pushq   %r14
+; CHECK64: .seh_pushreg %r14
+; CHECK64: leaq    8(%rsp), %rbp
+; CHECK64: subq    $8, %rsp
+; CHECK64: pushq   %r15
+; CHECK64: .seh_pushreg %r15
+; [...]
+; CHECK64: popq    %r15
+; CHECK64: addq    $16, %rsp
+; CHECK64: popq    %rbp
+; CHECK64: btrq    $60, %rbp
+; CHECK64: retq
+
+declare void @f(i32*)
+
+define void @locals(i8* swiftasync %context) "frame-pointer"="all" {
+  %var = alloca i32, i32 10
+  call void @f(i32* %var)
+  ret void
+}
+
+; CHECK64-LABEL: locals:
+; CHECK64: btsq    $60, %rbp
+; CHECK64: pushq   %rbp
+; CHECK64: .seh_pushreg %rbp
+; CHECK64: pushq   %r14
+; CHECK64: .seh_pushreg %r14
+; CHECK64: leaq    8(%rsp), %rbp
+; CHECK64: subq    $88, %rsp
+
+; CHECK64: leaq    -48(%rbp), %rcx
+; CHECK64: callq   f
+
+; CHECK64: addq    $80, %rsp
+; CHECK64: addq    $16, %rsp
+; CHECK64: popq    %rbp
+; CHECK64: btrq    $60, %rbp
+; CHECK64: retq
+
+define void @use_input_context(i8* swiftasync %context, i8** %ptr) "frame-pointer"="all" {
+  store i8* %context, i8** %ptr
+  ret void
+}
+
+; CHECK64-LABEL: use_input_context:
+; CHECK64: movq    %r14, (%rcx)
+
+declare i8** @llvm.swift.async.context.addr()
+
+define i8** @context_in_func() "frame-pointer"="non-leaf" {
+  %ptr = call i8** @llvm.swift.async.context.addr()
+  ret i8** %ptr
+}
+
+; CHECK64-LABEL: context_in_func:
+; CHECK64: leaq    -8(%rbp), %rax
+
+; CHECK32-LABEL: context_in_func:
+; CHECK32: movl    %esp, %eax
+
+define void @write_frame_context(i8* swiftasync %context, i8* %new_context) "frame-pointer"="non-leaf" {
+  %ptr = call i8** @llvm.swift.async.context.addr()
+  store i8* %new_context, i8** %ptr
+  ret void
+}
+
+; CHECK64-LABEL: write_frame_context:
+; CHECK64: movq    %rbp, [[TMP:%.*]]
+; CHECK64: subq    $8, [[TMP]]
+; CHECK64: movq    %rcx, ([[TMP]])
+
+define void @simple_fp_elim(i8* swiftasync %context) "frame-pointer"="non-leaf" {
+  ret void
+}
+
+; CHECK64-LABEL: simple_fp_elim:
+; CHECK64-NOT: btsq
diff --git a/llvm/test/CodeGen/X86/swifttail-async-win64.ll b/llvm/test/CodeGen/X86/swifttail-async-win64.ll
new file mode 100644 (file)
index 0000000..429d48b
--- /dev/null
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple x86_64-unknown-windows-msvc %s -o - | FileCheck %s
+
+declare swifttailcc void @callee()
+
+define swifttailcc void @swift_tail() {
+  call void asm "","~{r14}"()
+  tail call swifttailcc void @callee()
+  ret void
+}
+
+; CHECK-LABEL: swift_tail:
+; CHECK-NOT: popq %r14
+
+define void @has_swift_async(i8* swiftasync %context) {
+  call void asm "","~{r14}"()
+  ret void
+}
+
+; CHECK-LABEL: has_swift_async:
+; CHECK: popq    %r14
+
+; It's impossible to get a tail call from a function without a swiftasync
+; parameter to one with unless the CC is swifttailcc. So it doesn't matter
+; whether r14 is callee-saved in this case.
+define void @calls_swift_async() {
+  call void asm "","~{r14}"()
+  tail call void @has_swift_async(i8* swiftasync null)
+  ret void
+}
+
+; CHECK-LABEL: calls_swift_async:
+; CHECK-NOT: jmpq has_swift_async
+
+define swifttailcc void @no_preserve_swiftself() {
+  call void asm "","~{r13}"()
+  ret void
+}
+
+; CHECK-LABEL: no_preserve_swiftself:
+; CHECK-NOT: popq %r13
+
+declare swifttailcc i8* @swift_self(i8* swiftasync %context, i8* swiftself %self)
+
+define swiftcc i8* @call_swift_self(i8* swiftself %self, i8* %context) {
+  ; call void asm "","~{r13}"()
+  ; We get a push r13 but why not with the call below?
+  %res = call swifttailcc i8* @swift_self(i8* swiftasync %context, i8* swiftself %self)
+  ret i8* %res
+}
+
+; CHECK-LABEL: call_swift_self:
+; CHECK: pushq %r13