From 89d0564b6a426afe234f15d7f616c3987f9d5298 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sat, 21 Feb 2015 01:04:47 +0000 Subject: [PATCH] Win64: Stack alignment constraints aren't applied during SET_FPREG Stack realignment occurs after the prolog, not during, for Win64. Because of this, don't factor in the maximum stack alignment when establishing a frame pointer. This fixes PR22572. llvm-svn: 230113 --- llvm/lib/Target/X86/X86FrameLowering.cpp | 61 +++++++++++++------------------- llvm/test/CodeGen/X86/win64_eh.ll | 20 +++++------ llvm/test/CodeGen/X86/win64_frame.ll | 23 ++++++------ 3 files changed, 47 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 713b4fe..042a57e8 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -458,7 +458,7 @@ static unsigned calculateSetFPREG(uint64_t SPAdjust) { const uint64_t Win64MaxSEHOffset = 128; uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset); // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode. - return static_cast<unsigned>(RoundUpToAlignment(SEHFrameOffset, 16)); + return SEHFrameOffset & -16; } // If we're forcing a stack realignment we can't rely on just the frame @@ -672,14 +672,12 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // If required, include space for extra hidden slot for stashing base pointer. if (X86FI->getRestoreBasePointer()) FrameSize += SlotSize; - if (RegInfo->needsStackRealignment(MF)) { - // Callee-saved registers are pushed on stack before the stack - // is realigned. - FrameSize -= X86FI->getCalleeSavedFrameSize(); - NumBytes = RoundUpToAlignment(FrameSize, MaxAlign); - } else { - NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); - } + + NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); + + // Callee-saved registers are pushed on stack before the stack is realigned. 
+ if (RegInfo->needsStackRealignment(MF) && !IsWinEH) + NumBytes = RoundUpToAlignment(NumBytes, MaxAlign); // Get the offset of the stack slot for the EBP register, which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. @@ -801,7 +799,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // responsible for adjusting the stack pointer. Touching the stack at 4K // increments is necessary to ensure that the guard pages used by the OS // virtual memory manager are allocated in correct sequence. - if (NumBytes >= StackProbeSize && UseStackProbe) { + uint64_t AlignedNumBytes = NumBytes; + if (IsWinEH && RegInfo->needsStackRealignment(MF)) + AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign); + if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { // Check whether EAX is livein for this function. bool isEAXAlive = isEAXLiveIn(MF); @@ -1005,14 +1006,12 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (hasFP(MF)) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; - if (RegInfo->needsStackRealignment(MF)) { - // Callee-saved registers were pushed on stack before the stack - // was realigned. - FrameSize -= CSSize; - NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; - } else { - NumBytes = FrameSize - CSSize; - } + NumBytes = FrameSize - CSSize; + + // Callee-saved registers were pushed on stack before the stack was + // realigned. + if (RegInfo->needsStackRealignment(MF) && !IsWinEH) + NumBytes = RoundUpToAlignment(FrameSize, MaxAlign); // Pop EBP. BuildMI(MBB, MBBI, DL, @@ -1185,25 +1184,15 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int64_t FPDelta = 0; if (IsWinEH) { - uint64_t NumBytes = 0; + assert(!MFI->hasCalls() || (StackSize % 16) == 8); + // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; // If required, include space for extra hidden slot for stashing base pointer. 
if (X86FI->getRestoreBasePointer()) FrameSize += SlotSize; - uint64_t SEHStackAllocAmt = StackSize; - if (RegInfo->needsStackRealignment(MF)) { - // Callee-saved registers are pushed on stack before the stack - // is realigned. - FrameSize -= CSSize; - - uint64_t MaxAlign = - calculateMaxStackAlign(MF); // Desired stack alignment. - NumBytes = RoundUpToAlignment(FrameSize, MaxAlign); - SEHStackAllocAmt = RoundUpToAlignment(SEHStackAllocAmt, 16); - } else { - NumBytes = FrameSize - CSSize; - } + uint64_t NumBytes = FrameSize - CSSize; + uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes); if (FI && FI == X86FI->getFAIndex()) return -SEHFrameOffset; @@ -1212,7 +1201,9 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, // pointer followed by return address and the location required by the // restricted Win64 prologue. // Add FPDelta to all offsets below that go through the frame pointer. - FPDelta = SEHStackAllocAmt - SEHFrameOffset; + FPDelta = FrameSize - SEHFrameOffset; + assert((!MFI->hasCalls() || (FPDelta % 16) == 0) && + "FPDelta isn't aligned per the Win64 ABI!"); } @@ -1237,8 +1228,6 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, } else { if (!HasFP) return Offset + StackSize; - if (IsWinEH) - return Offset + FPDelta; // Skip the saved EBP. 
Offset += SlotSize; @@ -1249,7 +1238,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, Offset -= TailCallReturnAddrDelta; } - return Offset; + return Offset + FPDelta; } int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, diff --git a/llvm/test/CodeGen/X86/win64_eh.ll b/llvm/test/CodeGen/X86/win64_eh.ll index 681023d..7b11150 100644 --- a/llvm/test/CodeGen/X86/win64_eh.ll +++ b/llvm/test/CodeGen/X86/win64_eh.ll @@ -150,18 +150,18 @@ entry: ; WIN64: .seh_pushreg 7 ; WIN64: pushq %rbx ; WIN64: .seh_pushreg 3 -; WIN64: subq $128, %rsp -; WIN64: .seh_stackalloc 128 -; WIN64: leaq 128(%rsp), %rbp -; WIN64: .seh_setframe 5, 128 -; WIN64: movaps %xmm7, -32(%rbp) # 16-byte Spill -; WIN64: .seh_savexmm 7, 96 -; WIN64: movaps %xmm6, -48(%rbp) # 16-byte Spill -; WIN64: .seh_savexmm 6, 80 +; WIN64: subq $96, %rsp +; WIN64: .seh_stackalloc 96 +; WIN64: leaq 96(%rsp), %rbp +; WIN64: .seh_setframe 5, 96 +; WIN64: movaps %xmm7, -16(%rbp) # 16-byte Spill +; WIN64: .seh_savexmm 7, 80 +; WIN64: movaps %xmm6, -32(%rbp) # 16-byte Spill +; WIN64: .seh_savexmm 6, 64 ; WIN64: .seh_endprologue ; WIN64: andq $-64, %rsp -; WIN64: movaps -48(%rbp), %xmm6 # 16-byte Reload -; WIN64: movaps -32(%rbp), %xmm7 # 16-byte Reload +; WIN64: movaps -32(%rbp), %xmm6 # 16-byte Reload +; WIN64: movaps -16(%rbp), %xmm7 # 16-byte Reload ; WIN64: leaq (%rbp), %rsp ; WIN64: popq %rbx ; WIN64: popq %rdi diff --git a/llvm/test/CodeGen/X86/win64_frame.ll b/llvm/test/CodeGen/X86/win64_frame.ll index a450a83..535f8b4 100644 --- a/llvm/test/CodeGen/X86/win64_frame.ll +++ b/llvm/test/CodeGen/X86/win64_frame.ll @@ -9,10 +9,10 @@ define i32 @f1(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) "no-frame-pointer-el define void @f2(i32 %p, ...) 
"no-frame-pointer-elim"="true" { ; CHECK-LABEL: f2: ; CHECK: .seh_stackalloc 8 - ; CHECK: leaq 16(%rsp), %rbp - ; CHECK: .seh_setframe 5, 16 - ; CHECK: movq %rdx, 16(%rbp) - ; CHECK: leaq 16(%rbp), %rax + ; CHECK: leaq (%rsp), %rbp + ; CHECK: .seh_setframe 5, 0 + ; CHECK: movq %rdx, 32(%rbp) + ; CHECK: leaq 32(%rbp), %rax %ap = alloca i8, align 8 call void @llvm.va_start(i8* %ap) ret void @@ -76,20 +76,21 @@ define i32 @f7(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"=" ; CHECK-LABEL: f7: ; CHECK: pushq %rbp ; CHECK: .seh_pushreg 5 - ; CHECK: subq $320, %rsp - ; CHECK: .seh_stackalloc 320 + ; CHECK: subq $304, %rsp + ; CHECK: .seh_stackalloc 304 ; CHECK: leaq 128(%rsp), %rbp ; CHECK: .seh_setframe 5, 128 - ; CHECK: movl 240(%rbp), %eax - ; CHECK: leaq 192(%rbp), %rsp + ; CHECK: andq $-64, %rsp + ; CHECK: movl 224(%rbp), %eax + ; CHECK: leaq 176(%rbp), %rsp alloca [300 x i8], align 64 ret i32 %e } define i32 @f8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="true" { ; CHECK-LABEL: f8: - ; CHECK: subq $384, %rsp - ; CHECK: .seh_stackalloc 384 + ; CHECK: subq $352, %rsp + ; CHECK: .seh_stackalloc 352 ; CHECK: leaq 128(%rsp), %rbp ; CHECK: .seh_setframe 5, 128 @@ -113,7 +114,7 @@ define i32 @f8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"=" ret i32 %e ; CHECK: movl %esi, %eax - ; CHECK: leaq 256(%rbp), %rsp + ; CHECK: leaq 224(%rbp), %rsp } declare i8* @llvm.returnaddress(i32) nounwind readnone -- 2.7.4