From c6c86f4f81fb9130499f2b75764b8227cddc2375 Mon Sep 17 00:00:00 2001 From: "Luo, Yuanke" Date: Sat, 10 Aug 2019 02:49:02 +0000 Subject: [PATCH] [X86] Fix stack probe issue on windows32. Summary: On Windows, if the frame size exceeds 4096 bytes, the compiler needs to generate a call to _alloca_probe. The X86CallFrameOptimization pass changes the reserved stack size, which causes the stack probe function call not to be inserted. This patch fixes the issue by checking the call frame size: if the size exceeds 4096 bytes, X86CallFrameOptimization is skipped. Reviewers: craig.topper, wxiao3, annita.zhang, rnk, RKSimon Reviewed By: rnk Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65923 llvm-svn: 368503 --- llvm/lib/Target/X86/X86CallFrameOptimization.cpp | 10 ++++ llvm/lib/Target/X86/X86FrameLowering.cpp | 9 +--- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 ++++++ llvm/lib/Target/X86/X86ISelLowering.h | 2 + llvm/test/CodeGen/X86/nomovtopush.ll | 59 ++++++++++++++++++++++++ 5 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/X86/nomovtopush.ll diff --git a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp index 4b6f5ab..7796945 100644 --- a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp @@ -155,12 +155,22 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) { // This is bad, and breaks SP adjustment. // So, check that all of the frames in the function are closed inside // the same block, and, for good measure, that there are no nested frames. + // + // If any call allocates more argument stack memory than the stack + // probe size, don't do this optimization. Otherwise, this pass + // would need to synthesize additional stack probe calls to allocate + // memory for arguments. 
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); + bool UseStackProbe = + !STI->getTargetLowering()->getStackProbeSymbolName(MF).empty(); + unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF); for (MachineBasicBlock &BB : MF) { bool InsideFrameSequence = false; for (MachineInstr &MI : BB) { if (MI.getOpcode() == FrameSetupOpcode) { + if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe) + return false; if (InsideFrameSequence) return false; InsideFrameSequence = true; diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index aff686e..47be92e 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -1022,14 +1022,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty(); - - // The default stack probe size is 4096 if the function has no stackprobesize - // attribute. - unsigned StackProbeSize = 4096; - if (Fn.hasFnAttribute("stack-probe-size")) - Fn.getFnAttribute("stack-probe-size") - .getValueAsString() - .getAsInteger(0, StackProbeSize); + unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF); // Re-align the stack on 64-bit if the x86-interrupt calling convention is // used and an error code was pushed, since the x86-64 ABI requires a 16-byte diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2d3581f..0eb8320 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -44970,3 +44970,16 @@ X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const { return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk"; return Subtarget.isTargetCygMing() ? 
"_alloca" : "_chkstk"; } + +unsigned +X86TargetLowering::getStackProbeSize(MachineFunction &MF) const { + // The default stack probe size is 4096 if the function has no stackprobesize + // attribute. + unsigned StackProbeSize = 4096; + const Function &Fn = MF.getFunction(); + if (Fn.hasFnAttribute("stack-probe-size")) + Fn.getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + return StackProbeSize; +} diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 8dc58a1..09b0f6b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1207,6 +1207,8 @@ namespace llvm { StringRef getStackProbeSymbolName(MachineFunction &MF) const override; + unsigned getStackProbeSize(MachineFunction &MF) const; + bool hasVectorBlend() const override { return true; } unsigned getMaxSupportedInterleaveFactor() const override { return 4; } diff --git a/llvm/test/CodeGen/X86/nomovtopush.ll b/llvm/test/CodeGen/X86/nomovtopush.ll new file mode 100644 index 0000000..f690c23 --- /dev/null +++ b/llvm/test/CodeGen/X86/nomovtopush.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-pc-windows-msvc | FileCheck %s + +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-pc-windows-msvc" + +%struct._param_str = type { i32, i32, [4096 x i32], i32 } + +@g_d = common dso_local local_unnamed_addr global i32 0, align 4 +@g_c = common dso_local local_unnamed_addr global i32 0, align 4 +@g_b = common dso_local local_unnamed_addr global i32 0, align 4 +@g_a = common dso_local local_unnamed_addr global i32 0, align 4 +@g_param = common dso_local global %struct._param_str zeroinitializer, align 4 + +; Function Attrs: nounwind +define dso_local i32 @test() local_unnamed_addr { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl 
%esi +; CHECK-NEXT: movl $16396, %eax # imm = 0x400C +; CHECK-NEXT: calll __chkstk +; CHECK-NEXT: movl _g_d, %eax +; CHECK-NEXT: movl _g_c, %ecx +; CHECK-NEXT: movl _g_b, %edx +; CHECK-NEXT: movl _g_a, %esi +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %esi, (%esp) +; CHECK-NEXT: calll _bar +; CHECK-NEXT: movl $4099, %ecx # imm = 0x1003 +; CHECK-NEXT: movl %esp, %edi +; CHECK-NEXT: movl $_g_param, %esi +; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) +; CHECK-NEXT: calll _foo +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addl $16396, %esp # imm = 0x400C +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl +entry: + %0 = load i32, i32* @g_d, align 4, !tbaa !3 + %1 = load i32, i32* @g_c, align 4, !tbaa !3 + %2 = load i32, i32* @g_b, align 4, !tbaa !3 + %3 = load i32, i32* @g_a, align 4, !tbaa !3 + %call = tail call i32 @bar(i32 %3, i32 %2, i32 %1, i32 %0) #2 + tail call void @foo(%struct._param_str* byval nonnull align 4 @g_param) #2 + ret i32 0 +} + +declare dso_local i32 @bar(i32, i32, i32, i32) local_unnamed_addr + +declare dso_local void @foo(%struct._param_str* byval align 4) local_unnamed_addr + +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} -- 2.7.4