Re-commit r265039 "[X86] Merge adjacent stack adjustments in eliminateCallFramePseudo...

author Hans Wennborg <hans@hanshq.net>

Mon, 4 Apr 2016 21:02:46 +0000 (21:02 +0000)

committer Hans Wennborg <hans@hanshq.net>

Mon, 4 Apr 2016 21:02:46 +0000 (21:02 +0000)
author Hans Wennborg <hans@hanshq.net>
Mon, 4 Apr 2016 21:02:46 +0000 (21:02 +0000)
committer Hans Wennborg <hans@hanshq.net>
Mon, 4 Apr 2016 21:02:46 +0000 (21:02 +0000)
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp

index 89fa441..c9b29b5 100644 (file)
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -378,11 +378,16 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
    if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
         Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
        PI->getOperand(0).getReg() == StackPtr){
+    assert(PI->getOperand(1).getReg() == StackPtr);
      Offset += PI->getOperand(2).getImm();
      MBB.erase(PI);
      if (!doMergeWithPrevious) MBBI = NI;
    } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
-             PI->getOperand(0).getReg() == StackPtr) {
+             PI->getOperand(0).getReg() == StackPtr &&
+             PI->getOperand(1).getReg() == StackPtr &&
+             PI->getOperand(2).getImm() == 1 &&
+             PI->getOperand(3).getReg() == X86::NoRegister &&
+             PI->getOperand(5).getReg() == X86::NoRegister) {
      // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
      Offset += PI->getOperand(4).getImm();
      MBB.erase(PI);
@@ -390,6 +395,7 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
    } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
                Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
               PI->getOperand(0).getReg() == StackPtr) {
+    assert(PI->getOperand(1).getReg() == StackPtr);
      Offset -= PI->getOperand(2).getImm();
      MBB.erase(PI);
      if (!doMergeWithPrevious) MBBI = NI;
@@ -2533,13 +2539,22 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
        BuildCFI(MBB, I, DL, 
                 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
  
-    if (Amount) {
-      // Add Amount to SP to destroy a frame, and subtract to setup.
-      int Offset = isDestroy ? Amount : -Amount;
+    // Add Amount to SP to destroy a frame, or subtract to setup.
+    int64_t StackAdjustment = isDestroy ? Amount : -Amount;
+
+    if (StackAdjustment) {
+      // Merge with any previous or following adjustment instruction.
+      StackAdjustment += mergeSPUpdates(MBB, I, true);
+      StackAdjustment += mergeSPUpdates(MBB, I, false);
+
+      if (!StackAdjustment) {
+        // This and the merged instruction canceled out each other.
+        return I;
+      }
  
-      if (!(Fn->optForMinSize() && 
-            adjustStackWithPops(MBB, I, DL, Offset)))
-        BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
+      if (!(Fn->optForMinSize() &&
+            adjustStackWithPops(MBB, I, DL, StackAdjustment)))
+        BuildStackAdjustment(MBB, I, DL, StackAdjustment, /*InEpilogue=*/false);
      }
  
      if (DwarfCFI && !hasFP(MF)) {
@@ -2549,14 +2564,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
        // CFI only for EH purposes or for debugging. EH only requires the CFA
        // offset to be correct at each call site, while for debugging we want
        // it to be more precise.
-      int CFAOffset = Amount;
+
        // TODO: When not using precise CFA, we also need to adjust for the
        // InternalAmt here.
-
-      if (CFAOffset) {
-        CFAOffset = isDestroy ? -CFAOffset : CFAOffset;
-        BuildCFI(MBB, I, DL, 
-                 MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset));
+      if (StackAdjustment) {
+        BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset(
+                                 nullptr, -StackAdjustment));
        }
      }
  
diff --git a/llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll

index eae0ec2..acd32e4 100644 (file)
--- a/llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll
+++ b/llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -1,6 +1,6 @@
  ; REQUIRES: asserts
  ; RUN: llc < %s -mtriple=i686-unknown-linux -relocation-model=static -stats 2>&1 | \
-; RUN:   grep asm-printer | grep 15
+; RUN:   grep asm-printer | grep 14
  ;
  ; It's possible to schedule this in 14 instructions by avoiding
  ; callee-save registers, but the scheduler isn't currently that
diff --git a/llvm/test/CodeGen/X86/fold-push.ll b/llvm/test/CodeGen/X86/fold-push.ll

index eaf9135..9d3afd1 100644 (file)
--- a/llvm/test/CodeGen/X86/fold-push.ll
+++ b/llvm/test/CodeGen/X86/fold-push.ll
@@ -14,7 +14,7 @@ define void @test(i32 %a, i32 %b) optsize nounwind {
  ; SLM: movl (%esp), [[RELOAD:%e..]]
  ; SLM-NEXT: pushl [[RELOAD]]
  ; CHECK: calll
-; CHECK-NEXT: addl $4, %esp
+; CHECK-NEXT: addl $8, %esp
    %c = add i32 %a, %b
    call void @foo(i32 %c)
    call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
diff --git a/llvm/test/CodeGen/X86/force-align-stack-alloca.ll b/llvm/test/CodeGen/X86/force-align-stack-alloca.ll

index d0cf341..8d42680 100644 (file)
--- a/llvm/test/CodeGen/X86/force-align-stack-alloca.ll
+++ b/llvm/test/CodeGen/X86/force-align-stack-alloca.ll
@@ -32,15 +32,21 @@ define i64 @g(i32 %i) nounwind {
  ; CHECK:      movl   %{{...}}, %esp
  ; CHECK-NOT:         {{[^ ,]*}}, %esp
  ;
-; Next we set up the memset call, and then undo it.
+; Next we set up the memset call.
  ; CHECK:      subl   $20, %esp
  ; CHECK-NOT:         {{[^ ,]*}}, %esp
+; CHECK:      pushl
+; CHECK:      pushl
+; CHECK:      pushl
  ; CHECK:      calll  memset
-; CHECK-NEXT: addl   $32, %esp
+;
+; Deallocating 32 bytes of outgoing call frame for memset and
+; allocating 28 bytes for calling f yields a 4-byte adjustment:
+; CHECK-NEXT: addl   $4, %esp
  ; CHECK-NOT:         {{[^ ,]*}}, %esp
  ;
-; Next we set up the call to 'f'.
-; CHECK:      subl   $28, %esp
+; And move on to call 'f', and then restore the stack.
+; CHECK:      pushl
  ; CHECK-NOT:         {{[^ ,]*}}, %esp
  ; CHECK:      calll  f
  ; CHECK-NEXT: addl   $32, %esp
diff --git a/llvm/test/CodeGen/X86/localescape.ll b/llvm/test/CodeGen/X86/localescape.ll

index 07c3b7f..10ab8dd 100644 (file)
--- a/llvm/test/CodeGen/X86/localescape.ll
+++ b/llvm/test/CodeGen/X86/localescape.ll
@@ -137,6 +137,5 @@ define void @alloc_func_no_frameaddr() {
  ; X86: movl $13, (%esp)
  ; X86: pushl $0
  ; X86: calll _print_framealloc_from_fp
-; X86: addl    $4, %esp
-; X86: addl    $8, %esp
+; X86: addl    $12, %esp
  ; X86: retl
diff --git a/llvm/test/CodeGen/X86/memset-2.ll b/llvm/test/CodeGen/X86/memset-2.ll

index 7f37b62..e9a7b56 100644 (file)
--- a/llvm/test/CodeGen/X86/memset-2.ll
+++ b/llvm/test/CodeGen/X86/memset-2.ll
@@ -5,7 +5,7 @@ declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
  
  define fastcc void @t1() nounwind {
  ; CHECK-LABEL: t1:
-; CHECK:         subl $12, %esp
+; CHECK:         subl $16, %esp
  ; CHECK:         pushl $188
  ; CHECK-NEXT:    pushl $0
  ; CHECK-NEXT:    pushl $0
diff --git a/llvm/test/CodeGen/X86/movtopush.ll b/llvm/test/CodeGen/X86/movtopush.ll

index 5dd465e..59bc214 100644 (file)
--- a/llvm/test/CodeGen/X86/movtopush.ll
+++ b/llvm/test/CodeGen/X86/movtopush.ll
@@ -2,6 +2,7 @@
  ; RUN: llc < %s -mtriple=i686-windows -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH
  ; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
  ; RUN: llc < %s -mtriple=i686-windows -stackrealign -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX
  
  %class.Class = type { i32 }
  %struct.s = type { i64 }
@@ -12,6 +13,10 @@ declare x86_thiscallcc void @thiscall(%class.Class* %class, i32 %a, i32 %b, i32
  declare void @oneparam(i32 %a)
  declare void @eightparams(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
  declare void @struct(%struct.s* byval %a, i32 %b, i32 %c, i32 %d)
+declare void @inalloca(<{ %struct.s }>* inalloca)
+
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
  
  ; We should get pushes for x86, even though there is a reserved call frame.
  ; Make sure we don't touch x86-64, and that turning it off works.
@@ -223,8 +228,7 @@ entry:
  ; NORMAL-NEXT: pushl $2
  ; NORMAL-NEXT: pushl $1
  ; NORMAL-NEXT: call
-; NORMAL-NEXT: addl $16, %esp
-; NORMAL-NEXT: subl $20, %esp
+; NORMAL-NEXT: subl $4, %esp
  ; NORMAL-NEXT: movl 20(%esp), [[E1:%e..]]
  ; NORMAL-NEXT: movl 24(%esp), [[E2:%e..]]
  ; NORMAL-NEXT: movl    [[E2]], 4(%esp)
@@ -261,7 +265,7 @@ entry:
  ; NORMAL-NEXT: pushl $2
  ; NORMAL-NEXT: pushl $1
  ; NORMAL-NEXT: calll *16(%esp)
-; NORMAL-NEXT: addl $16, %esp
+; NORMAL-NEXT: addl $24, %esp
  define void @test10() optsize {
    %stack_fptr = alloca void (i32, i32, i32, i32)*
    store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
@@ -314,8 +318,7 @@ entry:
  ; NORMAL-NEXT: pushl    $2
  ; NORMAL-NEXT: pushl    $1
  ; NORMAL-NEXT: calll _good
-; NORMAL-NEXT: addl    $16, %esp
-; NORMAL-NEXT: subl    $20, %esp
+; NORMAL-NEXT: subl    $4, %esp
  ; NORMAL: movl    $8, 16(%esp)
  ; NORMAL-NEXT: movl    $7, 12(%esp)
  ; NORMAL-NEXT: movl    $6, 8(%esp)
@@ -358,3 +361,54 @@ entry:
    call void @good(i32 %val1, i32 %val2, i32 %val3, i32 %add)
    ret i32* %ptr3
  }
+
+; Make sure to fold adjacent stack adjustments.
+; LINUX-LABEL: pr27140:
+; LINUX: subl    $12, %esp
+; LINUX: .cfi_def_cfa_offset 16
+; LINUX-NOT: sub
+; LINUX: pushl   $4
+; LINUX: .cfi_adjust_cfa_offset 4
+; LINUX: pushl   $3
+; LINUX: .cfi_adjust_cfa_offset 4
+; LINUX: pushl   $2
+; LINUX: .cfi_adjust_cfa_offset 4
+; LINUX: pushl   $1
+; LINUX: .cfi_adjust_cfa_offset 4
+; LINUX: calll   good
+; LINUX: addl    $28, %esp
+; LINUX: .cfi_adjust_cfa_offset -28
+; LINUX-NOT: add
+; LINUX: retl
+define void @pr27140() optsize {
+entry:
+  tail call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; Check that a stack restore (leal -4(%ebp), %esp) doesn't get merged with a
+; stack adjustment (addl $12, %esp). Just because it's a lea doesn't mean it's
+; simply decreasing the stack pointer.
+; NORMAL-LABEL: test14:
+; NORMAL: calll _B_func
+; NORMAL: leal -4(%ebp), %esp
+; NORMAL-NOT: %esp
+; NORMAL: retl
+%struct.A = type { i32, i32 }
+%struct.B = type { i8 }
+declare x86_thiscallcc %struct.B* @B_ctor(%struct.B* returned, %struct.A* byval)
+declare void @B_func(%struct.B* sret, %struct.B*, i32)
+define void @test14(%struct.A* %a) {
+entry:
+  %ref.tmp = alloca %struct.B, align 1
+  %agg.tmp = alloca i64, align 4
+  %tmpcast = bitcast i64* %agg.tmp to %struct.A*
+  %tmp = alloca %struct.B, align 1
+  %0 = bitcast %struct.A* %a to i64*
+  %1 = load i64, i64* %0, align 4
+  store i64 %1, i64* %agg.tmp, align 4
+  %call = call x86_thiscallcc %struct.B* @B_ctor(%struct.B* %ref.tmp, %struct.A* byval %tmpcast)
+  %2 = getelementptr inbounds %struct.B, %struct.B* %tmp, i32 0, i32 0
+  call void @B_func(%struct.B* sret %tmp, %struct.B* %ref.tmp, i32 1)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/push-cfi-debug.ll b/llvm/test/CodeGen/X86/push-cfi-debug.ll

index 1dfe64e..217898f 100644 (file)
--- a/llvm/test/CodeGen/X86/push-cfi-debug.ll
+++ b/llvm/test/CodeGen/X86/push-cfi-debug.ll
@@ -23,8 +23,8 @@ declare x86_stdcallcc void @stdfoo(i32, i32) #0
  ; CHECK: .cfi_adjust_cfa_offset 4
  ; CHECK: calll stdfoo
  ; CHECK: .cfi_adjust_cfa_offset -8
-; CHECK: addl $8, %esp
-; CHECK: .cfi_adjust_cfa_offset -8
+; CHECK: addl $20, %esp
+; CHECK: .cfi_adjust_cfa_offset -20
  define void @test1() #0 !dbg !4 {
  entry:
    tail call void @foo(i32 1, i32 2) #1, !dbg !10
diff --git a/llvm/test/CodeGen/X86/push-cfi.ll b/llvm/test/CodeGen/X86/push-cfi.ll

index 6389708..5498af5 100644 (file)
--- a/llvm/test/CodeGen/X86/push-cfi.ll
+++ b/llvm/test/CodeGen/X86/push-cfi.ll
@@ -82,8 +82,8 @@ cleanup:
  ; LINUX-NEXT: Ltmp{{[0-9]+}}:
  ; LINUX-NEXT: .cfi_adjust_cfa_offset 4
  ; LINUX-NEXT: call
-; LINUX-NEXT: addl $16, %esp
-; LINUX: .cfi_adjust_cfa_offset -16
+; LINUX-NEXT: addl $28, %esp
+; LINUX: .cfi_adjust_cfa_offset -28
  ; DARWIN-NOT: .cfi_escape
  ; DARWIN-NOT: pushl
  define void @test2_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
author	Hans Wennborg <hans@hanshq.net>
	Mon, 4 Apr 2016 21:02:46 +0000 (21:02 +0000)
committer	Hans Wennborg <hans@hanshq.net>
	Mon, 4 Apr 2016 21:02:46 +0000 (21:02 +0000)
llvm/lib/Target/X86/X86FrameLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll		patch \| blob \| history
llvm/test/CodeGen/X86/fold-push.ll		patch \| blob \| history
llvm/test/CodeGen/X86/force-align-stack-alloca.ll		patch \| blob \| history
llvm/test/CodeGen/X86/localescape.ll		patch \| blob \| history
llvm/test/CodeGen/X86/memset-2.ll		patch \| blob \| history
llvm/test/CodeGen/X86/movtopush.ll		patch \| blob \| history
llvm/test/CodeGen/X86/push-cfi-debug.ll		patch \| blob \| history
llvm/test/CodeGen/X86/push-cfi.ll		patch \| blob \| history