Don't delete empty preheaders in CodeGenPrepare if it would create a critical edge

author Chuang-Yu Cheng <cycheng@multicorewareinc.com>

Tue, 5 Apr 2016 14:06:20 +0000 (14:06 +0000)

committer Chuang-Yu Cheng <cycheng@multicorewareinc.com>

Tue, 5 Apr 2016 14:06:20 +0000 (14:06 +0000)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp

index 89ffab4..c78ad65 100644 (file)
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
  #include "llvm/ADT/SmallSet.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/Analysis/ValueTracking.h"
@@ -111,6 +112,10 @@ static cl::opt<bool> StressExtLdPromotion(
      cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
               "optimization in CodeGenPrepare"));
  
+static cl::opt<bool> DisablePreheaderProtect(
+    "disable-preheader-prot", cl::Hidden, cl::init(false),
+    cl::desc("Disable protection against removing loop preheaders"));
+
  namespace {
  typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
  typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
@@ -122,6 +127,7 @@ class TypePromotionTransaction;
      const TargetLowering *TLI;
      const TargetTransformInfo *TTI;
      const TargetLibraryInfo *TLInfo;
+    const LoopInfo *LI;
  
      /// As we scan instructions optimizing them, this is the next instruction
      /// to optimize. Transforms that can invalidate this should update it.
@@ -161,6 +167,7 @@ class TypePromotionTransaction;
        // FIXME: When we can selectively preserve passes, preserve the domtree.
        AU.addRequired<TargetLibraryInfoWrapperPass>();
        AU.addRequired<TargetTransformInfoWrapperPass>();
+      AU.addRequired<LoopInfoWrapperPass>();
      }
  
    private:
@@ -218,6 +225,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
      TLI = TM->getSubtargetImpl(F)->getTargetLowering();
    TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    OptSize = F.optForSize();
  
    /// This optimization identifies DIV instructions that can be
@@ -359,6 +367,15 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
  /// edges in ways that are non-optimal for isel. Start by eliminating these
  /// blocks so we can split them the way we want them.
  bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
+  SmallPtrSet<BasicBlock *, 16> Preheaders;
+  SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
+  while (!LoopList.empty()) {
+    Loop *L = LoopList.pop_back_val();
+    LoopList.insert(LoopList.end(), L->begin(), L->end());
+    if (BasicBlock *Preheader = L->getLoopPreheader())
+      Preheaders.insert(Preheader);
+  }
+
    bool MadeChange = false;
    // Note that this intentionally skips the entry block.
    for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
@@ -391,6 +408,14 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
      if (!canMergeBlocks(BB, DestBB))
        continue;
  
+    // Do not delete loop preheaders if doing so would create a critical edge.
+    // Loop preheaders can be good locations to spill registers. If the
+    // preheader is deleted and we create a critical edge, registers may be
+    // spilled in the loop body instead.
+    if (!DisablePreheaderProtect && Preheaders.count(BB) &&
+        !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor()))
+     continue;
+
      eliminateMostlyEmptyBlock(BB);
      MadeChange = true;
    }
diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll

index 2c93669..2811f1b 100644 (file)
--- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -29,7 +29,7 @@ target triple = "arm64-apple-ios"
  ; Set the first argument to zero.
  ; CHECK-NEXT: mov w0, wzr
  ; CHECK-NEXT: bl _doSomething
-; 
+;
  ; Without shrink-wrapping, epilogue is in the exit block.
  ; DISABLE: [[EXIT_LABEL]]:
  ; Epilogue code.
@@ -332,11 +332,11 @@ entry:
  ; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
  ;
  ; Sum is merged with the returned register.
-; CHECK: mov [[SUM:w0]], wzr
-; CHECK-NEXT: add [[VA_BASE:x[0-9]+]], sp, #16
+; CHECK: add [[VA_BASE:x[0-9]+]], sp, #16
  ; CHECK-NEXT: str [[VA_BASE]], [sp, #8]
  ; CHECK-NEXT: cmp w1, #1
  ; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]]
+; CHECK: mov [[SUM:w0]], wzr
  ;
  ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
  ; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8]
@@ -347,18 +347,18 @@ entry:
  ; CHECK-NEXT: sub w1, w1, #1
  ; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]]
  ;
-; DISABLE-NEXT: b [[IFEND_LABEL]]
+; DISABLE-NEXT: b
  ; DISABLE: [[ELSE_LABEL]]: ; %if.else
  ; DISABLE: lsl w0, w1, #1
  ;
+; ENABLE: [[ELSE_LABEL]]: ; %if.else
+; ENABLE: lsl w0, w1, #1
+; ENABLE-NEXT: ret
+;
  ; CHECK: [[IFEND_LABEL]]:
  ; Epilogue code.
  ; CHECK: add sp, sp, #16
  ; CHECK-NEXT: ret
-;
-; ENABLE: [[ELSE_LABEL]]: ; %if.else
-; ENABLE: lsl w0, w1, #1
-; ENABLE-NEXT: ret
  define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 {
  entry:
    %ap = alloca i8*, align 8
diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll

index bf5cf52..8eaf3d5 100644 (file)
--- a/llvm/test/CodeGen/ARM/code-placement.ll
+++ b/llvm/test/CodeGen/ARM/code-placement.ll
@@ -12,9 +12,9 @@ entry:
    br i1 %0, label %bb2, label %bb
  
  bb:
-; CHECK: LBB0_1:
-; CHECK: bne LBB0_1
-; CHECK-NOT: b LBB0_1
+; CHECK: LBB0_2:
+; CHECK: bne LBB0_2
+; CHECK-NOT: b LBB0_2
  ; CHECK: bx lr
    %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
    %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
diff --git a/llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll b/llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll

index a1abef9..6678dac 100644 (file)
--- a/llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
+++ b/llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
@@ -75,7 +75,7 @@ declare void @terminatev()
  
  ; CHECK-LABEL: __Z4foo1c:
  ; CHECK: blx __Znwm
-; CHECK: {{.*}}@ %entry.do.body.i.i.i_crit_edge
+; CHECK: {{.*}}@ %do.body.i.i.i.preheader
  ; CHECK: str r0, [sp, [[OFFSET:#[0-9]+]]]
  ; CHECK: {{.*}}@ %do.body.i.i.i
  ; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]]
diff --git a/llvm/test/CodeGen/Mips/brdelayslot.ll b/llvm/test/CodeGen/Mips/brdelayslot.ll

index 0f46619..8056334 100644 (file)
--- a/llvm/test/CodeGen/Mips/brdelayslot.ll
+++ b/llvm/test/CodeGen/Mips/brdelayslot.ll
@@ -5,19 +5,19 @@
  ; RUN: llc -march=mipsel -disable-mips-df-forward-search=false \
  ; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=FORWARD
  ; RUN: llc -march=mipsel -disable-mips-df-backward-search \
-; RUN: -disable-mips-df-succbb-search=false < %s | \
+; RUN: -disable-mips-df-succbb-search=false -disable-preheader-prot=true < %s | \
  ; RUN: FileCheck %s -check-prefix=SUCCBB
  
  define void @foo1() nounwind {
  entry:
-; Default:     jalr 
-; Default-NOT: nop 
-; Default:     jr 
+; Default:     jalr
+; Default-NOT: nop
+; Default:     jr
  ; Default-NOT: nop
  ; Default:     .end
-; None: jalr 
-; None: nop 
-; None: jr 
+; None: jalr
+; None: nop
+; None: jr
  ; None: nop
  ; None: .end
  
diff --git a/llvm/test/CodeGen/Mips/prevent-hoisting.ll b/llvm/test/CodeGen/Mips/prevent-hoisting.ll

index 81b14d7..696147b 100644 (file)
--- a/llvm/test/CodeGen/Mips/prevent-hoisting.ll
+++ b/llvm/test/CodeGen/Mips/prevent-hoisting.ll
@@ -11,12 +11,12 @@
  ; CHECK-LABEL: readLumaCoeff8x8_CABAC
  
  ; The check for first "addiu" instruction is added so that we can match the correct "b" instruction.
-; CHECK:           addiu ${{[0-9]+}}, $zero, -1
+; CHECK:           andi
  ; CHECK:           b $[[BB0:BB[0-9_]+]]
-; CHECK-NEXT:      addiu ${{[0-9]+}}, $zero, 0
+; CHECK-NEXT:      sll
  
  ; Check that at the start of a fallthrough block there is a instruction that writes to $1.
-; CHECK-NEXT:  {{BB[0-9_#]+}}: 
+; CHECK-NEXT:  {{BB[0-9_#]+}}:
  ; CHECK-NEXT:      lw      $[[R1:[0-9]+]], %got(assignSE2partition)($[[R2:[0-9]+]])
  ; CHECK-NEXT:      sll $1, $[[R0:[0-9]+]], 4
  
diff --git a/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll

index b8e5100..812628b 100644 (file)
--- a/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll
+++ b/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll
@@ -19,7 +19,7 @@
  ; reusing the pre-addition register later, or the post-addition one. Currently,
  ; it does the latter, so we check:
  
-; CHECK: # %while.body85.i
+; CHECK: # %while.body85.i{{$}}
  ; CHECK-NOT: # %
  ; CHECK-NOT: add
  ; CHECK: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]]
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll

index fd389b5..6fe11bf 100644 (file)
--- a/llvm/test/CodeGen/X86/block-placement.ll
+++ b/llvm/test/CodeGen/X86/block-placement.ll
@@ -603,10 +603,8 @@ define void @test_unnatural_cfg_backwards_inner_loop() {
  ;
  ; CHECK: test_unnatural_cfg_backwards_inner_loop
  ; CHECK: %entry
-; CHECK: [[BODY:# BB#[0-9]+]]:
  ; CHECK: %loop2b
  ; CHECK: %loop1
-; CHECK: %loop2a
  
  entry:
    br i1 undef, label %loop2a, label %body
diff --git a/llvm/test/CodeGen/X86/break-false-dep.ll b/llvm/test/CodeGen/X86/break-false-dep.ll

index 699de22..74a0728 100644 (file)
--- a/llvm/test/CodeGen/X86/break-false-dep.ll
+++ b/llvm/test/CodeGen/X86/break-false-dep.ll
@@ -64,7 +64,7 @@ declare float @llvm.sqrt.f32(float)
  declare double @llvm.sqrt.f64(double)
  
  ; SSE-LABEL: loopdep1
-; SSE: for.body
+; SSE: for.body{{$}}
  ;
  ; This loop contains two cvtsi2ss instructions that update the same xmm
  ; register.  Verify that the execution dependency fix pass breaks those
@@ -139,7 +139,7 @@ ret:
  
  ; This loop contains a cvtsi2sd instruction that has a loop-carried
  ; false dependency on an xmm that is modified by other scalar instructions
-; that follow it in the loop. Additionally, the source of convert is a 
+; that follow it in the loop. Additionally, the source of convert is a
  ; memory operand. Verify the execution dependency fix pass breaks this
  ; dependency by inserting a xor before the convert.
  @x = common global [1024 x double] zeroinitializer, align 16
diff --git a/llvm/test/CodeGen/X86/lsr-static-addr.ll b/llvm/test/CodeGen/X86/lsr-static-addr.ll

index 97451e5..3980bee 100644 (file)
--- a/llvm/test/CodeGen/X86/lsr-static-addr.ll
+++ b/llvm/test/CodeGen/X86/lsr-static-addr.ll
@@ -11,8 +11,8 @@
  ; CHECK-NEXT: incq %rax
  
  
-; ATOM: xorl  %eax, %eax
  ; ATOM: movsd .LCPI0_0(%rip), %xmm0
+; ATOM: xorl  %eax, %eax
  ; ATOM: align
  ; ATOM-NEXT: BB0_2:
  ; ATOM-NEXT: movsd A(,%rax,8)
diff --git a/llvm/test/CodeGen/X86/phi-immediate-factoring.ll b/llvm/test/CodeGen/X86/phi-immediate-factoring.ll

index 6425ef0..05a0bf6 100644 (file)
--- a/llvm/test/CodeGen/X86/phi-immediate-factoring.ll
+++ b/llvm/test/CodeGen/X86/phi-immediate-factoring.ll
@@ -1,5 +1,6 @@
  ; REQUIRES: asserts
-; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6
+; RUN: llc < %s -disable-preheader-prot=true  -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6
+; RUN: llc < %s -disable-preheader-prot=false -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 3
  ; PR1296
  
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll

index 8ee97ae..a02a4ae 100644 (file)
--- a/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll
+++ b/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -13,7 +13,7 @@ forcond.preheader:            ; preds = %entry
  
  ifthen:                ; preds = %entry
         ret i32 0
-; CHECK: forbody
+; CHECK: forbody{{$}}
  ; CHECK-NOT: mov
  forbody:               ; preds = %forbody, %forcond.preheader
         %indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ]          ; <i32> [#uses=3]
diff --git a/llvm/test/CodeGen/X86/pr2659.ll b/llvm/test/CodeGen/X86/pr2659.ll

index 8003588..debb13e 100644 (file)
--- a/llvm/test/CodeGen/X86/pr2659.ll
+++ b/llvm/test/CodeGen/X86/pr2659.ll
@@ -21,7 +21,7 @@ forcond.preheader:              ; preds = %entry
  ; CHECK: je
  
  ; There should be no moves required in the for loop body.
-; CHECK: %forbody
+; CHECK: %forbody{{$}}
  ; CHECK-NOT: mov
  ; CHECK: jbe
  
diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll

index 0164c16..10658f3 100644 (file)
--- a/llvm/test/CodeGen/X86/setcc-lowering.ll
+++ b/llvm/test/CodeGen/X86/setcc-lowering.ll
@@ -33,7 +33,7 @@ entry:
  
  define void @pr26232(i64 %a) {
  ; KNL-32-LABEL: pr26232:
-; KNL-32:       # BB#0: # %for_test11.preheader
+; KNL-32:       # BB#0: # %for_loop599.preheader
  ; KNL-32-NEXT:    pushl %esi
  ; KNL-32-NEXT:  .Ltmp0:
  ; KNL-32-NEXT:    .cfi_def_cfa_offset 8
diff --git a/llvm/test/CodeGen/X86/sink-blockfreq.ll b/llvm/test/CodeGen/X86/sink-blockfreq.ll

index c2f0411..5436cf2 100644 (file)
--- a/llvm/test/CodeGen/X86/sink-blockfreq.ll
+++ b/llvm/test/CodeGen/X86/sink-blockfreq.ll
@@ -1,5 +1,5 @@
-; RUN: llc -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
-; RUN: llc -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
+; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
+; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
  
  ; Test that by changing BlockFrequencyInfo we change the order in which
  ; machine-sink looks for sucessor blocks. By not using BFI, both G and B
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll

index 184e300..bdc36bd 100644 (file)
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
@@ -102,7 +102,7 @@ while.end:                                        ; preds = %entry
  ; CHECK-NEXT: %for.body3.us.i
  ; CHECK-NEXT: Inner Loop
  ; CHECK: testb
-; CHECK: jne
+; CHECK: je
  ; CHECK: jmp
  define fastcc void @test3(double* nocapture %u) nounwind uwtable ssp {
  entry:
author	Chuang-Yu Cheng <cycheng@multicorewareinc.com>
	Tue, 5 Apr 2016 14:06:20 +0000 (14:06 +0000)
committer	Chuang-Yu Cheng <cycheng@multicorewareinc.com>
	Tue, 5 Apr 2016 14:06:20 +0000 (14:06 +0000)
llvm/lib/CodeGen/CodeGenPrepare.cpp		patch \| blob \| history
llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll		patch \| blob \| history
llvm/test/CodeGen/ARM/code-placement.ll		patch \| blob \| history
llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll		patch \| blob \| history
llvm/test/CodeGen/Mips/brdelayslot.ll		patch \| blob \| history
llvm/test/CodeGen/Mips/prevent-hoisting.ll		patch \| blob \| history
llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll		patch \| blob \| history
llvm/test/CodeGen/X86/block-placement.ll		patch \| blob \| history
llvm/test/CodeGen/X86/break-false-dep.ll		patch \| blob \| history
llvm/test/CodeGen/X86/lsr-static-addr.ll		patch \| blob \| history
llvm/test/CodeGen/X86/phi-immediate-factoring.ll		patch \| blob \| history
llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll		patch \| blob \| history
llvm/test/CodeGen/X86/pr2659.ll		patch \| blob \| history
llvm/test/CodeGen/X86/setcc-lowering.ll		patch \| blob \| history
llvm/test/CodeGen/X86/sink-blockfreq.ll		patch \| blob \| history
llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll		patch \| blob \| history