Using branch probability to guide critical edge splitting.

author Dehao Chen <dehao@google.com>

Thu, 20 Oct 2016 18:06:52 +0000 (18:06 +0000)

committer Dehao Chen <dehao@google.com>

Thu, 20 Oct 2016 18:06:52 +0000 (18:06 +0000)
author Dehao Chen <dehao@google.com>
Thu, 20 Oct 2016 18:06:52 +0000 (18:06 +0000)
committer Dehao Chen <dehao@google.com>
Thu, 20 Oct 2016 18:06:52 +0000 (18:06 +0000)
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp

index d2650db..27d6c20 100644 (file)
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -24,6 +24,7 @@
  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
  #include "llvm/CodeGen/MachineDominators.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -60,6 +61,15 @@ UseBlockFreqInfo("machine-sink-bfi",
             cl::desc("Use block frequency info to find successors to sink"),
             cl::init(true), cl::Hidden);
  
+static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
+    "machine-sink-split-probability-threshold",
+    cl::desc(
+        "Percentage threshold for splitting single-instruction critical edge. "
+        "If the branch threshold is higher than this threshold, we allow "
+        "speculative execution of up to 1 instruction to avoid branching to "
+        "splitted critical edge"),
+    cl::init(40), cl::Hidden);
+
  STATISTIC(NumSunk,      "Number of machine instructions sunk");
  STATISTIC(NumSplit,     "Number of critical edges split");
  STATISTIC(NumCoalesces, "Number of copies coalesced");
@@ -74,6 +84,7 @@ namespace {
      MachinePostDominatorTree *PDT; // Machine post dominator tree
      MachineLoopInfo *LI;
      const MachineBlockFrequencyInfo *MBFI;
+    const MachineBranchProbabilityInfo *MBPI;
      AliasAnalysis *AA;
  
      // Remember which edges have been considered for breaking.
@@ -105,6 +116,7 @@ namespace {
        AU.addRequired<MachineDominatorTree>();
        AU.addRequired<MachinePostDominatorTree>();
        AU.addRequired<MachineLoopInfo>();
+      AU.addRequired<MachineBranchProbabilityInfo>();
        AU.addPreserved<MachineDominatorTree>();
        AU.addPreserved<MachinePostDominatorTree>();
        AU.addPreserved<MachineLoopInfo>();
@@ -163,6 +175,7 @@ char MachineSinking::ID = 0;
  char &llvm::MachineSinkingID = MachineSinking::ID;
  INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
                  "Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
  INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
@@ -283,6 +296,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
    PDT = &getAnalysis<MachinePostDominatorTree>();
    LI = &getAnalysis<MachineLoopInfo>();
    MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
+  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  
    bool EverMadeChange = false;
@@ -383,6 +397,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
    if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
      return true;
  
+  if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
+      BranchProbability(SplitEdgeProbabilityThreshold, 100))
+    return true;
+
    // MI is cheap, we probably don't want to break the critical edge for it.
    // However, if this would allow some definitions of its source operands
    // to be sunk then it's probably worth it.
diff --git a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll

index 32cdf41..364bd5d 100644 (file)
--- a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
@@ -38,16 +38,14 @@ entry:
  ; CHECK-ARMV6-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
  ; CHECK-ARMV6-NEXT: [[TRY:.LBB[0-9_]+]]:
  ; CHECK-ARMV6-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
-; CHECK-ARMV6-NEXT: mov [[RES:r[0-9]+]], #0
  ; CHECK-ARMV6-NEXT: cmp [[LD]], [[DESIRED]]
-; CHECK-ARMV6-NEXT: bne [[END:.LBB[0-9_]+]]
+; CHECK-ARMV6-NEXT: movne [[RES:r[0-9]+]], #0
+; CHECK-ARMV6-NEXT: bxne lr
  ; CHECK-ARMV6-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
-; CHECK-ARMV6-NEXT: mov [[RES]], #1
  ; CHECK-ARMV6-NEXT: cmp [[SUCCESS]], #0
-; CHECK-ARMV6-NEXT: bne [[TRY]]
-; CHECK-ARMV6-NEXT: [[END]]:
-; CHECK-ARMV6-NEXT: mov r0, [[RES]]
-; CHECK-ARMV6-NEXT: bx lr
+; CHECK-ARMV6-NEXT: moveq [[RES]], #1
+; CHECK-ARMV6-NEXT: bxeq lr
+; CHECK-ARMV6-NEXT: b [[TRY]]
  
  ; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8:
  ; CHECK-THUMBV6:       mov [[EXPECTED:r[0-9]+]], r1
@@ -64,20 +62,18 @@ entry:
  ; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8:
  ; CHECK-ARMV7-NEXT: .fnstart
  ; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
-; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]:
-; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
-; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]
-; CHECK-ARMV7-NEXT: bne [[FAIL:.LBB[0-9_]+]]
+; CHECK-ARMV7-NEXT: b [[TRY:.LBB[0-9_]+]]
+; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]:
  ; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
-; CHECK-ARMV7-NEXT: mov [[RES:r[0-9]+]], #1
  ; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0
-; CHECK-ARMV7-NEXT: bne [[TRY]]
-; CHECK-ARMV7-NEXT: b [[END:.LBB[0-9_]+]]
-; CHECK-ARMV7-NEXT: [[FAIL]]:
+; CHECK-ARMV7-NEXT: moveq [[RES:r[0-9]+]], #1
+; CHECK-ARMV7-NEXT: bxeq lr
+; CHECK-ARMV7-NEXT: [[TRY]]:
+; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
+; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]
+; CHECK-ARMV7-NEXT: beq [[HEAD]]
  ; CHECK-ARMV7-NEXT: clrex
  ; CHECK-ARMV7-NEXT: mov [[RES]], #0
-; CHECK-ARMV7-NEXT: [[END]]:
-; CHECK-ARMV7-NEXT: mov r0, [[RES]]
  ; CHECK-ARMV7-NEXT: bx lr
  
  ; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll

index 8eaf3d5..b9d9024 100644 (file)
--- a/llvm/test/CodeGen/ARM/code-placement.ll
+++ b/llvm/test/CodeGen/ARM/code-placement.ll
@@ -12,9 +12,9 @@ entry:
    br i1 %0, label %bb2, label %bb
  
  bb:
-; CHECK: LBB0_2:
-; CHECK: bne LBB0_2
-; CHECK-NOT: b LBB0_2
+; CHECK: LBB0_[[LABEL:[0-9]]]:
+; CHECK: bne LBB0_[[LABEL]]
+; CHECK-NOT: b LBB0_[[LABEL]]
  ; CHECK: bx lr
    %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
    %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
@@ -34,14 +34,13 @@ bb2:
  define i32 @t2(i32 %passes, i32* nocapture %src, i32 %size) nounwind readonly {
  entry:
  ; CHECK-LABEL: t2:
-; CHECK: beq LBB1_[[RET:.]]
    %0 = icmp eq i32 %passes, 0                     ; <i1> [#uses=1]
    br i1 %0, label %bb5, label %bb.nph15
  
-; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
  bb1:                                              ; preds = %bb2.preheader, %bb1
-; CHECK: LBB1_[[BB1:.]]: @ %bb1
-; CHECK: bne LBB1_[[BB1]]
+; CHECK: LBB1_[[BB3:.]]: @ %bb3
+; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
+; CHECK: blt LBB1_[[BB3]]
    %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
    %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
    %tmp17 = sub i32 %i.07, %indvar                 ; <i32> [#uses=1]
@@ -53,9 +52,9 @@ bb1:                                              ; preds = %bb2.preheader, %bb1
    br i1 %exitcond, label %bb3, label %bb1
  
  bb3:                                              ; preds = %bb1, %bb2.preheader
-; CHECK: LBB1_[[BB3:.]]: @ %bb3
-; CHECK: bne LBB1_[[PREHDR]]
-; CHECK-NOT: b LBB1_
+; CHECK: LBB1_[[BB1:.]]: @ %bb1
+; CHECK: bne LBB1_[[BB1]]
+; CHECK: b LBB1_[[BB3]]
    %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]
    %3 = add i32 %pass.011, 1                       ; <i32> [#uses=2]
    %exitcond18 = icmp eq i32 %3, %passes           ; <i1> [#uses=1]
@@ -71,8 +70,6 @@ bb2.preheader:                                    ; preds = %bb3, %bb.nph15
    %sum.110 = phi i32 [ 0, %bb.nph15 ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=2]
    br i1 %4, label %bb1, label %bb3
  
-; CHECK: LBB1_[[RET]]: @ %bb5
-; CHECK: pop
  bb5:                                              ; preds = %bb3, %entry
    %sum.1.lcssa = phi i32 [ 0, %entry ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=1]
    ret i32 %sum.1.lcssa
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll

index 39e2964..807dfe4 100644 (file)
--- a/llvm/test/CodeGen/X86/block-placement.ll
+++ b/llvm/test/CodeGen/X86/block-placement.ll
@@ -478,12 +478,12 @@ define void @fpcmp_unanalyzable_branch(i1 %cond) {
  ; CHECK-LABEL: fpcmp_unanalyzable_branch:
  ; CHECK:       # BB#0: # %entry
  ; CHECK:       # BB#1: # %entry.if.then_crit_edge
-; CHECK:       .LBB10_4: # %if.then
-; CHECK:       .LBB10_5: # %if.end
+; CHECK:       .LBB10_5: # %if.then
+; CHECK:       .LBB10_6: # %if.end
  ; CHECK:       # BB#3: # %exit
  ; CHECK:       jne .LBB10_4
-; CHECK-NEXT:  jnp .LBB10_5
-; CHECK-NEXT:  jmp .LBB10_4
+; CHECK-NEXT:  jnp .LBB10_6
+; CHECK:       jmp .LBB10_5
  
  entry:
  ; Note that this branch must be strongly biased toward
diff --git a/llvm/test/CodeGen/X86/clz.ll b/llvm/test/CodeGen/X86/clz.ll

index 40bbac2..cffc673 100644 (file)
--- a/llvm/test/CodeGen/X86/clz.ll
+++ b/llvm/test/CodeGen/X86/clz.ll
@@ -279,28 +279,32 @@ define i64 @ctlz_i64(i64 %x) {
  define i8 @ctlz_i8_zero_test(i8 %n) {
  ; X32-LABEL: ctlz_i8_zero_test:
  ; X32:       # BB#0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X32-NEXT:    movb $8, %al
-; X32-NEXT:    testb %cl, %cl
-; X32-NEXT:    je .LBB8_2
-; X32-NEXT:  # BB#1: # %cond.false
-; X32-NEXT:    movzbl %cl, %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    testb %al, %al
+; X32-NEXT:    je .LBB8_1
+; X32-NEXT:  # BB#2: # %cond.false
+; X32-NEXT:    movzbl %al, %eax
  ; X32-NEXT:    bsrl %eax, %eax
  ; X32-NEXT:    xorl $7, %eax
-; X32-NEXT:  .LBB8_2: # %cond.end
+; X32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; X32-NEXT:    retl
+; X32-NEXT:  .LBB8_1:
+; X32-NEXT:    movb    $8, %al
  ; X32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: ctlz_i8_zero_test:
  ; X64:       # BB#0:
-; X64-NEXT:    movb $8, %al
  ; X64-NEXT:    testb %dil, %dil
-; X64-NEXT:    je .LBB8_2
-; X64-NEXT:  # BB#1: # %cond.false
+; X64-NEXT:    je .LBB8_1
+; X64-NEXT:  # BB#2: # %cond.false
  ; X64-NEXT:    movzbl %dil, %eax
  ; X64-NEXT:    bsrl %eax, %eax
  ; X64-NEXT:    xorl $7, %eax
-; X64-NEXT:  .LBB8_2: # %cond.end
+; X64-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB8_1:
+; X64-NEXT:    movb    $8, %al
  ; X64-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
  ; X64-NEXT:    retq
  ;
@@ -327,26 +331,30 @@ define i8 @ctlz_i8_zero_test(i8 %n) {
  define i16 @ctlz_i16_zero_test(i16 %n) {
  ; X32-LABEL: ctlz_i16_zero_test:
  ; X32:       # BB#0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movw $16, %ax
-; X32-NEXT:    testw %cx, %cx
-; X32-NEXT:    je .LBB9_2
-; X32-NEXT:  # BB#1: # %cond.false
-; X32-NEXT:    bsrw %cx, %ax
+; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    testw %ax, %ax
+; X32-NEXT:    je .LBB9_1
+; X32-NEXT:  # BB#2: # %cond.false
+; X32-NEXT:    bsrw %ax, %ax
  ; X32-NEXT:    xorl $15, %eax
-; X32-NEXT:  .LBB9_2: # %cond.end
+; X32-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; X32-NEXT:    retl
+; X32-NEXT:  .LBB9_1:
+; X32-NEXT:    movw    $16, %ax
  ; X32-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: ctlz_i16_zero_test:
  ; X64:       # BB#0:
-; X64-NEXT:    movw $16, %ax
  ; X64-NEXT:    testw %di, %di
-; X64-NEXT:    je .LBB9_2
-; X64-NEXT:  # BB#1: # %cond.false
+; X64-NEXT:    je .LBB9_1
+; X64-NEXT:  # BB#2: # %cond.false
  ; X64-NEXT:    bsrw %di, %ax
  ; X64-NEXT:    xorl $15, %eax
-; X64-NEXT:  .LBB9_2: # %cond.end
+; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB9_1:
+; X64-NEXT:    movw $16, %ax
  ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
  ; X64-NEXT:    retq
  ;
@@ -367,25 +375,27 @@ define i16 @ctlz_i16_zero_test(i16 %n) {
  define i32 @ctlz_i32_zero_test(i32 %n) {
  ; X32-LABEL: ctlz_i32_zero_test:
  ; X32:       # BB#0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $32, %eax
-; X32-NEXT:    testl %ecx, %ecx
-; X32-NEXT:    je .LBB10_2
-; X32-NEXT:  # BB#1: # %cond.false
-; X32-NEXT:    bsrl %ecx, %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    testl %eax, %eax
+; X32-NEXT:    je .LBB10_1
+; X32-NEXT:  # BB#2: # %cond.false
+; X32-NEXT:    bsrl %eax, %eax
  ; X32-NEXT:    xorl $31, %eax
-; X32-NEXT:  .LBB10_2: # %cond.end
+; X32-NEXT:    retl
+; X32-NEXT:  .LBB10_1:
+; X32-NEXT:    movl    $32, %eax
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: ctlz_i32_zero_test:
  ; X64:       # BB#0:
-; X64-NEXT:    movl $32, %eax
  ; X64-NEXT:    testl %edi, %edi
-; X64-NEXT:    je .LBB10_2
-; X64-NEXT:  # BB#1: # %cond.false
+; X64-NEXT:    je .LBB10_1
+; X64-NEXT:  # BB#2: # %cond.false
  ; X64-NEXT:    bsrl %edi, %eax
  ; X64-NEXT:    xorl $31, %eax
-; X64-NEXT:  .LBB10_2: # %cond.end
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB10_1:
+; X64-NEXT:    movl $32, %eax
  ; X64-NEXT:    retq
  ;
  ; X32-CLZ-LABEL: ctlz_i32_zero_test:
@@ -464,26 +474,30 @@ define i64 @ctlz_i64_zero_test(i64 %n) {
  define i8 @cttz_i8_zero_test(i8 %n) {
  ; X32-LABEL: cttz_i8_zero_test:
  ; X32:       # BB#0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X32-NEXT:    movb $8, %al
-; X32-NEXT:    testb %cl, %cl
-; X32-NEXT:    je .LBB12_2
-; X32-NEXT:  # BB#1: # %cond.false
-; X32-NEXT:    movzbl %cl, %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    testb %al, %al
+; X32-NEXT:    je .LBB12_1
+; X32-NEXT:  # BB#2: # %cond.false
+; X32-NEXT:    movzbl %al, %eax
  ; X32-NEXT:    bsfl %eax, %eax
-; X32-NEXT:  .LBB12_2: # %cond.end
+; X32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; X32-NEXT:    retl
+; X32-NEXT:  .LBB12_1
+; X32-NEXT:    movb $8, %al
  ; X32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: cttz_i8_zero_test:
  ; X64:       # BB#0:
-; X64-NEXT:    movb $8, %al
  ; X64-NEXT:    testb %dil, %dil
-; X64-NEXT:    je .LBB12_2
-; X64-NEXT:  # BB#1: # %cond.false
+; X64-NEXT:    je .LBB12_1
+; X64-NEXT:  # BB#2: # %cond.false
  ; X64-NEXT:    movzbl %dil, %eax
  ; X64-NEXT:    bsfl %eax, %eax
-; X64-NEXT:  .LBB12_2: # %cond.end
+; X64-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB12_1:
+; X64-NEXT:    movb $8, %al
  ; X64-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
  ; X64-NEXT:    retq
  ;
@@ -510,23 +524,25 @@ define i8 @cttz_i8_zero_test(i8 %n) {
  define i16 @cttz_i16_zero_test(i16 %n) {
  ; X32-LABEL: cttz_i16_zero_test:
  ; X32:       # BB#0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    testw %ax, %ax
+; X32-NEXT:    je .LBB13_1
+; X32-NEXT:  # BB#2: # %cond.false
+; X32-NEXT:    bsfw %ax, %ax
+; X32-NEXT:    retl
+; X32-NEXT:  .LBB13_1
  ; X32-NEXT:    movw $16, %ax
-; X32-NEXT:    testw %cx, %cx
-; X32-NEXT:    je .LBB13_2
-; X32-NEXT:  # BB#1: # %cond.false
-; X32-NEXT:    bsfw %cx, %ax
-; X32-NEXT:  .LBB13_2: # %cond.end
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: cttz_i16_zero_test:
  ; X64:       # BB#0:
-; X64-NEXT:    movw $16, %ax
  ; X64-NEXT:    testw %di, %di
-; X64-NEXT:    je .LBB13_2
-; X64-NEXT:  # BB#1: # %cond.false
+; X64-NEXT:    je .LBB13_1
+; X64-NEXT:  # BB#2: # %cond.false
  ; X64-NEXT:    bsfw %di, %ax
-; X64-NEXT:  .LBB13_2: # %cond.end
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB13_1:
+; X64-NEXT:    movw $16, %ax
  ; X64-NEXT:    retq
  ;
  ; X32-CLZ-LABEL: cttz_i16_zero_test:
@@ -546,23 +562,25 @@ define i16 @cttz_i16_zero_test(i16 %n) {
  define i32 @cttz_i32_zero_test(i32 %n) {
  ; X32-LABEL: cttz_i32_zero_test:
  ; X32:       # BB#0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    testl %eax, %eax
+; X32-NEXT:    je .LBB14_1
+; X32-NEXT:  # BB#2: # %cond.false
+; X32-NEXT:    bsfl %eax, %eax
+; X32-NEXT:    retl
+; X32-NEXT:  .LBB14_1
  ; X32-NEXT:    movl $32, %eax
-; X32-NEXT:    testl %ecx, %ecx
-; X32-NEXT:    je .LBB14_2
-; X32-NEXT:  # BB#1: # %cond.false
-; X32-NEXT:    bsfl %ecx, %eax
-; X32-NEXT:  .LBB14_2: # %cond.end
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: cttz_i32_zero_test:
  ; X64:       # BB#0:
-; X64-NEXT:    movl $32, %eax
  ; X64-NEXT:    testl %edi, %edi
-; X64-NEXT:    je .LBB14_2
-; X64-NEXT:  # BB#1: # %cond.false
+; X64-NEXT:    je .LBB14_1
+; X64-NEXT:  # BB#2: # %cond.false
  ; X64-NEXT:    bsfl %edi, %eax
-; X64-NEXT:  .LBB14_2: # %cond.end
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB14_1:
+; X64-NEXT:    movl $32, %eax
  ; X64-NEXT:    retq
  ;
  ; X32-CLZ-LABEL: cttz_i32_zero_test:
@@ -642,25 +660,27 @@ define i64 @cttz_i64_zero_test(i64 %n) {
  define i32 @ctlz_i32_fold_cmov(i32 %n) {
  ; X32-LABEL: ctlz_i32_fold_cmov:
  ; X32:       # BB#0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    orl $1, %ecx
-; X32-NEXT:    movl $32, %eax
-; X32-NEXT:    je .LBB16_2
-; X32-NEXT:  # BB#1: # %cond.false
-; X32-NEXT:    bsrl %ecx, %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    orl $1, %eax
+; X32-NEXT:    je .LBB16_1
+; X32-NEXT:  # BB#2: # %cond.false
+; X32-NEXT:    bsrl %eax, %eax
  ; X32-NEXT:    xorl $31, %eax
-; X32-NEXT:  .LBB16_2: # %cond.end
+; X32-NEXT:    retl
+; X32-NEXT:  .LBB16_1
+; X32-NEXT:    movl $32, %eax
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: ctlz_i32_fold_cmov:
  ; X64:       # BB#0:
  ; X64-NEXT:    orl $1, %edi
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    je .LBB16_2
-; X64-NEXT:  # BB#1: # %cond.false
+; X64-NEXT:    je .LBB16_1
+; X64-NEXT:  # BB#2: # %cond.false
  ; X64-NEXT:    bsrl %edi, %eax
  ; X64-NEXT:    xorl $31, %eax
-; X64-NEXT:  .LBB16_2: # %cond.end
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB16_1:
+; X64-NEXT:    movl $32, %eax
  ; X64-NEXT:    retq
  ;
  ; X32-CLZ-LABEL: ctlz_i32_fold_cmov:
@@ -716,26 +736,30 @@ define i32 @ctlz_bsr(i32 %n) {
  define i32 @ctlz_bsr_zero_test(i32 %n) {
  ; X32-LABEL: ctlz_bsr_zero_test:
  ; X32:       # BB#0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $32, %eax
-; X32-NEXT:    testl %ecx, %ecx
-; X32-NEXT:    je .LBB18_2
-; X32-NEXT:  # BB#1: # %cond.false
-; X32-NEXT:    bsrl %ecx, %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    testl %eax, %eax
+; X32-NEXT:    je .LBB18_1
+; X32-NEXT:  # BB#2: # %cond.false
+; X32-NEXT:    bsrl %eax, %eax
  ; X32-NEXT:    xorl $31, %eax
-; X32-NEXT:  .LBB18_2: # %cond.end
+; X32-NEXT:    xorl $31, %eax
+; X32-NEXT:    retl
+; X32-NEXT:  .LBB18_1:
+; X32-NEXT:    movl $32, %eax
  ; X32-NEXT:    xorl $31, %eax
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: ctlz_bsr_zero_test:
  ; X64:       # BB#0:
-; X64-NEXT:    movl $32, %eax
  ; X64-NEXT:    testl %edi, %edi
-; X64-NEXT:    je .LBB18_2
-; X64-NEXT:  # BB#1: # %cond.false
+; X64-NEXT:    je .LBB18_1
+; X64-NEXT:  # BB#2: # %cond.false
  ; X64-NEXT:    bsrl %edi, %eax
  ; X64-NEXT:    xorl $31, %eax
-; X64-NEXT:  .LBB18_2: # %cond.end
+; X64-NEXT:    xorl $31, %eax
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB18_1:
+; X64-NEXT:    movl $32, %eax
  ; X64-NEXT:    xorl $31, %eax
  ; X64-NEXT:    retq
  ;
diff --git a/llvm/test/CodeGen/X86/loop-search.ll b/llvm/test/CodeGen/X86/loop-search.ll

index 99c21ae..6b29a72 100644 (file)
--- a/llvm/test/CodeGen/X86/loop-search.ll
+++ b/llvm/test/CodeGen/X86/loop-search.ll
@@ -10,19 +10,17 @@ define zeroext i1 @search(i32 %needle, i32* nocapture readonly %haystack, i32 %c
  ; CHECK-NEXT:    testl %edx, %edx
  ; CHECK-NEXT:    jle LBB0_1
  ; CHECK-NEXT:  ## BB#4: ## %for.body.preheader
-; CHECK-NEXT:    movslq %edx, %rcx
-; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    movslq %edx, %rax
+; CHECK-NEXT:    xorl %ecx, %ecx
  ; CHECK-NEXT:    .p2align 4, 0x90
  ; CHECK-NEXT:  LBB0_5: ## %for.body
  ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-;            ### FIXME: This loop invariant should be hoisted
-; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    cmpl %edi, (%rsi,%rdx,4)
+; CHECK-NEXT:    cmpl %edi, (%rsi,%rcx,4)
  ; CHECK-NEXT:    je LBB0_6
  ; CHECK-NEXT:  ## BB#2: ## %for.cond
  ; CHECK-NEXT:    ## in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT:    incq %rdx
-; CHECK-NEXT:    cmpq %rcx, %rdx
+; CHECK-NEXT:    incq %rcx
+; CHECK-NEXT:    cmpq %rax, %rcx
  ; CHECK-NEXT:    jl LBB0_5
  ;            ### FIXME: BB#3 and LBB0_1 should be merged
  ; CHECK-NEXT:  ## BB#3:
@@ -33,7 +31,8 @@ define zeroext i1 @search(i32 %needle, i32* nocapture readonly %haystack, i32 %c
  ; CHECK-NEXT:    xorl %eax, %eax
  ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
  ; CHECK-NEXT:    retq
-; CHECK-NEXT:  LBB0_6: ## %cleanup
+; CHECK-NEXT:  LBB0_6:
+; CHECK-NEXT:    movb $1, %al
  ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
  ; CHECK-NEXT:    retq
  ;
diff --git a/llvm/test/CodeGen/X86/machine-sink.ll b/llvm/test/CodeGen/X86/machine-sink.ll

new file mode 100644 (file)

index 0000000..8fc8984
--- /dev/null
+++ b/llvm/test/CodeGen/X86/machine-sink.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+
+; Checks if movl $1 is sunk to the critical edge.
+; CHECK-NOT: movl $1
+; CHECK: jbe
+; CHECK: movl $1
+define i32 @test(i32 %n, i32 %k) nounwind  {
+entry:
+  %cmp = icmp ugt i32 %k, %n
+  br i1 %cmp, label %ifthen, label %ifend, !prof !1
+
+ifthen:
+  %y = add i32 %k, 2
+  br label %ifend
+
+ifend:
+  %ret = phi i32 [ 1, %entry ] , [ %y, %ifthen]
+  ret i32 %ret
+}
+
+!1 = !{!"branch_weights", i32 100, i32 1}
diff --git a/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll

index a02a4ae..13605b7 100644 (file)
--- a/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll
+++ b/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -14,7 +14,9 @@ forcond.preheader:            ; preds = %entry
  ifthen:                ; preds = %entry
         ret i32 0
  ; CHECK: forbody{{$}}
+; There should be no mov instruction in the for body.
  ; CHECK-NOT: mov
+; CHECK: jbe
  forbody:               ; preds = %forbody, %forcond.preheader
         %indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ]          ; <i32> [#uses=3]
         %accumulator.01 = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ]         ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/pr2659.ll b/llvm/test/CodeGen/X86/pr2659.ll

index debb13e..cc8f847 100644 (file)
--- a/llvm/test/CodeGen/X86/pr2659.ll
+++ b/llvm/test/CodeGen/X86/pr2659.ll
@@ -14,7 +14,7 @@ forcond.preheader:              ; preds = %entry
    br i1 %cmp44, label %afterfor, label %forbody
  
  ; CHECK: %forcond.preheader
-; CHECK: movl $1
+; CHECK: testl
  ; CHECK-NOT: xorl
  ; CHECK-NOT: movl
  ; CHECK-NOT: LBB
@@ -24,6 +24,7 @@ forcond.preheader:              ; preds = %entry
  ; CHECK: %forbody{{$}}
  ; CHECK-NOT: mov
  ; CHECK: jbe
+; CHECK: movl $1
  
  ifthen:         ; preds = %entry
    ret i32 0
diff --git a/llvm/test/DebugInfo/COFF/pieces.ll b/llvm/test/DebugInfo/COFF/pieces.ll

index 8863658..bd2551a 100644 (file)
--- a/llvm/test/DebugInfo/COFF/pieces.ll
+++ b/llvm/test/DebugInfo/COFF/pieces.ll
@@ -37,11 +37,11 @@
  ; ASM-LABEL: loop_csr: # @loop_csr
  ; ASM:        #DEBUG_VALUE: loop_csr:o [bit_piece offset=0 size=32] <- 0
  ; ASM:        #DEBUG_VALUE: loop_csr:o [bit_piece offset=32 size=32] <- 0
-; ASM: # BB#1:                                 # %for.body.preheader
+; ASM: # BB#2:                                 # %for.body.preheader
  ; ASM:         xorl    %edi, %edi
  ; ASM:         xorl    %esi, %esi
  ; ASM:         .p2align        4, 0x90
-; ASM: .LBB0_2:                                # %for.body
+; ASM: .LBB0_3:                                # %for.body
  ; ASM: [[ox_start:\.Ltmp[0-9]+]]:
  ; ASM:        #DEBUG_VALUE: loop_csr:o [bit_piece offset=0 size=32] <- %EDI
  ; ASM:        .cv_loc 0 1 13 11               # t.c:13:11
@@ -57,7 +57,7 @@
  ; ASM:         movl    %eax, %esi
  ; ASM:         #DEBUG_VALUE: loop_csr:o [bit_piece offset=32 size=32] <- %ESI
  ; ASM:         cmpl    n(%rip), %eax
-; ASM:         jl      .LBB0_2
+; ASM:         jl      .LBB0_3
  ; ASM: [[oy_end:\.Ltmp[0-9]+]]:
  ; ASM:         addl    %edi, %esi
  ; ASM:         movl    %esi, %eax
author	Dehao Chen <dehao@google.com>
	Thu, 20 Oct 2016 18:06:52 +0000 (18:06 +0000)
committer	Dehao Chen <dehao@google.com>
	Thu, 20 Oct 2016 18:06:52 +0000 (18:06 +0000)
llvm/lib/CodeGen/MachineSink.cpp		patch \| blob \| history
llvm/test/CodeGen/ARM/atomic-cmpxchg.ll		patch \| blob \| history
llvm/test/CodeGen/ARM/code-placement.ll		patch \| blob \| history
llvm/test/CodeGen/X86/block-placement.ll		patch \| blob \| history
llvm/test/CodeGen/X86/clz.ll		patch \| blob \| history
llvm/test/CodeGen/X86/loop-search.ll		patch \| blob \| history
llvm/test/CodeGen/X86/machine-sink.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll		patch \| blob \| history
llvm/test/CodeGen/X86/pr2659.ll		patch \| blob \| history
llvm/test/DebugInfo/COFF/pieces.ll		patch \| blob \| history