#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
cl::desc("Use block frequency info to find successors to sink"),
cl::init(true), cl::Hidden);
+static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
+ "machine-sink-split-probability-threshold",
+ cl::desc(
+ "Percentage threshold for splitting single-instruction critical edge. "
+ "If the branch threshold is higher than this threshold, we allow "
+ "speculative execution of up to 1 instruction to avoid branching to "
+ "splitted critical edge"),
+ cl::init(40), cl::Hidden);
+
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
MachinePostDominatorTree *PDT; // Machine post dominator tree
MachineLoopInfo *LI;
const MachineBlockFrequencyInfo *MBFI;
+ const MachineBranchProbabilityInfo *MBPI;
AliasAnalysis *AA;
// Remember which edges have been considered for breaking.
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachinePostDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
PDT = &getAnalysis<MachinePostDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool EverMadeChange = false;
if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
return true;
+ if (MBPI->getEdgeProbability(From, To) <=
+ BranchProbability(SplitEdgeProbabilityThreshold, 100))
+ return true;
+
// MI is cheap, we probably don't want to break the critical edge for it.
// However, if this would allow some definitions of its source operands
// to be sunk then it's probably worth it.
; CHECK-ARMV6-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
; CHECK-ARMV6-NEXT: [[TRY:.LBB[0-9_]+]]:
; CHECK-ARMV6-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
-; CHECK-ARMV6-NEXT: mov [[RES:r[0-9]+]], #0
; CHECK-ARMV6-NEXT: cmp [[LD]], [[DESIRED]]
-; CHECK-ARMV6-NEXT: bne [[END:.LBB[0-9_]+]]
+; CHECK-ARMV6-NEXT: movne [[RES:r[0-9]+]], #0
+; CHECK-ARMV6-NEXT: bxne lr
; CHECK-ARMV6-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
-; CHECK-ARMV6-NEXT: mov [[RES]], #1
; CHECK-ARMV6-NEXT: cmp [[SUCCESS]], #0
-; CHECK-ARMV6-NEXT: bne [[TRY]]
-; CHECK-ARMV6-NEXT: [[END]]:
-; CHECK-ARMV6-NEXT: mov r0, [[RES]]
-; CHECK-ARMV6-NEXT: bx lr
+; CHECK-ARMV6-NEXT: moveq [[RES]], #1
+; CHECK-ARMV6-NEXT: bxeq lr
+; CHECK-ARMV6-NEXT: b [[TRY]]
; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8:
; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1
; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8:
; CHECK-ARMV7-NEXT: .fnstart
; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
-; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]:
-; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
-; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]
-; CHECK-ARMV7-NEXT: bne [[FAIL:.LBB[0-9_]+]]
+; CHECK-ARMV7-NEXT: b [[TRY:.LBB[0-9_]+]]
+; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]:
; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
-; CHECK-ARMV7-NEXT: mov [[RES:r[0-9]+]], #1
; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0
-; CHECK-ARMV7-NEXT: bne [[TRY]]
-; CHECK-ARMV7-NEXT: b [[END:.LBB[0-9_]+]]
-; CHECK-ARMV7-NEXT: [[FAIL]]:
+; CHECK-ARMV7-NEXT: moveq [[RES:r[0-9]+]], #1
+; CHECK-ARMV7-NEXT: bxeq lr
+; CHECK-ARMV7-NEXT: [[TRY]]:
+; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
+; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]
+; CHECK-ARMV7-NEXT: beq [[HEAD]]
; CHECK-ARMV7-NEXT: clrex
; CHECK-ARMV7-NEXT: mov [[RES]], #0
-; CHECK-ARMV7-NEXT: [[END]]:
-; CHECK-ARMV7-NEXT: mov r0, [[RES]]
; CHECK-ARMV7-NEXT: bx lr
; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
br i1 %0, label %bb2, label %bb
bb:
-; CHECK: LBB0_2:
-; CHECK: bne LBB0_2
-; CHECK-NOT: b LBB0_2
+; CHECK: LBB0_[[LABEL:[0-9]]]:
+; CHECK: bne LBB0_[[LABEL]]
+; CHECK-NOT: b LBB0_[[LABEL]]
; CHECK: bx lr
%list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
%next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
define i32 @t2(i32 %passes, i32* nocapture %src, i32 %size) nounwind readonly {
entry:
; CHECK-LABEL: t2:
-; CHECK: beq LBB1_[[RET:.]]
%0 = icmp eq i32 %passes, 0 ; <i1> [#uses=1]
br i1 %0, label %bb5, label %bb.nph15
-; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
bb1: ; preds = %bb2.preheader, %bb1
-; CHECK: LBB1_[[BB1:.]]: @ %bb1
-; CHECK: bne LBB1_[[BB1]]
+; CHECK: LBB1_[[BB3:.]]: @ %bb3
+; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
+; CHECK: blt LBB1_[[BB3]]
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
%sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
%tmp17 = sub i32 %i.07, %indvar ; <i32> [#uses=1]
br i1 %exitcond, label %bb3, label %bb1
bb3: ; preds = %bb1, %bb2.preheader
-; CHECK: LBB1_[[BB3:.]]: @ %bb3
-; CHECK: bne LBB1_[[PREHDR]]
-; CHECK-NOT: b LBB1_
+; CHECK: LBB1_[[BB1:.]]: @ %bb1
+; CHECK: bne LBB1_[[BB1]]
+; CHECK: b LBB1_[[BB3]]
%sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]
%3 = add i32 %pass.011, 1 ; <i32> [#uses=2]
%exitcond18 = icmp eq i32 %3, %passes ; <i1> [#uses=1]
%sum.110 = phi i32 [ 0, %bb.nph15 ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=2]
br i1 %4, label %bb1, label %bb3
-; CHECK: LBB1_[[RET]]: @ %bb5
-; CHECK: pop
bb5: ; preds = %bb3, %entry
%sum.1.lcssa = phi i32 [ 0, %entry ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=1]
ret i32 %sum.1.lcssa
define i8 @ctlz_i8_zero_test(i8 %n) {
; X32-LABEL: ctlz_i8_zero_test:
; X32: # BB#0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X32-NEXT: movb $8, %al
-; X32-NEXT: testb %cl, %cl
-; X32-NEXT: je .LBB8_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: movzbl %cl, %eax
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: testb %al, %al
+; X32-NEXT: je .LBB8_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: movzbl %al, %eax
; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $7, %eax
-; X32-NEXT: .LBB8_2: # %cond.end
+; X32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; X32-NEXT: retl
+; X32-NEXT: .LBB8_1:
+; X32-NEXT: movb $8, %al
; X32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i8_zero_test:
; X64: # BB#0:
-; X64-NEXT: movb $8, %al
; X64-NEXT: testb %dil, %dil
-; X64-NEXT: je .LBB8_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB8_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsrl %eax, %eax
; X64-NEXT: xorl $7, %eax
-; X64-NEXT: .LBB8_2: # %cond.end
+; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; X64-NEXT: retq
+; X64-NEXT: .LBB8_1:
+; X64-NEXT: movb $8, %al
; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-NEXT: retq
;
define i16 @ctlz_i16_zero_test(i16 %n) {
; X32-LABEL: ctlz_i16_zero_test:
; X32: # BB#0:
-; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movw $16, %ax
-; X32-NEXT: testw %cx, %cx
-; X32-NEXT: je .LBB9_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: bsrw %cx, %ax
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: testw %ax, %ax
+; X32-NEXT: je .LBB9_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: bsrw %ax, %ax
; X32-NEXT: xorl $15, %eax
-; X32-NEXT: .LBB9_2: # %cond.end
+; X32-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X32-NEXT: retl
+; X32-NEXT: .LBB9_1:
+; X32-NEXT: movw $16, %ax
; X32-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i16_zero_test:
; X64: # BB#0:
-; X64-NEXT: movw $16, %ax
; X64-NEXT: testw %di, %di
-; X64-NEXT: je .LBB9_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB9_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: bsrw %di, %ax
; X64-NEXT: xorl $15, %eax
-; X64-NEXT: .LBB9_2: # %cond.end
+; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-NEXT: retq
+; X64-NEXT: .LBB9_1:
+; X64-NEXT: movw $16, %ax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
;
define i32 @ctlz_i32_zero_test(i32 %n) {
; X32-LABEL: ctlz_i32_zero_test:
; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl $32, %eax
-; X32-NEXT: testl %ecx, %ecx
-; X32-NEXT: je .LBB10_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: bsrl %ecx, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: je .LBB10_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $31, %eax
-; X32-NEXT: .LBB10_2: # %cond.end
+; X32-NEXT: retl
+; X32-NEXT: .LBB10_1:
+; X32-NEXT: movl $32, %eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i32_zero_test:
; X64: # BB#0:
-; X64-NEXT: movl $32, %eax
; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB10_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB10_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
-; X64-NEXT: .LBB10_2: # %cond.end
+; X64-NEXT: retq
+; X64-NEXT: .LBB10_1:
+; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i32_zero_test:
define i8 @cttz_i8_zero_test(i8 %n) {
; X32-LABEL: cttz_i8_zero_test:
; X32: # BB#0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X32-NEXT: movb $8, %al
-; X32-NEXT: testb %cl, %cl
-; X32-NEXT: je .LBB12_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: movzbl %cl, %eax
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: testb %al, %al
+; X32-NEXT: je .LBB12_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: movzbl %al, %eax
; X32-NEXT: bsfl %eax, %eax
-; X32-NEXT: .LBB12_2: # %cond.end
+; X32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; X32-NEXT: retl
+; X32-NEXT: .LBB12_1:
+; X32-NEXT: movb $8, %al
; X32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-NEXT: retl
;
; X64-LABEL: cttz_i8_zero_test:
; X64: # BB#0:
-; X64-NEXT: movb $8, %al
; X64-NEXT: testb %dil, %dil
-; X64-NEXT: je .LBB12_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB12_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsfl %eax, %eax
-; X64-NEXT: .LBB12_2: # %cond.end
+; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; X64-NEXT: retq
+; X64-NEXT: .LBB12_1:
+; X64-NEXT: movb $8, %al
; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-NEXT: retq
;
define i16 @cttz_i16_zero_test(i16 %n) {
; X32-LABEL: cttz_i16_zero_test:
; X32: # BB#0:
-; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: testw %ax, %ax
+; X32-NEXT: je .LBB13_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: bsfw %ax, %ax
+; X32-NEXT: retl
+; X32-NEXT: .LBB13_1:
; X32-NEXT: movw $16, %ax
-; X32-NEXT: testw %cx, %cx
-; X32-NEXT: je .LBB13_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: bsfw %cx, %ax
-; X32-NEXT: .LBB13_2: # %cond.end
; X32-NEXT: retl
;
; X64-LABEL: cttz_i16_zero_test:
; X64: # BB#0:
-; X64-NEXT: movw $16, %ax
; X64-NEXT: testw %di, %di
-; X64-NEXT: je .LBB13_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB13_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: bsfw %di, %ax
-; X64-NEXT: .LBB13_2: # %cond.end
+; X64-NEXT: retq
+; X64-NEXT: .LBB13_1:
+; X64-NEXT: movw $16, %ax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i16_zero_test:
define i32 @cttz_i32_zero_test(i32 %n) {
; X32-LABEL: cttz_i32_zero_test:
; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: je .LBB14_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: bsfl %eax, %eax
+; X32-NEXT: retl
+; X32-NEXT: .LBB14_1:
; X32-NEXT: movl $32, %eax
-; X32-NEXT: testl %ecx, %ecx
-; X32-NEXT: je .LBB14_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: bsfl %ecx, %eax
-; X32-NEXT: .LBB14_2: # %cond.end
; X32-NEXT: retl
;
; X64-LABEL: cttz_i32_zero_test:
; X64: # BB#0:
-; X64-NEXT: movl $32, %eax
; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB14_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB14_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: bsfl %edi, %eax
-; X64-NEXT: .LBB14_2: # %cond.end
+; X64-NEXT: retq
+; X64-NEXT: .LBB14_1:
+; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i32_zero_test:
define i32 @ctlz_i32_fold_cmov(i32 %n) {
; X32-LABEL: ctlz_i32_fold_cmov:
; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: orl $1, %ecx
-; X32-NEXT: movl $32, %eax
-; X32-NEXT: je .LBB16_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: bsrl %ecx, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl $1, %eax
+; X32-NEXT: je .LBB16_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $31, %eax
-; X32-NEXT: .LBB16_2: # %cond.end
+; X32-NEXT: retl
+; X32-NEXT: .LBB16_1:
+; X32-NEXT: movl $32, %eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i32_fold_cmov:
; X64: # BB#0:
; X64-NEXT: orl $1, %edi
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: je .LBB16_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB16_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
-; X64-NEXT: .LBB16_2: # %cond.end
+; X64-NEXT: retq
+; X64-NEXT: .LBB16_1:
+; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i32_fold_cmov:
define i32 @ctlz_bsr_zero_test(i32 %n) {
; X32-LABEL: ctlz_bsr_zero_test:
; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl $32, %eax
-; X32-NEXT: testl %ecx, %ecx
-; X32-NEXT: je .LBB18_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: bsrl %ecx, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: je .LBB18_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $31, %eax
-; X32-NEXT: .LBB18_2: # %cond.end
+; X32-NEXT: xorl $31, %eax
+; X32-NEXT: retl
+; X32-NEXT: .LBB18_1:
+; X32-NEXT: movl $32, %eax
; X32-NEXT: xorl $31, %eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_bsr_zero_test:
; X64: # BB#0:
-; X64-NEXT: movl $32, %eax
; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB18_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB18_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
-; X64-NEXT: .LBB18_2: # %cond.end
+; X64-NEXT: xorl $31, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB18_1:
+; X64-NEXT: movl $32, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
;