If the bottom block BB has only one successor, OldTop, it is in most cases profitable to move BB before OldTop, except in the following case:
     -->OldTop<-
     |    .    |
     |    .    |
     |    .    |
     ---Pred   |
          |    |
          BB-----
Moving BB before OldTop can't reduce the number of taken branches; this patch detects this case and prevents the move.
Differential Revision: https://reviews.llvm.org/D57067
llvm-svn: 352236
void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
BlockFilterSet *BlockFilter = nullptr);
+ bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock,
+ const MachineBasicBlock *OldTop);
MachineBasicBlock *findBestLoopTop(
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopExit(
<< getBlockName(*Chain.begin()) << "\n");
}
+// Returns false only when BottomBlock's sole predecessor Pred has exactly two
+// successors and the one that is not BottomBlock is OldTop, i.e. this shape:
+//
+//     -->OldTop<-
+//     |    .    |
+//     |    .    |
+//     |    .    |
+//     ---Pred   |
+//          |    |
+//          BB-----
+//
+// If BB is moved before OldTop, Pred needs a taken branch to reach BB and its
+// other successor cannot be laid out below it either, so no taken branch is
+// saved. In that case the original layout is kept.
+bool
+MachineBlockPlacement::canMoveBottomBlockToTop(
+ const MachineBasicBlock *BottomBlock,
+ const MachineBasicBlock *OldTop) {
+ if (BottomBlock->pred_size() != 1)
+ return true; // more or fewer than one predecessor: not the rejected shape
+ MachineBasicBlock *Pred = *BottomBlock->pred_begin();
+ if (Pred->succ_size() != 2)
+ return true; // Pred does not have exactly two successors
+
+ MachineBasicBlock *OtherBB = *Pred->succ_begin();
+ if (OtherBB == BottomBlock)
+ OtherBB = *Pred->succ_rbegin(); // first successor was BottomBlock; use the last
+ if (OtherBB == OldTop)
+ return false; // Pred branches to either BottomBlock or OldTop: keep layout
+
+ return true;
+}
+
/// Find the best loop top block for layout.
///
/// Look for a block which is strictly better than the loop header for laying
if (Pred->succ_size() > 1)
continue;
+ if (!canMoveBottomBlockToTop(Pred, L.getHeader()))
+ continue;
+
BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
if (!BestPred || PredFreq > BestPredFreq ||
(!(PredFreq < BestPredFreq) &&
; FUNC-LABEL: {{^}}loop_land_info_assert:
; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}}
; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]]
-; SI: s_mov_b64 vcc, [[CMP4M]]
-; SI-NEXT: s_cbranch_vccnz [[CONVEX_EXIT:BB[0-9_]+]]
-; SI-NEXT: s_branch [[FOR_COND_PREHDR:BB[0-9_]+]]
+; SI: s_branch [[INFLOOP:BB[0-9]+_[0-9]+]]
+
+; SI: [[CONVEX_EXIT:BB[0-9_]+]]
+; SI: s_mov_b64 vcc,
+; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]]
+; SI: s_cbranch_vccnz [[INFLOOP]]
; SI: ; %if.else
; SI: buffer_store_dword
-; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]:
+; SI: [[INFLOOP]]:
+; SI: s_cbranch_vccnz [[CONVEX_EXIT]]
-; SI: [[CONVEX_EXIT]]:
-; SI: s_mov_b64 vcc,
-; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]]
-; SI: s_branch [[INFLOOP]]
-; SI-NEXT: [[FOR_COND_PREHDR]]:
+; SI: ; %for.cond.preheader
; SI: s_cbranch_vccz [[ENDPGM]]
; SI: [[ENDPGM]]:
; CHECK-DAG: addi 25, 3, _ZN6snappy8internalL8wordmaskE@toc@l
; CHECK-DAG: addis 5, 2, _ZN6snappy8internalL10char_tableE@toc@ha
; CHECK-DAG: addi 24, 5, _ZN6snappy8internalL10char_tableE@toc@l
-; CHECK: b .LBB0_2
-; CHECK: .LBB0_2: # %for.cond
+; CHECK: b .[[LABEL1:[A-Z0-9_]+]]
+; CHECK: .[[LABEL1]]: # %for.cond
; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL8wordmaskE@toc@ha
; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL10char_tableE@toc@ha
; CHECK: bctrl
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: orq $2097152, %r14 ## imm = 0x200000
; CHECK-NEXT: andl $15728640, %r14d ## imm = 0xF00000
-; CHECK-NEXT: jmp LBB0_1
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: LBB0_3: ## %bb.i
-; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: movl 0, %eax
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2ssq %rax, %xmm0
-; CHECK-NEXT: movl 4, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ssq %rax, %xmm1
-; CHECK-NEXT: movl 8, %eax
-; CHECK-NEXT: xorps %xmm2, %xmm2
-; CHECK-NEXT: cvtsi2ssq %rax, %xmm2
-; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
-; CHECK-NEXT: movaps %xmm0, 0
; CHECK-NEXT: LBB0_1: ## %bb4
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: cmpq $1048576, %r14 ## imm = 0x100000
; CHECK-NEXT: jne LBB0_1
-; CHECK-NEXT: jmp LBB0_3
+; CHECK-NEXT: ## %bb.3: ## %bb.i
+; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: movl 0, %eax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2ssq %rax, %xmm0
+; CHECK-NEXT: movl 4, %eax
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2ssq %rax, %xmm1
+; CHECK-NEXT: movl 8, %eax
+; CHECK-NEXT: xorps %xmm2, %xmm2
+; CHECK-NEXT: cvtsi2ssq %rax, %xmm2
+; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
+; CHECK-NEXT: movaps %xmm0, 0
+; CHECK-NEXT: jmp LBB0_1
entry:
br label %bb4
; CHECK-NEXT: # %bb.1: # %for.cond5.preheader
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: movb $1, %bpl
-; CHECK-NEXT: jmp .LBB2_2
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB2_5: # %if.then
-; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT: callq scale
; CHECK-NEXT: .LBB2_2: # %for.cond5
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT: vucomisd {{\.LCPI.*}}, %xmm0
; CHECK-NEXT: jne .LBB2_5
-; CHECK-NEXT: jp .LBB2_5
+; CHECK-NEXT: jnp .LBB2_2
+; CHECK-NEXT: .LBB2_5: # %if.then
+; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT: callq scale
; CHECK-NEXT: jmp .LBB2_2
; CHECK-NEXT: .LBB2_6: # %for.end52
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_1: # %bb56
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: jmp .LBB0_2
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_3: # %bb35
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: .LBB0_2: # %bb33
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB0_2
-; CHECK-NEXT: jmp .LBB0_3
+; CHECK-NEXT: # %bb.3: # %bb35
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jmp .LBB0_2
bb1:
br i1 undef, label %L_10, label %L_10
--- /dev/null
+; RUN: llc -mtriple=i686-linux < %s | FileCheck %s
+
+
+define i32 @bar(i32 %count) {
+; Test checks that basic block backedge2 is not moved before the header,
+; because doing so can't reduce taken branches.
+; Later, backedge1 and backedge2 are rotated before the loop header.
+; CHECK-LABEL: bar
+; CHECK: %.entry
+; CHECK: %.backedge1
+; CHECK: %.backedge2
+; CHECK: %.header
+; CHECK: %.exit
+.entry:
+ %c = shl nsw i32 %count, 2
+ br label %.header
+
+.header:
+ %val1 = call i32 @foo()
+ %cond1 = icmp sgt i32 %val1, 1
+ br i1 %cond1, label %.exit, label %.backedge1
+
+.backedge1:
+ %val2 = call i32 @foo()
+ %cond2 = icmp sgt i32 %val2, 1
+ br i1 %cond2, label %.header, label %.backedge2
+
+.backedge2:
+ %val3 = call i32 @foo()
+ br label %.header
+
+.exit:
+ ret i32 %c
+}
+
+declare i32 @foo()
; CHECK: #DEBUG_VALUE: main:aa <- 0
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG:[0-9a-z]+]]
; CHECK: jmp .LBB0_1
-; CHECK: .LBB0_3:
-; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
-; CHECK: incl %[[REG]]
-; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
+; CHECK: .LBB0_2:
+; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
+; CHECK: jne .LBB0_1
+; CHECK: # %bb.{{.*}}:
+; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
+; CHECK: incl %[[REG]]
+; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
; CHECK: .LBB0_1:
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
-; CHECK: je .LBB0_4
+; CHECK: jne .LBB0_2
; CHECK: # %bb.{{.*}}:
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
-; CHECK: jne .LBB0_1
-; CHECK: jmp .LBB0_3
-; CHECK: .LBB0_4:
-; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
; CHECK: retq
source_filename = "PR37234.cpp"