From 81f3fd4bf81247480d2fa172a65b04951e7a0d3e Mon Sep 17 00:00:00 2001 From: Guozhi Wei Date: Fri, 25 Jan 2019 19:45:13 +0000 Subject: [PATCH] [MBP] Don't move bottom block before header if it can't reduce taken branches If the bottom block BB has only one successor OldTop, in most cases it is profitable to move it before OldTop, except in the following case: -->OldTop<- | . | | . | | . | ---Pred | | | BB----- Moving BB before OldTop can't reduce the number of taken branches, so this patch detects this case and prevents the move. Differential Revision: https://reviews.llvm.org/D57067 llvm-svn: 352236 --- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 38 ++++++++++++++++++++++ llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll | 18 +++++----- llvm/test/CodeGen/PowerPC/licm-remat.ll | 4 +-- llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll | 31 +++++++++--------- llvm/test/CodeGen/X86/avx-cmp.ll | 9 +++-- llvm/test/CodeGen/X86/avx512-i1test.ll | 9 +++-- .../CodeGen/X86/code_placement_no_header_change.ll | 36 ++++++++++++++++++++ llvm/test/DebugInfo/X86/PR37234.ll | 17 +++++----- 8 files changed, 116 insertions(+), 46 deletions(-) create mode 100644 llvm/test/CodeGen/X86/code_placement_no_header_change.ll diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 797808e..a246717 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -451,6 +451,8 @@ class MachineBlockPlacement : public MachineFunctionPass { void buildChain(const MachineBasicBlock *BB, BlockChain &Chain, BlockFilterSet *BlockFilter = nullptr); + bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock, + const MachineBasicBlock *OldTop); MachineBasicBlock *findBestLoopTop( const MachineLoop &L, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit( @@ -1756,6 +1758,39 @@ void MachineBlockPlacement::buildChain( << getBlockName(*Chain.begin()) << "\n"); } +// If bottom of block BB has only one 
successor OldTop, in most cases it is +// profitable to move it before OldTop, except in the following case: +// +// -->OldTop<- +// | . | +// | . | +// | . | +// ---Pred | +// | | +// BB----- +// +// If BB is moved before OldTop, Pred needs a taken branch to BB, and it can't +// lay out the other successor below it, so it can't reduce taken branches. +// In this case we keep the original layout and return false; otherwise true. +bool +MachineBlockPlacement::canMoveBottomBlockToTop( + const MachineBasicBlock *BottomBlock, + const MachineBasicBlock *OldTop) { + if (BottomBlock->pred_size() != 1) + return true; + MachineBasicBlock *Pred = *BottomBlock->pred_begin(); + if (Pred->succ_size() != 2) + return true; + + MachineBasicBlock *OtherBB = *Pred->succ_begin(); + if (OtherBB == BottomBlock) + OtherBB = *Pred->succ_rbegin(); + if (OtherBB == OldTop) + return false; + + return true; +} + /// Find the best loop top block for layout. /// /// Look for a block which is strictly better than the loop header for laying @@ -1800,6 +1835,9 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, if (Pred->succ_size() > 1) continue; + if (!canMoveBottomBlockToTop(Pred, L.getHeader())) + continue; + BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); if (!BestPred || PredFreq > BestPredFreq || (!(PredFreq < BestPredFreq) && diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll index 6215a48..ad68d30 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -96,20 +96,20 @@ declare float @llvm.fabs.f32(float) nounwind readnone ; FUNC-LABEL: {{^}}loop_land_info_assert: ; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}} ; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]] -; SI: s_mov_b64 vcc, [[CMP4M]] -; SI-NEXT: s_cbranch_vccnz [[CONVEX_EXIT:BB[0-9_]+]] -; SI-NEXT: s_branch [[FOR_COND_PREHDR:BB[0-9_]+]] +; SI: s_branch [[INFLOOP:BB[0-9]+_[0-9]+]] + +; SI: [[CONVEX_EXIT:BB[0-9_]+]] +; SI: 
s_mov_b64 vcc, +; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]] +; SI: s_cbranch_vccnz [[INFLOOP]] ; SI: ; %if.else ; SI: buffer_store_dword -; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]: +; SI: [[INFLOOP]]: +; SI: s_cbranch_vccnz [[CONVEX_EXIT]] -; SI: [[CONVEX_EXIT]]: -; SI: s_mov_b64 vcc, -; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]] -; SI: s_branch [[INFLOOP]] -; SI-NEXT: [[FOR_COND_PREHDR]]: +; SI: ; %for.cond.preheader ; SI: s_cbranch_vccz [[ENDPGM]] ; SI: [[ENDPGM]]: diff --git a/llvm/test/CodeGen/PowerPC/licm-remat.ll b/llvm/test/CodeGen/PowerPC/licm-remat.ll index e72a8b0..045f7a4 100644 --- a/llvm/test/CodeGen/PowerPC/licm-remat.ll +++ b/llvm/test/CodeGen/PowerPC/licm-remat.ll @@ -24,8 +24,8 @@ define linkonce_odr void @ZN6snappyDecompressor_(%"class.snappy::SnappyDecompres ; CHECK-DAG: addi 25, 3, _ZN6snappy8internalL8wordmaskE@toc@l ; CHECK-DAG: addis 5, 2, _ZN6snappy8internalL10char_tableE@toc@ha ; CHECK-DAG: addi 24, 5, _ZN6snappy8internalL10char_tableE@toc@l -; CHECK: b .LBB0_2 -; CHECK: .LBB0_2: # %for.cond +; CHECK: b .[[LABEL1:[A-Z0-9_]+]] +; CHECK: .[[LABEL1]]: # %for.cond ; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL8wordmaskE@toc@ha ; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL10char_tableE@toc@ha ; CHECK: bctrl diff --git a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index 25a7d05..1e6e24c 100644 --- a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -20,22 +20,7 @@ define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp { ; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: orq $2097152, %r14 ## imm = 0x200000 ; CHECK-NEXT: andl $15728640, %r14d ## imm = 0xF00000 -; CHECK-NEXT: jmp LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_3: ## %bb.i -; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl 0, %eax -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: cvtsi2ssq %rax, %xmm0 -; 
CHECK-NEXT: movl 4, %eax -; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: cvtsi2ssq %rax, %xmm1 -; CHECK-NEXT: movl 8, %eax -; CHECK-NEXT: xorps %xmm2, %xmm2 -; CHECK-NEXT: cvtsi2ssq %rax, %xmm2 -; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] -; CHECK-NEXT: movaps %xmm0, 0 ; CHECK-NEXT: LBB0_1: ## %bb4 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: xorl %eax, %eax @@ -50,7 +35,21 @@ define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp { ; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: cmpq $1048576, %r14 ## imm = 0x100000 ; CHECK-NEXT: jne LBB0_1 -; CHECK-NEXT: jmp LBB0_3 +; CHECK-NEXT: ## %bb.3: ## %bb.i +; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl 0, %eax +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: cvtsi2ssq %rax, %xmm0 +; CHECK-NEXT: movl 4, %eax +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: cvtsi2ssq %rax, %xmm1 +; CHECK-NEXT: movl 8, %eax +; CHECK-NEXT: xorps %xmm2, %xmm2 +; CHECK-NEXT: cvtsi2ssq %rax, %xmm2 +; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] +; CHECK-NEXT: movaps %xmm0, 0 +; CHECK-NEXT: jmp LBB0_1 entry: br label %bb4 diff --git a/llvm/test/CodeGen/X86/avx-cmp.ll b/llvm/test/CodeGen/X86/avx-cmp.ll index 534a000..c789ec4 100644 --- a/llvm/test/CodeGen/X86/avx-cmp.ll +++ b/llvm/test/CodeGen/X86/avx-cmp.ll @@ -35,11 +35,7 @@ define void @render() nounwind { ; CHECK-NEXT: # %bb.1: # %for.cond5.preheader ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: movb $1, %bpl -; CHECK-NEXT: jmp .LBB2_2 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB2_5: # %if.then -; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: callq scale ; CHECK-NEXT: .LBB2_2: # %for.cond5 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: testb %bl, %bl @@ -52,7 +48,10 @@ define void @render() nounwind { ; CHECK-NEXT: # in Loop: 
Header=BB2_2 Depth=1 ; CHECK-NEXT: vucomisd {{\.LCPI.*}}, %xmm0 ; CHECK-NEXT: jne .LBB2_5 -; CHECK-NEXT: jp .LBB2_5 +; CHECK-NEXT: jnp .LBB2_2 +; CHECK-NEXT: .LBB2_5: # %if.then +; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: callq scale ; CHECK-NEXT: jmp .LBB2_2 ; CHECK-NEXT: .LBB2_6: # %for.end52 ; CHECK-NEXT: addq $8, %rsp diff --git a/llvm/test/CodeGen/X86/avx512-i1test.ll b/llvm/test/CodeGen/X86/avx512-i1test.ll index 7cf86fe..108a908 100644 --- a/llvm/test/CodeGen/X86/avx512-i1test.ll +++ b/llvm/test/CodeGen/X86/avx512-i1test.ll @@ -15,16 +15,15 @@ define void @func() { ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB0_1: # %bb56 ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: jmp .LBB0_2 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_3: # %bb35 -; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: .LBB0_2: # %bb33 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: # %bb.3: # %bb35 +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jmp .LBB0_2 bb1: br i1 undef, label %L_10, label %L_10 diff --git a/llvm/test/CodeGen/X86/code_placement_no_header_change.ll b/llvm/test/CodeGen/X86/code_placement_no_header_change.ll new file mode 100644 index 0000000..0275606 --- /dev/null +++ b/llvm/test/CodeGen/X86/code_placement_no_header_change.ll @@ -0,0 +1,36 @@ +; RUN: llc -mtriple=i686-linux < %s | FileCheck %s + + +define i32 @bar(i32 %count) { +; Test checks that basic block backedge2 is not moved before header, +; because it can't reduce taken branches. +; Later backedge1 and backedge2 are rotated before the loop header. 
+; CHECK-LABEL: bar +; CHECK: %.entry +; CHECK: %.backedge1 +; CHECK: %.backedge2 +; CHECK: %.header +; CHECK: %.exit +.entry: + %c = shl nsw i32 %count, 2 + br label %.header + +.header: + %val1 = call i32 @foo() + %cond1 = icmp sgt i32 %val1, 1 + br i1 %cond1, label %.exit, label %.backedge1 + +.backedge1: + %val2 = call i32 @foo() + %cond2 = icmp sgt i32 %val2, 1 + br i1 %cond2, label %.header, label %.backedge2 + +.backedge2: + %val3 = call i32 @foo() + br label %.header + +.exit: + ret i32 %c +} + +declare i32 @foo() diff --git a/llvm/test/DebugInfo/X86/PR37234.ll b/llvm/test/DebugInfo/X86/PR37234.ll index 51fae94..6f73887 100644 --- a/llvm/test/DebugInfo/X86/PR37234.ll +++ b/llvm/test/DebugInfo/X86/PR37234.ll @@ -22,19 +22,18 @@ ; CHECK: #DEBUG_VALUE: main:aa <- 0 ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG:[0-9a-z]+]] ; CHECK: jmp .LBB0_1 -; CHECK: .LBB0_3: -; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] -; CHECK: incl %[[REG]] -; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] +; CHECK: .LBB0_2: +; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] +; CHECK: jne .LBB0_1 +; CHECK: # %bb.{{.*}}: +; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] +; CHECK: incl %[[REG]] +; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] ; CHECK: .LBB0_1: ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] -; CHECK: je .LBB0_4 +; CHECK: jne .LBB0_2 ; CHECK: # %bb.{{.*}}: ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] -; CHECK: jne .LBB0_1 -; CHECK: jmp .LBB0_3 -; CHECK: .LBB0_4: -; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] ; CHECK: retq source_filename = "PR37234.cpp" -- 2.7.4