From 214997c63bc170b7859dcd03c551d6feaacb2dc2 Mon Sep 17 00:00:00 2001 From: Daniel Jasper Date: Fri, 20 Mar 2015 10:00:37 +0000 Subject: [PATCH] [MBP] Don't outline short optional branches With the option -outline-optional-branches, LLVM will place optional branches out of line (more details on r231230). With this patch, this is not done for short optional branches. A short optional branch is a branch containing a single block with an instruction count below a certain threshold (defaulting to 4). Everything is still guarded by -outline-optional-branches. Outlining a short branch can't significantly improve code locality. It can, however, decrease performance because of the additional jmp and in cases where the optional branch is hot. This fixes a compile time regression I have observed in a benchmark. Review: http://reviews.llvm.org/D8108 llvm-svn: 232802 --- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 27 ++++++++++++++++++-- .../code_placement_outline_optional_branches.ll | 29 +++++++++++++++++++++- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 06cc7ba..86a4922 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -74,6 +74,12 @@ static cl::opt OutlineOptionalBranches( "post dominator, out of line."), cl::init(false), cl::Hidden); +static cl::opt OutlineOptionalThreshold( + "outline-optional-threshold", + cl::desc("Don't outline optional branches that are a single block with an " + "instruction count below this threshold"), + cl::init(4), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -377,8 +383,25 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, // dominates all terminators of the MachineFunction. If it does, other // successors must be optional. Don't do this for cold branches. 
if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() && - UnavoidableBlocks.count(Succ) > 0) - return Succ; + UnavoidableBlocks.count(Succ) > 0) { + auto HasShortOptionalBranch = [&]() { + for (MachineBasicBlock *Pred : Succ->predecessors()) { + // Check whether there is an unplaced optional branch. + if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || + BlockToChain[Pred] == &Chain) + continue; + // Check whether the optional branch has exactly one BB. + if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB) + continue; + // Check whether the optional branch is small. + if (Pred->size() < OutlineOptionalThreshold) + return true; + } + return false; + }; + if (!HasShortOptionalBranch()) + return Succ; + } // Only consider successors which are either "hot", or wouldn't violate // any CFG constraints. diff --git a/llvm/test/CodeGen/X86/code_placement_outline_optional_branches.ll b/llvm/test/CodeGen/X86/code_placement_outline_optional_branches.ll index 18c3d99..3364915 100644 --- a/llvm/test/CodeGen/X86/code_placement_outline_optional_branches.ll +++ b/llvm/test/CodeGen/X86/code_placement_outline_optional_branches.ll @@ -1,20 +1,30 @@ ; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK ; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -outline-optional-branches < %s | FileCheck %s -check-prefix=CHECK-OUTLINE -define void @foo(i32 %t1, i32 %t2) { +define void @foo(i32 %t1, i32 %t2, i32 %t3) { ; Test that we lift the call to 'c' up to immediately follow the call to 'b' ; when we disable the cfg conflict check. 
; ; CHECK-LABEL: foo: ; CHECK: callq a +; CHECK: callq a +; CHECK: callq a +; CHECK: callq a ; CHECK: callq b ; CHECK: callq c ; CHECK: callq d +; CHECK: callq e +; CHECK: callq f ; ; CHECK-OUTLINE-LABEL: foo: ; CHECK-OUTLINE: callq b ; CHECK-OUTLINE: callq c ; CHECK-OUTLINE: callq d +; CHECK-OUTLINE: callq e +; CHECK-OUTLINE: callq f +; CHECK-OUTLINE: callq a +; CHECK-OUTLINE: callq a +; CHECK-OUTLINE: callq a ; CHECK-OUTLINE: callq a entry: @@ -23,6 +33,9 @@ entry: if.then: call void @a() + call void @a() + call void @a() + call void @a() br label %if.end if.end: @@ -39,6 +52,18 @@ if.then2: if.end2: call void @d() + br label %shortbranch + +shortbranch: + %cmp3 = icmp eq i32 %t3, 0 + br i1 %cmp3, label %if.then3, label %if.end3 + +if.then3: + call void @e() + br label %if.end3 + +if.end3: + call void @f() ret void } @@ -46,5 +71,7 @@ declare void @a() declare void @b() declare void @c() declare void @d() +declare void @e() +declare void @f() !1 = !{!"branch_weights", i32 64, i32 4} -- 2.7.4