From d66607bd8cd137e6c90b46ac6d4979c6942ea427 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 26 Apr 2016 17:11:17 +0000 Subject: [PATCH] [CodeGenPrepare] use branch weight metadata to decide if a select should be turned into a branch This is part of solving PR27344: https://llvm.org/bugs/show_bug.cgi?id=27344 CGP should undo the SimplifyCFG transform for the same reason that earlier patches have used this same mechanism: it's possible that passes between SimplifyCFG and CGP may be able to optimize the IR further with a select in place. For the TLI hook default, >99% taken or not taken is chosen as the default threshold for a highly predictable branch. Even the most limited HW branch predictors will be correct on this branch almost all the time, so even a massive mispredict penalty perf loss would be overcome by the win from all the times the branch was predicted correctly. As a follow-up, we could make the default target hook less conservative by using the SchedMachineModel's MispredictPenalty. Or we could just let targets override the default by implementing the hook with that and other target-specific options. Note that trying to statically determine mispredict rates for close-to-balanced profile weight data is generally impossible if the HW is sufficiently advanced. Ie, 50/50 taken/not-taken might still be 100% predictable. Finally, note that this patch as-is will not solve PR27344 because the current __builtin_unpredictable() branch weight default values are 4 and 64. A proposal to change that is in D19435. Differential Revision: http://reviews.llvm.org/D19488 llvm-svn: 267572 --- llvm/include/llvm/IR/Instruction.h | 5 +++++ llvm/include/llvm/IR/Instructions.h | 5 ----- llvm/include/llvm/Target/TargetLowering.h | 5 +++++ llvm/lib/CodeGen/CodeGenPrepare.cpp | 32 ++++++++++++++++++++----------- llvm/lib/CodeGen/TargetLoweringBase.cpp | 15 +++++++++++++++ llvm/lib/IR/Instructions.cpp | 22 --------------------- llvm/lib/IR/Metadata.cpp | 24 +++++++++++++++++++++++ llvm/test/CodeGen/X86/cmov-into-branch.ll | 29 +++++++++++++++++++++++++--- 8 files changed, 96 insertions(+), 41 deletions(-) diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index e44b23d..ea262de 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -217,6 +217,11 @@ public: /// Sets the metadata on this instruction from the AAMDNodes structure. void setAAMetadata(const AAMDNodes &N); + /// Retrieve the raw weight values of a conditional branch or select. + /// Returns true on success with profile weights filled in. + /// Returns false if no metadata or invalid metadata was found. + bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal); + /// Set the debug location information for this instruction. void setDebugLoc(DebugLoc Loc) { DbgLoc = std::move(Loc); } diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 43cb9a2..8ad86906 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -2906,11 +2906,6 @@ public: /// continues to map correctly to each operand. void swapSuccessors(); - /// Retrieve the raw weight values of a conditional branch. - /// Returns true on success with profile weights filled in. - /// Returns false if no metadata or invalid metadata was found. - bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal); - // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::Br); diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h index 678817b..e814f4f 100644 --- a/llvm/include/llvm/Target/TargetLowering.h +++ b/llvm/include/llvm/Target/TargetLowering.h @@ -41,6 +41,7 @@ #include namespace llvm { + class BranchProbability; class CallInst; class CCState; class CCValAssign; @@ -261,6 +262,10 @@ public: return PredictableSelectIsExpensive; } + /// If a branch or a select condition is skewed in one direction by more than + /// this factor, it is very likely to be predicted correctly. + virtual BranchProbability getPredictableBranchThreshold() const; + /// isLoadBitCastBeneficial() - Return true if the following transform /// is beneficial. /// fold (conv (load x)) -> (load (conv*)x) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 659b0c5..3adad3b 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -4538,11 +4539,25 @@ static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { /// Returns true if a SelectInst should be turned into an explicit branch. static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, + const TargetLowering *TLI, SelectInst *SI) { + // If even a predictable select is cheap, then a branch can't be cheaper. + if (!TLI->isPredictableSelectExpensive()) + return false; + // FIXME: This should use the same heuristics as IfConversion to determine - // whether a select is better represented as a branch. This requires that - // branch probability metadata is preserved for the select, which is not the - // case currently. + // whether a select is better represented as a branch. + + // If metadata tells us that the select condition is obviously predictable, + // then we want to replace the select with a branch. + uint64_t TrueWeight, FalseWeight; + if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t Max = std::max(TrueWeight, FalseWeight); + uint64_t Sum = TrueWeight + FalseWeight; + auto Probability = BranchProbability::getBranchProbability(Max, Sum); + if (Probability > TLI->getPredictableBranchThreshold()) + return true; + } CmpInst *Cmp = dyn_cast(SI->getCondition()); @@ -4580,14 +4595,9 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { else SelectKind = TargetLowering::ScalarValSelect; - // Do we have efficient codegen support for this kind of 'selects' ? - if (TLI->isSelectSupported(SelectKind)) { - // We have efficient codegen support for the select instruction. - // Check if it is profitable to keep this 'select'. - if (!TLI->isPredictableSelectExpensive() || - !isFormingBranchFromSelectProfitable(TTI, SI)) - return false; - } + if (TLI->isSelectSupported(SelectKind) && + !isFormingBranchFromSelectProfitable(TTI, TLI, SI)) + return false; ModifiedDT = true; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index aa4e035..0355b8d 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -28,6 +28,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -43,6 +44,17 @@ static cl::opt JumpIsExpensiveOverride( cl::desc("Do not create extra branches to split comparison logic."), cl::Hidden); +// Although this default value is arbitrary, it is not random. It is assumed +// that a condition that evaluates the same way by a higher percentage than this +// is best represented as control flow. Therefore, the default value N should be +// set such that the win from N% correct executions is greater than the loss +// from (100 - N)% mispredicted executions for the majority of intended targets. +static cl::opt MinPercentageForPredictableBranch( + "min-predictable-branch", cl::init(99), + cl::desc("Minimum percentage (0-100) that a condition must be either true " + "or false to assume that the condition is predictable"), + cl::Hidden); + /// InitLibcallNames - Set default libcall names. /// static void InitLibcallNames(const char **Names, const Triple &TT) { @@ -1625,6 +1637,9 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast); } +BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const { + return BranchProbability(MinPercentageForPredictableBranch, 100); +} //===----------------------------------------------------------------------===// // TargetTransformInfo Helpers diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index d66ec8f..5cd52f7 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -1120,28 +1120,6 @@ void BranchInst::swapSuccessors() { MDNode::get(ProfileData->getContext(), Ops)); } -bool BranchInst::extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) { - assert(isConditional() && - "Looking for probabilities on unconditional branch?"); - auto *ProfileData = getMetadata(LLVMContext::MD_prof); - if (!ProfileData || ProfileData->getNumOperands() != 3) - return false; - - auto *ProfDataName = dyn_cast(ProfileData->getOperand(0)); - if (!ProfDataName || !ProfDataName->getString().equals("branch_weights")) - return false; - - auto *CITrue = mdconst::dyn_extract(ProfileData->getOperand(1)); - auto *CIFalse = mdconst::dyn_extract(ProfileData->getOperand(2)); - if (!CITrue || !CIFalse) - return false; - - TrueVal = CITrue->getValue().getZExtValue(); - FalseVal = CIFalse->getValue().getZExtValue(); - - return true; -} - BasicBlock *BranchInst::getSuccessorV(unsigned idx) const { return getSuccessor(idx); } diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index 8a04e63..9435cd2 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -1251,6 +1251,30 @@ void Instruction::getAllMetadataOtherThanDebugLocImpl( Info.getAll(Result); } +bool Instruction::extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) { + assert((getOpcode() == Instruction::Br || + getOpcode() == Instruction::Select) && + "Looking for branch weights on something besides branch or select"); + + auto *ProfileData = getMetadata(LLVMContext::MD_prof); + if (!ProfileData || ProfileData->getNumOperands() != 3) + return false; + + auto *ProfDataName = dyn_cast(ProfileData->getOperand(0)); + if (!ProfDataName || !ProfDataName->getString().equals("branch_weights")) + return false; + + auto *CITrue = mdconst::dyn_extract(ProfileData->getOperand(1)); + auto *CIFalse = mdconst::dyn_extract(ProfileData->getOperand(2)); + if (!CITrue || !CIFalse) + return false; + + TrueVal = CITrue->getValue().getZExtValue(); + FalseVal = CIFalse->getValue().getZExtValue(); + + return true; +} + void Instruction::clearMetadataHashEntries() { assert(hasMetadataHashEntry() && "Caller should check"); getContext().pImpl->InstructionMetadata.erase(this); diff --git a/llvm/test/CodeGen/X86/cmov-into-branch.ll b/llvm/test/CodeGen/X86/cmov-into-branch.ll index 99aa4d5..35c1d34 100644 --- a/llvm/test/CodeGen/X86/cmov-into-branch.ll +++ b/llvm/test/CodeGen/X86/cmov-into-branch.ll @@ -79,13 +79,15 @@ define i32 @weighted_select1(i32 %a, i32 %b) { ret i32 %sel } -; TODO: If a select is obviously predictable, turn it into a branch. +; If a select is obviously predictable, turn it into a branch. define i32 @weighted_select2(i32 %a, i32 %b) { ; CHECK-LABEL: weighted_select2: ; CHECK: # BB#0: ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: cmovnel %edi, %esi -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: jne [[LABEL_BB5:.*]] +; CHECK: movl %esi, %edi +; CHECK-NEXT: [[LABEL_BB5]] +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq ; %cmp = icmp ne i32 %a, 0 @@ -93,6 +95,27 @@ define i32 @weighted_select2(i32 %a, i32 %b) { ret i32 %sel } +; Note the reversed profile weights: it doesn't matter if it's +; obviously true or obviously false. +; Either one should become a branch rather than conditional move. +; TODO: But likely true vs. likely false should affect basic block placement? +define i32 @weighted_select3(i32 %a, i32 %b) { +; CHECK-LABEL: weighted_select3: +; CHECK: # BB#0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne [[LABEL_BB6:.*]] +; CHECK: movl %esi, %edi +; CHECK-NEXT: [[LABEL_BB6]] +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: retq +; + %cmp = icmp ne i32 %a, 0 + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !2 + ret i32 %sel +} + + !0 = !{!"branch_weights", i32 1, i32 99} !1 = !{!"branch_weights", i32 1, i32 100} +!2 = !{!"branch_weights", i32 100, i32 1} -- 2.7.4