OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(false),
cl::desc("Enable converting phi types in CodeGenPrepare"));
+static cl::opt<unsigned>
+ HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
+ cl::desc("Least BB number of huge function."));
+
namespace {
enum ExtType {
// information of a promoted instruction invalid.
};
+// Describes how (if at all) an optimization modified dominance relations, so
+// the driver loop knows whether and where iteration must be restarted.
+enum ModifyDT {
+ NotModifyDT, // The dominator tree was not modified.
+ ModifyBBDT, // The basic-block-level dominator tree was modified
+ // (e.g. blocks were split, created, or removed).
+ ModifyInstDT // Only instruction-level dominance within a basic block
+ // changed. This usually means we moved/deleted/inserted an
+ // instruction in a basic block, so we should re-iterate the
+ // instructions in that basic block.
+};
+
using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
std::unique_ptr<DominatorTree> DT;
public:
+ /// If we encounter a huge function, we need to limit the build time.
+ bool IsHugeFunc = false;
+
+ /// FreshBBs is like a worklist; it collects the updated BBs which need
+ /// to be optimized again.
+ /// Note: To limit build time in this pass, when a BB is updated, we need
+ /// to insert it into FreshBBs for huge functions.
+ SmallSet<BasicBlock *, 32> FreshBBs;
+
static char ID; // Pass identification, replacement for typeid
CodeGenPrepare() : FunctionPass(ID) {
bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
bool isPreheader);
bool makeBitReverse(Instruction &I);
- bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
- bool optimizeInst(Instruction *I, bool &ModifiedDT);
+ bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
+ bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
unsigned AddrSpace);
bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
bool optimizeInlineAsmInst(CallInst *CS);
- bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
+ bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *Load);
bool optimizeSwitchPhiConstants(SwitchInst *SI);
bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
- bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
+ bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
bool fixupDbgValue(Instruction *I);
bool placeDbgValues(Function &F);
bool placePseudoProbes(Function &F);
Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
bool HasPromoted, TypePromotionTransaction &TPT,
SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
- bool splitBranchCondition(Function &F, bool &ModifiedDT);
+ bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
bool simplifyOffsetableRelocate(GCStatepointInst &I);
bool tryToSinkFreeOperands(Instruction *I);
bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
CmpInst *Cmp, Intrinsic::ID IID);
- bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
- bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
- bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
+ bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
+ bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
+ bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
void verifyBFIUpdates(Function &F);
};
// Clear per function information.
InsertedInsts.clear();
PromotedInsts.clear();
+ FreshBBs.clear();
TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
SubtargetInfo = TM->getSubtargetImpl(F);
// unconditional branch.
EverMadeChange |= eliminateMostlyEmptyBlocks(F);
- bool ModifiedDT = false;
+ ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
if (!DisableBranchOpts)
EverMadeChange |= splitBranchCondition(F, ModifiedDT);
EverMadeChange |=
SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
+ // If we are optimizing a huge function, we need to consider the build time.
+ // The basic algorithm's complexity is near O(N!).
+ IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
+
bool MadeChange = true;
+ bool FuncIterated = false;
while (MadeChange) {
MadeChange = false;
DT.reset();
+
for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
- bool ModifiedDTOnIteration = false;
- MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration);
+ if (FuncIterated && !FreshBBs.contains(&BB))
+ continue;
- // Restart BB iteration if the dominator tree of the Function was changed
- if (ModifiedDTOnIteration)
- break;
+ ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
+ bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
+
+ MadeChange |= Changed;
+ if (IsHugeFunc) {
+ // If the BB is updated, it may still have a chance to be optimized.
+ // This usually happens during sink optimization.
+ // For example:
+ //
+ // bb0:
+ // %and = and i32 %a, 4
+ // %cmp = icmp eq i32 %and, 0
+ //
+ // If %cmp sinks to another BB, %and will have a chance to sink too.
+ if (Changed)
+ FreshBBs.insert(&BB);
+ else if (FuncIterated)
+ FreshBBs.erase(&BB);
+
+ if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
+ DT.reset();
+ } else {
+ // For small/normal functions, we restart BB iteration if the dominator
+ // tree of the Function was changed.
+ if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
+ break;
+ }
}
+ // We have iterated over all the BBs in the function (only relevant for
+ // huge functions).
+ FuncIterated = IsHugeFunc;
+
if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
MadeChange |= mergeSExts(F);
if (!LargeOffsetGEPMap.empty())
// Merge BB into SinglePred and delete it.
MergeBlockIntoPredecessor(BB);
Preds.insert(SinglePred);
+
+ if (IsHugeFunc) {
+ // Update FreshBBs to optimize the merged BB.
+ FreshBBs.insert(SinglePred);
+ FreshBBs.erase(BB);
+ }
}
}
return true;
}
+/// Replace all uses of \p Old with \p New and, for huge functions, record the
+/// parent blocks of \p Old's users in \p FreshBBs so they are re-optimized.
+static void replaceAllUsesWith(Value *Old, Value *New,
+                               SmallSet<BasicBlock *, 32> &FreshBBs,
+                               bool IsHuge) {
+  // Only huge functions maintain the FreshBBs worklist. The IsHuge test is
+  // loop-invariant, so hoist it and skip walking the user list entirely when
+  // it is false.
+  if (IsHuge) {
+    if (auto *OldI = dyn_cast<Instruction>(Old))
+      for (User *U : OldI->users())
+        FreshBBs.insert(cast<Instruction>(U)->getParent());
+  }
+  Old->replaceAllUsesWith(New);
+}
+
/// Eliminate a basic block that has only phi's and an unconditional branch in
/// it.
void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
// Note: BB(=SinglePred) will not be deleted on this path.
// DestBB(=its single successor) is the one that was deleted.
LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
+
+ if (IsHugeFunc) {
+ // Update FreshBBs to optimize the merged BB.
+ FreshBBs.insert(SinglePred);
+ FreshBBs.erase(DestBB);
+ }
return;
}
}
Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
if (BO->getOpcode() != Instruction::Xor) {
Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
- BO->replaceAllUsesWith(Math);
+ replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
} else
assert(BO->hasOneUse() &&
"Patterns with XOr should use the BO only in the compare");
Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
- Cmp->replaceAllUsesWith(OV);
+ replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
Cmp->eraseFromParent();
BO->eraseFromParent();
return true;
/// Try to combine the compare into a call to the llvm.uadd.with.overflow
/// intrinsic. Return true if any changes were made.
-bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT) {
+bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
+ ModifyDT &ModifiedDT) {
Value *A, *B;
BinaryOperator *Add;
if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
return false;
// Reset callers - do not crash by iterating over a dead instruction.
- ModifiedDT = true;
+ ModifiedDT = ModifyDT::ModifyInstDT;
return true;
}
-bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT) {
+bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
+ ModifyDT &ModifiedDT) {
// We are not expecting non-canonical/degenerate code. Just bail out.
Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
if (isa<Constant>(A) && isa<Constant>(B))
return false;
// Reset callers - do not crash by iterating over a dead instruction.
- ModifiedDT = true;
+ ModifiedDT = ModifyDT::ModifyInstDT;
return true;
}
return true;
}
-bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
+bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (sinkCmpExpression(Cmp, *TLI))
return true;
/// If the transform is performed, return true and set ModifiedDT to true.
static bool despeculateCountZeros(IntrinsicInst *CountZeros,
const TargetLowering *TLI,
- const DataLayout *DL, bool &ModifiedDT) {
+ const DataLayout *DL, ModifyDT &ModifiedDT,
+ SmallSet<BasicBlock *, 32> &FreshBBs,
+ bool IsHugeFunc) {
// If a zero input is undefined, it doesn't make sense to despeculate that.
if (match(CountZeros->getOperand(1), m_One()))
return false;
// The intrinsic will be sunk behind a compare against zero and branch.
BasicBlock *StartBlock = CountZeros->getParent();
BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
+ if (IsHugeFunc)
+ FreshBBs.insert(CallBlock);
// Create another block after the count zero intrinsic. A PHI will be added
// in this block to select the result of the intrinsic or the bit-width
// constant if the input to the intrinsic is zero.
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
+ if (IsHugeFunc)
+ FreshBBs.insert(EndBlock);
// Set up a builder to create a compare, conditional branch, and PHI.
IRBuilder<> Builder(CountZeros->getContext());
// or the bit width of the operand.
Builder.SetInsertPoint(&EndBlock->front());
PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
- CountZeros->replaceAllUsesWith(PN);
+ replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
PN->addIncoming(BitWidth, StartBlock);
PN->addIncoming(CountZeros, CallBlock);
// undefined zero argument to 'true'. This will also prevent reprocessing the
// intrinsic; we only despeculate when a zero input is defined.
CountZeros->setArgOperand(1, Builder.getTrue());
- ModifiedDT = true;
+ ModifiedDT = ModifyDT::ModifyBBDT;
return true;
}
-bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
+bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
BasicBlock *BB = CI->getParent();
// Lower inline assembly if we can.
LargeOffsetGEPMap.erase(II);
}
- II->replaceAllUsesWith(ArgVal);
+ replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
II->eraseFromParent();
return true;
}
case Intrinsic::cttz:
case Intrinsic::ctlz:
// If counting zeros is expensive, try to avoid it.
- return despeculateCountZeros(II, TLI, DL, ModifiedDT);
+ return despeculateCountZeros(II, TLI, DL, ModifiedDT, FreshBBs,
+ IsHugeFunc);
case Intrinsic::fshl:
case Intrinsic::fshr:
return optimizeFunnelShift(II);
auto *One = ConstantInt::getSigned(II->getType(), 1);
auto *CGep =
ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One);
- II->replaceAllUsesWith(ConstantExpr::getPtrToInt(CGep, II->getType()));
+ replaceAllUsesWith(II, ConstantExpr::getPtrToInt(CGep, II->getType()),
+ FreshBBs, IsHugeFunc);
II->eraseFromParent();
return true;
}
FortifiedLibCallSimplifier Simplifier(TLInfo, true);
IRBuilder<> Builder(CI);
if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
- CI->replaceAllUsesWith(V);
+ replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
CI->eraseFromParent();
return true;
}
/// ret i32 %tmp2
/// @endcode
bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
- bool &ModifiedDT) {
+ ModifyDT &ModifiedDT) {
+ if (!BB->getTerminator())
+ return false;
+
ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
if (!RetI)
return false;
BFI->setBlockFreq(
BB,
(BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)).getFrequency());
- ModifiedDT = Changed = true;
+ ModifiedDT = ModifyDT::ModifyBBDT;
+ Changed = true;
++NumRetsDup;
}
bool inserted = false;
for (auto &Pt : CurPts) {
if (getDT(F).dominates(Inst, Pt)) {
- Pt->replaceAllUsesWith(Inst);
+ replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
RemovedInsts.insert(Pt);
Pt->removeFromParent();
Pt = Inst;
// Give up if we need to merge in a common dominator as the
// experiments show it is not profitable.
continue;
- Inst->replaceAllUsesWith(Pt);
+ replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
RemovedInsts.insert(Inst);
Inst->removeFromParent();
inserted = true;
if (GEP->getType() != I8PtrTy)
NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
}
- GEP->replaceAllUsesWith(NewGEP);
+ replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
LargeOffsetGEPID.erase(GEP);
LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
GEP->eraseFromParent();
for (Instruction *U : Uses) {
if (isa<BitCastInst>(U)) {
DeletedInstrs.insert(U);
- U->replaceAllUsesWith(ValMap[U->getOperand(0)]);
+ replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
} else {
U->setOperand(0,
new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
// Remove any old phi's that have been converted.
for (auto *I : DeletedInstrs) {
- I->replaceAllUsesWith(PoisonValue::get(I->getType()));
+ replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
I->eraseFromParent();
}
// Replace all uses of load with new and (except for the use of load in the
// new and itself).
- Load->replaceAllUsesWith(NewAnd);
+ replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
NewAnd->setOperand(0, Load);
// Remove any and instructions that are now redundant.
// Check that the and mask is the same as the one we decided to put on the
// new and.
if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
- And->replaceAllUsesWith(NewAnd);
+ replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
if (&*CurInstIterator == And)
CurInstIterator = std::next(And->getIterator());
And->eraseFromParent();
Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
- Shift->replaceAllUsesWith(NewSel);
+ replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
Shift->eraseFromParent();
return true;
}
Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
- Fsh->replaceAllUsesWith(NewSel);
+ replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
Fsh->eraseFromParent();
return true;
}
BasicBlock *StartBlock = SI->getParent();
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+ if (IsHugeFunc)
+ FreshBBs.insert(EndBlock);
BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
// Delete the unconditional branch that was just created by the split.
TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
EndBlock->getParent(), EndBlock);
TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ if (IsHugeFunc)
+ FreshBBs.insert(TrueBlock);
TrueBranch->setDebugLoc(SI->getDebugLoc());
}
auto *TrueInst = cast<Instruction>(SI->getTrueValue());
if (FalseBlock == nullptr) {
FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
EndBlock->getParent(), EndBlock);
+ if (IsHugeFunc)
+ FreshBBs.insert(FalseBlock);
FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
FalseBranch->setDebugLoc(SI->getDebugLoc());
}
FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
EndBlock->getParent(), EndBlock);
+ if (IsHugeFunc)
+ FreshBBs.insert(FalseBlock);
auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
FalseBranch->setDebugLoc(SI->getDebugLoc());
}
PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
PN->setDebugLoc(SI->getDebugLoc());
- SI->replaceAllUsesWith(PN);
+ replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
SI->eraseFromParent();
INS.erase(SI);
++NumSelectsExpanded;
Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
- SVI->replaceAllUsesWith(BC2);
+ replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
RecursivelyDeleteTriviallyDeadInstructions(
SVI, TLInfo, nullptr,
[&](Value *V) { removeAllAssertingVHReferences(V); });
for (Use *U : ToReplace) {
auto *UI = cast<Instruction>(U->get());
Instruction *NI = UI->clone();
+
+ if (IsHugeFunc) {
+ // Since we cloned an instruction, the defs of its operands may now be able
+ // to sink into this BB. So put the operand defs' BBs into FreshBBs for
+ // optimization.
+ for (unsigned I = 0; I < NI->getNumOperands(); ++I) {
+ auto *OpDef = dyn_cast<Instruction>(NI->getOperand(I));
+ if (!OpDef)
+ continue;
+ FreshBBs.insert(OpDef->getParent());
+ }
+ }
+
NewInstructions[UI] = NI;
MaybeDead.insert(UI);
LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
return true;
}
-static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI) {
+static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
+ SmallSet<BasicBlock *, 32> &FreshBBs,
+ bool IsHugeFunc) {
// Try and convert
// %c = icmp ult %x, 8
// br %c, bla, blb
ConstantInt::get(UI->getType(), 0));
LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
- Cmp->replaceAllUsesWith(NewCmp);
+ replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
return true;
}
if (Cmp->isEquality() &&
ConstantInt::get(UI->getType(), 0));
LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
- Cmp->replaceAllUsesWith(NewCmp);
+ replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
return true;
}
}
return false;
}
-bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
+bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
// Bail out if we inserted the instruction to prevent optimizations from
// stepping on each other's toes.
if (InsertedInsts.count(I))
// trivial PHI, go ahead and zap it here.
if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
LargeOffsetGEPMap.erase(P);
- P->replaceAllUsesWith(V);
+ replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
P->eraseFromParent();
++NumPHIsElim;
return true;
Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
GEPI->getName(), GEPI);
NC->setDebugLoc(GEPI->getDebugLoc());
- GEPI->replaceAllUsesWith(NC);
+ replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
GEPI->eraseFromParent();
++NumGEPsElim;
optimizeInst(NC, ModifiedDT);
F->takeName(FI);
CmpI->setOperand(Const0 ? 1 : 0, F);
}
- FI->replaceAllUsesWith(CmpI);
+ replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
FI->eraseFromParent();
return true;
}
case Instruction::ExtractElement:
return optimizeExtractElementInst(cast<ExtractElementInst>(I));
case Instruction::Br:
- return optimizeBranch(cast<BranchInst>(I), *TLI);
+ return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
}
return false;
if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
return false;
Instruction *LastInst = Insts.back();
- I.replaceAllUsesWith(LastInst);
+ replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
RecursivelyDeleteTriviallyDeadInstructions(
&I, TLInfo, nullptr,
[&](Value *V) { removeAllAssertingVHReferences(V); });
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
-bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
+bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
SunkAddrs.clear();
bool MadeChange = false;
- CurInstIterator = BB.begin();
- while (CurInstIterator != BB.end()) {
- MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
- if (ModifiedDT)
- return true;
- }
+ do {
+ CurInstIterator = BB.begin();
+ ModifiedDT = ModifyDT::NotModifyDT;
+ while (CurInstIterator != BB.end()) {
+ MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
+ if (ModifiedDT != ModifyDT::NotModifyDT) {
+ // For huge functions we tend to quickly go through the inner optimization
+ // opportunities in the BB. So we go back to the BB head to re-optimize
+ // each instruction instead of going back to the function head.
+ if (IsHugeFunc) {
+ DT.reset();
+ getDT(*BB.getParent());
+ break;
+ } else {
+ return true;
+ }
+ }
+ }
+ } while (ModifiedDT == ModifyDT::ModifyInstDT);
bool MadeBitReverse = true;
while (MadeBitReverse) {
///
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
///
-bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
+bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
return false;
auto *TmpBB =
BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
BB.getParent(), BB.getNextNode());
+ if (IsHugeFunc)
+ FreshBBs.insert(TmpBB);
// Update original basic block by using the first condition directly by the
// branch instruction and removing the no longer needed and/or instruction.
}
}
- ModifiedDT = true;
+ ModifiedDT = ModifyDT::ModifyBBDT;
MadeChange = true;
LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();