From 3e5f67f356a1b9dc31df72ac846e688143ce8a48 Mon Sep 17 00:00:00 2001 From: Rafael Auler Date: Wed, 9 Mar 2022 10:46:15 -0800 Subject: [PATCH] [BOLT] Increase coverage of shrink wrapping [4/5] Change shrink-wrapping to try a priority list of save positions, instead of trying the best one and giving up if it doesn't work. This also increases coverage. Reviewed By: Amir Differential Revision: https://reviews.llvm.org/D126114 --- bolt/include/bolt/Passes/ShrinkWrapping.h | 5 +- bolt/lib/Passes/ShrinkWrapping.cpp | 125 +++++++++++++++--------------- 2 files changed, 65 insertions(+), 65 deletions(-) diff --git a/bolt/include/bolt/Passes/ShrinkWrapping.h b/bolt/include/bolt/Passes/ShrinkWrapping.h index 457c9e5..b92e24f 100644 --- a/bolt/include/bolt/Passes/ShrinkWrapping.h +++ b/bolt/include/bolt/Passes/ShrinkWrapping.h @@ -303,9 +303,8 @@ class ShrinkWrapping { std::vector PopOffsetByReg; std::vector DomOrder; CalleeSavedAnalysis CSA; - std::vector> SavePos; - std::vector BestSaveCount; - std::vector BestSavePos; + std::vector> BestSaveCount; + std::vector> BestSavePos; /// Pass stats static std::atomic_uint64_t SpillsMovedRegularMode; diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp b/bolt/lib/Passes/ShrinkWrapping.cpp index 94771b3..980f491 100644 --- a/bolt/lib/Passes/ShrinkWrapping.cpp +++ b/bolt/lib/Passes/ShrinkWrapping.cpp @@ -779,7 +779,7 @@ void ShrinkWrapping::pruneUnwantedCSRs() { } void ShrinkWrapping::computeSaveLocations() { - SavePos = std::vector>(BC.MRI->getNumRegs()); + BestSavePos = std::vector>(BC.MRI->getNumRegs()); ReachingInsns &RI = Info.getReachingInsnsBackwards(); DominatorAnalysis &DA = Info.getDominatorAnalysis(); StackPointerTracking &SPT = Info.getStackPointerTracking(); @@ -819,11 +819,12 @@ void ShrinkWrapping::computeSaveLocations() { if (BBDominatedUses == UsesByReg[I]) { LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName() << " as a save pos for " << I << "\n"); - SavePos[I].insert(First); + BestSavePos[I].push_back(First); LLVM_DEBUG({ dbgs() << "Dominated uses are:\n"; for (int J : UsesByReg[I].set_bits()) { dbgs() << "Idx " << J << ": "; + BC.printInstruction(dbgs(), *DA.Expressions[J]); DA.Expressions[J]->dump(); } }); @@ -831,27 +832,32 @@ void ShrinkWrapping::computeSaveLocations() { } } - BestSaveCount = std::vector(BC.MRI->getNumRegs(), - std::numeric_limits::max()); - BestSavePos = std::vector(BC.MRI->getNumRegs(), nullptr); + BestSaveCount = std::vector>(BC.MRI->getNumRegs()); + auto &InsnToBB = Info.getInsnToBBMap(); for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { if (!CSA.CalleeSaved[I]) continue; - for (MCInst *Pos : SavePos[I]) { - BinaryBasicBlock *BB = InsnToBB[Pos]; - uint64_t Count = BB->getExecutionCount(); - if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && - Count < BestSaveCount[I]) { - BestSavePos[I] = Pos; - BestSaveCount[I] = Count; - } + std::stable_sort(BestSavePos[I].begin(), BestSavePos[I].end(), + [&](const MCInst *A, const MCInst *B) { + const BinaryBasicBlock *BBA = InsnToBB[A]; + const BinaryBasicBlock *BBB = InsnToBB[B]; + const uint64_t CountA = BBA->getKnownExecutionCount(); + const uint64_t CountB = BBB->getKnownExecutionCount(); + return CountB < CountA; + }); + + for (MCInst *Pos : BestSavePos[I]) { + const BinaryBasicBlock *BB = InsnToBB[Pos]; + const uint64_t Count = BB->getKnownExecutionCount(); + BestSaveCount[I].push_back(Count); } } } void ShrinkWrapping::computeDomOrder() { + DomOrder = std::vector(BC.MRI->getNumRegs(), 0); std::vector Order; for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { Order.push_back(I); @@ -860,17 +866,19 @@ void ShrinkWrapping::computeDomOrder() { DominatorAnalysis &DA = Info.getDominatorAnalysis(); auto &InsnToBB = Info.getInsnToBBMap(); llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) { - BinaryBasicBlock *BBA = BestSavePos[A] ? InsnToBB[BestSavePos[A]] : nullptr; - BinaryBasicBlock *BBB = BestSavePos[B] ? InsnToBB[BestSavePos[B]] : nullptr; + BinaryBasicBlock *BBA = + BestSavePos[A].size() ? InsnToBB[BestSavePos[A].back()] : nullptr; + BinaryBasicBlock *BBB = + BestSavePos[B].size() ? InsnToBB[BestSavePos[B].back()] : nullptr; if (BBA == BBB) return A < B; if (!BBA && BBB) return false; if (BBA && !BBB) return true; - if (DA.doesADominateB(*BestSavePos[A], *BestSavePos[B])) + if (DA.doesADominateB(*BestSavePos[A].back(), *BestSavePos[B].back())) return true; - if (DA.doesADominateB(*BestSavePos[B], *BestSavePos[A])) + if (DA.doesADominateB(*BestSavePos[B].back(), *BestSavePos[A].back())) return false; return A < B; }); @@ -885,33 +893,25 @@ bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave, if (!CSA.CalleeSaved[CSR]) return false; - uint64_t BestCount = BestSaveCount[CSR]; - BestPosSave = BestSavePos[CSR]; - bool ShouldMove = false; - if (BestCount != std::numeric_limits::max() && - BestCount < (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost) { - LLVM_DEBUG({ - auto &InsnToBB = Info.getInsnToBBMap(); - dbgs() << "Better position for saves found in func " << BF.getPrintName() - << " count << " << BF.getKnownExecutionCount() << "\n"; - dbgs() << "Reg: " << CSR - << "; New BB: " << InsnToBB[BestPosSave]->getName() - << " Freq reduction: " << (CurSavingCost - BestCount) << "\n"; - }); - TotalEstimatedWin += CurSavingCost - BestCount; - ShouldMove = true; - } - - if (!ShouldMove) + assert(BestSaveCount[CSR].size() == BestSavePos[CSR].size() && + "save position vectors out of sync"); + if (BestSaveCount[CSR].empty()) return false; - if (!BestPosSave) { - LLVM_DEBUG({ - dbgs() << "Dropping opportunity because we don't know where to put " - "stores -- total est. freq reduc: " - << TotalEstimatedWin << "\n"; - }); + + const uint64_t BestCount = BestSaveCount[CSR].back(); + BestPosSave = BestSavePos[CSR].back(); + if (BestCount >= (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost) return false; - } + + LLVM_DEBUG({ + auto &InsnToBB = Info.getInsnToBBMap(); + dbgs() << "Better position for saves found in func " << BF.getPrintName() + << " count << " << BF.getKnownExecutionCount() << "\n"; + dbgs() << "Reg: " << CSR << "; New BB: " << InsnToBB[BestPosSave]->getName() + << " Freq reduction: " << (CurSavingCost - BestCount) << "\n"; + }); + + TotalEstimatedWin = CurSavingCost - BestCount; return true; } @@ -973,7 +973,6 @@ ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR, uint64_t TotalEstimatedWin) { SmallVector Frontier; SmallVector IsCritEdge; - bool CannotPlace = false; DominatorAnalysis &DA = Info.getDominatorAnalysis(); SmallVector CritEdgesFrom; @@ -994,8 +993,10 @@ ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR, for (ProgramPoint &PP : Frontier) { bool HasCritEdges = false; if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) && - doesInstUsesCSR(*PP.getInst(), CSR)) - CannotPlace = true; + doesInstUsesCSR(*PP.getInst(), CSR)) { + Frontier.clear(); + return Frontier; + } BinaryBasicBlock *FrontierBB = Info.getParentBB(PP); CritEdgesFrom.emplace_back(FrontierBB); CritEdgesTo.emplace_back(0); @@ -1003,8 +1004,6 @@ ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR, // Check for invoke instructions at the dominance frontier, which indicates // the landing pad is not dominated. if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) { - LLVM_DEBUG( - dbgs() << "Bailing on restore placement to avoid LP splitting\n"); Frontier.clear(); return Frontier; } @@ -1054,15 +1053,6 @@ ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR, Info.invalidateAll(); classifyCSRUses(); } - if (CannotPlace) { - LLVM_DEBUG({ - dbgs() << "Dropping opportunity because restore placement failed" - " -- total est. freq reduc: " - << TotalEstimatedWin << "\n"; - }); - Frontier.clear(); - return Frontier; - } return Frontier; } @@ -1281,13 +1271,26 @@ void ShrinkWrapping::moveSaveRestores() { std::vector> MovedRegs; uint64_t TotalEstimatedWin = 0; + computeDomOrder(); for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { MCInst *BestPosSave = nullptr; uint64_t EstimatedWin = 0; - if (!isBestSavePosCold(I, BestPosSave, EstimatedWin)) - continue; - SmallVector RestorePoints = - doRestorePlacement(BestPosSave, I, EstimatedWin); + SmallVector RestorePoints; + while (RestorePoints.empty() && + isBestSavePosCold(I, BestPosSave, EstimatedWin)) { + RestorePoints = doRestorePlacement(BestPosSave, I, EstimatedWin); + if (RestorePoints.empty()) { + LLVM_DEBUG({ + dbgs() << "Dropping opportunity because restore placement failed" + " -- total est. freq reduc: " + << EstimatedWin << ". Will try " + << (BestSaveCount[I].size() - 1) << " more times.\n"; + }); + BestSaveCount[I].pop_back(); + BestSavePos[I].pop_back(); + computeDomOrder(); + } + } if (RestorePoints.empty()) { SpillsFailedDynamicCount += EstimatedWin; continue; @@ -1958,7 +1961,6 @@ bool ShrinkWrapping::perform(bool HotOnly) { HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false); PushOffsetByReg = std::vector(BC.MRI->getNumRegs(), 0LL); PopOffsetByReg = std::vector(BC.MRI->getNumRegs(), 0LL); - DomOrder = std::vector(BC.MRI->getNumRegs(), 0); // Update pass statistics uint64_t TotalInstrs = 0ULL; @@ -2001,7 +2003,6 @@ bool ShrinkWrapping::perform(bool HotOnly) { classifyCSRUses(); pruneUnwantedCSRs(); computeSaveLocations(); - computeDomOrder(); moveSaveRestores(); LLVM_DEBUG({ dbgs() << "Func before shrink-wrapping: \n"; -- 2.7.4