/// ScannedRemattable - true when remattable values have been identified.
bool ScannedRemattable;
+ /// DeadRemats - Instructions that are dead after rematerialization but
+ /// have not been deleted yet -- deletion is deferred to postOptimization.
+ SmallPtrSet<MachineInstr *, 32> *DeadRemats;
+
/// Remattable - Values defined by remattable instructions as identified by
/// tii.isTriviallyReMaterializable().
SmallPtrSet<const VNInfo*,4> Remattable;
/// @param vrm Map of virtual registers to physical registers for this
/// function. If NULL, no virtual register map updates will
/// be done. This could be the case if called before Regalloc.
+ /// @param deadRemats The collection of all the instructions that define an
+ /// original reg and are dead after remat.
LiveRangeEdit(LiveInterval *parent, SmallVectorImpl<unsigned> &newRegs,
MachineFunction &MF, LiveIntervals &lis, VirtRegMap *vrm,
- Delegate *delegate = nullptr)
+ Delegate *delegate = nullptr,
+ SmallPtrSet<MachineInstr *, 32> *deadRemats = nullptr)
: Parent(parent), NewRegs(newRegs), MRI(MF.getRegInfo()), LIS(lis),
- VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()),
- TheDelegate(delegate), FirstNew(newRegs.size()),
- ScannedRemattable(false) {
+ VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()), TheDelegate(delegate),
+ FirstNew(newRegs.size()), ScannedRemattable(false),
+ DeadRemats(deadRemats) {
MRI.setDelegate(this);
}
bool empty() const { return size() == 0; }
unsigned get(unsigned idx) const { return NewRegs[idx+FirstNew]; }
+ /// pop_back - Allow LiveRangeEdit users to drop new registers.
+ /// The context: when an original def instruction of a register is dead
+ /// after rematerialization, we still want to keep it for later
+ /// rematerializations. We save the def instruction in DeadRemats,
+ /// and replace the original dst register with a new dummy register so
+ /// the live range of the original dst register can be shrunk normally.
+ /// We don't want to allocate a phys register for the dummy register, so
+ /// we want to drop it from the NewRegs set.
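+ ///
+ /// For illustration: in eliminateDeadDef the dummy register is created
+ /// through MRI, which notifies this LiveRangeEdit as its delegate and
+ /// thereby appends the new vreg to NewRegs; calling pop_back() right
+ /// after undoes that append, so the allocator never sees the dummy.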
+ void pop_back() { NewRegs.pop_back(); }
+
ArrayRef<unsigned> regs() const {
return makeArrayRef(NewRegs).slice(FirstNew);
}
/// Remat - Information needed to rematerialize at a specific location.
struct Remat {
VNInfo *ParentVNI; // parent_'s value at the remat location.
- MachineInstr *OrigMI; // Instruction defining ParentVNI.
+ MachineInstr *OrigMI; // Instruction defining OrigVNI. It contains the
+ // real expression used for remat.
explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(nullptr) {}
};
/// canRematerializeAt - Determine if ParentVNI can be rematerialized at
/// UseIdx. It is assumed that parent_.getVNInfoAt(UseIdx) == ParentVNI.
/// When cheapAsAMove is set, only cheap remats are allowed.
- bool canRematerializeAt(Remat &RM,
- SlotIndex UseIdx,
+ bool canRematerializeAt(Remat &RM, VNInfo *OrigVNI, SlotIndex UseIdx,
bool cheapAsAMove);
/// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
return Rematted.count(ParentVNI);
}
+ void markDeadRemat(MachineInstr *inst) {
+ // DeadRemats is an optional field.
+ if (DeadRemats)
+ DeadRemats->insert(inst);
+ }
+
/// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try
/// to erase it from LIS.
void eraseVirtReg(unsigned Reg);
/// RegsBeingSpilled lists registers currently being spilled by the register
/// allocator. These registers should not be split into new intervals
/// as currently those new intervals are not guaranteed to spill.
- void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
- ArrayRef<unsigned> RegsBeingSpilled = None);
+ /// NoSplit indicates this call is made after the iterations of selectOrSplit,
+ /// when registers should not be split into new intervals.
+ void eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
+ ArrayRef<unsigned> RegsBeingSpilled = None,
+ bool NoSplit = false);
/// calculateRegClassAndHint - Recompute register class and hint for each new
/// register.
//===----------------------------------------------------------------------===//
#include "Spiller.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
STATISTIC(NumFolded, "Number of folded stack accesses");
STATISTIC(NumFoldedLoads, "Number of folded loads");
STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
-STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads");
-STATISTIC(NumHoists, "Number of hoisted spills");
static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
cl::desc("Disable inline spill hoisting"));
namespace {
+class HoistSpillHelper {
+ LiveIntervals &LIS;
+ LiveStacks &LSS;
+ AliasAnalysis *AA;
+ MachineDominatorTree &MDT;
+ MachineLoopInfo &Loops;
+ VirtRegMap &VRM;
+ MachineFrameInfo &MFI;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const MachineBlockFrequencyInfo &MBFI;
+
+ // Map from StackSlot to its original register.
+ DenseMap<int, unsigned> StackSlotToReg;
+ // Map from a pair (StackSlot, Original VNI) to a set of spills which
+ // have the same stackslot and have equal values defined by Original VNI.
+ // These spills are mergeable and are hoist candidates.
+ typedef MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>
+ MergeableSpillsMap;
+ MergeableSpillsMap MergeableSpills;
+
+ /// This is the map from original register to a set containing all its
+ /// siblings. To hoist a spill to another BB, we need to find a live
+ /// sibling there and use it as the source of the new spill.
+ DenseMap<unsigned, SmallSetVector<unsigned, 16>> Virt2SiblingsMap;
+
+ bool isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, MachineBasicBlock &BB,
+ unsigned &LiveReg);
+
+ void rmRedundantSpills(
+ SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
+
+ void getVisitOrders(
+ MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineDomTreeNode *> &Orders,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
+
+ void runHoistSpills(unsigned OrigReg, VNInfo &OrigVNI,
+ SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns);
+
+public:
+ HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : LIS(pass.getAnalysis<LiveIntervals>()),
+ LSS(pass.getAnalysis<LiveStacks>()),
+ AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
+ MDT(pass.getAnalysis<MachineDominatorTree>()),
+ Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
+ MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
+ TII(*mf.getSubtarget().getInstrInfo()),
+ TRI(*mf.getSubtarget().getRegisterInfo()),
+ MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
+
+ void addToMergeableSpills(MachineInstr *Spill, int StackSlot,
+ unsigned Original);
+ bool rmFromMergeableSpills(MachineInstr *Spill, int StackSlot);
+ void hoistAllSpills(LiveRangeEdit &Edit);
+};
+
class InlineSpiller : public Spiller {
MachineFunction &MF;
LiveIntervals &LIS;
// Values that failed to remat at some point.
SmallPtrSet<VNInfo*, 8> UsedValues;
-public:
- // Information about a value that was defined by a copy from a sibling
- // register.
- struct SibValueInfo {
- // True when all reaching defs were reloads: No spill is necessary.
- bool AllDefsAreReloads;
-
- // True when value is defined by an original PHI not from splitting.
- bool DefByOrigPHI;
-
- // True when the COPY defining this value killed its source.
- bool KillsSource;
-
- // The preferred register to spill.
- unsigned SpillReg;
-
- // The value of SpillReg that should be spilled.
- VNInfo *SpillVNI;
-
- // The block where SpillVNI should be spilled. Currently, this must be the
- // block containing SpillVNI->def.
- MachineBasicBlock *SpillMBB;
-
- // A defining instruction that is not a sibling copy or a reload, or NULL.
- // This can be used as a template for rematerialization.
- MachineInstr *DefMI;
-
- // List of values that depend on this one. These values are actually the
- // same, but live range splitting has placed them in different registers,
- // or SSA update needed to insert PHI-defs to preserve SSA form. This is
- // copies of the current value and phi-kills. Usually only phi-kills cause
- // more than one dependent value.
- TinyPtrVector<VNInfo*> Deps;
-
- SibValueInfo(unsigned Reg, VNInfo *VNI)
- : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false),
- SpillReg(Reg), SpillVNI(VNI), SpillMBB(nullptr), DefMI(nullptr) {}
-
- // Returns true when a def has been found.
- bool hasDef() const { return DefByOrigPHI || DefMI; }
- };
-
-private:
- // Values in RegsToSpill defined by sibling copies.
- typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap;
- SibValueMap SibValues;
-
// Dead defs generated during spilling.
SmallVector<MachineInstr*, 8> DeadDefs;
+ // Object that records spill information and does the hoisting.
+ HoistSpillHelper HSpiller;
+
~InlineSpiller() override {}
public:
MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
TII(*mf.getSubtarget().getInstrInfo()),
TRI(*mf.getSubtarget().getRegisterInfo()),
- MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
+ MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
+ HSpiller(pass, mf, vrm) {}
void spill(LiveRangeEdit &) override;
+ void postOptimization() override;
private:
bool isSnippet(const LiveInterval &SnipLI);
}
bool isSibling(unsigned Reg);
- MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*);
- void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = nullptr);
- void analyzeSiblingValues();
-
- bool hoistSpill(LiveInterval &SpillLI, MachineInstr &CopyMI);
+ bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI);
void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
void markValueUsed(LiveInterval*, VNInfo*);
}
}
-
-//===----------------------------------------------------------------------===//
-// Sibling Values
-//===----------------------------------------------------------------------===//
-
-// After live range splitting, some values to be spilled may be defined by
-// copies from sibling registers. We trace the sibling copies back to the
-// original value if it still exists. We need it for rematerialization.
-//
-// Even when the value can't be rematerialized, we still want to determine if
-// the value has already been spilled, or we may want to hoist the spill from a
-// loop.
-
bool InlineSpiller::isSibling(unsigned Reg) {
return TargetRegisterInfo::isVirtualRegister(Reg) &&
VRM.getOriginal(Reg) == Original;
}
-#ifndef NDEBUG
-static raw_ostream &operator<<(raw_ostream &OS,
- const InlineSpiller::SibValueInfo &SVI) {
- OS << "spill " << PrintReg(SVI.SpillReg) << ':'
- << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def;
- if (SVI.SpillMBB)
- OS << " in BB#" << SVI.SpillMBB->getNumber();
- if (SVI.AllDefsAreReloads)
- OS << " all-reloads";
- if (SVI.DefByOrigPHI)
- OS << " orig-phi";
- if (SVI.KillsSource)
- OS << " kill";
- OS << " deps[";
- for (VNInfo *Dep : SVI.Deps)
- OS << ' ' << Dep->id << '@' << Dep->def;
- OS << " ]";
- if (SVI.DefMI)
- OS << " def: " << *SVI.DefMI;
- else
- OS << '\n';
- return OS;
-}
-#endif
-
-/// propagateSiblingValue - Propagate the value in SVI to dependents if it is
-/// known. Otherwise remember the dependency for later.
+/// It is beneficial to spill to an earlier place in the same BB in the
+/// following case:
+/// There is an alternative def earlier in the same MBB.
+/// Hoisting the spill as far as possible in SpillMBB can ease
+/// register pressure:
///
-/// @param SVIIter SibValues entry to propagate.
-/// @param VNI Dependent value, or NULL to propagate to all saved dependents.
-void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter,
- VNInfo *VNI) {
- SibValueMap::value_type *SVI = &*SVIIter;
-
- // When VNI is non-NULL, add it to SVI's deps, and only propagate to that.
- TinyPtrVector<VNInfo*> FirstDeps;
- if (VNI) {
- FirstDeps.push_back(VNI);
- SVI->second.Deps.push_back(VNI);
- }
-
- // Has the value been completely determined yet? If not, defer propagation.
- if (!SVI->second.hasDef())
- return;
-
- // Work list of values to propagate.
- SmallSetVector<SibValueMap::value_type *, 8> WorkList;
- WorkList.insert(SVI);
-
- do {
- SVI = WorkList.pop_back_val();
- TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps;
- VNI = nullptr;
-
- SibValueInfo &SV = SVI->second;
- if (!SV.SpillMBB)
- SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def);
-
- DEBUG(dbgs() << " prop to " << Deps->size() << ": "
- << SVI->first->id << '@' << SVI->first->def << ":\t" << SV);
-
- assert(SV.hasDef() && "Propagating undefined value");
-
- // Should this value be propagated as a preferred spill candidate? We don't
- // propagate values of registers that are about to spill.
- bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg);
- unsigned SpillDepth = ~0u;
-
- for (VNInfo *Dep : *Deps) {
- SibValueMap::iterator DepSVI = SibValues.find(Dep);
- assert(DepSVI != SibValues.end() && "Dependent value not in SibValues");
- SibValueInfo &DepSV = DepSVI->second;
- if (!DepSV.SpillMBB)
- DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def);
-
- bool Changed = false;
-
- // Propagate defining instruction.
- if (!DepSV.hasDef()) {
- Changed = true;
- DepSV.DefMI = SV.DefMI;
- DepSV.DefByOrigPHI = SV.DefByOrigPHI;
- }
-
- // Propagate AllDefsAreReloads. For PHI values, this computes an AND of
- // all predecessors.
- if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) {
- Changed = true;
- DepSV.AllDefsAreReloads = false;
- }
-
- // Propagate best spill value.
- if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) {
- if (SV.SpillMBB == DepSV.SpillMBB) {
- // DepSV is in the same block. Hoist when dominated.
- if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) {
- // This is an alternative def earlier in the same MBB.
- // Hoist the spill as far as possible in SpillMBB. This can ease
- // register pressure:
- //
- // x = def
- // y = use x
- // s = copy x
- //
- // Hoisting the spill of s to immediately after the def removes the
- // interference between x and y:
- //
- // x = def
- // spill x
- // y = use x<kill>
- //
- // This hoist only helps when the DepSV copy kills its source.
- Changed = true;
- DepSV.SpillReg = SV.SpillReg;
- DepSV.SpillVNI = SV.SpillVNI;
- DepSV.SpillMBB = SV.SpillMBB;
- }
- } else {
- // DepSV is in a different block.
- if (SpillDepth == ~0u)
- SpillDepth = Loops.getLoopDepth(SV.SpillMBB);
-
- // Also hoist spills to blocks with smaller loop depth, but make sure
- // that the new value dominates. Non-phi dependents are always
- // dominated, phis need checking.
-
- const BranchProbability MarginProb(4, 5); // 80%
- // Hoist a spill to outer loop if there are multiple dependents (it
- // can be beneficial if more than one dependents are hoisted) or
- // if DepSV (the hoisting source) is hotter than SV (the hoisting
- // destination) (we add a 80% margin to bias a little towards
- // loop depth).
- bool HoistCondition =
- (MBFI.getBlockFreq(DepSV.SpillMBB) >=
- (MBFI.getBlockFreq(SV.SpillMBB) * MarginProb)) ||
- Deps->size() > 1;
-
- if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) &&
- HoistCondition &&
- (!DepSVI->first->isPHIDef() ||
- MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) {
- Changed = true;
- DepSV.SpillReg = SV.SpillReg;
- DepSV.SpillVNI = SV.SpillVNI;
- DepSV.SpillMBB = SV.SpillMBB;
- }
- }
- }
-
- if (!Changed)
- continue;
-
- // Something changed in DepSVI. Propagate to dependents.
- WorkList.insert(&*DepSVI);
-
- DEBUG(dbgs() << " update " << DepSVI->first->id << '@'
- << DepSVI->first->def << " to:\t" << DepSV);
- }
- } while (!WorkList.empty());
-}
-
-/// traceSiblingValue - Trace a value that is about to be spilled back to the
-/// real defining instructions by looking through sibling copies. Always stay
-/// within the range of OrigVNI so the registers are known to carry the same
-/// value.
+/// x = def
+/// y = use x
+/// s = copy x
///
-/// Determine if the value is defined by all reloads, so spilling isn't
-/// necessary - the value is already in the stack slot.
+/// Hoisting the spill of s to immediately after the def removes the
+/// interference between x and y:
///
-/// Return a defining instruction that may be a candidate for rematerialization.
+/// x = def
+/// spill x
+/// y = use x<kill>
///
-MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
- VNInfo *OrigVNI) {
- // Check if a cached value already exists.
- SibValueMap::iterator SVI;
- bool Inserted;
- std::tie(SVI, Inserted) =
- SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI)));
- if (!Inserted) {
- DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':'
- << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second);
- return SVI->second.DefMI;
- }
-
- DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
- << UseVNI->id << '@' << UseVNI->def << '\n');
-
- // List of (Reg, VNI) that have been inserted into SibValues, but need to be
- // processed.
- SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
- WorkList.push_back(std::make_pair(UseReg, UseVNI));
-
- LiveInterval &OrigLI = LIS.getInterval(Original);
- do {
- unsigned Reg;
- VNInfo *VNI;
- std::tie(Reg, VNI) = WorkList.pop_back_val();
- DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
- << ":\t");
-
- // First check if this value has already been computed.
- SVI = SibValues.find(VNI);
- assert(SVI != SibValues.end() && "Missing SibValues entry");
-
- // Trace through PHI-defs created by live range splitting.
- if (VNI->isPHIDef()) {
- // Stop at original PHIs. We don't know the value at the
- // predecessors. Look up the VNInfo for the current definition
- // in OrigLI, to properly determine whether or not this phi was
- // added by splitting.
- if (VNI->def == OrigLI.getVNInfoAt(VNI->def)->def) {
- DEBUG(dbgs() << "orig phi value\n");
- SVI->second.DefByOrigPHI = true;
- SVI->second.AllDefsAreReloads = false;
- propagateSiblingValue(SVI);
- continue;
- }
-
- // This is a PHI inserted by live range splitting. We could trace the
- // live-out value from predecessor blocks, but that search can be very
- // expensive if there are many predecessors and many more PHIs as
- // generated by tail-dup when it sees an indirectbr. Instead, look at
- // all the non-PHI defs that have the same value as OrigVNI. They must
- // jointly dominate VNI->def. This is not optimal since VNI may actually
- // be jointly dominated by a smaller subset of defs, so there is a change
- // we will miss a AllDefsAreReloads optimization.
-
- // Separate all values dominated by OrigVNI into PHIs and non-PHIs.
- SmallVector<VNInfo*, 8> PHIs, NonPHIs;
- LiveInterval &LI = LIS.getInterval(Reg);
-
- for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end();
- VI != VE; ++VI) {
- VNInfo *VNI2 = *VI;
- if (VNI2->isUnused())
- continue;
- if (!OrigLI.containsOneValue() &&
- OrigLI.getVNInfoAt(VNI2->def) != OrigVNI)
- continue;
- if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def)
- PHIs.push_back(VNI2);
- else
- NonPHIs.push_back(VNI2);
- }
- DEBUG(dbgs() << "split phi value, checking " << PHIs.size()
- << " phi-defs, and " << NonPHIs.size()
- << " non-phi/orig defs\n");
-
- // Create entries for all the PHIs. Don't add them to the worklist, we
- // are processing all of them in one go here.
- for (VNInfo *PHI : PHIs)
- SibValues.insert(std::make_pair(PHI, SibValueInfo(Reg, PHI)));
-
- // Add every PHI as a dependent of all the non-PHIs.
- for (VNInfo *NonPHI : NonPHIs) {
- // Known value? Try an insertion.
- std::tie(SVI, Inserted) =
- SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
- // Add all the PHIs as dependents of NonPHI.
- SVI->second.Deps.insert(SVI->second.Deps.end(), PHIs.begin(),
- PHIs.end());
- // This is the first time we see NonPHI, add it to the worklist.
- if (Inserted)
- WorkList.push_back(std::make_pair(Reg, NonPHI));
- else
- // Propagate to all inserted PHIs, not just VNI.
- propagateSiblingValue(SVI);
- }
-
- // Next work list item.
- continue;
- }
-
- MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
- assert(MI && "Missing def");
-
- // Trace through sibling copies.
- if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
- if (isSibling(SrcReg)) {
- LiveInterval &SrcLI = LIS.getInterval(SrcReg);
- LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
- assert(SrcQ.valueIn() && "Copy from non-existing value");
- // Check if this COPY kills its source.
- SVI->second.KillsSource = SrcQ.isKill();
- VNInfo *SrcVNI = SrcQ.valueIn();
- DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':'
- << SrcVNI->id << '@' << SrcVNI->def
- << " kill=" << unsigned(SVI->second.KillsSource) << '\n');
- // Known sibling source value? Try an insertion.
- std::tie(SVI, Inserted) = SibValues.insert(
- std::make_pair(SrcVNI, SibValueInfo(SrcReg, SrcVNI)));
- // This is the first time we see Src, add it to the worklist.
- if (Inserted)
- WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
- propagateSiblingValue(SVI, VNI);
- // Next work list item.
- continue;
- }
- }
-
- // Track reachable reloads.
- SVI->second.DefMI = MI;
- SVI->second.SpillMBB = MI->getParent();
- int FI;
- if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) {
- DEBUG(dbgs() << "reload\n");
- propagateSiblingValue(SVI);
- // Next work list item.
- continue;
- }
-
- // Potential remat candidate.
- DEBUG(dbgs() << "def " << *MI);
- SVI->second.AllDefsAreReloads = false;
- propagateSiblingValue(SVI);
- } while (!WorkList.empty());
-
- // Look up the value we were looking for. We already did this lookup at the
- // top of the function, but SibValues may have been invalidated.
- SVI = SibValues.find(UseVNI);
- assert(SVI != SibValues.end() && "Didn't compute requested info");
- DEBUG(dbgs() << " traced to:\t" << SVI->second);
- return SVI->second.DefMI;
-}
-
-/// analyzeSiblingValues - Trace values defined by sibling copies back to
-/// something that isn't a sibling copy.
+/// This hoist only helps when the copy kills its source.
///
-/// Keep track of values that may be rematerializable.
-void InlineSpiller::analyzeSiblingValues() {
- SibValues.clear();
-
- // No siblings at all?
- if (Edit->getReg() == Original)
- return;
-
- LiveInterval &OrigLI = LIS.getInterval(Original);
- for (unsigned Reg : RegsToSpill) {
- LiveInterval &LI = LIS.getInterval(Reg);
- for (LiveInterval::const_vni_iterator VI = LI.vni_begin(),
- VE = LI.vni_end(); VI != VE; ++VI) {
- VNInfo *VNI = *VI;
- if (VNI->isUnused())
- continue;
- MachineInstr *DefMI = nullptr;
- if (!VNI->isPHIDef()) {
- DefMI = LIS.getInstructionFromIndex(VNI->def);
- assert(DefMI && "No defining instruction");
- }
- // Check possible sibling copies.
- if (VNI->isPHIDef() || DefMI->isCopy()) {
- VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
- assert(OrigVNI && "Def outside original live range");
- if (OrigVNI->def != VNI->def)
- DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
- }
- if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) {
- DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
- << VNI->def << " may remat from " << *DefMI);
- }
- }
- }
-}
-
-/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert
-/// a spill at a better location.
-bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr &CopyMI) {
+bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
+ MachineInstr &CopyMI) {
SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
+#ifndef NDEBUG
VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
- SibValueMap::iterator I = SibValues.find(VNI);
- if (I == SibValues.end())
- return false;
-
- const SibValueInfo &SVI = I->second;
-
- // Let the normal folding code deal with the boring case.
- if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI)
- return false;
-
- // SpillReg may have been deleted by remat and DCE.
- if (!LIS.hasInterval(SVI.SpillReg)) {
- DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n');
- SibValues.erase(I);
- return false;
- }
+#endif
- LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg);
- if (!SibLI.containsValue(SVI.SpillVNI)) {
- DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n');
- SibValues.erase(I);
+ unsigned SrcReg = CopyMI.getOperand(1).getReg();
+ LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+ VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx);
+ LiveQueryResult SrcQ = SrcLI.Query(Idx);
+ MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(SrcVNI->def);
+ if (DefMBB != CopyMI.getParent() || !SrcQ.isKill())
return false;
- }
// Conservatively extend the stack slot range to the range of the original
// value. We may be able to do better with stack slot coloring by being more
DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
<< *StackInt << '\n');
- // Already spilled everywhere.
- if (SVI.AllDefsAreReloads) {
- DEBUG(dbgs() << "\tno spill needed: " << SVI);
- ++NumOmitReloadSpill;
- return true;
- }
- // We are going to spill SVI.SpillVNI immediately after its def, so clear out
+ // We are going to spill SrcVNI immediately after its def, so clear out
// any later spills of the same value.
- eliminateRedundantSpills(SibLI, SVI.SpillVNI);
+ eliminateRedundantSpills(SrcLI, SrcVNI);
- MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def);
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
MachineBasicBlock::iterator MII;
- if (SVI.SpillVNI->isPHIDef())
+ if (SrcVNI->isPHIDef())
MII = MBB->SkipPHIsAndLabels(MBB->begin());
else {
- MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def);
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
assert(DefMI && "Defining instruction disappeared");
MII = DefMI;
++MII;
}
// Insert spill without kill flag immediately after def.
- TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot,
- MRI.getRegClass(SVI.SpillReg), &TRI);
+ TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot,
+ MRI.getRegClass(SrcReg), &TRI);
--MII; // Point to store instruction.
LIS.InsertMachineInstrInMaps(*MII);
- DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
+ DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII);
+ HSpiller.addToMergeableSpills(&(*MII), StackSlot, Original);
++NumSpills;
- ++NumHoists;
return true;
}
MI->setDesc(TII.get(TargetOpcode::KILL));
DeadDefs.push_back(MI);
++NumSpillsRemoved;
- --NumSpills;
+ if (HSpiller.rmFromMergeableSpills(MI, StackSlot))
+ --NumSpills;
}
}
} while (!WorkList.empty());
if (SnippetCopies.count(&MI))
return false;
- // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
LiveRangeEdit::Remat RM(ParentVNI);
- SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
- if (SibI != SibValues.end())
- RM.OrigMI = SibI->second.DefMI;
- if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
+ RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
+
+ if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) {
markValueUsed(&VirtReg, ParentVNI);
DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
return false;
/// reMaterializeAll - Try to rematerialize as many uses as possible,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
- // analyzeSiblingValues has already tested all relevant defining instructions.
if (!Edit->anyRematerializable(AA))
return;
if (InstrReg != Reg || FI != StackSlot)
return false;
+ if (!IsLoad)
+ HSpiller.rmFromMergeableSpills(MI, StackSlot);
+
DEBUG(dbgs() << "Coalescing stack access: " << *MI);
LIS.RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
LIS.removePhysRegDefAt(Reg, Idx);
}
+ int FI;
+ if (TII.isStoreToStackSlot(MI, FI) && HSpiller.rmFromMergeableSpills(MI, FI))
+ --NumSpills;
LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
MI->eraseFromParent();
if (!WasCopy)
++NumFolded;
- else if (Ops.front().second == 0)
+ else if (Ops.front().second == 0) {
++NumSpills;
- else
+ HSpiller.addToMergeableSpills(FoldMI, StackSlot, Original);
+ } else
++NumReloads;
return true;
}
DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
"spill"));
++NumSpills;
+ HSpiller.addToMergeableSpills(std::next(MI), StackSlot, Original);
}
/// spillAroundUses - insert spill code around each use of Reg.
continue;
}
if (RI.Writes) {
- // Hoist the spill of a sib-reg copy.
- if (hoistSpill(OldLI, *MI)) {
+ if (hoistSpillInsideBB(OldLI, *MI)) {
// This COPY is now dead, the value is already in the stack slot.
MI->getOperand(0).setIsDead();
DeadDefs.push_back(MI);
assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
collectRegsToSpill();
- analyzeSiblingValues();
reMaterializeAll();
// Remat may handle everything.
Edit->calculateRegClassAndHint(MF, Loops, MBFI);
}
+
+/// Optimizations after all the reg selections and spills are done.
+///
+void InlineSpiller::postOptimization() {
+ SmallVector<unsigned, 4> NewVRegs;
+ LiveRangeEdit LRE(nullptr, NewVRegs, MF, LIS, &VRM, nullptr);
+ HSpiller.hoistAllSpills(LRE);
+ assert(NewVRegs.size() == 0 &&
+ "No new vregs should be generated in hoistAllSpills");
+}
+
+/// When a spill is inserted, add it to the MergeableSpills map.
+///
+void HoistSpillHelper::addToMergeableSpills(MachineInstr *Spill, int StackSlot,
+ unsigned Original) {
+ StackSlotToReg[StackSlot] = Original;
+ SlotIndex Idx = LIS.getInstructionIndex(*Spill);
+ VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
+ std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
+ MergeableSpills[MIdx].insert(Spill);
+}
+
+/// When a spill is removed, remove it from the MergeableSpills map.
+/// Return true if the spill is removed successfully.
+///
+bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr *Spill,
+ int StackSlot) {
+ int Original = StackSlotToReg[StackSlot];
+ if (!Original)
+ return false;
+ SlotIndex Idx = LIS.getInstructionIndex(*Spill);
+ VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
+ std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
+ return MergeableSpills[MIdx].erase(Spill);
+}
+
+/// Check BB to see if it is a possible target BB to place a hoisted spill,
+/// i.e., there should be a live sibling of OrigReg at the insert point.
+///
+bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI,
+ MachineBasicBlock &BB, unsigned &LiveReg) {
+ SlotIndex Idx;
+ MachineBasicBlock::iterator MI = BB.getFirstTerminator();
+ if (MI != BB.end())
+ Idx = LIS.getInstructionIndex(*MI);
+ else
+ Idx = LIS.getMBBEndIdx(&BB).getPrevSlot();
+ SmallSetVector<unsigned, 16> &Siblings = Virt2SiblingsMap[OrigReg];
+ assert((LIS.getInterval(OrigReg)).getVNInfoAt(Idx) == &OrigVNI &&
+ "Unexpected VNI");
+
+ for (auto const SibReg : Siblings) {
+ LiveInterval &LI = LIS.getInterval(SibReg);
+ VNInfo *VNI = LI.getVNInfoAt(Idx);
+ if (VNI) {
+ LiveReg = SibReg;
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Remove redundant spills in the same BB. Save those redundant spills in
+/// SpillsToRm, and save the spill to keep and its BB in the SpillBBToSpill map.
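+///
+/// For example, if a BB contains two spills S1 and S2 of the same value and
+/// S1 has the smaller SlotIndex, then S2 is redundant: S1 is recorded in
+/// SpillBBToSpill for the BB and S2 is added to SpillsToRm.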
+///
+void HoistSpillHelper::rmRedundantSpills(
+ SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
+ // For each spill seen, check SpillBBToSpill[] to see whether its BB already
+ // has another spill inside. If a BB contains more than one spill, keep only
+ // the earlier spill with the smaller SlotIndex.
+ for (const auto CurrentSpill : Spills) {
+ MachineBasicBlock *Block = CurrentSpill->getParent();
+ MachineDomTreeNode *Node = MDT.DT->getNode(Block);
+ MachineInstr *PrevSpill = SpillBBToSpill[Node];
+ if (PrevSpill) {
+ SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
+ SlotIndex CIdx = LIS.getInstructionIndex(*CurrentSpill);
+ MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
+ MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
+ SpillsToRm.push_back(SpillToRm);
+ SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep;
+ } else {
+ SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill;
+ }
+ }
+ for (const auto SpillToRm : SpillsToRm)
+ Spills.erase(SpillToRm);
+}
+
+/// Starting from \p Root find a top-down traversal order of the dominator
+/// tree to visit all basic blocks containing the elements of \p Spills.
+/// Redundant spills will be found and put into \p SpillsToRm at the same
+/// time. \p SpillBBToSpill will be populated as part of the process and
+/// maps a basic block to the first store occurring in the basic block.
+/// \post SpillsToRm.union(Spills\@post) == Spills\@pre
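+///
+/// A small illustrative example (hypothetical block numbers): with dominator
+/// tree edges Root -> {BB1, BB2} and BB1 -> BB3, if BB1 and BB3 both contain
+/// a spill of the value, the walk from BB3 toward Root meets BB1's spill
+/// first, so BB3's spill goes into SpillsToRm, while the nodes on the path
+/// from BB1 to Root enter the work set of hoisting candidates.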
+///
+void HoistSpillHelper::getVisitOrders(
+ MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineDomTreeNode *> &Orders,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
+ // The set contains all the possible BB nodes to which we may hoist
+ // original spills.
+ SmallPtrSet<MachineDomTreeNode *, 8> WorkSet;
+ // Save the BB nodes on the path from the first BB node containing a
+ // non-redundant spill to the Root node.
+ SmallPtrSet<MachineDomTreeNode *, 8> NodesOnPath;
+ // All the spills to be hoisted must originate from a single def instruction
+ // to the OrigReg. It means the def instruction should dominate all the spills
+ // to be hoisted. We choose the BB where the def instruction is located as
+ // the Root.
+ MachineDomTreeNode *RootIDomNode = MDT[Root]->getIDom();
+ // For every node on the dominator tree with a spill, walk up the dominator
+ // tree towards the Root node until it is reached. If there is another node
+ // containing a spill in the middle of the path, the spill seen earlier is
+ // redundant and the node containing it will be removed. All the nodes on
+ // the path starting from the first node with a non-redundant spill to the
+ // Root node will be added to the WorkSet, which will contain all the
+ // possible locations where spills may be hoisted to after the loop below
+ // is done.
+ for (const auto Spill : Spills) {
+ MachineBasicBlock *Block = Spill->getParent();
+ MachineDomTreeNode *Node = MDT[Block];
+ MachineInstr *SpillToRm = nullptr;
+ while (Node != RootIDomNode) {
+ // If Node dominates Block, and it already contains a spill, the spill in
+ // Block will be redundant.
+ if (Node != MDT[Block] && SpillBBToSpill[Node]) {
+ SpillToRm = SpillBBToSpill[MDT[Block]];
+ break;
+ // If we see the Node already in WorkSet, the path from the Node to
+ // the Root node must already have been traversed by another spill.
+ // Then there is no need to repeat.
+ } else if (WorkSet.count(Node)) {
+ break;
+ } else {
+ NodesOnPath.insert(Node);
+ }
+ Node = Node->getIDom();
+ }
+ if (SpillToRm) {
+ SpillsToRm.push_back(SpillToRm);
+ } else {
+ // Add a BB containing the original spills to SpillsToKeep -- i.e.,
+ // set the initial status before hoisting starts. The value for BBs
+ // containing original spills is set to 0, to distinguish them from
+ // BBs containing hoisted spills, which will be inserted into
+ // SpillsToKeep later during hoisting.
+ SpillsToKeep[MDT[Block]] = 0;
+ WorkSet.insert(NodesOnPath.begin(), NodesOnPath.end());
+ }
+ NodesOnPath.clear();
+ }
+
+ // Sort the nodes in WorkSet in top-down order and save the nodes
+ // in Orders. Orders will be used for hoisting in runHoistSpills.
+ unsigned idx = 0;
+ Orders.push_back(MDT.DT->getNode(Root));
+ do {
+ MachineDomTreeNode *Node = Orders[idx++];
+ const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+ for (unsigned i = 0; i != NumChildren; ++i) {
+ MachineDomTreeNode *Child = Children[i];
+ if (WorkSet.count(Child))
+ Orders.push_back(Child);
+ }
+ } while (idx != Orders.size());
+ assert(Orders.size() == WorkSet.size() &&
+ "Orders have different size with WorkSet");
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");
+ SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
+ for (; RIt != Orders.rend(); RIt++)
+ DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ",");
+ DEBUG(dbgs() << "\n");
+#endif
+}
+
+/// Try to hoist spills according to BB hotness. The spills to be removed will
+/// be saved in \p SpillsToRm. The spills to be inserted will be saved in
+/// \p SpillsToIns.
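+///
+/// A sketch of the decision with made-up frequencies: if a node's subtree
+/// holds spills with total frequency 100 and the node's own block frequency
+/// is 105, then 100 > 105 * 9/10 = 94.5, so those spills are replaced by a
+/// single spill in that block. The 9/10 margin applies only when more than
+/// one spill would be merged; for a single spill the margin is 1/1.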
+///
+void HoistSpillHelper::runHoistSpills(
+ unsigned OrigReg, VNInfo &OrigVNI, SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns) {
+ // Visit order of dominator tree nodes.
+ SmallVector<MachineDomTreeNode *, 32> Orders;
+ // SpillsToKeep contains all the nodes where spills are to be inserted
+ // during hoisting. If the spill to be inserted is an original spill
+ // (not a hoisted one), the value of the map entry is 0. If the spill
+ // is a hoisted spill, the value of the map entry is the VReg to be used
+ // as the source of the spill.
+ DenseMap<MachineDomTreeNode *, unsigned> SpillsToKeep;
+ // Map from BB to the first spill inside of it.
+ DenseMap<MachineDomTreeNode *, MachineInstr *> SpillBBToSpill;
+
+ rmRedundantSpills(Spills, SpillsToRm, SpillBBToSpill);
+
+ MachineBasicBlock *Root = LIS.getMBBFromIndex(OrigVNI.def);
+ getVisitOrders(Root, Spills, Orders, SpillsToRm, SpillsToKeep,
+ SpillBBToSpill);
+
+ // SpillsInSubTreeMap keeps the map from a dom tree node to a pair of
+ // a node set and the cost of all the spills inside those nodes.
+ // The node set contains the locations where spills are to be inserted
+ // in the subtree of the current node.
+ typedef std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>
+ NodesCostPair;
+ DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap;
+ // Iterate Orders set in reverse order, which will be a bottom-up order
+ // in the dominator tree. Once we visit a dom tree node, we know its
+ // children have already been visited and the spill locations in the
+ // subtrees of all the children have been determined.
+ SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
+ for (; RIt != Orders.rend(); RIt++) {
+ MachineBasicBlock *Block = (*RIt)->getBlock();
+
+ // If Block contains an original spill, simply continue.
+ if (SpillsToKeep.find(*RIt) != SpillsToKeep.end() && !SpillsToKeep[*RIt]) {
+ SpillsInSubTreeMap[*RIt].first.insert(*RIt);
+ // SpillsInSubTreeMap[*RIt].second contains the cost of spill.
+ SpillsInSubTreeMap[*RIt].second = MBFI.getBlockFreq(Block);
+ continue;
+ }
+
+ // Collect spills in subtree of current node (*RIt) to
+ // SpillsInSubTreeMap[*RIt].first.
+ const std::vector<MachineDomTreeNode *> &Children = (*RIt)->getChildren();
+ unsigned NumChildren = Children.size();
+ for (unsigned i = 0; i != NumChildren; ++i) {
+ MachineDomTreeNode *Child = Children[i];
+ if (SpillsInSubTreeMap.find(Child) == SpillsInSubTreeMap.end())
+ continue;
+ // The stmt "SpillsInSubTree = SpillsInSubTreeMap[*RIt].first" below
+ // should be placed before getting the begin and end iterators of
+ // SpillsInSubTreeMap[Child].first, or else the iterators may be
+ // invalidated when SpillsInSubTreeMap[*RIt] is seen for the first time
+ // and the map grows, moving the original buckets in the map.
+ SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
+ SpillsInSubTreeMap[*RIt].first;
+ BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
+ SubTreeCost += SpillsInSubTreeMap[Child].second;
+ auto BI = SpillsInSubTreeMap[Child].first.begin();
+ auto EI = SpillsInSubTreeMap[Child].first.end();
+ SpillsInSubTree.insert(BI, EI);
+ SpillsInSubTreeMap.erase(Child);
+ }
+
+ SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
+ SpillsInSubTreeMap[*RIt].first;
+ BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
+ // No spills in subtree, simply continue.
+ if (SpillsInSubTree.empty())
+ continue;
+
+ // Check whether Block is a possible candidate for inserting a spill.
+ unsigned LiveReg = 0;
+ if (!isSpillCandBB(OrigReg, OrigVNI, *Block, LiveReg))
+ continue;
+
+ // If there are multiple spills that could be merged, bias a little
+ // toward hoisting the spill.
+ BranchProbability MarginProb = (SpillsInSubTree.size() > 1)
+ ? BranchProbability(9, 10)
+ : BranchProbability(1, 1);
+ if (SubTreeCost > MBFI.getBlockFreq(Block) * MarginProb) {
+ // Hoist: Move spills to current Block.
+ for (const auto SpillBB : SpillsInSubTree) {
+ // When SpillBB is a BB that contains an original spill, add the spill
+ // to SpillsToRm.
+ if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() &&
+ !SpillsToKeep[SpillBB]) {
+ MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
+ SpillsToRm.push_back(SpillToRm);
+ }
+ // SpillBB will not contain a spill anymore; remove it from SpillsToKeep.
+ SpillsToKeep.erase(SpillBB);
+ }
+ // Current Block is the BB containing the new hoisted spill. Add it to
+ // SpillsToKeep. LiveReg is the source of the new spill.
+ SpillsToKeep[*RIt] = LiveReg;
+ DEBUG({
+ dbgs() << "spills in BB: ";
+ for (const auto Rspill : SpillsInSubTree)
+ dbgs() << Rspill->getBlock()->getNumber() << " ";
+ dbgs() << "were promoted to BB" << (*RIt)->getBlock()->getNumber()
+ << "\n";
+ });
+ SpillsInSubTree.clear();
+ SpillsInSubTree.insert(*RIt);
+ SubTreeCost = MBFI.getBlockFreq(Block);
+ }
+ }
+ // For spills in SpillsToKeep with LiveReg set (i.e., not an original
+ // spill), save them to SpillsToIns.
+ for (const auto Ent : SpillsToKeep) {
+ if (Ent.second)
+ SpillsToIns[Ent.first->getBlock()] = Ent.second;
+ }
+}
+
+/// For spills with equal values, remove redundant spills and hoist the
+/// remaining ones to less hot spots.
+///
+/// Spills with equal values will be collected into the same set in
+/// MergeableSpills when spills are inserted. These equal spills originate
+/// from the same defining instruction and are dominated by it. Before
+/// hoisting all the equal spills, redundant spills inside the same BB are
+/// first marked to be deleted. Then, starting from the spills left, walk up
+/// the dominator tree towards the Root node where the defining instruction
+/// is located, mark the dominated spills to be deleted along the way, and
+/// collect the BB nodes on the path from non-dominated spills to the
+/// defining instruction into a WorkSet. The nodes in WorkSet are the
+/// candidate places where we consider hoisting the spills. We iterate the
+/// WorkSet in bottom-up order, and for each node we decide whether to hoist
+/// the spills inside its subtree to that node. In this way, we can get a
+/// benefit locally even if hoisting all the equal spills to one cold place
+/// is impossible.
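+///
+/// In short, the process runs in three phases: rmRedundantSpills removes
+/// same-BB duplicates, getVisitOrders prunes dominated spills and computes
+/// the candidate nodes in top-down order, and runHoistSpills makes the
+/// bottom-up, frequency-based hoisting decisions.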
+///
+void HoistSpillHelper::hoistAllSpills(LiveRangeEdit &Edit) {
+ // Save the mapping between stackslot and its original reg.
+ DenseMap<int, unsigned> SlotToOrigReg;
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ int Slot = VRM.getStackSlot(Reg);
+ if (Slot != VirtRegMap::NO_STACK_SLOT)
+ SlotToOrigReg[Slot] = VRM.getOriginal(Reg);
+ unsigned Original = VRM.getPreSplitReg(Reg);
+ if (!MRI.def_empty(Reg))
+ Virt2SiblingsMap[Original].insert(Reg);
+ }
+
+ // Each entry in MergeableSpills contains a spill set with equal values.
+ for (auto &Ent : MergeableSpills) {
+ int Slot = Ent.first.first;
+ unsigned OrigReg = SlotToOrigReg[Slot];
+ VNInfo *OrigVNI = Ent.first.second;
+ SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second;
+ if (Ent.second.empty())
+ continue;
+
+ DEBUG({
+ dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n"
+ << "Equal spills in BB: ";
+ for (const auto spill : EqValSpills)
+ dbgs() << spill->getParent()->getNumber() << " ";
+ dbgs() << "\n";
+ });
+
+ // SpillsToRm is the spill set to be removed from EqValSpills.
+ SmallVector<MachineInstr *, 16> SpillsToRm;
+ // SpillsToIns is the spill set to be newly inserted after hoisting.
+ DenseMap<MachineBasicBlock *, unsigned> SpillsToIns;
+
+ runHoistSpills(OrigReg, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);
+
+ DEBUG({
+ dbgs() << "Finally inserted spills in BB: ";
+ for (const auto Ispill : SpillsToIns)
+ dbgs() << Ispill.first->getNumber() << " ";
+ dbgs() << "\nFinally removed spills in BB: ";
+ for (const auto Rspill : SpillsToRm)
+ dbgs() << Rspill->getParent()->getNumber() << " ";
+ dbgs() << "\n";
+ });
+
+ // Stack live range update.
+ LiveInterval &StackIntvl = LSS.getInterval(Slot);
+ if (!SpillsToIns.empty() || !SpillsToRm.empty()) {
+ LiveInterval &OrigLI = LIS.getInterval(OrigReg);
+ StackIntvl.MergeValueInAsValue(OrigLI, OrigVNI,
+ StackIntvl.getValNumInfo(0));
+ }
+
+ // Insert hoisted spills.
+ for (auto const Insert : SpillsToIns) {
+ MachineBasicBlock *BB = Insert.first;
+ unsigned LiveReg = Insert.second;
+ MachineBasicBlock::iterator MI = BB->getFirstTerminator();
+ TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot,
+ MRI.getRegClass(LiveReg), &TRI);
+ LIS.InsertMachineInstrRangeInMaps(std::prev(MI), MI);
+ ++NumSpills;
+ }
+
+ // Remove redundant spills or change them to dead instructions.
+ NumSpills -= SpillsToRm.size();
+ for (auto const RMEnt : SpillsToRm) {
+ RMEnt->setDesc(TII.get(TargetOpcode::KILL));
+ for (unsigned i = RMEnt->getNumOperands(); i; --i) {
+ MachineOperand &MO = RMEnt->getOperand(i - 1);
+ if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead())
+ RMEnt->RemoveOperand(i - 1);
+ }
+ }
+ Edit.eliminateDeadDefs(SpillsToRm, None, true);
+ }
+}
for (VNInfo *VNI : getParent().valnos) {
if (VNI->isUnused())
continue;
- MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
+ unsigned Original = VRM->getOriginal(getReg());
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def);
if (!DefMI)
continue;
- checkRematerializable(VNI, DefMI, aa);
+ checkRematerializable(OrigVNI, DefMI, aa);
}
ScannedRemattable = true;
}
return true;
}
-bool LiveRangeEdit::canRematerializeAt(Remat &RM,
- SlotIndex UseIdx,
- bool cheapAsAMove) {
+bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI,
+ SlotIndex UseIdx, bool cheapAsAMove) {
assert(ScannedRemattable && "Call anyRematerializable first");
// Use scanRemattable info.
- if (!Remattable.count(RM.ParentVNI))
+ if (!Remattable.count(OrigVNI))
return false;
// No defining instruction provided.
SlotIndex DefIdx;
- if (RM.OrigMI)
- DefIdx = LIS.getInstructionIndex(*RM.OrigMI);
- else {
- DefIdx = RM.ParentVNI->def;
- RM.OrigMI = LIS.getInstructionFromIndex(DefIdx);
- assert(RM.OrigMI && "No defining instruction for remattable value");
- }
+ assert(RM.OrigMI && "No defining instruction for remattable value");
+ DefIdx = LIS.getInstructionIndex(*RM.OrigMI);
// If only cheap remats were requested, bail out early.
if (cheapAsAMove && !TII.isAsCheapAsAMove(RM.OrigMI))
bool Late) {
assert(RM.OrigMI && "Invalid remat");
TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+ // DestReg of the cloned instruction cannot be Dead. Set isDead of DestReg
+ // to false unconditionally, in case the isDead flag of RM.OrigMI's dest
+ // register is true.
+ (*--MI).getOperand(0).setIsDead(false);
Rematted.insert(RM.ParentVNI);
- return LIS.getSlotIndexes()
- ->insertMachineInstrInMaps(*--MI, Late)
- .getRegSlot();
+ return LIS.getSlotIndexes()->insertMachineInstrInMaps(*MI, Late).getRegSlot();
}
void LiveRangeEdit::eraseVirtReg(unsigned Reg) {
// Collect virtual registers to be erased after MI is gone.
SmallVector<unsigned, 8> RegsToErase;
bool ReadsPhysRegs = false;
+ bool isOrigDef = false;
+ unsigned Dest;
+ if (VRM && MI->getOperand(0).isReg()) {
+ Dest = MI->getOperand(0).getReg();
+ unsigned Original = VRM->getOriginal(Dest);
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
+ isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx);
+ }
// Check for live intervals that may shrink
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
}
DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
} else {
- if (TheDelegate)
- TheDelegate->LRE_WillEraseInstruction(MI);
- LIS.RemoveMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
- ++NumDCEDeleted;
+ // If the dest of MI is an original reg, don't delete the inst. Replace
+ // the dest with a new reg, keep the inst for remat of other siblings.
+ // The inst is saved in LiveRangeEdit::DeadRemats and will be deleted
+ // after all the allocations of the func are done.
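+ // For example (illustrative opcode), a dead original def
+ //   %orig = LOAD <fi#1>
+ // becomes
+ //   dead %dummy = LOAD <fi#1>
+ // and is stashed in DeadRemats instead of being erased, so later remats
+ // of %orig's siblings can still clone it.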
+ if (isOrigDef) {
+ LiveInterval &NewLI = createEmptyIntervalFrom(Dest);
+ VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
+ NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
+ pop_back();
+ markDeadRemat(MI);
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ MI->substituteRegister(Dest, NewLI.reg, 0, TRI);
+ MI->getOperand(0).setIsDead(true);
+ } else {
+ if (TheDelegate)
+ TheDelegate->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
+ }
}
// Erase any virtregs that are now empty and unused. There may be <undef>
}
}
-void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
- ArrayRef<unsigned> RegsBeingSpilled) {
+void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
+ ArrayRef<unsigned> RegsBeingSpilled,
+ bool NoSplit) {
ToShrinkSet ToShrink;
for (;;) {
if (!LIS.shrinkToUses(LI, &Dead))
continue;
+ if (NoSplit)
+ continue;
+
// Don't create new intervals for a register being spilled.
// The new intervals would have to be spilled anyway so its not worth it.
// Also they currently aren't spilled so creating them and not spilling
}
}
}
+
+void RegAllocBase::postOptimization() {
+ spiller().postOptimization();
+ for (auto DeadInst : DeadRemats) {
+ LIS->RemoveMachineInstrFromMaps(*DeadInst);
+ DeadInst->eraseFromParent();
+ }
+ DeadRemats.clear();
+}
LiveRegMatrix *Matrix;
RegisterClassInfo RegClassInfo;
+ /// An instruction that defines an original reg and whose defs are already
+ /// all dead after remat is saved in DeadRemats. The deletion of such an
+ /// instruction is postponed till all the allocations are done, so its
+ /// remat expr is always available for the remat of all the siblings of
+ /// the original reg.
+ SmallPtrSet<MachineInstr *, 32> DeadRemats;
+
RegAllocBase()
: TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {}
// physical register assignments.
void allocatePhysRegs();
+ // Run the spiller post optimization and remove dead defs left because of
+ // rematerialization.
+ virtual void postOptimization();
+
// Get a temporary reference to a Spiller instance.
virtual Spiller &spiller() = 0;
Matrix->unassign(Spill);
// Spill the extracted interval.
- LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM);
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
spiller().spill(LRE);
}
return true;
DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
if (!VirtReg.isSpillable())
return ~0u;
- LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM);
+ LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
spiller().spill(LRE);
// The live virtual register requesting allocation was spilled, so tell
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
allocatePhysRegs();
+ postOptimization();
// Diagnostic output before rewriting
DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <queue>
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");
-static cl::opt<SplitEditor::ComplementSpillMode>
-SplitSpillMode("split-spill-mode", cl::Hidden,
- cl::desc("Spill mode for splitting live ranges"),
- cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
- clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
- clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
- clEnumValEnd),
- cl::init(SplitEditor::SM_Partition));
+static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
+ "split-spill-mode", cl::Hidden,
+ cl::desc("Spill mode for splitting live ranges"),
+ cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
+ clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
+ clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
+ clEnumValEnd),
+ cl::init(SplitEditor::SM_Speed));
static cl::opt<unsigned>
LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
SmallVectorImpl<unsigned> &NewVRegs) {
SmallVector<unsigned, 8> UsedCands;
// Prepare split editor.
- LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit, SplitSpillMode);
// Assign all edge bundles to the preferred candidate, or NoCand.
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
unsigned Reg = VirtReg.reg;
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
- LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit, SplitSpillMode);
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
// Always enable split spill mode, since we're effectively spilling to a
// register.
- LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit, SplitEditor::SM_Size);
ArrayRef<SlotIndex> Uses = SA->getUseSlots();
<< '-' << Uses[BestAfter] << ", " << BestDiff
<< ", " << (BestAfter - BestBefore + 1) << " instrs\n");
- LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit);
SE->openIntv();
NewVRegs.push_back(VirtReg.reg);
} else {
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
- LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
allocatePhysRegs();
tryHintsRecoloring();
+ postOptimization();
+
releaseMemory();
return true;
}
RegSet VRegsToAlloc, EmptyIntervalVRegs;
+ /// An instruction that defines an original reg and whose defs are already
+ /// all dead after remat is saved in DeadRemats. The deletion of such an
+ /// instruction is postponed till all the allocations are done, so its
+ /// remat expr is always available for the remat of all the siblings of
+ /// the original reg.
+ SmallPtrSet<MachineInstr *, 32> DeadRemats;
+
/// \brief Finds the initial set of vreg intervals to allocate.
void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
VirtRegMap &VRM) const;
+ void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
};
char RegAllocPBQP::ID = 0;
VirtRegMap &VRM, Spiller &VRegSpiller) {
VRegsToAlloc.erase(VReg);
- LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM);
+ LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
+ nullptr, &DeadRemats);
VRegSpiller.spill(LRE);
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
}
}
+void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
+ VRegSpiller.postOptimization();
+ // Remove dead defs because of rematerialization.
+ for (auto DeadInst : DeadRemats) {
+ LIS.RemoveMachineInstrFromMaps(*DeadInst);
+ DeadInst->eraseFromParent();
+ }
+ DeadRemats.clear();
+}
+
static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
unsigned NumInstr) {
// All intervals have a spill weight that is mostly proportional to the number
// Finalise allocation, allocate empty ranges.
finalizeAlloc(MF, LIS, VRM);
+ postOptimization(*VRegSpiller, LIS);
VRegsToAlloc.clear();
EmptyIntervalVRegs.clear();
class MachineFunction;
class MachineFunctionPass;
class VirtRegMap;
+ class LiveIntervals;
/// Spiller interface.
///
/// spill - Spill the LRE.getParent() live interval.
virtual void spill(LiveRangeEdit &LRE) = 0;
-
+ virtual void postOptimization() {}
};
/// Create and return a spiller that will insert spill code directly instead
Spiller *createInlineSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm);
-
}
#endif
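Because Spiller::postOptimization() gains an empty default body above, only spillers that defer work need to override it. A hedged sketch of such an override follows; the DeferredSpiller class and its members are illustrative and not part of this patch:

    // Hypothetical subclass; names here are illustrative only.
    class DeferredSpiller : public Spiller {
      SmallPtrSet<MachineInstr *, 32> DeadInsts; // queued during spill()
      LiveIntervals &LIS;
    public:
      DeferredSpiller(LiveIntervals &LIS) : LIS(LIS) {}
      void spill(LiveRangeEdit &LRE) override {
        // ... insert spill code; defs dead after remat get queued in
        // DeadInsts instead of being erased right away ...
      }
      void postOptimization() override {
        // Safe now: register allocation has fully finished.
        for (MachineInstr *MI : DeadInsts) {
          LIS.RemoveMachineInstrFromMaps(*MI);
          MI->eraseFromParent();
        }
        DeadInsts.clear();
      }
    };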
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
bool Late = RegIdx != 0;
// Attempt cheap-as-a-copy rematerialization.
+ unsigned Original = VRM.getOriginal(Edit->get(RegIdx));
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
LiveRangeEdit::Remat RM(ParentVNI);
- if (Edit->canRematerializeAt(RM, UseIdx, true)) {
+ RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
+
+ if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
++NumRemats;
} else {
}
}
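To restate the hunk above: rematerialization now resolves the defining instruction through the original register, so a sibling vreg whose own def is merely a copy can still be rematted from the original def. A condensed sketch, assuming VRM, LIS, and Edit as in the surrounding code:

    // Sketch: locate the real remat expression via the original vreg.
    unsigned Original = VRM.getOriginal(Edit->get(RegIdx));
    LiveInterval &OrigLI = LIS.getInterval(Original);
    VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);   // value live at the use
    LiveRangeEdit::Remat RM(ParentVNI);
    // OrigMI carries the real expression to re-emit, even when the
    // current interval's def is just a copy of the original value.
    RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
    if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, /*cheapAsAMove=*/true))
      Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);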
-void SplitEditor::hoistCopiesForSize() {
+void SplitEditor::computeRedundantBackCopies(
+ DenseSet<unsigned> &NotToHoistSet, SmallVectorImpl<VNInfo *> &BackCopies) {
+ LiveInterval *LI = &LIS.getInterval(Edit->get(0));
+ LiveInterval *Parent = &Edit->getParent();
+ SmallVector<SmallPtrSet<VNInfo *, 8>, 8> EqualVNs(Parent->getNumValNums());
+ SmallPtrSet<VNInfo *, 8> DominatedVNIs;
+
+ // Aggregate the VNIs that have the same ParentVNI value.
+ for (VNInfo *VNI : LI->valnos) {
+ if (VNI->isUnused())
+ continue;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ EqualVNs[ParentVNI->id].insert(VNI);
+ }
+
+ // For the VNI aggregation of each ParentVNI, collect the dominated,
+ // i.e., redundant, VNIs into BackCopies.
+ for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
+ VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ if (!NotToHoistSet.count(ParentVNI->id))
+ continue;
+ SmallPtrSetIterator<VNInfo *> It1 = EqualVNs[ParentVNI->id].begin();
+ SmallPtrSetIterator<VNInfo *> It2 = It1;
+ for (; It1 != EqualVNs[ParentVNI->id].end(); ++It1) {
+ It2 = It1;
+ for (++It2; It2 != EqualVNs[ParentVNI->id].end(); ++It2) {
+ if (DominatedVNIs.count(*It1) || DominatedVNIs.count(*It2))
+ continue;
+
+ MachineBasicBlock *MBB1 = LIS.getMBBFromIndex((*It1)->def);
+ MachineBasicBlock *MBB2 = LIS.getMBBFromIndex((*It2)->def);
+ if (MBB1 == MBB2) {
+ DominatedVNIs.insert((*It1)->def < (*It2)->def ? (*It2) : (*It1));
+ } else if (MDT.dominates(MBB1, MBB2)) {
+ DominatedVNIs.insert(*It2);
+ } else if (MDT.dominates(MBB2, MBB1)) {
+ DominatedVNIs.insert(*It1);
+ }
+ }
+ }
+ if (!DominatedVNIs.empty()) {
+ forceRecompute(0, ParentVNI);
+ for (auto VNI : DominatedVNIs) {
+ BackCopies.push_back(VNI);
+ }
+ DominatedVNIs.clear();
+ }
+ }
+}
+
+/// For SM_Size mode, find a common dominator for all the back-copies of
+/// the same ParentVNI and hoist the back-copies to the dominator BB.
+/// For SM_Speed mode, if the common dominator is hot and hoisting is not
+/// beneficial, simply remove the dominated back-copies for the same
+/// ParentVNI.
+void SplitEditor::hoistCopies() {
// Get the complement interval, always RegIdx 0.
LiveInterval *LI = &LIS.getInterval(Edit->get(0));
LiveInterval *Parent = &Edit->getParent();
// indexed by ParentVNI->id.
typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair;
SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
+ // The total cost of all the back-copies for each ParentVNI.
+ SmallVector<BlockFrequency, 8> Costs(Parent->getNumValNums());
+ // The set of ParentVNI->ids for which hoisting the back-copies is not
+ // beneficial in SM_Speed mode.
+ DenseSet<unsigned> NotToHoistSet;
// Find the nearest common dominator for parent values with multiple
// back-copies. If a single back-copy dominates, put it in DomPair.second.
continue;
MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def);
+
DomPair &Dom = NearestDom[ParentVNI->id];
// Keep directly defined parent values. This is either a PHI or an
else if (Near != Dom.first)
// None dominate. Hoist to common dominator, need new def.
Dom = DomPair(Near, SlotIndex());
+ Costs[ParentVNI->id] += MBFI.getBlockFreq(ValMBB);
}
DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
// Get a less loopy dominator than Dom.first.
Dom.first = findShallowDominator(Dom.first, DefMBB);
+ if (SpillMode == SM_Speed &&
+ MBFI.getBlockFreq(Dom.first) > Costs[ParentVNI->id]) {
+ NotToHoistSet.insert(ParentVNI->id);
+ continue;
+ }
SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
Dom.second =
defFromParent(0, ParentVNI, Last, *Dom.first,
continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
const DomPair &Dom = NearestDom[ParentVNI->id];
- if (!Dom.first || Dom.second == VNI->def)
+ if (!Dom.first || Dom.second == VNI->def ||
+ NotToHoistSet.count(ParentVNI->id))
continue;
BackCopies.push_back(VNI);
forceRecompute(0, ParentVNI);
}
+
+ // If it is not beneficial to hoist all the BackCopies, simply remove
+ // redundant BackCopies in speed mode.
+ if (SpillMode == SM_Speed && !NotToHoistSet.empty())
+ computeRedundantBackCopies(NotToHoistSet, BackCopies);
+
removeBackCopies(BackCopies);
}
}
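The SM_Speed guard above can be read as a simple frequency comparison: hoist the back-copies of a ParentVNI only if the shallow dominator is no hotter than the copies it would replace. A sketch of that decision rule, with names as in the patch:

    // Hoist iff freq(dominator block) <= sum of freq(back-copy blocks);
    // otherwise (SM_Speed only) keep the copies in place and let
    // computeRedundantBackCopies() delete just the dominated ones.
    if (SpillMode == SM_Speed &&
        MBFI.getBlockFreq(Dom.first) > Costs[ParentVNI->id])
      NotToHoistSet.insert(ParentVNI->id); // hoisting would cost more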
void SplitEditor::extendPHIKillRanges() {
- // Extend live ranges to be live-out for successor PHI values.
+ // Extend live ranges to be live-out for successor PHI values.
for (const VNInfo *PHIVNI : Edit->getParent().valnos) {
if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
continue;
unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
LiveRange &LR = LIS.getInterval(Edit->get(RegIdx));
+
+ // Check whether PHI is dead.
+ const LiveRange::Segment *Segment = LR.getSegmentContaining(PHIVNI->def);
+ assert(Segment != nullptr && "Missing segment for VNI");
+ if (Segment->end == PHIVNI->def.getDeadSlot()) {
+ // This is a dead PHI. Remove it.
+ LR.removeSegment(*Segment, true);
+ continue;
+ }
+
LiveRangeCalc &LRC = getLRCalc(RegIdx);
MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
// Dead defs end at the dead slot.
if (S.end != S.valno->def.getDeadSlot())
continue;
+ if (S.valno->isPHIDef())
+ continue;
MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def);
assert(MI && "Missing instruction for dead def");
MI->addRegisterDead(LI->reg, &TRI);
// Leave all back-copies as is.
break;
case SM_Size:
- hoistCopiesForSize();
- break;
case SM_Speed:
- llvm_unreachable("Spill mode 'speed' not implemented yet");
+ // hoistCopies behaves differently for SM_Size and SM_Speed.
+ hoistCopies();
}
// Transfer the simply mapped values, check if any are skipped.
bool Skipped = transferValues();
+
+ // Rewrite virtual registers, possibly extending ranges.
+ rewriteAssigned(Skipped);
+
if (Skipped)
extendPHIKillRanges();
else
++NumSimple;
- // Rewrite virtual registers, possibly extending ranges.
- rewriteAssigned(Skipped);
-
// Delete defs that were rematted everywhere.
if (Skipped)
deleteRematVictims();
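Note the reordering in finish() above: rewriteAssigned() now runs before extendPHIKillRanges(), presumably so that uses are rewritten and ranges shrunk before the new dead-PHI check runs. A condensed view of the resulting sequence, assembled from the hunks above:

    bool Skipped = transferValues();
    rewriteAssigned(Skipped);     // rewrite vregs first, extending ranges
    if (Skipped)
      extendPHIKillRanges();      // may now drop segments of dead PHIs
    else
      ++NumSimple;
    if (Skipped)
      deleteRematVictims();       // delete defs that were rematted everywhere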
#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/SmallPtrSet.h"
MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB);
- /// hoistCopiesForSize - Hoist back-copies to the complement interval in a
- /// way that minimizes code size. This implements the SM_Size spill mode.
- void hoistCopiesForSize();
+ /// Find all the back-copies dominated by others.
+ void computeRedundantBackCopies(DenseSet<unsigned> &NotToHoistSet,
+ SmallVectorImpl<VNInfo *> &BackCopies);
+
+ /// Hoist back-copies to the complement interval. It hoists all the
+ /// back-copies to one BB if that is beneficial, and otherwise simply
+ /// removes the redundant back-copies dominated by others.
+ void hoistCopies();
/// transferValues - Transfer values to the new ranges.
/// Return true if any ranges were skipped.
+++ /dev/null
-;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=true -mcpu=cortex-a57 -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=DEFERRED
-;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=false -mcpu=cortex-a57 -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=REGULAR
-
-; Check that we do not end up with useless spill code.
-;
-; Move to the basic block we are interested in.
-;
-; CHECK: // %if.then.120
-;
-; REGULAR: str w21, [sp, #[[OFFSET:[0-9]+]]] // 4-byte Folded Spill
-; Check that w21 wouldn't need to be spilled since it is never reused.
-; REGULAR-NOT: {{[wx]}}21{{,?}}
-;
-; Check that w22 is used to carry a value through the call.
-; DEFERRED-NOT: str {{[wx]}}22,
-; DEFERRED: mov {{[wx]}}22,
-; DEFERRED-NOT: str {{[wx]}}22,
-;
-; CHECK: bl fprintf
-;
-; DEFERRED-NOT: ldr {{[wx]}}22,
-; DEFERRED: mov {{[wx][0-9]+}}, {{[wx]}}22
-; DEFERRED-NOT: ldr {{[wx]}}22,
-;
-; REGULAR-NOT: {{[wx]}}21{{,?}}
-; REGULAR: ldr w21, [sp, #[[OFFSET]]] // 4-byte Folded Reload
-;
-; End of the basic block we are interested in.
-; CHECK: b
-; CHECK: {{[^:]+}}: // %sw.bb.123
-
-%struct.__sFILE = type { i8*, i32, i32, i32, i32, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
-%struct.__sbuf = type { i8*, i64 }
-%struct.DState = type { %struct.bz_stream*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* }
-%struct.bz_stream = type { i8*, i32, i32, i32, i8*, i32, i32, i32, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8* }
-
-@__sF = external global [0 x %struct.__sFILE], align 8
-@.str = private unnamed_addr constant [20 x i8] c"\0A [%d: stuff+mf \00", align 1
-
-declare i32 @fprintf(%struct.__sFILE* nocapture, i8* nocapture readonly, ...)
-
-declare void @bar(i32)
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
-
-define i32 @foo(%struct.DState* %s) {
-entry:
- %state = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 1
- %tmp = load i32, i32* %state, align 4
- %cmp = icmp eq i32 %tmp, 10
- %save_i = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 40
- br i1 %cmp, label %if.end.thread, label %if.end
-
-if.end.thread: ; preds = %entry
- %save_j = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41
- %save_t = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42
- %save_alphaSize = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43
- %save_nGroups = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44
- %save_nSelectors = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45
- %save_EOB = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46
- %save_groupNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47
- %save_groupPos = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48
- %save_nextSym = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49
- %save_nblockMAX = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50
- %save_nblock = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51
- %save_es = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52
- %save_N = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53
- %save_curr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54
- %save_zt = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55
- %save_zn = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56
- %save_zvec = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57
- %save_zj = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58
- %tmp1 = bitcast i32* %save_i to i8*
- call void @llvm.memset.p0i8.i64(i8* %tmp1, i8 0, i64 108, i32 4, i1 false)
- br label %sw.default
-
-if.end: ; preds = %entry
- %.pre = load i32, i32* %save_i, align 4
- %save_j3.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41
- %.pre406 = load i32, i32* %save_j3.phi.trans.insert, align 4
- %save_t4.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42
- %.pre407 = load i32, i32* %save_t4.phi.trans.insert, align 4
- %save_alphaSize5.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43
- %.pre408 = load i32, i32* %save_alphaSize5.phi.trans.insert, align 4
- %save_nGroups6.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44
- %.pre409 = load i32, i32* %save_nGroups6.phi.trans.insert, align 4
- %save_nSelectors7.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45
- %.pre410 = load i32, i32* %save_nSelectors7.phi.trans.insert, align 4
- %save_EOB8.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46
- %.pre411 = load i32, i32* %save_EOB8.phi.trans.insert, align 4
- %save_groupNo9.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47
- %.pre412 = load i32, i32* %save_groupNo9.phi.trans.insert, align 4
- %save_groupPos10.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48
- %.pre413 = load i32, i32* %save_groupPos10.phi.trans.insert, align 4
- %save_nextSym11.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49
- %.pre414 = load i32, i32* %save_nextSym11.phi.trans.insert, align 4
- %save_nblockMAX12.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50
- %.pre415 = load i32, i32* %save_nblockMAX12.phi.trans.insert, align 4
- %save_nblock13.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51
- %.pre416 = load i32, i32* %save_nblock13.phi.trans.insert, align 4
- %save_es14.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52
- %.pre417 = load i32, i32* %save_es14.phi.trans.insert, align 4
- %save_N15.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53
- %.pre418 = load i32, i32* %save_N15.phi.trans.insert, align 4
- %save_curr16.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54
- %.pre419 = load i32, i32* %save_curr16.phi.trans.insert, align 4
- %save_zt17.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55
- %.pre420 = load i32, i32* %save_zt17.phi.trans.insert, align 4
- %save_zn18.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56
- %.pre421 = load i32, i32* %save_zn18.phi.trans.insert, align 4
- %save_zvec19.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57
- %.pre422 = load i32, i32* %save_zvec19.phi.trans.insert, align 4
- %save_zj20.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58
- %.pre423 = load i32, i32* %save_zj20.phi.trans.insert, align 4
- switch i32 %tmp, label %sw.default [
- i32 13, label %sw.bb
- i32 14, label %if.end.sw.bb.65_crit_edge
- i32 25, label %if.end.sw.bb.123_crit_edge
- ]
-
-if.end.sw.bb.123_crit_edge: ; preds = %if.end
- %.pre433 = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
- br label %sw.bb.123
-
-if.end.sw.bb.65_crit_edge: ; preds = %if.end
- %bsLive69.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
- %.pre426 = load i32, i32* %bsLive69.phi.trans.insert, align 4
- br label %sw.bb.65
-
-sw.bb: ; preds = %if.end
- %sunkaddr = ptrtoint %struct.DState* %s to i64
- %sunkaddr485 = add i64 %sunkaddr, 8
- %sunkaddr486 = inttoptr i64 %sunkaddr485 to i32*
- store i32 13, i32* %sunkaddr486, align 4
- %bsLive = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
- %tmp2 = load i32, i32* %bsLive, align 4
- %cmp28.400 = icmp sgt i32 %tmp2, 7
- br i1 %cmp28.400, label %sw.bb.if.then.29_crit_edge, label %if.end.33.lr.ph
-
-sw.bb.if.then.29_crit_edge: ; preds = %sw.bb
- %sunkaddr487 = ptrtoint %struct.DState* %s to i64
- %sunkaddr488 = add i64 %sunkaddr487, 32
- %sunkaddr489 = inttoptr i64 %sunkaddr488 to i32*
- %.pre425 = load i32, i32* %sunkaddr489, align 4
- br label %if.then.29
-
-if.end.33.lr.ph: ; preds = %sw.bb
- %tmp3 = bitcast %struct.DState* %s to %struct.bz_stream**
- %.pre424 = load %struct.bz_stream*, %struct.bz_stream** %tmp3, align 8
- %avail_in.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre424, i64 0, i32 1
- %.pre430 = load i32, i32* %avail_in.phi.trans.insert, align 4
- %tmp4 = add i32 %.pre430, -1
- br label %if.end.33
-
-if.then.29: ; preds = %while.body.backedge, %sw.bb.if.then.29_crit_edge
- %tmp5 = phi i32 [ %.pre425, %sw.bb.if.then.29_crit_edge ], [ %or, %while.body.backedge ]
- %.lcssa393 = phi i32 [ %tmp2, %sw.bb.if.then.29_crit_edge ], [ %add, %while.body.backedge ]
- %sub = add nsw i32 %.lcssa393, -8
- %shr = lshr i32 %tmp5, %sub
- %and = and i32 %shr, 255
- %sunkaddr491 = ptrtoint %struct.DState* %s to i64
- %sunkaddr492 = add i64 %sunkaddr491, 36
- %sunkaddr493 = inttoptr i64 %sunkaddr492 to i32*
- store i32 %sub, i32* %sunkaddr493, align 4
- %blockSize100k = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 9
- store i32 %and, i32* %blockSize100k, align 4
- %and.off = add nsw i32 %and, -49
- %tmp6 = icmp ugt i32 %and.off, 8
- br i1 %tmp6, label %save_state_and_return, label %if.end.62
-
-if.end.33: ; preds = %while.body.backedge, %if.end.33.lr.ph
- %lsr.iv482 = phi i32 [ %tmp4, %if.end.33.lr.ph ], [ %lsr.iv.next483, %while.body.backedge ]
- %tmp7 = phi i32 [ %tmp2, %if.end.33.lr.ph ], [ %add, %while.body.backedge ]
- %cmp35 = icmp eq i32 %lsr.iv482, -1
- br i1 %cmp35, label %save_state_and_return, label %if.end.37
-
-if.end.37: ; preds = %if.end.33
- %tmp8 = bitcast %struct.bz_stream* %.pre424 to i8**
- %sunkaddr494 = ptrtoint %struct.DState* %s to i64
- %sunkaddr495 = add i64 %sunkaddr494, 32
- %sunkaddr496 = inttoptr i64 %sunkaddr495 to i32*
- %tmp9 = load i32, i32* %sunkaddr496, align 4
- %shl = shl i32 %tmp9, 8
- %tmp10 = load i8*, i8** %tmp8, align 8
- %tmp11 = load i8, i8* %tmp10, align 1
- %conv = zext i8 %tmp11 to i32
- %or = or i32 %conv, %shl
- store i32 %or, i32* %sunkaddr496, align 4
- %add = add nsw i32 %tmp7, 8
- %sunkaddr497 = ptrtoint %struct.DState* %s to i64
- %sunkaddr498 = add i64 %sunkaddr497, 36
- %sunkaddr499 = inttoptr i64 %sunkaddr498 to i32*
- store i32 %add, i32* %sunkaddr499, align 4
- %incdec.ptr = getelementptr inbounds i8, i8* %tmp10, i64 1
- store i8* %incdec.ptr, i8** %tmp8, align 8
- %sunkaddr500 = ptrtoint %struct.bz_stream* %.pre424 to i64
- %sunkaddr501 = add i64 %sunkaddr500, 8
- %sunkaddr502 = inttoptr i64 %sunkaddr501 to i32*
- store i32 %lsr.iv482, i32* %sunkaddr502, align 4
- %sunkaddr503 = ptrtoint %struct.bz_stream* %.pre424 to i64
- %sunkaddr504 = add i64 %sunkaddr503, 12
- %sunkaddr505 = inttoptr i64 %sunkaddr504 to i32*
- %tmp12 = load i32, i32* %sunkaddr505, align 4
- %inc = add i32 %tmp12, 1
- store i32 %inc, i32* %sunkaddr505, align 4
- %cmp49 = icmp eq i32 %inc, 0
- br i1 %cmp49, label %if.then.51, label %while.body.backedge
-
-if.then.51: ; preds = %if.end.37
- %sunkaddr506 = ptrtoint %struct.bz_stream* %.pre424 to i64
- %sunkaddr507 = add i64 %sunkaddr506, 16
- %sunkaddr508 = inttoptr i64 %sunkaddr507 to i32*
- %tmp13 = load i32, i32* %sunkaddr508, align 4
- %inc53 = add i32 %tmp13, 1
- store i32 %inc53, i32* %sunkaddr508, align 4
- br label %while.body.backedge
-
-while.body.backedge: ; preds = %if.then.51, %if.end.37
- %lsr.iv.next483 = add i32 %lsr.iv482, -1
- %cmp28 = icmp sgt i32 %add, 7
- br i1 %cmp28, label %if.then.29, label %if.end.33
-
-if.end.62: ; preds = %if.then.29
- %sub64 = add nsw i32 %and, -48
- %sunkaddr509 = ptrtoint %struct.DState* %s to i64
- %sunkaddr510 = add i64 %sunkaddr509, 40
- %sunkaddr511 = inttoptr i64 %sunkaddr510 to i32*
- store i32 %sub64, i32* %sunkaddr511, align 4
- br label %sw.bb.65
-
-sw.bb.65: ; preds = %if.end.62, %if.end.sw.bb.65_crit_edge
- %bsLive69.pre-phi = phi i32* [ %bsLive69.phi.trans.insert, %if.end.sw.bb.65_crit_edge ], [ %bsLive, %if.end.62 ]
- %tmp14 = phi i32 [ %.pre426, %if.end.sw.bb.65_crit_edge ], [ %sub, %if.end.62 ]
- %sunkaddr512 = ptrtoint %struct.DState* %s to i64
- %sunkaddr513 = add i64 %sunkaddr512, 8
- %sunkaddr514 = inttoptr i64 %sunkaddr513 to i32*
- store i32 14, i32* %sunkaddr514, align 4
- %cmp70.397 = icmp sgt i32 %tmp14, 7
- br i1 %cmp70.397, label %if.then.72, label %if.end.82.lr.ph
-
-if.end.82.lr.ph: ; preds = %sw.bb.65
- %tmp15 = bitcast %struct.DState* %s to %struct.bz_stream**
- %.pre427 = load %struct.bz_stream*, %struct.bz_stream** %tmp15, align 8
- %avail_in84.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre427, i64 0, i32 1
- %.pre431 = load i32, i32* %avail_in84.phi.trans.insert, align 4
- %tmp16 = add i32 %.pre431, -1
- br label %if.end.82
-
-if.then.72: ; preds = %while.body.68.backedge, %sw.bb.65
- %.lcssa390 = phi i32 [ %tmp14, %sw.bb.65 ], [ %add97, %while.body.68.backedge ]
- %sub76 = add nsw i32 %.lcssa390, -8
- %sunkaddr516 = ptrtoint %struct.DState* %s to i64
- %sunkaddr517 = add i64 %sunkaddr516, 36
- %sunkaddr518 = inttoptr i64 %sunkaddr517 to i32*
- store i32 %sub76, i32* %sunkaddr518, align 4
- %currBlockNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 11
- %tmp17 = load i32, i32* %currBlockNo, align 4
- %inc117 = add nsw i32 %tmp17, 1
- store i32 %inc117, i32* %currBlockNo, align 4
- %verbosity = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 12
- %tmp18 = load i32, i32* %verbosity, align 4
- %cmp118 = icmp sgt i32 %tmp18, 1
- br i1 %cmp118, label %if.then.120, label %sw.bb.123, !prof !0
-
-if.end.82: ; preds = %while.body.68.backedge, %if.end.82.lr.ph
- %lsr.iv480 = phi i32 [ %tmp16, %if.end.82.lr.ph ], [ %lsr.iv.next481, %while.body.68.backedge ]
- %tmp19 = phi i32 [ %tmp14, %if.end.82.lr.ph ], [ %add97, %while.body.68.backedge ]
- %cmp85 = icmp eq i32 %lsr.iv480, -1
- br i1 %cmp85, label %save_state_and_return, label %if.end.88
-
-if.end.88: ; preds = %if.end.82
- %tmp20 = bitcast %struct.bz_stream* %.pre427 to i8**
- %sunkaddr519 = ptrtoint %struct.DState* %s to i64
- %sunkaddr520 = add i64 %sunkaddr519, 32
- %sunkaddr521 = inttoptr i64 %sunkaddr520 to i32*
- %tmp21 = load i32, i32* %sunkaddr521, align 4
- %shl90 = shl i32 %tmp21, 8
- %tmp22 = load i8*, i8** %tmp20, align 8
- %tmp23 = load i8, i8* %tmp22, align 1
- %conv93 = zext i8 %tmp23 to i32
- %or94 = or i32 %conv93, %shl90
- store i32 %or94, i32* %sunkaddr521, align 4
- %add97 = add nsw i32 %tmp19, 8
- %sunkaddr522 = ptrtoint %struct.DState* %s to i64
- %sunkaddr523 = add i64 %sunkaddr522, 36
- %sunkaddr524 = inttoptr i64 %sunkaddr523 to i32*
- store i32 %add97, i32* %sunkaddr524, align 4
- %incdec.ptr100 = getelementptr inbounds i8, i8* %tmp22, i64 1
- store i8* %incdec.ptr100, i8** %tmp20, align 8
- %sunkaddr525 = ptrtoint %struct.bz_stream* %.pre427 to i64
- %sunkaddr526 = add i64 %sunkaddr525, 8
- %sunkaddr527 = inttoptr i64 %sunkaddr526 to i32*
- store i32 %lsr.iv480, i32* %sunkaddr527, align 4
- %sunkaddr528 = ptrtoint %struct.bz_stream* %.pre427 to i64
- %sunkaddr529 = add i64 %sunkaddr528, 12
- %sunkaddr530 = inttoptr i64 %sunkaddr529 to i32*
- %tmp24 = load i32, i32* %sunkaddr530, align 4
- %inc106 = add i32 %tmp24, 1
- store i32 %inc106, i32* %sunkaddr530, align 4
- %cmp109 = icmp eq i32 %inc106, 0
- br i1 %cmp109, label %if.then.111, label %while.body.68.backedge
-
-if.then.111: ; preds = %if.end.88
- %sunkaddr531 = ptrtoint %struct.bz_stream* %.pre427 to i64
- %sunkaddr532 = add i64 %sunkaddr531, 16
- %sunkaddr533 = inttoptr i64 %sunkaddr532 to i32*
- %tmp25 = load i32, i32* %sunkaddr533, align 4
- %inc114 = add i32 %tmp25, 1
- store i32 %inc114, i32* %sunkaddr533, align 4
- br label %while.body.68.backedge
-
-while.body.68.backedge: ; preds = %if.then.111, %if.end.88
- %lsr.iv.next481 = add i32 %lsr.iv480, -1
- %cmp70 = icmp sgt i32 %add97, 7
- br i1 %cmp70, label %if.then.72, label %if.end.82
-
-if.then.120: ; preds = %if.then.72
- %call = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* getelementptr inbounds ([0 x %struct.__sFILE], [0 x %struct.__sFILE]* @__sF, i64 0, i64 2), i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i64 0, i64 0), i32 %inc117)
- br label %sw.bb.123
-
-sw.bb.123: ; preds = %if.then.120, %if.then.72, %if.end.sw.bb.123_crit_edge
- %bsLive127.pre-phi = phi i32* [ %.pre433, %if.end.sw.bb.123_crit_edge ], [ %bsLive69.pre-phi, %if.then.72 ], [ %bsLive69.pre-phi, %if.then.120 ]
- %sunkaddr534 = ptrtoint %struct.DState* %s to i64
- %sunkaddr535 = add i64 %sunkaddr534, 8
- %sunkaddr536 = inttoptr i64 %sunkaddr535 to i32*
- store i32 25, i32* %sunkaddr536, align 4
- %tmp26 = load i32, i32* %bsLive127.pre-phi, align 4
- %cmp128.395 = icmp sgt i32 %tmp26, 7
- br i1 %cmp128.395, label %sw.bb.123.if.then.130_crit_edge, label %if.end.140.lr.ph
-
-sw.bb.123.if.then.130_crit_edge: ; preds = %sw.bb.123
- %sunkaddr537 = ptrtoint %struct.DState* %s to i64
- %sunkaddr538 = add i64 %sunkaddr537, 32
- %sunkaddr539 = inttoptr i64 %sunkaddr538 to i32*
- %.pre429 = load i32, i32* %sunkaddr539, align 4
- br label %if.then.130
-
-if.end.140.lr.ph: ; preds = %sw.bb.123
- %tmp27 = bitcast %struct.DState* %s to %struct.bz_stream**
- %.pre428 = load %struct.bz_stream*, %struct.bz_stream** %tmp27, align 8
- %avail_in142.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre428, i64 0, i32 1
- %.pre432 = load i32, i32* %avail_in142.phi.trans.insert, align 4
- %tmp28 = add i32 %.pre432, -1
- br label %if.end.140
-
-if.then.130: ; preds = %while.body.126.backedge, %sw.bb.123.if.then.130_crit_edge
- %tmp29 = phi i32 [ %.pre429, %sw.bb.123.if.then.130_crit_edge ], [ %or152, %while.body.126.backedge ]
- %.lcssa = phi i32 [ %tmp26, %sw.bb.123.if.then.130_crit_edge ], [ %add155, %while.body.126.backedge ]
- %sub134 = add nsw i32 %.lcssa, -8
- %shr135 = lshr i32 %tmp29, %sub134
- store i32 %sub134, i32* %bsLive127.pre-phi, align 4
- %origPtr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 13
- %tmp30 = load i32, i32* %origPtr, align 4
- %shl175 = shl i32 %tmp30, 8
- %conv176 = and i32 %shr135, 255
- %or177 = or i32 %shl175, %conv176
- store i32 %or177, i32* %origPtr, align 4
- %nInUse = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 27
- %tmp31 = load i32, i32* %nInUse, align 4
- %add179 = add nsw i32 %tmp31, 2
- br label %save_state_and_return
-
-if.end.140: ; preds = %while.body.126.backedge, %if.end.140.lr.ph
- %lsr.iv = phi i32 [ %tmp28, %if.end.140.lr.ph ], [ %lsr.iv.next, %while.body.126.backedge ]
- %tmp32 = phi i32 [ %tmp26, %if.end.140.lr.ph ], [ %add155, %while.body.126.backedge ]
- %cmp143 = icmp eq i32 %lsr.iv, -1
- br i1 %cmp143, label %save_state_and_return, label %if.end.146
-
-if.end.146: ; preds = %if.end.140
- %tmp33 = bitcast %struct.bz_stream* %.pre428 to i8**
- %sunkaddr541 = ptrtoint %struct.DState* %s to i64
- %sunkaddr542 = add i64 %sunkaddr541, 32
- %sunkaddr543 = inttoptr i64 %sunkaddr542 to i32*
- %tmp34 = load i32, i32* %sunkaddr543, align 4
- %shl148 = shl i32 %tmp34, 8
- %tmp35 = load i8*, i8** %tmp33, align 8
- %tmp36 = load i8, i8* %tmp35, align 1
- %conv151 = zext i8 %tmp36 to i32
- %or152 = or i32 %conv151, %shl148
- store i32 %or152, i32* %sunkaddr543, align 4
- %add155 = add nsw i32 %tmp32, 8
- store i32 %add155, i32* %bsLive127.pre-phi, align 4
- %incdec.ptr158 = getelementptr inbounds i8, i8* %tmp35, i64 1
- store i8* %incdec.ptr158, i8** %tmp33, align 8
- %sunkaddr544 = ptrtoint %struct.bz_stream* %.pre428 to i64
- %sunkaddr545 = add i64 %sunkaddr544, 8
- %sunkaddr546 = inttoptr i64 %sunkaddr545 to i32*
- store i32 %lsr.iv, i32* %sunkaddr546, align 4
- %sunkaddr547 = ptrtoint %struct.bz_stream* %.pre428 to i64
- %sunkaddr548 = add i64 %sunkaddr547, 12
- %sunkaddr549 = inttoptr i64 %sunkaddr548 to i32*
- %tmp37 = load i32, i32* %sunkaddr549, align 4
- %inc164 = add i32 %tmp37, 1
- store i32 %inc164, i32* %sunkaddr549, align 4
- %cmp167 = icmp eq i32 %inc164, 0
- br i1 %cmp167, label %if.then.169, label %while.body.126.backedge
-
-if.then.169: ; preds = %if.end.146
- %sunkaddr550 = ptrtoint %struct.bz_stream* %.pre428 to i64
- %sunkaddr551 = add i64 %sunkaddr550, 16
- %sunkaddr552 = inttoptr i64 %sunkaddr551 to i32*
- %tmp38 = load i32, i32* %sunkaddr552, align 4
- %inc172 = add i32 %tmp38, 1
- store i32 %inc172, i32* %sunkaddr552, align 4
- br label %while.body.126.backedge
-
-while.body.126.backedge: ; preds = %if.then.169, %if.end.146
- %lsr.iv.next = add i32 %lsr.iv, -1
- %cmp128 = icmp sgt i32 %add155, 7
- br i1 %cmp128, label %if.then.130, label %if.end.140
-
-sw.default: ; preds = %if.end, %if.end.thread
- %tmp39 = phi i32 [ 0, %if.end.thread ], [ %.pre, %if.end ]
- %tmp40 = phi i32 [ 0, %if.end.thread ], [ %.pre406, %if.end ]
- %tmp41 = phi i32 [ 0, %if.end.thread ], [ %.pre407, %if.end ]
- %tmp42 = phi i32 [ 0, %if.end.thread ], [ %.pre408, %if.end ]
- %tmp43 = phi i32 [ 0, %if.end.thread ], [ %.pre409, %if.end ]
- %tmp44 = phi i32 [ 0, %if.end.thread ], [ %.pre410, %if.end ]
- %tmp45 = phi i32 [ 0, %if.end.thread ], [ %.pre411, %if.end ]
- %tmp46 = phi i32 [ 0, %if.end.thread ], [ %.pre412, %if.end ]
- %tmp47 = phi i32 [ 0, %if.end.thread ], [ %.pre413, %if.end ]
- %tmp48 = phi i32 [ 0, %if.end.thread ], [ %.pre414, %if.end ]
- %tmp49 = phi i32 [ 0, %if.end.thread ], [ %.pre415, %if.end ]
- %tmp50 = phi i32 [ 0, %if.end.thread ], [ %.pre416, %if.end ]
- %tmp51 = phi i32 [ 0, %if.end.thread ], [ %.pre417, %if.end ]
- %tmp52 = phi i32 [ 0, %if.end.thread ], [ %.pre418, %if.end ]
- %tmp53 = phi i32 [ 0, %if.end.thread ], [ %.pre419, %if.end ]
- %tmp54 = phi i32 [ 0, %if.end.thread ], [ %.pre420, %if.end ]
- %tmp55 = phi i32 [ 0, %if.end.thread ], [ %.pre421, %if.end ]
- %tmp56 = phi i32 [ 0, %if.end.thread ], [ %.pre422, %if.end ]
- %tmp57 = phi i32 [ 0, %if.end.thread ], [ %.pre423, %if.end ]
- %save_j3.pre-phi469 = phi i32* [ %save_j, %if.end.thread ], [ %save_j3.phi.trans.insert, %if.end ]
- %save_t4.pre-phi467 = phi i32* [ %save_t, %if.end.thread ], [ %save_t4.phi.trans.insert, %if.end ]
- %save_alphaSize5.pre-phi465 = phi i32* [ %save_alphaSize, %if.end.thread ], [ %save_alphaSize5.phi.trans.insert, %if.end ]
- %save_nGroups6.pre-phi463 = phi i32* [ %save_nGroups, %if.end.thread ], [ %save_nGroups6.phi.trans.insert, %if.end ]
- %save_nSelectors7.pre-phi461 = phi i32* [ %save_nSelectors, %if.end.thread ], [ %save_nSelectors7.phi.trans.insert, %if.end ]
- %save_EOB8.pre-phi459 = phi i32* [ %save_EOB, %if.end.thread ], [ %save_EOB8.phi.trans.insert, %if.end ]
- %save_groupNo9.pre-phi457 = phi i32* [ %save_groupNo, %if.end.thread ], [ %save_groupNo9.phi.trans.insert, %if.end ]
- %save_groupPos10.pre-phi455 = phi i32* [ %save_groupPos, %if.end.thread ], [ %save_groupPos10.phi.trans.insert, %if.end ]
- %save_nextSym11.pre-phi453 = phi i32* [ %save_nextSym, %if.end.thread ], [ %save_nextSym11.phi.trans.insert, %if.end ]
- %save_nblockMAX12.pre-phi451 = phi i32* [ %save_nblockMAX, %if.end.thread ], [ %save_nblockMAX12.phi.trans.insert, %if.end ]
- %save_nblock13.pre-phi449 = phi i32* [ %save_nblock, %if.end.thread ], [ %save_nblock13.phi.trans.insert, %if.end ]
- %save_es14.pre-phi447 = phi i32* [ %save_es, %if.end.thread ], [ %save_es14.phi.trans.insert, %if.end ]
- %save_N15.pre-phi445 = phi i32* [ %save_N, %if.end.thread ], [ %save_N15.phi.trans.insert, %if.end ]
- %save_curr16.pre-phi443 = phi i32* [ %save_curr, %if.end.thread ], [ %save_curr16.phi.trans.insert, %if.end ]
- %save_zt17.pre-phi441 = phi i32* [ %save_zt, %if.end.thread ], [ %save_zt17.phi.trans.insert, %if.end ]
- %save_zn18.pre-phi439 = phi i32* [ %save_zn, %if.end.thread ], [ %save_zn18.phi.trans.insert, %if.end ]
- %save_zvec19.pre-phi437 = phi i32* [ %save_zvec, %if.end.thread ], [ %save_zvec19.phi.trans.insert, %if.end ]
- %save_zj20.pre-phi435 = phi i32* [ %save_zj, %if.end.thread ], [ %save_zj20.phi.trans.insert, %if.end ]
- tail call void @bar(i32 4001)
- br label %save_state_and_return
-
-save_state_and_return: ; preds = %sw.default, %if.end.140, %if.then.130, %if.end.82, %if.end.33, %if.then.29
- %tmp58 = phi i32 [ %tmp39, %sw.default ], [ %.pre, %if.then.29 ], [ %.pre, %if.then.130 ], [ %.pre, %if.end.140 ], [ %.pre, %if.end.82 ], [ %.pre, %if.end.33 ]
- %tmp59 = phi i32 [ %tmp40, %sw.default ], [ %.pre406, %if.then.29 ], [ %.pre406, %if.then.130 ], [ %.pre406, %if.end.140 ], [ %.pre406, %if.end.82 ], [ %.pre406, %if.end.33 ]
- %tmp60 = phi i32 [ %tmp41, %sw.default ], [ %.pre407, %if.then.29 ], [ %.pre407, %if.then.130 ], [ %.pre407, %if.end.140 ], [ %.pre407, %if.end.82 ], [ %.pre407, %if.end.33 ]
- %tmp61 = phi i32 [ %tmp43, %sw.default ], [ %.pre409, %if.then.29 ], [ %.pre409, %if.then.130 ], [ %.pre409, %if.end.140 ], [ %.pre409, %if.end.82 ], [ %.pre409, %if.end.33 ]
- %tmp62 = phi i32 [ %tmp44, %sw.default ], [ %.pre410, %if.then.29 ], [ %.pre410, %if.then.130 ], [ %.pre410, %if.end.140 ], [ %.pre410, %if.end.82 ], [ %.pre410, %if.end.33 ]
- %tmp63 = phi i32 [ %tmp45, %sw.default ], [ %.pre411, %if.then.29 ], [ %.pre411, %if.then.130 ], [ %.pre411, %if.end.140 ], [ %.pre411, %if.end.82 ], [ %.pre411, %if.end.33 ]
- %tmp64 = phi i32 [ %tmp46, %sw.default ], [ %.pre412, %if.then.29 ], [ %.pre412, %if.then.130 ], [ %.pre412, %if.end.140 ], [ %.pre412, %if.end.82 ], [ %.pre412, %if.end.33 ]
- %tmp65 = phi i32 [ %tmp47, %sw.default ], [ %.pre413, %if.then.29 ], [ %.pre413, %if.then.130 ], [ %.pre413, %if.end.140 ], [ %.pre413, %if.end.82 ], [ %.pre413, %if.end.33 ]
- %tmp66 = phi i32 [ %tmp48, %sw.default ], [ %.pre414, %if.then.29 ], [ %.pre414, %if.then.130 ], [ %.pre414, %if.end.140 ], [ %.pre414, %if.end.82 ], [ %.pre414, %if.end.33 ]
- %tmp67 = phi i32 [ %tmp49, %sw.default ], [ %.pre415, %if.then.29 ], [ %.pre415, %if.then.130 ], [ %.pre415, %if.end.140 ], [ %.pre415, %if.end.82 ], [ %.pre415, %if.end.33 ]
- %tmp68 = phi i32 [ %tmp51, %sw.default ], [ %.pre417, %if.then.29 ], [ %.pre417, %if.then.130 ], [ %.pre417, %if.end.140 ], [ %.pre417, %if.end.82 ], [ %.pre417, %if.end.33 ]
- %tmp69 = phi i32 [ %tmp52, %sw.default ], [ %.pre418, %if.then.29 ], [ %.pre418, %if.then.130 ], [ %.pre418, %if.end.140 ], [ %.pre418, %if.end.82 ], [ %.pre418, %if.end.33 ]
- %tmp70 = phi i32 [ %tmp53, %sw.default ], [ %.pre419, %if.then.29 ], [ %.pre419, %if.then.130 ], [ %.pre419, %if.end.140 ], [ %.pre419, %if.end.82 ], [ %.pre419, %if.end.33 ]
- %tmp71 = phi i32 [ %tmp54, %sw.default ], [ %.pre420, %if.then.29 ], [ %.pre420, %if.then.130 ], [ %.pre420, %if.end.140 ], [ %.pre420, %if.end.82 ], [ %.pre420, %if.end.33 ]
- %tmp72 = phi i32 [ %tmp55, %sw.default ], [ %.pre421, %if.then.29 ], [ %.pre421, %if.then.130 ], [ %.pre421, %if.end.140 ], [ %.pre421, %if.end.82 ], [ %.pre421, %if.end.33 ]
- %tmp73 = phi i32 [ %tmp56, %sw.default ], [ %.pre422, %if.then.29 ], [ %.pre422, %if.then.130 ], [ %.pre422, %if.end.140 ], [ %.pre422, %if.end.82 ], [ %.pre422, %if.end.33 ]
- %tmp74 = phi i32 [ %tmp57, %sw.default ], [ %.pre423, %if.then.29 ], [ %.pre423, %if.then.130 ], [ %.pre423, %if.end.140 ], [ %.pre423, %if.end.82 ], [ %.pre423, %if.end.33 ]
- %save_j3.pre-phi468 = phi i32* [ %save_j3.pre-phi469, %sw.default ], [ %save_j3.phi.trans.insert, %if.then.29 ], [ %save_j3.phi.trans.insert, %if.then.130 ], [ %save_j3.phi.trans.insert, %if.end.140 ], [ %save_j3.phi.trans.insert, %if.end.82 ], [ %save_j3.phi.trans.insert, %if.end.33 ]
- %save_t4.pre-phi466 = phi i32* [ %save_t4.pre-phi467, %sw.default ], [ %save_t4.phi.trans.insert, %if.then.29 ], [ %save_t4.phi.trans.insert, %if.then.130 ], [ %save_t4.phi.trans.insert, %if.end.140 ], [ %save_t4.phi.trans.insert, %if.end.82 ], [ %save_t4.phi.trans.insert, %if.end.33 ]
- %save_alphaSize5.pre-phi464 = phi i32* [ %save_alphaSize5.pre-phi465, %sw.default ], [ %save_alphaSize5.phi.trans.insert, %if.then.29 ], [ %save_alphaSize5.phi.trans.insert, %if.then.130 ], [ %save_alphaSize5.phi.trans.insert, %if.end.140 ], [ %save_alphaSize5.phi.trans.insert, %if.end.82 ], [ %save_alphaSize5.phi.trans.insert, %if.end.33 ]
- %save_nGroups6.pre-phi462 = phi i32* [ %save_nGroups6.pre-phi463, %sw.default ], [ %save_nGroups6.phi.trans.insert, %if.then.29 ], [ %save_nGroups6.phi.trans.insert, %if.then.130 ], [ %save_nGroups6.phi.trans.insert, %if.end.140 ], [ %save_nGroups6.phi.trans.insert, %if.end.82 ], [ %save_nGroups6.phi.trans.insert, %if.end.33 ]
- %save_nSelectors7.pre-phi460 = phi i32* [ %save_nSelectors7.pre-phi461, %sw.default ], [ %save_nSelectors7.phi.trans.insert, %if.then.29 ], [ %save_nSelectors7.phi.trans.insert, %if.then.130 ], [ %save_nSelectors7.phi.trans.insert, %if.end.140 ], [ %save_nSelectors7.phi.trans.insert, %if.end.82 ], [ %save_nSelectors7.phi.trans.insert, %if.end.33 ]
- %save_EOB8.pre-phi458 = phi i32* [ %save_EOB8.pre-phi459, %sw.default ], [ %save_EOB8.phi.trans.insert, %if.then.29 ], [ %save_EOB8.phi.trans.insert, %if.then.130 ], [ %save_EOB8.phi.trans.insert, %if.end.140 ], [ %save_EOB8.phi.trans.insert, %if.end.82 ], [ %save_EOB8.phi.trans.insert, %if.end.33 ]
- %save_groupNo9.pre-phi456 = phi i32* [ %save_groupNo9.pre-phi457, %sw.default ], [ %save_groupNo9.phi.trans.insert, %if.then.29 ], [ %save_groupNo9.phi.trans.insert, %if.then.130 ], [ %save_groupNo9.phi.trans.insert, %if.end.140 ], [ %save_groupNo9.phi.trans.insert, %if.end.82 ], [ %save_groupNo9.phi.trans.insert, %if.end.33 ]
- %save_groupPos10.pre-phi454 = phi i32* [ %save_groupPos10.pre-phi455, %sw.default ], [ %save_groupPos10.phi.trans.insert, %if.then.29 ], [ %save_groupPos10.phi.trans.insert, %if.then.130 ], [ %save_groupPos10.phi.trans.insert, %if.end.140 ], [ %save_groupPos10.phi.trans.insert, %if.end.82 ], [ %save_groupPos10.phi.trans.insert, %if.end.33 ]
- %save_nextSym11.pre-phi452 = phi i32* [ %save_nextSym11.pre-phi453, %sw.default ], [ %save_nextSym11.phi.trans.insert, %if.then.29 ], [ %save_nextSym11.phi.trans.insert, %if.then.130 ], [ %save_nextSym11.phi.trans.insert, %if.end.140 ], [ %save_nextSym11.phi.trans.insert, %if.end.82 ], [ %save_nextSym11.phi.trans.insert, %if.end.33 ]
- %save_nblockMAX12.pre-phi450 = phi i32* [ %save_nblockMAX12.pre-phi451, %sw.default ], [ %save_nblockMAX12.phi.trans.insert, %if.then.29 ], [ %save_nblockMAX12.phi.trans.insert, %if.then.130 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.140 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.82 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.33 ]
- %save_nblock13.pre-phi448 = phi i32* [ %save_nblock13.pre-phi449, %sw.default ], [ %save_nblock13.phi.trans.insert, %if.then.29 ], [ %save_nblock13.phi.trans.insert, %if.then.130 ], [ %save_nblock13.phi.trans.insert, %if.end.140 ], [ %save_nblock13.phi.trans.insert, %if.end.82 ], [ %save_nblock13.phi.trans.insert, %if.end.33 ]
- %save_es14.pre-phi446 = phi i32* [ %save_es14.pre-phi447, %sw.default ], [ %save_es14.phi.trans.insert, %if.then.29 ], [ %save_es14.phi.trans.insert, %if.then.130 ], [ %save_es14.phi.trans.insert, %if.end.140 ], [ %save_es14.phi.trans.insert, %if.end.82 ], [ %save_es14.phi.trans.insert, %if.end.33 ]
- %save_N15.pre-phi444 = phi i32* [ %save_N15.pre-phi445, %sw.default ], [ %save_N15.phi.trans.insert, %if.then.29 ], [ %save_N15.phi.trans.insert, %if.then.130 ], [ %save_N15.phi.trans.insert, %if.end.140 ], [ %save_N15.phi.trans.insert, %if.end.82 ], [ %save_N15.phi.trans.insert, %if.end.33 ]
- %save_curr16.pre-phi442 = phi i32* [ %save_curr16.pre-phi443, %sw.default ], [ %save_curr16.phi.trans.insert, %if.then.29 ], [ %save_curr16.phi.trans.insert, %if.then.130 ], [ %save_curr16.phi.trans.insert, %if.end.140 ], [ %save_curr16.phi.trans.insert, %if.end.82 ], [ %save_curr16.phi.trans.insert, %if.end.33 ]
- %save_zt17.pre-phi440 = phi i32* [ %save_zt17.pre-phi441, %sw.default ], [ %save_zt17.phi.trans.insert, %if.then.29 ], [ %save_zt17.phi.trans.insert, %if.then.130 ], [ %save_zt17.phi.trans.insert, %if.end.140 ], [ %save_zt17.phi.trans.insert, %if.end.82 ], [ %save_zt17.phi.trans.insert, %if.end.33 ]
- %save_zn18.pre-phi438 = phi i32* [ %save_zn18.pre-phi439, %sw.default ], [ %save_zn18.phi.trans.insert, %if.then.29 ], [ %save_zn18.phi.trans.insert, %if.then.130 ], [ %save_zn18.phi.trans.insert, %if.end.140 ], [ %save_zn18.phi.trans.insert, %if.end.82 ], [ %save_zn18.phi.trans.insert, %if.end.33 ]
- %save_zvec19.pre-phi436 = phi i32* [ %save_zvec19.pre-phi437, %sw.default ], [ %save_zvec19.phi.trans.insert, %if.then.29 ], [ %save_zvec19.phi.trans.insert, %if.then.130 ], [ %save_zvec19.phi.trans.insert, %if.end.140 ], [ %save_zvec19.phi.trans.insert, %if.end.82 ], [ %save_zvec19.phi.trans.insert, %if.end.33 ]
- %save_zj20.pre-phi434 = phi i32* [ %save_zj20.pre-phi435, %sw.default ], [ %save_zj20.phi.trans.insert, %if.then.29 ], [ %save_zj20.phi.trans.insert, %if.then.130 ], [ %save_zj20.phi.trans.insert, %if.end.140 ], [ %save_zj20.phi.trans.insert, %if.end.82 ], [ %save_zj20.phi.trans.insert, %if.end.33 ]
- %nblock.1 = phi i32 [ %tmp50, %sw.default ], [ %.pre416, %if.then.29 ], [ 0, %if.then.130 ], [ %.pre416, %if.end.140 ], [ %.pre416, %if.end.82 ], [ %.pre416, %if.end.33 ]
- %alphaSize.1 = phi i32 [ %tmp42, %sw.default ], [ %.pre408, %if.then.29 ], [ %add179, %if.then.130 ], [ %.pre408, %if.end.140 ], [ %.pre408, %if.end.82 ], [ %.pre408, %if.end.33 ]
- %retVal.0 = phi i32 [ 0, %sw.default ], [ -5, %if.then.29 ], [ -4, %if.then.130 ], [ 0, %if.end.140 ], [ 0, %if.end.82 ], [ 0, %if.end.33 ]
- store i32 %tmp58, i32* %save_i, align 4
- store i32 %tmp59, i32* %save_j3.pre-phi468, align 4
- store i32 %tmp60, i32* %save_t4.pre-phi466, align 4
- store i32 %alphaSize.1, i32* %save_alphaSize5.pre-phi464, align 4
- store i32 %tmp61, i32* %save_nGroups6.pre-phi462, align 4
- store i32 %tmp62, i32* %save_nSelectors7.pre-phi460, align 4
- store i32 %tmp63, i32* %save_EOB8.pre-phi458, align 4
- store i32 %tmp64, i32* %save_groupNo9.pre-phi456, align 4
- store i32 %tmp65, i32* %save_groupPos10.pre-phi454, align 4
- store i32 %tmp66, i32* %save_nextSym11.pre-phi452, align 4
- store i32 %tmp67, i32* %save_nblockMAX12.pre-phi450, align 4
- store i32 %nblock.1, i32* %save_nblock13.pre-phi448, align 4
- store i32 %tmp68, i32* %save_es14.pre-phi446, align 4
- store i32 %tmp69, i32* %save_N15.pre-phi444, align 4
- store i32 %tmp70, i32* %save_curr16.pre-phi442, align 4
- store i32 %tmp71, i32* %save_zt17.pre-phi440, align 4
- store i32 %tmp72, i32* %save_zn18.pre-phi438, align 4
- store i32 %tmp73, i32* %save_zvec19.pre-phi436, align 4
- store i32 %tmp74, i32* %save_zj20.pre-phi434, align 4
- ret i32 %retVal.0
-}
-
-!0 = !{!"branch_weights", i32 10, i32 1}
--- /dev/null
+; RUN: llc -verify-regalloc < %s
+; PR27275: When enabling remat for a vreg defined by a PHI, make sure the
+; live-range update removes the dead PHI. Otherwise, we may end up with PHIs
+; that have incorrect operands, and that will trigger assertions or verifier
+; failures in later passes.
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7-apple-ios9.0.0"
+
+%class.SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test.1.226.276.1301.2326 = type { %class.MockTransportClientSocketPool.0.225.275.1300.2325, i32 }
+%class.MockTransportClientSocketPool.0.225.275.1300.2325 = type { i8 }
+%class.StaticSocketDataProvider.6.231.281.1306.2331 = type { i8, %struct.MockConnect.5.230.280.1305.2330 }
+%struct.MockConnect.5.230.280.1305.2330 = type { %class.IPEndPoint.4.229.279.1304.2329 }
+%class.IPEndPoint.4.229.279.1304.2329 = type { %class.IPAddress.3.228.278.1303.2328 }
+%class.IPAddress.3.228.278.1303.2328 = type { %"class.(anonymous namespace)::vector.2.227.277.1302.2327" }
+%"class.(anonymous namespace)::vector.2.227.277.1302.2327" = type { i8 }
+%class.TestCompletionCallback.9.234.284.1309.2334 = type { %class.TestCompletionCallbackTemplate.8.233.283.1308.2333, i32 }
+%class.TestCompletionCallbackTemplate.8.233.283.1308.2333 = type { i32 }
+%class.AssertionResult.24.249.299.1324.2349 = type { i8, %class.scoped_ptr.23.248.298.1323.2348 }
+%class.scoped_ptr.23.248.298.1323.2348 = type { %class.Trans_NS___1_basic_string.18.243.293.1318.2343* }
+%class.Trans_NS___1_basic_string.18.243.293.1318.2343 = type { %class.Trans_NS___1___libcpp_compressed_pair_imp.17.242.292.1317.2342 }
+%class.Trans_NS___1___libcpp_compressed_pair_imp.17.242.292.1317.2342 = type { %"struct.Trans_NS___1_basic_string<char, int, int>::__rep.16.241.291.1316.2341" }
+%"struct.Trans_NS___1_basic_string<char, int, int>::__rep.16.241.291.1316.2341" = type { %"struct.Trans_NS___1_basic_string<char, int, int>::__long.15.240.290.1315.2340" }
+%"struct.Trans_NS___1_basic_string<char, int, int>::__long.15.240.290.1315.2340" = type { i64, i32 }
+%class.AssertHelper.10.235.285.1310.2335 = type { i8 }
+%class.Message.13.238.288.1313.2338 = type { %class.scoped_ptr.0.12.237.287.1312.2337 }
+%class.scoped_ptr.0.12.237.287.1312.2337 = type { %"class.(anonymous namespace)::basic_stringstream.11.236.286.1311.2336"* }
+%"class.(anonymous namespace)::basic_stringstream.11.236.286.1311.2336" = type { i8 }
+%class.scoped_refptr.19.244.294.1319.2344 = type { i8 }
+%class.BoundNetLog.20.245.295.1320.2345 = type { i32 }
+%struct.MockReadWrite.7.232.282.1307.2332 = type { i32 }
+%"class.(anonymous namespace)::basic_iostream.22.247.297.1322.2347" = type { i8 }
+%class.ClientSocketHandle.14.239.289.1314.2339 = type { i8 }
+%"class.(anonymous namespace)::__vector_base.21.246.296.1321.2346" = type { i8 }
+
+@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+define void @_ZN53SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test6m_fn10Ev(%class.SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test.1.226.276.1301.2326* %this) align 2 {
+entry:
+ %socket_data = alloca %class.StaticSocketDataProvider.6.231.281.1306.2331, align 1
+ %agg.tmp = alloca %struct.MockConnect.5.230.280.1305.2330, align 1
+ %callback = alloca %class.TestCompletionCallback.9.234.284.1309.2334, align 4
+ %gtest_ar = alloca %class.AssertionResult.24.249.299.1324.2349, align 4
+ %temp.lvalue = alloca %class.AssertHelper.10.235.285.1310.2335, align 1
+ %agg.tmp10 = alloca %class.Message.13.238.288.1313.2338, align 4
+ %ref.tmp = alloca %class.Trans_NS___1_basic_string.18.243.293.1318.2343, align 4
+ %agg.tmp16 = alloca %class.scoped_refptr.19.244.294.1319.2344, align 1
+ %agg.tmp18 = alloca %class.BoundNetLog.20.245.295.1320.2345, align 4
+ %call2 = call %class.StaticSocketDataProvider.6.231.281.1306.2331* @_ZN24StaticSocketDataProviderC1EP13MockReadWritejS1_j(%class.StaticSocketDataProvider.6.231.281.1306.2331* nonnull %socket_data, %struct.MockReadWrite.7.232.282.1307.2332* undef, i32 1, %struct.MockReadWrite.7.232.282.1307.2332* null, i32 0)
+ %call3 = call %struct.MockConnect.5.230.280.1305.2330* @_ZN11MockConnectC1Ev(%struct.MockConnect.5.230.280.1305.2330* nonnull %agg.tmp)
+ call void @_ZN24StaticSocketDataProvider5m_fn8E11MockConnect(%class.StaticSocketDataProvider.6.231.281.1306.2331* nonnull %socket_data, %struct.MockConnect.5.230.280.1305.2330* nonnull %agg.tmp)
+ %call5 = call %class.TestCompletionCallback.9.234.284.1309.2334* @_ZN22TestCompletionCallbackC1Ev(%class.TestCompletionCallback.9.234.284.1309.2334* nonnull %callback)
+ %transport_socket_pool_ = getelementptr inbounds %class.SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test.1.226.276.1301.2326, %class.SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test.1.226.276.1301.2326* %this, i32 0, i32 0
+ %call6 = call i32 @_ZN29MockTransportClientSocketPool5m_fn9Ev(%class.MockTransportClientSocketPool.0.225.275.1300.2325* %transport_socket_pool_)
+ call void @_Z11CmpHelperEQPcS_xx(%class.AssertionResult.24.249.299.1324.2349* nonnull sret %gtest_ar, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i64 0, i64 undef)
+ %tmp = load i8, i8* undef, align 4
+ %tobool.i = icmp eq i8 %tmp, 0
+ br i1 %tobool.i, label %if.else, label %if.end
+
+if.else: ; preds = %entry
+ br i1 undef, label %_ZN15AssertionResult5m_fn6Ev.exit, label %cond.true.i
+
+cond.true.i: ; preds = %if.else
+ %call4.i = call i8* @_ZN25Trans_NS___1_basic_stringIciiE5m_fn1Ev(%class.Trans_NS___1_basic_string.18.243.293.1318.2343* nonnull undef)
+ br label %_ZN15AssertionResult5m_fn6Ev.exit
+
+_ZN15AssertionResult5m_fn6Ev.exit: ; preds = %cond.true.i, %if.else
+ %cond.i = phi i8* [ %call4.i, %cond.true.i ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), %if.else ]
+ %call9 = call %class.AssertHelper.10.235.285.1310.2335* @_ZN12AssertHelperC1EPKc(%class.AssertHelper.10.235.285.1310.2335* nonnull %temp.lvalue, i8* %cond.i)
+ %call11 = call %class.Message.13.238.288.1313.2338* @_ZN7MessageC1Ev(%class.Message.13.238.288.1313.2338* nonnull %agg.tmp10)
+ call void @_ZN12AssertHelperaSE7Message(%class.AssertHelper.10.235.285.1310.2335* nonnull %temp.lvalue, %class.Message.13.238.288.1313.2338* nonnull %agg.tmp10)
+ %call.i.i.i.i27 = call zeroext i1 @_Z6IsTruev()
+ %brmerge = or i1 false, undef
+ br i1 %brmerge, label %_ZN7MessageD1Ev.exit33, label %delete.notnull.i.i.i.i32
+
+delete.notnull.i.i.i.i32: ; preds = %_ZN15AssertionResult5m_fn6Ev.exit
+ %call.i.i.i.i.i.i31 = call %"class.(anonymous namespace)::basic_iostream.22.247.297.1322.2347"* @_ZN12_GLOBAL__N_114basic_iostreamD2Ev(%"class.(anonymous namespace)::basic_iostream.22.247.297.1322.2347"* undef)
+ call void @_ZdlPv(i8* undef)
+ br label %_ZN7MessageD1Ev.exit33
+
+_ZN7MessageD1Ev.exit33: ; preds = %delete.notnull.i.i.i.i32, %_ZN15AssertionResult5m_fn6Ev.exit
+ %call13 = call %class.AssertHelper.10.235.285.1310.2335* @_ZN12AssertHelperD1Ev(%class.AssertHelper.10.235.285.1310.2335* nonnull %temp.lvalue)
+ br label %if.end
+
+if.end: ; preds = %_ZN7MessageD1Ev.exit33, %entry
+ %message_.i.i = getelementptr inbounds %class.AssertionResult.24.249.299.1324.2349, %class.AssertionResult.24.249.299.1324.2349* %gtest_ar, i32 0, i32 1
+ %call.i.i.i = call %class.scoped_ptr.23.248.298.1323.2348* @_ZN10scoped_ptrI25Trans_NS___1_basic_stringIciiEED2Ev(%class.scoped_ptr.23.248.298.1323.2348* %message_.i.i)
+ call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 12, i32 4, i1 false)
+ call void @_ZN25Trans_NS___1_basic_stringIciiE5m_fn2Ev(%class.Trans_NS___1_basic_string.18.243.293.1318.2343* nonnull %ref.tmp)
+ call void @_Z19CreateSOCKSv5Paramsv(%class.scoped_refptr.19.244.294.1319.2344* nonnull sret %agg.tmp16)
+ %callback_.i = getelementptr inbounds %class.TestCompletionCallback.9.234.284.1309.2334, %class.TestCompletionCallback.9.234.284.1309.2334* %callback, i32 0, i32 1
+ %pool_ = getelementptr inbounds %class.SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test.1.226.276.1301.2326, %class.SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test.1.226.276.1301.2326* %this, i32 0, i32 1
+ %tmp1 = getelementptr inbounds %class.BoundNetLog.20.245.295.1320.2345, %class.BoundNetLog.20.245.295.1320.2345* %agg.tmp18, i32 0, i32 0
+ store i32 0, i32* %tmp1, align 4
+ call void @_ZN18ClientSocketHandle5m_fn3IPiEEvRK25Trans_NS___1_basic_stringIciiE13scoped_refptr15RequestPriorityN16ClientSocketPool13RespectLimitsERiT_11BoundNetLog(%class.ClientSocketHandle.14.239.289.1314.2339* nonnull undef, %class.Trans_NS___1_basic_string.18.243.293.1318.2343* nonnull dereferenceable(12) %ref.tmp, %class.scoped_refptr.19.244.294.1319.2344* nonnull %agg.tmp16, i32 0, i32 1, i32* nonnull dereferenceable(4) %callback_.i, i32* %pool_, %class.BoundNetLog.20.245.295.1320.2345* nonnull %agg.tmp18)
+ %call19 = call %class.BoundNetLog.20.245.295.1320.2345* @_ZN11BoundNetLogD1Ev(%class.BoundNetLog.20.245.295.1320.2345* nonnull %agg.tmp18)
+ call void @_Z11CmpHelperEQPcS_xx(%class.AssertionResult.24.249.299.1324.2349* nonnull sret undef, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i64 -1, i64 0)
+ br i1 undef, label %if.then.i.i.i.i, label %_ZN7MessageD1Ev.exit
+
+if.then.i.i.i.i: ; preds = %if.end
+ %tmp2 = load %"class.(anonymous namespace)::basic_stringstream.11.236.286.1311.2336"*, %"class.(anonymous namespace)::basic_stringstream.11.236.286.1311.2336"** undef, align 4
+ br label %_ZN7MessageD1Ev.exit
+
+_ZN7MessageD1Ev.exit: ; preds = %if.then.i.i.i.i, %if.end
+ %connect_.i.i = getelementptr inbounds %class.StaticSocketDataProvider.6.231.281.1306.2331, %class.StaticSocketDataProvider.6.231.281.1306.2331* %socket_data, i32 0, i32 1
+ %tmp3 = bitcast %struct.MockConnect.5.230.280.1305.2330* %connect_.i.i to %"class.(anonymous namespace)::__vector_base.21.246.296.1321.2346"*
+ %call.i.i.i.i.i.i.i.i.i.i = call %"class.(anonymous namespace)::__vector_base.21.246.296.1321.2346"* @_ZN12_GLOBAL__N_113__vector_baseD2Ev(%"class.(anonymous namespace)::__vector_base.21.246.296.1321.2346"* %tmp3)
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #0
+
+declare %class.StaticSocketDataProvider.6.231.281.1306.2331* @_ZN24StaticSocketDataProviderC1EP13MockReadWritejS1_j(%class.StaticSocketDataProvider.6.231.281.1306.2331* returned, %struct.MockReadWrite.7.232.282.1307.2332*, i32, %struct.MockReadWrite.7.232.282.1307.2332*, i32) unnamed_addr
+
+declare void @_ZN24StaticSocketDataProvider5m_fn8E11MockConnect(%class.StaticSocketDataProvider.6.231.281.1306.2331*, %struct.MockConnect.5.230.280.1305.2330*)
+
+declare %struct.MockConnect.5.230.280.1305.2330* @_ZN11MockConnectC1Ev(%struct.MockConnect.5.230.280.1305.2330* returned) unnamed_addr
+
+declare %class.TestCompletionCallback.9.234.284.1309.2334* @_ZN22TestCompletionCallbackC1Ev(%class.TestCompletionCallback.9.234.284.1309.2334* returned) unnamed_addr
+
+declare i32 @_ZN29MockTransportClientSocketPool5m_fn9Ev(%class.MockTransportClientSocketPool.0.225.275.1300.2325*)
+
+declare %class.AssertHelper.10.235.285.1310.2335* @_ZN12AssertHelperC1EPKc(%class.AssertHelper.10.235.285.1310.2335* returned, i8*) unnamed_addr
+
+declare void @_ZN12AssertHelperaSE7Message(%class.AssertHelper.10.235.285.1310.2335*, %class.Message.13.238.288.1313.2338*)
+
+declare %class.Message.13.238.288.1313.2338* @_ZN7MessageC1Ev(%class.Message.13.238.288.1313.2338* returned) unnamed_addr
+
+declare %class.AssertHelper.10.235.285.1310.2335* @_ZN12AssertHelperD1Ev(%class.AssertHelper.10.235.285.1310.2335* returned) unnamed_addr
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #0
+
+declare void @_ZN18ClientSocketHandle5m_fn3IPiEEvRK25Trans_NS___1_basic_stringIciiE13scoped_refptr15RequestPriorityN16ClientSocketPool13RespectLimitsERiT_11BoundNetLog(%class.ClientSocketHandle.14.239.289.1314.2339*, %class.Trans_NS___1_basic_string.18.243.293.1318.2343* dereferenceable(12), %class.scoped_refptr.19.244.294.1319.2344*, i32, i32, i32* dereferenceable(4), i32*, %class.BoundNetLog.20.245.295.1320.2345*)
+
+declare void @_Z19CreateSOCKSv5Paramsv(%class.scoped_refptr.19.244.294.1319.2344* sret)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+
+declare %class.BoundNetLog.20.245.295.1320.2345* @_ZN11BoundNetLogD1Ev(%class.BoundNetLog.20.245.295.1320.2345* returned) unnamed_addr
+
+declare %class.scoped_refptr.19.244.294.1319.2344* @_ZN13scoped_refptrD1Ev(%class.scoped_refptr.19.244.294.1319.2344* returned) unnamed_addr
+
+declare %"class.(anonymous namespace)::__vector_base.21.246.296.1321.2346"* @_ZN12_GLOBAL__N_113__vector_baseD2Ev(%"class.(anonymous namespace)::__vector_base.21.246.296.1321.2346"* returned) unnamed_addr
+
+declare i8* @_ZN25Trans_NS___1_basic_stringIciiE5m_fn1Ev(%class.Trans_NS___1_basic_string.18.243.293.1318.2343*)
+
+declare zeroext i1 @_Z6IsTruev()
+
+declare void @_ZdlPv(i8*)
+
+declare %"class.(anonymous namespace)::basic_iostream.22.247.297.1322.2347"* @_ZN12_GLOBAL__N_114basic_iostreamD2Ev(%"class.(anonymous namespace)::basic_iostream.22.247.297.1322.2347"* returned) unnamed_addr
+
+declare %class.scoped_ptr.23.248.298.1323.2348* @_ZN10scoped_ptrI25Trans_NS___1_basic_stringIciiEED2Ev(%class.scoped_ptr.23.248.298.1323.2348* readonly returned) unnamed_addr align 2
+
+declare void @_Z11CmpHelperEQPcS_xx(%class.AssertionResult.24.249.299.1324.2349* sret, i8*, i8*, i64, i64)
+
+declare void @_ZN25Trans_NS___1_basic_stringIciiE5m_fn2Ev(%class.Trans_NS___1_basic_string.18.243.293.1318.2343*)
+
+attributes #0 = { argmemonly nounwind }
%cond = select i1 %cmp, fp128 %x, fp128 %y
ret fp128 %cond
; CHECK-LABEL: TestMax:
-; CHECK: movaps %xmm1
; CHECK: movaps %xmm0
+; CHECK: movaps %xmm1
; CHECK: callq __gttf2
; CHECK: movaps {{.*}}, %xmm0
; CHECK: testl %eax, %eax
--- /dev/null
+; RUN: llc < %s | FileCheck %s
+
+; Check no spills to the same stack slot after hoisting.
+; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp)
+; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp)
+; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp)
+; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp)
+; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp)
+; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp)
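+;
+; A hypothetical failure would contain two spills to the same slot, e.g.:
+;   movq %rax, -8(%rsp)
+;   movq %rcx, -8(%rsp)
+; Each [[SPOFFSET1]]..[[SPOFFSET3]] FileCheck variable captures one spill
+; offset above, and the CHECK-NOT lines reject any later spill that reuses
+; a captured offset.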
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = external global i32*, align 8
+@b = external global i32, align 4
+@d = external global i32*, align 8
+
+; Function Attrs: norecurse noreturn nounwind uwtable
+define void @fn1(i32 %p1) {
+entry:
+ %tmp = load i32*, i32** @d, align 8
+ %tmp1 = load i32*, i32** @a, align 8
+ %tmp2 = sext i32 %p1 to i64
+ br label %for.cond
+
+for.cond: ; preds = %for.inc14, %entry
+ %indvar = phi i32 [ %indvar.next, %for.inc14 ], [ 0, %entry ]
+ %indvars.iv30.in = phi i32 [ %indvars.iv30, %for.inc14 ], [ %p1, %entry ]
+ %c.0 = phi i32 [ %inc15, %for.inc14 ], [ 1, %entry ]
+ %k.0 = phi i32 [ %k.1.lcssa, %for.inc14 ], [ undef, %entry ]
+ %tmp3 = icmp sgt i32 undef, 0
+ %smax52 = select i1 %tmp3, i32 undef, i32 0
+ %tmp4 = zext i32 %smax52 to i64
+ %tmp5 = icmp sgt i64 undef, %tmp4
+ %smax53 = select i1 %tmp5, i64 undef, i64 %tmp4
+ %tmp6 = add nsw i64 %smax53, 1
+ %tmp7 = sub nsw i64 %tmp6, %tmp4
+ %tmp8 = add nsw i64 %tmp7, -8
+ %tmp9 = sub i32 undef, %indvar
+ %tmp10 = icmp sgt i64 %tmp2, 0
+ %smax40 = select i1 %tmp10, i64 %tmp2, i64 0
+ %scevgep41 = getelementptr i32, i32* %tmp1, i64 %smax40
+ %indvars.iv30 = add i32 %indvars.iv30.in, -1
+ %tmp11 = icmp sgt i32 %indvars.iv30, 0
+ %smax = select i1 %tmp11, i32 %indvars.iv30, i32 0
+ %tmp12 = zext i32 %smax to i64
+ %sub = sub nsw i32 %p1, %c.0
+ %cmp = icmp sgt i32 %sub, 0
+ %sub. = select i1 %cmp, i32 %sub, i32 0
+ %cmp326 = icmp sgt i32 %k.0, %p1
+ br i1 %cmp326, label %for.cond4.preheader, label %for.body.preheader
+
+for.body.preheader: ; preds = %for.cond
+ br label %for.body
+
+for.cond4.preheader: ; preds = %for.body, %for.cond
+ %k.1.lcssa = phi i32 [ %k.0, %for.cond ], [ %add, %for.body ]
+ %cmp528 = icmp sgt i32 %sub., %p1
+ br i1 %cmp528, label %for.inc14, label %for.body6.preheader
+
+for.body6.preheader: ; preds = %for.cond4.preheader
+ br i1 undef, label %for.body6, label %min.iters.checked
+
+min.iters.checked: ; preds = %for.body6.preheader
+ br i1 undef, label %for.body6, label %vector.memcheck
+
+vector.memcheck: ; preds = %min.iters.checked
+ %bound1 = icmp ule i32* undef, %scevgep41
+ %memcheck.conflict = and i1 undef, %bound1
+ br i1 %memcheck.conflict, label %for.body6, label %vector.body.preheader
+
+vector.body.preheader: ; preds = %vector.memcheck
+ %lcmp.mod = icmp eq i64 undef, 0
+ br i1 %lcmp.mod, label %vector.body.preheader.split, label %vector.body.prol
+
+vector.body.prol: ; preds = %vector.body.prol, %vector.body.preheader
+ %prol.iter.cmp = icmp eq i64 undef, 0
+ br i1 %prol.iter.cmp, label %vector.body.preheader.split, label %vector.body.prol
+
+vector.body.preheader.split: ; preds = %vector.body.prol, %vector.body.preheader
+ %tmp13 = icmp ult i64 %tmp8, 24
+ br i1 %tmp13, label %middle.block, label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.body.preheader.split
+ %index = phi i64 [ %index.next.3, %vector.body ], [ 0, %vector.body.preheader.split ]
+ %index.next = add i64 %index, 8
+ %offset.idx.1 = add i64 %tmp12, %index.next
+ %tmp14 = getelementptr inbounds i32, i32* %tmp, i64 %offset.idx.1
+ %tmp15 = bitcast i32* %tmp14 to <4 x i32>*
+ %wide.load.1 = load <4 x i32>, <4 x i32>* %tmp15, align 4
+ %tmp16 = getelementptr inbounds i32, i32* %tmp1, i64 %offset.idx.1
+ %tmp17 = bitcast i32* %tmp16 to <4 x i32>*
+ store <4 x i32> %wide.load.1, <4 x i32>* %tmp17, align 4
+ %index.next.3 = add i64 %index, 32
+ br i1 undef, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body, %vector.body.preheader.split
+ br i1 undef, label %for.inc14, label %for.body6
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %k.127 = phi i32 [ %k.0, %for.body.preheader ], [ %add, %for.body ]
+ %add = add nsw i32 %k.127, 1
+ %tmp18 = load i32, i32* undef, align 4
+ store i32 %tmp18, i32* @b, align 4
+ br i1 undef, label %for.body, label %for.cond4.preheader
+
+for.body6: ; preds = %for.body6, %middle.block, %vector.memcheck, %min.iters.checked, %for.body6.preheader
+ %indvars.iv32 = phi i64 [ undef, %for.body6 ], [ %tmp12, %vector.memcheck ], [ %tmp12, %min.iters.checked ], [ %tmp12, %for.body6.preheader ], [ undef, %middle.block ]
+ %arrayidx8 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv32
+ %tmp19 = load i32, i32* %arrayidx8, align 4
+ %arrayidx10 = getelementptr inbounds i32, i32* %tmp1, i64 %indvars.iv32
+ store i32 %tmp19, i32* %arrayidx10, align 4
+ %cmp5 = icmp slt i64 %indvars.iv32, undef
+ br i1 %cmp5, label %for.body6, label %for.inc14
+
+for.inc14: ; preds = %for.body6, %middle.block, %for.cond4.preheader
+ %inc15 = add nuw nsw i32 %c.0, 1
+ %indvar.next = add i32 %indvar, 1
+ br label %for.cond
+}
--- /dev/null
+; RUN: llc -verify-regalloc < %s
+; PR27275: when rematerialization is enabled for a vreg defined by a PHI, make
+; sure the live range update removes the dead PHI def. Otherwise we may end up
+; with PHIs that have incorrect operands, which will trigger assertions or
+; verifier failures in later passes.
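+;
+; A sketch of the pattern (hypothetical vregs, old MI notation):
+;   %vreg1<def> = PHI %vreg2, <BB#1>, %vreg3, <BB#2>
+; Once every use of %vreg1 has been rematerialized, this PHI def is dead; the
+; live range update must delete it rather than leave a stale PHI behind.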
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+@b = external global i64, align 8
+@d = external global i32, align 4
+@e = external global i64, align 8
+@h = external global i16, align 2
+@a = external global i8, align 1
+@g = external global i64, align 8
+@j = external global i32, align 4
+@f = external global i16, align 2
+@.str = external unnamed_addr constant [12 x i8], align 1
+
+define void @fn1() {
+entry:
+ %tmp = load i64, i64* @b, align 8
+ %or = or i64 0, 3299921317
+ %and = and i64 %or, %tmp
+ %tmp1 = load i32, i32* @d, align 4
+ br i1 undef, label %lor.rhs, label %lor.end
+
+lor.rhs: ; preds = %entry
+ %tobool3 = icmp ne i8 undef, 0
+ br label %lor.end
+
+lor.end: ; preds = %lor.rhs, %entry
+ %lor.ext = zext i1 undef to i32
+ %tmp2 = load i64, i64* @e, align 8
+ br i1 undef, label %lor.rhs5, label %lor.end7
+
+lor.rhs5: ; preds = %lor.end
+ br label %lor.end7
+
+lor.end7: ; preds = %lor.rhs5, %lor.end
+ %tmp3 = phi i1 [ true, %lor.end ], [ false, %lor.rhs5 ]
+ %neg13 = xor i64 %tmp, -1
+ %conv25 = zext i1 %tmp3 to i32
+ %tobool46 = icmp eq i64 %tmp, 0
+ %.pre = load i16, i16* @h, align 2
+ %tobool10 = icmp eq i16 %.pre, 0
+ %neg.us = xor i32 %tmp1, -1
+ %conv12.us = sext i32 %neg.us to i64
+ %tobool23.us = icmp eq i64 %tmp2, %and
+ %conv39.us = sext i32 %tmp1 to i64
+ br label %LABEL_mSmSDb
+
+LABEL_mSmSDb.loopexit: ; preds = %lor.end32.us
+ %conv42.us.lcssa = phi i32 [ %conv42.us, %lor.end32.us ]
+ store i64 undef, i64* @g, align 8
+ br label %LABEL_mSmSDb
+
+LABEL_mSmSDb: ; preds = %LABEL_mSmSDb.loopexit, %lor.end7
+ %tmp4 = phi i32 [ undef, %lor.end7 ], [ %conv42.us.lcssa, %LABEL_mSmSDb.loopexit ]
+ %tmp5 = phi i64 [ %tmp, %lor.end7 ], [ 0, %LABEL_mSmSDb.loopexit ]
+ br i1 %tobool10, label %LABEL_BRBRN.preheader, label %if.then
+
+if.then: ; preds = %LABEL_mSmSDb
+ store i8 undef, i8* @a, align 1
+ br label %LABEL_BRBRN.preheader
+
+LABEL_BRBRN.preheader: ; preds = %if.then, %LABEL_mSmSDb
+ %.pre63 = load i64, i64* @g, align 8
+ br i1 %tobool46, label %LABEL_BRBRN.us, label %LABEL_BRBRN.outer
+
+LABEL_BRBRN.outer: ; preds = %if.then47, %LABEL_BRBRN.preheader
+ %.ph = phi i32 [ 0, %if.then47 ], [ %tmp4, %LABEL_BRBRN.preheader ]
+ %.ph64 = phi i32 [ %conv50, %if.then47 ], [ %tmp1, %LABEL_BRBRN.preheader ]
+ %.ph65 = phi i64 [ %tmp16, %if.then47 ], [ %.pre63, %LABEL_BRBRN.preheader ]
+ %.ph66 = phi i64 [ 0, %if.then47 ], [ %tmp2, %LABEL_BRBRN.preheader ]
+ %.ph67 = phi i64 [ %.pre56.pre, %if.then47 ], [ %tmp5, %LABEL_BRBRN.preheader ]
+ %neg = xor i32 %.ph64, -1
+ %conv12 = sext i32 %neg to i64
+ %tobool23 = icmp eq i64 %.ph66, %and
+ %tmp6 = load i32, i32* @j, align 4
+ %shr = lshr i32 %conv25, %tmp6
+ %conv39 = sext i32 %.ph64 to i64
+ br label %LABEL_BRBRN
+
+LABEL_BRBRN.us: ; preds = %lor.end32.us, %LABEL_BRBRN.preheader
+ %tmp7 = phi i32 [ %conv42.us, %lor.end32.us ], [ %tmp4, %LABEL_BRBRN.preheader ]
+ %tmp8 = phi i64 [ undef, %lor.end32.us ], [ %.pre63, %LABEL_BRBRN.preheader ]
+ %tmp9 = phi i64 [ %tmp10, %lor.end32.us ], [ %tmp5, %LABEL_BRBRN.preheader ]
+ %mul.us = mul i64 %tmp8, %neg13
+ %mul14.us = mul i64 %mul.us, %conv12.us
+ %cmp.us = icmp sgt i64 %tmp2, %mul14.us
+ %conv16.us = zext i1 %cmp.us to i64
+ %xor.us = xor i64 %conv16.us, %tmp9
+ %rem18.us = urem i32 %lor.ext, %tmp7
+ %conv19.us = zext i32 %rem18.us to i64
+ br i1 %tobool23.us, label %lor.rhs24.us, label %lor.end32.us
+
+lor.rhs24.us: ; preds = %LABEL_BRBRN.us
+ br label %lor.end32.us
+
+lor.end32.us: ; preds = %lor.rhs24.us, %LABEL_BRBRN.us
+ %tmp10 = phi i64 [ -2, %LABEL_BRBRN.us ], [ -1, %lor.rhs24.us ]
+ %xor.us.not = xor i64 %xor.us, -1
+ %neg36.us = and i64 %conv19.us, %xor.us.not
+ %conv37.us = zext i32 %tmp7 to i64
+ %sub38.us = sub nsw i64 %neg36.us, %conv37.us
+ %mul40.us = mul nsw i64 %sub38.us, %conv39.us
+ %neg41.us = xor i64 %mul40.us, 4294967295
+ %conv42.us = trunc i64 %neg41.us to i32
+ %tobool43.us = icmp eq i8 undef, 0
+ br i1 %tobool43.us, label %LABEL_mSmSDb.loopexit, label %LABEL_BRBRN.us
+
+LABEL_BRBRN: ; preds = %lor.end32, %LABEL_BRBRN.outer
+ %tmp11 = phi i32 [ %conv42, %lor.end32 ], [ %.ph, %LABEL_BRBRN.outer ]
+ %tmp12 = phi i64 [ %neg21, %lor.end32 ], [ %.ph65, %LABEL_BRBRN.outer ]
+ %tmp13 = phi i64 [ %conv35, %lor.end32 ], [ %.ph67, %LABEL_BRBRN.outer ]
+ %mul = mul i64 %tmp12, %neg13
+ %mul14 = mul i64 %mul, %conv12
+ %cmp = icmp sgt i64 %.ph66, %mul14
+ %conv16 = zext i1 %cmp to i64
+ %xor = xor i64 %conv16, %tmp13
+ %rem18 = urem i32 %lor.ext, %tmp11
+ %conv19 = zext i32 %rem18 to i64
+ %neg21 = or i64 %xor, undef
+ br i1 %tobool23, label %lor.rhs24, label %lor.end32
+
+lor.rhs24: ; preds = %LABEL_BRBRN
+ %tmp14 = load volatile i16, i16* @f, align 2
+ %conv26 = sext i16 %tmp14 to i32
+ %and27 = and i32 %conv26, %shr
+ %conv28 = sext i32 %and27 to i64
+ %mul29 = mul nsw i64 %conv28, %tmp
+ %and30 = and i64 %mul29, %tmp13
+ %tobool31 = icmp ne i64 %and30, 0
+ br label %lor.end32
+
+lor.end32: ; preds = %lor.rhs24, %LABEL_BRBRN
+ %tmp15 = phi i1 [ true, %LABEL_BRBRN ], [ %tobool31, %lor.rhs24 ]
+ %lor.ext33 = zext i1 %tmp15 to i32
+ %neg34 = xor i32 %lor.ext33, -1
+ %conv35 = sext i32 %neg34 to i64
+ %xor.not = xor i64 %xor, -1
+ %neg36 = and i64 %conv19, %xor.not
+ %conv37 = zext i32 %tmp11 to i64
+ %sub38 = sub nsw i64 %neg36, %conv37
+ %mul40 = mul nsw i64 %sub38, %conv39
+ %neg41 = xor i64 %mul40, 4294967295
+ %conv42 = trunc i64 %neg41 to i32
+ %tobool43 = icmp eq i8 undef, 0
+ br i1 %tobool43, label %if.then47, label %LABEL_BRBRN
+
+if.then47: ; preds = %lor.end32
+ tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i64 %conv39)
+ %tmp16 = load i64, i64* @g, align 8
+ %neg49 = xor i64 %tmp16, 4294967295
+ %conv50 = trunc i64 %neg49 to i32
+ %.pre56.pre = load i64, i64* @b, align 8
+ br label %LABEL_BRBRN.outer
+}
+
+declare void @printf(i8* nocapture readonly, ...)
--- /dev/null
+; RUN: llc -verify-regalloc < %s | FileCheck %s
+; Check all spills are rematerialized.
+; CHECK-NOT: Spill
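+;
+; The asm printer tags spill/reload code with comments such as
+; "movq %rax, -8(%rsp) # 8-byte Spill", so a bare CHECK-NOT on the word
+; "Spill" is enough to verify that no spill survived rematerialization.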
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = common global double 0.000000e+00, align 8
+@a = common global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define i32 @uniform_testdata(i32 %p1) {
+entry:
+ %cmp3 = icmp sgt i32 %p1, 0
+ br i1 %cmp3, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ %tmp = add i32 %p1, -1
+ %xtraiter = and i32 %p1, 7
+ %lcmp.mod = icmp eq i32 %xtraiter, 0
+ br i1 %lcmp.mod, label %for.body.preheader.split, label %for.body.prol.preheader
+
+for.body.prol.preheader: ; preds = %for.body.preheader
+ br label %for.body.prol
+
+for.body.prol: ; preds = %for.body.prol, %for.body.prol.preheader
+ %i.04.prol = phi i32 [ %inc.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
+ %prol.iter = phi i32 [ %prol.iter.sub, %for.body.prol ], [ %xtraiter, %for.body.prol.preheader ]
+ %tmp1 = load double, double* @b, align 8
+ %call.prol = tail call double @pow(double %tmp1, double 2.500000e-01)
+ %inc.prol = add nuw nsw i32 %i.04.prol, 1
+ %prol.iter.sub = add i32 %prol.iter, -1
+ %prol.iter.cmp = icmp eq i32 %prol.iter.sub, 0
+ br i1 %prol.iter.cmp, label %for.body.preheader.split.loopexit, label %for.body.prol
+
+for.body.preheader.split.loopexit: ; preds = %for.body.prol
+ %inc.prol.lcssa = phi i32 [ %inc.prol, %for.body.prol ]
+ br label %for.body.preheader.split
+
+for.body.preheader.split: ; preds = %for.body.preheader.split.loopexit, %for.body.preheader
+ %i.04.unr = phi i32 [ 0, %for.body.preheader ], [ %inc.prol.lcssa, %for.body.preheader.split.loopexit ]
+ %tmp2 = icmp ult i32 %tmp, 7
+ br i1 %tmp2, label %for.end.loopexit, label %for.body.preheader.split.split
+
+for.body.preheader.split.split: ; preds = %for.body.preheader.split
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader.split.split
+ %i.04 = phi i32 [ %i.04.unr, %for.body.preheader.split.split ], [ %inc.7, %for.body ]
+ %tmp3 = load double, double* @b, align 8
+ %call = tail call double @pow(double %tmp3, double 2.500000e-01)
+ %tmp4 = load double, double* @b, align 8
+ %call.1 = tail call double @pow(double %tmp4, double 2.500000e-01)
+ %inc.7 = add nsw i32 %i.04, 8
+ %exitcond.7 = icmp eq i32 %inc.7, %p1
+ br i1 %exitcond.7, label %for.end.loopexit.unr-lcssa, label %for.body
+
+for.end.loopexit.unr-lcssa: ; preds = %for.body
+ br label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.end.loopexit.unr-lcssa, %for.body.preheader.split
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %tmp5 = load i32, i32* @a, align 4
+ ret i32 %tmp5
+}
+
+; Function Attrs: nounwind
+declare double @pow(double, double)
; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy | FileCheck %s
; This testing case is reduced from 254.gap SyFgets function.
-; We make sure a spill is not hoisted to a hotter outer loop.
+; We make sure a spill is hoisted to a cold BB inside the hotter outer loop.
%struct.TMP.1 = type { %struct.TMP.2*, %struct.TMP.2*, [1024 x i8] }
%struct.TMP.2 = type { i8*, i32, i32, i16, i16, %struct.TMP.3, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.TMP.3, %struct.TMP.4*, i32, [3 x i8], [1 x i8], %struct.TMP.3, i32, i64 }
br i1 %cmp476, label %if.end517, label %do.body479.preheader
do.body479.preheader:
+ ; CHECK: do.body479.preheader
+ ; The spill is hoisted here. Although the depth-1 loop is even hotter than the depth-2 loop, do.body479.preheader itself is cold.
+ ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp)
+ ; CHECK: land.rhs485
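+ ; By definition the preheader executes only once per entry into the
+ ; land.rhs485 loop, so the store is cheap here even though the block
+ ; sits inside the hot outer loop.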
%cmp4833314 = icmp eq i8 undef, 0
br i1 %cmp4833314, label %if.end517, label %land.rhs485
lor.rhs500:
; CHECK: lor.rhs500
- ; Make sure that we don't hoist the spill to outer loops.
- ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp)
+ ; Make sure the spill is hoisted to the cold preheader in the outer loop.
+ ; CHECK-NOT: movq %r{{.*}}, {{[0-9]+}}(%rsp)
; CHECK: callq {{.*}}maskrune
%call3.i.i2792 = call i32 @__maskrune(i32 undef, i64 256)
br i1 undef, label %land.lhs.true504, label %do.body479.backedge