From: Wei Mi
Date: Mon, 4 Apr 2016 17:45:03 +0000 (+0000)
Subject: Revert r265309 and r265312 because they caused some errors I need to investigate.
X-Git-Tag: llvmorg-3.9.0-rc1~10087
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fb5252cac1bd1feb22e8b30950f3b1c36e01bb44;p=platform%2Fupstream%2Fllvm.git

Revert r265309 and r265312 because they caused some errors I need to investigate.

llvm-svn: 265317
---

diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
index fdf7cbd..2271e33 100644
--- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
@@ -72,10 +72,6 @@ private:
   /// ScannedRemattable - true when remattable values have been identified.
   bool ScannedRemattable;
 
-  /// DeadRemats - The saved instructions which have already become dead after
-  /// rematerialization but are not deleted yet -- to be done in
-  /// postOptimization.
-  SmallPtrSet<MachineInstr *, 32> *DeadRemats;
-
   /// Remattable - Values defined by remattable instructions as identified by
   /// tii.isTriviallyReMaterializable().
   SmallPtrSet<const VNInfo *, 4> Remattable;
@@ -120,16 +116,13 @@ public:
   /// @param vrm Map of virtual registers to physical registers for this
   ///            function. If NULL, no virtual register map updates will
   ///            be done. This could be the case if called before Regalloc.
-  /// @param deadRemats The collection of all the instructions that define an
-  ///                   original reg and are dead after remat.
   LiveRangeEdit(LiveInterval *parent, SmallVectorImpl<unsigned> &newRegs,
                 MachineFunction &MF, LiveIntervals &lis, VirtRegMap *vrm,
-                Delegate *delegate = nullptr,
-                SmallPtrSet<MachineInstr *, 32> *deadRemats = nullptr)
+                Delegate *delegate = nullptr)
       : Parent(parent), NewRegs(newRegs), MRI(MF.getRegInfo()), LIS(lis),
-        VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()), TheDelegate(delegate),
-        FirstNew(newRegs.size()), ScannedRemattable(false),
-        DeadRemats(deadRemats) {
+        VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()),
+        TheDelegate(delegate), FirstNew(newRegs.size()),
+        ScannedRemattable(false) {
     MRI.setDelegate(this);
   }
 
@@ -149,16 +142,6 @@ public:
   bool empty() const { return size() == 0; }
   unsigned get(unsigned idx) const { return NewRegs[idx+FirstNew]; }
 
-  /// pop_back - Allow LiveRangeEdit users to drop new registers.
-  /// The context: when the original def instruction of a register is
-  /// dead after rematerialization, we still want to keep it for later
-  /// rematerializations. We save the def instruction in DeadRemats,
-  /// and replace the original dst register with a new dummy register so
-  /// the live range of the original dst register can be shrunk normally.
-  /// We don't want to allocate a phys register for the dummy register, so
-  /// we drop it from the NewRegs set.
-  void pop_back() { NewRegs.pop_back(); }
-
   ArrayRef<unsigned> regs() const {
     return makeArrayRef(NewRegs).slice(FirstNew);
   }
@@ -192,15 +175,15 @@ public:
   /// Remat - Information needed to rematerialize at a specific location.
   struct Remat {
     VNInfo *ParentVNI;    // parent_'s value at the remat location.
-    MachineInstr *OrigMI; // Instruction defining OrigVNI. It contains the
-                          // real expr for remat.
+    MachineInstr *OrigMI; // Instruction defining ParentVNI.
     explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(nullptr) {}
   };
 
   /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
   /// UseIdx. It is assumed that parent_.getVNInfoAt(UseIdx) == ParentVNI.
   /// When cheapAsAMove is set, only cheap remats are allowed.
- bool canRematerializeAt(Remat &RM, VNInfo *OrigVNI, SlotIndex UseIdx, + bool canRematerializeAt(Remat &RM, + SlotIndex UseIdx, bool cheapAsAMove); /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an @@ -225,12 +208,6 @@ public: return Rematted.count(ParentVNI); } - void markDeadRemat(MachineInstr *inst) { - // DeadRemats is an optional field. - if (DeadRemats) - DeadRemats->insert(inst); - } - /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try /// to erase it from LIS. void eraseVirtReg(unsigned Reg); @@ -241,11 +218,8 @@ public: /// RegsBeingSpilled lists registers currently being spilled by the register /// allocator. These registers should not be split into new intervals /// as currently those new intervals are not guaranteed to spill. - /// NoSplit indicates this func is used after the iterations of selectOrSplit - /// where registers should not be split into new intervals. - void eliminateDeadDefs(SmallVectorImpl &Dead, - ArrayRef RegsBeingSpilled = None, - bool NoSplit = false); + void eliminateDeadDefs(SmallVectorImpl &Dead, + ArrayRef RegsBeingSpilled = None); /// calculateRegClassAndHint - Recompute register class and hint for each new /// register. diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index e8abb3d..693e59f 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -48,77 +48,13 @@ STATISTIC(NumReloadsRemoved, "Number of reloads removed"); STATISTIC(NumFolded, "Number of folded stack accesses"); STATISTIC(NumFoldedLoads, "Number of folded loads"); STATISTIC(NumRemats, "Number of rematerialized defs for spilling"); +STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads"); +STATISTIC(NumHoists, "Number of hoisted spills"); static cl::opt DisableHoisting("disable-spill-hoist", cl::Hidden, cl::desc("Disable inline spill hoisting")); namespace { -class HoistSpillHelper { - LiveIntervals &LIS; - LiveStacks &LSS; - AliasAnalysis *AA; - MachineDominatorTree &MDT; - MachineLoopInfo &Loops; - VirtRegMap &VRM; - MachineFrameInfo &MFI; - MachineRegisterInfo &MRI; - const TargetInstrInfo &TII; - const TargetRegisterInfo &TRI; - const MachineBlockFrequencyInfo &MBFI; - - // Map from StackSlot to its original register. - DenseMap StackSlotToReg; - // Map from pair of (StackSlot and Original VNI) to a set of spills which - // have the same stackslot and have equal values defined by Original VNI. - // These spills are mergeable and are hoist candiates. - typedef DenseMap, SmallPtrSet> - MergeableSpillsMap; - MergeableSpillsMap MergeableSpills; - - /// This is the map from original register to a set containing all its - /// siblings. To hoist a spill to another BB, we need to find out a live - /// sibling there and use it as the source of the new spill. 
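
The StackSlotToReg and MergeableSpills maps above are the bookkeeping that spill hoisting relies on: spills are grouped by (stack slot, defining value number), so all spills in one group store the same value and may later be merged. A rough standalone model of that bookkeeping, with illustrative names and plain standard containers instead of the LLVM types:

    #include <map>
    #include <set>
    #include <utility>

    struct MachineInstrStub {};  // stand-in for MachineInstr
    struct VNInfoStub {};        // stand-in for VNInfo

    // Key: (stack slot, value number of the original def). All spills
    // sharing a key store the same value and are merge candidates.
    using SpillKey = std::pair<int, const VNInfoStub *>;
    std::map<SpillKey, std::set<MachineInstrStub *>> MergeableSpills;

    void addToMergeableSpills(MachineInstrStub *Spill, int Slot,
                              const VNInfoStub *OrigVNI) {
      MergeableSpills[{Slot, OrigVNI}].insert(Spill);
    }

    // Returns true iff the spill was tracked; callers use this to keep
    // counters such as NumSpills consistent when spills are folded away.
    bool rmFromMergeableSpills(MachineInstrStub *Spill, int Slot,
                               const VNInfoStub *OrigVNI) {
      return MergeableSpills[{Slot, OrigVNI}].erase(Spill) != 0;
    }
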
- DenseMap> Virt2SiblingsMap; - - bool isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, MachineBasicBlock &BB, - unsigned &LiveReg); - - void rmRedundantSpills( - SmallPtrSet &Spills, - SmallVectorImpl &SpillsToRm, - DenseMap &SpillBBToSpill); - - void getVisitOrders( - MachineBasicBlock *Root, SmallPtrSet &Spills, - SmallVectorImpl &Orders, - SmallVectorImpl &SpillsToRm, - DenseMap &SpillsToKeep, - DenseMap &SpillBBToSpill); - - void runHoistSpills(unsigned OrigReg, VNInfo &OrigVNI, - SmallPtrSet &Spills, - SmallVectorImpl &SpillsToRm, - DenseMap &SpillsToIns); - -public: - HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf, - VirtRegMap &vrm) - : LIS(pass.getAnalysis()), - LSS(pass.getAnalysis()), - AA(&pass.getAnalysis().getAAResults()), - MDT(pass.getAnalysis()), - Loops(pass.getAnalysis()), VRM(vrm), - MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), - TII(*mf.getSubtarget().getInstrInfo()), - TRI(*mf.getSubtarget().getRegisterInfo()), - MBFI(pass.getAnalysis()) {} - - void addToMergeableSpills(MachineInstr *Spill, int StackSlot, - unsigned Original); - bool rmFromMergeableSpills(MachineInstr *Spill, int StackSlot); - void hoistAllSpills(LiveRangeEdit &Edit); -}; - class InlineSpiller : public Spiller { MachineFunction &MF; LiveIntervals &LIS; @@ -149,12 +85,56 @@ class InlineSpiller : public Spiller { // Values that failed to remat at some point. SmallPtrSet UsedValues; +public: + // Information about a value that was defined by a copy from a sibling + // register. + struct SibValueInfo { + // True when all reaching defs were reloads: No spill is necessary. + bool AllDefsAreReloads; + + // True when value is defined by an original PHI not from splitting. + bool DefByOrigPHI; + + // True when the COPY defining this value killed its source. + bool KillsSource; + + // The preferred register to spill. + unsigned SpillReg; + + // The value of SpillReg that should be spilled. + VNInfo *SpillVNI; + + // The block where SpillVNI should be spilled. Currently, this must be the + // block containing SpillVNI->def. + MachineBasicBlock *SpillMBB; + + // A defining instruction that is not a sibling copy or a reload, or NULL. + // This can be used as a template for rematerialization. + MachineInstr *DefMI; + + // List of values that depend on this one. These values are actually the + // same, but live range splitting has placed them in different registers, + // or SSA update needed to insert PHI-defs to preserve SSA form. This is + // copies of the current value and phi-kills. Usually only phi-kills cause + // more than one dependent value. + TinyPtrVector Deps; + + SibValueInfo(unsigned Reg, VNInfo *VNI) + : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false), + SpillReg(Reg), SpillVNI(VNI), SpillMBB(nullptr), DefMI(nullptr) {} + + // Returns true when a def has been found. + bool hasDef() const { return DefByOrigPHI || DefMI; } + }; + +private: + // Values in RegsToSpill defined by sibling copies. + typedef DenseMap SibValueMap; + SibValueMap SibValues; + // Dead defs generated during spilling. SmallVector DeadDefs; - // Object records spills information and does the hoisting. 
- HoistSpillHelper HSpiller; - ~InlineSpiller() override {} public: @@ -167,11 +147,9 @@ public: MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), TRI(*mf.getSubtarget().getRegisterInfo()), - MBFI(pass.getAnalysis()), - HSpiller(pass, mf, vrm) {} + MBFI(pass.getAnalysis()) {} void spill(LiveRangeEdit &) override; - void postOptimization(); private: bool isSnippet(const LiveInterval &SnipLI); @@ -183,7 +161,11 @@ private: } bool isSibling(unsigned Reg); - bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI); + MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*); + void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = nullptr); + void analyzeSiblingValues(); + + bool hoistSpill(LiveInterval &SpillLI, MachineInstr &CopyMI); void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI); void markValueUsed(LiveInterval*, VNInfo*); @@ -315,45 +297,418 @@ void InlineSpiller::collectRegsToSpill() { } } + +//===----------------------------------------------------------------------===// +// Sibling Values +//===----------------------------------------------------------------------===// + +// After live range splitting, some values to be spilled may be defined by +// copies from sibling registers. We trace the sibling copies back to the +// original value if it still exists. We need it for rematerialization. +// +// Even when the value can't be rematerialized, we still want to determine if +// the value has already been spilled, or we may want to hoist the spill from a +// loop. + bool InlineSpiller::isSibling(unsigned Reg) { return TargetRegisterInfo::isVirtualRegister(Reg) && VRM.getOriginal(Reg) == Original; } -/// It is beneficial to spill to earlier place in the same BB in case -/// as follows: -/// There is an alternative def earlier in the same MBB. -/// Hoist the spill as far as possible in SpillMBB. This can ease -/// register pressure: +#ifndef NDEBUG +static raw_ostream &operator<<(raw_ostream &OS, + const InlineSpiller::SibValueInfo &SVI) { + OS << "spill " << PrintReg(SVI.SpillReg) << ':' + << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def; + if (SVI.SpillMBB) + OS << " in BB#" << SVI.SpillMBB->getNumber(); + if (SVI.AllDefsAreReloads) + OS << " all-reloads"; + if (SVI.DefByOrigPHI) + OS << " orig-phi"; + if (SVI.KillsSource) + OS << " kill"; + OS << " deps["; + for (VNInfo *Dep : SVI.Deps) + OS << ' ' << Dep->id << '@' << Dep->def; + OS << " ]"; + if (SVI.DefMI) + OS << " def: " << *SVI.DefMI; + else + OS << '\n'; + return OS; +} +#endif + +/// propagateSiblingValue - Propagate the value in SVI to dependents if it is +/// known. Otherwise remember the dependency for later. /// -/// x = def -/// y = use x -/// s = copy x +/// @param SVIIter SibValues entry to propagate. +/// @param VNI Dependent value, or NULL to propagate to all saved dependents. +void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter, + VNInfo *VNI) { + SibValueMap::value_type *SVI = &*SVIIter; + + // When VNI is non-NULL, add it to SVI's deps, and only propagate to that. + TinyPtrVector FirstDeps; + if (VNI) { + FirstDeps.push_back(VNI); + SVI->second.Deps.push_back(VNI); + } + + // Has the value been completely determined yet? If not, defer propagation. + if (!SVI->second.hasDef()) + return; + + // Work list of values to propagate. + SmallSetVector WorkList; + WorkList.insert(SVI); + + do { + SVI = WorkList.pop_back_val(); + TinyPtrVector *Deps = VNI ? 
&FirstDeps : &SVI->second.Deps; + VNI = nullptr; + + SibValueInfo &SV = SVI->second; + if (!SV.SpillMBB) + SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def); + + DEBUG(dbgs() << " prop to " << Deps->size() << ": " + << SVI->first->id << '@' << SVI->first->def << ":\t" << SV); + + assert(SV.hasDef() && "Propagating undefined value"); + + // Should this value be propagated as a preferred spill candidate? We don't + // propagate values of registers that are about to spill. + bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg); + unsigned SpillDepth = ~0u; + + for (VNInfo *Dep : *Deps) { + SibValueMap::iterator DepSVI = SibValues.find(Dep); + assert(DepSVI != SibValues.end() && "Dependent value not in SibValues"); + SibValueInfo &DepSV = DepSVI->second; + if (!DepSV.SpillMBB) + DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def); + + bool Changed = false; + + // Propagate defining instruction. + if (!DepSV.hasDef()) { + Changed = true; + DepSV.DefMI = SV.DefMI; + DepSV.DefByOrigPHI = SV.DefByOrigPHI; + } + + // Propagate AllDefsAreReloads. For PHI values, this computes an AND of + // all predecessors. + if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) { + Changed = true; + DepSV.AllDefsAreReloads = false; + } + + // Propagate best spill value. + if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) { + if (SV.SpillMBB == DepSV.SpillMBB) { + // DepSV is in the same block. Hoist when dominated. + if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) { + // This is an alternative def earlier in the same MBB. + // Hoist the spill as far as possible in SpillMBB. This can ease + // register pressure: + // + // x = def + // y = use x + // s = copy x + // + // Hoisting the spill of s to immediately after the def removes the + // interference between x and y: + // + // x = def + // spill x + // y = use x + // + // This hoist only helps when the DepSV copy kills its source. + Changed = true; + DepSV.SpillReg = SV.SpillReg; + DepSV.SpillVNI = SV.SpillVNI; + DepSV.SpillMBB = SV.SpillMBB; + } + } else { + // DepSV is in a different block. + if (SpillDepth == ~0u) + SpillDepth = Loops.getLoopDepth(SV.SpillMBB); + + // Also hoist spills to blocks with smaller loop depth, but make sure + // that the new value dominates. Non-phi dependents are always + // dominated, phis need checking. + + const BranchProbability MarginProb(4, 5); // 80% + // Hoist a spill to outer loop if there are multiple dependents (it + // can be beneficial if more than one dependents are hoisted) or + // if DepSV (the hoisting source) is hotter than SV (the hoisting + // destination) (we add a 80% margin to bias a little towards + // loop depth). + bool HoistCondition = + (MBFI.getBlockFreq(DepSV.SpillMBB) >= + (MBFI.getBlockFreq(SV.SpillMBB) * MarginProb)) || + Deps->size() > 1; + + if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) && + HoistCondition && + (!DepSVI->first->isPHIDef() || + MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) { + Changed = true; + DepSV.SpillReg = SV.SpillReg; + DepSV.SpillVNI = SV.SpillVNI; + DepSV.SpillMBB = SV.SpillMBB; + } + } + } + + if (!Changed) + continue; + + // Something changed in DepSVI. Propagate to dependents. + WorkList.insert(&*DepSVI); + + DEBUG(dbgs() << " update " << DepSVI->first->id << '@' + << DepSVI->first->def << " to:\t" << DepSV); + } + } while (!WorkList.empty()); +} + +/// traceSiblingValue - Trace a value that is about to be spilled back to the +/// real defining instructions by looking through sibling copies. 
Always stay +/// within the range of OrigVNI so the registers are known to carry the same +/// value. /// -/// Hoisting the spill of s to immediately after the def removes the -/// interference between x and y: +/// Determine if the value is defined by all reloads, so spilling isn't +/// necessary - the value is already in the stack slot. /// -/// x = def -/// spill x -/// y = use x +/// Return a defining instruction that may be a candidate for rematerialization. /// -/// This hoist only helps when the copy kills its source. +MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, + VNInfo *OrigVNI) { + // Check if a cached value already exists. + SibValueMap::iterator SVI; + bool Inserted; + std::tie(SVI, Inserted) = + SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI))); + if (!Inserted) { + DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':' + << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second); + return SVI->second.DefMI; + } + + DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':' + << UseVNI->id << '@' << UseVNI->def << '\n'); + + // List of (Reg, VNI) that have been inserted into SibValues, but need to be + // processed. + SmallVector, 8> WorkList; + WorkList.push_back(std::make_pair(UseReg, UseVNI)); + + LiveInterval &OrigLI = LIS.getInterval(Original); + do { + unsigned Reg; + VNInfo *VNI; + std::tie(Reg, VNI) = WorkList.pop_back_val(); + DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def + << ":\t"); + + // First check if this value has already been computed. + SVI = SibValues.find(VNI); + assert(SVI != SibValues.end() && "Missing SibValues entry"); + + // Trace through PHI-defs created by live range splitting. + if (VNI->isPHIDef()) { + // Stop at original PHIs. We don't know the value at the + // predecessors. Look up the VNInfo for the current definition + // in OrigLI, to properly determine whether or not this phi was + // added by splitting. + if (VNI->def == OrigLI.getVNInfoAt(VNI->def)->def) { + DEBUG(dbgs() << "orig phi value\n"); + SVI->second.DefByOrigPHI = true; + SVI->second.AllDefsAreReloads = false; + propagateSiblingValue(SVI); + continue; + } + + // This is a PHI inserted by live range splitting. We could trace the + // live-out value from predecessor blocks, but that search can be very + // expensive if there are many predecessors and many more PHIs as + // generated by tail-dup when it sees an indirectbr. Instead, look at + // all the non-PHI defs that have the same value as OrigVNI. They must + // jointly dominate VNI->def. This is not optimal since VNI may actually + // be jointly dominated by a smaller subset of defs, so there is a change + // we will miss a AllDefsAreReloads optimization. + + // Separate all values dominated by OrigVNI into PHIs and non-PHIs. + SmallVector PHIs, NonPHIs; + LiveInterval &LI = LIS.getInterval(Reg); + + for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end(); + VI != VE; ++VI) { + VNInfo *VNI2 = *VI; + if (VNI2->isUnused()) + continue; + if (!OrigLI.containsOneValue() && + OrigLI.getVNInfoAt(VNI2->def) != OrigVNI) + continue; + if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def) + PHIs.push_back(VNI2); + else + NonPHIs.push_back(VNI2); + } + DEBUG(dbgs() << "split phi value, checking " << PHIs.size() + << " phi-defs, and " << NonPHIs.size() + << " non-phi/orig defs\n"); + + // Create entries for all the PHIs. Don't add them to the worklist, we + // are processing all of them in one go here. 
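
Deferred propagation is the core trick of the restored sibling-value analysis: a value whose definition is not yet known records its dependents, and once the definition is resolved the result is pushed through a worklist. A minimal sketch of that scheme, assuming plain integer ids in place of VNInfo and omitting the flag merging and spill-location choices the real propagateSiblingValue performs:

    #include <unordered_map>
    #include <vector>

    struct Info {
      bool Known = false;
      int Data = 0;
      std::vector<int> Deps;  // ids of values waiting on this one
    };

    // Fill in one value and push the result to every (transitive)
    // dependent that is still unresolved.
    void resolve(std::unordered_map<int, Info> &Values, int Id, int Data) {
      Values[Id].Known = true;
      Values[Id].Data = Data;
      std::vector<int> Work{Id};
      while (!Work.empty()) {
        int Cur = Work.back();
        Work.pop_back();
        for (int Dep : Values[Cur].Deps) {
          Info &D = Values[Dep];
          if (D.Known)
            continue;             // already determined, nothing to push
          D.Known = true;
          D.Data = Values[Cur].Data;
          Work.push_back(Dep);    // its dependents may now be resolvable
        }
      }
    }
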
+ for (VNInfo *PHI : PHIs) + SibValues.insert(std::make_pair(PHI, SibValueInfo(Reg, PHI))); + + // Add every PHI as a dependent of all the non-PHIs. + for (VNInfo *NonPHI : NonPHIs) { + // Known value? Try an insertion. + std::tie(SVI, Inserted) = + SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI))); + // Add all the PHIs as dependents of NonPHI. + SVI->second.Deps.insert(SVI->second.Deps.end(), PHIs.begin(), + PHIs.end()); + // This is the first time we see NonPHI, add it to the worklist. + if (Inserted) + WorkList.push_back(std::make_pair(Reg, NonPHI)); + else + // Propagate to all inserted PHIs, not just VNI. + propagateSiblingValue(SVI); + } + + // Next work list item. + continue; + } + + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + + // Trace through sibling copies. + if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { + if (isSibling(SrcReg)) { + LiveInterval &SrcLI = LIS.getInterval(SrcReg); + LiveQueryResult SrcQ = SrcLI.Query(VNI->def); + assert(SrcQ.valueIn() && "Copy from non-existing value"); + // Check if this COPY kills its source. + SVI->second.KillsSource = SrcQ.isKill(); + VNInfo *SrcVNI = SrcQ.valueIn(); + DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':' + << SrcVNI->id << '@' << SrcVNI->def + << " kill=" << unsigned(SVI->second.KillsSource) << '\n'); + // Known sibling source value? Try an insertion. + std::tie(SVI, Inserted) = SibValues.insert( + std::make_pair(SrcVNI, SibValueInfo(SrcReg, SrcVNI))); + // This is the first time we see Src, add it to the worklist. + if (Inserted) + WorkList.push_back(std::make_pair(SrcReg, SrcVNI)); + propagateSiblingValue(SVI, VNI); + // Next work list item. + continue; + } + } + + // Track reachable reloads. + SVI->second.DefMI = MI; + SVI->second.SpillMBB = MI->getParent(); + int FI; + if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) { + DEBUG(dbgs() << "reload\n"); + propagateSiblingValue(SVI); + // Next work list item. + continue; + } + + // Potential remat candidate. + DEBUG(dbgs() << "def " << *MI); + SVI->second.AllDefsAreReloads = false; + propagateSiblingValue(SVI); + } while (!WorkList.empty()); + + // Look up the value we were looking for. We already did this lookup at the + // top of the function, but SibValues may have been invalidated. + SVI = SibValues.find(UseVNI); + assert(SVI != SibValues.end() && "Didn't compute requested info"); + DEBUG(dbgs() << " traced to:\t" << SVI->second); + return SVI->second.DefMI; +} + +/// analyzeSiblingValues - Trace values defined by sibling copies back to +/// something that isn't a sibling copy. /// -bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, - MachineInstr &CopyMI) { +/// Keep track of values that may be rematerializable. +void InlineSpiller::analyzeSiblingValues() { + SibValues.clear(); + + // No siblings at all? + if (Edit->getReg() == Original) + return; + + LiveInterval &OrigLI = LIS.getInterval(Original); + for (unsigned Reg : RegsToSpill) { + LiveInterval &LI = LIS.getInterval(Reg); + for (LiveInterval::const_vni_iterator VI = LI.vni_begin(), + VE = LI.vni_end(); VI != VE; ++VI) { + VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; + MachineInstr *DefMI = nullptr; + if (!VNI->isPHIDef()) { + DefMI = LIS.getInstructionFromIndex(VNI->def); + assert(DefMI && "No defining instruction"); + } + // Check possible sibling copies. 
+ if (VNI->isPHIDef() || DefMI->isCopy()) { + VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); + assert(OrigVNI && "Def outside original live range"); + if (OrigVNI->def != VNI->def) + DefMI = traceSiblingValue(Reg, VNI, OrigVNI); + } + if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) { + DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' + << VNI->def << " may remat from " << *DefMI); + } + } + } +} + +/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert +/// a spill at a better location. +bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr &CopyMI) { SlotIndex Idx = LIS.getInstructionIndex(CopyMI); -#ifndef NDEBUG VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot()); assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); -#endif - unsigned SrcReg = CopyMI.getOperand(1).getReg(); - LiveInterval &SrcLI = LIS.getInterval(SrcReg); - VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx); - LiveQueryResult SrcQ = SrcLI.Query(Idx); - MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(SrcVNI->def); - if (DefMBB != CopyMI.getParent() || !SrcQ.isKill()) + SibValueMap::iterator I = SibValues.find(VNI); + if (I == SibValues.end()) + return false; + + const SibValueInfo &SVI = I->second; + + // Let the normal folding code deal with the boring case. + if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI) return false; + // SpillReg may have been deleted by remat and DCE. + if (!LIS.hasInterval(SVI.SpillReg)) { + DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + + LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg); + if (!SibLI.containsValue(SVI.SpillVNI)) { + DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + // Conservatively extend the stack slot range to the range of the original // value. We may be able to do better with stack slot coloring by being more // careful here. @@ -364,29 +719,35 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " << *StackInt << '\n'); - // We are going to spill SrcVNI immediately after its def, so clear out + // Already spilled everywhere. + if (SVI.AllDefsAreReloads) { + DEBUG(dbgs() << "\tno spill needed: " << SVI); + ++NumOmitReloadSpill; + return true; + } + // We are going to spill SVI.SpillVNI immediately after its def, so clear out // any later spills of the same value. - eliminateRedundantSpills(SrcLI, SrcVNI); + eliminateRedundantSpills(SibLI, SVI.SpillVNI); - MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def); + MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def); MachineBasicBlock::iterator MII; - if (SrcVNI->isPHIDef()) + if (SVI.SpillVNI->isPHIDef()) MII = MBB->SkipPHIsAndLabels(MBB->begin()); else { - MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def); + MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def); assert(DefMI && "Defining instruction disappeared"); MII = DefMI; ++MII; } // Insert spill without kill flag immediately after def. - TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot, - MRI.getRegClass(SrcReg), &TRI); + TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot, + MRI.getRegClass(SVI.SpillReg), &TRI); --MII; // Point to store instruction. 
LIS.InsertMachineInstrInMaps(*MII); - DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII); + DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); - HSpiller.addToMergeableSpills(&(*MII), StackSlot, Original); ++NumSpills; + ++NumHoists; return true; } @@ -444,8 +805,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { MI->setDesc(TII.get(TargetOpcode::KILL)); DeadDefs.push_back(MI); ++NumSpillsRemoved; - if (HSpiller.rmFromMergeableSpills(MI, StackSlot)) - --NumSpills; + --NumSpills; } } } while (!WorkList.empty()); @@ -516,12 +876,12 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { if (SnippetCopies.count(&MI)) return false; - LiveInterval &OrigLI = LIS.getInterval(Original); - VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx); + // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy. LiveRangeEdit::Remat RM(ParentVNI); - RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def); - - if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) { + SibValueMap::const_iterator SibI = SibValues.find(ParentVNI); + if (SibI != SibValues.end()) + RM.OrigMI = SibI->second.DefMI; + if (!Edit->canRematerializeAt(RM, UseIdx, false)) { markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI); return false; @@ -571,6 +931,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { /// reMaterializeAll - Try to rematerialize as many uses as possible, /// and trim the live ranges after. void InlineSpiller::reMaterializeAll() { + // analyzeSiblingValues has already tested all relevant defining instructions. if (!Edit->anyRematerializable(AA)) return; @@ -656,9 +1017,6 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { if (InstrReg != Reg || FI != StackSlot) return false; - if (!IsLoad) - HSpiller.rmFromMergeableSpills(MI, StackSlot); - DEBUG(dbgs() << "Coalescing stack access: " << *MI); LIS.RemoveMachineInstrFromMaps(*MI); MI->eraseFromParent(); @@ -783,9 +1141,6 @@ foldMemoryOperand(ArrayRef > Ops, LIS.removePhysRegDefAt(Reg, Idx); } - int FI; - if (TII.isStoreToStackSlot(MI, FI) && HSpiller.rmFromMergeableSpills(MI, FI)) - --NumSpills; LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI); MI->eraseFromParent(); @@ -811,10 +1166,9 @@ foldMemoryOperand(ArrayRef > Ops, if (!WasCopy) ++NumFolded; - else if (Ops.front().second == 0) { + else if (Ops.front().second == 0) ++NumSpills; - HSpiller.addToMergeableSpills(FoldMI, StackSlot, Original); - } else + else ++NumReloads; return true; } @@ -849,7 +1203,6 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS, "spill")); ++NumSpills; - HSpiller.addToMergeableSpills(std::next(MI), StackSlot, Original); } /// spillAroundUses - insert spill code around each use of Reg. @@ -913,7 +1266,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { continue; } if (RI.Writes) { - if (hoistSpillInsideBB(OldLI, *MI)) { + // Hoist the spill of a sib-reg copy. + if (hoistSpill(OldLI, *MI)) { // This COPY is now dead, the value is already in the stack slot. MI->getOperand(0).setIsDead(); DeadDefs.push_back(MI); @@ -1026,6 +1380,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); collectRegsToSpill(); + analyzeSiblingValues(); reMaterializeAll(); // Remat may handle everything. 
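
The next hunk deletes InlineSpiller::postOptimization and the whole HoistSpillHelper implementation. The lifecycle being removed: defs that became dead after rematerialization were parked in a DeadRemats set instead of being erased, so sibling ranges could still rematerialize from them, and they were only physically deleted after allocation finished. A simplified model of that lifecycle, with stub types rather than the LLVM API:

    #include <algorithm>
    #include <memory>
    #include <unordered_set>
    #include <vector>

    struct Instr { bool InMaps = true; };  // stand-in for MachineInstr

    // Dead-after-remat defs are parked here so later remats of sibling
    // registers can still read their defining expression.
    std::unordered_set<Instr *> DeadRemats;

    void postOptimization(std::vector<std::unique_ptr<Instr>> &Body) {
      // The spiller's own post pass (hoistAllSpills) would run first.
      // Then the parked instructions are finally removed, mirroring
      // LIS.RemoveMachineInstrFromMaps(*I); I->eraseFromParent();
      for (Instr *I : DeadRemats)
        I->InMaps = false;
      DeadRemats.clear();
      Body.erase(std::remove_if(Body.begin(), Body.end(),
                                [](const std::unique_ptr<Instr> &I) {
                                  return !I->InMaps;
                                }),
                 Body.end());
    }
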
@@ -1034,394 +1389,3 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { Edit->calculateRegClassAndHint(MF, Loops, MBFI); } - -/// Optimizations after all the reg selections and spills are done. -/// -void InlineSpiller::postOptimization() { - SmallVector NewVRegs; - LiveRangeEdit LRE(nullptr, NewVRegs, MF, LIS, &VRM, nullptr); - HSpiller.hoistAllSpills(LRE); - assert(NewVRegs.size() == 0 && - "No new vregs should be generated in hoistAllSpills"); -} - -/// When a spill is inserted, add the spill to MergeableSpills map. -/// -void HoistSpillHelper::addToMergeableSpills(MachineInstr *Spill, int StackSlot, - unsigned Original) { - StackSlotToReg[StackSlot] = Original; - SlotIndex Idx = LIS.getInstructionIndex(*Spill); - VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot()); - std::pair MIdx = std::make_pair(StackSlot, OrigVNI); - MergeableSpills[MIdx].insert(Spill); -} - -/// When a spill is removed, remove the spill from MergeableSpills map. -/// Return true if the spill is removed successfully. -/// -bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr *Spill, - int StackSlot) { - int Original = StackSlotToReg[StackSlot]; - if (!Original) - return false; - SlotIndex Idx = LIS.getInstructionIndex(*Spill); - VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot()); - std::pair MIdx = std::make_pair(StackSlot, OrigVNI); - return MergeableSpills[MIdx].erase(Spill); -} - -/// Check BB to see if it is a possible target BB to place a hoisted spill, -/// i.e., there should be a living sibling of OrigReg at the insert point. -/// -bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, - MachineBasicBlock &BB, unsigned &LiveReg) { - SlotIndex Idx; - MachineBasicBlock::iterator MI = BB.getFirstTerminator(); - if (MI != BB.end()) - Idx = LIS.getInstructionIndex(*MI); - else - Idx = LIS.getMBBEndIdx(&BB).getPrevSlot(); - SmallSetVector &Siblings = Virt2SiblingsMap[OrigReg]; - assert((LIS.getInterval(OrigReg)).getVNInfoAt(Idx) == &OrigVNI && - "Unexpected VNI"); - - for (auto const SibReg : Siblings) { - LiveInterval &LI = LIS.getInterval(SibReg); - VNInfo *VNI = LI.getVNInfoAt(Idx); - if (VNI) { - LiveReg = SibReg; - return true; - } - } - return false; -} - -/// Remove redundent spills in the same BB. Save those redundent spills in -/// SpillsToRm, and save the spill to keep and its BB in SpillBBToSpill map. -/// -void HoistSpillHelper::rmRedundantSpills( - SmallPtrSet &Spills, - SmallVectorImpl &SpillsToRm, - DenseMap &SpillBBToSpill) { - // For each spill saw, check SpillBBToSpill[] and see if its BB already has - // another spill inside. If a BB contains more than one spill, only keep the - // earlier spill with smaller SlotIndex. - for (const auto CurrentSpill : Spills) { - MachineBasicBlock *Block = CurrentSpill->getParent(); - MachineDomTreeNode *Node = MDT.DT->getNode(Block); - MachineInstr *PrevSpill = SpillBBToSpill[Node]; - if (PrevSpill) { - SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill); - SlotIndex CIdx = LIS.getInstructionIndex(*CurrentSpill); - MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill; - MachineInstr *SpillToKeep = (CIdx > PIdx) ? 
PrevSpill : CurrentSpill;
-      SpillsToRm.push_back(SpillToRm);
-      SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep;
-    } else {
-      SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill;
-    }
-  }
-  for (const auto SpillToRm : SpillsToRm)
-    Spills.erase(SpillToRm);
-}
-
-/// Starting from \p Root find a top-down traversal order of the dominator
-/// tree to visit all basic blocks containing the elements of \p Spills.
-/// Redundant spills will be found and put into \p SpillsToRm at the same
-/// time. \p SpillBBToSpill will be populated as part of the process and
-/// maps a basic block to the first store occurring in the basic block.
-/// \post SpillsToRm.union(Spills@post) == Spills@pre
-///
-void HoistSpillHelper::getVisitOrders(
-    MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
-    SmallVectorImpl<MachineDomTreeNode *> &Orders,
-    SmallVectorImpl<MachineInstr *> &SpillsToRm,
-    DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
-    DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
-  // The set contains all the possible BB nodes to which we may hoist
-  // original spills.
-  SmallPtrSet<MachineDomTreeNode *, 8> WorkSet;
-  // Save the BB nodes on the path from the first BB node containing a
-  // non-redundant spill to the Root node.
-  SmallPtrSet<MachineDomTreeNode *, 8> NodesOnPath;
-  // All the spills to be hoisted must originate from a single def instruction
-  // to the OrigReg. It means the def instruction should dominate all the spills
-  // to be hoisted. We choose the BB where the def instruction is located as
-  // the Root.
-  MachineDomTreeNode *RootIDomNode = MDT[Root]->getIDom();
-  // For every node on the dominator tree with a spill, walk up the dominator
-  // tree towards the Root node until it is reached. If there is another node
-  // containing a spill in the middle of the path, the previous spill seen will
-  // be redundant and the node containing it will be removed. All the nodes on
-  // the path starting from the first node with a non-redundant spill to the
-  // Root node will be added to the WorkSet, which will contain all the
-  // possible locations where spills may be hoisted to after the loop below
-  // is done.
-  for (const auto Spill : Spills) {
-    MachineBasicBlock *Block = Spill->getParent();
-    MachineDomTreeNode *Node = MDT[Block];
-    MachineInstr *SpillToRm = nullptr;
-    while (Node != RootIDomNode) {
-      // If Node dominates Block, and it already contains a spill, the spill in
-      // Block will be redundant.
-      if (Node != MDT[Block] && SpillBBToSpill[Node]) {
-        SpillToRm = SpillBBToSpill[MDT[Block]];
-        break;
-        /// If we see the Node already in WorkSet, the path from the Node to
-        /// the Root node must already be traversed by another spill.
-        /// Then no need to repeat.
-      } else if (WorkSet.count(Node)) {
-        break;
-      } else {
-        NodesOnPath.insert(Node);
-      }
-      Node = Node->getIDom();
-    }
-    if (SpillToRm) {
-      SpillsToRm.push_back(SpillToRm);
-    } else {
-      // Add a BB containing the original spills to SpillsToKeep -- i.e.,
-      // set the initial status before hoisting starts. The value of BBs
-      // containing original spills is set to 0, in order to discriminate
-      // them from BBs containing hoisted spills, which will be inserted
-      // into SpillsToKeep later during hoisting.
-      SpillsToKeep[MDT[Block]] = 0;
-      WorkSet.insert(NodesOnPath.begin(), NodesOnPath.end());
-    }
-    NodesOnPath.clear();
-  }
-
-  // Sort the nodes in WorkSet in top-down order and save the nodes
-  // in Orders. Orders will be used for hoisting in runHoistSpills.
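
The idx-based loop below implements a breadth-first, top-down walk of the dominator tree restricted to WorkSet: a node is appended only after its dominator, so iterating Orders in reverse later yields a bottom-up order. A standalone sketch of the same traversal, with a stub DomNode type standing in for MachineDomTreeNode:

    #include <cstddef>
    #include <unordered_set>
    #include <vector>

    struct DomNode { std::vector<DomNode *> Children; };

    // Top-down (parents before children) order over the nodes of
    // WorkSet, rooted at Root. Root is always included, as above.
    std::vector<DomNode *>
    visitOrder(DomNode *Root, const std::unordered_set<DomNode *> &WorkSet) {
      std::vector<DomNode *> Orders{Root};
      for (std::size_t Idx = 0; Idx != Orders.size(); ++Idx)
        for (DomNode *Child : Orders[Idx]->Children)
          if (WorkSet.count(Child))
            Orders.push_back(Child);
      return Orders;
    }
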
- unsigned idx = 0; - Orders.push_back(MDT.DT->getNode(Root)); - do { - MachineDomTreeNode *Node = Orders[idx++]; - const std::vector &Children = Node->getChildren(); - unsigned NumChildren = Children.size(); - for (unsigned i = 0; i != NumChildren; ++i) { - MachineDomTreeNode *Child = Children[i]; - if (WorkSet.count(Child)) - Orders.push_back(Child); - } - } while (idx != Orders.size()); - assert(Orders.size() == WorkSet.size() && - "Orders have different size with WorkSet"); - -#ifndef NDEBUG - DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n"); - SmallVector::reverse_iterator RIt = Orders.rbegin(); - for (; RIt != Orders.rend(); RIt++) - DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ","); - DEBUG(dbgs() << "\n"); -#endif -} - -/// Try to hoist spills according to BB hotness. The spills to removed will -/// be saved in \p SpillsToRm. The spills to be inserted will be saved in -/// \p SpillsToIns. -/// -void HoistSpillHelper::runHoistSpills( - unsigned OrigReg, VNInfo &OrigVNI, SmallPtrSet &Spills, - SmallVectorImpl &SpillsToRm, - DenseMap &SpillsToIns) { - // Visit order of dominator tree nodes. - SmallVector Orders; - // SpillsToKeep contains all the nodes where spills are to be inserted - // during hoisting. If the spill to be inserted is an original spill - // (not a hoisted one), the value of the map entry is 0. If the spill - // is a hoisted spill, the value of the map entry is the VReg to be used - // as the source of the spill. - DenseMap SpillsToKeep; - // Map from BB to the first spill inside of it. - DenseMap SpillBBToSpill; - - rmRedundantSpills(Spills, SpillsToRm, SpillBBToSpill); - - MachineBasicBlock *Root = LIS.getMBBFromIndex(OrigVNI.def); - getVisitOrders(Root, Spills, Orders, SpillsToRm, SpillsToKeep, - SpillBBToSpill); - - // SpillsInSubTree keeps the map from a dom tree node to a pair of - // nodes set and the cost of all the spills inside those nodes. - // The nodes set are the locations where spills are to be inserted - // in the subtree of current node. - typedef std::pair, BlockFrequency> - NodesCostPair; - DenseMap SpillsInSubTreeMap; - // Iterate Orders set in reverse order, which will be a bottom-up order - // in the dominator tree. Once we visit a dom tree node, we know its - // children have already been visited and the spill locations in the - // subtrees of all the children have been determined. - SmallVector::reverse_iterator RIt = Orders.rbegin(); - for (; RIt != Orders.rend(); RIt++) { - MachineBasicBlock *Block = (*RIt)->getBlock(); - SmallPtrSet &SpillsInSubTree = - SpillsInSubTreeMap[*RIt].first; - // Total spill costs inside the sub tree. - BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second; - - // If Block contains an original spill, simply continue. - if (SpillsToKeep.find(*RIt) != SpillsToKeep.end() && !SpillsToKeep[*RIt]) { - SpillsInSubTree.insert(*RIt); - SubTreeCost = MBFI.getBlockFreq(Block); - continue; - } - - // Collect spills in subtree of current node (*RIt) to - // SpillsInSubTree. - const std::vector &Children = (*RIt)->getChildren(); - unsigned NumChildren = Children.size(); - for (unsigned i = 0; i != NumChildren; ++i) { - MachineDomTreeNode *Child = Children[i]; - SpillsInSubTree.insert(SpillsInSubTreeMap[Child].first.begin(), - SpillsInSubTreeMap[Child].first.end()); - SubTreeCost += SpillsInSubTreeMap[Child].second; - SpillsInSubTreeMap.erase(Child); - } - - // No spills in subtree, simply continue. 
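
The loop above walks Orders in reverse, accumulating bottom-up the set of spills in each subtree together with their total block-frequency cost. A simplified recursive model of that accumulation (the real code iterates the precomputed order instead of recursing, and uses BlockFrequency rather than double):

    #include <map>
    #include <set>
    #include <vector>

    struct Node {
      std::vector<Node *> Children;
      double Freq = 0;        // block frequency
      bool HasSpill = false;  // block holds an original spill
    };

    // A node's subtree cost is the sum of the spill costs below it,
    // unless it contains an original spill itself, in which case the
    // subtree is just that spill (the "simply continue" case above).
    void accumulate(Node *N, std::map<Node *, std::set<Node *>> &Spills,
                    std::map<Node *, double> &Cost) {
      if (N->HasSpill) {
        Spills[N] = {N};
        Cost[N] = N->Freq;
        return;
      }
      for (Node *C : N->Children) {
        accumulate(C, Spills, Cost);
        Spills[N].insert(Spills[C].begin(), Spills[C].end());
        Cost[N] += Cost[C];
      }
    }
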
-    if (SpillsInSubTree.empty())
-      continue;
-
-    // Check whether Block is a possible candidate to insert a spill.
-    unsigned LiveReg = 0;
-    if (!isSpillCandBB(OrigReg, OrigVNI, *Block, LiveReg))
-      continue;
-
-    // If there are multiple spills that could be merged, bias a little
-    // to hoist the spill.
-    BranchProbability MarginProb = (SpillsInSubTree.size() > 1)
-                                       ? BranchProbability(9, 10)
-                                       : BranchProbability(1, 1);
-    if (SubTreeCost > MBFI.getBlockFreq(Block) * MarginProb) {
-      // Hoist: Move spills to current Block.
-      for (const auto SpillBB : SpillsInSubTree) {
-        // When SpillBB is a BB containing an original spill, insert the
-        // spill into SpillsToRm.
-        if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() &&
-            !SpillsToKeep[SpillBB]) {
-          MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
-          SpillsToRm.push_back(SpillToRm);
-        }
-        // SpillBB will not contain a spill anymore, remove it from
-        // SpillsToKeep.
-        SpillsToKeep.erase(SpillBB);
-      }
-      // Current Block is the BB containing the new hoisted spill. Add it to
-      // SpillsToKeep. LiveReg is the source of the new spill.
-      SpillsToKeep[*RIt] = LiveReg;
-      DEBUG({
-        dbgs() << "spills in BB: ";
-        for (const auto Rspill : SpillsInSubTree)
-          dbgs() << Rspill->getBlock()->getNumber() << " ";
-        dbgs() << "were promoted to BB" << (*RIt)->getBlock()->getNumber()
-               << "\n";
-      });
-      SpillsInSubTree.clear();
-      SpillsInSubTree.insert(*RIt);
-      SubTreeCost = MBFI.getBlockFreq(Block);
-    }
-  }
-  // For spills in SpillsToKeep with LiveReg set (i.e., not original spills),
-  // save them to SpillsToIns.
-  for (const auto Ent : SpillsToKeep) {
-    if (Ent.second)
-      SpillsToIns[Ent.first->getBlock()] = Ent.second;
-  }
-}
-
-/// For spills with equal values, remove redundant spills and hoist those
-/// left to less hot spots.
-///
-/// Spills with equal values will be collected into the same set in
-/// MergeableSpills when a spill is inserted. These equal spills originate
-/// from the same def instruction and are dominated by the instruction.
-/// Before hoisting all the equal spills, redundant spills inside the same
-/// BB are first marked for deletion. Then, starting from the spills left,
-/// walk up the dominator tree towards the Root node where the def
-/// instruction is located, mark the dominated spills to be deleted along
-/// the way and collect the BB nodes on the path from non-dominated spills
-/// to the def instruction into a WorkSet. The nodes in WorkSet are the
-/// candidate places where we consider hoisting the spills. We iterate the
-/// WorkSet in bottom-up order, and for each node, we decide whether to
-/// hoist the spills inside its subtree to that node. In this way, we can
-/// get benefit locally even if hoisting all the equal spills to one cold
-/// place is impossible.
-///
-void HoistSpillHelper::hoistAllSpills(LiveRangeEdit &Edit) {
-  // Save the mapping between stackslot and its original reg.
-  DenseMap<int, unsigned> SlotToOrigReg;
-  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
-    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
-    int Slot = VRM.getStackSlot(Reg);
-    if (Slot != VirtRegMap::NO_STACK_SLOT)
-      SlotToOrigReg[Slot] = VRM.getOriginal(Reg);
-    unsigned Original = VRM.getPreSplitReg(Reg);
-    if (!MRI.def_empty(Reg))
-      Virt2SiblingsMap[Original].insert(Reg);
-  }
-
-  // Each entry in MergeableSpills contains a spill set with equal values.
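
The hoisting test above deserves a worked example. Hoisting into Block pays off when the summed frequency of the spills in its subtree exceeds Block's own frequency, and a 10% discount (BranchProbability(9, 10)) is applied when more than one spill would merge. A numeric sketch of the check, with made-up frequencies:

    #include <cassert>

    // Model of the test in runHoistSpills: hoist when the subtree's
    // summed spill frequency beats the candidate block's frequency,
    // discounted by 10% when several spills would merge into one.
    bool shouldHoist(double SubTreeCost, double BlockFreq,
                     unsigned NumSpills) {
      double Margin = NumSpills > 1 ? 0.9 : 1.0;  // BranchProbability(9, 10)
      return SubTreeCost > BlockFreq * Margin;
    }

    int main() {
      // Two spills (60 + 50 = 110) hoisted into a block of frequency 115:
      // rejected without the margin, accepted with it.
      assert(!shouldHoist(110, 115, 1));
      assert(shouldHoist(110, 115, 2));
    }
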
- for (auto &Ent : MergeableSpills) { - int Slot = Ent.first.first; - unsigned OrigReg = SlotToOrigReg[Slot]; - VNInfo *OrigVNI = Ent.first.second; - SmallPtrSet &EqValSpills = Ent.second; - if (Ent.second.empty()) - continue; - - DEBUG({ - dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n" - << "Equal spills in BB: "; - for (const auto spill : EqValSpills) - dbgs() << spill->getParent()->getNumber() << " "; - dbgs() << "\n"; - }); - - // SpillsToRm is the spill set to be removed from EqValSpills. - SmallVector SpillsToRm; - // SpillsToIns is the spill set to be newly inserted after hoisting. - DenseMap SpillsToIns; - - runHoistSpills(OrigReg, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns); - - DEBUG({ - dbgs() << "Finally inserted spills in BB: "; - for (const auto Ispill : SpillsToIns) - dbgs() << Ispill.first->getNumber() << " "; - dbgs() << "\nFinally removed spills in BB: "; - for (const auto Rspill : SpillsToRm) - dbgs() << Rspill->getParent()->getNumber() << " "; - dbgs() << "\n"; - }); - - // Stack live range update. - LiveInterval &StackIntvl = LSS.getInterval(Slot); - if (!SpillsToIns.empty() || !SpillsToRm.empty()) { - LiveInterval &OrigLI = LIS.getInterval(OrigReg); - StackIntvl.MergeValueInAsValue(OrigLI, OrigVNI, - StackIntvl.getValNumInfo(0)); - } - - // Insert hoisted spills. - for (auto const Insert : SpillsToIns) { - MachineBasicBlock *BB = Insert.first; - unsigned LiveReg = Insert.second; - MachineBasicBlock::iterator MI = BB->getFirstTerminator(); - TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot, - MRI.getRegClass(LiveReg), &TRI); - LIS.InsertMachineInstrRangeInMaps(std::prev(MI), MI); - ++NumSpills; - } - - // Remove redundent spills or change them to dead instructions. - NumSpills -= SpillsToRm.size(); - for (auto const RMEnt : SpillsToRm) { - RMEnt->setDesc(TII.get(TargetOpcode::KILL)); - for (unsigned i = RMEnt->getNumOperands(); i; --i) { - MachineOperand &MO = RMEnt->getOperand(i - 1); - if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead()) - RMEnt->RemoveOperand(i - 1); - } - } - Edit.eliminateDeadDefs(SpillsToRm, None, true); - } -} diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index 5610c5a..72eafcd 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -63,13 +63,10 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { for (VNInfo *VNI : getParent().valnos) { if (VNI->isUnused()) continue; - unsigned Original = VRM->getOriginal(getReg()); - LiveInterval &OrigLI = LIS.getInterval(Original); - VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); - MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def); + MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def); if (!DefMI) continue; - checkRematerializable(OrigVNI, DefMI, aa); + checkRematerializable(VNI, DefMI, aa); } ScannedRemattable = true; } @@ -116,18 +113,24 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, return true; } -bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI, - SlotIndex UseIdx, bool cheapAsAMove) { +bool LiveRangeEdit::canRematerializeAt(Remat &RM, + SlotIndex UseIdx, + bool cheapAsAMove) { assert(ScannedRemattable && "Call anyRematerializable first"); // Use scanRemattable info. - if (!Remattable.count(OrigVNI)) + if (!Remattable.count(RM.ParentVNI)) return false; // No defining instruction provided. 
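
The hunk continuing below restores the old fallback in canRematerializeAt: when the caller supplies no defining instruction, it is recovered from the value's def slot. A minimal model of that fallback, with a plain map and an integer slot standing in for LIS.getInstructionFromIndex and SlotIndex:

    #include <cassert>
    #include <map>

    struct MachineInstrStub {};  // stand-in for MachineInstr

    // Stand-in for LIS: maps a def slot to its defining instruction.
    std::map<unsigned, MachineInstrStub *> SlotToInstr;

    MachineInstrStub *defForRemat(MachineInstrStub *OrigMI,
                                  unsigned ParentDefSlot) {
      if (!OrigMI) {  // no defining instruction provided by the caller:
        OrigMI = SlotToInstr[ParentDefSlot];  // fall back to the def slot
        assert(OrigMI && "No defining instruction for remattable value");
      }
      return OrigMI;
    }
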
SlotIndex DefIdx; - assert(RM.OrigMI && "No defining instruction for remattable value"); - DefIdx = LIS.getInstructionIndex(*RM.OrigMI); + if (RM.OrigMI) + DefIdx = LIS.getInstructionIndex(*RM.OrigMI); + else { + DefIdx = RM.ParentVNI->def; + RM.OrigMI = LIS.getInstructionFromIndex(DefIdx); + assert(RM.OrigMI && "No defining instruction for remattable value"); + } // If only cheap remats were requested, bail out early. if (cheapAsAMove && !TII.isAsCheapAsAMove(RM.OrigMI)) @@ -258,15 +261,6 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { // Collect virtual registers to be erased after MI is gone. SmallVector RegsToErase; bool ReadsPhysRegs = false; - bool isOrigDef = false; - unsigned Dest; - if (VRM && MI->getOperand(0).isReg()) { - Dest = MI->getOperand(0).getReg(); - unsigned Original = VRM->getOriginal(Dest); - LiveInterval &OrigLI = LIS.getInterval(Original); - VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx); - isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx); - } // Check for live intervals that may shrink for (MachineInstr::mop_iterator MOI = MI->operands_begin(), @@ -320,24 +314,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { } DEBUG(dbgs() << "Converted physregs to:\t" << *MI); } else { - // If the dest of MI is an original reg, don't delete the inst. Replace - // the dest with a new reg, keep the inst for remat of other siblings. - // The inst is saved in LiveRangeEdit::DeadRemats and will be deleted - // after all the allocations of the func are done. - if (isOrigDef) { - unsigned NewDest = createFrom(Dest); - pop_back(); - markDeadRemat(MI); - const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); - MI->substituteRegister(Dest, NewDest, 0, TRI); - MI->getOperand(0).setIsDead(false); - } else { - if (TheDelegate) - TheDelegate->LRE_WillEraseInstruction(MI); - LIS.RemoveMachineInstrFromMaps(*MI); - MI->eraseFromParent(); - ++NumDCEDeleted; - } + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + ++NumDCEDeleted; } // Erase any virtregs that are now empty and unused. There may be @@ -351,9 +332,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { } } -void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, - ArrayRef RegsBeingSpilled, - bool NoSplit) { +void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, + ArrayRef RegsBeingSpilled) { ToShrinkSet ToShrink; for (;;) { @@ -375,9 +355,6 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, if (!LIS.shrinkToUses(LI, &Dead)) continue; - if (NoSplit) - continue; - // Don't create new intervals for a register being spilled. // The new intervals would have to be spilled anyway so its not worth it. 
// Also they currently aren't spilled so creating them and not spilling diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index 1130d64..16ff48e 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -153,12 +153,3 @@ void RegAllocBase::allocatePhysRegs() { } } } - -void RegAllocBase::postOptimization() { - spiller().postOptimization(); - for (auto DeadInst : DeadRemats) { - LIS->RemoveMachineInstrFromMaps(*DeadInst); - DeadInst->eraseFromParent(); - } - DeadRemats.clear(); -} diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h index 296ffe8..659b8f5 100644 --- a/llvm/lib/CodeGen/RegAllocBase.h +++ b/llvm/lib/CodeGen/RegAllocBase.h @@ -65,12 +65,6 @@ protected: LiveRegMatrix *Matrix; RegisterClassInfo RegClassInfo; - /// Inst which is a def of an original reg and whose defs are already all - /// dead after remat is saved in DeadRemats. The deletion of such inst is - /// postponed till all the allocations are done, so its remat expr is - /// always available for the remat of all the siblings of the original reg. - SmallPtrSet DeadRemats; - RegAllocBase() : TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {} @@ -83,10 +77,6 @@ protected: // physical register assignments. void allocatePhysRegs(); - // Include spiller post optimization and removing dead defs left because of - // rematerialization. - virtual void postOptimization(); - // Get a temporary reference to a Spiller instance. virtual Spiller &spiller() = 0; diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 11dfda6..cfe367d 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -199,7 +199,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, Matrix->unassign(Spill); // Spill the extracted interval. 
- LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats); + LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); } return true; @@ -258,7 +258,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); if (!VirtReg.isSpillable()) return ~0u; - LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats); + LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell @@ -283,7 +283,6 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); allocatePhysRegs(); - postOptimization(); // Diagnostic output before rewriting DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n"); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 4736da6..b243d43 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/Passes.h" #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" @@ -32,7 +33,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -44,7 +44,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include @@ -56,14 +55,14 @@ STATISTIC(NumGlobalSplits, "Number of split global live ranges"); STATISTIC(NumLocalSplits, "Number of split local live ranges"); STATISTIC(NumEvicted, "Number of interferences evicted"); -static cl::opt SplitSpillMode( - "split-spill-mode", cl::Hidden, - cl::desc("Spill mode for splitting live ranges"), - cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), - clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), - clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), - clEnumValEnd), - cl::init(SplitEditor::SM_Speed)); +static cl::opt +SplitSpillMode("split-spill-mode", cl::Hidden, + cl::desc("Spill mode for splitting live ranges"), + cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), + clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), + clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), + clEnumValEnd), + cl::init(SplitEditor::SM_Partition)); static cl::opt LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, @@ -1466,7 +1465,7 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, SmallVectorImpl &NewVRegs) { SmallVector UsedCands; // Prepare split editor. - LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); // Assign all edge bundles to the preferred candidate, or NoCand. 
@@ -1514,7 +1513,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); unsigned Reg = VirtReg.reg; bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); - LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); ArrayRef UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { @@ -1586,7 +1585,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Always enable split spill mode, since we're effectively spilling to a // register. - LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitEditor::SM_Size); ArrayRef Uses = SA->getUseSlots(); @@ -1909,7 +1908,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, << '-' << Uses[BestAfter] << ", " << BestDiff << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); - LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit); SE->openIntv(); @@ -2552,7 +2551,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, NewVRegs.push_back(VirtReg.reg); } else { NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); + LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); @@ -2610,8 +2609,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { allocatePhysRegs(); tryHintsRecoloring(); - postOptimization(); - releaseMemory(); return true; } diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp index d1221ec..d5b0f96 100644 --- a/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -123,12 +123,6 @@ private: RegSet VRegsToAlloc, EmptyIntervalVRegs; - /// Inst which is a def of an original reg and whose defs are already all - /// dead after remat is saved in DeadRemats. The deletion of such inst is - /// postponed till all the allocations are done, so its remat expr is - /// always available for the remat of all the siblings of the original reg. - SmallPtrSet DeadRemats; - /// \brief Finds the initial set of vreg intervals to allocate. void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS); @@ -152,7 +146,6 @@ private: void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM) const; - void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS); }; char RegAllocPBQP::ID = 0; @@ -638,8 +631,7 @@ void RegAllocPBQP::spillVReg(unsigned VReg, VirtRegMap &VRM, Spiller &VRegSpiller) { VRegsToAlloc.erase(VReg); - LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM, - nullptr, &DeadRemats); + LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM); VRegSpiller.spill(LRE); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); @@ -721,16 +713,6 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, } } -void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) { - VRegSpiller.postOptimization(); - /// Remove dead defs because of rematerialization. 
-  for (auto DeadInst : DeadRemats) {
-    LIS.RemoveMachineInstrFromMaps(*DeadInst);
-    DeadInst->eraseFromParent();
-  }
-  DeadRemats.clear();
-}
-
 static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
                                              unsigned NumInstr) {
   // All intervals have a spill weight that is mostly proportional to the number
@@ -816,7 +798,6 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {

   // Finalise allocation, allocate empty ranges.
   finalizeAlloc(MF, LIS, VRM);
-  postOptimization(*VRegSpiller, LIS);
   VRegsToAlloc.clear();
   EmptyIntervalVRegs.clear();

diff --git a/llvm/lib/CodeGen/Spiller.h b/llvm/lib/CodeGen/Spiller.h
index 21037e6..08f99ec 100644
--- a/llvm/lib/CodeGen/Spiller.h
+++ b/llvm/lib/CodeGen/Spiller.h
@@ -16,7 +16,6 @@ namespace llvm {
   class MachineFunction;
   class MachineFunctionPass;
   class VirtRegMap;
-  class LiveIntervals;

   /// Spiller interface.
   ///
@@ -29,7 +28,7 @@ namespace llvm {
     /// spill - Spill the LRE.getParent() live interval.
     virtual void spill(LiveRangeEdit &LRE) = 0;

-    virtual void postOptimization() {};
+
   };

   /// Create and return a spiller that will insert spill code directly instead
@@ -37,6 +36,7 @@ namespace llvm {
   Spiller *createInlineSpiller(MachineFunctionPass &pass,
                                MachineFunction &mf,
                                VirtRegMap &vrm);
+
 }

 #endif

diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index 0289519..5be82b8 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -16,7 +16,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -431,13 +430,8 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
   bool Late = RegIdx != 0;

   // Attempt cheap-as-a-copy rematerialization.
-  unsigned Original = VRM.getOriginal(Edit->get(RegIdx));
-  LiveInterval &OrigLI = LIS.getInterval(Original);
-  VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
   LiveRangeEdit::Remat RM(ParentVNI);
-  RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
-
-  if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
+  if (Edit->canRematerializeAt(RM, UseIdx, true)) {
     Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
     ++NumRemats;
   } else {
@@ -722,62 +716,7 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
   }
 }

-void SplitEditor::computeRedundantBackCopies(
-    DenseSet<unsigned> &NotToHoistSet, SmallVectorImpl<VNInfo *> &BackCopies) {
-  LiveInterval *LI = &LIS.getInterval(Edit->get(0));
-  LiveInterval *Parent = &Edit->getParent();
-  SmallVector<SmallPtrSet<VNInfo *, 4>, 8> EqualVNs(Parent->getNumValNums());
-  SmallPtrSet<VNInfo *, 8> DominatedVNIs;
-
-  // Aggregate VNIs having the same value as ParentVNI.
-  for (VNInfo *VNI : LI->valnos) {
-    if (VNI->isUnused())
-      continue;
-    VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
-    EqualVNs[ParentVNI->id].insert(VNI);
-  }
-
-  // For VNI aggregation of each ParentVNI, collect dominated, i.e.,
-  // redundant VNIs to BackCopies.
-  for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
-    VNInfo *ParentVNI = Parent->getValNumInfo(i);
-    if (!NotToHoistSet.count(ParentVNI->id))
-      continue;
-    SmallPtrSetIterator<VNInfo *> It1 = EqualVNs[ParentVNI->id].begin();
-    SmallPtrSetIterator<VNInfo *> It2 = It1;
-    for (; It1 != EqualVNs[ParentVNI->id].end(); ++It1) {
-      It2 = It1;
-      for (++It2; It2 != EqualVNs[ParentVNI->id].end(); ++It2) {
-        if (DominatedVNIs.count(*It1) || DominatedVNIs.count(*It2))
-          continue;
-
-        MachineBasicBlock *MBB1 = LIS.getMBBFromIndex((*It1)->def);
-        MachineBasicBlock *MBB2 = LIS.getMBBFromIndex((*It2)->def);
-        if (MBB1 == MBB2) {
-          DominatedVNIs.insert((*It1)->def < (*It2)->def ? (*It2) : (*It1));
-        } else if (MDT.dominates(MBB1, MBB2)) {
-          DominatedVNIs.insert(*It2);
-        } else if (MDT.dominates(MBB2, MBB1)) {
-          DominatedVNIs.insert(*It1);
-        }
-      }
-    }
-    if (!DominatedVNIs.empty()) {
-      forceRecompute(0, ParentVNI);
-      for (auto VNI : DominatedVNIs) {
-        BackCopies.push_back(VNI);
-      }
-      DominatedVNIs.clear();
-    }
-  }
-}
-
-/// For SM_Size mode, find a common dominator for all the back-copies for
-/// the same ParentVNI and hoist the backcopies to the dominator BB.
-/// For SM_Speed mode, if the common dominator is hot and it is not beneficial
-/// to do the hoisting, simply remove the dominated backcopies for the same
-/// ParentVNI.
-void SplitEditor::hoistCopies() {
+void SplitEditor::hoistCopiesForSize() {
   // Get the complement interval, always RegIdx 0.
   LiveInterval *LI = &LIS.getInterval(Edit->get(0));
   LiveInterval *Parent = &Edit->getParent();
@@ -786,11 +725,6 @@ void SplitEditor::hoistCopies() {
   // indexed by ParentVNI->id.
   typedef std::pair<MachineBasicBlock *, SlotIndex> DomPair;
   SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
-  // The total cost of all the back-copies for each ParentVNI.
-  SmallVector<BlockFrequency, 8> Costs(Parent->getNumValNums());
-  // The ParentVNI->id set for which hoisting back-copies are not beneficial
-  // for Speed.
-  DenseSet<unsigned> NotToHoistSet;

   // Find the nearest common dominator for parent values with multiple
   // back-copies.  If a single back-copy dominates, put it in DomPair.second.
@@ -806,7 +740,6 @@ void SplitEditor::hoistCopies() {
       continue;

     MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def);
-
     DomPair &Dom = NearestDom[ParentVNI->id];

     // Keep directly defined parent values.  This is either a PHI or an
@@ -841,7 +774,6 @@ void SplitEditor::hoistCopies() {
       else if (Near != Dom.first)
         // None dominate. Hoist to common dominator, need new def.
         Dom = DomPair(Near, SlotIndex());
-      Costs[ParentVNI->id] += MBFI.getBlockFreq(ValMBB);
     }

     DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
@@ -860,11 +792,6 @@ void SplitEditor::hoistCopies() {
     MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
     // Get a less loopy dominator than Dom.first.
     Dom.first = findShallowDominator(Dom.first, DefMBB);
-    if (SpillMode == SM_Speed &&
-        MBFI.getBlockFreq(Dom.first) > Costs[ParentVNI->id]) {
-      NotToHoistSet.insert(ParentVNI->id);
-      continue;
-    }
     SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
     Dom.second = defFromParent(0, ParentVNI, Last, *Dom.first,
@@ -879,18 +806,11 @@ void SplitEditor::hoistCopies() {
       continue;
     VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
     const DomPair &Dom = NearestDom[ParentVNI->id];
-    if (!Dom.first || Dom.second == VNI->def ||
-        NotToHoistSet.count(ParentVNI->id))
+    if (!Dom.first || Dom.second == VNI->def)
       continue;
     BackCopies.push_back(VNI);
     forceRecompute(0, ParentVNI);
   }
-
-  // If it is not beneficial to hoist all the BackCopies, simply remove
-  // redundant BackCopies in speed mode.
-  if (SpillMode == SM_Speed && !NotToHoistSet.empty())
-    computeRedundantBackCopies(NotToHoistSet, BackCopies);
-
   removeBackCopies(BackCopies);
 }

@@ -1084,8 +1004,6 @@ void SplitEditor::deleteRematVictims() {
     // Dead defs end at the dead slot.
     if (S.end != S.valno->def.getDeadSlot())
       continue;
-    if (S.valno->isPHIDef())
-      continue;
     MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def);
     assert(MI && "Missing instruction for dead def");
     MI->addRegisterDead(LI->reg, &TRI);
@@ -1130,9 +1048,10 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
     // Leave all back-copies as is.
     break;
   case SM_Size:
+    hoistCopiesForSize();
+    break;
   case SM_Speed:
-    // hoistCopies will behave differently between size and speed.
-    hoistCopies();
+    llvm_unreachable("Spill mode 'speed' not implemented yet");
   }

   // Transfer the simply mapped values, check if any are skipped.

diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h
index 6bff9e8..69c65ff 100644
--- a/llvm/lib/CodeGen/SplitKit.h
+++ b/llvm/lib/CodeGen/SplitKit.h
@@ -18,7 +18,6 @@
 #include "LiveRangeCalc.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntervalMap.h"
 #include "llvm/ADT/SmallPtrSet.h"

@@ -330,14 +329,9 @@ private:
   MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
                                           MachineBasicBlock *DefMBB);

-  /// Find out all the backCopies dominated by others.
-  void computeRedundantBackCopies(DenseSet<unsigned> &NotToHoistSet,
-                                  SmallVectorImpl<VNInfo *> &BackCopies);
-
-  /// Hoist back-copies to the complement interval. It tries to hoist all
-  /// the back-copies to one BB if it is beneficial, or else simply remove
-  /// redundent backcopies dominated by others.
-  void hoistCopies();
+  /// hoistCopiesForSize - Hoist back-copies to the complement interval in a
+  /// way that minimizes code size. This implements the SM_Size spill mode.
+  void hoistCopiesForSize();

   /// transferValues - Transfer values to the new ranges.
   /// Return true if any ranges were skipped.

diff --git a/llvm/test/CodeGen/AArch64/aarch64-deferred-spilling.ll b/llvm/test/CodeGen/AArch64/aarch64-deferred-spilling.ll
new file mode 100644
index 0000000..2dd6d35
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-deferred-spilling.ll
@@ -0,0 +1,514 @@
+;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=true -mcpu=cortex-a57 -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=DEFERRED
+;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=false -mcpu=cortex-a57 -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=REGULAR
+
+; Check that we do not end up with useless spill code.
+; +; Move to the basic block we are interested in. +; +; CHECK: // %if.then.120 +; +; REGULAR: str w21, [sp, #[[OFFSET:[0-9]+]]] // 4-byte Folded Spill +; Check that w21 wouldn't need to be spilled since it is never reused. +; REGULAR-NOT: {{[wx]}}21{{,?}} +; +; Check that w22 is used to carry a value through the call. +; DEFERRED-NOT: str {{[wx]}}22, +; DEFERRED: mov {{[wx]}}22, +; DEFERRED-NOT: str {{[wx]}}22, +; +; CHECK: bl fprintf +; +; DEFERRED-NOT: ldr {{[wx]}}22, +; DEFERRED: mov {{[wx][0-9]+}}, {{[wx]}}22 +; DEFERRED-NOT: ldr {{[wx]}}22, +; +; REGULAR-NOT: {{[wx]}}21{{,?}} +; REGULAR: ldr w21, [sp, #[[OFFSET]]] // 4-byte Folded Reload +; +; End of the basic block we are interested in. +; CHECK: b +; CHECK: {{[^:]+}}: // %sw.bb.123 + +%struct.__sFILE = type { i8*, i32, i32, i32, i32, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct.__sbuf = type { i8*, i64 } +%struct.DState = type { %struct.bz_stream*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* } +%struct.bz_stream = type { i8*, i32, i32, i32, i8*, i32, i32, i32, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8* } + +@__sF = external global [0 x %struct.__sFILE], align 8 +@.str = private unnamed_addr constant [20 x i8] c"\0A [%d: stuff+mf \00", align 1 + +declare i32 @fprintf(%struct.__sFILE* nocapture, i8* nocapture readonly, ...) 
+ +declare void @bar(i32) + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) + +define i32 @foo(%struct.DState* %s) { +entry: + %state = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 1 + %tmp = load i32, i32* %state, align 4 + %cmp = icmp eq i32 %tmp, 10 + %save_i = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 40 + br i1 %cmp, label %if.end.thread, label %if.end + +if.end.thread: ; preds = %entry + %save_j = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41 + %save_t = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42 + %save_alphaSize = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43 + %save_nGroups = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44 + %save_nSelectors = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45 + %save_EOB = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46 + %save_groupNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47 + %save_groupPos = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48 + %save_nextSym = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49 + %save_nblockMAX = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50 + %save_nblock = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51 + %save_es = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52 + %save_N = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53 + %save_curr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54 + %save_zt = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55 + %save_zn = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56 + %save_zvec = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57 + %save_zj = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58 + %tmp1 = bitcast i32* %save_i to i8* + call void @llvm.memset.p0i8.i64(i8* %tmp1, i8 0, i64 108, i32 4, i1 false) + br label %sw.default + +if.end: ; preds = %entry + %.pre = load i32, i32* %save_i, align 4 + %save_j3.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41 + %.pre406 = load i32, i32* %save_j3.phi.trans.insert, align 4 + %save_t4.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42 + %.pre407 = load i32, i32* %save_t4.phi.trans.insert, align 4 + %save_alphaSize5.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43 + %.pre408 = load i32, i32* %save_alphaSize5.phi.trans.insert, align 4 + %save_nGroups6.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44 + %.pre409 = load i32, i32* %save_nGroups6.phi.trans.insert, align 4 + %save_nSelectors7.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45 + %.pre410 = load i32, i32* %save_nSelectors7.phi.trans.insert, align 4 + %save_EOB8.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46 + %.pre411 = load i32, i32* %save_EOB8.phi.trans.insert, align 4 + %save_groupNo9.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47 + %.pre412 = load i32, i32* %save_groupNo9.phi.trans.insert, align 4 + %save_groupPos10.phi.trans.insert = getelementptr 
inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48 + %.pre413 = load i32, i32* %save_groupPos10.phi.trans.insert, align 4 + %save_nextSym11.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49 + %.pre414 = load i32, i32* %save_nextSym11.phi.trans.insert, align 4 + %save_nblockMAX12.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50 + %.pre415 = load i32, i32* %save_nblockMAX12.phi.trans.insert, align 4 + %save_nblock13.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51 + %.pre416 = load i32, i32* %save_nblock13.phi.trans.insert, align 4 + %save_es14.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52 + %.pre417 = load i32, i32* %save_es14.phi.trans.insert, align 4 + %save_N15.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53 + %.pre418 = load i32, i32* %save_N15.phi.trans.insert, align 4 + %save_curr16.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54 + %.pre419 = load i32, i32* %save_curr16.phi.trans.insert, align 4 + %save_zt17.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55 + %.pre420 = load i32, i32* %save_zt17.phi.trans.insert, align 4 + %save_zn18.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56 + %.pre421 = load i32, i32* %save_zn18.phi.trans.insert, align 4 + %save_zvec19.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57 + %.pre422 = load i32, i32* %save_zvec19.phi.trans.insert, align 4 + %save_zj20.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58 + %.pre423 = load i32, i32* %save_zj20.phi.trans.insert, align 4 + switch i32 %tmp, label %sw.default [ + i32 13, label %sw.bb + i32 14, label %if.end.sw.bb.65_crit_edge + i32 25, label %if.end.sw.bb.123_crit_edge + ] + +if.end.sw.bb.123_crit_edge: ; preds = %if.end + %.pre433 = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8 + br label %sw.bb.123 + +if.end.sw.bb.65_crit_edge: ; preds = %if.end + %bsLive69.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8 + %.pre426 = load i32, i32* %bsLive69.phi.trans.insert, align 4 + br label %sw.bb.65 + +sw.bb: ; preds = %if.end + %sunkaddr = ptrtoint %struct.DState* %s to i64 + %sunkaddr485 = add i64 %sunkaddr, 8 + %sunkaddr486 = inttoptr i64 %sunkaddr485 to i32* + store i32 13, i32* %sunkaddr486, align 4 + %bsLive = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8 + %tmp2 = load i32, i32* %bsLive, align 4 + %cmp28.400 = icmp sgt i32 %tmp2, 7 + br i1 %cmp28.400, label %sw.bb.if.then.29_crit_edge, label %if.end.33.lr.ph + +sw.bb.if.then.29_crit_edge: ; preds = %sw.bb + %sunkaddr487 = ptrtoint %struct.DState* %s to i64 + %sunkaddr488 = add i64 %sunkaddr487, 32 + %sunkaddr489 = inttoptr i64 %sunkaddr488 to i32* + %.pre425 = load i32, i32* %sunkaddr489, align 4 + br label %if.then.29 + +if.end.33.lr.ph: ; preds = %sw.bb + %tmp3 = bitcast %struct.DState* %s to %struct.bz_stream** + %.pre424 = load %struct.bz_stream*, %struct.bz_stream** %tmp3, align 8 + %avail_in.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre424, i64 0, i32 1 + %.pre430 = load i32, i32* %avail_in.phi.trans.insert, align 4 + %tmp4 = add i32 %.pre430, -1 + br label %if.end.33 + +if.then.29: ; preds 
= %while.body.backedge, %sw.bb.if.then.29_crit_edge + %tmp5 = phi i32 [ %.pre425, %sw.bb.if.then.29_crit_edge ], [ %or, %while.body.backedge ] + %.lcssa393 = phi i32 [ %tmp2, %sw.bb.if.then.29_crit_edge ], [ %add, %while.body.backedge ] + %sub = add nsw i32 %.lcssa393, -8 + %shr = lshr i32 %tmp5, %sub + %and = and i32 %shr, 255 + %sunkaddr491 = ptrtoint %struct.DState* %s to i64 + %sunkaddr492 = add i64 %sunkaddr491, 36 + %sunkaddr493 = inttoptr i64 %sunkaddr492 to i32* + store i32 %sub, i32* %sunkaddr493, align 4 + %blockSize100k = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 9 + store i32 %and, i32* %blockSize100k, align 4 + %and.off = add nsw i32 %and, -49 + %tmp6 = icmp ugt i32 %and.off, 8 + br i1 %tmp6, label %save_state_and_return, label %if.end.62 + +if.end.33: ; preds = %while.body.backedge, %if.end.33.lr.ph + %lsr.iv482 = phi i32 [ %tmp4, %if.end.33.lr.ph ], [ %lsr.iv.next483, %while.body.backedge ] + %tmp7 = phi i32 [ %tmp2, %if.end.33.lr.ph ], [ %add, %while.body.backedge ] + %cmp35 = icmp eq i32 %lsr.iv482, -1 + br i1 %cmp35, label %save_state_and_return, label %if.end.37 + +if.end.37: ; preds = %if.end.33 + %tmp8 = bitcast %struct.bz_stream* %.pre424 to i8** + %sunkaddr494 = ptrtoint %struct.DState* %s to i64 + %sunkaddr495 = add i64 %sunkaddr494, 32 + %sunkaddr496 = inttoptr i64 %sunkaddr495 to i32* + %tmp9 = load i32, i32* %sunkaddr496, align 4 + %shl = shl i32 %tmp9, 8 + %tmp10 = load i8*, i8** %tmp8, align 8 + %tmp11 = load i8, i8* %tmp10, align 1 + %conv = zext i8 %tmp11 to i32 + %or = or i32 %conv, %shl + store i32 %or, i32* %sunkaddr496, align 4 + %add = add nsw i32 %tmp7, 8 + %sunkaddr497 = ptrtoint %struct.DState* %s to i64 + %sunkaddr498 = add i64 %sunkaddr497, 36 + %sunkaddr499 = inttoptr i64 %sunkaddr498 to i32* + store i32 %add, i32* %sunkaddr499, align 4 + %incdec.ptr = getelementptr inbounds i8, i8* %tmp10, i64 1 + store i8* %incdec.ptr, i8** %tmp8, align 8 + %sunkaddr500 = ptrtoint %struct.bz_stream* %.pre424 to i64 + %sunkaddr501 = add i64 %sunkaddr500, 8 + %sunkaddr502 = inttoptr i64 %sunkaddr501 to i32* + store i32 %lsr.iv482, i32* %sunkaddr502, align 4 + %sunkaddr503 = ptrtoint %struct.bz_stream* %.pre424 to i64 + %sunkaddr504 = add i64 %sunkaddr503, 12 + %sunkaddr505 = inttoptr i64 %sunkaddr504 to i32* + %tmp12 = load i32, i32* %sunkaddr505, align 4 + %inc = add i32 %tmp12, 1 + store i32 %inc, i32* %sunkaddr505, align 4 + %cmp49 = icmp eq i32 %inc, 0 + br i1 %cmp49, label %if.then.51, label %while.body.backedge + +if.then.51: ; preds = %if.end.37 + %sunkaddr506 = ptrtoint %struct.bz_stream* %.pre424 to i64 + %sunkaddr507 = add i64 %sunkaddr506, 16 + %sunkaddr508 = inttoptr i64 %sunkaddr507 to i32* + %tmp13 = load i32, i32* %sunkaddr508, align 4 + %inc53 = add i32 %tmp13, 1 + store i32 %inc53, i32* %sunkaddr508, align 4 + br label %while.body.backedge + +while.body.backedge: ; preds = %if.then.51, %if.end.37 + %lsr.iv.next483 = add i32 %lsr.iv482, -1 + %cmp28 = icmp sgt i32 %add, 7 + br i1 %cmp28, label %if.then.29, label %if.end.33 + +if.end.62: ; preds = %if.then.29 + %sub64 = add nsw i32 %and, -48 + %sunkaddr509 = ptrtoint %struct.DState* %s to i64 + %sunkaddr510 = add i64 %sunkaddr509, 40 + %sunkaddr511 = inttoptr i64 %sunkaddr510 to i32* + store i32 %sub64, i32* %sunkaddr511, align 4 + br label %sw.bb.65 + +sw.bb.65: ; preds = %if.end.62, %if.end.sw.bb.65_crit_edge + %bsLive69.pre-phi = phi i32* [ %bsLive69.phi.trans.insert, %if.end.sw.bb.65_crit_edge ], [ %bsLive, %if.end.62 ] + %tmp14 = phi i32 [ %.pre426, 
%if.end.sw.bb.65_crit_edge ], [ %sub, %if.end.62 ] + %sunkaddr512 = ptrtoint %struct.DState* %s to i64 + %sunkaddr513 = add i64 %sunkaddr512, 8 + %sunkaddr514 = inttoptr i64 %sunkaddr513 to i32* + store i32 14, i32* %sunkaddr514, align 4 + %cmp70.397 = icmp sgt i32 %tmp14, 7 + br i1 %cmp70.397, label %if.then.72, label %if.end.82.lr.ph + +if.end.82.lr.ph: ; preds = %sw.bb.65 + %tmp15 = bitcast %struct.DState* %s to %struct.bz_stream** + %.pre427 = load %struct.bz_stream*, %struct.bz_stream** %tmp15, align 8 + %avail_in84.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre427, i64 0, i32 1 + %.pre431 = load i32, i32* %avail_in84.phi.trans.insert, align 4 + %tmp16 = add i32 %.pre431, -1 + br label %if.end.82 + +if.then.72: ; preds = %while.body.68.backedge, %sw.bb.65 + %.lcssa390 = phi i32 [ %tmp14, %sw.bb.65 ], [ %add97, %while.body.68.backedge ] + %sub76 = add nsw i32 %.lcssa390, -8 + %sunkaddr516 = ptrtoint %struct.DState* %s to i64 + %sunkaddr517 = add i64 %sunkaddr516, 36 + %sunkaddr518 = inttoptr i64 %sunkaddr517 to i32* + store i32 %sub76, i32* %sunkaddr518, align 4 + %currBlockNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 11 + %tmp17 = load i32, i32* %currBlockNo, align 4 + %inc117 = add nsw i32 %tmp17, 1 + store i32 %inc117, i32* %currBlockNo, align 4 + %verbosity = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 12 + %tmp18 = load i32, i32* %verbosity, align 4 + %cmp118 = icmp sgt i32 %tmp18, 1 + br i1 %cmp118, label %if.then.120, label %sw.bb.123, !prof !0 + +if.end.82: ; preds = %while.body.68.backedge, %if.end.82.lr.ph + %lsr.iv480 = phi i32 [ %tmp16, %if.end.82.lr.ph ], [ %lsr.iv.next481, %while.body.68.backedge ] + %tmp19 = phi i32 [ %tmp14, %if.end.82.lr.ph ], [ %add97, %while.body.68.backedge ] + %cmp85 = icmp eq i32 %lsr.iv480, -1 + br i1 %cmp85, label %save_state_and_return, label %if.end.88 + +if.end.88: ; preds = %if.end.82 + %tmp20 = bitcast %struct.bz_stream* %.pre427 to i8** + %sunkaddr519 = ptrtoint %struct.DState* %s to i64 + %sunkaddr520 = add i64 %sunkaddr519, 32 + %sunkaddr521 = inttoptr i64 %sunkaddr520 to i32* + %tmp21 = load i32, i32* %sunkaddr521, align 4 + %shl90 = shl i32 %tmp21, 8 + %tmp22 = load i8*, i8** %tmp20, align 8 + %tmp23 = load i8, i8* %tmp22, align 1 + %conv93 = zext i8 %tmp23 to i32 + %or94 = or i32 %conv93, %shl90 + store i32 %or94, i32* %sunkaddr521, align 4 + %add97 = add nsw i32 %tmp19, 8 + %sunkaddr522 = ptrtoint %struct.DState* %s to i64 + %sunkaddr523 = add i64 %sunkaddr522, 36 + %sunkaddr524 = inttoptr i64 %sunkaddr523 to i32* + store i32 %add97, i32* %sunkaddr524, align 4 + %incdec.ptr100 = getelementptr inbounds i8, i8* %tmp22, i64 1 + store i8* %incdec.ptr100, i8** %tmp20, align 8 + %sunkaddr525 = ptrtoint %struct.bz_stream* %.pre427 to i64 + %sunkaddr526 = add i64 %sunkaddr525, 8 + %sunkaddr527 = inttoptr i64 %sunkaddr526 to i32* + store i32 %lsr.iv480, i32* %sunkaddr527, align 4 + %sunkaddr528 = ptrtoint %struct.bz_stream* %.pre427 to i64 + %sunkaddr529 = add i64 %sunkaddr528, 12 + %sunkaddr530 = inttoptr i64 %sunkaddr529 to i32* + %tmp24 = load i32, i32* %sunkaddr530, align 4 + %inc106 = add i32 %tmp24, 1 + store i32 %inc106, i32* %sunkaddr530, align 4 + %cmp109 = icmp eq i32 %inc106, 0 + br i1 %cmp109, label %if.then.111, label %while.body.68.backedge + +if.then.111: ; preds = %if.end.88 + %sunkaddr531 = ptrtoint %struct.bz_stream* %.pre427 to i64 + %sunkaddr532 = add i64 %sunkaddr531, 16 + %sunkaddr533 = inttoptr i64 %sunkaddr532 to i32* + 
%tmp25 = load i32, i32* %sunkaddr533, align 4 + %inc114 = add i32 %tmp25, 1 + store i32 %inc114, i32* %sunkaddr533, align 4 + br label %while.body.68.backedge + +while.body.68.backedge: ; preds = %if.then.111, %if.end.88 + %lsr.iv.next481 = add i32 %lsr.iv480, -1 + %cmp70 = icmp sgt i32 %add97, 7 + br i1 %cmp70, label %if.then.72, label %if.end.82 + +if.then.120: ; preds = %if.then.72 + %call = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* getelementptr inbounds ([0 x %struct.__sFILE], [0 x %struct.__sFILE]* @__sF, i64 0, i64 2), i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i64 0, i64 0), i32 %inc117) + br label %sw.bb.123 + +sw.bb.123: ; preds = %if.then.120, %if.then.72, %if.end.sw.bb.123_crit_edge + %bsLive127.pre-phi = phi i32* [ %.pre433, %if.end.sw.bb.123_crit_edge ], [ %bsLive69.pre-phi, %if.then.72 ], [ %bsLive69.pre-phi, %if.then.120 ] + %sunkaddr534 = ptrtoint %struct.DState* %s to i64 + %sunkaddr535 = add i64 %sunkaddr534, 8 + %sunkaddr536 = inttoptr i64 %sunkaddr535 to i32* + store i32 25, i32* %sunkaddr536, align 4 + %tmp26 = load i32, i32* %bsLive127.pre-phi, align 4 + %cmp128.395 = icmp sgt i32 %tmp26, 7 + br i1 %cmp128.395, label %sw.bb.123.if.then.130_crit_edge, label %if.end.140.lr.ph + +sw.bb.123.if.then.130_crit_edge: ; preds = %sw.bb.123 + %sunkaddr537 = ptrtoint %struct.DState* %s to i64 + %sunkaddr538 = add i64 %sunkaddr537, 32 + %sunkaddr539 = inttoptr i64 %sunkaddr538 to i32* + %.pre429 = load i32, i32* %sunkaddr539, align 4 + br label %if.then.130 + +if.end.140.lr.ph: ; preds = %sw.bb.123 + %tmp27 = bitcast %struct.DState* %s to %struct.bz_stream** + %.pre428 = load %struct.bz_stream*, %struct.bz_stream** %tmp27, align 8 + %avail_in142.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre428, i64 0, i32 1 + %.pre432 = load i32, i32* %avail_in142.phi.trans.insert, align 4 + %tmp28 = add i32 %.pre432, -1 + br label %if.end.140 + +if.then.130: ; preds = %while.body.126.backedge, %sw.bb.123.if.then.130_crit_edge + %tmp29 = phi i32 [ %.pre429, %sw.bb.123.if.then.130_crit_edge ], [ %or152, %while.body.126.backedge ] + %.lcssa = phi i32 [ %tmp26, %sw.bb.123.if.then.130_crit_edge ], [ %add155, %while.body.126.backedge ] + %sub134 = add nsw i32 %.lcssa, -8 + %shr135 = lshr i32 %tmp29, %sub134 + store i32 %sub134, i32* %bsLive127.pre-phi, align 4 + %origPtr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 13 + %tmp30 = load i32, i32* %origPtr, align 4 + %shl175 = shl i32 %tmp30, 8 + %conv176 = and i32 %shr135, 255 + %or177 = or i32 %shl175, %conv176 + store i32 %or177, i32* %origPtr, align 4 + %nInUse = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 27 + %tmp31 = load i32, i32* %nInUse, align 4 + %add179 = add nsw i32 %tmp31, 2 + br label %save_state_and_return + +if.end.140: ; preds = %while.body.126.backedge, %if.end.140.lr.ph + %lsr.iv = phi i32 [ %tmp28, %if.end.140.lr.ph ], [ %lsr.iv.next, %while.body.126.backedge ] + %tmp32 = phi i32 [ %tmp26, %if.end.140.lr.ph ], [ %add155, %while.body.126.backedge ] + %cmp143 = icmp eq i32 %lsr.iv, -1 + br i1 %cmp143, label %save_state_and_return, label %if.end.146 + +if.end.146: ; preds = %if.end.140 + %tmp33 = bitcast %struct.bz_stream* %.pre428 to i8** + %sunkaddr541 = ptrtoint %struct.DState* %s to i64 + %sunkaddr542 = add i64 %sunkaddr541, 32 + %sunkaddr543 = inttoptr i64 %sunkaddr542 to i32* + %tmp34 = load i32, i32* %sunkaddr543, align 4 + %shl148 = shl i32 %tmp34, 8 + %tmp35 = load i8*, i8** %tmp33, align 8 + 
%tmp36 = load i8, i8* %tmp35, align 1 + %conv151 = zext i8 %tmp36 to i32 + %or152 = or i32 %conv151, %shl148 + store i32 %or152, i32* %sunkaddr543, align 4 + %add155 = add nsw i32 %tmp32, 8 + store i32 %add155, i32* %bsLive127.pre-phi, align 4 + %incdec.ptr158 = getelementptr inbounds i8, i8* %tmp35, i64 1 + store i8* %incdec.ptr158, i8** %tmp33, align 8 + %sunkaddr544 = ptrtoint %struct.bz_stream* %.pre428 to i64 + %sunkaddr545 = add i64 %sunkaddr544, 8 + %sunkaddr546 = inttoptr i64 %sunkaddr545 to i32* + store i32 %lsr.iv, i32* %sunkaddr546, align 4 + %sunkaddr547 = ptrtoint %struct.bz_stream* %.pre428 to i64 + %sunkaddr548 = add i64 %sunkaddr547, 12 + %sunkaddr549 = inttoptr i64 %sunkaddr548 to i32* + %tmp37 = load i32, i32* %sunkaddr549, align 4 + %inc164 = add i32 %tmp37, 1 + store i32 %inc164, i32* %sunkaddr549, align 4 + %cmp167 = icmp eq i32 %inc164, 0 + br i1 %cmp167, label %if.then.169, label %while.body.126.backedge + +if.then.169: ; preds = %if.end.146 + %sunkaddr550 = ptrtoint %struct.bz_stream* %.pre428 to i64 + %sunkaddr551 = add i64 %sunkaddr550, 16 + %sunkaddr552 = inttoptr i64 %sunkaddr551 to i32* + %tmp38 = load i32, i32* %sunkaddr552, align 4 + %inc172 = add i32 %tmp38, 1 + store i32 %inc172, i32* %sunkaddr552, align 4 + br label %while.body.126.backedge + +while.body.126.backedge: ; preds = %if.then.169, %if.end.146 + %lsr.iv.next = add i32 %lsr.iv, -1 + %cmp128 = icmp sgt i32 %add155, 7 + br i1 %cmp128, label %if.then.130, label %if.end.140 + +sw.default: ; preds = %if.end, %if.end.thread + %tmp39 = phi i32 [ 0, %if.end.thread ], [ %.pre, %if.end ] + %tmp40 = phi i32 [ 0, %if.end.thread ], [ %.pre406, %if.end ] + %tmp41 = phi i32 [ 0, %if.end.thread ], [ %.pre407, %if.end ] + %tmp42 = phi i32 [ 0, %if.end.thread ], [ %.pre408, %if.end ] + %tmp43 = phi i32 [ 0, %if.end.thread ], [ %.pre409, %if.end ] + %tmp44 = phi i32 [ 0, %if.end.thread ], [ %.pre410, %if.end ] + %tmp45 = phi i32 [ 0, %if.end.thread ], [ %.pre411, %if.end ] + %tmp46 = phi i32 [ 0, %if.end.thread ], [ %.pre412, %if.end ] + %tmp47 = phi i32 [ 0, %if.end.thread ], [ %.pre413, %if.end ] + %tmp48 = phi i32 [ 0, %if.end.thread ], [ %.pre414, %if.end ] + %tmp49 = phi i32 [ 0, %if.end.thread ], [ %.pre415, %if.end ] + %tmp50 = phi i32 [ 0, %if.end.thread ], [ %.pre416, %if.end ] + %tmp51 = phi i32 [ 0, %if.end.thread ], [ %.pre417, %if.end ] + %tmp52 = phi i32 [ 0, %if.end.thread ], [ %.pre418, %if.end ] + %tmp53 = phi i32 [ 0, %if.end.thread ], [ %.pre419, %if.end ] + %tmp54 = phi i32 [ 0, %if.end.thread ], [ %.pre420, %if.end ] + %tmp55 = phi i32 [ 0, %if.end.thread ], [ %.pre421, %if.end ] + %tmp56 = phi i32 [ 0, %if.end.thread ], [ %.pre422, %if.end ] + %tmp57 = phi i32 [ 0, %if.end.thread ], [ %.pre423, %if.end ] + %save_j3.pre-phi469 = phi i32* [ %save_j, %if.end.thread ], [ %save_j3.phi.trans.insert, %if.end ] + %save_t4.pre-phi467 = phi i32* [ %save_t, %if.end.thread ], [ %save_t4.phi.trans.insert, %if.end ] + %save_alphaSize5.pre-phi465 = phi i32* [ %save_alphaSize, %if.end.thread ], [ %save_alphaSize5.phi.trans.insert, %if.end ] + %save_nGroups6.pre-phi463 = phi i32* [ %save_nGroups, %if.end.thread ], [ %save_nGroups6.phi.trans.insert, %if.end ] + %save_nSelectors7.pre-phi461 = phi i32* [ %save_nSelectors, %if.end.thread ], [ %save_nSelectors7.phi.trans.insert, %if.end ] + %save_EOB8.pre-phi459 = phi i32* [ %save_EOB, %if.end.thread ], [ %save_EOB8.phi.trans.insert, %if.end ] + %save_groupNo9.pre-phi457 = phi i32* [ %save_groupNo, %if.end.thread ], [ %save_groupNo9.phi.trans.insert, %if.end ] + 
%save_groupPos10.pre-phi455 = phi i32* [ %save_groupPos, %if.end.thread ], [ %save_groupPos10.phi.trans.insert, %if.end ] + %save_nextSym11.pre-phi453 = phi i32* [ %save_nextSym, %if.end.thread ], [ %save_nextSym11.phi.trans.insert, %if.end ] + %save_nblockMAX12.pre-phi451 = phi i32* [ %save_nblockMAX, %if.end.thread ], [ %save_nblockMAX12.phi.trans.insert, %if.end ] + %save_nblock13.pre-phi449 = phi i32* [ %save_nblock, %if.end.thread ], [ %save_nblock13.phi.trans.insert, %if.end ] + %save_es14.pre-phi447 = phi i32* [ %save_es, %if.end.thread ], [ %save_es14.phi.trans.insert, %if.end ] + %save_N15.pre-phi445 = phi i32* [ %save_N, %if.end.thread ], [ %save_N15.phi.trans.insert, %if.end ] + %save_curr16.pre-phi443 = phi i32* [ %save_curr, %if.end.thread ], [ %save_curr16.phi.trans.insert, %if.end ] + %save_zt17.pre-phi441 = phi i32* [ %save_zt, %if.end.thread ], [ %save_zt17.phi.trans.insert, %if.end ] + %save_zn18.pre-phi439 = phi i32* [ %save_zn, %if.end.thread ], [ %save_zn18.phi.trans.insert, %if.end ] + %save_zvec19.pre-phi437 = phi i32* [ %save_zvec, %if.end.thread ], [ %save_zvec19.phi.trans.insert, %if.end ] + %save_zj20.pre-phi435 = phi i32* [ %save_zj, %if.end.thread ], [ %save_zj20.phi.trans.insert, %if.end ] + tail call void @bar(i32 4001) + br label %save_state_and_return + +save_state_and_return: ; preds = %sw.default, %if.end.140, %if.then.130, %if.end.82, %if.end.33, %if.then.29 + %tmp58 = phi i32 [ %tmp39, %sw.default ], [ %.pre, %if.then.29 ], [ %.pre, %if.then.130 ], [ %.pre, %if.end.140 ], [ %.pre, %if.end.82 ], [ %.pre, %if.end.33 ] + %tmp59 = phi i32 [ %tmp40, %sw.default ], [ %.pre406, %if.then.29 ], [ %.pre406, %if.then.130 ], [ %.pre406, %if.end.140 ], [ %.pre406, %if.end.82 ], [ %.pre406, %if.end.33 ] + %tmp60 = phi i32 [ %tmp41, %sw.default ], [ %.pre407, %if.then.29 ], [ %.pre407, %if.then.130 ], [ %.pre407, %if.end.140 ], [ %.pre407, %if.end.82 ], [ %.pre407, %if.end.33 ] + %tmp61 = phi i32 [ %tmp43, %sw.default ], [ %.pre409, %if.then.29 ], [ %.pre409, %if.then.130 ], [ %.pre409, %if.end.140 ], [ %.pre409, %if.end.82 ], [ %.pre409, %if.end.33 ] + %tmp62 = phi i32 [ %tmp44, %sw.default ], [ %.pre410, %if.then.29 ], [ %.pre410, %if.then.130 ], [ %.pre410, %if.end.140 ], [ %.pre410, %if.end.82 ], [ %.pre410, %if.end.33 ] + %tmp63 = phi i32 [ %tmp45, %sw.default ], [ %.pre411, %if.then.29 ], [ %.pre411, %if.then.130 ], [ %.pre411, %if.end.140 ], [ %.pre411, %if.end.82 ], [ %.pre411, %if.end.33 ] + %tmp64 = phi i32 [ %tmp46, %sw.default ], [ %.pre412, %if.then.29 ], [ %.pre412, %if.then.130 ], [ %.pre412, %if.end.140 ], [ %.pre412, %if.end.82 ], [ %.pre412, %if.end.33 ] + %tmp65 = phi i32 [ %tmp47, %sw.default ], [ %.pre413, %if.then.29 ], [ %.pre413, %if.then.130 ], [ %.pre413, %if.end.140 ], [ %.pre413, %if.end.82 ], [ %.pre413, %if.end.33 ] + %tmp66 = phi i32 [ %tmp48, %sw.default ], [ %.pre414, %if.then.29 ], [ %.pre414, %if.then.130 ], [ %.pre414, %if.end.140 ], [ %.pre414, %if.end.82 ], [ %.pre414, %if.end.33 ] + %tmp67 = phi i32 [ %tmp49, %sw.default ], [ %.pre415, %if.then.29 ], [ %.pre415, %if.then.130 ], [ %.pre415, %if.end.140 ], [ %.pre415, %if.end.82 ], [ %.pre415, %if.end.33 ] + %tmp68 = phi i32 [ %tmp51, %sw.default ], [ %.pre417, %if.then.29 ], [ %.pre417, %if.then.130 ], [ %.pre417, %if.end.140 ], [ %.pre417, %if.end.82 ], [ %.pre417, %if.end.33 ] + %tmp69 = phi i32 [ %tmp52, %sw.default ], [ %.pre418, %if.then.29 ], [ %.pre418, %if.then.130 ], [ %.pre418, %if.end.140 ], [ %.pre418, %if.end.82 ], [ %.pre418, %if.end.33 ] + %tmp70 = phi i32 [ %tmp53, 
%sw.default ], [ %.pre419, %if.then.29 ], [ %.pre419, %if.then.130 ], [ %.pre419, %if.end.140 ], [ %.pre419, %if.end.82 ], [ %.pre419, %if.end.33 ] + %tmp71 = phi i32 [ %tmp54, %sw.default ], [ %.pre420, %if.then.29 ], [ %.pre420, %if.then.130 ], [ %.pre420, %if.end.140 ], [ %.pre420, %if.end.82 ], [ %.pre420, %if.end.33 ] + %tmp72 = phi i32 [ %tmp55, %sw.default ], [ %.pre421, %if.then.29 ], [ %.pre421, %if.then.130 ], [ %.pre421, %if.end.140 ], [ %.pre421, %if.end.82 ], [ %.pre421, %if.end.33 ] + %tmp73 = phi i32 [ %tmp56, %sw.default ], [ %.pre422, %if.then.29 ], [ %.pre422, %if.then.130 ], [ %.pre422, %if.end.140 ], [ %.pre422, %if.end.82 ], [ %.pre422, %if.end.33 ] + %tmp74 = phi i32 [ %tmp57, %sw.default ], [ %.pre423, %if.then.29 ], [ %.pre423, %if.then.130 ], [ %.pre423, %if.end.140 ], [ %.pre423, %if.end.82 ], [ %.pre423, %if.end.33 ] + %save_j3.pre-phi468 = phi i32* [ %save_j3.pre-phi469, %sw.default ], [ %save_j3.phi.trans.insert, %if.then.29 ], [ %save_j3.phi.trans.insert, %if.then.130 ], [ %save_j3.phi.trans.insert, %if.end.140 ], [ %save_j3.phi.trans.insert, %if.end.82 ], [ %save_j3.phi.trans.insert, %if.end.33 ] + %save_t4.pre-phi466 = phi i32* [ %save_t4.pre-phi467, %sw.default ], [ %save_t4.phi.trans.insert, %if.then.29 ], [ %save_t4.phi.trans.insert, %if.then.130 ], [ %save_t4.phi.trans.insert, %if.end.140 ], [ %save_t4.phi.trans.insert, %if.end.82 ], [ %save_t4.phi.trans.insert, %if.end.33 ] + %save_alphaSize5.pre-phi464 = phi i32* [ %save_alphaSize5.pre-phi465, %sw.default ], [ %save_alphaSize5.phi.trans.insert, %if.then.29 ], [ %save_alphaSize5.phi.trans.insert, %if.then.130 ], [ %save_alphaSize5.phi.trans.insert, %if.end.140 ], [ %save_alphaSize5.phi.trans.insert, %if.end.82 ], [ %save_alphaSize5.phi.trans.insert, %if.end.33 ] + %save_nGroups6.pre-phi462 = phi i32* [ %save_nGroups6.pre-phi463, %sw.default ], [ %save_nGroups6.phi.trans.insert, %if.then.29 ], [ %save_nGroups6.phi.trans.insert, %if.then.130 ], [ %save_nGroups6.phi.trans.insert, %if.end.140 ], [ %save_nGroups6.phi.trans.insert, %if.end.82 ], [ %save_nGroups6.phi.trans.insert, %if.end.33 ] + %save_nSelectors7.pre-phi460 = phi i32* [ %save_nSelectors7.pre-phi461, %sw.default ], [ %save_nSelectors7.phi.trans.insert, %if.then.29 ], [ %save_nSelectors7.phi.trans.insert, %if.then.130 ], [ %save_nSelectors7.phi.trans.insert, %if.end.140 ], [ %save_nSelectors7.phi.trans.insert, %if.end.82 ], [ %save_nSelectors7.phi.trans.insert, %if.end.33 ] + %save_EOB8.pre-phi458 = phi i32* [ %save_EOB8.pre-phi459, %sw.default ], [ %save_EOB8.phi.trans.insert, %if.then.29 ], [ %save_EOB8.phi.trans.insert, %if.then.130 ], [ %save_EOB8.phi.trans.insert, %if.end.140 ], [ %save_EOB8.phi.trans.insert, %if.end.82 ], [ %save_EOB8.phi.trans.insert, %if.end.33 ] + %save_groupNo9.pre-phi456 = phi i32* [ %save_groupNo9.pre-phi457, %sw.default ], [ %save_groupNo9.phi.trans.insert, %if.then.29 ], [ %save_groupNo9.phi.trans.insert, %if.then.130 ], [ %save_groupNo9.phi.trans.insert, %if.end.140 ], [ %save_groupNo9.phi.trans.insert, %if.end.82 ], [ %save_groupNo9.phi.trans.insert, %if.end.33 ] + %save_groupPos10.pre-phi454 = phi i32* [ %save_groupPos10.pre-phi455, %sw.default ], [ %save_groupPos10.phi.trans.insert, %if.then.29 ], [ %save_groupPos10.phi.trans.insert, %if.then.130 ], [ %save_groupPos10.phi.trans.insert, %if.end.140 ], [ %save_groupPos10.phi.trans.insert, %if.end.82 ], [ %save_groupPos10.phi.trans.insert, %if.end.33 ] + %save_nextSym11.pre-phi452 = phi i32* [ %save_nextSym11.pre-phi453, %sw.default ], [ 
%save_nextSym11.phi.trans.insert, %if.then.29 ], [ %save_nextSym11.phi.trans.insert, %if.then.130 ], [ %save_nextSym11.phi.trans.insert, %if.end.140 ], [ %save_nextSym11.phi.trans.insert, %if.end.82 ], [ %save_nextSym11.phi.trans.insert, %if.end.33 ] + %save_nblockMAX12.pre-phi450 = phi i32* [ %save_nblockMAX12.pre-phi451, %sw.default ], [ %save_nblockMAX12.phi.trans.insert, %if.then.29 ], [ %save_nblockMAX12.phi.trans.insert, %if.then.130 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.140 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.82 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.33 ] + %save_nblock13.pre-phi448 = phi i32* [ %save_nblock13.pre-phi449, %sw.default ], [ %save_nblock13.phi.trans.insert, %if.then.29 ], [ %save_nblock13.phi.trans.insert, %if.then.130 ], [ %save_nblock13.phi.trans.insert, %if.end.140 ], [ %save_nblock13.phi.trans.insert, %if.end.82 ], [ %save_nblock13.phi.trans.insert, %if.end.33 ] + %save_es14.pre-phi446 = phi i32* [ %save_es14.pre-phi447, %sw.default ], [ %save_es14.phi.trans.insert, %if.then.29 ], [ %save_es14.phi.trans.insert, %if.then.130 ], [ %save_es14.phi.trans.insert, %if.end.140 ], [ %save_es14.phi.trans.insert, %if.end.82 ], [ %save_es14.phi.trans.insert, %if.end.33 ] + %save_N15.pre-phi444 = phi i32* [ %save_N15.pre-phi445, %sw.default ], [ %save_N15.phi.trans.insert, %if.then.29 ], [ %save_N15.phi.trans.insert, %if.then.130 ], [ %save_N15.phi.trans.insert, %if.end.140 ], [ %save_N15.phi.trans.insert, %if.end.82 ], [ %save_N15.phi.trans.insert, %if.end.33 ] + %save_curr16.pre-phi442 = phi i32* [ %save_curr16.pre-phi443, %sw.default ], [ %save_curr16.phi.trans.insert, %if.then.29 ], [ %save_curr16.phi.trans.insert, %if.then.130 ], [ %save_curr16.phi.trans.insert, %if.end.140 ], [ %save_curr16.phi.trans.insert, %if.end.82 ], [ %save_curr16.phi.trans.insert, %if.end.33 ] + %save_zt17.pre-phi440 = phi i32* [ %save_zt17.pre-phi441, %sw.default ], [ %save_zt17.phi.trans.insert, %if.then.29 ], [ %save_zt17.phi.trans.insert, %if.then.130 ], [ %save_zt17.phi.trans.insert, %if.end.140 ], [ %save_zt17.phi.trans.insert, %if.end.82 ], [ %save_zt17.phi.trans.insert, %if.end.33 ] + %save_zn18.pre-phi438 = phi i32* [ %save_zn18.pre-phi439, %sw.default ], [ %save_zn18.phi.trans.insert, %if.then.29 ], [ %save_zn18.phi.trans.insert, %if.then.130 ], [ %save_zn18.phi.trans.insert, %if.end.140 ], [ %save_zn18.phi.trans.insert, %if.end.82 ], [ %save_zn18.phi.trans.insert, %if.end.33 ] + %save_zvec19.pre-phi436 = phi i32* [ %save_zvec19.pre-phi437, %sw.default ], [ %save_zvec19.phi.trans.insert, %if.then.29 ], [ %save_zvec19.phi.trans.insert, %if.then.130 ], [ %save_zvec19.phi.trans.insert, %if.end.140 ], [ %save_zvec19.phi.trans.insert, %if.end.82 ], [ %save_zvec19.phi.trans.insert, %if.end.33 ] + %save_zj20.pre-phi434 = phi i32* [ %save_zj20.pre-phi435, %sw.default ], [ %save_zj20.phi.trans.insert, %if.then.29 ], [ %save_zj20.phi.trans.insert, %if.then.130 ], [ %save_zj20.phi.trans.insert, %if.end.140 ], [ %save_zj20.phi.trans.insert, %if.end.82 ], [ %save_zj20.phi.trans.insert, %if.end.33 ] + %nblock.1 = phi i32 [ %tmp50, %sw.default ], [ %.pre416, %if.then.29 ], [ 0, %if.then.130 ], [ %.pre416, %if.end.140 ], [ %.pre416, %if.end.82 ], [ %.pre416, %if.end.33 ] + %alphaSize.1 = phi i32 [ %tmp42, %sw.default ], [ %.pre408, %if.then.29 ], [ %add179, %if.then.130 ], [ %.pre408, %if.end.140 ], [ %.pre408, %if.end.82 ], [ %.pre408, %if.end.33 ] + %retVal.0 = phi i32 [ 0, %sw.default ], [ -5, %if.then.29 ], [ -4, %if.then.130 ], [ 0, %if.end.140 ], [ 0, %if.end.82 ], 
[ 0, %if.end.33 ] + store i32 %tmp58, i32* %save_i, align 4 + store i32 %tmp59, i32* %save_j3.pre-phi468, align 4 + store i32 %tmp60, i32* %save_t4.pre-phi466, align 4 + store i32 %alphaSize.1, i32* %save_alphaSize5.pre-phi464, align 4 + store i32 %tmp61, i32* %save_nGroups6.pre-phi462, align 4 + store i32 %tmp62, i32* %save_nSelectors7.pre-phi460, align 4 + store i32 %tmp63, i32* %save_EOB8.pre-phi458, align 4 + store i32 %tmp64, i32* %save_groupNo9.pre-phi456, align 4 + store i32 %tmp65, i32* %save_groupPos10.pre-phi454, align 4 + store i32 %tmp66, i32* %save_nextSym11.pre-phi452, align 4 + store i32 %tmp67, i32* %save_nblockMAX12.pre-phi450, align 4 + store i32 %nblock.1, i32* %save_nblock13.pre-phi448, align 4 + store i32 %tmp68, i32* %save_es14.pre-phi446, align 4 + store i32 %tmp69, i32* %save_N15.pre-phi444, align 4 + store i32 %tmp70, i32* %save_curr16.pre-phi442, align 4 + store i32 %tmp71, i32* %save_zt17.pre-phi440, align 4 + store i32 %tmp72, i32* %save_zn18.pre-phi438, align 4 + store i32 %tmp73, i32* %save_zvec19.pre-phi436, align 4 + store i32 %tmp74, i32* %save_zj20.pre-phi434, align 4 + ret i32 %retVal.0 +} + +!0 = !{!"branch_weights", i32 10, i32 1} diff --git a/llvm/test/CodeGen/X86/fp128-compare.ll b/llvm/test/CodeGen/X86/fp128-compare.ll index d9a48c5..b5d4fbe 100644 --- a/llvm/test/CodeGen/X86/fp128-compare.ll +++ b/llvm/test/CodeGen/X86/fp128-compare.ll @@ -86,8 +86,8 @@ entry: %cond = select i1 %cmp, fp128 %x, fp128 %y ret fp128 %cond ; CHECK-LABEL: TestMax: -; CHECK: movaps %xmm0 ; CHECK: movaps %xmm1 +; CHECK: movaps %xmm0 ; CHECK: callq __gttf2 ; CHECK: movaps {{.*}}, %xmm0 ; CHECK: testl %eax, %eax diff --git a/llvm/test/CodeGen/X86/hoist-spill.ll b/llvm/test/CodeGen/X86/hoist-spill.ll deleted file mode 100644 index db9c410..0000000 --- a/llvm/test/CodeGen/X86/hoist-spill.ll +++ /dev/null @@ -1,121 +0,0 @@ -; RUN: llc < %s | FileCheck %s - -; grep 'Spill' |sed 's%.*\(-[0-9]\+(\%rsp)\).*%\1%g' |sort |uniq -d |awk '{if (/rsp/); exit -1}' -; Check no spills to the same stack slot after hoisting. 
-; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp) -; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp) -; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp) -; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp) -; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp) -; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp) - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@a = external global i32*, align 8 -@b = external global i32, align 4 -@d = external global i32*, align 8 - -; Function Attrs: norecurse noreturn nounwind uwtable -define void @fn1(i32 %p1) { -entry: - %tmp = load i32*, i32** @d, align 8 - %tmp1 = load i32*, i32** @a, align 8 - %tmp2 = sext i32 %p1 to i64 - br label %for.cond - -for.cond: ; preds = %for.inc14, %entry - %indvar = phi i32 [ %indvar.next, %for.inc14 ], [ 0, %entry ] - %indvars.iv30.in = phi i32 [ %indvars.iv30, %for.inc14 ], [ %p1, %entry ] - %c.0 = phi i32 [ %inc15, %for.inc14 ], [ 1, %entry ] - %k.0 = phi i32 [ %k.1.lcssa, %for.inc14 ], [ undef, %entry ] - %tmp3 = icmp sgt i32 undef, 0 - %smax52 = select i1 %tmp3, i32 undef, i32 0 - %tmp4 = zext i32 %smax52 to i64 - %tmp5 = icmp sgt i64 undef, %tmp4 - %smax53 = select i1 %tmp5, i64 undef, i64 %tmp4 - %tmp6 = add nsw i64 %smax53, 1 - %tmp7 = sub nsw i64 %tmp6, %tmp4 - %tmp8 = add nsw i64 %tmp7, -8 - %tmp9 = sub i32 undef, %indvar - %tmp10 = icmp sgt i64 %tmp2, 0 - %smax40 = select i1 %tmp10, i64 %tmp2, i64 0 - %scevgep41 = getelementptr i32, i32* %tmp1, i64 %smax40 - %indvars.iv30 = add i32 %indvars.iv30.in, -1 - %tmp11 = icmp sgt i32 %indvars.iv30, 0 - %smax = select i1 %tmp11, i32 %indvars.iv30, i32 0 - %tmp12 = zext i32 %smax to i64 - %sub = sub nsw i32 %p1, %c.0 - %cmp = icmp sgt i32 %sub, 0 - %sub. = select i1 %cmp, i32 %sub, i32 0 - %cmp326 = icmp sgt i32 %k.0, %p1 - br i1 %cmp326, label %for.cond4.preheader, label %for.body.preheader - -for.body.preheader: ; preds = %for.cond - br label %for.body - -for.cond4.preheader: ; preds = %for.body, %for.cond - %k.1.lcssa = phi i32 [ %k.0, %for.cond ], [ %add, %for.body ] - %cmp528 = icmp sgt i32 %sub., %p1 - br i1 %cmp528, label %for.inc14, label %for.body6.preheader - -for.body6.preheader: ; preds = %for.cond4.preheader - br i1 undef, label %for.body6, label %min.iters.checked - -min.iters.checked: ; preds = %for.body6.preheader - br i1 undef, label %for.body6, label %vector.memcheck - -vector.memcheck: ; preds = %min.iters.checked - %bound1 = icmp ule i32* undef, %scevgep41 - %memcheck.conflict = and i1 undef, %bound1 - br i1 %memcheck.conflict, label %for.body6, label %vector.body.preheader - -vector.body.preheader: ; preds = %vector.memcheck - %lcmp.mod = icmp eq i64 undef, 0 - br i1 %lcmp.mod, label %vector.body.preheader.split, label %vector.body.prol - -vector.body.prol: ; preds = %vector.body.prol, %vector.body.preheader - %prol.iter.cmp = icmp eq i64 undef, 0 - br i1 %prol.iter.cmp, label %vector.body.preheader.split, label %vector.body.prol - -vector.body.preheader.split: ; preds = %vector.body.prol, %vector.body.preheader - %tmp13 = icmp ult i64 %tmp8, 24 - br i1 %tmp13, label %middle.block, label %vector.body - -vector.body: ; preds = %vector.body, %vector.body.preheader.split - %index = phi i64 [ %index.next.3, %vector.body ], [ 0, %vector.body.preheader.split ] - %index.next = add i64 %index, 8 - %offset.idx.1 = add i64 %tmp12, %index.next - %tmp14 = getelementptr inbounds i32, i32* %tmp, i64 %offset.idx.1 - %tmp15 = bitcast i32* %tmp14 to <4 x i32>* - %wide.load.1 = load <4 x i32>, 
<4 x i32>* %tmp15, align 4 - %tmp16 = getelementptr inbounds i32, i32* %tmp1, i64 %offset.idx.1 - %tmp17 = bitcast i32* %tmp16 to <4 x i32>* - store <4 x i32> %wide.load.1, <4 x i32>* %tmp17, align 4 - %index.next.3 = add i64 %index, 32 - br i1 undef, label %middle.block, label %vector.body - -middle.block: ; preds = %vector.body, %vector.body.preheader.split - br i1 undef, label %for.inc14, label %for.body6 - -for.body: ; preds = %for.body, %for.body.preheader - %k.127 = phi i32 [ %k.0, %for.body.preheader ], [ %add, %for.body ] - %add = add nsw i32 %k.127, 1 - %tmp18 = load i32, i32* undef, align 4 - store i32 %tmp18, i32* @b, align 4 - br i1 undef, label %for.body, label %for.cond4.preheader - -for.body6: ; preds = %for.body6, %middle.block, %vector.memcheck, %min.iters.checked, %for.body6.preheader - %indvars.iv32 = phi i64 [ undef, %for.body6 ], [ %tmp12, %vector.memcheck ], [ %tmp12, %min.iters.checked ], [ %tmp12, %for.body6.preheader ], [ undef, %middle.block ] - %arrayidx8 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv32 - %tmp19 = load i32, i32* %arrayidx8, align 4 - %arrayidx10 = getelementptr inbounds i32, i32* %tmp1, i64 %indvars.iv32 - store i32 %tmp19, i32* %arrayidx10, align 4 - %cmp5 = icmp slt i64 %indvars.iv32, undef - br i1 %cmp5, label %for.body6, label %for.inc14 - -for.inc14: ; preds = %for.body6, %middle.block, %for.cond4.preheader - %inc15 = add nuw nsw i32 %c.0, 1 - %indvar.next = add i32 %indvar, 1 - br label %for.cond -} diff --git a/llvm/test/CodeGen/X86/new-remat.ll b/llvm/test/CodeGen/X86/new-remat.ll deleted file mode 100644 index 4d311aa..0000000 --- a/llvm/test/CodeGen/X86/new-remat.ll +++ /dev/null @@ -1,71 +0,0 @@ -; RUN: llc < %s | FileCheck %s -; Check all spills are rematerialized. -; CHECK-NOT: Spill - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@b = common global double 0.000000e+00, align 8 -@a = common global i32 0, align 4 - -; Function Attrs: nounwind uwtable -define i32 @uniform_testdata(i32 %p1) { -entry: - %cmp3 = icmp sgt i32 %p1, 0 - br i1 %cmp3, label %for.body.preheader, label %for.end - -for.body.preheader: ; preds = %entry - %tmp = add i32 %p1, -1 - %xtraiter = and i32 %p1, 7 - %lcmp.mod = icmp eq i32 %xtraiter, 0 - br i1 %lcmp.mod, label %for.body.preheader.split, label %for.body.prol.preheader - -for.body.prol.preheader: ; preds = %for.body.preheader - br label %for.body.prol - -for.body.prol: ; preds = %for.body.prol, %for.body.prol.preheader - %i.04.prol = phi i32 [ %inc.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ] - %prol.iter = phi i32 [ %prol.iter.sub, %for.body.prol ], [ %xtraiter, %for.body.prol.preheader ] - %tmp1 = load double, double* @b, align 8 - %call.prol = tail call double @pow(double %tmp1, double 2.500000e-01) - %inc.prol = add nuw nsw i32 %i.04.prol, 1 - %prol.iter.sub = add i32 %prol.iter, -1 - %prol.iter.cmp = icmp eq i32 %prol.iter.sub, 0 - br i1 %prol.iter.cmp, label %for.body.preheader.split.loopexit, label %for.body.prol - -for.body.preheader.split.loopexit: ; preds = %for.body.prol - %inc.prol.lcssa = phi i32 [ %inc.prol, %for.body.prol ] - br label %for.body.preheader.split - -for.body.preheader.split: ; preds = %for.body.preheader.split.loopexit, %for.body.preheader - %i.04.unr = phi i32 [ 0, %for.body.preheader ], [ %inc.prol.lcssa, %for.body.preheader.split.loopexit ] - %tmp2 = icmp ult i32 %tmp, 7 - br i1 %tmp2, label %for.end.loopexit, label %for.body.preheader.split.split - -for.body.preheader.split.split: ; preds = 
%for.body.preheader.split - br label %for.body - -for.body: ; preds = %for.body, %for.body.preheader.split.split - %i.04 = phi i32 [ %i.04.unr, %for.body.preheader.split.split ], [ %inc.7, %for.body ] - %tmp3 = load double, double* @b, align 8 - %call = tail call double @pow(double %tmp3, double 2.500000e-01) - %tmp4 = load double, double* @b, align 8 - %call.1 = tail call double @pow(double %tmp4, double 2.500000e-01) - %inc.7 = add nsw i32 %i.04, 8 - %exitcond.7 = icmp eq i32 %inc.7, %p1 - br i1 %exitcond.7, label %for.end.loopexit.unr-lcssa, label %for.body - -for.end.loopexit.unr-lcssa: ; preds = %for.body - br label %for.end.loopexit - -for.end.loopexit: ; preds = %for.end.loopexit.unr-lcssa, %for.body.preheader.split - br label %for.end - -for.end: ; preds = %for.end.loopexit, %entry - %tmp5 = load i32, i32* @a, align 4 - ret i32 %tmp5 -} - -; Function Attrs: nounwind -declare double @pow(double, double) - diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll index 1d6b4f9..46b65bd 100644 --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy | FileCheck %s ; This testing case is reduced from 254.gap SyFgets function. -; We make sure a spill is hoisted to a cold BB inside the hotter outer loop. +; We make sure a spill is not hoisted to a hotter outer loop. %struct.TMP.1 = type { %struct.TMP.2*, %struct.TMP.2*, [1024 x i8] } %struct.TMP.2 = type { i8*, i32, i32, i16, i16, %struct.TMP.3, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.TMP.3, %struct.TMP.4*, i32, [3 x i8], [1 x i8], %struct.TMP.3, i32, i64 } @@ -181,10 +181,6 @@ sw.bb474: br i1 %cmp476, label %if.end517, label %do.body479.preheader do.body479.preheader: - ; CHECK: do.body479.preheader - ; spill is hoisted here. Although loop depth1 is even hotter than loop depth2, do.body479.preheader is cold. - ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp) - ; CHECK: land.rhs485 %cmp4833314 = icmp eq i8 undef, 0 br i1 %cmp4833314, label %if.end517, label %land.rhs485 @@ -204,8 +200,8 @@ land.lhs.true490: lor.rhs500: ; CHECK: lor.rhs500 - ; Make sure spill is hoisted to a cold preheader in outside loop. - ; CHECK-NOT: movq %r{{.*}}, {{[0-9]+}}(%rsp) + ; Make sure that we don't hoist the spill to outer loops. + ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp) ; CHECK: callq {{.*}}maskrune %call3.i.i2792 = call i32 @__maskrune(i32 undef, i64 256) br i1 undef, label %land.lhs.true504, label %do.body479.backedge