From 0ce90494e6810a6eff2083335043350a364df6a1 Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen
Date: Wed, 23 May 2012 22:37:27 +0000
Subject: [PATCH] Add a last resort tryInstructionSplit() to RAGreedy.

Live ranges with a constrained register class may benefit from splitting
around individual uses. It allows the remaining live range to use a larger
register class where it may allocate. This is like spilling to a different
register class.

This is only attempted on constrained register classes.

llvm-svn: 157354
---
 llvm/lib/CodeGen/RegAllocGreedy.cpp      | 66 +++++++++++++++++++++++++++++++-
 llvm/test/CodeGen/Thumb2/inflate-regs.ll | 22 +++++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/Thumb2/inflate-regs.ll

diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 03a12ae..7808552 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -286,6 +286,8 @@ private:
                            SmallVectorImpl<LiveInterval*>&);
   unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
                          SmallVectorImpl<LiveInterval*>&);
+  unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
+                               SmallVectorImpl<LiveInterval*>&);
   unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
                          SmallVectorImpl<LiveInterval*>&);
   unsigned trySplit(LiveInterval&, AllocationOrder&,
@@ -1265,6 +1267,65 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   return 0;
 }
 
+
+//===----------------------------------------------------------------------===//
+//                         Per-Instruction Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryInstructionSplit - Split a live range around individual instructions.
+/// This is normally not worthwhile since the spiller is doing essentially the
+/// same thing. However, when the live range is in a constrained register
+/// class, it may help to insert copies such that parts of the live range can
+/// be moved to a larger register class.
+///
+/// This is similar to spilling to a larger register class.
+unsigned
+RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+                              SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  // There is no point to this if there are no larger sub-classes.
+  if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg)))
+    return 0;
+
+  // Always enable split spill mode, since we're effectively spilling to a
+  // register.
+  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+  SE->reset(LREdit, SplitEditor::SM_Size);
+
+  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+  if (Uses.size() <= 1)
+    return 0;
+
+  DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
+
+  // Split around every non-copy instruction.
+  for (unsigned i = 0; i != Uses.size(); ++i) {
+    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
+      if (MI->isFullCopy()) {
+        DEBUG(dbgs() << "    skip:\t" << Uses[i] << '\t' << *MI);
+        continue;
+      }
+    SE->openIntv();
+    SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
+    SlotIndex SegStop  = SE->leaveIntvAfter(Uses[i]);
+    SE->useIntv(SegStart, SegStop);
+  }
+
+  if (LREdit.empty()) {
+    DEBUG(dbgs() << "All uses were copies.\n");
+    return 0;
+  }
+
+  SmallVector<unsigned, 8> IntvMap;
+  SE->finish(&IntvMap);
+  DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+  ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+  // Assign all new registers to RS_Spill. This was the last chance.
+  setStage(LREdit.begin(), LREdit.end(), RS_Spill);
+  return 0;
+}
+
+
 //===----------------------------------------------------------------------===//
 //                             Local Splitting
 //===----------------------------------------------------------------------===//
@@ -1561,7 +1622,10 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
   if (LIS->intervalIsInOneMBB(VirtReg)) {
     NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
     SA->analyze(&VirtReg);
-    return tryLocalSplit(VirtReg, Order, NewVRegs);
+    unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
+    if (PhysReg || !NewVRegs.empty())
+      return PhysReg;
+    return tryInstructionSplit(VirtReg, Order, NewVRegs);
   }
 
   NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
diff --git a/llvm/test/CodeGen/Thumb2/inflate-regs.ll b/llvm/test/CodeGen/Thumb2/inflate-regs.ll
new file mode 100644
index 0000000..2648635
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/inflate-regs.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
+target triple = "thumbv7-apple-ios"
+
+; CHECK: local_split
+;
+; The load must go into d0-15 which are all clobbered by the asm.
+; RAGreedy should split the range and use d16-d31 to avoid a spill.
+;
+; CHECK: vldr s
+; CHECK-NOT: vstr
+; CHECK: vadd.f32
+; CHECK-NOT: vstr
+; CHECK: vorr
+; CHECK: vstr s
+define void @local_split(float* nocapture %p) nounwind ssp {
+entry:
+  %x = load float* %p, align 4
+  %a = fadd float %x, 1.0
+  tail call void asm sideeffect "", "~{d0},~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
+  store float %a, float* %p, align 4
+  ret void
+}
-- 
2.7.4
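
Note for anyone trying out this patch: the new code path is easiest to watch
via the register allocator's debug output. This is a sketch, not part of the
patch itself; it assumes an assertions-enabled build of llc, since the
DEBUG() statements added above are compiled out of release builds. The
-debug-only category is the pass's DEBUG_TYPE, "regalloc".

    llc < llvm/test/CodeGen/Thumb2/inflate-regs.ll -mcpu=cortex-a8 \
        -debug-only=regalloc > /dev/null

On stderr, the "Split around N individual instrs." line from
tryInstructionSplit() marks the last-resort split firing. Per the comment in
the patch, the virtual registers it creates are staged RS_Spill, so any of
them that still fail to allocate are spilled rather than split again.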