Add a last-resort tryInstructionSplit() to RAGreedy.
Author:     Jakob Stoklund Olesen <stoklund@2pi.dk>
AuthorDate: Wed, 23 May 2012 22:37:27 +0000
Commit:     Jakob Stoklund Olesen <stoklund@2pi.dk>
CommitDate: Wed, 23 May 2012 22:37:27 +0000
Live ranges with a constrained register class may benefit from splitting
around individual uses: the remaining live range can then move into a
larger register class, where it stands a better chance of being
allocated. In effect, this is spilling to a different register class.

This is only attempted on constrained register classes.
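
To make the mechanism concrete, here is a minimal standalone sketch of
the per-use splitting idea. It is illustrative only: Segment,
splitAroundUses, and the plain integer slot numbers are hypothetical
stand-ins for LLVM's SlotIndex and SplitEditor machinery, but the shape
matches the loop in tryInstructionSplit() below: one tiny interval per
use, with full copies skipped.

#include <algorithm>
#include <cstdio>
#include <vector>

// Half-open interval [Start, Stop) kept in the constrained class.
struct Segment {
  int Start, Stop;
};

// Give every non-copy use its own tiny interval; everything between
// those intervals is left to new vregs that can inflate to a larger
// register class.
static std::vector<Segment>
splitAroundUses(const std::vector<int> &Uses, const std::vector<int> &Copies) {
  std::vector<Segment> Constrained;
  for (int U : Uses) {
    // Skip full copies: splitting around a copy would only insert
    // another copy next to it.
    if (std::find(Copies.begin(), Copies.end(), U) != Copies.end())
      continue;
    Constrained.push_back({U - 1, U + 1}); // enter before, leave after
  }
  return Constrained;
}

int main() {
  // Uses at slots 4, 10 and 16; the use at 10 is a full copy.
  for (const Segment &S : splitAroundUses({4, 10, 16}, {10}))
    std::printf("constrained interval [%d, %d)\n", S.Start, S.Stop);
}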

<rdar://problem/11438902>

llvm-svn: 157354

llvm/lib/CodeGen/RegAllocGreedy.cpp
llvm/test/CodeGen/Thumb2/inflate-regs.ll [new file with mode: 0644]

diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 03a12ae..7808552 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -286,6 +286,8 @@ private:
                           SmallVectorImpl<LiveInterval*>&);
   unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
                          SmallVectorImpl<LiveInterval*>&);
+  unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
+                               SmallVectorImpl<LiveInterval*>&);
   unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
                          SmallVectorImpl<LiveInterval*>&);
   unsigned trySplit(LiveInterval&, AllocationOrder&,
@@ -1265,6 +1267,65 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   return 0;
 }
 
+
+//===----------------------------------------------------------------------===//
+//                         Per-Instruction Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryInstructionSplit - Split a live range around individual instructions.
+/// This is normally not worthwhile since the spiller is doing essentially the
+/// same thing. However, when the live range is in a constrained register
+/// class, it may help to insert copies such that parts of the live range can
+/// be moved to a larger register class.
+///
+/// This is similar to spilling to a larger register class.
+unsigned
+RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+                              SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  // There is no point to this if there are no larger sub-classes.
+  if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg)))
+    return 0;
+
+  // Always enable split spill mode, since we're effectively spilling to a
+  // register.
+  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+  SE->reset(LREdit, SplitEditor::SM_Size);
+
+  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+  if (Uses.size() <= 1)
+    return 0;
+
+  DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
+
+  // Split around every non-copy instruction.
+  for (unsigned i = 0; i != Uses.size(); ++i) {
+    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
+      if (MI->isFullCopy()) {
+        DEBUG(dbgs() << "    skip:\t" << Uses[i] << '\t' << *MI);
+        continue;
+      }
+    SE->openIntv();
+    SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
+    SlotIndex SegStop  = SE->leaveIntvAfter(Uses[i]);
+    SE->useIntv(SegStart, SegStop);
+  }
+
+  if (LREdit.empty()) {
+    DEBUG(dbgs() << "All uses were copies.\n");
+    return 0;
+  }
+
+  SmallVector<unsigned, 8> IntvMap;
+  SE->finish(&IntvMap);
+  DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+  ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+  // Assign all new registers to RS_Spill. This was the last chance.
+  setStage(LREdit.begin(), LREdit.end(), RS_Spill);
+  return 0;
+}
+
+
 //===----------------------------------------------------------------------===//
 //                             Local Splitting
 //===----------------------------------------------------------------------===//
@@ -1561,7 +1622,10 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
   if (LIS->intervalIsInOneMBB(VirtReg)) {
     NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
     SA->analyze(&VirtReg);
-    return tryLocalSplit(VirtReg, Order, NewVRegs);
+    unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
+    if (PhysReg || !NewVRegs.empty())
+      return PhysReg;
+    return tryInstructionSplit(VirtReg, Order, NewVRegs);
   }
 
   NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
diff --git a/llvm/test/CodeGen/Thumb2/inflate-regs.ll b/llvm/test/CodeGen/Thumb2/inflate-regs.ll
new file mode 100644
index 0000000..2648635
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/inflate-regs.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
+target triple = "thumbv7-apple-ios"
+
+; CHECK: local_split
+;
+; The load must go into d0-d15, which are all clobbered by the asm.
+; RAGreedy should split the range and use d16-d31 to avoid a spill.
+;
+; CHECK: vldr s
+; CHECK-NOT: vstr
+; CHECK: vadd.f32
+; CHECK-NOT: vstr
+; CHECK: vorr
+; CHECK: vstr s
+define void @local_split(float* nocapture %p) nounwind ssp {
+entry:
+  %x = load float* %p, align 4
+  %a = fadd float %x, 1.0
+  tail call void asm sideeffect "", "~{d0},~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
+  store float %a, float* %p, align 4
+  ret void
+}