const LaneBitmask LaneMask;
/// Classes with a higher priority value are assigned first by register
/// allocators using a greedy heuristic. The value is in the range [0,63].
+ /// Values >= 32 should be used with care since they may overlap with other
+ /// fields in the allocator's priority heuristics.
const uint8_t AllocationPriority;
/// Configurable target specific flags.
const uint8_t TSFlags;
return false;
}
+ /// Hook for live range prioritization in register allocation. If it returns
+ /// true, the AllocationPriority of the register class is treated as more
+ /// important than whether the range is local to a basic block or global.
+ virtual bool
+ regClassPriorityTrumpsGlobalness(const MachineFunction &MF) const {
+ return false;
+ }
+
//===--------------------------------------------------------------------===//
/// Debug information queries.
// heuristic. Classes with higher priority values are assigned first. This is
// useful as it is sometimes beneficial to assign registers to highly
// constrained classes first. The value has to be in the range [0,63].
+ // Values >= 32 should be used with care since they may overlap with other
+ // fields in the allocator's priority heuristics.
int AllocationPriority = 0;
// Generate register pressure set for this register class and any class
"limit its budget and bail out once we reach the limit."),
cl::init(10000), cl::Hidden);
+// Explicit command-line override for the target's
+// regClassPriorityTrumpsGlobalness() hook: the hook is only consulted when
+// this option does not occur on the command line.
+static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
+ "greedy-regclass-priority-trumps-globalness",
+ cl::desc("Change the greedy register allocator's live range priority "
+ "calculation to make the AllocationPriority of the register class "
+ "more important than whether the range is global"),
+ cl::Hidden);
+
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
bool ForceGlobal = !ReverseLocal &&
(Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));
+ unsigned GlobalBit = 0;
if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
LIS->intervalIsInOneMBB(*LI)) {
// Allocate global and split ranges in long->short order. Long ranges that
// don't fit should be spilled (or split) ASAP so they don't create
// interference. Mark a bit to prioritize global above local ranges.
- Prio = (1u << 29) + Size;
+ Prio = Size;
+ GlobalBit = 1;
}
- Prio |= RC.AllocationPriority << 24;
+ if (RegClassPriorityTrumpsGlobalness)
+ Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
+ else
+ Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;
// Mark a higher bit to prioritize global and local above RS_Split.
Prio |= (1u << 31);
initializeCSRCost();
RegCosts = TRI->getRegisterCosts(*MF);
+ RegClassPriorityTrumpsGlobalness =
+ GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
+ ? GreedyRegClassPriorityTrumpsGlobalness
+ : TRI->regClassPriorityTrumpsGlobalness(*MF);
ExtraInfo.emplace();
EvictAdvisor =
/// Function
ArrayRef<uint8_t> RegCosts;
+ /// Flag for the live range priority calculation, determined once per
+ /// machine function. Defaults to false so that it holds a defined value
+ /// before the first machine function has been set up.
+ bool RegClassPriorityTrumpsGlobalness = false;
+
public:
RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
--- /dev/null
+# RUN: llc -march=amdgcn -mcpu=gfx1030 -greedy-regclass-priority-trumps-globalness=0 -start-before greedy -o - %s | FileCheck %s -check-prefix=OLD
+# RUN: llc -march=amdgcn -mcpu=gfx1030 -greedy-regclass-priority-trumps-globalness=1 -start-before greedy -o - %s | FileCheck %s -check-prefix=NEW
+
+# At the time of writing -greedy-regclass-priority-trumps-globalness makes a
+# significant improvement in the total number of vgprs needed to compile this
+# test, from 11 down to 7.
+
+# OLD: NumVgprs: 11{{$}}
+# NEW: NumVgprs: 7{{$}}
+
+---
+name: _amdgpu_cs_main
+tracksRegLiveness: true
+body: |
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $vgpr0, $vgpr6
+
+ %6:vgpr_32 = COPY $vgpr6
+ undef %30.sub0:vreg_128 = COPY $vgpr0
+ undef %27.sub0:vreg_128 = V_MED3_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ undef %16.sub0:sgpr_256 = S_MOV_B32 0
+ undef %26.sub1:vreg_64 = V_LSHRREV_B32_e32 1, %6, implicit $exec
+ %27.sub1:vreg_128 = COPY %27.sub0
+ %27.sub2:vreg_128 = COPY %27.sub0
+ %27.sub3:vreg_128 = COPY %27.sub0
+ %26.sub0:vreg_64 = V_MOV_B32_e32 1, implicit $exec
+ %16.sub1:sgpr_256 = COPY %16.sub0
+ %16.sub2:sgpr_256 = COPY %16.sub0
+ %16.sub3:sgpr_256 = COPY %16.sub0
+ %16.sub4:sgpr_256 = COPY %16.sub0
+ %16.sub5:sgpr_256 = COPY %16.sub0
+ %16.sub6:sgpr_256 = COPY %16.sub0
+ %16.sub7:sgpr_256 = COPY %16.sub0
+ IMAGE_STORE_V4_V2_gfx10 %27, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "ImageResource")
+ S_CBRANCH_SCC1 %bb.2, implicit undef $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ %30.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec
+ %30.sub2:vreg_128 = COPY %30.sub1
+ %30.sub3:vreg_128 = COPY %30.sub1
+ %26.sub1:vreg_64 = COPY %30.sub1
+ IMAGE_STORE_V4_V2_gfx10 %30, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "ImageResource")
+
+ bb.2:
+ S_ENDPGM 0
+...