return false;
}
+/// Return true if the physical register currently assigned to \p Intf
+/// overlaps with, but is not identical to, \p PhysReg.
+static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
+                                         const VirtRegMap &VRM,
+                                         MCRegister PhysReg,
+                                         const LiveInterval &Intf) {
+  const MCRegister CurrentReg = VRM.getPhys(Intf.reg());
+  // An identical assignment is a full overlap, not a partial one.
+  const bool IsSameReg = PhysReg == CurrentReg;
+  return !IsSameReg && TRI.regsOverlap(PhysReg, CurrentReg);
+}
+
/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
return false;
}
for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
- // If Intf is done and sit on the same register class as VirtReg,
- // it would not be recolorable as it is in the same state as VirtReg.
- // However, if VirtReg has tied defs and Intf doesn't, then
+ // If Intf is done and sits on the same register class as VirtReg, it
+ // would not be recolorable as it is in the same state as
+ // VirtReg. However, there are at least two exceptions.
+ //
+ // If VirtReg has tied defs and Intf doesn't, then
// there is still a point in examining if it can be recolorable.
+ //
+ // Additionally, if the register class has overlapping tuple members, it
+ // may still be recolorable using a different tuple. This is more likely
+ // if the existing assignment aliases with the candidate.
+ //
if (((ExtraInfo->getStage(*Intf) == RS_Done &&
- MRI->getRegClass(Intf->reg()) == CurRC) &&
+ MRI->getRegClass(Intf->reg()) == CurRC &&
+ !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
!(hasTiedDef(MRI, VirtReg.reg()) &&
!hasTiedDef(MRI, Intf->reg()))) ||
FixedRegisters.count(Intf->reg())) {
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=greedy -o - %s | FileCheck %s
+
+# This testcase is restricted to using a maximum of 24 VGPRs. It is
+# therefore possible to allocate a maximum of 3 vreg_256s at a
+# time. The apparent number of registers in the class is larger, but
+# each one overlaps with the next. Allocating a vreg_64 leaves room
+# for only 2 full vreg_256s to be live at that point.
+
+# The hints are trying to force allocation of overlapping vreg_256s
+# which cannot be satisfied. The last S_NOP in %bb.0 with 2 vreg_256s
+# and a vreg_64 use can be satisfied as long as the hints are ignored.
+
+# With the resulting allocation order, this ends up using last chance
+# recoloring for a vreg_256. We should still try to recolor completed
+# virtual registers of the same class, since the existing assignment
+# can only be corrected by moving to a non-overlapping register.
+
+--- |
+ define void @recolor_impossible_hint() #0 {
+ ret void
+ }
+
+ attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
+---
+
+---
+name: recolor_impossible_hint
+alignment: 1
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_256, preferred-register: '$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7' }
+ - { id: 1, class: vreg_256, preferred-register: '$vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8' }
+ - { id: 2, class: vreg_256, preferred-register: '$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9' }
+ - { id: 3, class: vreg_256, preferred-register: '$vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10' }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 10
+body: |
+ ; CHECK-LABEL: name: recolor_impossible_hint
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def %7, implicit-def %19, implicit-def %5
+ ; CHECK-NEXT: SI_SPILL_V256_SAVE %19, %stack.3, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.3, align 4, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V256_SAVE %7, %stack.1, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V256_SAVE %5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: S_NOP 0, implicit-def %17
+ ; CHECK-NEXT: SI_SPILL_V256_SAVE %17, %stack.2, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.2, align 4, addrspace 5)
+ ; CHECK-NEXT: S_NOP 0, implicit-def %4
+ ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE1:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.3, align 4, addrspace 5)
+ ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE]], implicit [[SI_SPILL_V256_RESTORE1]], implicit %4
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY [[SI_SPILL_V256_RESTORE1]]
+ ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit [[COPY]]
+ ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE2:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE2]]
+ ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE3:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.2, align 4, addrspace 5)
+ ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE3]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ S_NOP 0, implicit-def %0:vreg_256, implicit-def %1:vreg_256, implicit-def %2:vreg_256
+ S_NOP 0, implicit-def %3:vreg_256
+ S_NOP 0, implicit-def %4:vreg_64
+ S_NOP 0, implicit %0, implicit %1, implicit %4
+ S_CBRANCH_EXECNZ %bb.3, implicit $exec
+
+ bb.2:
+ S_NOP 0, implicit %1
+ S_NOP 0, implicit %2
+ S_NOP 0, implicit %3
+
+ bb.3:
+ S_ENDPGM 0
+
+...