RegAllocGreedy: Allow last chance recolor to retry overlapping tuples

author Matt Arsenault <Matthew.Arsenault@amd.com>

Fri, 1 Apr 2022 13:59:05 +0000 (09:59 -0400)

committer Matt Arsenault <arsenm2@gmail.com>

Mon, 25 Apr 2022 21:07:17 +0000 (17:07 -0400)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Fri, 1 Apr 2022 13:59:05 +0000 (09:59 -0400)
committer Matt Arsenault <arsenm2@gmail.com>
Mon, 25 Apr 2022 21:07:17 +0000 (17:07 -0400)
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp

index 029f05d..a6fc9b1 100644 (file)
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1833,6 +1833,18 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
    return false;
  }
  
+/// Return true if the existing assignment of \p Intf overlaps, but is not the
+/// same, as \p PhysReg.
+static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
+                                         const VirtRegMap &VRM,
+                                         MCRegister PhysReg,
+                                         const LiveInterval &Intf) {
+  MCRegister AssignedReg = VRM.getPhys(Intf.reg());
+  if (PhysReg == AssignedReg)
+    return false;
+  return TRI.regsOverlap(PhysReg, AssignedReg);
+}
+
  /// mayRecolorAllInterferences - Check if the virtual registers that
  /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
  /// recolored to free \p PhysReg.
@@ -1858,12 +1870,20 @@ bool RAGreedy::mayRecolorAllInterferences(
        return false;
      }
      for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
-      // If Intf is done and sit on the same register class as VirtReg,
-      // it would not be recolorable as it is in the same state as VirtReg.
-      // However, if VirtReg has tied defs and Intf doesn't, then
+      // If Intf is done and sits on the same register class as VirtReg, it
+      // would not be recolorable as it is in the same state as
+      // VirtReg. However there are at least two exceptions.
+      //
+      // If VirtReg has tied defs and Intf doesn't, then
        // there is still a point in examining if it can be recolorable.
+      //
+      // Additionally, if the register class has overlapping tuple members, it
+      // may still be recolorable using a different tuple. This is more likely
+      // if the existing assignment aliases with the candidate.
+      //
        if (((ExtraInfo->getStage(*Intf) == RS_Done &&
-            MRI->getRegClass(Intf->reg()) == CurRC) &&
+            MRI->getRegClass(Intf->reg()) == CurRC &&
+            !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
             !(hasTiedDef(MRI, VirtReg.reg()) &&
               !hasTiedDef(MRI, Intf->reg()))) ||
            FixedRegisters.count(Intf->reg())) {
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir

new file mode 100644 (file)

index 0000000..09be927
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir
@@ -0,0 +1,84 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=greedy -o - %s | FileCheck %s
+
+# This testcase is restricted to use a maximum of 24 VGPRs. It is
+# therefore possible to allocate a maximum of 3 vreg_256s at a
+# time. The apparent number of registers in the class is larger, but
+# each one overlaps with the next. Allocating a vreg_64 will prevent a
+# full vreg_256 from being live at a given point.
+
+# The hints are trying to force allocation of overlapping vreg_256s
+# which cannot be satisfied. The last S_NOP in %bb.0 with 2 vreg_256s
+# and a vreg_64 use can be satisfied as long as the hints are ignored.
+
+# With the resulting allocation order, this ends up using last chance
+# recoloring for a vreg_256. We should try to recolor for completed
+# virtual registers with the same class, since the existing assignment
+# can only be corrected by adjusting to a non-overlapping register.
+
+--- |
+  define void @recolor_impossible_hint() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
+---
+
+---
+name:            recolor_impossible_hint
+alignment:       1
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_256, preferred-register: '$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7' }
+  - { id: 1, class: vreg_256, preferred-register: '$vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8' }
+  - { id: 2, class: vreg_256, preferred-register: '$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9' }
+  - { id: 3, class: vreg_256, preferred-register: '$vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10' }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy:       10
+body:             |
+  ; CHECK-LABEL: name: recolor_impossible_hint
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit-def %7, implicit-def %19, implicit-def %5
+  ; CHECK-NEXT:   SI_SPILL_V256_SAVE %19, %stack.3, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.3, align 4, addrspace 5)
+  ; CHECK-NEXT:   SI_SPILL_V256_SAVE %7, %stack.1, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.1, align 4, addrspace 5)
+  ; CHECK-NEXT:   SI_SPILL_V256_SAVE %5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit-def %17
+  ; CHECK-NEXT:   SI_SPILL_V256_SAVE %17, %stack.2, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.2, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit-def %4
+  ; CHECK-NEXT:   [[SI_SPILL_V256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.1, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[SI_SPILL_V256_RESTORE1:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.3, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V256_RESTORE]], implicit [[SI_SPILL_V256_RESTORE1]], implicit %4
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vreg_256 = COPY [[SI_SPILL_V256_RESTORE1]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit [[COPY]]
+  ; CHECK-NEXT:   [[SI_SPILL_V256_RESTORE2:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V256_RESTORE2]]
+  ; CHECK-NEXT:   [[SI_SPILL_V256_RESTORE3:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.2, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V256_RESTORE3]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    S_NOP 0, implicit-def %0:vreg_256, implicit-def %1:vreg_256, implicit-def %2:vreg_256
+    S_NOP 0, implicit-def %3:vreg_256
+    S_NOP 0, implicit-def %4:vreg_64
+    S_NOP 0, implicit %0, implicit %1, implicit %4
+    S_CBRANCH_EXECNZ %bb.3, implicit $exec
+
+  bb.2:
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+
+  bb.3:
+    S_ENDPGM 0
+
+...
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Fri, 1 Apr 2022 13:59:05 +0000 (09:59 -0400)
committer	Matt Arsenault <arsenm2@gmail.com>
	Mon, 25 Apr 2022 21:07:17 +0000 (17:07 -0400)
llvm/lib/CodeGen/RegAllocGreedy.cpp		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir	[new file with mode: 0644]	patch \| blob