RegAllocFast: Fix dropping subreg indexes on unassigned subreg defs
author: Matt Arsenault <Matthew.Arsenault@amd.com>
Fri, 3 Mar 2023 17:11:58 +0000 (13:11 -0400)
committer: Matt Arsenault <arsenm2@gmail.com>
Wed, 5 Apr 2023 22:25:51 +0000 (18:25 -0400)
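The def handling in allocateInstruction() assumed that every register operand had been assigned a physical register, so it cleared the subregister index on any subreg def.
Operands that were not assigned in this run (still virtual registers) should be ignored so that their subregister indexes are preserved.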

Fixes #61134

llvm/lib/CodeGen/RegAllocFast.cpp
llvm/test/CodeGen/AMDGPU/regalloc-fast-dont-drop-subreg-index-issue61134.mir [new file with mode: 0644]

index 613e146..dd6f315 100644 (file)
@@ -1321,9 +1321,11 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
       if (!MO.isReg() || !MO.isDef())
         continue;
 
+      Register Reg = MO.getReg();
+
       // subreg defs don't free the full register. We left the subreg number
       // around as a marker in setPhysReg() to recognize this case here.
-      if (MO.getSubReg() != 0) {
+      if (Reg.isPhysical() && MO.getSubReg() != 0) {
         MO.setSubReg(0);
         continue;
       }
@@ -1334,7 +1336,6 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
       // Do not free tied operands and early clobbers.
       if ((MO.isTied() && !TiedOpIsUndef(MO, I)) || MO.isEarlyClobber())
         continue;
-      Register Reg = MO.getReg();
       if (!Reg)
         continue;
       if (Reg.isVirtual()) {
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-fast-dont-drop-subreg-index-issue61134.mir b/llvm/test/CodeGen/AMDGPU/regalloc-fast-dont-drop-subreg-index-issue61134.mir
new file mode 100644 (file)
index 0000000..86f6bfb
--- /dev/null
@@ -0,0 +1,39 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -sgpr-regalloc=fast -start-before=regallocfast,0 -stop-before=greedy -o - %s | FileCheck %s
+
+# RegAllocFast was incorrectly dropping subregister indexes on
+# unassigned virtual registers. Make sure they are passed through
+# undisturbed to the second regalloc run.
+
+---
+name:            func
+alignment:       1
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: func
+    ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef %0.sub0:vreg_64 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
+    ; CHECK-NEXT: undef %2.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: undef %0.sub0:vreg_64, renamable $sgpr0_sgpr1 = V_ADD_CO_U32_e64 1, %0.sub0, 0, implicit $exec
+    ; CHECK-NEXT: undef %2.sub1:vreg_64, dead renamable $sgpr0_sgpr1 = V_ADDC_U32_e64 0, %2.sub1, killed $sgpr0_sgpr1, 0, implicit $exec
+    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit %0, implicit %2, implicit $vgpr0
+    undef %0.sub0:vreg_64 = COPY $vgpr0
+    %2:vreg_64 = COPY $vgpr1_vgpr2
+    undef %1.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    undef %0.sub0:vreg_64, %3:sreg_64_xexec = V_ADD_CO_U32_e64 1, %0.sub0, 0, implicit $exec
+    undef %1.sub1:vreg_64, dead %4:sreg_64_xexec = V_ADDC_U32_e64 0, %1.sub1, %3, 0, implicit $exec
+    %5:vreg_64 = GLOBAL_LOAD_DWORDX2 %2, 0, 0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 %5.sub0, implicit $exec
+    S_ENDPGM 0, implicit %0, implicit %1, implicit $vgpr0
+
+...