From d91ee2f782ebeec7ae0b5f8ca879c4f10dccb29f Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 27 Jan 2021 13:02:43 -0800 Subject: [PATCH] [AMDGPU] Do not reassign spilled registers We cannot call LRM::unassign() if LRM::assign() was never called before, these are symmetrical calls. There are two ways of assigning a physical register to virtual, via LRM::assign() and via VRM::assignVirt2Phys(). LRM::assign() will call the VRM to assign the register and then update LiveIntervalUnion. Inline spiller calls VRM directly and thus LiveIntervalUnion never gets updated. A call to LRM::unassign() then asserts about inconsistent liveness. We have to note that not all callers of the InlineSpiller even have LRM to pass, RegAllocPBQP does not have it, so we cannot always pass LRM into the spiller. The only way to get into that spiller LRE_DidCloneVirtReg() call is from LiveRangeEdit::eliminateDeadDefs if we split an LI. This patch refuses to reassign a LiveInterval created by a split to workaround the problem. In fact we cannot reassign a spill anyway as all registers of the needed class are occupied and we are spilling. Fixes: SWDEV-267996 Differential Revision: https://reviews.llvm.org/D95489 --- llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp | 8 +++ llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp | 8 +++ llvm/test/CodeGen/AMDGPU/nsa-reassign.mir | 72 ++++++++++++++++++++++ .../test/CodeGen/AMDGPU/regbank-reassign-split.mir | 38 ++++++++++++ 4 files changed, 126 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/nsa-reassign.mir create mode 100644 llvm/test/CodeGen/AMDGPU/regbank-reassign-split.mir diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp index fc7105b..9f98f9a 100644 --- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp @@ -190,6 +190,14 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const { if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg()) return NSA_Status::FIXED; + // InlineSpiller does not call LRM::assign() after an LI split leaving + // it in an inconsistent state, so we cannot call LRM::unassign(). + // See llvm bug #48911. + // Skip reassign if a register has originated from such split. + // FIXME: Remove the workaround when bug #48911 is fixed. + if (VRM->getPreSplitReg(Reg)) + return NSA_Status::FIXED; + const MachineInstr *Def = MRI->getUniqueVRegDef(Reg); if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg) diff --git a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp index a12e9ab..4fd76ba 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp @@ -471,6 +471,14 @@ bool GCNRegBankReassign::isReassignable(Register Reg) const { if (Reg.isPhysical() || !VRM->isAssignedReg(Reg)) return false; + // InlineSpiller does not call LRM::assign() after an LI split leaving it + // in an inconsistent state, so we cannot call LRM::unassign(). + // See llvm bug #48911. + // Skip reassign if a register has originated from such split. + // FIXME: Remove the workaround when bug #48911 is fixed. + if (VRM->getPreSplitReg(Reg)) + return false; + const MachineInstr *Def = MRI->getUniqueVRegDef(Reg); Register PhysReg = VRM->getPhys(Reg); diff --git a/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir b/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir new file mode 100644 index 0000000..1d1815a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir @@ -0,0 +1,72 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass greedy,amdgpu-nsa-reassign,virtregrewriter,si-shrink-instructions -o - %s | FileCheck -check-prefix=GCN %s + +--- | + define amdgpu_kernel void @nsa_reassign() #0 { ret void } + define amdgpu_kernel void @do_not_reassign_spill() #0 { ret void } + + attributes #0 = { "amdgpu-num-vgpr"="8" } +... + +# GCN-LABEL: name: nsa_reassign +# GCN: IMAGE_SAMPLE_C_L_V1_V8_gfx10 +--- +name: nsa_reassign +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, type: default, offset: 0, size: 4, alignment: 4 } +registers: + - { id: 0, class: vgpr_32, preferred-register: '$vgpr0' } + - { id: 1, class: vgpr_32, preferred-register: '$vgpr1' } + - { id: 2, class: vgpr_32, preferred-register: '$vgpr2' } + - { id: 3, class: vgpr_32, preferred-register: '$vgpr3' } + - { id: 4, class: vgpr_32, preferred-register: '$vgpr4' } + - { id: 5, class: vgpr_32, preferred-register: '$vgpr5' } + - { id: 6, class: vgpr_32, preferred-register: '$vgpr6' } + - { id: 7, class: vgpr_32, preferred-register: '$vgpr7' } +body: | + bb.0: + %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource") + S_ENDPGM 0, implicit %7 +... + +# GCN-LABEL: do_not_reassign_spill +# GCN: IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 +--- +name: do_not_reassign_spill +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, type: default, offset: 0, size: 4, alignment: 4 } +registers: + - { id: 0, class: vgpr_32, preferred-register: '$vgpr0' } + - { id: 1, class: vgpr_32, preferred-register: '$vgpr1' } + - { id: 2, class: vgpr_32, preferred-register: '$vgpr2' } + - { id: 3, class: vgpr_32, preferred-register: '$vgpr3' } + - { id: 4, class: vgpr_32, preferred-register: '$vgpr4' } + - { id: 5, class: vgpr_32, preferred-register: '$vgpr5' } + - { id: 6, class: vgpr_32, preferred-register: '$vgpr6' } + - { id: 7, class: vgpr_32, preferred-register: '$vgpr7' } +body: | + bb.0: + %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + S_NOP 0, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6 + %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource") + S_ENDPGM 0, implicit %7 +... diff --git a/llvm/test/CodeGen/AMDGPU/regbank-reassign-split.mir b/llvm/test/CodeGen/AMDGPU/regbank-reassign-split.mir new file mode 100644 index 0000000..8862644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/regbank-reassign-split.mir @@ -0,0 +1,38 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass greedy,amdgpu-regbanks-reassign,virtregrewriter -o - %s | FileCheck -check-prefix=GCN %s + +--- | + define amdgpu_kernel void @do_not_reassign_spill() #0 { ret void } + + attributes #0 = { "amdgpu-num-vgpr"="8" } +... + +# GCN-LABEL: do_not_reassign_spill{{$}} +# GCN: V_AND_B32_e32 killed $vgpr1, killed $vgpr5, +--- +name: do_not_reassign_spill +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, type: default, offset: 0, size: 4, alignment: 4 } +registers: + - { id: 0, class: vgpr_32, preferred-register: '$vgpr0' } + - { id: 1, class: vgpr_32, preferred-register: '$vgpr1' } + - { id: 2, class: vgpr_32, preferred-register: '$vgpr2' } + - { id: 3, class: vgpr_32, preferred-register: '$vgpr3' } + - { id: 4, class: vgpr_32, preferred-register: '$vgpr4' } + - { id: 5, class: vgpr_32, preferred-register: '$vgpr5' } + - { id: 6, class: vgpr_32 } +body: | + bb.0: + %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + S_NOP 0, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5 + %6 = V_AND_B32_e32 %1, %5, implicit $exec + S_ENDPGM 0, implicit %6 +... -- 2.7.4