From 57c37b2dcd5959660ebab63ca049c8de4da116c7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 14 Nov 2017 02:16:54 +0000 Subject: [PATCH] AMDGPU: Fix producing saveexec when the copy is spilled If the register from the copy from exec was spilled, the copy before the spill was deleted leaving a spill of undefined register verifier error and miscompiling. Check for other use instructions of the copy register. llvm-svn: 318132 --- llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp | 18 ++- .../AMDGPU/undefined-physreg-sgpr-spill.mir | 144 +++++++++++++++++++++ 2 files changed, 160 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 4c991c7..4f3fb97 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -10,7 +10,7 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -276,6 +276,8 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { break; } + bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI); + if (J->modifiesRegister(CopyToExec, TRI)) { if (SaveExecInst) { DEBUG(dbgs() << "Multiple instructions modify " @@ -288,7 +290,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END) break; - if (J->readsRegister(CopyFromExec, TRI)) { + if (ReadsCopyFromExec) { SaveExecInst = &*J; DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n'); continue; @@ -296,6 +298,18 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n'); break; } + } else if (ReadsCopyFromExec && !SaveExecInst) { + // Make sure no other instruction is trying to use this copy, before it + // will be rewritten by the saveexec, i.e. hasOneUse. There may have + // been another use, such as an inserted spill. For example: + // + // %sgpr0_sgpr1 = COPY %exec + // spill %sgpr0_sgpr1 + // %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1 + // + DEBUG(dbgs() << "Found second use of save inst candidate: " + << *J << '\n'); + break; } if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) { diff --git a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir new file mode 100644 index 0000000..a29fd73 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir @@ -0,0 +1,144 @@ +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -run-pass si-optimize-exec-masking -verify-machineinstrs %s +--- | + define amdgpu_kernel void @undefined_physreg_sgpr_spill() #0 { + unreachable + } + + define amdgpu_kernel void @undefined_physreg_sgpr_spill_reorder() #0 { + unreachable + } + + attributes #0 = { nounwind "amdgpu-num-sgpr"="16" } + +... +--- + +# copy + s_and_b64 was turned into saveexec, deleting the copy, +# leaving a spill of the undefined register. + +# CHECK-LABEL: {{^}}name: undefined_physreg_sgpr_spill: +# CHECK: %sgpr0_sgpr1 = COPY %exec, implicit-def %exec +# CHECK-NEXT: SI_SPILL_S64_SAVE %sgpr0_sgpr1, +# CHECK-NEXT: %sgpr2_sgpr3 = S_AND_B64 killed %sgpr0_sgpr1, killed %vcc, implicit-def dead %scc +# CHECK: %exec = COPY killed %sgpr2_sgpr3 +name: undefined_physreg_sgpr_spill +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: +liveins: + - { reg: '%vgpr0', virtual-reg: '' } + - { reg: '%sgpr4_sgpr5', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4, + stack-id: 1, callee-saved-register: '', callee-saved-restored: true, + di-variable: '', di-expression: '', di-location: '' } +constants: +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %vgpr0, %sgpr4_sgpr5 + + %vgpr1_vgpr2 = COPY killed %sgpr4_sgpr5, implicit %exec + %vgpr1 = GLOBAL_LOAD_UBYTE killed %vgpr1_vgpr2, 0, 0, 0, implicit %exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(2)* undef`) + %vcc = V_CMP_NE_U32_e64 0, %vgpr0, implicit %exec + %sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed %vgpr1, implicit %exec + %vgpr1 = V_CNDMASK_B32_e64 0, -1, killed %sgpr0_sgpr1, implicit %exec + %sgpr0_sgpr1 = COPY %exec, implicit-def %exec + SI_SPILL_S64_SAVE %sgpr0_sgpr1, %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (store 8 into %stack.0, align 4) + %sgpr2_sgpr3 = S_AND_B64 killed %sgpr0_sgpr1, killed %vcc, implicit-def dead %scc + %exec = S_MOV_B64_term killed %sgpr2_sgpr3 + SI_MASK_BRANCH %bb.2, implicit %exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.3(0x80000000) + liveins: %vgpr0, %vgpr1 + + %sgpr2_sgpr3 = S_MOV_B64 0 + %vgpr2 = V_MOV_B32_e32 0, implicit %exec + %sgpr4_sgpr5 = IMPLICIT_DEF + S_BRANCH %bb.3 + + bb.2: + successors: + + %sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (load 8 from %stack.0, align 4) + %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc + + bb.3: + liveins: %vgpr0, %vgpr1, %vgpr2, %sgpr2_sgpr3, %sgpr4_sgpr5 + + %vcc = COPY %vgpr1 + S_ENDPGM + +... +--- + +# Move spill to after future save instruction +# CHECK-LABEL: {{^}}name: undefined_physreg_sgpr_spill_reorder: +# CHECK: %sgpr0_sgpr1 = COPY %exec, implicit-def %exec +# CHECK: %exec = COPY killed %sgpr2_sgpr3 + +# CHECK: %sgpr0_sgpr1 = COPY %exec, implicit-def %exec +# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def dead %scc +# CHECK: SI_SPILL_S64_SAVE killed %sgpr0_sgpr1, %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (store 8 into %stack.0, align 4) +# CHECK: %exec = COPY killed %sgpr2_sgpr3 +name: undefined_physreg_sgpr_spill_reorder +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: +liveins: + - { reg: '%vgpr0', virtual-reg: '' } + - { reg: '%sgpr4_sgpr5', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4, + stack-id: 1, callee-saved-register: '', callee-saved-restored: true, + di-variable: '', di-expression: '', di-location: '' } +constants: +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %vgpr0, %sgpr4_sgpr5 + + %vgpr1_vgpr2 = COPY killed %sgpr4_sgpr5, implicit %exec + %vgpr1 = GLOBAL_LOAD_UBYTE killed %vgpr1_vgpr2, 0, 0, 0, implicit %exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(2)* undef`) + %vcc = V_CMP_NE_U32_e64 0, %vgpr0, implicit %exec + %sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed %vgpr1, implicit %exec + %vgpr1 = V_CNDMASK_B32_e64 0, -1, killed %sgpr0_sgpr1, implicit %exec + %sgpr0_sgpr1 = COPY %exec, implicit-def %exec + %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def dead %scc + SI_SPILL_S64_SAVE killed %sgpr0_sgpr1, %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (store 8 into %stack.0, align 4) + %exec = S_MOV_B64_term killed %sgpr2_sgpr3 + SI_MASK_BRANCH %bb.2, implicit %exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.3(0x80000000) + liveins: %vgpr0, %vgpr1 + + %sgpr2_sgpr3 = S_MOV_B64 0 + %vgpr2 = V_MOV_B32_e32 0, implicit %exec + %sgpr4_sgpr5 = IMPLICIT_DEF + S_BRANCH %bb.3 + + bb.2: + successors: + + %sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (load 8 from %stack.0, align 4) + %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc + + bb.3: + liveins: %vgpr0, %vgpr1, %vgpr2, %sgpr2_sgpr3, %sgpr4_sgpr5 + + %vcc = COPY %vgpr1 + S_ENDPGM + +... -- 2.7.4