From 69932e4d692f20615935db0d48f45b21a89cae23 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 19 Mar 2018 14:07:15 +0000 Subject: [PATCH] AMDGPU: Don't leave dead illegal VGPR->SGPR copies Normally DCE kills these, but at -O0 these get left behind leaving suspicious looking illegal copies. Replace with IMPLICIT_DEF to avoid iterator issues. llvm-svn: 327842 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 7 ++++ .../CodeGen/AMDGPU/control-flow-fastregalloc.ll | 5 +-- llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir | 38 ++++++++++++++++++++-- 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 86c5f62..35b1df4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3923,6 +3923,13 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg()); MRI.clearKillFlags(Inst.getOperand(1).getReg()); Inst.getOperand(0).setReg(DstReg); + + // Make sure we don't leave around a dead VGPR->SGPR copy. Normally + // these are deleted later, but at -O0 it would leave a suspicious + // looking illegal copy of an undef register. 
+ for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I) + Inst.RemoveOperand(I); + Inst.setDesc(get(AMDGPU::IMPLICIT_DEF)); continue; } diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index 071bcbc..c51b8e0 100644 --- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -41,16 +41,17 @@ ; GCN: {{^}}BB{{[0-9]+}}_1: ; %if ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD1:v[0-9]+]] -; GCN: s_waitcnt lgkmcnt(0) ; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload +; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) + ; Spill val register ; GCN: v_add_i32_e32 [[VAL:v[0-9]+]], vcc, [[LOAD1]], [[RELOAD_LOAD0]] ; GCN: buffer_store_dword [[VAL]], off, s[0:3], s7 offset:[[VAL_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; VMEM: [[ENDIF]]: + ; Reload and restore exec mask -; VGPR: s_waitcnt lgkmcnt(0) ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir index f2c7466..94b6055 100644 --- a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir +++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir @@ -1,7 +1,6 @@ # RUN: llc -march=amdgcn -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GCN ---- | - define amdgpu_kernel void @phi_visit_order() { ret void } +--- name: phi_visit_order tracksRegLiveness: true @@ -37,4 +36,39 @@ body: | S_BRANCH %bb.1 ... 
+ +--- + +# GCN-LABEL: name: dead_illegal_virtreg_copy +# GCN: %0:vgpr_32 = COPY $vgpr0 +# GCN: %1:sreg_32_xm0 = IMPLICIT_DEF +# GCN: S_ENDPGM implicit %0 + +name: dead_illegal_virtreg_copy +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + %0:vgpr_32 = COPY $vgpr0 + %1:sreg_32_xm0 = COPY %0 + S_ENDPGM implicit %1 +... + --- + +# GCN-LABEL: name: dead_illegal_physreg_copy +# GCN: %2:vgpr_32 = COPY $vgpr0 +# GCN: %1:sreg_32_xm0 = IMPLICIT_DEF +# GCN: S_ENDPGM implicit %2 + +name: dead_illegal_physreg_copy +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + %0:sreg_32_xm0 = COPY $vgpr0 + %1:sreg_32_xm0 = COPY %0 + S_ENDPGM implicit %1 +... -- 2.7.4