From 3b17cb1506e5d79cfb2b7c0b903395c3f0a1a310 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 2 Dec 2021 14:02:01 -0800 Subject: [PATCH] [AMDGPU] Kill def when folding immediate in two-addr pass Two-address pass works right before RA and if an immediate was folded into an instruction there is nothing to remove the dead def. We end up with something like: v_mov_b32_e32 v14, 0xc1700000 v_mov_b32_e32 v14, 0x41200000 v_fmaak_f32 v51, s67, v19, 0xc1700000 v_fmaak_f32 v38, v51, v19, 0x41200000 The patch kills the dead move instruction right in the folding. Differential Revision: https://reviews.llvm.org/D114999 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 30 +++++++++--- llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll | 2 +- llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir | 62 +++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 92f5322..3d8474a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3106,23 +3106,26 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, } static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, - int64_t &Imm) { + int64_t &Imm, MachineInstr **DefMI = nullptr) { if (Reg.isPhysical()) return false; auto *Def = MRI.getUniqueVRegDef(Reg); if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) { Imm = Def->getOperand(1).getImm(); + if (DefMI) + *DefMI = Def; return true; } return false; } -static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm) { +static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm, + MachineInstr **DefMI = nullptr) { if (!MO->isReg()) return false; const MachineFunction *MF = MO->getParent()->getParent()->getParent(); const MachineRegisterInfo &MRI = MF->getRegInfo(); - return getFoldableImm(MO->getReg(), MRI, Imm); + return getFoldableImm(MO->getReg(), MRI, Imm, 
DefMI); } static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, @@ -3195,8 +3198,20 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, // If we have an SGPR input, we will violate the constant bus restriction. (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { + MachineInstr *DefMI; + const auto killDef = [&DefMI, &MBB, this]() -> void { + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + // The only user is the instruction which will be killed. + if (!MRI.hasOneNonDBGUse(DefMI->getOperand(0).getReg())) + return; + // We cannot just remove the DefMI here, calling pass will crash. + DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF)); + for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I) + DefMI->RemoveOperand(I); + }; + int64_t Imm; - if (getFoldableImm(Src2, Imm)) { + if (getFoldableImm(Src2, Imm, &DefMI)) { unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32) : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32); @@ -3209,13 +3224,14 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, updateLiveVariables(LV, MI, *MIB); if (LIS) LIS->ReplaceMachineInstrInMaps(MI, *MIB); + killDef(); return MIB; } } unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32) : (IsF16 ? 
AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); - if (getFoldableImm(Src1, Imm)) { + if (getFoldableImm(Src1, Imm, &DefMI)) { if (pseudoToMCOpcode(NewOpc) != -1) { MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) .add(*Dst) @@ -3225,10 +3241,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, updateLiveVariables(LV, MI, *MIB); if (LIS) LIS->ReplaceMachineInstrInMaps(MI, *MIB); + killDef(); return MIB; } } - if (getFoldableImm(Src0, Imm)) { + if (getFoldableImm(Src0, Imm, &DefMI)) { if (pseudoToMCOpcode(NewOpc) != -1 && isOperandLegal( MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0), @@ -3241,6 +3258,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, updateLiveVariables(LV, MI, *MIB); if (LIS) LIS->ReplaceMachineInstrInMaps(MI, *MIB); + killDef(); return MIB; } } diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll index 375a32a..708b7d9 100644 --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -68,8 +68,8 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3 ; GFX9-NEXT: v_lshlrev_b32_e32 v8, 2, v2 ; GFX9-NEXT: v_add_u32_e32 v9, v17, v12 ; GFX9-NEXT: s_mov_b64 s[10:11], 0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x3727c5ac ; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: ; implicit-def: $vgpr3 ; GFX9-NEXT: .LBB1_2: ; %bb23 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v0 diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir index 343864c..49de226 100644 --- a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir @@ -1,6 +1,8 @@ # RUN: llc -march=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s # GCN-LABEL: name: test_fmamk_reg_imm_f32 +# GCN: %2:vgpr_32 = IMPLICIT_DEF +# GCN-NOT: V_MOV_B32 # GCN: 
V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f32 @@ -20,6 +22,8 @@ body: | ... # GCN-LABEL: name: test_fmamk_imm_reg_f32 +# GCN: %2:vgpr_32 = IMPLICIT_DEF +# GCN-NOT: V_MOV_B32 # GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_imm_reg_f32 @@ -39,6 +43,8 @@ body: | ... # GCN-LABEL: name: test_fmaak_f32 +# GCN: %1:vgpr_32 = IMPLICIT_DEF +# GCN-NOT: V_MOV_B32 # GCN: V_FMAAK_F32 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec --- name: test_fmaak_f32 @@ -56,6 +62,8 @@ body: | ... # GCN-LABEL: name: test_fmamk_reg_imm_f16 +# GCN: %2:vgpr_32 = IMPLICIT_DEF +# GCN-NOT: V_MOV_B32 # GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f16 @@ -75,6 +83,8 @@ body: | ... # GCN-LABEL: name: test_fmamk_imm_reg_f16 +# GCN: %2:vgpr_32 = IMPLICIT_DEF +# GCN-NOT: V_MOV_B32 # GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_imm_reg_f16 @@ -94,6 +104,8 @@ body: | ... # GCN-LABEL: name: test_fmaak_f16 +# GCN: %1:vgpr_32 = IMPLICIT_DEF +# GCN-NOT: V_MOV_B32 # GCN: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec --- name: test_fmaak_f16 @@ -110,6 +122,8 @@ body: | ... # GCN-LABEL: name: test_fmaak_sgpr_src0_f32 +# GCN: %1:vgpr_32 = IMPLICIT_DEF +# GCN-NOT: V_MOV_B32 # GCN: %2:vgpr_32 = V_FMAMK_F32 killed %0, 1078523331, %3:vgpr_32, implicit $mode, implicit $exec --- @@ -129,6 +143,8 @@ body: | ... # GCN-LABEL: name: test_fmaak_inlineimm_src0_f32 +# GCN: %0:vgpr_32 = IMPLICIT_DEF +# GCN-NOT: V_MOV_B32 # GCN: %1:vgpr_32 = V_FMAMK_F32 1073741824, 1078523331, %2:vgpr_32, implicit $mode, implicit $exec --- @@ -183,6 +199,8 @@ body: | ... 
# GCN-LABEL: name: test_fmaak_inline_literal_f16 +# GCN: %1:vgpr_32 = IMPLICIT_DEF +# GCN-NOT: V_MOV_B32 # GCN: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec --- @@ -201,3 +219,47 @@ body: | ... +# GCN-LABEL: name: test_fmamk_reg_imm_f32_2_folds +# GCN: %2:vgpr_32 = IMPLICIT_DEF +# GCN-NOT: V_MOV_B32 +# GCN: V_FMAMK_F32 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec +# GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +--- +name: test_fmamk_reg_imm_f32_2_folds +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = V_MOV_B32_e32 1078523331, implicit $exec + %3 = V_FMAC_F32_e32 %0.sub0, %2, %1, implicit $mode, implicit $exec + %4 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec + +... + +# GCN-LABEL: name: test_fmamk_reg_imm_f32_used_imm +# GCN: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec +# GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +--- +name: test_fmamk_reg_imm_f32_used_imm +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = V_MOV_B32_e32 1078523331, implicit $exec + %3 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec + S_NOP 0, implicit %2 + +... -- 2.7.4