From c24d68fff1fe8d3115c411d6e81092eb1f855b52 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 2 Aug 2022 17:09:58 +0100 Subject: [PATCH] [AMDGPU] Take advantage of VOP3 literals in convertToThreeAddress This improves a corner case where v_fmac can be converted to v_fma on GFX10+ even if it has a literal operand. Differential Revision: https://reviews.llvm.org/D130992 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 5 ++--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll | 7 ++----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 3eaac39..8eed1b0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3416,9 +3416,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, } // VOP2 mac/fmac with a literal operand cannot be converted to VOP3 mad/fma - // because VOP3 does not allow a literal operand. - // TODO: Remove this restriction for GFX10. - if (Src0Literal) + // if VOP3 does not allow a literal operand. + if (Src0Literal && !ST.hasVOP3Literal()) return nullptr; unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll index 3dca12d..be3d6e7 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll @@ -34,17 +34,14 @@ define float @v_fma_imm(float %a, float %c) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_fmac_legacy_f32_e32 v1, 0x41200000, v0 -; GFX10-NEXT: v_mov_b32_e32 v0, v1 +; GFX10-NEXT: v_fma_legacy_f32 v0, 0x41200000, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_fmac_dx9_zero_f32_e32 v1, 0x41200000, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mov_b32_e32 v0, v1 +; GFX11-NEXT: v_fma_dx9_zero_f32 v0, 0x41200000, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fma = call float @llvm.amdgcn.fma.legacy(float %a, float 10.0, float %c) ret float %fma -- 2.7.4