From d6467e00dfccf10aa91e5a1d55d981a9b13642a3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 12 Jun 2021 11:21:57 -0400 Subject: [PATCH] AMDGPU: Fix infinite loop in DAG combine with fneg + fma We were not reporting isFNegFree for v2f32, although it is effectively free after legalization. The generic combine was pulling fneg out of the fma source operands, and the AMDGPU combine was doing the opposite. --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 6 +++--- llvm/test/CodeGen/AMDGPU/fneg-combines.ll | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 895ccef..50873a2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -848,9 +848,9 @@ bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { assert(VT.isFloatingPoint()); - return VT == MVT::f32 || VT == MVT::f64 || - (Subtarget->has16BitInsts() && VT == MVT::f16) || - (Subtarget->hasVOP3PInsts() && VT == MVT::v2f16); + // Report this based on the end legalized type. + VT = VT.getScalarType(); + return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16; } bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT, diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll index 2ed74ac..900d07c 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -2575,8 +2575,31 @@ define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* % ret void } +; The AMDGPU combine to pull fneg into the FMA operands was being +; undone by the generic combine to pull the fneg out of the fma if +; !isFNegFree. We were reporting false for v2f32 even though it will +; be split into f32 where it will be free. +; GCN-LABEL: {{^}}fneg_fma_fneg_dagcombine_loop: +; GCN: s_brev_b32 [[NEGZERO:s[0-9]+]], 1{{$}} +; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], v2, -v4, [[NEGZERO]] +; GCN-DAG: v_fma_f32 [[FMA1:v[0-9]+]], v3, -v5, [[NEGZERO]] +; GCN-DAG: v_sub_f32_e32 [[SUB0:v[0-9]+]], [[FMA0]], v0 +; GCN-DAG: v_sub_f32_e32 [[SUB1:v[0-9]+]], [[FMA1]], v1 +; GCN-DAG: v_mul_f32_e32 v0, [[SUB0]], v4 +; GCN-DAG: v_mul_f32_e32 v1, [[SUB1]], v5 +; GCN: s_setpc_b64 +define <2 x float> @fneg_fma_fneg_dagcombine_loop(<2 x float> %arg, <2 x float> %arg1, <2 x float> %arg2) #0 { +bb: + %i3 = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %arg1, <2 x float> %arg2, <2 x float> zeroinitializer) + %i4 = fadd fast <2 x float> %i3, %arg + %i5 = fneg <2 x float> %i4 + %i6 = fmul fast <2 x float> %i5, %arg2 + ret <2 x float> %i6 +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 declare float @llvm.fma.f32(float, float, float) #1 +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) declare float @llvm.fmuladd.f32(float, float, float) #1 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1 declare float @llvm.sin.f32(float) #1 @@ -2601,3 +2624,4 @@ declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind "unsafe-fp-math"="true" } +attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" } -- 2.7.4