From 83ba349ae0a853e0c2cd8e8aadc88993e9fb9a19 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 14 Feb 2023 17:49:40 -0500 Subject: [PATCH] [InstSimplify] fix/improve folding with an SNaN operand There are 2 issues here: 1. In the default LLVM FP environment (regular FP math instructions), SNaN is some flavor of "don't care" which we will nail down in D143074, so this is just a quality-of-implementation improvement for default FP. 2. In the constrained FP environment (constrained intrinsics), SNaN must not propagate through a math operation; it has to be quieted according to IEEE-754 spec. That is independent of exception handling mode, so the current behavior is a miscompile. Differential Revision: https://reviews.llvm.org/D143505 --- llvm/include/llvm/ADT/APFloat.h | 8 ++++++++ llvm/lib/Analysis/InstructionSimplify.cpp | 15 +++++++++------ llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 9 ++------- llvm/test/Transforms/InstSimplify/fp-nan.ll | 8 ++++---- llvm/test/Transforms/InstSimplify/strictfp-fadd.ll | 8 ++++++-- 5 files changed, 29 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index 402ffba..3823850 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -1136,6 +1136,14 @@ public: return Value; } + /// Assuming this is an IEEE-754 NaN value, quiet its signaling bit. + /// This preserves the sign and payload bits. + APFloat makeQuiet() const { + APFloat Result(*this); + Result.getIEEE().makeQuiet(); + return Result; + } + opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo); opStatus convertToInteger(MutableArrayRef Input, diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 4ca117b..c71ff94 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5309,13 +5309,15 @@ Value *llvm::simplifyFNegInst(Value *Op, FastMathFlags FMF, /// Try to propagate existing NaN values when possible. If not, replace the /// constant or elements in the constant with a canonical NaN. static Constant *propagateNaN(Constant *In) { - if (auto *VecTy = dyn_cast(In->getType())) { + Type *Ty = In->getType(); + if (auto *VecTy = dyn_cast(Ty)) { unsigned NumElts = VecTy->getNumElements(); SmallVector NewC(NumElts); for (unsigned i = 0; i != NumElts; ++i) { Constant *EltC = In->getAggregateElement(i); // Poison and existing NaN elements propagate. // Replace unknown or undef elements with canonical NaN. + // TODO: Quiet a signaling NaN element. if (EltC && (isa(EltC) || EltC->isNaN())) NewC[i] = EltC; else @@ -5324,13 +5326,14 @@ static Constant *propagateNaN(Constant *In) { return ConstantVector::get(NewC); } - // It is not a fixed vector, but not a simple NaN either? + // If it is not a fixed vector, but not a simple NaN either, return a + // canonical NaN. if (!In->isNaN()) - return ConstantFP::getNaN(In->getType()); + return ConstantFP::getNaN(Ty); - // Propagate the existing NaN constant when possible. - // TODO: Should we quiet a signaling NaN? - return In; + // Propagate an existing QNaN constant. If it is an SNaN, make it quiet, but + // preserve the sign/payload. + return ConstantFP::get(Ty, cast(In)->getValue().makeQuiet()); } /// Perform folds that are common to any floating-point operation. This implies diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 62c3eec..a50511e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -981,13 +981,8 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { if (II.isStrictFP()) break; - if (C && C->isNaN()) { - // FIXME: We just need to make the nan quiet here, but that's unavailable - // on APFloat, only IEEEfloat - auto *Quieted = - ConstantFP::get(Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven)); - return IC.replaceInstUsesWith(II, Quieted); - } + if (C && C->isNaN()) + return IC.replaceInstUsesWith(II, ConstantFP::get(Ty, C->makeQuiet())); // ldexp(x, 0) -> x // ldexp(x, undef) -> x diff --git a/llvm/test/Transforms/InstSimplify/fp-nan.ll b/llvm/test/Transforms/InstSimplify/fp-nan.ll index 1a27972..25d3a0a 100644 --- a/llvm/test/Transforms/InstSimplify/fp-nan.ll +++ b/llvm/test/Transforms/InstSimplify/fp-nan.ll @@ -31,21 +31,21 @@ define float @fsub_nan_op0(float %x) { ret float %r } -; Signaling +; Signaling - make quiet and preserve the payload and signbit define float @fsub_nan_op1(float %x) { ; CHECK-LABEL: @fsub_nan_op1( -; CHECK-NEXT: ret float 0x7FF1000000000000 +; CHECK-NEXT: ret float 0x7FF9000000000000 ; %r = fsub float %x, 0x7FF1000000000000 ret float %r } -; Signaling and signed +; Signaling and signed - make quiet and preserve the payload and signbit define double @fmul_nan_op0(double %x) { ; CHECK-LABEL: @fmul_nan_op0( -; CHECK-NEXT: ret double 0xFFF0000000000001 +; CHECK-NEXT: ret double 0xFFF8000000000001 ; %r = fmul double 0xFFF0000000000001, %x ret double %r diff --git a/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll b/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll index f36b618..f27e430c 100644 --- a/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll +++ b/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll @@ -370,17 +370,21 @@ define float @fold_fadd_qnan_qnan_ebstrict() #0 { ret float %add } +; Exceptions are ignored, so this can be folded, but constrained math requires that SNaN is quieted per IEEE-754 spec. + define float @fold_fadd_snan_variable_ebignore(float %x) #0 { ; CHECK-LABEL: @fold_fadd_snan_variable_ebignore( -; CHECK-NEXT: ret float 0x7FF4000000000000 +; CHECK-NEXT: ret float 0x7FFC000000000000 ; %add = call float @llvm.experimental.constrained.fadd.f32(float 0x7ff4000000000000, float %x, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0 ret float %add } +; Exceptions may (not) trap, so this can be folded, but constrained math requires that SNaN is quieted per IEEE-754 spec. + define float @fold_fadd_snan_variable_ebmaytrap(float %x) #0 { ; CHECK-LABEL: @fold_fadd_snan_variable_ebmaytrap( -; CHECK-NEXT: ret float 0x7FF4000000000000 +; CHECK-NEXT: ret float 0x7FFC000000000000 ; %add = call float @llvm.experimental.constrained.fadd.f32(float 0x7ff4000000000000, float %x, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 ret float %add -- 2.7.4