From a253a2a793cda34d1f6421ee9b7ca76a03fdfc59 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Sun, 23 Feb 2020 11:26:28 -0500
Subject: [PATCH] [SDAG] fold fsub -0.0, undef to undef rather than NaN

A question about this behavior came up on llvm-dev:
http://lists.llvm.org/pipermail/llvm-dev/2020-February/139003.html
...and as part of backend improvements in D73978.

We decided not to implement a more general change that would have
folded any FP binop with nearly arbitrary constant + undef operand
to undef because that is not theoretically correct (even if it is
practically correct).

This is the SDAG-equivalent to the IR change in D74713.
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 7 ++++++-
 llvm/test/CodeGen/X86/vec_fneg.ll              | 2 --
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e809816..d2db4bc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5112,8 +5112,13 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
   }
 
   switch (Opcode) {
-  case ISD::FADD:
   case ISD::FSUB:
+    // -0.0 - undef --> undef (consistent with "fneg undef")
+    if (N1CFP && N1CFP->getValueAPF().isNegZero() && N2.isUndef())
+      return getUNDEF(VT);
+    LLVM_FALLTHROUGH;
+
+  case ISD::FADD:
   case ISD::FMUL:
   case ISD::FDIV:
   case ISD::FREM:
diff --git a/llvm/test/CodeGen/X86/vec_fneg.ll b/llvm/test/CodeGen/X86/vec_fneg.ll
index 4d5539f..c3c1932 100644
--- a/llvm/test/CodeGen/X86/vec_fneg.ll
+++ b/llvm/test/CodeGen/X86/vec_fneg.ll
@@ -76,12 +76,10 @@ define <4 x float> @fneg_undef(<4 x float> %Q) nounwind {
 define <4 x float> @fsub_neg0_undef_elts_undef(<4 x float> %x) {
 ; X32-SSE-LABEL: fsub_neg0_undef_elts_undef:
 ; X32-SSE: # %bb.0:
-; X32-SSE-NEXT: movaps {{.*#+}} xmm0 =
 ; X32-SSE-NEXT: retl
 ;
 ; X64-SSE-LABEL: fsub_neg0_undef_elts_undef:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movaps {{.*#+}} xmm0 =
 ; X64-SSE-NEXT: retq
   %r = fsub <4 x float> , undef
   ret <4 x float> %r
-- 
2.7.4