From f6a160a1023079f02e89917c6d7a74f90f12e23c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 5 Oct 2018 17:42:19 +0000 Subject: [PATCH] [SelectionDAG] allow undefs when matching splat constants And use that to transform fsub with zero constant operands. The integer part isn't used yet, but it is proposed for use in D44548, so adding both enhancements here makes that patch simpler. llvm-svn: 343865 --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 4 ++-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 ++++---- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 11 ++++----- llvm/test/CodeGen/X86/vec_fneg.ll | 32 +++++++------------------- 4 files changed, 19 insertions(+), 37 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index d482b53..28d27b7 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1602,10 +1602,10 @@ SDValue peekThroughOneUseBitcasts(SDValue V); bool isBitwiseNot(SDValue V); /// Returns the SDNode if it is a constant splat BuildVector or constant int. -ConstantSDNode *isConstOrConstSplat(SDValue N); +ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false); /// Returns the SDNode if it is a constant splat BuildVector or constant float. 
-ConstantFPSDNode *isConstOrConstSplatFP(SDValue N); +ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false); class GlobalAddressSDNode : public SDNode { friend class SelectionDAG; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3872f2d..ec7f63a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11011,8 +11011,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); - ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; @@ -11044,9 +11044,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { return DAG.getConstantFP(0.0f, DL, VT); } - // (fsub 0, B) -> -B + // (fsub -0.0, N1) -> -N1 if (N0CFP && N0CFP->isZero()) { - if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { + if (N0CFP->isNegative() || + (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index a884331..0f8bd08 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8211,7 +8211,7 @@ bool llvm::isBitwiseNot(SDValue V) { return C && C->isAllOnesValue(); } -ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) { +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) return CN; @@
-8220,9 +8220,7 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) { ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); // BuildVectors can truncate their operands. Ignore that case here. - // FIXME: We blindly ignore splats which include undef which is overly - // pessimistic. - if (CN && UndefElements.none() && + if (CN && (UndefElements.none() || AllowUndefs) && CN->getValueType(0) == N.getValueType().getScalarType()) return CN; } @@ -8230,15 +8228,14 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) { return nullptr; } -ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) { +ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) return CN; if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { BitVector UndefElements; ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); - - if (CN && UndefElements.none()) + if (CN && (UndefElements.none() || AllowUndefs)) return CN; } diff --git a/llvm/test/CodeGen/X86/vec_fneg.ll b/llvm/test/CodeGen/X86/vec_fneg.ll index e9dc88e..e84f716 100644 --- a/llvm/test/CodeGen/X86/vec_fneg.ll +++ b/llvm/test/CodeGen/X86/vec_fneg.ll @@ -102,16 +102,12 @@ define <2 x float> @fneg_bitcast(i64 %i) nounwind { define <4 x float> @fneg_undef_elts_v4f32(<4 x float> %x) { ; X32-SSE-LABEL: fneg_undef_elts_v4f32: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: movaps {{.*#+}} xmm1 = <-0,u,u,-0> -; X32-SSE-NEXT: subps %xmm0, %xmm1 -; X32-SSE-NEXT: movaps %xmm1, %xmm0 +; X32-SSE-NEXT: xorps {{\.LCPI.*}}, %xmm0 ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: fneg_undef_elts_v4f32: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movaps {{.*#+}} xmm1 = <-0,u,u,-0> -; X64-SSE-NEXT: subps %xmm0, %xmm1 -; X64-SSE-NEXT: movaps %xmm1, %xmm0 +; X64-SSE-NEXT: xorps {{.*}}(%rip), %xmm0 ; X64-SSE-NEXT: retq %r = fsub <4 x float> <float -0.0, float undef, float undef, float -0.0>, %x ret <4 x float> %r @@ -120,25 +116,13 @@ define <4 x float> @fneg_undef_elts_v4f32(<4 x float> %x) { ; This isn't fneg, but similarly check that (X - 0.0) is simplified.
define <4 x float> @fsub0_undef_elts_v4f32(<4 x float> %x) { -; X32-SSE1-LABEL: fsub0_undef_elts_v4f32: -; X32-SSE1: # %bb.0: -; X32-SSE1-NEXT: retl -; -; X32-SSE2-LABEL: fsub0_undef_elts_v4f32: -; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: xorps %xmm1, %xmm1 -; X32-SSE2-NEXT: subps %xmm1, %xmm0 -; X32-SSE2-NEXT: retl -; -; X64-SSE1-LABEL: fsub0_undef_elts_v4f32: -; X64-SSE1: # %bb.0: -; X64-SSE1-NEXT: retq +; X32-SSE-LABEL: fsub0_undef_elts_v4f32: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: retl ; -; X64-SSE2-LABEL: fsub0_undef_elts_v4f32: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: xorps %xmm1, %xmm1 -; X64-SSE2-NEXT: subps %xmm1, %xmm0 -; X64-SSE2-NEXT: retq +; X64-SSE-LABEL: fsub0_undef_elts_v4f32: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: retq %r = fsub <4 x float> %x, <float 0.0, float undef, float 0.0, float undef> ret <4 x float> %r } -- 2.7.4