From 764ae8bd72aac2cf77cb24d0e1c1136c3179e09b Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Tue, 27 Sep 2016 22:28:13 +0000
Subject: [PATCH] [x86] add folds for FP logic with vector zeros

The 'or' case shows up in copysign. The copysign code also had
redundant checking for a scalar zero operand with 'and', so I
removed that.

I'm not sure how to test vector 'and', 'andn', and 'xor' yet,
but it seems better to just include all of the logic ops since
we're fixing 'or' anyway.

llvm-svn: 282546
---
 llvm/lib/Target/X86/X86ISelLowering.cpp            | 51 ++++++++++++++--------
 .../CodeGen/X86/copysign-constant-magnitude.ll     |  8 +---
 2 files changed, 36 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dfb64be..27a49a3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14670,16 +14670,11 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
   SDValue MagMask = DAG.getConstantFP(
       APFloat(Sem, ~APInt::getSignBit(SizeInBits)), dl, LogicVT);
 
-  // FIXME: This check shouldn't be necessary. Logic instructions with constant
-  // operands should be folded!
+  // TODO: If we had general constant folding for FP logic ops, this check
+  // wouldn't be necessary.
   SDValue MagBits;
   if (ConstantFPSDNode *Op0CN = dyn_cast<ConstantFPSDNode>(Mag)) {
     APFloat APF = Op0CN->getValueAPF();
-    // If the magnitude is a positive zero, the sign bit alone is enough.
-    if (APF.isPosZero())
-      return IsF128 ? SignBit :
-          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, SignBit,
-                      DAG.getIntPtrConstant(0, dl));
     APF.clearSign();
     MagBits = DAG.getConstantFP(APF, dl, LogicVT);
   } else {
@@ -30495,30 +30490,52 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+
+static bool isNullFPScalarOrVectorConst(SDValue V) {
+  return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode());
+}
+
+/// If a value is a scalar FP zero or a vector FP zero (potentially including
+/// undefined elements), return a zero constant that may be used to fold away
+/// that value. In the case of a vector, the returned constant will not contain
+/// undefined elements even if the input parameter does. This makes it suitable
+/// to be used as a replacement operand with operations (e.g., bitwise-and)
+/// where an undef should not propagate.
+static SDValue getNullFPConstForNullVal(SDValue V, SelectionDAG &DAG,
+                                        const X86Subtarget &Subtarget) {
+  if (!isNullFPScalarOrVectorConst(V))
+    return SDValue();
+
+  if (V.getValueType().isVector())
+    return getZeroVector(V.getSimpleValueType(), Subtarget, DAG, SDLoc(V));
+
+  return V;
+}
+
 /// Do target-specific dag combines on X86ISD::FAND nodes.
 static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG,
                            const X86Subtarget &Subtarget) {
   // FAND(0.0, x) -> 0.0
-  if (isNullFPConstant(N->getOperand(0)))
-    return N->getOperand(0);
+  if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget))
+    return V;
 
   // FAND(x, 0.0) -> 0.0
-  if (isNullFPConstant(N->getOperand(1)))
-    return N->getOperand(1);
+  if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))
+    return V;
 
   return lowerX86FPLogicOp(N, DAG, Subtarget);
 }
 
-/// Do target-specific dag combines on X86ISD::FANDN nodes
+/// Do target-specific dag combines on X86ISD::FANDN nodes.
 static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG,
                             const X86Subtarget &Subtarget) {
   // FANDN(0.0, x) -> x
-  if (isNullFPConstant(N->getOperand(0)))
+  if (isNullFPScalarOrVectorConst(N->getOperand(0)))
     return N->getOperand(1);
 
   // FANDN(x, 0.0) -> 0.0
-  if (isNullFPConstant(N->getOperand(1)))
-    return N->getOperand(1);
+  if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))
+    return V;
 
   return lowerX86FPLogicOp(N, DAG, Subtarget);
 }
@@ -30529,11 +30546,11 @@ static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
   assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
 
   // F[X]OR(0.0, x) -> x
-  if (isNullFPConstant(N->getOperand(0)))
+  if (isNullFPScalarOrVectorConst(N->getOperand(0)))
     return N->getOperand(1);
 
   // F[X]OR(x, 0.0) -> x
-  if (isNullFPConstant(N->getOperand(1)))
+  if (isNullFPScalarOrVectorConst(N->getOperand(1)))
     return N->getOperand(0);
 
   if (isFNEG(N))
diff --git a/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll b/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll
index 4f09ab4..8af0459 100644
--- a/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll
+++ b/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll
@@ -27,9 +27,7 @@ define double @mag_neg0_double(double %x) nounwind {
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    movsd [[SIGNMASK2]](%rip), %xmm1
 ; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
-; CHECK-NEXT:    andps %xmm0, %xmm1
-; CHECK-NEXT:    xorps %xmm0, %xmm0
-; CHECK-NEXT:    orps %xmm1, %xmm0
+; CHECK-NEXT:    andps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 ;
   %y = call double @copysign(double -0.0, double %x)
@@ -95,9 +93,7 @@ define float @mag_neg0_float(float %x) nounwind {
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    movss [[SIGNMASK6]](%rip), %xmm1
 ; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; CHECK-NEXT:    andps %xmm0, %xmm1
-; CHECK-NEXT:    xorps %xmm0, %xmm0
-; CHECK-NEXT:    orps %xmm1, %xmm0
+; CHECK-NEXT:    andps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 ;
   %y = call float @copysignf(float -0.0, float %x)
-- 
2.7.4