return Simplified;
}
+/// Given a vector binary operation and known undefined elements for each input
+/// operand, compute whether each element of the output is undefined.
+static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
+ const APInt &UndefOp0,
+ const APInt &UndefOp1) {
+ EVT VT = BO.getValueType();
+ assert(ISD::isBinaryOp(BO.getNode()) && VT.isVector() && "Vector binop only");
+
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ // The caller's undef masks must have exactly one bit per vector lane.
+ assert(UndefOp0.getBitWidth() == NumElts &&
+ UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
+
+ // Return the scalar element of V at Index as one of:
+ // - an UNDEF node of the element type, if that lane is known undef;
+ // - the build-vector's element, if it is an undef, FP constant, or
+ // non-opaque integer constant (i.e. something getNode() can fold);
+ // - a null SDValue otherwise, meaning the lane cannot be analyzed.
+ auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
+ const APInt &UndefVals) {
+ if (UndefVals[Index])
+ return DAG.getUNDEF(EltVT);
+
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+ // Try hard to make sure that the getNode() call is not creating temporary
+ // nodes. Ignore opaque integers because they do not constant fold.
+ SDValue Elt = BV->getOperand(Index);
+ auto *C = dyn_cast<ConstantSDNode>(Elt);
+ if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
+ return Elt;
+ }
+
+ return SDValue();
+ };
+
+ // Start with no lanes known undef; try to prove each lane below.
+ APInt KnownUndef = APInt::getNullValue(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // If both inputs for this element are either constant or undef and match
+ // the element type, compute the constant/undef result for this element of
+ // the vector.
+ // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
+ // not handle FP constants. The code within getNode() should be refactored
+ // to avoid the danger of creating a bogus temporary node here.
+ SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
+ SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
+ // If the scalar fold of this lane produces UNDEF, the vector lane is
+ // known undef. The element-type check guards against mismatched
+ // build-vector element types (e.g. widened constant elements).
+ if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
+ if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
+ KnownUndef.setBit(i);
+ }
+ return KnownUndef;
+}
+
bool TargetLowering::SimplifyDemandedVectorElts(
SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
}
break;
}
+
+ // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
+ // MAX, saturated math, etc.
case ISD::OR:
case ISD::XOR:
case ISD::ADD:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM: {
- APInt SrcUndef, SrcZero;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
- SrcZero, TLO, Depth + 1))
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
- KnownZero, TLO, Depth + 1))
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
return true;
- KnownZero &= SrcZero;
- KnownUndef &= SrcUndef;
+
+ KnownZero = ZeroLHS & ZeroRHS;
+ KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
break;
}
case ISD::AND: {
// If either side has a zero element, then the result element is zero, even
// if the other is an UNDEF.
+ // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
+ // and then handle 'and' nodes with the rest of the binop opcodes.
KnownZero |= SrcZero;
KnownUndef &= SrcUndef;
KnownUndef &= ~KnownZero;
;
; AVX-LABEL: add_undef_elts:
; AVX: # %bb.0:
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
-; AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,5,4,3,2,1,7]
-; AVX-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%bogus_bo = add <8 x i32> %extend, <i32 undef, i32 undef, i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 12>
;
; AVX-LABEL: sub_undef_elts:
; AVX: # %bb.0:
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,u,42,43,44,12>
-; AVX-NEXT: vpsubd %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,5,4,3,2,6,7]
-; AVX-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%bogus_bo = sub <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 12>, %extend
define <8 x i32> @xor_undef_elts(<4 x i32> %x) {
; SSE-LABEL: xor_undef_elts:
; SSE: # %bb.0:
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
-; SSE-NEXT: pxor {{.*}}(%rip), %xmm2
-; SSE-NEXT: pxor {{.*}}(%rip), %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[2,0]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[1,0]
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[0,0]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,2],xmm2[2,0]
; SSE-NEXT: retq
;
; AVX-LABEL: xor_undef_elts:
; AVX: # %bb.0:
-; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,0,2]
-; AVX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; AVX-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
-; AVX-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 1, i32 3, i32 0, i32 2, i32 undef, i32 undef>
%bogus_bo = xor <8 x i32> %extend, <i32 42, i32 43, i32 undef, i32 undef, i32 undef, i32 undef, i32 44, i32 12>