return Simplified;
}
+/// Given a vector binary operation and known undefined elements for each input
+/// operand, compute whether each element of the output is undefined.
+static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
+ const APInt &UndefOp0,
+ const APInt &UndefOp1) {
+ EVT VT = BO.getValueType();
+ assert(ISD::isBinaryOp(BO.getNode()) && VT.isVector() && "Vector binop only");
+
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ // The caller's undef masks must have exactly one bit per vector lane.
+ assert(UndefOp0.getBitWidth() == NumElts &&
+ UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
+
+ // Return the scalar element of V at Index as one of:
+ // - an UNDEF node of the element type, if that lane is known undef;
+ // - the build-vector's element, if it is an undef, FP constant, or
+ // non-opaque integer constant (i.e. something getNode() can fold);
+ // - a null SDValue otherwise, meaning the lane cannot be analyzed.
+ auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
+ const APInt &UndefVals) {
+ if (UndefVals[Index])
+ return DAG.getUNDEF(EltVT);
+
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+ // Try hard to make sure that the getNode() call is not creating temporary
+ // nodes. Ignore opaque integers because they do not constant fold.
+ SDValue Elt = BV->getOperand(Index);
+ auto *C = dyn_cast<ConstantSDNode>(Elt);
+ if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
+ return Elt;
+ }
+
+ return SDValue();
+ };
+
+ // Start with no lanes known undef; try to prove each lane below.
+ APInt KnownUndef = APInt::getNullValue(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // If both inputs for this element are either constant or undef and match
+ // the element type, compute the constant/undef result for this element of
+ // the vector.
+ // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
+ // not handle FP constants. The code within getNode() should be refactored
+ // to avoid the danger of creating a bogus temporary node here.
+ SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
+ SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
+ // If the scalar fold of this lane produces UNDEF, the vector lane is
+ // known undef. The element-type check guards against mismatched
+ // build-vector element types (e.g. widened constant elements).
+ if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
+ if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
+ KnownUndef.setBit(i);
+ }
+ return KnownUndef;
+}
+
bool TargetLowering::SimplifyDemandedVectorElts(
SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
}
break;
}
+
+ // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
+ // MAX, saturated math, etc.
case ISD::OR:
case ISD::XOR:
case ISD::ADD:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM: {
- APInt SrcUndef, SrcZero;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
- SrcZero, TLO, Depth + 1))
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
- KnownZero, TLO, Depth + 1))
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
return true;
- KnownZero &= SrcZero;
- KnownUndef &= SrcUndef;
+
+ KnownZero = ZeroLHS & ZeroRHS;
+ KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
break;
}
case ISD::AND: {
// If either side has a zero element, then the result element is zero, even
// if the other is an UNDEF.
+ // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
+ // and then handle 'and' nodes with the rest of the binop opcodes.
KnownZero |= SrcZero;
KnownUndef &= SrcUndef;
KnownUndef &= ~KnownZero;
;
; AVX-LABEL: add_undef_elts:
; AVX: # %bb.0:
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
-; AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,5,4,3,2,1,7]
-; AVX-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%bogus_bo = add <8 x i32> %extend, <i32 undef, i32 undef, i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 12>
;
; AVX-LABEL: sub_undef_elts:
; AVX: # %bb.0:
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,u,42,43,44,12>
-; AVX-NEXT: vpsubd %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,5,4,3,2,6,7]
-; AVX-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%bogus_bo = sub <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 12>, %extend
define <8 x i32> @xor_undef_elts(<4 x i32> %x) {
; SSE-LABEL: xor_undef_elts:
; SSE: # %bb.0:
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
-; SSE-NEXT: pxor {{.*}}(%rip), %xmm2
-; SSE-NEXT: pxor {{.*}}(%rip), %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[2,0]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[1,0]
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[0,0]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,2],xmm2[2,0]
; SSE-NEXT: retq
;
; AVX-LABEL: xor_undef_elts:
; AVX: # %bb.0:
-; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,0,2]
-; AVX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; AVX-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
-; AVX-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 1, i32 3, i32 0, i32 2, i32 undef, i32 undef>
%bogus_bo = xor <8 x i32> %extend, <i32 42, i32 43, i32 undef, i32 undef, i32 undef, i32 undef, i32 44, i32 12>