SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- if (VT.isVector())
-   return SDValue();
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
DAG.getUNDEF(CarryVT));
// canonicalize constant to RHS.
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N0C && !N1C)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
// fold (uaddo x, 0) -> x + no carry out
- if (isNullConstant(N1))
+ if (isNullOrNullSplat(N1))
  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
// If it cannot overflow, transform into an add.
}
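
Note: with the canonicalization above now using isConstantIntBuildVectorOrConstantInt, a zero splat on the left-hand side should get commuted to the RHS and then picked up by the (uaddo x, 0) fold as well. A hypothetical test in the style of the ones below (the function name and value numbering are illustrative, not part of the patch); one would expect it to compile down to just returning %a0, like combine_vec_uadd_zero:

declare {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @combine_vec_uadd_zero_commuted(<4 x i32> %a0, <4 x i32> %a1) {
  %1 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> zeroinitializer, <4 x i32> %a0)
  %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
  %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
  %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
  ret <4 x i32> %4
}
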
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
- auto VT = N0.getValueType();
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+   return SDValue();
// (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
// If Y + 1 cannot overflow.
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- if (VT.isVector())
-   return SDValue();
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
DAG.getConstant(0, DL, CarryVT));
// fold (usubo x, 0) -> x + no borrow
- if (isNullConstant(N1))
+ if (isNullOrNullSplat(N1))
  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
// Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
- if (isAllOnesConstant(N0))
+ if (isAllOnesOrAllOnesSplat(N0))
  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                   DAG.getConstant(0, DL, CarryVT));
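
Note: isNullOrNullSplat and isAllOnesOrAllOnesSplat only match a constant splat (every lane holding the same value), so the folds above still do not fire for mixed-constant operands. Something like the following (hypothetical, not part of the patch) would not be treated as a subtract of zero by this combine:

declare {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @combine_vec_usub_mixed_constant(<4 x i32> %a0, <4 x i32> %a1) {
  %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> <i32 0, i32 0, i32 0, i32 1>)
  %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
  %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
  %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
  ret <4 x i32> %4
}
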
define <4 x i32> @combine_vec_uadd_zero(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_uadd_zero:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: pmaxud %xmm0, %xmm0
-; SSE-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE-NEXT: blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_uadd_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vpmaxud %xmm0, %xmm0, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%1 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
%2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
define <4 x i32> @combine_vec_uadd_not(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_uadd_not:
; SSE: # %bb.0:
-; SSE-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE-NEXT: pxor %xmm2, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm3
-; SSE-NEXT: psubd %xmm2, %xmm3
-; SSE-NEXT: pmaxud %xmm3, %xmm0
-; SSE-NEXT: pcmpeqd %xmm3, %xmm0
-; SSE-NEXT: blendvps %xmm0, %xmm3, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm2
+; SSE-NEXT: psubd %xmm0, %xmm2
+; SSE-NEXT: movdqa {{.*#+}} xmm0 = [1,1,1,1]
+; SSE-NEXT: pmaxud %xmm2, %xmm0
+; SSE-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE-NEXT: blendvps %xmm0, %xmm2, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_uadd_not:
; AVX: # %bb.0:
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpsubd %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vpmaxud %xmm0, %xmm2, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsubd %xmm0, %xmm2, %xmm0
+; AVX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX-NEXT: vpmaxud %xmm2, %xmm0, %xmm2
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
+; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%1 = xor <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
%2 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
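
Note: in combine_vec_uadd_not the value being added up is (xor %a0, -1) + 1, which is the two's complement negation 0 - %a0, and the unsigned add can only carry when %a0 is zero; that is why the updated CHECK lines negate with psubd/vpsubd and only keep a compare against a splat of 1. A hand-written IR equivalent of what the combined code computes (hypothetical, for illustration only, assuming the same select pattern as the other tests):

define <4 x i32> @combine_vec_uadd_not_equivalent(<4 x i32> %a0, <4 x i32> %a1) {
  %neg = sub <4 x i32> zeroinitializer, %a0
  %iszero = icmp eq <4 x i32> %a0, zeroinitializer
  %res = select <4 x i1> %iszero, <4 x i32> %a1, <4 x i32> %neg
  ret <4 x i32> %res
}
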
define <4 x i32> @combine_vec_usub_zero(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_usub_zero:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: pminud %xmm0, %xmm0
-; SSE-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE-NEXT: blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_usub_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vpminud %xmm0, %xmm0, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
%2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
define <4 x i32> @combine_vec_usub_self(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_usub_self:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: psubd %xmm0, %xmm2
-; SSE-NEXT: pminud %xmm2, %xmm0
-; SSE-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE-NEXT: blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_usub_self:
; AVX: # %bb.0:
-; AVX-NEXT: vpsubd %xmm0, %xmm0, %xmm2
-; AVX-NEXT: vpminud %xmm0, %xmm2, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> %a0)
%2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
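
Note: combine_vec_usub_self presumably exercises the (usubo x, x) -> 0 + no borrow fold in visitUSUBO (not visible in the hunks above): x - x is zero and can never borrow, so the select collapses and the whole function reduces to an all-zeros vector, which is what the single xorps/vxorps produces. A hand-written equivalent (hypothetical, for illustration only):

define <4 x i32> @combine_vec_usub_self_equivalent(<4 x i32> %a0, <4 x i32> %a1) {
  ret <4 x i32> zeroinitializer
}
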
define <4 x i32> @combine_vec_usub_negone(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_usub_negone:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE-NEXT: pxor %xmm0, %xmm2
-; SSE-NEXT: pminud %xmm2, %xmm0
-; SSE-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE-NEXT: blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_usub_negone:
; AVX: # %bb.0:
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpminud %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a0)
%2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
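
Note: combine_vec_usub_negone hits the (usubo -1, x) -> ~x fold enabled above: -1 is the largest unsigned value, so the subtraction can never borrow and the result is just the bitwise NOT of %a0, which the new pcmpeqd/pxor (vpcmpeqd/vpxor) sequence materializes. A hand-written equivalent (hypothetical, for illustration only):

define <4 x i32> @combine_vec_usub_negone_equivalent(<4 x i32> %a0, <4 x i32> %a1) {
  %not = xor <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %not
}
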