ADDCARRY,
SUBCARRY,
+ /// Carry-using overflow-aware nodes for multiple precision addition and
+ /// subtraction. These nodes take three operands: the first two are the
+ /// normal LHS and RHS to the add or sub, and the third is a boolean
+ /// indicating if there is an incoming carry. They produce two results: the
+ /// normal result of the add or sub, and a boolean that indicates if an
+ /// overflow occurred (*not* a flag, because it may be stored to memory,
+ /// etc.). If the type of the boolean is not i1 then the high bits conform
+ /// to getBooleanContents.
+ SADDO_CARRY,
+ SSUBO_CARRY,
+
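For orientation, here is a minimal sketch of how the two new nodes compose for a double-wide signed add, mirroring the expansion added later in this patch. The word-size values `LHSLo`/`LHSHi`/`RHSLo`/`RHSHi`, the `SelectionDAG &DAG`, and the `SDLoc DL` are assumed for illustration:

```cpp
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i1);
// Low half: a plain unsigned add; only its carry-out matters.
SDValue Lo = DAG.getNode(ISD::UADDO, DL, VTs, LHSLo, RHSLo);
// High half: a signed add that consumes the carry and reports the signed
// overflow of the whole double-wide addition in its second result.
SDValue Hi = DAG.getNode(ISD::SADDO_CARRY, DL, VTs, LHSHi, RHSHi,
                         Lo.getValue(1));
```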
/// RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
/// These nodes take two operands: the normal LHS and RHS to the add. They
/// produce two results: the normal result of the add, and a boolean that
SDValue visitSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitADDCARRY(SDNode *N);
+ SDValue visitSADDO_CARRY(SDNode *N);
SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitSUBCARRY(SDNode *N);
+ SDValue visitSSUBO_CARRY(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
case ISD::USUBO: return visitSUBO(N);
case ISD::ADDE: return visitADDE(N);
case ISD::ADDCARRY: return visitADDCARRY(N);
+ case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::SUBCARRY: return visitSUBCARRY(N);
+ case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
return SDValue();
}
+SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
+
+ // fold (saddo_carry x, y, false) -> (saddo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
+ return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
/**
* If we are facing some sort of diamond carry propagation pattern, try to
* break it up to generate something like:
return SDValue();
}
+SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (ssubo_carry x, y, false) -> (ssubo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
+ return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
// UMULFIXSAT here.
SDValue DAGCombiner::visitMULFIX(SDNode *N) {
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY: Res = PromoteIntRes_SADDSUBO_CARRY(N, ResNo); break;
+
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
return SDValue(Res.getNode(), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N,
+ unsigned ResNo) {
+ assert(ResNo == 1 && "Don't know how to promote other results yet.");
+ return PromoteIntRes_Overflow(N);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
case ISD::ROTL:
case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY:
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
case ISD::ADDCARRY:
case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break;
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY: ExpandIntRes_SADDSUBO_CARRY(N, Lo, Hi); break;
+
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO_CARRY(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+
+ // We need to use an unsigned carry op for the lo part.
+ unsigned CarryOp = N->getOpcode() == ISD::SADDO_CARRY ? ISD::ADDCARRY
+ : ISD::SUBCARRY;
+ Lo = DAG.getNode(CarryOp, dl, VTList, { LHSL, RHSL, N->getOperand(2) });
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
+
+ // Legalize the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
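The low part must use the unsigned carry op because signed overflow is only meaningful in the top word; every lower word behaves as a plain two's-complement add or sub whose carry-out chains upward. A self-contained model of the add case (a sketch of that reasoning; `sadd2` is a made-up helper, not patch code):

```cpp
#include <cstdint>

// Two-word signed add; returns true iff the full 128-bit sum overflows.
// Models ISD::UADDO on the low word feeding ISD::SADDO_CARRY on the high.
static bool sadd2(uint64_t LoA, int64_t HiA, uint64_t LoB, int64_t HiB,
                  uint64_t &LoOut, int64_t &HiOut) {
  LoOut = LoA + LoB;                    // low word: plain unsigned add
  uint64_t Carry = LoOut < LoA ? 1 : 0; // its carry-out (UADDO's bool result)
  uint64_t HiSum = (uint64_t)HiA + (uint64_t)HiB + Carry;
  HiOut = (int64_t)HiSum;               // high word, two's-complement wrap
  // Signed overflow: the high operands share a sign that the result lacks.
  return ((HiA < 0) == (HiB < 0)) && ((HiA < 0) != (HiOut < 0));
}
```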
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue RHS = Node->getOperand(1);
SDLoc dl(Node);
- // Expand the result by simply replacing it with the equivalent
- // non-overflow-checking operation.
- SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
- ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
- LHS, RHS);
- SplitInteger(Sum, Lo, Hi);
+ SDValue Ovf;
- // Compute the overflow.
- //
- // LHSSign -> LHS >= 0
- // RHSSign -> RHS >= 0
- // SumSign -> Sum >= 0
- //
- // Add:
- // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
- // Sub:
- // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
- //
- EVT OType = Node->getValueType(1);
- SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+ unsigned CarryOp;
+ switch (Node->getOpcode()) {
+ default: llvm_unreachable("Node has unexpected Opcode");
+ case ISD::SADDO: CarryOp = ISD::SADDO_CARRY; break;
+ case ISD::SSUBO: CarryOp = ISD::SSUBO_CARRY; break;
+ }
- SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
- SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
- SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
- Node->getOpcode() == ISD::SADDO ?
- ISD::SETEQ : ISD::SETNE);
+ bool HasCarryOp = TLI.isOperationLegalOrCustom(
+ CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
- SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
- SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+ if (HasCarryOp) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), Node->getValueType(1));
+
+ Lo = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::UADDO : ISD::USUBO, dl, VTList, { LHSL, RHSL });
+ Hi = DAG.getNode(CarryOp, dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
- SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Ovf = Hi.getValue(1);
+ } else {
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Compute the overflow.
+ //
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ EVT OType = Node->getValueType(1);
+ SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ Ovf = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ }
// Use the calculated overflow everywhere.
- ReplaceValueWith(SDValue(Node, 1), Cmp);
+ ReplaceValueWith(SDValue(Node, 1), Ovf);
}
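To sanity-check the fallback's sub rule on concrete (hypothetical) i8 values: with LHS = -100 and RHS = 50, the true difference -150 is not representable in i8, and the sign test agrees:

```cpp
int8_t LHS = -100, RHS = 50;
int8_t Sum = (int8_t)((uint8_t)LHS - (uint8_t)RHS); // wrapping sub: 106
bool LHSSign = LHS >= 0;                            // false
bool RHSSign = RHS >= 0;                            // true
bool SumSign = Sum >= 0;                            // true
// Sub rule: Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
bool Overflow = (LHSSign != RHSSign) && (LHSSign != SumSign); // -> true
```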
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_SADDSUBO_CARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_VSCALE(SDNode *N);
void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SADDSUBO_CARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_PARITY (SDNode *N, SDValue &Lo, SDValue &Hi);
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
case ISD::ADDCARRY: return "addcarry";
+ case ISD::SADDO_CARRY: return "saddo_carry";
case ISD::SADDO: return "saddo";
case ISD::UADDO: return "uaddo";
case ISD::SSUBO: return "ssubo";
case ISD::SUBC: return "subc";
case ISD::SUBE: return "sube";
case ISD::SUBCARRY: return "subcarry";
+ case ISD::SSUBO_CARRY: return "ssubo_carry";
case ISD::SHL_PARTS: return "shl_parts";
case ISD::SRA_PARTS: return "sra_parts";
case ISD::SRL_PARTS: return "srl_parts";
setOperationAction(ISD::ADDCARRY, VT, Expand);
setOperationAction(ISD::SUBCARRY, VT, Expand);
setOperationAction(ISD::SETCCCARRY, VT, Expand);
+ setOperationAction(ISD::SADDO_CARRY, VT, Expand);
+ setOperationAction(ISD::SSUBO_CARRY, VT, Expand);
// ADDC/ADDE/SUBC/SUBE default to expand.
setOperationAction(ISD::ADDC, VT, Expand);
setOperationAction(ISD::ADDCARRY, VT, Custom);
setOperationAction(ISD::SUBCARRY, VT, Custom);
setOperationAction(ISD::SETCCCARRY, VT, Custom);
+ setOperationAction(ISD::SADDO_CARRY, VT, Custom);
+ setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
}
if (!Subtarget.is64Bit()) {
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
SDNode *N = Op.getNode();
MVT VT = N->getSimpleValueType(0);
+ unsigned Opc = Op.getOpcode();
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
Carry, DAG.getAllOnesConstant(DL, CarryVT));
- unsigned Opc = Op.getOpcode() == ISD::ADDCARRY ? X86ISD::ADC : X86ISD::SBB;
- SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0),
- Op.getOperand(1), Carry.getValue(1));
+ bool IsAdd = Opc == ISD::ADDCARRY || Opc == ISD::SADDO_CARRY;
+ SDValue Sum = DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs,
+ Op.getOperand(0), Op.getOperand(1),
+ Carry.getValue(1));
- SDValue SetCC = getSETCC(X86::COND_B, Sum.getValue(1), DL, DAG);
+ bool IsSigned = Opc == ISD::SADDO_CARRY || Opc == ISD::SSUBO_CARRY;
+ SDValue SetCC = getSETCC(IsSigned ? X86::COND_O : X86::COND_B,
+ Sum.getValue(1), DL, DAG);
if (N->getValueType(1) == MVT::i1)
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
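The condition-code choice is the crux of this hunk: the final ADC/SBB sets both CF and OF, and the boolean result reads OF for the new signed nodes where the unsigned carry nodes read CF. The `seto` that replaces the long sign-compare sequences in the regenerated tests below comes from exactly this. A condensed restatement of the mapping (a sketch, not patch code):

```cpp
// Which EFLAGS bit the boolean result reads after the X86ISD::ADC/SBB chain.
static X86::CondCode resultCondFor(unsigned Opc) {
  switch (Opc) {
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    return X86::COND_B;  // CF: unsigned carry/borrow out
  case ISD::SADDO_CARRY:
  case ISD::SSUBO_CARRY:
    return X86::COND_O;  // OF: signed overflow
  default:
    llvm_unreachable("unexpected carry opcode");
  }
}
```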
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY:
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:
define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_uaddo_saddo:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: addl %eax, %edx
-; X32-NEXT: setb %bl
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: setns %al
-; X32-NEXT: testl %ecx, %ecx
-; X32-NEXT: setns %cl
-; X32-NEXT: cmpb %al, %cl
-; X32-NEXT: sete %al
-; X32-NEXT: testl %edx, %edx
-; X32-NEXT: setns %dl
-; X32-NEXT: cmpb %dl, %cl
-; X32-NEXT: setne %dl
-; X32-NEXT: andb %al, %dl
-; X32-NEXT: orb %bl, %dl
+; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: setb %al
+; X32-NEXT: seto %dl
+; X32-NEXT: orb %al, %dl
; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: knownbits_uaddo_saddo:
; X32-LABEL: knownbits_usubo_ssubo:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: cmpl %eax, %ecx
-; X32-NEXT: setb %dh
-; X32-NEXT: setns %dl
-; X32-NEXT: testl %ecx, %ecx
-; X32-NEXT: setns %cl
-; X32-NEXT: cmpb %dl, %cl
-; X32-NEXT: setne %ch
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: setns %al
-; X32-NEXT: cmpb %al, %cl
-; X32-NEXT: setne %dl
-; X32-NEXT: andb %ch, %dl
-; X32-NEXT: orb %dh, %dl
+; X32-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: setb %al
+; X32-NEXT: seto %dl
+; X32-NEXT: orb %al, %dl
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;
define i64 @func2(i64 %x, i64 %y) nounwind {
; X86-LABEL: func2:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: adcl %esi, %ebp
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: seto %bl
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: sarl $31, %eax
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: testl %ebp, %ebp
-; X86-NEXT: setns %cl
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: setns %bl
-; X86-NEXT: cmpb %cl, %bl
-; X86-NEXT: setne %cl
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: xorl %edx, %edx
; X86-NEXT: testl %esi, %esi
-; X86-NEXT: setns %ch
-; X86-NEXT: cmpb %ch, %bl
-; X86-NEXT: sete %ch
-; X86-NEXT: testb %cl, %ch
-; X86-NEXT: cmovel %ebp, %edx
-; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: setns %dl
+; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: func2:
define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-LABEL: func64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: adcl %esi, %ebp
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: seto %bl
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: sarl $31, %eax
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: testl %ebp, %ebp
-; X86-NEXT: setns %cl
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: setns %bl
-; X86-NEXT: cmpb %cl, %bl
-; X86-NEXT: setne %cl
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: xorl %edx, %edx
; X86-NEXT: testl %esi, %esi
-; X86-NEXT: setns %ch
-; X86-NEXT: cmpb %ch, %bl
-; X86-NEXT: sete %ch
-; X86-NEXT: testb %cl, %ch
-; X86-NEXT: cmovel %ebp, %edx
-; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: setns %dl
+; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: func64:
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE-LABEL: v2i128:
; SSE: # %bb.0:
-; SSE-NEXT: pushq %r15
-; SSE-NEXT: pushq %r14
-; SSE-NEXT: pushq %r13
-; SSE-NEXT: pushq %r12
; SSE-NEXT: pushq %rbx
; SSE-NEXT: movq %rdi, %rax
-; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx
-; SSE-NEXT: movq %r8, %r13
-; SSE-NEXT: adcq %r14, %r13
-; SSE-NEXT: movq %r13, %r10
-; SSE-NEXT: sarq $63, %r10
-; SSE-NEXT: xorl %edi, %edi
-; SSE-NEXT: testq %r13, %r13
-; SSE-NEXT: setns %dil
-; SSE-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
-; SSE-NEXT: leaq (%rdi,%r12), %r15
+; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8
+; SSE-NEXT: seto %r10b
+; SSE-NEXT: movq %r8, %rbx
+; SSE-NEXT: sarq $63, %rbx
+; SSE-NEXT: testb %r10b, %r10b
+; SSE-NEXT: cmoveq %rcx, %rbx
+; SSE-NEXT: xorl %ecx, %ecx
; SSE-NEXT: testq %r8, %r8
-; SSE-NEXT: setns %r8b
-; SSE-NEXT: cmpb %dil, %r8b
-; SSE-NEXT: setne %dil
-; SSE-NEXT: testq %r14, %r14
-; SSE-NEXT: setns %bl
-; SSE-NEXT: cmpb %bl, %r8b
-; SSE-NEXT: sete %bl
-; SSE-NEXT: testb %dil, %bl
-; SSE-NEXT: cmoveq %r13, %r15
-; SSE-NEXT: cmoveq %rcx, %r10
+; SSE-NEXT: setns %cl
+; SSE-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
+; SSE-NEXT: addq %r11, %rcx
+; SSE-NEXT: testb %r10b, %r10b
+; SSE-NEXT: cmoveq %r8, %rcx
; SSE-NEXT: addq %r9, %rsi
+; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
+; SSE-NEXT: seto %r8b
; SSE-NEXT: movq %rdx, %rdi
-; SSE-NEXT: adcq %r11, %rdi
-; SSE-NEXT: setns %bl
-; SSE-NEXT: movzbl %bl, %ebx
-; SSE-NEXT: addq %rbx, %r12
-; SSE-NEXT: movq %rdi, %rcx
-; SSE-NEXT: sarq $63, %rcx
-; SSE-NEXT: testq %r11, %r11
-; SSE-NEXT: setns %r8b
+; SSE-NEXT: sarq $63, %rdi
+; SSE-NEXT: testb %r8b, %r8b
+; SSE-NEXT: cmoveq %rsi, %rdi
+; SSE-NEXT: xorl %esi, %esi
; SSE-NEXT: testq %rdx, %rdx
-; SSE-NEXT: setns %dl
-; SSE-NEXT: cmpb %r8b, %dl
-; SSE-NEXT: sete %r8b
-; SSE-NEXT: cmpb %bl, %dl
-; SSE-NEXT: setne %dl
-; SSE-NEXT: testb %dl, %r8b
-; SSE-NEXT: cmoveq %rsi, %rcx
-; SSE-NEXT: cmoveq %rdi, %r12
-; SSE-NEXT: movq %r15, 24(%rax)
-; SSE-NEXT: movq %r10, 16(%rax)
-; SSE-NEXT: movq %r12, 8(%rax)
-; SSE-NEXT: movq %rcx, (%rax)
+; SSE-NEXT: setns %sil
+; SSE-NEXT: addq %r11, %rsi
+; SSE-NEXT: testb %r8b, %r8b
+; SSE-NEXT: cmoveq %rdx, %rsi
+; SSE-NEXT: movq %rbx, 16(%rax)
+; SSE-NEXT: movq %rdi, (%rax)
+; SSE-NEXT: movq %rcx, 24(%rax)
+; SSE-NEXT: movq %rsi, 8(%rax)
; SSE-NEXT: popq %rbx
-; SSE-NEXT: popq %r12
-; SSE-NEXT: popq %r13
-; SSE-NEXT: popq %r14
-; SSE-NEXT: popq %r15
; SSE-NEXT: retq
;
; AVX-LABEL: v2i128:
; AVX: # %bb.0:
-; AVX-NEXT: pushq %r15
-; AVX-NEXT: pushq %r14
-; AVX-NEXT: pushq %r13
-; AVX-NEXT: pushq %r12
; AVX-NEXT: pushq %rbx
; AVX-NEXT: movq %rdi, %rax
-; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r14
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx
-; AVX-NEXT: movq %r8, %r13
-; AVX-NEXT: adcq %r14, %r13
-; AVX-NEXT: movq %r13, %r10
-; AVX-NEXT: sarq $63, %r10
-; AVX-NEXT: xorl %edi, %edi
-; AVX-NEXT: testq %r13, %r13
-; AVX-NEXT: setns %dil
-; AVX-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
-; AVX-NEXT: leaq (%rdi,%r12), %r15
+; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8
+; AVX-NEXT: seto %r10b
+; AVX-NEXT: movq %r8, %rbx
+; AVX-NEXT: sarq $63, %rbx
+; AVX-NEXT: testb %r10b, %r10b
+; AVX-NEXT: cmoveq %rcx, %rbx
+; AVX-NEXT: xorl %ecx, %ecx
; AVX-NEXT: testq %r8, %r8
-; AVX-NEXT: setns %r8b
-; AVX-NEXT: cmpb %dil, %r8b
-; AVX-NEXT: setne %dil
-; AVX-NEXT: testq %r14, %r14
-; AVX-NEXT: setns %bl
-; AVX-NEXT: cmpb %bl, %r8b
-; AVX-NEXT: sete %bl
-; AVX-NEXT: testb %dil, %bl
-; AVX-NEXT: cmoveq %r13, %r15
-; AVX-NEXT: cmoveq %rcx, %r10
+; AVX-NEXT: setns %cl
+; AVX-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
+; AVX-NEXT: addq %r11, %rcx
+; AVX-NEXT: testb %r10b, %r10b
+; AVX-NEXT: cmoveq %r8, %rcx
; AVX-NEXT: addq %r9, %rsi
+; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT: seto %r8b
; AVX-NEXT: movq %rdx, %rdi
-; AVX-NEXT: adcq %r11, %rdi
-; AVX-NEXT: setns %bl
-; AVX-NEXT: movzbl %bl, %ebx
-; AVX-NEXT: addq %rbx, %r12
-; AVX-NEXT: movq %rdi, %rcx
-; AVX-NEXT: sarq $63, %rcx
-; AVX-NEXT: testq %r11, %r11
-; AVX-NEXT: setns %r8b
+; AVX-NEXT: sarq $63, %rdi
+; AVX-NEXT: testb %r8b, %r8b
+; AVX-NEXT: cmoveq %rsi, %rdi
+; AVX-NEXT: xorl %esi, %esi
; AVX-NEXT: testq %rdx, %rdx
-; AVX-NEXT: setns %dl
-; AVX-NEXT: cmpb %r8b, %dl
-; AVX-NEXT: sete %r8b
-; AVX-NEXT: cmpb %bl, %dl
-; AVX-NEXT: setne %dl
-; AVX-NEXT: testb %dl, %r8b
-; AVX-NEXT: cmoveq %rsi, %rcx
-; AVX-NEXT: cmoveq %rdi, %r12
-; AVX-NEXT: movq %r15, 24(%rax)
-; AVX-NEXT: movq %r10, 16(%rax)
-; AVX-NEXT: movq %r12, 8(%rax)
-; AVX-NEXT: movq %rcx, (%rax)
+; AVX-NEXT: setns %sil
+; AVX-NEXT: addq %r11, %rsi
+; AVX-NEXT: testb %r8b, %r8b
+; AVX-NEXT: cmoveq %rdx, %rsi
+; AVX-NEXT: movq %rbx, 16(%rax)
+; AVX-NEXT: movq %rdi, (%rax)
+; AVX-NEXT: movq %rcx, 24(%rax)
+; AVX-NEXT: movq %rsi, 8(%rax)
; AVX-NEXT: popq %rbx
-; AVX-NEXT: popq %r12
-; AVX-NEXT: popq %r13
-; AVX-NEXT: popq %r14
-; AVX-NEXT: popq %r15
; AVX-NEXT: retq
%z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
ret <2 x i128> %z
define i64 @func2(i64 %x, i64 %y) nounwind {
; X86-LABEL: func2:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: seto %bl
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: sarl $31, %eax
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: testl %ebp, %ebp
-; X86-NEXT: setns %cl
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: setns %bl
-; X86-NEXT: cmpb %cl, %bl
-; X86-NEXT: setne %cl
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: xorl %edx, %edx
; X86-NEXT: testl %esi, %esi
-; X86-NEXT: setns %ch
-; X86-NEXT: cmpb %ch, %bl
-; X86-NEXT: setne %ch
-; X86-NEXT: testb %cl, %ch
-; X86-NEXT: cmovel %ebp, %edx
-; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: setns %dl
+; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: func2:
define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-LABEL: func64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: seto %bl
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: sarl $31, %eax
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: testl %ebp, %ebp
-; X86-NEXT: setns %cl
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: setns %bl
-; X86-NEXT: cmpb %cl, %bl
-; X86-NEXT: setne %cl
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: xorl %edx, %edx
; X86-NEXT: testl %esi, %esi
-; X86-NEXT: setns %ch
-; X86-NEXT: cmpb %ch, %bl
-; X86-NEXT: setne %ch
-; X86-NEXT: testb %cl, %ch
-; X86-NEXT: cmovel %ebp, %edx
-; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: setns %dl
+; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: func64:
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE-LABEL: v2i128:
; SSE: # %bb.0:
-; SSE-NEXT: pushq %r15
-; SSE-NEXT: pushq %r14
-; SSE-NEXT: pushq %r13
-; SSE-NEXT: pushq %r12
; SSE-NEXT: pushq %rbx
; SSE-NEXT: movq %rdi, %rax
-; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14
; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rcx
-; SSE-NEXT: movq %r8, %r13
-; SSE-NEXT: sbbq %r14, %r13
-; SSE-NEXT: movq %r13, %r10
-; SSE-NEXT: sarq $63, %r10
-; SSE-NEXT: xorl %edi, %edi
-; SSE-NEXT: testq %r13, %r13
-; SSE-NEXT: setns %dil
-; SSE-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
-; SSE-NEXT: leaq (%rdi,%r12), %r15
+; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
+; SSE-NEXT: seto %r10b
+; SSE-NEXT: movq %r8, %rbx
+; SSE-NEXT: sarq $63, %rbx
+; SSE-NEXT: testb %r10b, %r10b
+; SSE-NEXT: cmoveq %rcx, %rbx
+; SSE-NEXT: xorl %ecx, %ecx
; SSE-NEXT: testq %r8, %r8
-; SSE-NEXT: setns %r8b
-; SSE-NEXT: cmpb %dil, %r8b
-; SSE-NEXT: setne %dil
-; SSE-NEXT: testq %r14, %r14
-; SSE-NEXT: setns %bl
-; SSE-NEXT: cmpb %bl, %r8b
-; SSE-NEXT: setne %bl
-; SSE-NEXT: testb %dil, %bl
-; SSE-NEXT: cmoveq %r13, %r15
-; SSE-NEXT: cmoveq %rcx, %r10
+; SSE-NEXT: setns %cl
+; SSE-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
+; SSE-NEXT: addq %r11, %rcx
+; SSE-NEXT: testb %r10b, %r10b
+; SSE-NEXT: cmoveq %r8, %rcx
; SSE-NEXT: subq %r9, %rsi
+; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
+; SSE-NEXT: seto %r8b
; SSE-NEXT: movq %rdx, %rdi
-; SSE-NEXT: sbbq %r11, %rdi
-; SSE-NEXT: setns %bl
-; SSE-NEXT: movzbl %bl, %ebx
-; SSE-NEXT: addq %rbx, %r12
-; SSE-NEXT: movq %rdi, %rcx
-; SSE-NEXT: sarq $63, %rcx
-; SSE-NEXT: testq %r11, %r11
-; SSE-NEXT: setns %r8b
+; SSE-NEXT: sarq $63, %rdi
+; SSE-NEXT: testb %r8b, %r8b
+; SSE-NEXT: cmoveq %rsi, %rdi
+; SSE-NEXT: xorl %esi, %esi
; SSE-NEXT: testq %rdx, %rdx
-; SSE-NEXT: setns %dl
-; SSE-NEXT: cmpb %r8b, %dl
-; SSE-NEXT: setne %r8b
-; SSE-NEXT: cmpb %bl, %dl
-; SSE-NEXT: setne %dl
-; SSE-NEXT: testb %dl, %r8b
-; SSE-NEXT: cmoveq %rsi, %rcx
-; SSE-NEXT: cmoveq %rdi, %r12
-; SSE-NEXT: movq %r15, 24(%rax)
-; SSE-NEXT: movq %r10, 16(%rax)
-; SSE-NEXT: movq %r12, 8(%rax)
-; SSE-NEXT: movq %rcx, (%rax)
+; SSE-NEXT: setns %sil
+; SSE-NEXT: addq %r11, %rsi
+; SSE-NEXT: testb %r8b, %r8b
+; SSE-NEXT: cmoveq %rdx, %rsi
+; SSE-NEXT: movq %rbx, 16(%rax)
+; SSE-NEXT: movq %rdi, (%rax)
+; SSE-NEXT: movq %rcx, 24(%rax)
+; SSE-NEXT: movq %rsi, 8(%rax)
; SSE-NEXT: popq %rbx
-; SSE-NEXT: popq %r12
-; SSE-NEXT: popq %r13
-; SSE-NEXT: popq %r14
-; SSE-NEXT: popq %r15
; SSE-NEXT: retq
;
; AVX-LABEL: v2i128:
; AVX: # %bb.0:
-; AVX-NEXT: pushq %r15
-; AVX-NEXT: pushq %r14
-; AVX-NEXT: pushq %r13
-; AVX-NEXT: pushq %r12
; AVX-NEXT: pushq %rbx
; AVX-NEXT: movq %rdi, %rax
-; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r14
; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rcx
-; AVX-NEXT: movq %r8, %r13
-; AVX-NEXT: sbbq %r14, %r13
-; AVX-NEXT: movq %r13, %r10
-; AVX-NEXT: sarq $63, %r10
-; AVX-NEXT: xorl %edi, %edi
-; AVX-NEXT: testq %r13, %r13
-; AVX-NEXT: setns %dil
-; AVX-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
-; AVX-NEXT: leaq (%rdi,%r12), %r15
+; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
+; AVX-NEXT: seto %r10b
+; AVX-NEXT: movq %r8, %rbx
+; AVX-NEXT: sarq $63, %rbx
+; AVX-NEXT: testb %r10b, %r10b
+; AVX-NEXT: cmoveq %rcx, %rbx
+; AVX-NEXT: xorl %ecx, %ecx
; AVX-NEXT: testq %r8, %r8
-; AVX-NEXT: setns %r8b
-; AVX-NEXT: cmpb %dil, %r8b
-; AVX-NEXT: setne %dil
-; AVX-NEXT: testq %r14, %r14
-; AVX-NEXT: setns %bl
-; AVX-NEXT: cmpb %bl, %r8b
-; AVX-NEXT: setne %bl
-; AVX-NEXT: testb %dil, %bl
-; AVX-NEXT: cmoveq %r13, %r15
-; AVX-NEXT: cmoveq %rcx, %r10
+; AVX-NEXT: setns %cl
+; AVX-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
+; AVX-NEXT: addq %r11, %rcx
+; AVX-NEXT: testb %r10b, %r10b
+; AVX-NEXT: cmoveq %r8, %rcx
; AVX-NEXT: subq %r9, %rsi
+; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT: seto %r8b
; AVX-NEXT: movq %rdx, %rdi
-; AVX-NEXT: sbbq %r11, %rdi
-; AVX-NEXT: setns %bl
-; AVX-NEXT: movzbl %bl, %ebx
-; AVX-NEXT: addq %rbx, %r12
-; AVX-NEXT: movq %rdi, %rcx
-; AVX-NEXT: sarq $63, %rcx
-; AVX-NEXT: testq %r11, %r11
-; AVX-NEXT: setns %r8b
+; AVX-NEXT: sarq $63, %rdi
+; AVX-NEXT: testb %r8b, %r8b
+; AVX-NEXT: cmoveq %rsi, %rdi
+; AVX-NEXT: xorl %esi, %esi
; AVX-NEXT: testq %rdx, %rdx
-; AVX-NEXT: setns %dl
-; AVX-NEXT: cmpb %r8b, %dl
-; AVX-NEXT: setne %r8b
-; AVX-NEXT: cmpb %bl, %dl
-; AVX-NEXT: setne %dl
-; AVX-NEXT: testb %dl, %r8b
-; AVX-NEXT: cmoveq %rsi, %rcx
-; AVX-NEXT: cmoveq %rdi, %r12
-; AVX-NEXT: movq %r15, 24(%rax)
-; AVX-NEXT: movq %r10, 16(%rax)
-; AVX-NEXT: movq %r12, 8(%rax)
-; AVX-NEXT: movq %rcx, (%rax)
+; AVX-NEXT: setns %sil
+; AVX-NEXT: addq %r11, %rsi
+; AVX-NEXT: testb %r8b, %r8b
+; AVX-NEXT: cmoveq %rdx, %rsi
+; AVX-NEXT: movq %rbx, 16(%rax)
+; AVX-NEXT: movq %rdi, (%rax)
+; AVX-NEXT: movq %rcx, 24(%rax)
+; AVX-NEXT: movq %rsi, 8(%rax)
; AVX-NEXT: popq %rbx
-; AVX-NEXT: popq %r12
-; AVX-NEXT: popq %r13
-; AVX-NEXT: popq %r14
-; AVX-NEXT: popq %r15
; AVX-NEXT: retq
%z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
ret <2 x i128> %z
define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind {
; SSE2-LABEL: saddo_v2i128:
; SSE2: # %bb.0:
-; SSE2-NEXT: pushq %rbp
-; SSE2-NEXT: pushq %rbx
-; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; SSE2-NEXT: testq %r9, %r9
-; SSE2-NEXT: setns %al
-; SSE2-NEXT: testq %rsi, %rsi
-; SSE2-NEXT: setns %bl
-; SSE2-NEXT: cmpb %al, %bl
-; SSE2-NEXT: sete %bpl
; SSE2-NEXT: addq %r8, %rdi
; SSE2-NEXT: adcq %r9, %rsi
-; SSE2-NEXT: setns %al
-; SSE2-NEXT: cmpb %al, %bl
-; SSE2-NEXT: setne %al
-; SSE2-NEXT: andb %bpl, %al
+; SSE2-NEXT: seto %r8b
; SSE2-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; SSE2-NEXT: movq %rcx, %rbp
-; SSE2-NEXT: adcq %r10, %rbp
-; SSE2-NEXT: setns %bl
-; SSE2-NEXT: testq %rcx, %rcx
-; SSE2-NEXT: setns %cl
-; SSE2-NEXT: cmpb %bl, %cl
-; SSE2-NEXT: setne %r8b
-; SSE2-NEXT: testq %r10, %r10
-; SSE2-NEXT: setns %bl
-; SSE2-NEXT: cmpb %bl, %cl
-; SSE2-NEXT: sete %cl
-; SSE2-NEXT: andb %r8b, %cl
-; SSE2-NEXT: movzbl %cl, %ecx
-; SSE2-NEXT: negl %ecx
-; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; SSE2-NEXT: seto %al
; SSE2-NEXT: movzbl %al, %eax
; SSE2-NEXT: negl %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movzbl %r8b, %eax
+; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movq %rdx, 16(%r11)
-; SSE2-NEXT: movq %rdi, (%r11)
-; SSE2-NEXT: movq %rbp, 24(%r11)
-; SSE2-NEXT: movq %rsi, 8(%r11)
-; SSE2-NEXT: popq %rbx
-; SSE2-NEXT: popq %rbp
+; SSE2-NEXT: movq %rdx, 16(%r10)
+; SSE2-NEXT: movq %rdi, (%r10)
+; SSE2-NEXT: movq %rcx, 24(%r10)
+; SSE2-NEXT: movq %rsi, 8(%r10)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: saddo_v2i128:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: pushq %rbp
-; SSSE3-NEXT: pushq %rbx
-; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; SSSE3-NEXT: testq %r9, %r9
-; SSSE3-NEXT: setns %al
-; SSSE3-NEXT: testq %rsi, %rsi
-; SSSE3-NEXT: setns %bl
-; SSSE3-NEXT: cmpb %al, %bl
-; SSSE3-NEXT: sete %bpl
; SSSE3-NEXT: addq %r8, %rdi
; SSSE3-NEXT: adcq %r9, %rsi
-; SSSE3-NEXT: setns %al
-; SSSE3-NEXT: cmpb %al, %bl
-; SSSE3-NEXT: setne %al
-; SSSE3-NEXT: andb %bpl, %al
+; SSSE3-NEXT: seto %r8b
; SSSE3-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; SSSE3-NEXT: movq %rcx, %rbp
-; SSSE3-NEXT: adcq %r10, %rbp
-; SSSE3-NEXT: setns %bl
-; SSSE3-NEXT: testq %rcx, %rcx
-; SSSE3-NEXT: setns %cl
-; SSSE3-NEXT: cmpb %bl, %cl
-; SSSE3-NEXT: setne %r8b
-; SSSE3-NEXT: testq %r10, %r10
-; SSSE3-NEXT: setns %bl
-; SSSE3-NEXT: cmpb %bl, %cl
-; SSSE3-NEXT: sete %cl
-; SSSE3-NEXT: andb %r8b, %cl
-; SSSE3-NEXT: movzbl %cl, %ecx
-; SSSE3-NEXT: negl %ecx
-; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; SSSE3-NEXT: seto %al
; SSSE3-NEXT: movzbl %al, %eax
; SSSE3-NEXT: negl %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: movzbl %r8b, %eax
+; SSSE3-NEXT: negl %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSSE3-NEXT: movq %rdx, 16(%r11)
-; SSSE3-NEXT: movq %rdi, (%r11)
-; SSSE3-NEXT: movq %rbp, 24(%r11)
-; SSSE3-NEXT: movq %rsi, 8(%r11)
-; SSSE3-NEXT: popq %rbx
-; SSSE3-NEXT: popq %rbp
+; SSSE3-NEXT: movq %rdx, 16(%r10)
+; SSSE3-NEXT: movq %rdi, (%r10)
+; SSSE3-NEXT: movq %rcx, 24(%r10)
+; SSSE3-NEXT: movq %rsi, 8(%r10)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: saddo_v2i128:
; SSE41: # %bb.0:
-; SSE41-NEXT: pushq %rbp
-; SSE41-NEXT: pushq %rbx
-; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; SSE41-NEXT: testq %r9, %r9
-; SSE41-NEXT: setns %al
-; SSE41-NEXT: testq %rsi, %rsi
-; SSE41-NEXT: setns %bl
-; SSE41-NEXT: cmpb %al, %bl
-; SSE41-NEXT: sete %bpl
; SSE41-NEXT: addq %r8, %rdi
; SSE41-NEXT: adcq %r9, %rsi
-; SSE41-NEXT: setns %al
-; SSE41-NEXT: cmpb %al, %bl
-; SSE41-NEXT: setne %al
-; SSE41-NEXT: andb %bpl, %al
+; SSE41-NEXT: seto %r8b
; SSE41-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; SSE41-NEXT: movq %rcx, %rbp
-; SSE41-NEXT: adcq %r10, %rbp
-; SSE41-NEXT: setns %bl
-; SSE41-NEXT: testq %rcx, %rcx
-; SSE41-NEXT: setns %cl
-; SSE41-NEXT: cmpb %bl, %cl
-; SSE41-NEXT: setne %r8b
-; SSE41-NEXT: testq %r10, %r10
-; SSE41-NEXT: setns %bl
-; SSE41-NEXT: cmpb %bl, %cl
-; SSE41-NEXT: sete %cl
-; SSE41-NEXT: andb %r8b, %cl
-; SSE41-NEXT: movzbl %cl, %ecx
-; SSE41-NEXT: negl %ecx
-; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; SSE41-NEXT: seto %al
+; SSE41-NEXT: movzbl %al, %r9d
+; SSE41-NEXT: negl %r9d
+; SSE41-NEXT: movzbl %r8b, %eax
; SSE41-NEXT: negl %eax
; SSE41-NEXT: movd %eax, %xmm0
-; SSE41-NEXT: pinsrd $1, %ecx, %xmm0
-; SSE41-NEXT: movq %rdx, 16(%r11)
-; SSE41-NEXT: movq %rdi, (%r11)
-; SSE41-NEXT: movq %rbp, 24(%r11)
-; SSE41-NEXT: movq %rsi, 8(%r11)
-; SSE41-NEXT: popq %rbx
-; SSE41-NEXT: popq %rbp
+; SSE41-NEXT: pinsrd $1, %r9d, %xmm0
+; SSE41-NEXT: movq %rdx, 16(%r10)
+; SSE41-NEXT: movq %rdi, (%r10)
+; SSE41-NEXT: movq %rcx, 24(%r10)
+; SSE41-NEXT: movq %rsi, 8(%r10)
; SSE41-NEXT: retq
;
; AVX1-LABEL: saddo_v2i128:
; AVX1: # %bb.0:
-; AVX1-NEXT: pushq %rbp
-; AVX1-NEXT: pushq %rbx
-; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX1-NEXT: testq %r9, %r9
-; AVX1-NEXT: setns %al
-; AVX1-NEXT: testq %rsi, %rsi
-; AVX1-NEXT: setns %bl
-; AVX1-NEXT: cmpb %al, %bl
-; AVX1-NEXT: sete %bpl
; AVX1-NEXT: addq %r8, %rdi
; AVX1-NEXT: adcq %r9, %rsi
-; AVX1-NEXT: setns %al
-; AVX1-NEXT: cmpb %al, %bl
-; AVX1-NEXT: setne %al
-; AVX1-NEXT: andb %bpl, %al
+; AVX1-NEXT: seto %r8b
; AVX1-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; AVX1-NEXT: movq %rcx, %rbp
-; AVX1-NEXT: adcq %r10, %rbp
-; AVX1-NEXT: setns %bl
-; AVX1-NEXT: testq %rcx, %rcx
-; AVX1-NEXT: setns %cl
-; AVX1-NEXT: cmpb %bl, %cl
-; AVX1-NEXT: setne %r8b
-; AVX1-NEXT: testq %r10, %r10
-; AVX1-NEXT: setns %bl
-; AVX1-NEXT: cmpb %bl, %cl
-; AVX1-NEXT: sete %cl
-; AVX1-NEXT: andb %r8b, %cl
-; AVX1-NEXT: movzbl %cl, %ecx
-; AVX1-NEXT: negl %ecx
-; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; AVX1-NEXT: seto %al
+; AVX1-NEXT: movzbl %al, %r9d
+; AVX1-NEXT: negl %r9d
+; AVX1-NEXT: movzbl %r8b, %eax
; AVX1-NEXT: negl %eax
; AVX1-NEXT: vmovd %eax, %xmm0
-; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movq %rdx, 16(%r11)
-; AVX1-NEXT: movq %rdi, (%r11)
-; AVX1-NEXT: movq %rbp, 24(%r11)
-; AVX1-NEXT: movq %rsi, 8(%r11)
-; AVX1-NEXT: popq %rbx
-; AVX1-NEXT: popq %rbp
+; AVX1-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
+; AVX1-NEXT: movq %rdx, 16(%r10)
+; AVX1-NEXT: movq %rdi, (%r10)
+; AVX1-NEXT: movq %rcx, 24(%r10)
+; AVX1-NEXT: movq %rsi, 8(%r10)
; AVX1-NEXT: retq
;
; AVX2-LABEL: saddo_v2i128:
; AVX2: # %bb.0:
-; AVX2-NEXT: pushq %rbp
-; AVX2-NEXT: pushq %rbx
-; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX2-NEXT: testq %r9, %r9
-; AVX2-NEXT: setns %al
-; AVX2-NEXT: testq %rsi, %rsi
-; AVX2-NEXT: setns %bl
-; AVX2-NEXT: cmpb %al, %bl
-; AVX2-NEXT: sete %bpl
; AVX2-NEXT: addq %r8, %rdi
; AVX2-NEXT: adcq %r9, %rsi
-; AVX2-NEXT: setns %al
-; AVX2-NEXT: cmpb %al, %bl
-; AVX2-NEXT: setne %al
-; AVX2-NEXT: andb %bpl, %al
+; AVX2-NEXT: seto %r8b
; AVX2-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; AVX2-NEXT: movq %rcx, %rbp
-; AVX2-NEXT: adcq %r10, %rbp
-; AVX2-NEXT: setns %bl
-; AVX2-NEXT: testq %rcx, %rcx
-; AVX2-NEXT: setns %cl
-; AVX2-NEXT: cmpb %bl, %cl
-; AVX2-NEXT: setne %r8b
-; AVX2-NEXT: testq %r10, %r10
-; AVX2-NEXT: setns %bl
-; AVX2-NEXT: cmpb %bl, %cl
-; AVX2-NEXT: sete %cl
-; AVX2-NEXT: andb %r8b, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: movzbl %al, %eax
+; AVX2-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; AVX2-NEXT: seto %al
+; AVX2-NEXT: movzbl %al, %r9d
+; AVX2-NEXT: negl %r9d
+; AVX2-NEXT: movzbl %r8b, %eax
; AVX2-NEXT: negl %eax
; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movq %rdx, 16(%r11)
-; AVX2-NEXT: movq %rdi, (%r11)
-; AVX2-NEXT: movq %rbp, 24(%r11)
-; AVX2-NEXT: movq %rsi, 8(%r11)
-; AVX2-NEXT: popq %rbx
-; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
+; AVX2-NEXT: movq %rdx, 16(%r10)
+; AVX2-NEXT: movq %rdi, (%r10)
+; AVX2-NEXT: movq %rcx, 24(%r10)
+; AVX2-NEXT: movq %rsi, 8(%r10)
; AVX2-NEXT: retq
;
; AVX512-LABEL: saddo_v2i128:
; AVX512: # %bb.0:
-; AVX512-NEXT: pushq %r14
-; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT: movq %rcx, %r14
-; AVX512-NEXT: adcq %r11, %r14
-; AVX512-NEXT: setns %bl
-; AVX512-NEXT: testq %rcx, %rcx
-; AVX512-NEXT: setns %cl
-; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: setne %bl
-; AVX512-NEXT: testq %r11, %r11
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: cmpb %al, %cl
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: andb %bl, %al
+; AVX512-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; AVX512-NEXT: seto %al
; AVX512-NEXT: kmovd %eax, %k0
-; AVX512-NEXT: testq %r9, %r9
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: testq %rsi, %rsi
-; AVX512-NEXT: setns %cl
-; AVX512-NEXT: cmpb %al, %cl
-; AVX512-NEXT: sete %al
; AVX512-NEXT: addq %r8, %rdi
; AVX512-NEXT: adcq %r9, %rsi
-; AVX512-NEXT: setns %bl
-; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: setne %cl
-; AVX512-NEXT: andb %al, %cl
-; AVX512-NEXT: andl $1, %ecx
-; AVX512-NEXT: kmovw %ecx, %k1
+; AVX512-NEXT: seto %al
+; AVX512-NEXT: andl $1, %eax
+; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: movq %rdx, 16(%r10)
; AVX512-NEXT: movq %rdi, (%r10)
-; AVX512-NEXT: movq %r14, 24(%r10)
+; AVX512-NEXT: movq %rcx, 24(%r10)
; AVX512-NEXT: movq %rsi, 8(%r10)
-; AVX512-NEXT: popq %rbx
-; AVX512-NEXT: popq %r14
; AVX512-NEXT: retq
%t = call {<2 x i128>, <2 x i1>} @llvm.sadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind {
; SSE2-LABEL: ssubo_v2i128:
; SSE2: # %bb.0:
-; SSE2-NEXT: pushq %rbp
-; SSE2-NEXT: pushq %rbx
-; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; SSE2-NEXT: testq %r9, %r9
-; SSE2-NEXT: setns %al
-; SSE2-NEXT: testq %rsi, %rsi
-; SSE2-NEXT: setns %bl
-; SSE2-NEXT: cmpb %al, %bl
-; SSE2-NEXT: setne %bpl
; SSE2-NEXT: subq %r8, %rdi
; SSE2-NEXT: sbbq %r9, %rsi
-; SSE2-NEXT: setns %al
-; SSE2-NEXT: cmpb %al, %bl
-; SSE2-NEXT: setne %al
-; SSE2-NEXT: andb %bpl, %al
+; SSE2-NEXT: seto %r8b
; SSE2-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; SSE2-NEXT: movq %rcx, %rbp
-; SSE2-NEXT: sbbq %r10, %rbp
-; SSE2-NEXT: setns %bl
-; SSE2-NEXT: testq %rcx, %rcx
-; SSE2-NEXT: setns %cl
-; SSE2-NEXT: cmpb %bl, %cl
-; SSE2-NEXT: setne %r8b
-; SSE2-NEXT: testq %r10, %r10
-; SSE2-NEXT: setns %bl
-; SSE2-NEXT: cmpb %bl, %cl
-; SSE2-NEXT: setne %cl
-; SSE2-NEXT: andb %r8b, %cl
-; SSE2-NEXT: movzbl %cl, %ecx
-; SSE2-NEXT: negl %ecx
-; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; SSE2-NEXT: seto %al
; SSE2-NEXT: movzbl %al, %eax
; SSE2-NEXT: negl %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movzbl %r8b, %eax
+; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movq %rdx, 16(%r11)
-; SSE2-NEXT: movq %rdi, (%r11)
-; SSE2-NEXT: movq %rbp, 24(%r11)
-; SSE2-NEXT: movq %rsi, 8(%r11)
-; SSE2-NEXT: popq %rbx
-; SSE2-NEXT: popq %rbp
+; SSE2-NEXT: movq %rdx, 16(%r10)
+; SSE2-NEXT: movq %rdi, (%r10)
+; SSE2-NEXT: movq %rcx, 24(%r10)
+; SSE2-NEXT: movq %rsi, 8(%r10)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: ssubo_v2i128:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: pushq %rbp
-; SSSE3-NEXT: pushq %rbx
-; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; SSSE3-NEXT: testq %r9, %r9
-; SSSE3-NEXT: setns %al
-; SSSE3-NEXT: testq %rsi, %rsi
-; SSSE3-NEXT: setns %bl
-; SSSE3-NEXT: cmpb %al, %bl
-; SSSE3-NEXT: setne %bpl
; SSSE3-NEXT: subq %r8, %rdi
; SSSE3-NEXT: sbbq %r9, %rsi
-; SSSE3-NEXT: setns %al
-; SSSE3-NEXT: cmpb %al, %bl
-; SSSE3-NEXT: setne %al
-; SSSE3-NEXT: andb %bpl, %al
+; SSSE3-NEXT: seto %r8b
; SSSE3-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; SSSE3-NEXT: movq %rcx, %rbp
-; SSSE3-NEXT: sbbq %r10, %rbp
-; SSSE3-NEXT: setns %bl
-; SSSE3-NEXT: testq %rcx, %rcx
-; SSSE3-NEXT: setns %cl
-; SSSE3-NEXT: cmpb %bl, %cl
-; SSSE3-NEXT: setne %r8b
-; SSSE3-NEXT: testq %r10, %r10
-; SSSE3-NEXT: setns %bl
-; SSSE3-NEXT: cmpb %bl, %cl
-; SSSE3-NEXT: setne %cl
-; SSSE3-NEXT: andb %r8b, %cl
-; SSSE3-NEXT: movzbl %cl, %ecx
-; SSSE3-NEXT: negl %ecx
-; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; SSSE3-NEXT: seto %al
; SSSE3-NEXT: movzbl %al, %eax
; SSSE3-NEXT: negl %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: movzbl %r8b, %eax
+; SSSE3-NEXT: negl %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSSE3-NEXT: movq %rdx, 16(%r11)
-; SSSE3-NEXT: movq %rdi, (%r11)
-; SSSE3-NEXT: movq %rbp, 24(%r11)
-; SSSE3-NEXT: movq %rsi, 8(%r11)
-; SSSE3-NEXT: popq %rbx
-; SSSE3-NEXT: popq %rbp
+; SSSE3-NEXT: movq %rdx, 16(%r10)
+; SSSE3-NEXT: movq %rdi, (%r10)
+; SSSE3-NEXT: movq %rcx, 24(%r10)
+; SSSE3-NEXT: movq %rsi, 8(%r10)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: ssubo_v2i128:
; SSE41: # %bb.0:
-; SSE41-NEXT: pushq %rbp
-; SSE41-NEXT: pushq %rbx
-; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; SSE41-NEXT: testq %r9, %r9
-; SSE41-NEXT: setns %al
-; SSE41-NEXT: testq %rsi, %rsi
-; SSE41-NEXT: setns %bl
-; SSE41-NEXT: cmpb %al, %bl
-; SSE41-NEXT: setne %bpl
; SSE41-NEXT: subq %r8, %rdi
; SSE41-NEXT: sbbq %r9, %rsi
-; SSE41-NEXT: setns %al
-; SSE41-NEXT: cmpb %al, %bl
-; SSE41-NEXT: setne %al
-; SSE41-NEXT: andb %bpl, %al
+; SSE41-NEXT: seto %r8b
; SSE41-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; SSE41-NEXT: movq %rcx, %rbp
-; SSE41-NEXT: sbbq %r10, %rbp
-; SSE41-NEXT: setns %bl
-; SSE41-NEXT: testq %rcx, %rcx
-; SSE41-NEXT: setns %cl
-; SSE41-NEXT: cmpb %bl, %cl
-; SSE41-NEXT: setne %r8b
-; SSE41-NEXT: testq %r10, %r10
-; SSE41-NEXT: setns %bl
-; SSE41-NEXT: cmpb %bl, %cl
-; SSE41-NEXT: setne %cl
-; SSE41-NEXT: andb %r8b, %cl
-; SSE41-NEXT: movzbl %cl, %ecx
-; SSE41-NEXT: negl %ecx
-; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; SSE41-NEXT: seto %al
+; SSE41-NEXT: movzbl %al, %r9d
+; SSE41-NEXT: negl %r9d
+; SSE41-NEXT: movzbl %r8b, %eax
; SSE41-NEXT: negl %eax
; SSE41-NEXT: movd %eax, %xmm0
-; SSE41-NEXT: pinsrd $1, %ecx, %xmm0
-; SSE41-NEXT: movq %rdx, 16(%r11)
-; SSE41-NEXT: movq %rdi, (%r11)
-; SSE41-NEXT: movq %rbp, 24(%r11)
-; SSE41-NEXT: movq %rsi, 8(%r11)
-; SSE41-NEXT: popq %rbx
-; SSE41-NEXT: popq %rbp
+; SSE41-NEXT: pinsrd $1, %r9d, %xmm0
+; SSE41-NEXT: movq %rdx, 16(%r10)
+; SSE41-NEXT: movq %rdi, (%r10)
+; SSE41-NEXT: movq %rcx, 24(%r10)
+; SSE41-NEXT: movq %rsi, 8(%r10)
; SSE41-NEXT: retq
;
; AVX1-LABEL: ssubo_v2i128:
; AVX1: # %bb.0:
-; AVX1-NEXT: pushq %rbp
-; AVX1-NEXT: pushq %rbx
-; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX1-NEXT: testq %r9, %r9
-; AVX1-NEXT: setns %al
-; AVX1-NEXT: testq %rsi, %rsi
-; AVX1-NEXT: setns %bl
-; AVX1-NEXT: cmpb %al, %bl
-; AVX1-NEXT: setne %bpl
; AVX1-NEXT: subq %r8, %rdi
; AVX1-NEXT: sbbq %r9, %rsi
-; AVX1-NEXT: setns %al
-; AVX1-NEXT: cmpb %al, %bl
-; AVX1-NEXT: setne %al
-; AVX1-NEXT: andb %bpl, %al
+; AVX1-NEXT: seto %r8b
; AVX1-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; AVX1-NEXT: movq %rcx, %rbp
-; AVX1-NEXT: sbbq %r10, %rbp
-; AVX1-NEXT: setns %bl
-; AVX1-NEXT: testq %rcx, %rcx
-; AVX1-NEXT: setns %cl
-; AVX1-NEXT: cmpb %bl, %cl
-; AVX1-NEXT: setne %r8b
-; AVX1-NEXT: testq %r10, %r10
-; AVX1-NEXT: setns %bl
-; AVX1-NEXT: cmpb %bl, %cl
-; AVX1-NEXT: setne %cl
-; AVX1-NEXT: andb %r8b, %cl
-; AVX1-NEXT: movzbl %cl, %ecx
-; AVX1-NEXT: negl %ecx
-; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; AVX1-NEXT: seto %al
+; AVX1-NEXT: movzbl %al, %r9d
+; AVX1-NEXT: negl %r9d
+; AVX1-NEXT: movzbl %r8b, %eax
; AVX1-NEXT: negl %eax
; AVX1-NEXT: vmovd %eax, %xmm0
-; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movq %rdx, 16(%r11)
-; AVX1-NEXT: movq %rdi, (%r11)
-; AVX1-NEXT: movq %rbp, 24(%r11)
-; AVX1-NEXT: movq %rsi, 8(%r11)
-; AVX1-NEXT: popq %rbx
-; AVX1-NEXT: popq %rbp
+; AVX1-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
+; AVX1-NEXT: movq %rdx, 16(%r10)
+; AVX1-NEXT: movq %rdi, (%r10)
+; AVX1-NEXT: movq %rcx, 24(%r10)
+; AVX1-NEXT: movq %rsi, 8(%r10)
; AVX1-NEXT: retq
;
; AVX2-LABEL: ssubo_v2i128:
; AVX2: # %bb.0:
-; AVX2-NEXT: pushq %rbp
-; AVX2-NEXT: pushq %rbx
-; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX2-NEXT: testq %r9, %r9
-; AVX2-NEXT: setns %al
-; AVX2-NEXT: testq %rsi, %rsi
-; AVX2-NEXT: setns %bl
-; AVX2-NEXT: cmpb %al, %bl
-; AVX2-NEXT: setne %bpl
; AVX2-NEXT: subq %r8, %rdi
; AVX2-NEXT: sbbq %r9, %rsi
-; AVX2-NEXT: setns %al
-; AVX2-NEXT: cmpb %al, %bl
-; AVX2-NEXT: setne %al
-; AVX2-NEXT: andb %bpl, %al
+; AVX2-NEXT: seto %r8b
; AVX2-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; AVX2-NEXT: movq %rcx, %rbp
-; AVX2-NEXT: sbbq %r10, %rbp
-; AVX2-NEXT: setns %bl
-; AVX2-NEXT: testq %rcx, %rcx
-; AVX2-NEXT: setns %cl
-; AVX2-NEXT: cmpb %bl, %cl
-; AVX2-NEXT: setne %r8b
-; AVX2-NEXT: testq %r10, %r10
-; AVX2-NEXT: setns %bl
-; AVX2-NEXT: cmpb %bl, %cl
-; AVX2-NEXT: setne %cl
-; AVX2-NEXT: andb %r8b, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: movzbl %al, %eax
+; AVX2-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; AVX2-NEXT: seto %al
+; AVX2-NEXT: movzbl %al, %r9d
+; AVX2-NEXT: negl %r9d
+; AVX2-NEXT: movzbl %r8b, %eax
; AVX2-NEXT: negl %eax
; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movq %rdx, 16(%r11)
-; AVX2-NEXT: movq %rdi, (%r11)
-; AVX2-NEXT: movq %rbp, 24(%r11)
-; AVX2-NEXT: movq %rsi, 8(%r11)
-; AVX2-NEXT: popq %rbx
-; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
+; AVX2-NEXT: movq %rdx, 16(%r10)
+; AVX2-NEXT: movq %rdi, (%r10)
+; AVX2-NEXT: movq %rcx, 24(%r10)
+; AVX2-NEXT: movq %rsi, 8(%r10)
; AVX2-NEXT: retq
;
; AVX512-LABEL: ssubo_v2i128:
; AVX512: # %bb.0:
-; AVX512-NEXT: pushq %r14
-; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT: movq %rcx, %r14
-; AVX512-NEXT: sbbq %r11, %r14
-; AVX512-NEXT: setns %bl
-; AVX512-NEXT: testq %rcx, %rcx
-; AVX512-NEXT: setns %cl
-; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: setne %bl
-; AVX512-NEXT: testq %r11, %r11
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: cmpb %al, %cl
-; AVX512-NEXT: setne %al
-; AVX512-NEXT: andb %bl, %al
+; AVX512-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; AVX512-NEXT: seto %al
; AVX512-NEXT: kmovd %eax, %k0
-; AVX512-NEXT: testq %r9, %r9
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: testq %rsi, %rsi
-; AVX512-NEXT: setns %cl
-; AVX512-NEXT: cmpb %al, %cl
-; AVX512-NEXT: setne %al
; AVX512-NEXT: subq %r8, %rdi
; AVX512-NEXT: sbbq %r9, %rsi
-; AVX512-NEXT: setns %bl
-; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: setne %cl
-; AVX512-NEXT: andb %al, %cl
-; AVX512-NEXT: andl $1, %ecx
-; AVX512-NEXT: kmovw %ecx, %k1
+; AVX512-NEXT: seto %al
+; AVX512-NEXT: andl $1, %eax
+; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: movq %rdx, 16(%r10)
; AVX512-NEXT: movq %rdi, (%r10)
-; AVX512-NEXT: movq %r14, 24(%r10)
+; AVX512-NEXT: movq %rcx, 24(%r10)
; AVX512-NEXT: movq %rsi, 8(%r10)
-; AVX512-NEXT: popq %rbx
-; AVX512-NEXT: popq %r14
; AVX512-NEXT: retq
%t = call {<2 x i128>, <2 x i1>} @llvm.ssub.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
define zeroext i1 @saddoi128(i128 %v1, i128 %v2, i128* %res) nounwind {
; X64-LABEL: saddoi128:
; X64: ## %bb.0:
-; X64-NEXT: testq %rcx, %rcx
-; X64-NEXT: setns %r9b
-; X64-NEXT: testq %rsi, %rsi
-; X64-NEXT: setns %al
-; X64-NEXT: cmpb %r9b, %al
-; X64-NEXT: sete %r9b
; X64-NEXT: addq %rdx, %rdi
; X64-NEXT: adcq %rcx, %rsi
-; X64-NEXT: setns %cl
-; X64-NEXT: cmpb %cl, %al
-; X64-NEXT: setne %al
-; X64-NEXT: andb %r9b, %al
+; X64-NEXT: seto %al
; X64-NEXT: movq %rdi, (%r8)
; X64-NEXT: movq %rsi, 8(%r8)
; X64-NEXT: retq
;
; X86-LABEL: saddoi128:
; X86: ## %bb.0:
-; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: setns %al
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: setns %ah
-; X86-NEXT: cmpb %al, %ah
-; X86-NEXT: sete %cl
-; X86-NEXT: addl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: addl {{[0-9]+}}(%esp), %edi
; X86-NEXT: adcl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: setns %al
-; X86-NEXT: cmpb %al, %ah
-; X86-NEXT: setne %al
-; X86-NEXT: andb %cl, %al
-; X86-NEXT: movl %esi, (%ebp)
-; X86-NEXT: movl %edi, 4(%ebp)
-; X86-NEXT: movl %edx, 8(%ebp)
-; X86-NEXT: movl %ebx, 12(%ebp)
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: seto %al
+; X86-NEXT: movl %edi, (%ecx)
+; X86-NEXT: movl %ebx, 4(%ecx)
+; X86-NEXT: movl %esi, 8(%ecx)
+; X86-NEXT: movl %edx, 12(%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl
%t = call {i128, i1} @llvm.sadd.with.overflow.i128(i128 %v1, i128 %v2)
%val = extractvalue {i128, i1} %t, 0
define zeroext i1 @ssuboi128(i128 %v1, i128 %v2, i128* %res) nounwind {
; X64-LABEL: ssuboi128:
; X64: ## %bb.0:
-; X64-NEXT: testq %rcx, %rcx
-; X64-NEXT: setns %r9b
-; X64-NEXT: testq %rsi, %rsi
-; X64-NEXT: setns %al
-; X64-NEXT: cmpb %r9b, %al
-; X64-NEXT: setne %r9b
; X64-NEXT: subq %rdx, %rdi
; X64-NEXT: sbbq %rcx, %rsi
-; X64-NEXT: setns %cl
-; X64-NEXT: cmpb %cl, %al
-; X64-NEXT: setne %al
-; X64-NEXT: andb %r9b, %al
+; X64-NEXT: seto %al
; X64-NEXT: movq %rdi, (%r8)
; X64-NEXT: movq %rsi, 8(%r8)
; X64-NEXT: retq
;
; X86-LABEL: ssuboi128:
; X86: ## %bb.0:
-; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: setns %al
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: setns %ah
-; X86-NEXT: cmpb %al, %ah
-; X86-NEXT: setne %cl
-; X86-NEXT: subl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: setns %al
-; X86-NEXT: cmpb %al, %ah
-; X86-NEXT: setne %al
-; X86-NEXT: andb %cl, %al
-; X86-NEXT: movl %esi, (%ebp)
-; X86-NEXT: movl %edi, 4(%ebp)
-; X86-NEXT: movl %edx, 8(%ebp)
-; X86-NEXT: movl %ebx, 12(%ebp)
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: seto %al
+; X86-NEXT: movl %edi, (%ecx)
+; X86-NEXT: movl %ebx, 4(%ecx)
+; X86-NEXT: movl %esi, 8(%ecx)
+; X86-NEXT: movl %edx, 12(%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl
%t = call {i128, i1} @llvm.ssub.with.overflow.i128(i128 %v1, i128 %v2)
%val = extractvalue {i128, i1} %t, 0