From: Simon Pilgrim Date: Mon, 21 Mar 2022 21:37:37 +0000 (+0000) Subject: [X86] combineAddOrSubToADCOrSBB - Fold ADD/SUB + (AND(SRL(X,Y),1)) -> ADC/SBB+BT(X... X-Git-Tag: upstream/15.0.7~12831 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=438ac282db97c584daf2d4d1a90e6b3d49ef9189;p=platform%2Fupstream%2Fllvm.git [X86] combineAddOrSubToADCOrSBB - Fold ADD/SUB + (AND(SRL(X,Y),1)) -> ADC/SBB+BT(X,Y) (REAPPLIED) As suggested on PR35908, if we are adding/subtracting an extracted bit, attempt to use BT instead to fold the op and use an ADC/SBB op. Reapply with extra type legality checks - LowerAndToBT was originally only used during lowering; now that it can occur earlier, we might encounter illegal types that we can either promote to i32 or just bail. Differential Revision: https://reviews.llvm.org/D122084 --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6e1c83f..a634574 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23523,9 +23523,8 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, /// Result of 'and' is compared against zero. Change to a BT node if possible. /// Returns the BT node and the condition code needed to use it. -static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, - const SDLoc &dl, SelectionDAG &DAG, - SDValue &X86CC) { +static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl, + SelectionDAG &DAG, X86::CondCode &X86CC) { assert(And.getOpcode() == ISD::AND && "Expected AND node!"); SDValue Op0 = And.getOperand(0); SDValue Op1 = And.getOperand(1); @@ -23587,9 +23586,13 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, // that doing a bittest on the i32 value is ok. We extend to i32 because // the encoding for the i16 version is larger than the i32 version. // Also promote i16 to i32 for performance / code size reason. 
- if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16) + if (Src.getValueType().getScalarSizeInBits() < 32) Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src); + // No legal type found, give up. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType())) + return SDValue(); + // See if we can use the 32-bit instruction instead of the 64-bit one for a // shorter encoding. Since the former takes the modulo 32 of BitNo and the // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is @@ -23603,8 +23606,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, if (Src.getValueType() != BitNo.getValueType()) BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo); - X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B, - dl, MVT::i8); + X86CC = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo); } @@ -24310,8 +24312,11 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1, // Lower ((X >>s N) & 1) != 0 to BT(X, N). if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CC)) + X86::CondCode X86CondCode; + if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CondCode)) { + X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8); return BT; + } } // Try to use PTEST/PMOVMSKB for a tree ORs equality compared with 0. @@ -24783,9 +24788,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // We know the result of AND is compared against zero. Try to match // it to BT. 
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { - SDValue BTCC; - if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, BTCC)) { - CC = BTCC; + X86::CondCode X86CondCode; + if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, X86CondCode)) { + CC = DAG.getTargetConstant(X86CondCode, DL, MVT::i8); Cond = BT; AddTest = false; } @@ -52294,6 +52299,7 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG, /// If this is an add or subtract where one operand is produced by a cmp+setcc, /// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB} /// with CMP+{ADC, SBB}. +/// Also try (ADD/SUB)+(AND(SRL,1)) bit extraction pattern with BT+{ADC, SBB}. static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT, SDValue X, SDValue Y, SelectionDAG &DAG) { @@ -52304,11 +52310,20 @@ static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT, if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse()) Y = Y.getOperand(0); - if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse()) + if (!Y.hasOneUse()) return SDValue(); - X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0); - SDValue EFLAGS = Y.getOperand(1); + X86::CondCode CC; + SDValue EFLAGS; + if (Y.getOpcode() == X86ISD::SETCC) { + CC = (X86::CondCode)Y.getConstantOperandVal(0); + EFLAGS = Y.getOperand(1); + } else if (Y.getOpcode() == ISD::AND && isOneConstant(Y.getOperand(1))) { + EFLAGS = LowerAndToBT(Y, ISD::SETNE, DL, DAG, CC); + } + + if (!EFLAGS) + return SDValue(); // If X is -1 or 0, then we have an opportunity to avoid constants required in // the general case below. 
diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll index 10e6fb2..f2dfc05 100644 --- a/llvm/test/CodeGen/X86/add-sub-bool.ll +++ b/llvm/test/CodeGen/X86/add-sub-bool.ll @@ -18,31 +18,16 @@ define i32 @test_i32_add_add_idx(i32 %x, i32 %y, i32 %z) nounwind { ; X86-LABEL: test_i32_add_add_idx: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $30, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: btl $30, {{[0-9]+}}(%esp) +; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; NOTBM-LABEL: test_i32_add_add_idx: -; NOTBM: # %bb.0: -; NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; NOTBM-NEXT: # kill: def $edi killed $edi def $rdi -; NOTBM-NEXT: leal (%rdi,%rsi), %eax -; NOTBM-NEXT: shrl $30, %edx -; NOTBM-NEXT: andl $1, %edx -; NOTBM-NEXT: addl %edx, %eax -; NOTBM-NEXT: retq -; -; TBM-LABEL: test_i32_add_add_idx: -; TBM: # %bb.0: -; TBM-NEXT: # kill: def $esi killed $esi def $rsi -; TBM-NEXT: # kill: def $edi killed $edi def $rdi -; TBM-NEXT: bextrl $286, %edx, %eax # imm = 0x11E -; TBM-NEXT: addl %edi, %eax -; TBM-NEXT: addl %esi, %eax -; TBM-NEXT: retq +; X64-LABEL: test_i32_add_add_idx: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: btl $30, %edx +; X64-NEXT: adcl %esi, %eax +; X64-NEXT: retq %add = add i32 %y, %x %shift = lshr i32 %z, 30 %mask = and i32 %shift, 1 @@ -54,31 +39,16 @@ define i32 @test_i32_add_add_commute_idx(i32 %x, i32 %y, i32 %z) nounwind { ; X86-LABEL: test_i32_add_add_commute_idx: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $2, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: btl $2, {{[0-9]+}}(%esp) +; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; NOTBM-LABEL: test_i32_add_add_commute_idx: -; NOTBM: # 
%bb.0: -; NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; NOTBM-NEXT: # kill: def $edi killed $edi def $rdi -; NOTBM-NEXT: leal (%rdi,%rsi), %eax -; NOTBM-NEXT: shrl $2, %edx -; NOTBM-NEXT: andl $1, %edx -; NOTBM-NEXT: addl %edx, %eax -; NOTBM-NEXT: retq -; -; TBM-LABEL: test_i32_add_add_commute_idx: -; TBM: # %bb.0: -; TBM-NEXT: # kill: def $esi killed $esi def $rsi -; TBM-NEXT: # kill: def $edi killed $edi def $rdi -; TBM-NEXT: bextrl $258, %edx, %eax # imm = 0x102 -; TBM-NEXT: addl %edi, %eax -; TBM-NEXT: addl %esi, %eax -; TBM-NEXT: retq +; X64-LABEL: test_i32_add_add_commute_idx: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: btl $2, %edx +; X64-NEXT: adcl %esi, %eax +; X64-NEXT: retq %add = add i32 %y, %x %shift = lshr i32 %z, 2 %mask = and i32 %shift, 1 @@ -110,6 +80,27 @@ define i32 @test_i32_add_add_idx0(i32 %x, i32 %y, i32 %z) nounwind { ret i32 %add1 } +define i24 @test_i24_add_add_idx(i24 %x, i24 %y, i24 %z) nounwind { +; X86-LABEL: test_i24_add_add_idx: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: btl $15, {{[0-9]+}}(%esp) +; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax +; X86-NEXT: retl +; +; X64-LABEL: test_i24_add_add_idx: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: btl $15, %edx +; X64-NEXT: adcl %esi, %eax +; X64-NEXT: retq + %add = add i24 %y, %x + %shift = lshr i24 %z, 15 + %mask = and i24 %shift, 1 + %add1 = add i24 %add, %mask + ret i24 %add1 +} + define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind { ; X86-LABEL: test_i128_add_add_idx: ; X86: # %bb.0: @@ -198,29 +189,18 @@ define i32 @test_i32_add_sub_commute_idx(i32 %x, i32 %y, i32 %z) nounwind { ; X86-LABEL: test_i32_add_sub_commute_idx: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $8, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: subl {{[0-9]+}}(%esp), %eax +; X86-NEXT: btl $8, 
{{[0-9]+}}(%esp) +; X86-NEXT: adcl $0, %eax ; X86-NEXT: retl ; -; NOTBM-LABEL: test_i32_add_sub_commute_idx: -; NOTBM: # %bb.0: -; NOTBM-NEXT: # kill: def $edx killed $edx def $rdx -; NOTBM-NEXT: # kill: def $edi killed $edi def $rdi -; NOTBM-NEXT: subl %esi, %edi -; NOTBM-NEXT: shrl $8, %edx -; NOTBM-NEXT: andl $1, %edx -; NOTBM-NEXT: leal (%rdx,%rdi), %eax -; NOTBM-NEXT: retq -; -; TBM-LABEL: test_i32_add_sub_commute_idx: -; TBM: # %bb.0: -; TBM-NEXT: subl %esi, %edi -; TBM-NEXT: bextrl $264, %edx, %eax # imm = 0x108 -; TBM-NEXT: addl %edi, %eax -; TBM-NEXT: retq +; X64-LABEL: test_i32_add_sub_commute_idx: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: subl %esi, %eax +; X64-NEXT: btl $8, %edx +; X64-NEXT: adcl $0, %eax +; X64-NEXT: retq %sub = sub i32 %x, %y %shift = lshr i32 %z, 8 %mask = and i32 %shift, 1 @@ -231,32 +211,20 @@ define i32 @test_i32_add_sub_commute_idx(i32 %x, i32 %y, i32 %z) nounwind { define i32 @test_i32_sub_add_idx(i32 %x, i32 %y, i32 %z) nounwind { ; X86-LABEL: test_i32_sub_add_idx: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl %ecx -; X86-NEXT: andl $1, %ecx -; X86-NEXT: subl %ecx, %eax +; X86-NEXT: btl $1, {{[0-9]+}}(%esp) +; X86-NEXT: sbbl $0, %eax ; X86-NEXT: retl ; -; NOTBM-LABEL: test_i32_sub_add_idx: -; NOTBM: # %bb.0: -; NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; NOTBM-NEXT: # kill: def $edi killed $edi def $rdi -; NOTBM-NEXT: leal (%rdi,%rsi), %eax -; NOTBM-NEXT: shrl %edx -; NOTBM-NEXT: andl $1, %edx -; NOTBM-NEXT: subl %edx, %eax -; NOTBM-NEXT: retq -; -; TBM-LABEL: test_i32_sub_add_idx: -; TBM: # %bb.0: -; TBM-NEXT: # kill: def $esi killed $esi def $rsi -; TBM-NEXT: # kill: def $edi killed $edi def $rdi -; TBM-NEXT: leal (%rdi,%rsi), %eax -; TBM-NEXT: bextrl $257, %edx, %ecx # imm = 0x101 -; TBM-NEXT: subl %ecx, %eax -; TBM-NEXT: retq +; X64-LABEL: test_i32_sub_add_idx: +; X64: # %bb.0: +; X64-NEXT: 
# kill: def $esi killed $esi def $rsi +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal (%rdi,%rsi), %eax +; X64-NEXT: btl $1, %edx +; X64-NEXT: sbbl $0, %eax +; X64-NEXT: retq %add = add i32 %y, %x %shift = lshr i32 %z, 1 %mask = and i32 %shift, 1 @@ -268,28 +236,18 @@ define i32 @test_i32_sub_sub_idx(i32 %x, i32 %y, i32 %z) nounwind { ; X86-LABEL: test_i32_sub_sub_idx: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl $16, %eax -; X86-NEXT: andl $1, %eax +; X86-NEXT: btl $16, {{[0-9]+}}(%esp) +; X86-NEXT: adcl $0, %eax ; X86-NEXT: subl {{[0-9]+}}(%esp), %eax -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; NOTBM-LABEL: test_i32_sub_sub_idx: -; NOTBM: # %bb.0: -; NOTBM-NEXT: # kill: def $edx killed $edx def $rdx -; NOTBM-NEXT: # kill: def $edi killed $edi def $rdi -; NOTBM-NEXT: shrl $16, %edx -; NOTBM-NEXT: andl $1, %edx -; NOTBM-NEXT: subl %esi, %edx -; NOTBM-NEXT: leal (%rdx,%rdi), %eax -; NOTBM-NEXT: retq -; -; TBM-LABEL: test_i32_sub_sub_idx: -; TBM: # %bb.0: -; TBM-NEXT: bextrl $272, %edx, %eax # imm = 0x110 -; TBM-NEXT: subl %esi, %eax -; TBM-NEXT: addl %edi, %eax -; TBM-NEXT: retq +; X64-LABEL: test_i32_sub_sub_idx: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: btl $16, %edx +; X64-NEXT: adcl $0, %eax +; X64-NEXT: subl %esi, %eax +; X64-NEXT: retq %shift = lshr i32 %z, 16 %mask = and i32 %shift, 1 %sub0 = sub i32 %y, %mask @@ -301,29 +259,16 @@ define i32 @test_i32_sub_sub_commute_idx(i32 %x, i32 %y, i32 %z) nounwind { ; X86-LABEL: test_i32_sub_sub_commute_idx: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $15, %ecx -; X86-NEXT: andl $1, %ecx -; X86-NEXT: subl {{[0-9]+}}(%esp), %eax -; X86-NEXT: subl %ecx, %eax +; X86-NEXT: btl $15, {{[0-9]+}}(%esp) +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; NOTBM-LABEL: test_i32_sub_sub_commute_idx: -; NOTBM: # %bb.0: -; NOTBM-NEXT: movl %edi, %eax -; NOTBM-NEXT: 
shrl $15, %edx -; NOTBM-NEXT: andl $1, %edx -; NOTBM-NEXT: subl %esi, %eax -; NOTBM-NEXT: subl %edx, %eax -; NOTBM-NEXT: retq -; -; TBM-LABEL: test_i32_sub_sub_commute_idx: -; TBM: # %bb.0: -; TBM-NEXT: movl %edi, %eax -; TBM-NEXT: bextrl $271, %edx, %ecx # imm = 0x10F -; TBM-NEXT: subl %esi, %eax -; TBM-NEXT: subl %ecx, %eax -; TBM-NEXT: retq +; X64-LABEL: test_i32_sub_sub_commute_idx: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: btl $15, %edx +; X64-NEXT: sbbl %esi, %eax +; X64-NEXT: retq %shift = lshr i32 %z, 15 %mask = and i32 %shift, 1 %sub0 = sub i32 %x, %y @@ -334,29 +279,22 @@ define i32 @test_i32_sub_sub_commute_idx(i32 %x, i32 %y, i32 %z) nounwind { define i32 @test_i32_sub_sum_idx(i32 %x, i32 %y, i32 %z) nounwind { ; X86-LABEL: test_i32_sub_sum_idx: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl $30, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: subl %ecx, %eax +; X86-NEXT: addl {{[0-9]+}}(%esp), %eax +; X86-NEXT: btl $30, {{[0-9]+}}(%esp) +; X86-NEXT: sbbl $0, %eax +; X86-NEXT: negl %eax ; X86-NEXT: retl ; -; NOTBM-LABEL: test_i32_sub_sum_idx: -; NOTBM: # %bb.0: -; NOTBM-NEXT: movl %edx, %eax -; NOTBM-NEXT: shrl $30, %eax -; NOTBM-NEXT: andl $1, %eax -; NOTBM-NEXT: addl %esi, %edi -; NOTBM-NEXT: subl %edi, %eax -; NOTBM-NEXT: retq -; -; TBM-LABEL: test_i32_sub_sum_idx: -; TBM: # %bb.0: -; TBM-NEXT: bextrl $286, %edx, %eax # imm = 0x11E -; TBM-NEXT: addl %esi, %edi -; TBM-NEXT: subl %edi, %eax -; TBM-NEXT: retq +; X64-LABEL: test_i32_sub_sum_idx: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal (%rdi,%rsi), %eax +; X64-NEXT: btl $30, %edx +; X64-NEXT: sbbl $0, %eax +; X64-NEXT: negl %eax +; X64-NEXT: retq %shift = lshr i32 %z, 30 %mask = and i32 %shift, 1 %add = add i32 %y, %x @@ -371,24 +309,18 @@ define i32 @test_i32_sub_sum_idx(i32 %x, i32 %y, 
i32 %z) nounwind { define i32 @test_i32_add_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { ; X86-LABEL: test_i32_add_add_var: ; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: addl {{[0-9]+}}(%esp), %edx -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: addl %edx, %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; ; X64-LABEL: test_i32_add_add_var: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: leal (%rdi,%rsi), %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %edx -; X64-NEXT: andl $1, %edx -; X64-NEXT: addl %edx, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: adcl %esi, %eax ; X64-NEXT: retq %add = add i32 %y, %x %shift = lshr i32 %z, %w @@ -400,24 +332,18 @@ define i32 @test_i32_add_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { define i32 @test_i32_add_add_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { ; X86-LABEL: test_i32_add_add_commute_var: ; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: addl {{[0-9]+}}(%esp), %edx -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: addl %edx, %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; ; X64-LABEL: test_i32_add_add_commute_var: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: leal (%rdi,%rsi), %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %edx -; X64-NEXT: andl $1, %edx -; X64-NEXT: addl %edx, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: btl %ecx, %edx 
+; X64-NEXT: adcl %esi, %eax ; X64-NEXT: retq %add = add i32 %y, %x %shift = lshr i32 %z, %w @@ -443,10 +369,10 @@ define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind { ; X86-NEXT: shrl %cl, %edi ; X86-NEXT: shrdl %cl, %ebx, %esi ; X86-NEXT: testb $32, %cl -; X86-NEXT: jne .LBB12_2 +; X86-NEXT: jne .LBB13_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: movl %esi, %edi -; X86-NEXT: .LBB12_2: +; X86-NEXT: .LBB13_2: ; X86-NEXT: andl $1, %edi ; X86-NEXT: addl %edi, %eax ; X86-NEXT: adcl $0, %edx @@ -457,11 +383,9 @@ define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind { ; ; X64-LABEL: test_i64_add_add_var: ; X64: # %bb.0: -; X64-NEXT: leaq (%rdi,%rsi), %rax -; X64-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-NEXT: shrq %cl, %rdx -; X64-NEXT: andl $1, %edx -; X64-NEXT: addq %rdx, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: btq %rcx, %rdx +; X64-NEXT: adcq %rsi, %rax ; X64-NEXT: retq %add = add i64 %y, %x %shift = lshr i64 %z, %w @@ -473,24 +397,20 @@ define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind { define i32 @test_i32_add_sub_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { ; X86-LABEL: test_i32_add_sub_var: ; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: subl {{[0-9]+}}(%esp), %edx -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: addl %edx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: subl {{[0-9]+}}(%esp), %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: adcl $0, %eax ; X86-NEXT: retl ; ; X64-LABEL: test_i32_add_sub_var: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edx killed $edx def $rdx -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: subl %esi, %edi -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %edx -; X64-NEXT: andl $1, %edx -; X64-NEXT: leal (%rdx,%rdi), %eax +; X64-NEXT: movl %edi, %eax 
+; X64-NEXT: subl %esi, %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: adcl $0, %eax ; X64-NEXT: retq %sub = sub i32 %x, %y %shift = lshr i32 %z, %w @@ -502,24 +422,20 @@ define i32 @test_i32_add_sub_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { define i32 @test_i32_add_sub_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { ; X86-LABEL: test_i32_add_sub_commute_var: ; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: subl {{[0-9]+}}(%esp), %edx -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: addl %edx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: subl {{[0-9]+}}(%esp), %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: adcl $0, %eax ; X86-NEXT: retl ; ; X64-LABEL: test_i32_add_sub_commute_var: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edx killed $edx def $rdx -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: subl %esi, %edi -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %edx -; X64-NEXT: andl $1, %edx -; X64-NEXT: leal (%rdx,%rdi), %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: subl %esi, %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: adcl $0, %eax ; X64-NEXT: retq %sub = sub i32 %x, %y %shift = lshr i32 %z, %w @@ -531,13 +447,12 @@ define i32 @test_i32_add_sub_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwin define i32 @test_i32_sub_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { ; X86-LABEL: test_i32_sub_add_var: ; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl %cl, %edx -; X86-NEXT: andl $1, %edx -; X86-NEXT: subl %edx, %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: sbbl $0, %eax ; X86-NEXT: retl ; ; X64-LABEL: test_i32_sub_add_var: @@ -545,10 +460,8 @@ define i32 
@test_i32_sub_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { ; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: leal (%rdi,%rsi), %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %edx -; X64-NEXT: andl $1, %edx -; X64-NEXT: subl %edx, %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: sbbl $0, %eax ; X64-NEXT: retq %add = add i32 %y, %x %shift = lshr i32 %z, %w @@ -560,23 +473,20 @@ define i32 @test_i32_sub_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { define i32 @test_i32_sub_sub_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { ; X86-LABEL: test_i32_sub_sub_var: ; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: andl $1, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: adcl $0, %eax ; X86-NEXT: subl {{[0-9]+}}(%esp), %eax -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; ; X64-LABEL: test_i32_sub_sub_var: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edx killed $edx def $rdx -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %edx -; X64-NEXT: andl $1, %edx -; X64-NEXT: subl %esi, %edx -; X64-NEXT: leal (%rdx,%rdi), %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: adcl $0, %eax +; X64-NEXT: subl %esi, %eax ; X64-NEXT: retq %shift = lshr i32 %z, %w %mask = and i32 %shift, 1 @@ -589,22 +499,17 @@ define i32 @test_i32_sub_sub_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwin ; X86-LABEL: test_i32_sub_sub_commute_var: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: shrl %cl, %edx -; X86-NEXT: andl $1, %edx -; X86-NEXT: subl {{[0-9]+}}(%esp), %eax -; X86-NEXT: subl 
%edx, %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; ; X64-LABEL: test_i32_sub_sub_commute_var: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %edx -; X64-NEXT: andl $1, %edx -; X64-NEXT: subl %esi, %eax -; X64-NEXT: subl %edx, %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: sbbl %esi, %eax ; X64-NEXT: retq %shift = lshr i32 %z, %w %mask = and i32 %shift, 1 @@ -616,23 +521,23 @@ define i32 @test_i32_sub_sub_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwin define i32 @test_i32_sub_sum_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { ; X86-LABEL: test_i32_sub_sum_var: ; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: addl {{[0-9]+}}(%esp), %edx -; X86-NEXT: subl %edx, %eax +; X86-NEXT: addl {{[0-9]+}}(%esp), %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: sbbl $0, %eax +; X86-NEXT: negl %eax ; X86-NEXT: retl ; ; X64-LABEL: test_i32_sub_sum_var: ; X64: # %bb.0: -; X64-NEXT: movl %edx, %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %eax -; X64-NEXT: andl $1, %eax -; X64-NEXT: addl %esi, %edi -; X64-NEXT: subl %edi, %eax +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal (%rdi,%rsi), %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: sbbl $0, %eax +; X64-NEXT: negl %eax ; X64-NEXT: retq %shift = lshr i32 %z, %w %mask = and i32 %shift, 1