From d1baed7c9c8341c43c696cce1b7ec846c21b0b45 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Amaury=20S=C3=A9chet?= Date: Fri, 5 Aug 2022 13:33:07 +0000 Subject: [PATCH] [DAG] select Cond, -1, C --> or (sext Cond), C if Cond is MVT::i1 This seems to be beneficial overall, except for midpoint-int.ll. The X86 backend seems to generate zeroing that is not necessary. Reviewed By: shchenz Differential Revision: https://reviews.llvm.org/D131260 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 59 ++++-- llvm/test/CodeGen/PowerPC/crbits.ll | 6 +- llvm/test/CodeGen/PowerPC/prefer-dqform.ll | 15 +- .../test/CodeGen/X86/memcmp-more-load-pairs-x32.ll | 49 ++--- llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll | 105 ++++++----- llvm/test/CodeGen/X86/memcmp-optsize-x32.ll | 18 +- llvm/test/CodeGen/X86/memcmp-optsize.ll | 38 ++-- llvm/test/CodeGen/X86/memcmp-pgso-x32.ll | 18 +- llvm/test/CodeGen/X86/memcmp-pgso.ll | 38 ++-- llvm/test/CodeGen/X86/memcmp-x32.ll | 33 ++-- llvm/test/CodeGen/X86/memcmp.ll | 85 +++++---- llvm/test/CodeGen/X86/midpoint-int.ll | 208 +++++++++++---------- llvm/test/CodeGen/X86/select.ll | 42 ++--- llvm/test/CodeGen/X86/select_const.ll | 6 +- 14 files changed, 389 insertions(+), 331 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7b7ced4..673456c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10238,6 +10238,25 @@ static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, + const TargetLowering &TLI) { + if (!TLI.convertSelectOfConstantsToMath(VT)) + return false; + + if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse()) + return true; + if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) + return true; + + ISD::CondCode CC = cast(Cond.getOperand(2))->get(); + if (CC == ISD::SETLT && 
isNullOrNullSplat(Cond.getOperand(1))) + return true; + if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1))) + return true; + + return false; +} + SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { SDValue Cond = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -10288,9 +10307,9 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { // Use a target hook because some targets may prefer to transform in the // other direction. - if (TLI.convertSelectOfConstantsToMath(VT)) { - // For any constants that differ by 1, we can transform the select into an - // extend and add. + if (shouldConvertSelectOfConstantsToMath(Cond, VT, TLI)) { + // For any constants that differ by 1, we can transform the select into + // an extend and add. const APInt &C1Val = C1->getAPIntValue(); const APInt &C2Val = C2->getAPIntValue(); if (C1Val - 1 == C2Val) { @@ -10299,6 +10318,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } + if (C1Val + 1 == C2Val) { // select Cond, C1, C1+1 --> add (sext Cond), C1+1 if (VT != MVT::i1) @@ -10315,6 +10335,12 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); } + // select Cond, -1, C --> or (sext Cond), C + if (C1->isAllOnes()) { + Cond = DAG.getSExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::OR, DL, VT, Cond, N2); + } + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) return V; } @@ -10451,10 +10477,17 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (SDValue V = DAG.simplifySelect(N0, N1, N2)) return V; - if (SDValue V = foldSelectOfConstants(N)) + if (SDValue V = foldBoolSelectToLogic(N, DAG)) return V; - if (SDValue V = foldBoolSelectToLogic(N, DAG)) + // select (not Cond), N1, N2 -> select Cond, N2, N1 + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) { + SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1); + 
SelectOp->setFlags(Flags); + return SelectOp; + } + + if (SDValue V = foldSelectOfConstants(N)) return V; // If we can fold this based on the true/false value, do so. @@ -10539,13 +10572,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } - // select (not Cond), N1, N2 -> select Cond, N2, N1 - if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) { - SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1); - SelectOp->setFlags(Flags); - return SelectOp; - } - // Fold selects based on a setcc into other things, such as min/max/abs. if (N0.getOpcode() == ISD::SETCC) { SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1); @@ -10955,7 +10981,7 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 || - !TLI.convertSelectOfConstantsToMath(VT) || + !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) || !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) || !ISD::isBuildVectorOfConstantSDNodes(N2.getNode())) return SDValue(); @@ -11282,6 +11308,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { if (N2 == N3) return N2; + // select_cc bool, 0, x, y, seteq -> select bool, y, x + if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 && + isNullConstant(N1)) + return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2); + // Determine if the condition we're dealing with is constant if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, SDLoc(N), false)) { @@ -12125,7 +12156,7 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) { if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) return SCC; - if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) { + if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) { EVT SetCCVT = getSetCCResultType(N00VT); // Don't do this transform for i1 because there's a select transform // that would reverse it. 
diff --git a/llvm/test/CodeGen/PowerPC/crbits.ll b/llvm/test/CodeGen/PowerPC/crbits.ll index 0bc9343..e545143 100644 --- a/llvm/test/CodeGen/PowerPC/crbits.ll +++ b/llvm/test/CodeGen/PowerPC/crbits.ll @@ -322,7 +322,7 @@ define signext i32 @exttest7(i32 signext %a) #0 { ; CHECK-LABEL: exttest7: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li 4, 8 -; CHECK-NEXT: cmpwi 3, 5 +; CHECK-NEXT: cmplwi 3, 5 ; CHECK-NEXT: li 3, 7 ; CHECK-NEXT: iseleq 3, 3, 4 ; CHECK-NEXT: blr @@ -330,7 +330,7 @@ define signext i32 @exttest7(i32 signext %a) #0 { ; CHECK-NO-ISEL-LABEL: exttest7: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: li 4, 8 -; CHECK-NO-ISEL-NEXT: cmpwi 3, 5 +; CHECK-NO-ISEL-NEXT: cmplwi 3, 5 ; CHECK-NO-ISEL-NEXT: li 3, 7 ; CHECK-NO-ISEL-NEXT: bclr 12, 2, 0 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry @@ -339,7 +339,7 @@ define signext i32 @exttest7(i32 signext %a) #0 { ; ; CHECK-P10-LABEL: exttest7: ; CHECK-P10: # %bb.0: # %entry -; CHECK-P10-NEXT: cmpwi r3, 5 +; CHECK-P10-NEXT: cmplwi r3, 5 ; CHECK-P10-NEXT: li r3, 8 ; CHECK-P10-NEXT: li r4, 7 ; CHECK-P10-NEXT: iseleq r3, r4, r3 diff --git a/llvm/test/CodeGen/PowerPC/prefer-dqform.ll b/llvm/test/CodeGen/PowerPC/prefer-dqform.ll index 5c5979e..0d0803d 100644 --- a/llvm/test/CodeGen/PowerPC/prefer-dqform.ll +++ b/llvm/test/CodeGen/PowerPC/prefer-dqform.ll @@ -14,14 +14,12 @@ define void @test(i32* dereferenceable(4) %.ial, i32* noalias dereferenceable(4) ; CHECK-P9-LABEL: test: ; CHECK-P9: # %bb.0: # %test_entry ; CHECK-P9-NEXT: andi. 
r3, r6, 15 +; CHECK-P9-NEXT: li r3, 2 +; CHECK-P9-NEXT: li r10, 1 ; CHECK-P9-NEXT: lwz r4, 0(r4) ; CHECK-P9-NEXT: lwz r5, 0(r5) -; CHECK-P9-NEXT: li r11, 1 -; CHECK-P9-NEXT: addic r3, r3, -1 -; CHECK-P9-NEXT: subfe r10, r3, r3 -; CHECK-P9-NEXT: li r3, 2 -; CHECK-P9-NEXT: not r10, r10 -; CHECK-P9-NEXT: iseleq r3, r11, r3 +; CHECK-P9-NEXT: iseleq r3, r10, r3 +; CHECK-P9-NEXT: subfic r10, r3, 1 ; CHECK-P9-NEXT: add r4, r10, r4 ; CHECK-P9-NEXT: srawi r4, r4, 4 ; CHECK-P9-NEXT: addze r4, r4 @@ -67,14 +65,13 @@ define void @test(i32* dereferenceable(4) %.ial, i32* noalias dereferenceable(4) ; ; CHECK-P10-LABEL: test: ; CHECK-P10: # %bb.0: # %test_entry -; CHECK-P10-NEXT: lwz r4, 0(r4) ; CHECK-P10-NEXT: andi. r3, r6, 15 ; CHECK-P10-NEXT: li r3, 2 ; CHECK-P10-NEXT: li r10, 1 +; CHECK-P10-NEXT: lwz r4, 0(r4) ; CHECK-P10-NEXT: lwz r5, 0(r5) ; CHECK-P10-NEXT: iseleq r3, r10, r3 -; CHECK-P10-NEXT: setnbc r10, eq -; CHECK-P10-NEXT: not r10, r10 +; CHECK-P10-NEXT: subfic r10, r3, 1 ; CHECK-P10-NEXT: add r4, r10, r4 ; CHECK-P10-NEXT: srawi r4, r4, 4 ; CHECK-P10-NEXT: addze r4, r4 diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll index bd55b98..f852bc8 100644 --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll @@ -160,9 +160,10 @@ define i32 @length3(ptr %X, ptr %Y) nounwind { ; X86-NEXT: popl %esi ; X86-NEXT: retl ; X86-NEXT: .LBB9_3: # %res_block -; X86-NEXT: setae %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpw %si, %dx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind @@ -292,9 +293,10 @@ define i32 @length5(ptr %X, ptr %Y) nounwind { ; X86-NEXT: popl %esi ; X86-NEXT: retl ; X86-NEXT: .LBB16_3: # %res_block -; X86-NEXT: setae %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: 
leal -1(%eax,%eax), %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %esi, %edx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind @@ -337,9 +339,10 @@ define i1 @length5_lt(ptr %X, ptr %Y) nounwind { ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: jmp .LBB18_2 ; X86-NEXT: .LBB18_3: # %res_block -; X86-NEXT: setae %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %esi, %edx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB18_2: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -373,8 +376,8 @@ define i32 @length7(ptr %X, ptr %Y) nounwind { ; X86-NEXT: .LBB19_2: # %res_block ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB19_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -422,8 +425,8 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind { ; X86-NEXT: .LBB21_2: # %res_block ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB21_3: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -457,8 +460,8 @@ define i32 @length8(ptr %X, ptr %Y) nounwind { ; X86-NEXT: .LBB22_2: # %res_block ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB22_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -619,8 +622,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind { ; X86-NEXT: .LBB29_3: # %res_block ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal 
-1(%eax,%eax), %eax +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB29_4: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -744,8 +747,8 @@ define i32 @length16(ptr %X, ptr %Y) nounwind { ; X86-NEXT: .LBB33_4: # %res_block ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB33_5: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -858,8 +861,8 @@ define i1 @length16_lt(ptr %x, ptr %y) nounwind { ; X86-NEXT: .LBB35_4: # %res_block ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB35_5: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -907,8 +910,8 @@ define i1 @length16_gt(ptr %x, ptr %y) nounwind { ; X86-NEXT: .LBB36_4: # %res_block ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: setae %dl -; X86-NEXT: leal -1(%edx,%edx), %edx +; X86-NEXT: sbbl %edx, %edx +; X86-NEXT: orl $1, %edx ; X86-NEXT: .LBB36_5: # %endblock ; X86-NEXT: testl %edx, %edx ; X86-NEXT: setg %al diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll index ae5d9e1..a555cba 100644 --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll @@ -140,11 +140,11 @@ define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { define i32 @length3(ptr %X, ptr %Y) nounwind { ; X64-LABEL: length3: ; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax +; X64-NEXT: movzwl (%rdi), %ecx +; X64-NEXT: movzwl (%rsi), %edx ; X64-NEXT: rolw $8, %cx -; X64-NEXT: cmpw %cx, %ax +; X64-NEXT: rolw $8, %dx +; X64-NEXT: cmpw %dx, %cx ; X64-NEXT: jne .LBB9_3 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: 
movzbl 2(%rdi), %eax @@ -152,9 +152,10 @@ define i32 @length3(ptr %X, ptr %Y) nounwind { ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB9_3: # %res_block -; X64-NEXT: setae %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpw %dx, %cx +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind ret i32 %m @@ -256,11 +257,11 @@ define i1 @length4_eq_const(ptr %X) nounwind { define i32 @length5(ptr %X, ptr %Y) nounwind { ; X64-LABEL: length5: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %ecx -; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: bswapl %edx +; X64-NEXT: cmpl %edx, %ecx ; X64-NEXT: jne .LBB16_3 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movzbl 4(%rdi), %eax @@ -268,9 +269,10 @@ define i32 @length5(ptr %X, ptr %Y) nounwind { ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB16_3: # %res_block -; X64-NEXT: setae %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind ret i32 %m @@ -295,11 +297,11 @@ define i1 @length5_eq(ptr %X, ptr %Y) nounwind { define i1 @length5_lt(ptr %X, ptr %Y) nounwind { ; X64-LABEL: length5_lt: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %ecx -; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: bswapl %edx +; X64-NEXT: cmpl %edx, %ecx ; X64-NEXT: jne .LBB18_3 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movzbl 4(%rdi), %eax @@ -309,9 +311,10 @@ define i1 @length5_lt(ptr %X, ptr %Y) nounwind { ; X64-NEXT: # kill: def 
$al killed $al killed $eax ; X64-NEXT: retq ; X64-NEXT: .LBB18_3: # %res_block -; X64-NEXT: setae %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -340,8 +343,8 @@ define i32 @length7(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB19_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB19_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind @@ -383,8 +386,8 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB21_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB21_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -516,8 +519,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB29_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB29_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind @@ -591,8 +594,8 @@ define i32 @length16(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB33_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB33_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind @@ -661,8 +664,8 @@ define i1 @length16_lt(ptr %x, ptr %y) nounwind { ; 
X64-NEXT: .LBB35_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB35_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -692,8 +695,8 @@ define i1 @length16_gt(ptr %x, ptr %y) nounwind { ; X64-NEXT: .LBB36_2: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: sbbl %edx, %edx +; X64-NEXT: orl $1, %edx ; X64-NEXT: .LBB36_3: # %endblock ; X64-NEXT: testl %edx, %edx ; X64-NEXT: setg %al @@ -772,8 +775,8 @@ define i32 @length24(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB38_3: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB38_4: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind @@ -864,8 +867,8 @@ define i1 @length24_lt(ptr %x, ptr %y) nounwind { ; X64-NEXT: .LBB40_3: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB40_4: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -902,8 +905,8 @@ define i1 @length24_gt(ptr %x, ptr %y) nounwind { ; X64-NEXT: .LBB41_3: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: sbbl %edx, %edx +; X64-NEXT: orl $1, %edx ; X64-NEXT: .LBB41_4: # %endblock ; X64-NEXT: testl %edx, %edx ; X64-NEXT: setg %al @@ -999,8 +1002,8 @@ define i32 @length31(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB43_4: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; 
X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB43_5: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind @@ -1097,8 +1100,8 @@ define i1 @length31_lt(ptr %x, ptr %y) nounwind { ; X64-NEXT: .LBB45_4: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB45_5: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -1142,8 +1145,8 @@ define i1 @length31_gt(ptr %x, ptr %y) nounwind { ; X64-NEXT: .LBB46_4: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: sbbl %edx, %edx +; X64-NEXT: orl $1, %edx ; X64-NEXT: .LBB46_5: # %endblock ; X64-NEXT: testl %edx, %edx ; X64-NEXT: setg %al @@ -1295,8 +1298,8 @@ define i32 @length32(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB49_4: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB49_5: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind @@ -1408,8 +1411,8 @@ define i1 @length32_lt(ptr %x, ptr %y) nounwind { ; X64-NEXT: .LBB51_4: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB51_5: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -1453,8 +1456,8 @@ define i1 @length32_gt(ptr %x, ptr %y) nounwind { ; X64-NEXT: .LBB52_4: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: sbbl %edx, %edx +; X64-NEXT: 
orl $1, %edx ; X64-NEXT: .LBB52_5: # %endblock ; X64-NEXT: testl %edx, %edx ; X64-NEXT: setg %al diff --git a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll index 9d49fb7..7626911 100644 --- a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll @@ -88,9 +88,10 @@ define i32 @length3(ptr %X, ptr %Y) nounwind optsize { ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: jmp .LBB4_2 ; X86-NEXT: .LBB4_3: # %res_block -; X86-NEXT: setae %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpw %si, %dx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB4_2: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -178,9 +179,10 @@ define i32 @length5(ptr %X, ptr %Y) nounwind optsize { ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: jmp .LBB9_2 ; X86-NEXT: .LBB9_3: # %res_block -; X86-NEXT: setae %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %esi, %edx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB9_2: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -229,8 +231,8 @@ define i32 @length8(ptr %X, ptr %Y) nounwind optsize { ; X86-NEXT: .LBB11_2: # %res_block ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB11_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll index 0844adb..c0c7b98 100644 --- a/llvm/test/CodeGen/X86/memcmp-optsize.ll +++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll @@ -68,11 +68,11 @@ define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind optsize { define i32 @length3(ptr %X, ptr %Y) nounwind optsize { ; X64-LABEL: length3: ; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; 
X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax +; X64-NEXT: movzwl (%rdi), %ecx +; X64-NEXT: movzwl (%rsi), %edx ; X64-NEXT: rolw $8, %cx -; X64-NEXT: cmpw %cx, %ax +; X64-NEXT: rolw $8, %dx +; X64-NEXT: cmpw %dx, %cx ; X64-NEXT: jne .LBB4_3 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movzbl 2(%rdi), %eax @@ -80,9 +80,10 @@ define i32 @length3(ptr %X, ptr %Y) nounwind optsize { ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB4_3: # %res_block -; X64-NEXT: setae %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpw %dx, %cx +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind ret i32 %m @@ -146,11 +147,11 @@ define i1 @length4_eq_const(ptr %X) nounwind optsize { define i32 @length5(ptr %X, ptr %Y) nounwind optsize { ; X64-LABEL: length5: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %ecx -; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: bswapl %edx +; X64-NEXT: cmpl %edx, %ecx ; X64-NEXT: jne .LBB9_3 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movzbl 4(%rdi), %eax @@ -158,9 +159,10 @@ define i32 @length5(ptr %X, ptr %Y) nounwind optsize { ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB9_3: # %res_block -; X64-NEXT: setae %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind ret i32 %m @@ -257,8 +259,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind optsize { ; X64-NEXT: .LBB15_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, 
%eax ; X64-NEXT: .LBB15_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind @@ -287,8 +289,8 @@ define i32 @length16(ptr %X, ptr %Y) nounwind optsize { ; X64-NEXT: .LBB16_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB16_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind diff --git a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll index 8f9b23a..cb45fd3 100644 --- a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll @@ -88,9 +88,10 @@ define i32 @length3(ptr %X, ptr %Y) nounwind !prof !14 { ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: jmp .LBB4_2 ; X86-NEXT: .LBB4_3: # %res_block -; X86-NEXT: setae %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpw %si, %dx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB4_2: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -178,9 +179,10 @@ define i32 @length5(ptr %X, ptr %Y) nounwind !prof !14 { ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: jmp .LBB9_2 ; X86-NEXT: .LBB9_3: # %res_block -; X86-NEXT: setae %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %esi, %edx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB9_2: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -229,8 +231,8 @@ define i32 @length8(ptr %X, ptr %Y) nounwind !prof !14 { ; X86-NEXT: .LBB11_2: # %res_block ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB11_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl diff --git 
a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll index 7b9571f..720344a 100644 --- a/llvm/test/CodeGen/X86/memcmp-pgso.ll +++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll @@ -68,11 +68,11 @@ define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind !prof !14 { define i32 @length3(ptr %X, ptr %Y) nounwind !prof !14 { ; X64-LABEL: length3: ; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax +; X64-NEXT: movzwl (%rdi), %ecx +; X64-NEXT: movzwl (%rsi), %edx ; X64-NEXT: rolw $8, %cx -; X64-NEXT: cmpw %cx, %ax +; X64-NEXT: rolw $8, %dx +; X64-NEXT: cmpw %dx, %cx ; X64-NEXT: jne .LBB4_3 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movzbl 2(%rdi), %eax @@ -80,9 +80,10 @@ define i32 @length3(ptr %X, ptr %Y) nounwind !prof !14 { ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB4_3: # %res_block -; X64-NEXT: setae %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpw %dx, %cx +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind ret i32 %m @@ -146,11 +147,11 @@ define i1 @length4_eq_const(ptr %X) nounwind !prof !14 { define i32 @length5(ptr %X, ptr %Y) nounwind !prof !14 { ; X64-LABEL: length5: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %ecx -; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: bswapl %edx +; X64-NEXT: cmpl %edx, %ecx ; X64-NEXT: jne .LBB9_3 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movzbl 4(%rdi), %eax @@ -158,9 +159,10 @@ define i32 @length5(ptr %X, ptr %Y) nounwind !prof !14 { ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB9_3: # %res_block -; X64-NEXT: setae %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, 
%ecx +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind ret i32 %m @@ -257,8 +259,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind !prof !14 { ; X64-NEXT: .LBB15_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB15_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind @@ -287,8 +289,8 @@ define i32 @length16(ptr %X, ptr %Y) nounwind !prof !14 { ; X64-NEXT: .LBB16_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB16_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll index 718db45..ab439b3 100644 --- a/llvm/test/CodeGen/X86/memcmp-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-x32.ll @@ -188,9 +188,10 @@ define i32 @length3(ptr %X, ptr %Y) nounwind { ; X86-NEXT: popl %esi ; X86-NEXT: retl ; X86-NEXT: .LBB11_3: # %res_block -; X86-NEXT: setae %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpw %si, %dx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind @@ -320,9 +321,10 @@ define i32 @length5(ptr %X, ptr %Y) nounwind { ; X86-NEXT: popl %esi ; X86-NEXT: retl ; X86-NEXT: .LBB18_3: # %res_block -; X86-NEXT: setae %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %esi, %edx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl %m = tail call i32 @memcmp(ptr 
%X, ptr %Y, i32 5) nounwind @@ -365,9 +367,10 @@ define i1 @length5_lt(ptr %X, ptr %Y) nounwind { ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: jmp .LBB20_2 ; X86-NEXT: .LBB20_3: # %res_block -; X86-NEXT: setae %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %esi, %edx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB20_2: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -401,8 +404,8 @@ define i32 @length7(ptr %X, ptr %Y) nounwind { ; X86-NEXT: .LBB21_2: # %res_block ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB21_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -433,8 +436,8 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind { ; X86-NEXT: .LBB22_2: # %res_block ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB22_3: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -485,8 +488,8 @@ define i32 @length8(ptr %X, ptr %Y) nounwind { ; X86-NEXT: .LBB24_2: # %res_block ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl $1, %eax ; X86-NEXT: .LBB24_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll index 6aac8b8..5295a70 100644 --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -166,11 +166,11 @@ define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { define i32 @length3(ptr %X, ptr %Y) nounwind { ; X64-LABEL: length3: ; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl 
(%rsi), %ecx -; X64-NEXT: rolw $8, %ax +; X64-NEXT: movzwl (%rdi), %ecx +; X64-NEXT: movzwl (%rsi), %edx ; X64-NEXT: rolw $8, %cx -; X64-NEXT: cmpw %cx, %ax +; X64-NEXT: rolw $8, %dx +; X64-NEXT: cmpw %dx, %cx ; X64-NEXT: jne .LBB11_3 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movzbl 2(%rdi), %eax @@ -178,9 +178,10 @@ define i32 @length3(ptr %X, ptr %Y) nounwind { ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB11_3: # %res_block -; X64-NEXT: setae %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpw %dx, %cx +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind ret i32 %m @@ -282,11 +283,11 @@ define i1 @length4_eq_const(ptr %X) nounwind { define i32 @length5(ptr %X, ptr %Y) nounwind { ; X64-LABEL: length5: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %ecx -; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: bswapl %edx +; X64-NEXT: cmpl %edx, %ecx ; X64-NEXT: jne .LBB18_3 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movzbl 4(%rdi), %eax @@ -294,9 +295,10 @@ define i32 @length5(ptr %X, ptr %Y) nounwind { ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB18_3: # %res_block -; X64-NEXT: setae %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind ret i32 %m @@ -321,11 +323,11 @@ define i1 @length5_eq(ptr %X, ptr %Y) nounwind { define i1 @length5_lt(ptr %X, ptr %Y) nounwind { ; X64-LABEL: length5_lt: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: 
bswapl %ecx -; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: bswapl %edx +; X64-NEXT: cmpl %edx, %ecx ; X64-NEXT: jne .LBB20_3 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movzbl 4(%rdi), %eax @@ -335,9 +337,10 @@ define i1 @length5_lt(ptr %X, ptr %Y) nounwind { ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; X64-NEXT: .LBB20_3: # %res_block -; X64-NEXT: setae %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -366,8 +369,8 @@ define i32 @length7(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB21_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB21_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind @@ -394,8 +397,8 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB22_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB22_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -542,8 +545,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB31_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB31_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind @@ -600,8 +603,8 @@ define i32 @length15(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB34_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; 
X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB34_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind @@ -628,8 +631,8 @@ define i1 @length15_lt(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB35_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB35_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -657,8 +660,8 @@ define i32 @length15_const(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB36_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rcx, %rdx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB36_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind @@ -698,8 +701,8 @@ define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB38_2: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rax, %rcx -; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: sbbl %edx, %edx +; X64-NEXT: orl $1, %edx ; X64-NEXT: .LBB38_3: # %endblock ; X64-NEXT: testl %edx, %edx ; X64-NEXT: setg %al @@ -731,8 +734,8 @@ define i32 @length16(ptr %X, ptr %Y) nounwind { ; X64-NEXT: .LBB39_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB39_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind @@ -801,8 +804,8 @@ define i1 @length16_lt(ptr %x, ptr %y) nounwind { ; X64-NEXT: .LBB41_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal 
-1(%rax,%rax), %eax +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl $1, %eax ; X64-NEXT: .LBB41_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -832,8 +835,8 @@ define i1 @length16_gt(ptr %x, ptr %y) nounwind { ; X64-NEXT: .LBB42_2: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: sbbl %edx, %edx +; X64-NEXT: orl $1, %edx ; X64-NEXT: .LBB42_3: # %endblock ; X64-NEXT: testl %edx, %edx ; X64-NEXT: setg %al diff --git a/llvm/test/CodeGen/X86/midpoint-int.ll b/llvm/test/CodeGen/X86/midpoint-int.ll index 40947c2..601166d 100644 --- a/llvm/test/CodeGen/X86/midpoint-int.ll +++ b/llvm/test/CodeGen/X86/midpoint-int.ll @@ -303,38 +303,41 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl $-1, %ebx +; X86-NEXT: sbbl %ebp, %edx +; X86-NEXT: setl %dl +; X86-NEXT: movzbl %dl, %ebx ; X86-NEXT: jl .LBB5_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: xorl %ebp, %ebp -; X86-NEXT: movl $1, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: movl %ebp, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: jmp .LBB5_3 ; X86-NEXT: .LBB5_1: -; X86-NEXT: movl $-1, %ebp ; X86-NEXT: movl %edi, %edx ; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: .LBB5_3: +; X86-NEXT: negl %ebx +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: orl $1, %ebp ; 
X86-NEXT: subl %esi, %eax ; X86-NEXT: sbbl %edx, %edi ; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebp -; X86-NEXT: mull %ebx -; X86-NEXT: addl %ebp, %edx +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: mull %ebp +; X86-NEXT: addl %ebx, %edx ; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebx, %edi +; X86-NEXT: imull %ebp, %edi ; X86-NEXT: addl %edi, %edx -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: addl {{[0-9]+}}(%esp), %eax +; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -373,38 +376,42 @@ define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl $-1, %ebx -; X86-NEXT: jb .LBB6_1 +; X86-NEXT: sbbl %ebp, %edx +; X86-NEXT: setb %dl +; X86-NEXT: sbbl %ebx, %ebx +; X86-NEXT: testb %dl, %dl +; X86-NEXT: jne .LBB6_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: xorl %ebp, %ebp -; X86-NEXT: movl $1, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: movl %ebp, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: jmp .LBB6_3 ; X86-NEXT: .LBB6_1: -; X86-NEXT: movl $-1, %ebp ; X86-NEXT: movl %edi, %edx ; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: .LBB6_3: +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: orl $1, %ebp ; X86-NEXT: subl %esi, %eax ; X86-NEXT: sbbl %edx, %edi ; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebp -; X86-NEXT: mull 
%ebx -; X86-NEXT: addl %ebp, %edx +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: mull %ebp +; X86-NEXT: addl %ebx, %edx ; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebx, %edi +; X86-NEXT: imull %ebp, %edi ; X86-NEXT: addl %edi, %edx -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: addl {{[0-9]+}}(%esp), %eax +; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -455,29 +462,30 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind { ; X86-NEXT: cmpl %esi, %eax ; X86-NEXT: movl %edi, %edx ; X86-NEXT: sbbl %ecx, %edx -; X86-NEXT: movl $-1, %ebx +; X86-NEXT: setl %dl +; X86-NEXT: movzbl %dl, %ebx ; X86-NEXT: jl .LBB7_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: xorl %ebp, %ebp -; X86-NEXT: movl $1, %ebx ; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-NEXT: movl %esi, %edx ; X86-NEXT: jmp .LBB7_3 ; X86-NEXT: .LBB7_1: -; X86-NEXT: movl $-1, %ebp ; X86-NEXT: movl %edi, (%esp) # 4-byte Spill ; X86-NEXT: movl %eax, %edx ; X86-NEXT: movl %ecx, %edi ; X86-NEXT: movl %esi, %eax ; X86-NEXT: .LBB7_3: +; X86-NEXT: negl %ebx +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: orl $1, %ebp ; X86-NEXT: subl %edx, %eax ; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload ; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebp -; X86-NEXT: mull %ebx -; X86-NEXT: addl %ebp, %edx +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: mull %ebp +; X86-NEXT: addl %ebx, %edx ; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebx, %edi +; X86-NEXT: imull %ebp, %edi ; X86-NEXT: addl %edi, %edx ; X86-NEXT: addl %esi, %eax ; X86-NEXT: adcl %ecx, %edx @@ -522,39 +530,42 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl (%edx), %eax ; X86-NEXT: movl 4(%edx), %edi -; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: 
cmpl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl $-1, %ebx +; X86-NEXT: sbbl %ebp, %edx +; X86-NEXT: setl %dl +; X86-NEXT: movzbl %dl, %ebx ; X86-NEXT: jl .LBB8_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: xorl %ebp, %ebp -; X86-NEXT: movl $1, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: movl %ebp, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: jmp .LBB8_3 ; X86-NEXT: .LBB8_1: -; X86-NEXT: movl $-1, %ebp ; X86-NEXT: movl %edi, %edx ; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: .LBB8_3: +; X86-NEXT: negl %ebx +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: orl $1, %ebp ; X86-NEXT: subl %esi, %eax ; X86-NEXT: sbbl %edx, %edi ; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebp -; X86-NEXT: mull %ebx -; X86-NEXT: addl %ebp, %edx +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: mull %ebp +; X86-NEXT: addl %ebx, %edx ; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebx, %edi +; X86-NEXT: imull %ebp, %edi ; X86-NEXT: addl %edi, %edx -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: addl {{[0-9]+}}(%esp), %eax +; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -606,29 +617,30 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; X86-NEXT: cmpl %esi, %eax ; X86-NEXT: movl %edi, %edx ; X86-NEXT: sbbl %ecx, %edx -; X86-NEXT: movl $-1, %ebx +; X86-NEXT: setl %dl +; X86-NEXT: movzbl %dl, %ebx ; X86-NEXT: jl .LBB9_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: xorl %ebp, %ebp -; X86-NEXT: movl $1, %ebx ; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-NEXT: movl %esi, %edx ; X86-NEXT: jmp .LBB9_3 ; X86-NEXT: .LBB9_1: -; X86-NEXT: movl $-1, %ebp ; X86-NEXT: movl %edi, (%esp) # 4-byte Spill ; 
X86-NEXT: movl %eax, %edx ; X86-NEXT: movl %ecx, %edi ; X86-NEXT: movl %esi, %eax ; X86-NEXT: .LBB9_3: +; X86-NEXT: negl %ebx +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: orl $1, %ebp ; X86-NEXT: subl %edx, %eax ; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload ; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebp -; X86-NEXT: mull %ebx -; X86-NEXT: addl %ebp, %edx +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: mull %ebp +; X86-NEXT: addl %ebx, %edx ; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebx, %edi +; X86-NEXT: imull %ebp, %edi ; X86-NEXT: addl %edi, %edx ; X86-NEXT: addl %esi, %eax ; X86-NEXT: adcl %ecx, %edx @@ -952,12 +964,12 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax ; X64-NEXT: cmpb %al, %dil -; X64-NEXT: setle %cl +; X64-NEXT: setg %cl ; X64-NEXT: movl %edi, %edx ; X64-NEXT: cmovgl %esi, %edx ; X64-NEXT: cmovgl %edi, %eax -; X64-NEXT: addb %cl, %cl -; X64-NEXT: decb %cl +; X64-NEXT: negb %cl +; X64-NEXT: orb $1, %cl ; X64-NEXT: subb %dl, %al ; X64-NEXT: shrb %al ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -970,7 +982,7 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: cmpb %al, %cl -; X86-NEXT: setle %dl +; X86-NEXT: setg %dl ; X86-NEXT: jg .LBB15_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: movb %cl, %ah @@ -980,8 +992,8 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { ; X86-NEXT: movb %cl, %al ; X86-NEXT: .LBB15_3: ; X86-NEXT: subb %ah, %al -; X86-NEXT: addb %dl, %dl -; X86-NEXT: decb %dl +; X86-NEXT: negb %dl +; X86-NEXT: orb $1, %dl ; X86-NEXT: shrb %al ; X86-NEXT: mulb %dl ; X86-NEXT: addb %cl, %al @@ -1002,12 +1014,12 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax ; X64-NEXT: cmpb %al, %dil -; X64-NEXT: setbe %cl +; X64-NEXT: seta %cl ; X64-NEXT: movl %edi, %edx ; X64-NEXT: cmoval %esi, 
%edx ; X64-NEXT: cmoval %edi, %eax -; X64-NEXT: addb %cl, %cl -; X64-NEXT: decb %cl +; X64-NEXT: negb %cl +; X64-NEXT: orb $1, %cl ; X64-NEXT: subb %dl, %al ; X64-NEXT: shrb %al ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -1020,7 +1032,7 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: cmpb %al, %cl -; X86-NEXT: setbe %dl +; X86-NEXT: seta %dl ; X86-NEXT: ja .LBB16_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: movb %cl, %ah @@ -1030,8 +1042,8 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { ; X86-NEXT: movb %cl, %al ; X86-NEXT: .LBB16_3: ; X86-NEXT: subb %ah, %al -; X86-NEXT: addb %dl, %dl -; X86-NEXT: decb %dl +; X86-NEXT: negb %dl +; X86-NEXT: orb $1, %dl ; X86-NEXT: shrb %al ; X86-NEXT: mulb %dl ; X86-NEXT: addb %cl, %al @@ -1054,13 +1066,13 @@ define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movzbl (%rdi), %ecx ; X64-NEXT: cmpb %sil, %cl -; X64-NEXT: setle %dl +; X64-NEXT: setg %dl ; X64-NEXT: movl %ecx, %edi ; X64-NEXT: cmovgl %esi, %edi ; X64-NEXT: movl %ecx, %eax ; X64-NEXT: cmovlel %esi, %eax -; X64-NEXT: addb %dl, %dl -; X64-NEXT: decb %dl +; X64-NEXT: negb %dl +; X64-NEXT: orb $1, %dl ; X64-NEXT: subb %dil, %al ; X64-NEXT: shrb %al ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -1074,7 +1086,7 @@ define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzbl (%ecx), %ecx ; X86-NEXT: cmpb %al, %cl -; X86-NEXT: setle %dl +; X86-NEXT: setg %dl ; X86-NEXT: jg .LBB17_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: movb %cl, %ah @@ -1084,8 +1096,8 @@ define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind { ; X86-NEXT: movb %cl, %al ; X86-NEXT: .LBB17_3: ; X86-NEXT: subb %ah, %al -; X86-NEXT: addb %dl, %dl -; X86-NEXT: decb %dl +; X86-NEXT: negb %dl +; X86-NEXT: orb $1, %dl ; X86-NEXT: shrb %al ; X86-NEXT: mulb %dl 
; X86-NEXT: addb %cl, %al @@ -1107,12 +1119,12 @@ define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind { ; X64: # %bb.0: ; X64-NEXT: movzbl (%rsi), %eax ; X64-NEXT: cmpb %al, %dil -; X64-NEXT: setle %cl +; X64-NEXT: setg %cl ; X64-NEXT: movl %edi, %edx ; X64-NEXT: cmovgl %eax, %edx ; X64-NEXT: cmovgl %edi, %eax -; X64-NEXT: addb %cl, %cl -; X64-NEXT: decb %cl +; X64-NEXT: negb %cl +; X64-NEXT: orb $1, %cl ; X64-NEXT: subb %dl, %al ; X64-NEXT: shrb %al ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -1126,7 +1138,7 @@ define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl (%eax), %eax ; X86-NEXT: cmpb %al, %cl -; X86-NEXT: setle %dl +; X86-NEXT: setg %dl ; X86-NEXT: jg .LBB18_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: movb %cl, %ah @@ -1136,8 +1148,8 @@ define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind { ; X86-NEXT: movb %cl, %al ; X86-NEXT: .LBB18_3: ; X86-NEXT: subb %ah, %al -; X86-NEXT: addb %dl, %dl -; X86-NEXT: decb %dl +; X86-NEXT: negb %dl +; X86-NEXT: orb $1, %dl ; X86-NEXT: shrb %al ; X86-NEXT: mulb %dl ; X86-NEXT: addb %cl, %al @@ -1160,12 +1172,12 @@ define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; X64-NEXT: movzbl (%rdi), %ecx ; X64-NEXT: movzbl (%rsi), %eax ; X64-NEXT: cmpb %al, %cl -; X64-NEXT: setle %dl +; X64-NEXT: setg %dl ; X64-NEXT: movl %ecx, %esi ; X64-NEXT: cmovgl %eax, %esi ; X64-NEXT: cmovgl %ecx, %eax -; X64-NEXT: addb %dl, %dl -; X64-NEXT: decb %dl +; X64-NEXT: negb %dl +; X64-NEXT: orb $1, %dl ; X64-NEXT: subb %sil, %al ; X64-NEXT: shrb %al ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -1180,7 +1192,7 @@ define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; X86-NEXT: movzbl (%ecx), %ecx ; X86-NEXT: movzbl (%eax), %eax ; X86-NEXT: cmpb %al, %cl -; X86-NEXT: setle %dl +; X86-NEXT: setg %dl ; X86-NEXT: jg .LBB19_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: movb %cl, %ah @@ -1190,8 +1202,8 @@ 
define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; X86-NEXT: movb %cl, %al ; X86-NEXT: .LBB19_3: ; X86-NEXT: subb %ah, %al -; X86-NEXT: addb %dl, %dl -; X86-NEXT: decb %dl +; X86-NEXT: negb %dl +; X86-NEXT: orb $1, %dl ; X86-NEXT: shrb %al ; X86-NEXT: mulb %dl ; X86-NEXT: addb %cl, %al diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll index bad7ef4..fbdec48 100644 --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -759,22 +759,21 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax ; ATHLON-NEXT: xorl %edx, %edx ; ATHLON-NEXT: orl {{[0-9]+}}(%esp), %eax -; ATHLON-NEXT: movl $-1, %ecx -; ATHLON-NEXT: movl $1, %eax -; ATHLON-NEXT: cmovel %ecx, %eax -; ATHLON-NEXT: cmovel %ecx, %edx +; ATHLON-NEXT: sete %dl +; ATHLON-NEXT: negl %edx +; ATHLON-NEXT: movl %edx, %eax +; ATHLON-NEXT: orl $1, %eax ; ATHLON-NEXT: retl ; ; MCU-LABEL: test10: ; MCU: # %bb.0: -; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: movl $-1, %eax -; MCU-NEXT: movl $-1, %edx -; MCU-NEXT: je .LBB11_2 -; MCU-NEXT: # %bb.1: +; MCU-NEXT: movl %edx, %ecx ; MCU-NEXT: xorl %edx, %edx -; MCU-NEXT: movl $1, %eax -; MCU-NEXT: .LBB11_2: +; MCU-NEXT: orl %ecx, %eax +; MCU-NEXT: sete %dl +; MCU-NEXT: negl %edx +; MCU-NEXT: movl %edx, %eax +; MCU-NEXT: orl $1, %eax ; MCU-NEXT: retl %cmp = icmp eq i64 %x, 0 %cond = select i1 %cmp, i64 -1, i64 1 @@ -932,22 +931,21 @@ define i64 @eqzero_all_ones_or_const(i64 %x) { ; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax ; ATHLON-NEXT: xorl %edx, %edx ; ATHLON-NEXT: orl {{[0-9]+}}(%esp), %eax -; ATHLON-NEXT: movl $-1, %ecx -; ATHLON-NEXT: movl $42, %eax -; ATHLON-NEXT: cmovel %ecx, %eax -; ATHLON-NEXT: cmovel %ecx, %edx +; ATHLON-NEXT: sete %dl +; ATHLON-NEXT: negl %edx +; ATHLON-NEXT: movl %edx, %eax +; ATHLON-NEXT: orl $42, %eax ; ATHLON-NEXT: retl ; ; MCU-LABEL: eqzero_all_ones_or_const: ; MCU: # %bb.0: -; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: 
movl $-1, %eax -; MCU-NEXT: movl $-1, %edx -; MCU-NEXT: je .LBB16_2 -; MCU-NEXT: # %bb.1: +; MCU-NEXT: movl %edx, %ecx ; MCU-NEXT: xorl %edx, %edx -; MCU-NEXT: movl $42, %eax -; MCU-NEXT: .LBB16_2: +; MCU-NEXT: orl %ecx, %eax +; MCU-NEXT: sete %dl +; MCU-NEXT: negl %edx +; MCU-NEXT: movl %edx, %eax +; MCU-NEXT: orl $42, %eax ; MCU-NEXT: retl %z = icmp eq i64 %x, 0 %r = select i1 %z, i64 -1, i64 42 diff --git a/llvm/test/CodeGen/X86/select_const.ll b/llvm/test/CodeGen/X86/select_const.ll index 32a827f..cbf55fb 100644 --- a/llvm/test/CodeGen/X86/select_const.ll +++ b/llvm/test/CodeGen/X86/select_const.ll @@ -206,9 +206,9 @@ define i32 @select_C_Cplus1_signext(i1 signext %cond) { define i32 @select_lea_2(i1 zeroext %cond) { ; CHECK-LABEL: select_lea_2: ; CHECK: # %bb.0: -; CHECK-NEXT: xorb $1, %dil -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: leal -1(%rax,%rax), %eax +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %eax +; CHECK-NEXT: orl $1, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -1, i32 1 ret i32 %sel -- 2.7.4